summaryrefslogtreecommitdiffstats
path: root/net/ftp/ftp_util.cc
blob: 86c72bb2a877ab48d20cf829ee5079cfbb31751d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/ftp/ftp_util.h"

#include <map>
#include <vector>

#include "base/i18n/case_conversion.h"
#include "base/i18n/char_iterator.h"
#include "base/logging.h"
#include "base/memory/singleton.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/string_tokenizer.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "third_party/icu/public/common/unicode/uchar.h"
#include "third_party/icu/public/i18n/unicode/datefmt.h"
#include "third_party/icu/public/i18n/unicode/dtfmtsym.h"

using base::StringPiece16;

// For examples of Unix<->VMS path conversions, see the unit test file. On VMS
// a path looks different depending on whether it's a file or a directory.

namespace net {

// Converts |unix_path|, a Unix-style path naming a file, to VMS syntax.
// Returns an empty string for empty input.
//
// Absolute paths (leading '/'):
//   - only slashes                  -> "[]"
//   - one component      "/a"       -> "a"
//   - two components     "/a/b"     -> "a:[000000]b"
//   - more components    "/a/b/c/d" -> "a:[b.c]d"
// Relative paths:
//   - one component is returned exactly as given
//   - more components    "a/b/c"    -> "[.a.b]c"
//
// The path is split on '/'; for absolute paths, repeated or trailing slashes
// therefore do not change the result.
// static
std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) {
  if (unix_path.empty())
    return std::string();

  base::StringTokenizer tokenizer(unix_path, "/");
  std::vector<std::string> tokens;
  while (tokenizer.GetNext())
    tokens.push_back(tokenizer.token());

  if (unix_path[0] == '/') {
    // It's an absolute path.

    if (tokens.empty()) {
      // It's just "/" or a series of slashes, which all mean the same thing.
      // Do not assert on the length here: the path may originate from
      // untrusted input such as a URL.
      return "[]";
    }

    if (tokens.size() == 1) {
      // Return the component itself rather than "everything after the first
      // character", so extra leading or trailing slashes don't leak through.
      return tokens[0];
    }

    // The first component plays the role of the VMS device name.
    std::string result(tokens[0] + ":[");
    if (tokens.size() == 2) {
      // Don't ask why, it just works that way on VMS.
      result.append("000000");
    } else {
      // Everything between the first and the last component is the directory.
      result.append(tokens[1]);
      for (size_t i = 2; i < tokens.size() - 1; i++)
        result.append("." + tokens[i]);
    }
    // The last component is the file name.
    result.append("]" + tokens[tokens.size() - 1]);
    return result;
  }

  // It's a relative path. A non-empty path that does not start with a slash
  // always has at least one component, so |tokens| is not empty here.
  if (tokens.size() == 1)
    return unix_path;

  std::string result("[");
  for (size_t i = 0; i < tokens.size() - 1; i++)
    result.append("." + tokens[i]);
  result.append("]" + tokens[tokens.size() - 1]);
  return result;
}

// Converts |unix_path|, a Unix-style path naming a directory, to VMS syntax.
// Returns an empty string for empty input.
// static
std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) {
  if (unix_path.empty())
    return std::string();

  // Rather than duplicating the conversion rules, pretend the directory
  // contains a file with a one-character name, convert that file path with
  // UnixFilePathToVMS(), and then strip the fake name from the result.
  std::string fake_file_path(unix_path);
  if (*fake_file_path.rbegin() != '/')
    fake_file_path += "/";
  fake_file_path += "x";

  const std::string vms_file_path(UnixFilePathToVMS(fake_file_path));
  return vms_file_path.substr(0, vms_file_path.length() - 1);
}

// Converts the VMS path |vms_path| to Unix syntax. An empty path becomes ".",
// "[]" becomes "/", and a path that already starts with '/' is returned
// unchanged. The result never ends with a slash, except for the two
// early-return cases ("/" for "[]" and pass-through of slash-prefixed input).
// static
std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) {
  if (vms_path.empty())
    return ".";

  // This is not really a VMS path. Most likely the server is emulating UNIX.
  // Return path as-is.
  if (vms_path[0] == '/')
    return vms_path;

  if (vms_path == "[]")
    return "/";

  const bool is_relative = (vms_path[0] == '[');

  std::string unix_path(vms_path);
  if (is_relative) {
    // Drop the "[." prefix that introduces a relative directory.
    ReplaceFirstSubstringAfterOffset(&unix_path, 0, "[.", std::string());
  } else {
    // Absolute: the part before ":[" becomes the first path component, and
    // the "000000" directory collapses into a plain separator.
    unix_path.insert(0, "/");
    ReplaceSubstringsAfterOffset(&unix_path, 0, ":[000000]", "/");
    ReplaceSubstringsAfterOffset(&unix_path, 0, ":[", "/");
  }

  // Both the directory separator '.' and the closing bracket turn into '/'.
  for (size_t i = 0; i < unix_path.length(); ++i) {
    if (unix_path[i] == '.' || unix_path[i] == ']')
      unix_path[i] = '/';
  }

  // Make sure the result doesn't end with a slash.
  if (!unix_path.empty() && *unix_path.rbegin() == '/')
    unix_path.erase(unix_path.length() - 1);

  return unix_path;
}

namespace {

// Lazy-initialized map of abbreviated month names.
class AbbreviatedMonthsMap {
 public:
  static AbbreviatedMonthsMap* GetInstance() {
    return Singleton<AbbreviatedMonthsMap>::get();
  }

  // Converts abbreviated month name |text| to its number (in range 1-12).
  // On success returns true and puts the number in |number|.
  bool GetMonthNumber(const base::string16& text, int* number) {
    // Ignore the case of the month names. The simplest way to handle that
    // is to make everything lowercase.
    base::string16 text_lower(base::i18n::ToLower(text));

    if (map_.find(text_lower) == map_.end())
      return false;

    *number = map_[text_lower];
    return true;
  }

 private:
  friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>;

  // Constructor, initializes the map based on ICU data. It is much faster
  // to do that just once.
  AbbreviatedMonthsMap() {
    int32_t locales_count;
    const icu::Locale* locales =
        icu::DateFormat::getAvailableLocales(locales_count);

    for (int32_t locale = 0; locale < locales_count; locale++) {
      UErrorCode status(U_ZERO_ERROR);

      icu::DateFormatSymbols format_symbols(locales[locale], status);

      // If we cannot get format symbols for some locale, it's not a fatal
      // error. Just try another one.
      if (U_FAILURE(status))
        continue;

      int32_t months_count;
      const icu::UnicodeString* months =
          format_symbols.getShortMonths(months_count);

      for (int32_t month = 0; month < months_count; month++) {
        base::string16 month_name(months[month].getBuffer(),
                            static_cast<size_t>(months[month].length()));

        // Ignore the case of the month names. The simplest way to handle that
        // is to make everything lowercase.
        month_name = base::i18n::ToLower(month_name);

        map_[month_name] = month + 1;

        // Sometimes ICU returns longer strings, but in FTP listings a shorter
        // abbreviation is used (for example for the Russian locale). Make sure
        // we always have a map entry for a three-letter abbreviation.
        map_[month_name.substr(0, 3)] = month + 1;
      }
    }

    // Fail loudly if the data returned by ICU is obviously incomplete.
    // This is intended to catch cases like http://crbug.com/177428
    // much earlier. Note that the issue above turned out to be non-trivial
    // to reproduce - crash data is much better indicator of a problem
    // than incomplete bug reports.
    CHECK_EQ(1, map_[ASCIIToUTF16("jan")]);
    CHECK_EQ(2, map_[ASCIIToUTF16("feb")]);
    CHECK_EQ(3, map_[ASCIIToUTF16("mar")]);
    CHECK_EQ(4, map_[ASCIIToUTF16("apr")]);
    CHECK_EQ(5, map_[ASCIIToUTF16("may")]);
    CHECK_EQ(6, map_[ASCIIToUTF16("jun")]);
    CHECK_EQ(7, map_[ASCIIToUTF16("jul")]);
    CHECK_EQ(8, map_[ASCIIToUTF16("aug")]);
    CHECK_EQ(9, map_[ASCIIToUTF16("sep")]);
    CHECK_EQ(10, map_[ASCIIToUTF16("oct")]);
    CHECK_EQ(11, map_[ASCIIToUTF16("nov")]);
    CHECK_EQ(12, map_[ASCIIToUTF16("dec")]);
  }

  // Maps lowercase month names to numbers in range 1-12.
  std::map<base::string16, int> map_;

  DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap);
};

}  // namespace

// Converts the abbreviated month name |text| to its number by consulting the
// shared AbbreviatedMonthsMap. Returns true and fills in |number| when the
// name is recognized.
// static
bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text,
                                       int* number) {
  AbbreviatedMonthsMap* const months = AbbreviatedMonthsMap::GetInstance();
  return months->GetMonthNumber(text, number);
}

// Converts the three date columns of an "ls -l" style listing to a time.
// |month| is an abbreviated month name, |day| the day of the month, and
// |rest| is either a year or a time of day written as HH:MM or H:MM. When
// |rest| is a time the year is not part of the listing, so it is guessed
// relative to |current_time|: the current year is used unless that would put
// the date after the current local date, in which case the previous year is
// used.
//
// Returns true and stores the time in |result| on success. Returns false,
// leaving |result| untouched, if a column cannot be parsed or a parsed field
// is out of range.
// static
bool FtpUtil::LsDateListingToTime(const base::string16& month,
                                  const base::string16& day,
                                  const base::string16& rest,
                                  const base::Time& current_time,
                                  base::Time* result) {
  base::Time::Exploded time_exploded = { 0 };

  if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) {
    // Work around garbage sent by some servers in the same column
    // as the month. Take just last 3 characters of the string.
    if (month.length() < 3 ||
        !AbbreviatedMonthToNumber(month.substr(month.length() - 3),
                                  &time_exploded.month)) {
      return false;
    }
  }

  if (!base::StringToInt(day, &time_exploded.day_of_month))
    return false;
  if (time_exploded.day_of_month > 31)
    return false;

  if (!base::StringToInt(rest, &time_exploded.year)) {
    // Maybe it's time. Does it look like time (HH:MM)?
    if (rest.length() == 5 && rest[2] == ':') {
      if (!base::StringToInt(StringPiece16(rest.begin(), rest.begin() + 2),
                             &time_exploded.hour)) {
        return false;
      }

      if (!base::StringToInt(StringPiece16(rest.begin() + 3, rest.begin() + 5),
                             &time_exploded.minute)) {
        return false;
      }
    } else if (rest.length() == 4 && rest[1] == ':') {
      // Sometimes it's just H:MM.
      if (!base::StringToInt(StringPiece16(rest.begin(), rest.begin() + 1),
                             &time_exploded.hour)) {
        return false;
      }

      if (!base::StringToInt(StringPiece16(rest.begin() + 2, rest.begin() + 4),
                             &time_exploded.minute)) {
        return false;
      }
    } else {
      return false;
    }

    // Guess the year. This also overwrites whatever the failed year parse
    // above may have left in |time_exploded.year|.
    base::Time::Exploded current_exploded;
    current_time.LocalExplode(&current_exploded);

    // If it's not possible for the parsed date to be in the current year,
    // use the previous year.
    if (time_exploded.month > current_exploded.month ||
        (time_exploded.month == current_exploded.month &&
         time_exploded.day_of_month > current_exploded.day_of_month)) {
      time_exploded.year = current_exploded.year - 1;
    } else {
      time_exploded.year = current_exploded.year;
    }
  }

  // The columns come from the server and have only been checked for being
  // numeric so far. Reject out-of-range fields (for example a zero or negative
  // day, or a time such as "99:99") instead of converting them;
  // WindowsDateListingToTime() applies the same check.
  if (!time_exploded.HasValidValues())
    return false;

  // We don't know the time zone of the listing, so just use local time.
  *result = base::Time::FromLocalExploded(time_exploded);
  return true;
}

// Converts the date and time columns of a Windows-style listing to a time.
// |date| must look like MM-DD-YY[YY] and |time| like HH:MM, optionally
// followed by "AM" or "PM". Returns true and stores the time in |result| on
// success; returns false, leaving |result| untouched, otherwise.
// static
bool FtpUtil::WindowsDateListingToTime(const base::string16& date,
                                       const base::string16& time,
                                       base::Time* result) {
  base::Time::Exploded exploded = { 0 };

  // Date should be in format MM-DD-YY[YY].
  std::vector<base::string16> date_fields;
  base::SplitString(date, '-', &date_fields);
  if (date_fields.size() != 3 ||
      !base::StringToInt(date_fields[0], &exploded.month) ||
      !base::StringToInt(date_fields[1], &exploded.day_of_month) ||
      !base::StringToInt(date_fields[2], &exploded.year) ||
      exploded.year < 0) {
    return false;
  }

  // If year has only two digits then assume that 00-79 is 2000-2079,
  // and 80-99 is 1980-1999.
  if (exploded.year < 100)
    exploded.year += (exploded.year < 80) ? 2000 : 1900;

  // Time should be in format HH:MM[(AM|PM)], which means it is either exactly
  // five or exactly seven characters long.
  const size_t time_length = time.length();
  if (time_length != 5 && time_length != 7)
    return false;

  std::vector<base::string16> clock_fields;
  base::SplitString(time.substr(0, 5), ':', &clock_fields);
  if (clock_fields.size() != 2 ||
      !base::StringToInt(clock_fields[0], &exploded.hour) ||
      !base::StringToInt(clock_fields[1], &exploded.minute) ||
      !exploded.HasValidValues()) {
    return false;
  }

  if (time_length == 7) {
    // Convert from the 12-hour clock: 12 AM is midnight, and PM hours below
    // 12 move to the afternoon.
    const base::string16 suffix(time.substr(5, 2));
    const bool is_pm = EqualsASCII(suffix, "PM");
    if (!is_pm && !EqualsASCII(suffix, "AM"))
      return false;

    if (is_pm) {
      if (exploded.hour < 12)
        exploded.hour += 12;
    } else if (exploded.hour == 12) {
      exploded.hour = 0;
    }
  }

  // We don't know the time zone of the server, so just use local time.
  *result = base::Time::FromLocalExploded(exploded);
  return true;
}

// Returns what is left of |text| after skipping its first |columns|
// whitespace-separated columns, with leading and trailing whitespace removed.
// If |text| has fewer columns than requested, the result is empty.
// static
base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text,
                                                  int columns) {
  base::i18n::UTF16CharIterator cursor(&text);

  // TODO(jshin): Is u_isspace the right function to use here?
  for (int remaining = columns; remaining > 0; --remaining) {
    // Step over the whitespace in front of the column...
    for (; !cursor.end() && u_isspace(cursor.get()); cursor.Advance()) {
    }

    // ...and then over the column's own text.
    for (; !cursor.end() && !u_isspace(cursor.get()); cursor.Advance()) {
    }
  }

  base::string16 remainder(text.substr(cursor.array_pos()));
  TrimWhitespace(remainder, TRIM_ALL, &remainder);
  return remainder;
}

}  // namespace net