summaryrefslogtreecommitdiffstats
path: root/net/ftp/ftp_directory_listing_parser_ls.cc
blob: 2d23b6aecf5996361a0b7bb951981306b44be54d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/ftp/ftp_directory_listing_parser_ls.h"

#include <vector>

#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "net/ftp/ftp_directory_listing_parser.h"
#include "net/ftp/ftp_util.h"

namespace {

// Converts a date listing given as two separate columns into a base::Time.
//
// |date| is expected to look like "YYYY-MM-DD": exactly three integers
// separated by '-'. |time| is expected to look like "HH:MM": exactly five
// characters holding two integers separated by ':'.
//
// On success stores the converted value in |*result| and returns true.
// Returns false, leaving |*result| untouched, if either column has the wrong
// shape, any component is not an integer, or the assembled fields are
// rejected by base::Time::Exploded::HasValidValues().
bool TwoColumnDateListingToTime(const base::string16& date,
                                const base::string16& time,
                                base::Time* result) {
  base::Time::Exploded exploded = { 0 };

  // Date column: year, month and day of month, in that order.
  std::vector<base::string16> ymd;
  base::SplitString(date, '-', &ymd);
  const bool date_parsed =
      ymd.size() == 3 &&
      base::StringToInt(ymd[0], &exploded.year) &&
      base::StringToInt(ymd[1], &exploded.month) &&
      base::StringToInt(ymd[2], &exploded.day_of_month);
  if (!date_parsed)
    return false;

  // Time column: the fixed width of five characters rules out anything
  // other than the HH:MM shape (for example a seconds component).
  if (time.length() != 5)
    return false;

  std::vector<base::string16> hm;
  base::SplitString(time, ':', &hm);
  const bool time_parsed =
      hm.size() == 2 &&
      base::StringToInt(hm[0], &exploded.hour) &&
      base::StringToInt(hm[1], &exploded.minute);
  if (!time_parsed)
    return false;

  if (!exploded.HasValidValues())
    return false;

  // The server's time zone is unknown, so the fields are interpreted as
  // local time.
  *result = base::Time::FromLocalExploded(exploded);
  return true;
}

// Locates the date listing inside |columns| (one whitespace-split listing
// line) and reports what was found through the out-parameters:
//
//   |*offset|            - index of the last column of the date listing
//                          (the "year or time" column),
//   |*size|              - text of the column directly preceding the date
//                          listing (the size column),
//   |*modification_time| - the parsed last modification time.
//
// |current_time| is forwarded to net::FtpUtil::LsDateListingToTime.
//
// Returns true if some column position yields a recognized date listing,
// false otherwise. |*offset| and |*size| are only written on success.
bool DetectColumnOffsetSizeAndModificationTime(
    const std::vector<base::string16>& columns,
    const base::Time& current_time,
    size_t* offset,
    base::string16* size,
    base::Time* modification_time) {
  // Owner and group names may contain spaces, so the date listing can start
  // at an arbitrary column. Candidate positions are tried from left to right
  // and the first match wins.
  //
  // Expected layout of a listing line ("*" marks a required field):
  //
  //  * 1. permission listing
  //    2. number of links (optional)
  //  * 3. owner name (may contain spaces)
  //    4. group name (optional, may contain spaces)
  //  * 5. size in bytes
  //  * 6. month
  //  * 7. day of month
  //  * 8. year or time <-- the reported offset is the index of this column
  //    9. file name (optional, may contain spaces)

  // Pass 1: the regular "month day year-or-time" ordering.
  for (size_t end = 5U; end < columns.size(); ++end) {
    const base::string16& month = columns[end - 2];
    const base::string16& day = columns[end - 1];
    const base::string16& year_or_time = columns[end];
    if (!net::FtpUtil::LsDateListingToTime(month, day, year_or_time,
                                           current_time, modification_time)) {
      continue;
    }
    *size = columns[end - 3];
    *offset = end;
    return true;
  }

  // Pass 2: "day month year-or-time" ordering, seen for example in Russian
  // listings. This runs only after every position failed in pass 1, which
  // keeps the regular ordering strictly preferred.
  for (size_t end = 5U; end < columns.size(); ++end) {
    const base::string16& day = columns[end - 2];
    const base::string16& month = columns[end - 1];
    const base::string16& year_or_time = columns[end];
    if (!net::FtpUtil::LsDateListingToTime(month, day, year_or_time,
                                           current_time, modification_time)) {
      continue;
    }
    *size = columns[end - 3];
    *offset = end;
    return true;
  }

  // Pass 3: a two-column date listing ("YYYY-MM-DD HH:MM"). The date takes
  // one column fewer here, so the size column sits two columns before |end|.
  for (size_t end = 5U; end < columns.size(); ++end) {
    const base::string16& date = columns[end - 1];
    const base::string16& time_of_day = columns[end];
    if (!TwoColumnDateListingToTime(date, time_of_day, modification_time))
      continue;
    *size = columns[end - 2];
    *offset = end;
    return true;
  }

  return false;
}

}  // namespace

namespace net {

// Parses an "ls -l" style FTP directory listing.
//
// |lines| holds the listing, one element per line. |current_time| is passed
// on to the date-detection helper. Every successfully parsed entry that has
// a file name is appended to |*entries|.
//
// Returns false as soon as a line cannot be handled:
//   - the first two-column line (the "total n" header) whose second column
//     is not a non-negative integer, or
//   - a line with no recognizable date listing that also does not contain
//     the string ".:".
// Returns true otherwise. Entries appended before a failure stay in
// |*entries|.
bool ParseFtpDirectoryListingLs(
    const std::vector<base::string16>& lines,
    const base::Time& current_time,
    std::vector<FtpDirectoryListingEntry>* entries) {
  // Set once a two-column "total n" header line has been consumed. At most
  // one such header is accepted per listing.
  bool seen_total_header = false;

  for (size_t line_index = 0; line_index < lines.size(); ++line_index) {
    const base::string16& line = lines[line_index];
    if (line.empty())
      continue;

    std::vector<base::string16> columns;
    base::SplitString(base::CollapseWhitespace(line, false), ' ', &columns);

    // Some FTP servers send a "total n" header line (n is an integer). The
    // word itself is not matched, because it may be localized (at least
    // English and German have been seen in the field); any two-column line
    // qualifies.
    // NOTE: the header is accepted once, but nothing here checks that it is
    // the first non-empty line of the listing.
    if (!seen_total_header && columns.size() == 2) {
      seen_total_header = true;

      int64 total_number;
      if (!base::StringToInt64(columns[1], &total_number) || total_number < 0)
        return false;

      continue;
    }

    FtpDirectoryListingEntry entry;

    size_t column_offset;
    base::string16 size;
    const bool found_date =
        DetectColumnOffsetSizeAndModificationTime(columns,
                                                  current_time,
                                                  &column_offset,
                                                  &size,
                                                  &entry.last_modified);
    if (!found_date) {
      // Some servers send a message in one of the first few lines. All those
      // messages have in common is the string ".:", where "." means the
      // current directory and ":" separates it from the rest of the message,
      // which may be empty. Such lines are skipped; anything else is a
      // parse failure.
      const bool is_server_message =
          line.find(base::ASCIIToUTF16(".:")) != base::string16::npos;
      if (!is_server_message)
        return false;
      continue;
    }

    // When the date listing ends in the last column there is no file name.
    // Some FTP servers send entries with empty names; it is not obvious how
    // to display them, so they are dropped without failing the whole parse,
    // since the remaining entries can still be useful.
    if (column_offset + 1 == columns.size())
      continue;

    // The permission listing is deliberately not validated: it is quirky,
    // and some servers send garbage there while the rest of the line is OK.
    // Only its first character is used, to pick the entry type.
    const base::string16& permissions = columns[0];
    const bool has_type_char = !permissions.empty();
    if (has_type_char && permissions[0] == 'l') {
      entry.type = FtpDirectoryListingEntry::SYMLINK;
    } else if (has_type_char && permissions[0] == 'd') {
      entry.type = FtpDirectoryListingEntry::DIRECTORY;
    } else {
      entry.type = FtpDirectoryListingEntry::FILE;
    }

    if (!base::StringToInt64(size, &entry.size)) {
      // Some FTP servers do not separate the owning group name from the file
      // size, like "group1234". The entry should still be listed, but the
      // size cannot be recovered ("group1" with size 234 is indistinguishable
      // from "group" with size 1234), so a dummy value is used.
      // TODO(phajdan.jr): Use a value that means "unknown" instead of 0 bytes.
      entry.size = 0;
    }
    // -1 stands for "size unknown". It is used for negative sizes, which
    // some buggy servers report (most likely for very big files such as DVD
    // ISO images), and for everything that is not a regular file.
    if (entry.size < 0 || entry.type != FtpDirectoryListingEntry::FILE)
      entry.size = -1;

    // The name is taken from the raw line rather than from |columns|
    // (presumably so that whitespace inside the name is preserved - confirm
    // against FtpUtil::GetStringPartAfterColumns).
    entry.name = FtpUtil::GetStringPartAfterColumns(line, column_offset + 1);

    if (entry.type == FtpDirectoryListingEntry::SYMLINK) {
      // Strip the link target, if any. The " -> " marker is not required to
      // be present: some FTP servers don't send the symlink target, possibly
      // for security reasons.
      const base::string16::size_type arrow_pos =
          entry.name.rfind(base::ASCIIToUTF16(" -> "));
      if (arrow_pos != base::string16::npos)
        entry.name.erase(arrow_pos);
    }

    entries->push_back(entry);
  }

  return true;
}

}  // namespace net