summaryrefslogtreecommitdiffstats
path: root/net/ftp/ftp_directory_listing_parser_ls.cc
blob: c4d3c78187c76b9d9144ab55c9b60432b86f407e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/ftp/ftp_directory_listing_parser_ls.h"

#include <vector>

#include "base/string_number_conversions.h"
#include "base/string_split.h"
#include "base/string_util.h"
#include "base/time.h"
#include "base/utf_string_conversions.h"
#include "net/ftp/ftp_directory_listing_parser.h"
#include "net/ftp/ftp_util.h"

namespace {

// Returns true if |text| is exactly one three-character permission triplet
// in the style printed by "ls -l".
//
// Accepted characters, by position:
//   text[0]: 'r' (file is readable) or '-'
//   text[1]: 'w' (file is writable) or '-'
//   text[2]: 'x' (file is executable),
//            's' or 'S' (setuid/setgid bit set),
//            't' or 'T' ("sticky" bit set), or '-'
bool LooksLikeUnixPermission(const string16& text) {
  if (text.length() != 3)
    return false;

  if (text[0] != 'r' && text[0] != '-')
    return false;

  if (text[1] != 'w' && text[1] != '-')
    return false;

  // The last position is the only one with more than two legal values.
  switch (text[2]) {
    case 'x':
    case 's':
    case 'S':
    case 't':
    case 'T':
    case '-':
      return true;
    default:
      return false;
  }
}

bool LooksLikeUnixPermissionsListing(const string16& text) {
  if (text.length() < 7)
    return false;

  // Do not check the first character (entry type). There are many weird
  // servers that use special file types (for example Plan9 and append-only
  // files). Fortunately, the rest of the permission listing is more consistent.

  // Do not check the rest of the string. Some servers fail to properly
  // separate this column from the next column (number of links), resulting
  // in additional characters at the end. Also, sometimes there is a "+"
  // sign at the end indicating the file has ACLs set.

  // In fact, we don't even expect three "rwx" triplets of permission
  // listing, as some FTP servers like Hylafax only send two.
  return (LooksLikeUnixPermission(text.substr(1, 3)) &&
          LooksLikeUnixPermission(text.substr(4, 3)));
}

// Locates the date listing inside |columns| (one whitespace-split "ls -l"
// line). On success returns true, stores the index of the last date column
// (year or time) in |*offset| and the parsed time in |*modification_time|.
// Returns false if no position in |columns| parses as a date listing.
// |current_time| is handed unchanged to FtpUtil::LsDateListingToTime.
//
// Expected layout of a listing line. A star ("*") marks a required field:
//
//  * 1. permission listing
//    2. number of links (optional)
//  * 3. owner name (may contain spaces)
//    4. group name (optional, may contain spaces)
//  * 5. size in bytes
//  * 6. month
//  * 7. day of month
//  * 8. year or time <-- |*offset| will be the index of this column
//    9. file name (optional, may contain spaces)
bool DetectColumnOffsetAndModificationTime(const std::vector<string16>& columns,
                                           const base::Time& current_time,
                                           size_t* offset,
                                           base::Time* modification_time) {
  // With only the required fields present, the year/time column sits at
  // index 5. Owner or group names containing spaces can push it arbitrarily
  // far to the right, so every later index is a candidate as well.
  const size_t kFirstCandidate = 5U;

  // Pass 0 assumes the regular "month day" order. Pass 1 assumes the two
  // columns are swapped, as seen in some listings (for example Russian
  // ones). The swapped order is only tried after the regular order has
  // failed at every candidate index, which keeps the detection strict.
  for (int pass = 0; pass < 2; ++pass) {
    const bool swapped = (pass == 1);

    // Scan left to right and accept the first index that parses.
    for (size_t last = kFirstCandidate; last < columns.size(); ++last) {
      const string16& month_column =
          swapped ? columns[last - 1] : columns[last - 2];
      const string16& day_column =
          swapped ? columns[last - 2] : columns[last - 1];

      if (!net::FtpUtil::LsDateListingToTime(month_column,
                                             day_column,
                                             columns[last],
                                             current_time,
                                             modification_time)) {
        continue;
      }

      *offset = last;
      return true;
    }
  }

  return false;
}

}  // namespace

namespace net {

// Parses |lines| as an "ls -l"-style FTP directory listing and appends one
// FtpDirectoryListingEntry to |entries| for every line that describes a
// named entry.
//
// |current_time| is forwarded to the date-column parser.
//
// Returns false as soon as a line cannot be interpreted as part of such a
// listing; entries appended for earlier lines are left in |entries|.
// Returns true once every line has been consumed.
//
// The following lines are skipped without producing an entry:
//  - empty lines,
//  - a single "total n" header,
//  - lines without a recognizable date that contain ".:",
//  - entries that have no file name.
//
// For each produced entry, |size| is -1 when the size is unknown or the
// entry is not a regular file.
bool ParseFtpDirectoryListingLs(
    const std::vector<string16>& lines,
    const base::Time& current_time,
    std::vector<FtpDirectoryListingEntry>* entries) {
  // True after we have received a "total n" listing header, where n is an
  // integer. Only one such header is allowed per listing.
  bool received_total_line = false;

  for (size_t i = 0; i < lines.size(); i++) {
    if (lines[i].empty())
      continue;

    std::vector<string16> columns;
    base::SplitString(CollapseWhitespace(lines[i], false), ' ', &columns);

    // Some FTP servers put a "total n" line in the listing (n is an
    // integer). The first line that splits into exactly two columns is
    // treated as that header; any later two-column line goes through the
    // regular entry parsing below. Do not match the first word exactly,
    // because it may be in different languages (at least English and German
    // have been seen in the field). The second column must be a non-negative
    // integer, otherwise the whole listing is rejected.
    // NOTE(review): presumably StringToInt fails for totals above INT_MAX,
    // which would reject the listing; confirm and consider StringToInt64.
    if (columns.size() == 2 && !received_total_line) {
      received_total_line = true;

      int total_number;
      if (!base::StringToInt(columns[1], &total_number))
        return false;
      if (total_number < 0)
        return false;

      continue;
    }

    FtpDirectoryListingEntry entry;

    // Index of the last date column (year or time); only meaningful when
    // the detection below succeeds.
    size_t column_offset;
    if (!DetectColumnOffsetAndModificationTime(columns,
                                               current_time,
                                               &column_offset,
                                               &entry.last_modified)) {
      // Some servers send a message in one of the first few lines.
      // All those messages have in common is the string ".:",
      // where "." means the current directory, and ":" separates it
      // from the rest of the message, which may be empty.
      if (lines[i].find(ASCIIToUTF16(".:")) != string16::npos)
        continue;

      return false;
    }

    if (!LooksLikeUnixPermissionsListing(columns[0]))
      return false;

    // The first character of the permission column is the entry type.
    // Anything other than a symlink or a directory is reported as a file.
    if (columns[0][0] == 'l') {
      entry.type = FtpDirectoryListingEntry::SYMLINK;
    } else if (columns[0][0] == 'd') {
      entry.type = FtpDirectoryListingEntry::DIRECTORY;
    } else {
      entry.type = FtpDirectoryListingEntry::FILE;
    }

    // The size column sits three columns before the end of the date listing
    // (size, month, day, year-or-time).
    if (!base::StringToInt64(columns[column_offset - 3], &entry.size)) {
      // Some FTP servers do not separate owning group name from file size,
      // like "group1234". We still want to display the file name for that
      // entry, but can't really get the size (What if the group is named
      // "group1", and the size is in fact 234? We can't distinguish between
      // that and "group" with size 1234). Report the size as unknown (-1),
      // the same marker used below, rather than claiming the file is empty.
      entry.size = -1;
    }
    if (entry.size < 0) {
      // Some FTP servers have bugs that cause them to display the file size
      // as negative. They're most likely big files like DVD ISO images.
      // We still want to display them, so just say the real file size
      // is unknown.
      entry.size = -1;
    }
    // A size is only reported for regular files.
    if (entry.type != FtpDirectoryListingEntry::FILE)
      entry.size = -1;

    if (column_offset == columns.size() - 1) {
      // If the end of the date listing is the last column, there is no file
      // name. Some FTP servers send listing entries with empty names.
      // It's not obvious how to display such an entry, so we ignore them.
      // We don't want to make the parsing fail at this point though.
      // Other entries can still be useful.
      continue;
    }

    // Take the name from the original line rather than from |columns|, which
    // was built from a whitespace-collapsed copy of the line.
    entry.name = FtpUtil::GetStringPartAfterColumns(lines[i],
                                                    column_offset + 1);

    if (entry.type == FtpDirectoryListingEntry::SYMLINK) {
      // Cut the name at the last " -> " so the link target is dropped.
      string16::size_type pos = entry.name.rfind(ASCIIToUTF16(" -> "));

      // We don't require the " -> " to be present. Some FTP servers don't send
      // the symlink target, possibly for security reasons.
      if (pos != string16::npos)
        entry.name = entry.name.substr(0, pos);
    }

    entries->push_back(entry);
  }

  return true;
}

}  // namespace net