diff options
author | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-03 16:13:03 +0000 |
---|---|---|
committer | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-03 16:13:03 +0000 |
commit | 698c61616b5725f94021464d8d4828c54cb88d25 (patch) | |
tree | 8d0f0ed9d24a6e54285d2ac0b2afa161bd88f836 /net/ftp | |
parent | 7c3764b163f9d45d23b46ea238464a54e1ec20bc (diff) | |
download | chromium_src-698c61616b5725f94021464d8d4828c54cb88d25.zip chromium_src-698c61616b5725f94021464d8d4828c54cb88d25.tar.gz chromium_src-698c61616b5725f94021464d8d4828c54cb88d25.tar.bz2 |
Make new FTP LIST parsing code more robust.
Some highlights:
- more tests (including non-ASCII characters in different encodings)
- handling more variants of "ls" listing
- handling spaces in file names in "ls" listings
TEST=Covered by net_unittests.
BUG=25520
Review URL: http://codereview.chromium.org/449011
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@33688 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/ftp')
-rw-r--r-- | net/ftp/ftp_directory_listing_buffer_unittest.cc | 6 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parsers.cc | 99 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parsers_unittest.cc | 9 |
3 files changed, 98 insertions, 16 deletions
diff --git a/net/ftp/ftp_directory_listing_buffer_unittest.cc b/net/ftp/ftp_directory_listing_buffer_unittest.cc index d9da0cf..9aaa16f 100644 --- a/net/ftp/ftp_directory_listing_buffer_unittest.cc +++ b/net/ftp/ftp_directory_listing_buffer_unittest.cc @@ -23,12 +23,18 @@ TEST(FtpDirectoryListingBufferTest, Parse) { "dir-listing-ls-3", "dir-listing-ls-4", "dir-listing-ls-5", + "dir-listing-ls-6", + "dir-listing-ls-7", + "dir-listing-ls-8", + "dir-listing-ls-9", + "dir-listing-ls-10", "dir-listing-windows-1", "dir-listing-windows-2", "dir-listing-vms-1", "dir-listing-vms-2", "dir-listing-vms-3", "dir-listing-vms-4", + "dir-listing-vms-5", }; FilePath test_dir; diff --git a/net/ftp/ftp_directory_listing_parsers.cc b/net/ftp/ftp_directory_listing_parsers.cc index 8963624..a559a69 100644 --- a/net/ftp/ftp_directory_listing_parsers.cc +++ b/net/ftp/ftp_directory_listing_parsers.cc @@ -4,6 +4,8 @@ #include "net/ftp/ftp_directory_listing_parsers.h" +#include <ctype.h> + #include "base/string_util.h" namespace { @@ -12,10 +14,16 @@ bool LooksLikeUnixPermission(const string16& text) { if (text.length() != 3) return false; + // Meaning of the flags: + // r - file is readable + // w - file is writable + // x - file is executable + // s or S - setuid/setgid bit set + // t or T - "sticky" bit set return ((text[0] == 'r' || text[0] == '-') && (text[1] == 'w' || text[1] == '-') && (text[2] == 'x' || text[2] == 's' || text[2] == 'S' || - text[2] == '-')); + text[2] == 't' || text[2] == 'T' || text[2] == '-')); } bool LooksLikeUnixPermissionsListing(const string16& text) { @@ -40,6 +48,22 @@ bool IsStringNonNegativeInteger(const string16& text) { return number >= 0; } +string16 GetStringPartAfterColumns(const string16& text, int columns) { + DCHECK_LE(1, columns); + int columns_so_far = 0; + size_t last = 0; + for (size_t i = 1; i < text.length(); ++i) { + if (!isspace(text[i - 1]) && isspace(text[i])) { + last = i; + if (++columns_so_far == columns) + break; + } + } + string16 result(text.substr(last)); + TrimWhitespace(result, TRIM_ALL, &result); + return result; +} + bool ThreeLetterMonthToNumber(const string16& text, int* number) { const static char* months[] = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" }; @@ -51,12 +75,36 @@ bool ThreeLetterMonthToNumber(const string16& text, int* number) { } } + // Special cases for listings in German (other three-letter month + // abbreviations are the same as in English). Note that we don't need to do + // a case-insensitive compare here. Only "ls -l" style listings may use + // localized month names, and they will always start capitalized. Also, + // converting non-ASCII characters to lowercase would be more complicated. + if (text == UTF8ToUTF16("M\xc3\xa4r")) { + // The full month name is M-(a-umlaut)-rz (March), which is M-(a-umlaut)r + // when abbreviated. + *number = 3; + return true; + } + if (text == ASCIIToUTF16("Mai")) { + *number = 5; + return true; + } + if (text == ASCIIToUTF16("Okt")) { + *number = 10; + return true; + } + if (text == ASCIIToUTF16("Dez")) { + *number = 12; + return true; + } + return false; } bool UnixDateListingToTime(const std::vector<string16>& columns, base::Time* time) { - DCHECK_EQ(9U, columns.size()); + DCHECK_LE(9U, columns.size()); base::Time::Exploded time_exploded = { 0 }; @@ -289,25 +337,38 @@ FtpLsDirectoryListingParser::FtpLsDirectoryListingParser() } bool FtpLsDirectoryListingParser::ConsumeLine(const string16& line) { - // Allow empty lines only at the beginning of the listing. For example VMS - // systems in Unix emulation mode add an empty line before the first listing - // entry. - if (line.empty() && !received_nonempty_line_) + if (StartsWith(line, ASCIIToUTF16("total "), true) || + StartsWith(line, ASCIIToUTF16("Gesamt "), true)) { + // Some FTP servers put a "total n" line at the beginning of the listing + // (n is an integer). Allow such a line, but only once, and only if it's + // the first non-empty line. + // + // Note: "Gesamt" is a German word for "total". The case is important here: + // for "ls -l" style listings, "total" will be lowercase, and Gesamt will be + // capitalized. This helps us distinguish that from a VMS-style listing, + // which would use "Total" (note the uppercase first letter). + + if (received_nonempty_line_) + return false; + + received_nonempty_line_ = true; + return true; + } + if (line.empty() && !received_nonempty_line_) { + // Allow empty lines only at the beginning of the listing. For example VMS + // systems in Unix emulation mode add an empty line before the first listing + // entry. return true; + } received_nonempty_line_ = true; std::vector<string16> columns; SplitString(CollapseWhitespace(line, false), ' ', &columns); - if (columns.size() == 11) { - // Check if it is a symlink. - if (!EqualsASCII(columns[9], "->")) - return false; - - // Drop the symlink target from columns, we don't use it. - columns.resize(9); - } - if (columns.size() != 9) + // We may receive file names containing spaces, which can make the number of + // columns arbitrarily large. We will handle that later. For now just make + // sure we have all the columns that should normally be there. + if (columns.size() < 9) return false; if (!LooksLikeUnixPermissionsListing(columns[0])) @@ -335,7 +396,13 @@ bool FtpLsDirectoryListingParser::ConsumeLine(const string16& line) { if (!UnixDateListingToTime(columns, &entry.last_modified)) return false; - entry.name = columns[8]; + entry.name = GetStringPartAfterColumns(line, 8); + if (entry.type == FtpDirectoryListingEntry::SYMLINK) { + string16::size_type pos = entry.name.rfind(ASCIIToUTF16(" -> ")); + if (pos == string16::npos) + return false; + entry.name = entry.name.substr(0, pos); + } entries_.push(entry); return true; diff --git a/net/ftp/ftp_directory_listing_parsers_unittest.cc b/net/ftp/ftp_directory_listing_parsers_unittest.cc index 80ddc7e..5bb69e7 100644 --- a/net/ftp/ftp_directory_listing_parsers_unittest.cc +++ b/net/ftp/ftp_directory_listing_parsers_unittest.cc @@ -71,6 +71,15 @@ TEST_F(FtpDirectoryListingParsersTest, Ls) { { "drwxrwsr-x 4 501 501 4096 Feb 20 2007 pub", net::FtpDirectoryListingEntry::DIRECTORY, "pub", -1, 2007, 2, 20, 0, 0 }, + { "drwxr-xr-x 4 (?) (?) 4096 Apr 8 2007 jigdo", + net::FtpDirectoryListingEntry::DIRECTORY, "jigdo", -1, + 2007, 4, 8, 0, 0 }, + { "drwx-wx-wt 2 root wheel 512 Jul 1 02:15 incoming", + net::FtpDirectoryListingEntry::DIRECTORY, "incoming", -1, + now_exploded.year, 7, 1, 2, 15 }, + { "-rw-r--r-- 1 2 3 3447432 May 18 2009 Foo - Manual.pdf", + net::FtpDirectoryListingEntry::FILE, "Foo - Manual.pdf", 3447432, + 2009, 5, 18, 0, 0 }, }; for (size_t i = 0; i < arraysize(good_cases); i++) { SCOPED_TRACE(StringPrintf("Test[%" PRIuS "]: %s", i, good_cases[i].input)); |