diff options
author | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-06 07:26:52 +0000 |
---|---|---|
committer | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-06 07:26:52 +0000 |
commit | da322acaebe30d84d9d8b6931752ab27333c3d36 (patch) | |
tree | c47effc5bec77fd099af537193fd0bff13a9629a /net | |
parent | 8ceea64ea9e45f97f2bac1df84e770ee23cca0e1 (diff) | |
download | chromium_src-da322acaebe30d84d9d8b6931752ab27333c3d36.zip chromium_src-da322acaebe30d84d9d8b6931752ab27333c3d36.tar.gz chromium_src-da322acaebe30d84d9d8b6931752ab27333c3d36.tar.bz2 |
FTP: Multiple fixes for localized directory listings:
- fix detection of KOI8-R and possibly other encodings
- fix parsing Russian month names
When detecting the listing encoding, we need to check not only
whether the data can be converted using a given encoding,
but also whether the result can be parsed as a valid directory listing.
Also, we only need to compare the first three characters of the
abbreviated month name, because that's how they're abbreviated
in FTP directory listings.
Finally, Russian directory listings have the "month" and "day of month" columns swapped.
BUG=65917
Review URL: http://codereview.chromium.org/6718043
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@80587 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/data/ftp/dir-listing-ls-25 | 6 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-25.expected | 53 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-26 | 6 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-26.expected | 53 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-27 | 6 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-27.expected | 53 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parser.cc | 114 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parser_ls.cc | 14 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parser_ls_unittest.cc | 13 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parser_unittest.cc | 7 | ||||
-rw-r--r-- | net/ftp/ftp_util.cc | 9 | ||||
-rw-r--r-- | net/ftp/ftp_util_unittest.cc | 19 |
12 files changed, 298 insertions, 55 deletions
diff --git a/net/data/ftp/dir-listing-ls-25 b/net/data/ftp/dir-listing-ls-25 new file mode 100644 index 0000000..7f36b14 --- /dev/null +++ b/net/data/ftp/dir-listing-ls-25 @@ -0,0 +1,6 @@ +drwxr-xr-x 3 ftp ftp 4096 15 апр 18:11 .
+drwxr-xr-x 3 ftp ftp 4096 15 июл 18:11 ..
+-rw-r--r-- 1 ftp ftp 528 01 май 2007 .message
+-rw-r--r-- 1 ftp ftp 528 01 ноя 2007 README
+-rw-r--r-- 1 ftp ftp 560 28 сен 2007 index.html
+drwxr-xr-x 33 ftp ftp 4096 12 фев 2008 pub
diff --git a/net/data/ftp/dir-listing-ls-25.expected b/net/data/ftp/dir-listing-ls-25.expected new file mode 100644 index 0000000..3405f86 --- /dev/null +++ b/net/data/ftp/dir-listing-ls-25.expected @@ -0,0 +1,53 @@ +d +. +-1 +1994 +4 +15 +18 +11 + +d +.. +-1 +1994 +7 +15 +18 +11 + +- +.message +528 +2007 +5 +1 +0 +0 + +- +README +528 +2007 +11 +1 +0 +0 + +- +index.html +560 +2007 +9 +28 +0 +0 + +d +pub +-1 +2008 +2 +12 +0 +0 diff --git a/net/data/ftp/dir-listing-ls-26 b/net/data/ftp/dir-listing-ls-26 new file mode 100644 index 0000000..73161af --- /dev/null +++ b/net/data/ftp/dir-listing-ls-26 @@ -0,0 +1,6 @@ +drwxr-xr-x 3 ftp ftp 4096 15 18:11 .
+drwxr-xr-x 3 ftp ftp 4096 15 18:11 ..
+-rw-r--r-- 1 ftp ftp 528 01 2007 .message
+-rw-r--r-- 1 ftp ftp 528 01 2007 README
+-rw-r--r-- 1 ftp ftp 560 28 2007 index.html
+drwxr-xr-x 33 ftp ftp 4096 12 2008 pub
diff --git a/net/data/ftp/dir-listing-ls-26.expected b/net/data/ftp/dir-listing-ls-26.expected new file mode 100644 index 0000000..3405f86 --- /dev/null +++ b/net/data/ftp/dir-listing-ls-26.expected @@ -0,0 +1,53 @@ +d +. +-1 +1994 +4 +15 +18 +11 + +d +.. +-1 +1994 +7 +15 +18 +11 + +- +.message +528 +2007 +5 +1 +0 +0 + +- +README +528 +2007 +11 +1 +0 +0 + +- +index.html +560 +2007 +9 +28 +0 +0 + +d +pub +-1 +2008 +2 +12 +0 +0 diff --git a/net/data/ftp/dir-listing-ls-27 b/net/data/ftp/dir-listing-ls-27 new file mode 100644 index 0000000..eec958e --- /dev/null +++ b/net/data/ftp/dir-listing-ls-27 @@ -0,0 +1,6 @@ +drwxr-xr-x 3 ftp ftp 4096 15 18:11 .
+drwxr-xr-x 3 ftp ftp 4096 15 18:11 ..
+-rw-r--r-- 1 ftp ftp 528 01 2007 .message
+-rw-r--r-- 1 ftp ftp 528 01 2007 README
+-rw-r--r-- 1 ftp ftp 560 28 2007 index.html
+drwxr-xr-x 33 ftp ftp 4096 12 2008 pub
diff --git a/net/data/ftp/dir-listing-ls-27.expected b/net/data/ftp/dir-listing-ls-27.expected new file mode 100644 index 0000000..3405f86 --- /dev/null +++ b/net/data/ftp/dir-listing-ls-27.expected @@ -0,0 +1,53 @@ +d +. +-1 +1994 +4 +15 +18 +11 + +d +.. +-1 +1994 +7 +15 +18 +11 + +- +.message +528 +2007 +5 +1 +0 +0 + +- +README +528 +2007 +11 +1 +0 +0 + +- +index.html +560 +2007 +9 +28 +0 +0 + +d +pub +-1 +2008 +2 +12 +0 +0 diff --git a/net/ftp/ftp_directory_listing_parser.cc b/net/ftp/ftp_directory_listing_parser.cc index 8c36bb6..7d47725 100644 --- a/net/ftp/ftp_directory_listing_parser.cc +++ b/net/ftp/ftp_directory_listing_parser.cc @@ -16,93 +16,109 @@ #include "net/ftp/ftp_directory_listing_parser_windows.h" #include "net/ftp/ftp_server_type_histograms.h" -namespace { - -// Converts a string with unknown character encoding to UTF-16. On success -// fills in |converted_text| and |encoding|. Returns network error code. -int ConvertStringToUTF16(const std::string& text, - string16* converted_text, - std::string* encoding) { - std::vector<std::string> encodings; - if (!base::DetectAllEncodings(text, &encodings)) - return net::ERR_ENCODING_DETECTION_FAILED; - - // Use first encoding that can be used to decode the text. - for (size_t i = 0; i < encodings.size(); i++) { - if (base::CodepageToUTF16(text, - encodings[i].c_str(), - base::OnStringConversionError::FAIL, - converted_text)) { - *encoding = encodings[i]; - return net::OK; - } - } +namespace net { - return net::ERR_ENCODING_DETECTION_FAILED; -} +namespace { +// Fills in |raw_name| for all |entries| using |encoding|. Returns network +// error code. 
int FillInRawName(const std::string& encoding, - std::vector<net::FtpDirectoryListingEntry>* entries) { + std::vector<FtpDirectoryListingEntry>* entries) { for (size_t i = 0; i < entries->size(); i++) { if (!base::UTF16ToCodepage(entries->at(i).name, encoding.c_str(), base::OnStringConversionError::FAIL, &entries->at(i).raw_name)) { - return net::ERR_ENCODING_CONVERSION_FAILED; + return ERR_ENCODING_CONVERSION_FAILED; } } - return net::OK; + return OK; } -} // namespace - -namespace net { - -FtpDirectoryListingEntry::FtpDirectoryListingEntry() { -} - -int ParseFtpDirectoryListing(const std::string& text, - const base::Time& current_time, - std::vector<FtpDirectoryListingEntry>* entries) { - std::string encoding; - - string16 converted_text; - int rv = ConvertStringToUTF16(text, &converted_text, &encoding); - if (rv != OK) - return rv; - +// Parses |text| as an FTP directory listing. Fills in |entries| +// and |server_type| and returns network error code. +int ParseListing(const string16& text, + const std::string& encoding, + const base::Time& current_time, + std::vector<FtpDirectoryListingEntry>* entries, + FtpServerType* server_type) { std::vector<string16> lines; - base::SplitString(converted_text, '\n', &lines); + base::SplitString(text, '\n', &lines); // TODO(phajdan.jr): Use a table of callbacks instead of repeating code. 
entries->clear(); if (ParseFtpDirectoryListingLs(lines, current_time, entries)) { - UpdateFtpServerTypeHistograms(SERVER_LS); + *server_type = SERVER_LS; return FillInRawName(encoding, entries); } entries->clear(); if (ParseFtpDirectoryListingWindows(lines, entries)) { - UpdateFtpServerTypeHistograms(SERVER_WINDOWS); + *server_type = SERVER_WINDOWS; return FillInRawName(encoding, entries); } entries->clear(); if (ParseFtpDirectoryListingVms(lines, entries)) { - UpdateFtpServerTypeHistograms(SERVER_VMS); + *server_type = SERVER_VMS; return FillInRawName(encoding, entries); } entries->clear(); if (ParseFtpDirectoryListingNetware(lines, current_time, entries)) { - UpdateFtpServerTypeHistograms(SERVER_NETWARE); + *server_type = SERVER_NETWARE; return FillInRawName(encoding, entries); } entries->clear(); - UpdateFtpServerTypeHistograms(SERVER_UNKNOWN); + return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT; +} + +// Detects encoding of |text| and parses it as an FTP directory listing. +// Fills in |entries| and |server_type| and returns network error code. +int DecodeAndParse(const std::string& text, + const base::Time& current_time, + std::vector<FtpDirectoryListingEntry>* entries, + FtpServerType* server_type) { + std::vector<std::string> encodings; + if (!base::DetectAllEncodings(text, &encodings)) + return ERR_ENCODING_DETECTION_FAILED; + + // Use first encoding that can be used to decode the text. 
+ for (size_t i = 0; i < encodings.size(); i++) { + string16 converted_text; + if (base::CodepageToUTF16(text, + encodings[i].c_str(), + base::OnStringConversionError::FAIL, + &converted_text)) { + int rv = ParseListing(converted_text, + encodings[i], + current_time, + entries, + server_type); + if (rv == OK) + return rv; + } + } + + entries->clear(); + *server_type = SERVER_UNKNOWN; return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT; } } // namespace + +FtpDirectoryListingEntry::FtpDirectoryListingEntry() { +} + +int ParseFtpDirectoryListing(const std::string& text, + const base::Time& current_time, + std::vector<FtpDirectoryListingEntry>* entries) { + FtpServerType server_type = SERVER_UNKNOWN; + int rv = DecodeAndParse(text, current_time, entries, &server_type); + UpdateFtpServerTypeHistograms(server_type); + return rv; +} + +} // namespace net diff --git a/net/ftp/ftp_directory_listing_parser_ls.cc b/net/ftp/ftp_directory_listing_parser_ls.cc index 9d637a8..f7ad6ac 100644 --- a/net/ftp/ftp_directory_listing_parser_ls.cc +++ b/net/ftp/ftp_directory_listing_parser_ls.cc @@ -98,6 +98,20 @@ bool DetectColumnOffsetAndModificationTime(const std::vector<string16>& columns, } } + // Some FTP listings have swapped the "month" and "day of month" columns + // (for example Russian listings). We try to recognize them only after making + // sure no column offset works above (this is a more strict way). 
+ for (size_t i = 5U; i < columns.size(); i++) { + if (net::FtpUtil::LsDateListingToTime(columns[i - 1], + columns[i - 2], + columns[i], + current_time, + modification_time)) { + *offset = i; + return true; + } + } + return false; } diff --git a/net/ftp/ftp_directory_listing_parser_ls_unittest.cc b/net/ftp/ftp_directory_listing_parser_ls_unittest.cc index 0414eb9..c05e691 100644 --- a/net/ftp/ftp_directory_listing_parser_ls_unittest.cc +++ b/net/ftp/ftp_directory_listing_parser_ls_unittest.cc @@ -44,7 +44,7 @@ TEST_F(FtpDirectoryListingParserLsTest, Good) { { "d-wx-wx-wt+ 4 ftp 989 512 Dec 8 15:54 incoming", FtpDirectoryListingEntry::DIRECTORY, "incoming", -1, 1993, 12, 8, 15, 54 }, - { "drwxrwxrwx 1 owner group 0 Sep 13 0:30 audio", + { "drwxrwxrwx 1 owner group 1024 Sep 13 0:30 audio", FtpDirectoryListingEntry::DIRECTORY, "audio", -1, 1994, 9, 13, 0, 30 }, { "lrwxrwxrwx 1 0 0 26 Sep 18 2008 pub", @@ -94,6 +94,15 @@ TEST_F(FtpDirectoryListingParserLsTest, Good) { { "drwxrwxr-x 3 %%%% Domain Users 4096 Dec 9 2009 %%%%%", net::FtpDirectoryListingEntry::DIRECTORY, "%%%%%", -1, 2009, 12, 9, 0, 0 }, + + // Tests for "ls -l" style listing in Russian locale (note the swapped + // parts order: the day of month is the first, before month). 
+ { "-rwxrwxr-x 1 ftp ftp 123 23 \xd0\xbc\xd0\xb0\xd0\xb9 2011 test", + net::FtpDirectoryListingEntry::FILE, "test", 123, + 2011, 5, 23, 0, 0 }, + { "drwxrwxr-x 1 ftp ftp 4096 19 \xd0\xbe\xd0\xba\xd1\x82 2011 dir", + net::FtpDirectoryListingEntry::DIRECTORY, "dir", -1, + 2011, 10, 19, 0, 0 }, }; for (size_t i = 0; i < arraysize(good_cases); i++) { SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", i, @@ -150,7 +159,7 @@ TEST_F(FtpDirectoryListingParserLsTest, Bad) { "qrwwr--r-- 1 ftp ftp 528 Nov 01 2007 README", "-rw-r--r-- 1 ftp ftp -528 Nov 01 2007 README", "-rw-r--r-- 1 ftp ftp 528 Foo 01 2007 README", - "drwxrwxrwx 1 owner group 0 Sep 13 0:3 audio", + "drwxrwxrwx 1 owner group 1024 Sep 13 0:3 audio", "-qqqqqqqqq+ 2 sys 512 Mar 27 2009 pub", }; diff --git a/net/ftp/ftp_directory_listing_parser_unittest.cc b/net/ftp/ftp_directory_listing_parser_unittest.cc index b8f0851..6664a89 100644 --- a/net/ftp/ftp_directory_listing_parser_unittest.cc +++ b/net/ftp/ftp_directory_listing_parser_unittest.cc @@ -46,6 +46,13 @@ TEST(FtpDirectoryListingBufferTest, Parse) { "dir-listing-ls-22", // TODO(phajdan.jr): should use windows-1251 encoding. "dir-listing-ls-23", "dir-listing-ls-24", + + // Tests for Russian listings. The only difference between those + // files is character encoding: + "dir-listing-ls-25", // UTF-8 + "dir-listing-ls-26", // KOI8-R + "dir-listing-ls-27", // windows-1251 + "dir-listing-netware-1", "dir-listing-netware-2", "dir-listing-vms-1", diff --git a/net/ftp/ftp_util.cc b/net/ftp/ftp_util.cc index 6c7959f..f96fab5 100644 --- a/net/ftp/ftp_util.cc +++ b/net/ftp/ftp_util.cc @@ -137,7 +137,12 @@ bool FtpUtil::AbbreviatedMonthToNumber(const string16& text, int* number) { // An alternative solution (to parse |text| in given locale) is more // lenient, and may accept more than we want even with setLenient(false). 
for (int32_t month = 0; month < months_count; month++) { - if (months[month].caseCompare(unicode_text, 0) == 0) { + // Compare (case-insensitive), but just first three characters. Sometimes + // ICU returns longer strings (for example for Russian locale), and in FTP + // listings they are abbreviated to just three characters. + // Note: ICU may also return strings shorter than three characters, + // and those also should be accepted. + if (months[month].caseCompare(0, 3, unicode_text, 0) == 0) { *number = month + 1; return true; } @@ -159,6 +164,8 @@ bool FtpUtil::LsDateListingToTime(const string16& month, const string16& day, if (!base::StringToInt(day, &time_exploded.day_of_month)) return false; + if (time_exploded.day_of_month > 31) + return false; if (!base::StringToInt(rest, &time_exploded.year)) { // Maybe it's time. Does it look like time (HH:MM)? diff --git a/net/ftp/ftp_util_unittest.cc b/net/ftp/ftp_util_unittest.cc index 98ae975..4f26817 100644 --- a/net/ftp/ftp_util_unittest.cc +++ b/net/ftp/ftp_util_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -125,13 +125,26 @@ TEST(FtpUtilTest, LsDateListingToTime) { { "Nov", "01", "2007", 2007, 11, 1, 0, 0 }, { "Jul", "25", "13:37", 1994, 7, 25, 13, 37 }, - // Test date listings in German, we should support them for FTP servers - // giving localized listings. + // Test date listings in German. { "M\xc3\xa4r", "13", "2009", 2009, 3, 13, 0, 0 }, { "Mai", "1", "10:10", 1994, 5, 1, 10, 10 }, { "Okt", "14", "21:18", 1994, 10, 14, 21, 18 }, { "Dez", "25", "2008", 2008, 12, 25, 0, 0 }, + // Test date listings in Russian. 
+ { "\xd1\x8f\xd0\xbd\xd0\xb2", "1", "2011", 2011, 1, 1, 0, 0 }, + { "\xd1\x84\xd0\xb5\xd0\xb2", "1", "2011", 2011, 2, 1, 0, 0 }, + { "\xd0\xbc\xd0\xb0\xd1\x80", "1", "2011", 2011, 3, 1, 0, 0 }, + { "\xd0\xb0\xd0\xbf\xd1\x80", "1", "2011", 2011, 4, 1, 0, 0 }, + { "\xd0\xbc\xd0\xb0\xd0\xb9", "1", "2011", 2011, 5, 1, 0, 0 }, + { "\xd0\xb8\xd1\x8e\xd0\xbd", "1", "2011", 2011, 6, 1, 0, 0 }, + { "\xd0\xb8\xd1\x8e\xd0\xbb", "1", "2011", 2011, 7, 1, 0, 0 }, + { "\xd0\xb0\xd0\xb2\xd0\xb3", "1", "2011", 2011, 8, 1, 0, 0 }, + { "\xd1\x81\xd0\xb5\xd0\xbd", "1", "2011", 2011, 9, 1, 0, 0 }, + { "\xd0\xbe\xd0\xba\xd1\x82", "1", "2011", 2011, 10, 1, 0, 0 }, + { "\xd0\xbd\xd0\xbe\xd1\x8f", "1", "2011", 2011, 11, 1, 0, 0 }, + { "\xd0\xb4\xd0\xb5\xd0\xba", "1", "2011", 2011, 12, 1, 0, 0 }, + // Test current year detection. { "Nov", "01", "12:00", 1994, 11, 1, 12, 0 }, { "Nov", "15", "12:00", 1994, 11, 15, 12, 0 }, |