diff options
author | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-06 08:03:49 +0000 |
---|---|---|
committer | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-06 08:03:49 +0000 |
commit | 27528b197f63c0cb8d71416ffc5065570358be8b (patch) | |
tree | 5c69270196473d64a9a8aed7e9691ca3a33973ce /net/ftp | |
parent | 597787dd161b4959eb35cc9d0b365933fd2b315e (diff) | |
download | chromium_src-27528b197f63c0cb8d71416ffc5065570358be8b.zip chromium_src-27528b197f63c0cb8d71416ffc5065570358be8b.tar.gz chromium_src-27528b197f63c0cb8d71416ffc5065570358be8b.tar.bz2 |
Revert 80587 - FTP: Multiple fixes for localized directory listings:- fix detection of KOI8-R and possibly other encodings- fix parsing Russian month namesWhen detecting the listing encoding, we need to not onlycheck whether the data can be converted using given encoding,but also whether the result can be parsed as a valid directory listing.Also, we only need to compare the first three characters of theabbreviated month name, because that's how they're abbreviatedin FTP directory listings.Finally, the Russian directory listings have swapped the "month" and "day of month" columns.BUG=65917Review URL: http://codereview.chromium.org/6718043
TBR=phajdan.jr@chromium.org
Failures (Windows only, both Vista and XP):
FtpDirectoryListingBufferTest.Parse:
.\ftp\ftp_directory_listing_parser_unittest.cc(133): error: Value of: entry.name
Actual: L""
Expected: UTF8ToUTF16(name)
Which is: L".."
Google Test trace:
.\ftp\ftp_directory_listing_parser_unittest.cc(112): Filename: ..
.\ftp\ftp_directory_listing_parser_unittest.cc(83): Test[25]: dir-listing-ls-25
.\ftp\ftp_directory_listing_parser_unittest.cc(133): error: Value of: entry.name
Actual: L""
Expected: UTF8ToUTF16(name)
Which is: L".message"
Google Test trace:
.\ftp\ftp_directory_listing_parser_unittest.cc(112): Filename: .message
.\ftp\ftp_directory_listing_parser_unittest.cc(83): Test[25]: dir-listing-ls-25
.\ftp\ftp_directory_listing_parser_unittest.cc(133): error: Value of: entry.name
Actual: L""
Expected: UTF8ToUTF16(name)
Which is: L".."
Google Test trace:
.\ftp\ftp_directory_listing_parser_unittest.cc(112): Filename: ..
.\ftp\ftp_directory_listing_parser_unittest.cc(83): Test[26]: dir-listing-ls-26
.\ftp\ftp_directory_listing_parser_unittest.cc(133): error: Value of: entry.name
Actual: L""
Expected: UTF8ToUTF16(name)
Which is: L".message"
Google Test trace:
.\ftp\ftp_directory_listing_parser_unittest.cc(112): Filename: .message
.\ftp\ftp_directory_listing_parser_unittest.cc(83): Test[26]: dir-listing-ls-26
.\ftp\ftp_directory_listing_parser_unittest.cc(133): error: Value of: entry.name
Actual: L""
Expected: UTF8ToUTF16(name)
Which is: L".."
Google Test trace:
.\ftp\ftp_directory_listing_parser_unittest.cc(112): Filename: ..
.\ftp\ftp_directory_listing_parser_unittest.cc(83): Test[27]: dir-listing-ls-27
.\ftp\ftp_directory_listing_parser_unittest.cc(133): error: Value of: entry.name
Actual: L""
Expected: UTF8ToUTF16(name)
Which is: L".message"
Google Test trace:
.\ftp\ftp_directory_listing_parser_unittest.cc(112): Filename: .message
.\ftp\ftp_directory_listing_parser_unittest.cc(83): Test[27]: dir-listing-ls-27
FtpDirectoryListingParserLsTest.Good:
c:\b\build\slave\cr-win-rel\build\src\net/ftp/ftp_directory_listing_parser_unittest.h(47): error: Value of: entry.name
Actual: L""
Expected: UTF8ToUTF16(test_case.filename)
Which is: L"test"
Google Test trace:
.\ftp\ftp_directory_listing_parser_ls_unittest.cc(109): Test[21]: -rwxrwxr-x 1 ftp ftp 123 23 май 2011 test
c:\b\build\slave\cr-win-rel\build\src\net/ftp/ftp_directory_listing_parser_unittest.h(47): error: Value of: entry.name
Actual: L""
Expected: UTF8ToUTF16(test_case.filename)
Which is: L"dir"
Google Test trace:
.\ftp\ftp_directory_listing_parser_ls_unittest.cc(109): Test[22]: drwxrwxr-x 1 ftp ftp 4096 19 окт 2011 dir
Review URL: http://codereview.chromium.org/6802006
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@80590 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/ftp')
-rw-r--r-- | net/ftp/ftp_directory_listing_parser.cc | 114 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parser_ls.cc | 14 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parser_ls_unittest.cc | 13 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parser_unittest.cc | 7 | ||||
-rw-r--r-- | net/ftp/ftp_util.cc | 9 | ||||
-rw-r--r-- | net/ftp/ftp_util_unittest.cc | 19 |
6 files changed, 55 insertions, 121 deletions
diff --git a/net/ftp/ftp_directory_listing_parser.cc b/net/ftp/ftp_directory_listing_parser.cc index 7d47725..8c36bb6 100644 --- a/net/ftp/ftp_directory_listing_parser.cc +++ b/net/ftp/ftp_directory_listing_parser.cc @@ -16,109 +16,93 @@ #include "net/ftp/ftp_directory_listing_parser_windows.h" #include "net/ftp/ftp_server_type_histograms.h" -namespace net { - namespace { -// Fills in |raw_name| for all |entries| using |encoding|. Returns network -// error code. +// Converts a string with unknown character encoding to UTF-16. On success +// fills in |converted_text| and |encoding|. Returns network error code. +int ConvertStringToUTF16(const std::string& text, + string16* converted_text, + std::string* encoding) { + std::vector<std::string> encodings; + if (!base::DetectAllEncodings(text, &encodings)) + return net::ERR_ENCODING_DETECTION_FAILED; + + // Use first encoding that can be used to decode the text. + for (size_t i = 0; i < encodings.size(); i++) { + if (base::CodepageToUTF16(text, + encodings[i].c_str(), + base::OnStringConversionError::FAIL, + converted_text)) { + *encoding = encodings[i]; + return net::OK; + } + } + + return net::ERR_ENCODING_DETECTION_FAILED; +} + int FillInRawName(const std::string& encoding, - std::vector<FtpDirectoryListingEntry>* entries) { + std::vector<net::FtpDirectoryListingEntry>* entries) { for (size_t i = 0; i < entries->size(); i++) { if (!base::UTF16ToCodepage(entries->at(i).name, encoding.c_str(), base::OnStringConversionError::FAIL, &entries->at(i).raw_name)) { - return ERR_ENCODING_CONVERSION_FAILED; + return net::ERR_ENCODING_CONVERSION_FAILED; } } - return OK; + return net::OK; } -// Parses |text| as an FTP directory listing. Fills in |entries| -// and |server_type| and returns network error code. -int ParseListing(const string16& text, - const std::string& encoding, - const base::Time& current_time, - std::vector<FtpDirectoryListingEntry>* entries, - FtpServerType* server_type) { +} // namespace + +namespace net { + +FtpDirectoryListingEntry::FtpDirectoryListingEntry() { +} + +int ParseFtpDirectoryListing(const std::string& text, + const base::Time& current_time, + std::vector<FtpDirectoryListingEntry>* entries) { + std::string encoding; + + string16 converted_text; + int rv = ConvertStringToUTF16(text, &converted_text, &encoding); + if (rv != OK) + return rv; + std::vector<string16> lines; - base::SplitString(text, '\n', &lines); + base::SplitString(converted_text, '\n', &lines); // TODO(phajdan.jr): Use a table of callbacks instead of repeating code. entries->clear(); if (ParseFtpDirectoryListingLs(lines, current_time, entries)) { - *server_type = SERVER_LS; + UpdateFtpServerTypeHistograms(SERVER_LS); return FillInRawName(encoding, entries); } entries->clear(); if (ParseFtpDirectoryListingWindows(lines, entries)) { - *server_type = SERVER_WINDOWS; + UpdateFtpServerTypeHistograms(SERVER_WINDOWS); return FillInRawName(encoding, entries); } entries->clear(); if (ParseFtpDirectoryListingVms(lines, entries)) { - *server_type = SERVER_VMS; + UpdateFtpServerTypeHistograms(SERVER_VMS); return FillInRawName(encoding, entries); } entries->clear(); if (ParseFtpDirectoryListingNetware(lines, current_time, entries)) { - *server_type = SERVER_NETWARE; + UpdateFtpServerTypeHistograms(SERVER_NETWARE); return FillInRawName(encoding, entries); } entries->clear(); - return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT; -} - -// Detects encoding of |text| and parses it as an FTP directory listing. -// Fills in |entries| and |server_type| and returns network error code. -int DecodeAndParse(const std::string& text, - const base::Time& current_time, - std::vector<FtpDirectoryListingEntry>* entries, - FtpServerType* server_type) { - std::vector<std::string> encodings; - if (!base::DetectAllEncodings(text, &encodings)) - return ERR_ENCODING_DETECTION_FAILED; - - // Use first encoding that can be used to decode the text. - for (size_t i = 0; i < encodings.size(); i++) { - string16 converted_text; - if (base::CodepageToUTF16(text, - encodings[i].c_str(), - base::OnStringConversionError::FAIL, - &converted_text)) { - int rv = ParseListing(converted_text, - encodings[i], - current_time, - entries, - server_type); - if (rv == OK) - return rv; - } - } - - entries->clear(); - *server_type = SERVER_UNKNOWN; + UpdateFtpServerTypeHistograms(SERVER_UNKNOWN); return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT; } } // namespace - -FtpDirectoryListingEntry::FtpDirectoryListingEntry() { -} - -int ParseFtpDirectoryListing(const std::string& text, - const base::Time& current_time, - std::vector<FtpDirectoryListingEntry>* entries) { - FtpServerType server_type = SERVER_UNKNOWN; - int rv = DecodeAndParse(text, current_time, entries, &server_type); - UpdateFtpServerTypeHistograms(server_type); - return rv; -} - -} // namespace net diff --git a/net/ftp/ftp_directory_listing_parser_ls.cc b/net/ftp/ftp_directory_listing_parser_ls.cc index f7ad6ac..9d637a8 100644 --- a/net/ftp/ftp_directory_listing_parser_ls.cc +++ b/net/ftp/ftp_directory_listing_parser_ls.cc @@ -98,20 +98,6 @@ bool DetectColumnOffsetAndModificationTime(const std::vector<string16>& columns, } } - // Some FTP listings have swapped the "month" and "day of month" columns - // (for example Russian listings). We try to recognize them only after making - // sure no column offset works above (this is a more strict way). - for (size_t i = 5U; i < columns.size(); i++) { - if (net::FtpUtil::LsDateListingToTime(columns[i - 1], - columns[i - 2], - columns[i], - current_time, - modification_time)) { - *offset = i; - return true; - } - } - return false; } diff --git a/net/ftp/ftp_directory_listing_parser_ls_unittest.cc b/net/ftp/ftp_directory_listing_parser_ls_unittest.cc index c05e691..0414eb9 100644 --- a/net/ftp/ftp_directory_listing_parser_ls_unittest.cc +++ b/net/ftp/ftp_directory_listing_parser_ls_unittest.cc @@ -44,7 +44,7 @@ TEST_F(FtpDirectoryListingParserLsTest, Good) { { "d-wx-wx-wt+ 4 ftp 989 512 Dec 8 15:54 incoming", FtpDirectoryListingEntry::DIRECTORY, "incoming", -1, 1993, 12, 8, 15, 54 }, - { "drwxrwxrwx 1 owner group 1024 Sep 13 0:30 audio", + { "drwxrwxrwx 1 owner group 0 Sep 13 0:30 audio", FtpDirectoryListingEntry::DIRECTORY, "audio", -1, 1994, 9, 13, 0, 30 }, { "lrwxrwxrwx 1 0 0 26 Sep 18 2008 pub", @@ -94,15 +94,6 @@ TEST_F(FtpDirectoryListingParserLsTest, Good) { { "drwxrwxr-x 3 %%%% Domain Users 4096 Dec 9 2009 %%%%%", net::FtpDirectoryListingEntry::DIRECTORY, "%%%%%", -1, 2009, 12, 9, 0, 0 }, - - // Tests for "ls -l" style listing in Russian locale (note the swapped - // parts order: the day of month is the first, before month). - { "-rwxrwxr-x 1 ftp ftp 123 23 \xd0\xbc\xd0\xb0\xd0\xb9 2011 test", - net::FtpDirectoryListingEntry::FILE, "test", 123, - 2011, 5, 23, 0, 0 }, - { "drwxrwxr-x 1 ftp ftp 4096 19 \xd0\xbe\xd0\xba\xd1\x82 2011 dir", - net::FtpDirectoryListingEntry::DIRECTORY, "dir", -1, - 2011, 10, 19, 0, 0 }, }; for (size_t i = 0; i < arraysize(good_cases); i++) { SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", i, @@ -159,7 +150,7 @@ TEST_F(FtpDirectoryListingParserLsTest, Bad) { "qrwwr--r-- 1 ftp ftp 528 Nov 01 2007 README", "-rw-r--r-- 1 ftp ftp -528 Nov 01 2007 README", "-rw-r--r-- 1 ftp ftp 528 Foo 01 2007 README", - "drwxrwxrwx 1 owner group 1024 Sep 13 0:3 audio", + "drwxrwxrwx 1 owner group 0 Sep 13 0:3 audio", "-qqqqqqqqq+ 2 sys 512 Mar 27 2009 pub", }; diff --git a/net/ftp/ftp_directory_listing_parser_unittest.cc b/net/ftp/ftp_directory_listing_parser_unittest.cc index 6664a89..b8f0851 100644 --- a/net/ftp/ftp_directory_listing_parser_unittest.cc +++ b/net/ftp/ftp_directory_listing_parser_unittest.cc @@ -46,13 +46,6 @@ TEST(FtpDirectoryListingBufferTest, Parse) { "dir-listing-ls-22", // TODO(phajdan.jr): should use windows-1251 encoding. "dir-listing-ls-23", "dir-listing-ls-24", - - // Tests for Russian listings. The only difference between those - // files is character encoding: - "dir-listing-ls-25", // UTF-8 - "dir-listing-ls-26", // KOI8-R - "dir-listing-ls-27", // windows-1251 - "dir-listing-netware-1", "dir-listing-netware-2", "dir-listing-vms-1", diff --git a/net/ftp/ftp_util.cc b/net/ftp/ftp_util.cc index f96fab5..6c7959f 100644 --- a/net/ftp/ftp_util.cc +++ b/net/ftp/ftp_util.cc @@ -137,12 +137,7 @@ bool FtpUtil::AbbreviatedMonthToNumber(const string16& text, int* number) { // An alternative solution (to parse |text| in given locale) is more // lenient, and may accept more than we want even with setLenient(false). for (int32_t month = 0; month < months_count; month++) { - // Compare (case-insensitive), but just first three characters. Sometimes - // ICU returns longer strings (for example for Russian locale), and in FTP - // listings they are abbreviated to just three characters. - // Note: ICU may also return strings shorter than three characters, - // and those also should be accepted. - if (months[month].caseCompare(0, 3, unicode_text, 0) == 0) { + if (months[month].caseCompare(unicode_text, 0) == 0) { *number = month + 1; return true; } @@ -164,8 +159,6 @@ bool FtpUtil::LsDateListingToTime(const string16& month, const string16& day, if (!base::StringToInt(day, &time_exploded.day_of_month)) return false; - if (time_exploded.day_of_month > 31) - return false; if (!base::StringToInt(rest, &time_exploded.year)) { // Maybe it's time. Does it look like time (HH:MM)? diff --git a/net/ftp/ftp_util_unittest.cc b/net/ftp/ftp_util_unittest.cc index 4f26817..98ae975 100644 --- a/net/ftp/ftp_util_unittest.cc +++ b/net/ftp/ftp_util_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -125,26 +125,13 @@ TEST(FtpUtilTest, LsDateListingToTime) { { "Nov", "01", "2007", 2007, 11, 1, 0, 0 }, { "Jul", "25", "13:37", 1994, 7, 25, 13, 37 }, - // Test date listings in German. + // Test date listings in German, we should support them for FTP servers + // giving localized listings. { "M\xc3\xa4r", "13", "2009", 2009, 3, 13, 0, 0 }, { "Mai", "1", "10:10", 1994, 5, 1, 10, 10 }, { "Okt", "14", "21:18", 1994, 10, 14, 21, 18 }, { "Dez", "25", "2008", 2008, 12, 25, 0, 0 }, - // Test date listings in Russian. - { "\xd1\x8f\xd0\xbd\xd0\xb2", "1", "2011", 2011, 1, 1, 0, 0 }, - { "\xd1\x84\xd0\xb5\xd0\xb2", "1", "2011", 2011, 2, 1, 0, 0 }, - { "\xd0\xbc\xd0\xb0\xd1\x80", "1", "2011", 2011, 3, 1, 0, 0 }, - { "\xd0\xb0\xd0\xbf\xd1\x80", "1", "2011", 2011, 4, 1, 0, 0 }, - { "\xd0\xbc\xd0\xb0\xd0\xb9", "1", "2011", 2011, 5, 1, 0, 0 }, - { "\xd0\xb8\xd1\x8e\xd0\xbd", "1", "2011", 2011, 6, 1, 0, 0 }, - { "\xd0\xb8\xd1\x8e\xd0\xbb", "1", "2011", 2011, 7, 1, 0, 0 }, - { "\xd0\xb0\xd0\xb2\xd0\xb3", "1", "2011", 2011, 8, 1, 0, 0 }, - { "\xd1\x81\xd0\xb5\xd0\xbd", "1", "2011", 2011, 9, 1, 0, 0 }, - { "\xd0\xbe\xd0\xba\xd1\x82", "1", "2011", 2011, 10, 1, 0, 0 }, - { "\xd0\xbd\xd0\xbe\xd1\x8f", "1", "2011", 2011, 11, 1, 0, 0 }, - { "\xd0\xb4\xd0\xb5\xd0\xba", "1", "2011", 2011, 12, 1, 0, 0 }, - // Test current year detection. { "Nov", "01", "12:00", 1994, 11, 1, 12, 0 }, { "Nov", "15", "12:00", 1994, 11, 15, 12, 0 }, |