diff options
author | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-11-18 19:50:44 +0000 |
---|---|---|
committer | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-11-18 19:50:44 +0000 |
commit | 1e61db506aa4c0014d384c2d990525be38cdbc60 (patch) | |
tree | 05b6c855ad25c7c541423d85427643d9ae87fb47 | |
parent | 6f6b0041b543f97fab16548168010c2ae799c688 (diff) | |
download | chromium_src-1e61db506aa4c0014d384c2d990525be38cdbc60.zip chromium_src-1e61db506aa4c0014d384c2d990525be38cdbc60.tar.gz chromium_src-1e61db506aa4c0014d384c2d990525be38cdbc60.tar.bz2 |
FTP: improve character encoding detection in cases where ICU's first guess is wrong.
Instead of using ICU's first guessed encoding immediately,
we ask it for all possible encodings, try them in order,
and use the first one that works.
For some sites this still results in a gibberish being displayed,
but at least the links are clickable so the site navigation
is possible.
BUG=61073
TEST=see bug
Review URL: http://codereview.chromium.org/4967001
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@66664 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | base/i18n/icu_encoding_detection.cc | 36 | ||||
-rw-r--r-- | base/i18n/icu_encoding_detection.h | 6 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-20 | 18 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-20.expected | 161 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-21 | 27 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-21.expected | 242 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-22 | 32 | ||||
-rw-r--r-- | net/data/ftp/dir-listing-ls-22.expected | 287 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_buffer.cc | 67 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_buffer.h | 26 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_buffer_unittest.cc | 3 |
11 files changed, 871 insertions, 34 deletions
diff --git a/base/i18n/icu_encoding_detection.cc b/base/i18n/icu_encoding_detection.cc index 55785c5..d579af2 100644 --- a/base/i18n/icu_encoding_detection.cc +++ b/base/i18n/icu_encoding_detection.cc @@ -9,8 +9,6 @@ namespace base { -// TODO(jungshik): We can apply more heuristics here (e.g. using various hints -// like TLD, the UI language/default encoding of a client, etc). bool DetectEncoding(const std::string& text, std::string* encoding) { if (IsStringASCII(text)) { *encoding = std::string(); @@ -21,9 +19,6 @@ bool DetectEncoding(const std::string& text, std::string* encoding) { UCharsetDetector* detector = ucsdet_open(&status); ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()), &status); - // TODO(jungshik): Should we check the quality of the match? A rather - // arbitrary number is assigned by ICU and it's hard to come up with - // a lower limit. const UCharsetMatch* match = ucsdet_detect(detector, &status); const char* detected_encoding = ucsdet_getName(match, &status); ucsdet_close(detector); @@ -35,4 +30,35 @@ bool DetectEncoding(const std::string& text, std::string* encoding) { return true; } +bool DetectAllEncodings(const std::string& text, + std::vector<std::string>* encodings) { + UErrorCode status = U_ZERO_ERROR; + UCharsetDetector* detector = ucsdet_open(&status); + ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()), + &status); + int matches_count = 0; + const UCharsetMatch** matches = ucsdet_detectAll(detector, + &matches_count, + &status); + if (U_FAILURE(status)) { + ucsdet_close(detector); + return false; + } + + encodings->clear(); + for (int i = 0; i < matches_count; i++) { + UErrorCode get_name_status = U_ZERO_ERROR; + const char* encoding_name = ucsdet_getName(matches[i], &get_name_status); + + // If we failed to get the encoding's name, ignore the error. + if (U_FAILURE(get_name_status)) + continue; + + encodings->push_back(encoding_name); + } + + ucsdet_close(detector); + return !encodings->empty(); +} + } // namespace base diff --git a/base/i18n/icu_encoding_detection.h b/base/i18n/icu_encoding_detection.h index e7e6253..cdc4cb7 100644 --- a/base/i18n/icu_encoding_detection.h +++ b/base/i18n/icu_encoding_detection.h @@ -7,6 +7,7 @@ #pragma once #include <string> +#include <vector> namespace base { @@ -15,6 +16,11 @@ namespace base { // Returns true on success. bool DetectEncoding(const std::string& text, std::string* encoding); +// Detect all possible encodings of |text| and put their names +// (as returned by ICU) in |encodings|. Returns true on success. +bool DetectAllEncodings(const std::string& text, + std::vector<std::string>* encodings); + } // namespace base #endif // BASE_I18N_ICU_ENCODING_DETECTION_H_ diff --git a/net/data/ftp/dir-listing-ls-20 b/net/data/ftp/dir-listing-ls-20 new file mode 100644 index 0000000..18d5bb2 --- /dev/null +++ b/net/data/ftp/dir-listing-ls-20 @@ -0,0 +1,18 @@ +drwxrwxr-x 17 ftp ftp 4096 Nov 01 16:27 .
+drwxr-xr-x 7 ftp ftp 4096 Apr 03 2010 ..
+drwxrwxrwx 5 ftp ftp 4096 May 26 15:51 2012_-_2012-(2009)-[1080p]_[BD]
+-rw-rw-rw- 1 ftp ftp 4931 Jun 08 15:24 _READ_ME.txt
+drwxrwxrwx 5 ftp ftp 4096 Nov 01 16:27 __-_Face_Off-(1997)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 Jan 22 2010 _-_Up-(2009)-[1080p]_[BD]
+drwxrwxrwx 5 ftp ftp 4096 May 27 18:12 __-_Total_Recall-(1990)-[1080p]_[BD]
+drwxrwxrwx 3 ftp ftp 4096 May 21 14:28 __[_]_-_Law_Abiding_Citizen_[Unrated_Edition]-(2009)-[1080p]_[BD_Remux]
+drwxrwxrwx 5 ftp ftp 4096 Jan 21 2010 _-_Cloverfield-(2008)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 May 26 13:43 _-_Pandorum-(2009)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 May 27 14:18 __-_The_Last_Samurai-(2003)-[1080p]_[BD]
+drwxrwxrwx 6 ftp ftp 4096 Jan 27 2010 _9_-_District_9-(2009)-[1080p]_[BD]
+drwxrwxrwx 7 ftp ftp 4096 Jan 01 2010 __-_Rocky_The_Undisputed_Collection-(1976_1979_1982_1985_1990)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 May 31 12:57 _-_Surrogates-(2009)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 Jan 28 2010 _-__-_The_Fast_and_the_Furious-Tokyo_Drift-(2006)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 Jan 08 2010 _-_The_Fast_and_the_Furious-(2001)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 Jan 06 2010 _2_-_2_Fast_2_Furious-(2003)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 Jun 08 15:26 _4_-_Fast_&_Furious-(2009)-[1080p]_[BD]
diff --git a/net/data/ftp/dir-listing-ls-20.expected b/net/data/ftp/dir-listing-ls-20.expected new file mode 100644 index 0000000..9c636b9 --- /dev/null +++ b/net/data/ftp/dir-listing-ls-20.expected @@ -0,0 +1,161 @@ +d
+.
+-1
+1994
+11
+1
+16
+27
+
+d
+..
+-1
+2010
+4
+3
+0
+0
+
+d
+2012_-_2012-(2009)-[1080p]_[BD]
+-1
+1994
+5
+26
+15
+51
+
+-
+_READ_ME.txt
+4931
+1994
+6
+8
+15
+24
+
+d
+Áåç_ëèöà_-_Face_Off-(1997)-[1080p]_[BD]
+-1
+1994
+11
+1
+16
+27
+
+d
+Ââåðõ_-_Up-(2009)-[1080p]_[BD]
+-1
+2010
+1
+22
+0
+0
+
+d
+Âñïîìíèòü_âñå_-_Total_Recall-(1990)-[1080p]_[BD]
+-1
+1994
+5
+27
+18
+12
+
+d
+Çàêîíîïîñëóøíûé_ãðàæäàíèí_[Ðàñøèðåííàÿ_âåðñèÿ]_-_Law_Abiding_Citizen_[Unrated_Edition]-(2009)-[1080p]_[BD_Remux]
+-1
+1994
+5
+21
+14
+28
+
+d
+Ìîíñòðî_-_Cloverfield-(2008)-[1080p]_[BD]
+-1
+2010
+1
+21
+0
+0
+
+d
+Ïàíäîðóì_-_Pandorum-(2009)-[1080p]_[BD]
+-1
+1994
+5
+26
+13
+43
+
+d
+Ïîñëåäíèé_ñàìóðàé_-_The_Last_Samurai-(2003)-[1080p]_[BD]
+-1
+1994
+5
+27
+14
+18
+
+d
+Ðàéîí_9_-_District_9-(2009)-[1080p]_[BD]
+-1
+2010
+1
+27
+0
+0
+
+d
+Ðîêêè_Àíòîëîãèÿ_-_Rocky_The_Undisputed_Collection-(1976_1979_1982_1985_1990)-[1080p]_[BD]
+-1
+2010
+1
+1
+0
+0
+
+d
+Ñóððîãàòû_-_Surrogates-(2009)-[1080p]_[BD]
+-1
+1994
+5
+31
+12
+57
+
+d
+Òðîéíîé_Ôîðñàæ-Òîêèéñêèé_Äðèôò_-_The_Fast_and_the_Furious-Tokyo_Drift-(2006)-[1080p]_[BD]
+-1
+2010
+1
+28
+0
+0
+
+d
+Ôîðñàæ_-_The_Fast_and_the_Furious-(2001)-[1080p]_[BD]
+-1
+2010
+1
+8
+0
+0
+
+d
+Ôîðñàæ_2_-_2_Fast_2_Furious-(2003)-[1080p]_[BD]
+-1
+2010
+1
+6
+0
+0
+
+d
+Ôîðñàæ_4_-_Fast_&_Furious-(2009)-[1080p]_[BD]
+-1
+1994
+6
+8
+15
+26
diff --git a/net/data/ftp/dir-listing-ls-21 b/net/data/ftp/dir-listing-ls-21 new file mode 100644 index 0000000..1246efd --- /dev/null +++ b/net/data/ftp/dir-listing-ls-21 @@ -0,0 +1,27 @@ +drwxrwxr-x 26 ftp ftp 4096 Jul 15 2009 .
+drwxr-xr-x 7 ftp ftp 4096 Apr 03 2010 ..
+-rw-rw-rw- 1 ftp ftp 4931 Jun 08 15:24 _READ_ME.txt
+drwxr-xr-x 5 ftp ftp 4096 Apr 27 2009 _-_Avalon-(2001)-[1080p]_[BD_Remux]
+drwxrwxrwx 5 ftp ftp 4096 Jun 15 2009 __-_Bruce_Almighty-(2003)-[1080p]_[BD]
+drwxr-xr-x 4 ftp ftp 4096 Apr 15 2009 -_-_WALL-E-(2008)-[1080p]_[BD]
+drwxr-xr-x 4 ftp ftp 4096 Apr 28 2009 __007-__-_James_Bond_007-Quantum_of_Solace-(2008)-[1080p]_[BD]
+drwxr-xr-x 4 ftp ftp 4096 Apr 15 2009 -_c_-_Dead_Space-Downfall-(2008)-[1080p]_[BD]
+drwxrwxrwx 5 ftp ftp 4096 Jul 03 2009 _1_-_Madagascar_1-(2005)-[1080p]_[BD]
+drwxrwxrwx 5 ftp ftp 4096 Jul 03 2009 _2_-_Madagascar-Escape_2_Africa-(2008)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 Jun 13 2009 -_-_The_Matrix-Reloaded-(2003)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 Jun 14 2009 -_-_The_Matrix-Revolutions-(2003)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 Jun 12 2009 _-_The_Matrix-(1999)-[1080p]_[BD]
+drwxrwxrwx 4 ftp ftp 4096 Jul 02 2009 __3_-_Resident_Evil-Extinction-(2007)-[1080p]_[BD]
+drwxr-xr-x 3 ftp ftp 4096 May 01 2009 _-_The_Island-(2005)-[1080p]_[BD]
+drwxrwxrwx 5 ftp ftp 4096 Jul 03 2009 _3_-_Transporter_3-(2008)-[1080p]_[BD]
+drwxrwxr-x 5 ftp ftp 4096 May 02 2009 __-___-_Pirates_of_the_Caribbean-At_World's_End-(2007)-[1080p]_[BD]
+drwxrwxr-x 5 ftp ftp 4096 May 03 2009 __-___-_Pirates_of_the_Caribbean-The_Curse_of_the_Black_Pearl-(2003)-[1080p]_[BD]
+drwxrwxr-x 5 ftp ftp 4096 May 02 2009 __-__-_Pirates_of_the_Caribbean-Dead_Man's_Chest-(2006)-[1080p]_[BD]
+drwxrwxr-x 3 ftp ftp 4096 May 01 2009 __-_Ghost_Rider-(2007)-[1080p]_[BD]
+drwxr-xr-x 5 ftp ftp 4096 Apr 29 2009 -_-_The_Princess_Bride-(1987)-[1080p]_[BD]
+drwxrwxrwx 5 ftp ftp 4096 Jun 08 2009 __101__-_Sex_and_Death_101-(2007)-[1080p]_[BD]
+drwxr-xr-x 4 ftp ftp 4096 May 01 2009 -__-_Transformers-Bonus_Disk-(2007)-[1080p]_[BD]
+drwxr-xr-x 4 ftp ftp 4096 Apr 30 2009 _-_Transformers-(2007)-[1080p]_[BD]
+drwxrwxrwx 6 ftp ftp 4096 Jun 07 2009 __-_The_Thirteenth_Floor-(1999)-[1080p]_[BD]
+drwxrwxr-x 3 ftp ftp 4096 May 04 2009 __-_Street_Fighter-(1994)-[1080p]_[BD_Remux]
+drwxr-xr-x 5 ftp ftp 4096 Mar 15 2009 ___-_What_Woman_Want-(2000)-[1080p]_[BD]
diff --git a/net/data/ftp/dir-listing-ls-21.expected b/net/data/ftp/dir-listing-ls-21.expected new file mode 100644 index 0000000..0111be1 --- /dev/null +++ b/net/data/ftp/dir-listing-ls-21.expected @@ -0,0 +1,242 @@ +d
+.
+-1
+2009
+7
+15
+0
+0
+
+d
+..
+-1
+2010
+4
+3
+0
+0
+
+-
+_READ_ME.txt
+4931
+1994
+6
+8
+15
+24
+
+d
+Àâàëîí_-_Avalon-(2001)-[1080p]_[BD_Remux]
+-1
+2009
+4
+27
+0
+0
+
+d
+Áðþñ_âñåìîãóùèé_-_Bruce_Almighty-(2003)-[1080p]_[BD]
+-1
+2009
+6
+15
+0
+0
+
+d
+ÂÀËË-È_-_WALL-E-(2008)-[1080p]_[BD]
+-1
+2009
+4
+15
+0
+0
+
+d
+Äæåéìñ_Áîíä_007-Êâàíò_ìèëîñåðäèÿ_-_James_Bond_007-Quantum_of_Solace-(2008)-[1080p]_[BD]
+-1
+2009
+4
+28
+0
+0
+
+d
+Êîñìîñ-Òåððèòîðèÿ_cìåðòè_-_Dead_Space-Downfall-(2008)-[1080p]_[BD]
+-1
+2009
+4
+15
+0
+0
+
+d
+Ìàäàãàñêàð_1_-_Madagascar_1-(2005)-[1080p]_[BD]
+-1
+2009
+7
+3
+0
+0
+
+d
+Ìàäàãàñêàð_2_-_Madagascar-Escape_2_Africa-(2008)-[1080p]_[BD]
+-1
+2009
+7
+3
+0
+0
+
+d
+Ìàòðèöà-Ïåðåçàãðóçêà_-_The_Matrix-Reloaded-(2003)-[1080p]_[BD]
+-1
+2009
+6
+13
+0
+0
+
+d
+Ìàòðèöà-Ðåâîëþöèÿ_-_The_Matrix-Revolutions-(2003)-[1080p]_[BD]
+-1
+2009
+6
+14
+0
+0
+
+d
+Ìàòðèöà_-_The_Matrix-(1999)-[1080p]_[BD]
+-1
+2009
+6
+12
+0
+0
+
+d
+Îáèòåëü_çëà_3_-_Resident_Evil-Extinction-(2007)-[1080p]_[BD]
+-1
+2009
+7
+2
+0
+0
+
+d
+Îñòðîâ_-_The_Island-(2005)-[1080p]_[BD]
+-1
+2009
+5
+1
+0
+0
+
+d
+Ïåðåâîç÷èê_3_-_Transporter_3-(2008)-[1080p]_[BD]
+-1
+2009
+7
+3
+0
+0
+
+d
+Ïèðàòû_Êàðèáñêîãî_ìîðÿ-Íà_êðàþ_Ñâåòà_-_Pirates_of_the_Caribbean-At_World's_End-(2007)-[1080p]_[BD]
+-1
+2009
+5
+2
+0
+0
+
+d
+Ïèðàòû_Êàðèáñêîãî_ìîðÿ-Ïðîêëÿòèå_×åðíîé_Æåì÷óæèíû_-_Pirates_of_the_Caribbean-The_Curse_of_the_Black_Pearl-(2003)-[1080p]_[BD]
+-1
+2009
+5
+3
+0
+0
+
+d
+Ïèðàòû_Êàðèáñêîãî_ìîðÿ-Ñóíäóê_ìåðòâåöà_-_Pirates_of_the_Caribbean-Dead_Man's_Chest-(2006)-[1080p]_[BD]
+-1
+2009
+5
+2
+0
+0
+
+d
+Ïðèçðà÷íûé_ãîíùèê_-_Ghost_Rider-(2007)-[1080p]_[BD]
+-1
+2009
+5
+1
+0
+0
+
+d
+Ïðèíöåññà-íåâåñòà_-_The_Princess_Bride-(1987)-[1080p]_[BD]
+-1
+2009
+4
+29
+0
+0
+
+d
+Ñåêñ_è_101_ñìåðòü_-_Sex_and_Death_101-(2007)-[1080p]_[BD]
+-1
+2009
+6
+8
+0
+0
+
+d
+Òðàíñôîðìåðû-Áîíóñ_äèñê_-_Transformers-Bonus_Disk-(2007)-[1080p]_[BD]
+-1
+2009
+5
+1
+0
+0
+
+d
+Òðàíñôîðìåðû_-_Transformers-(2007)-[1080p]_[BD]
+-1
+2009
+4
+30
+0
+0
+
+d
+Òðèíàäöàòûé_ýòàæ_-_The_Thirteenth_Floor-(1999)-[1080p]_[BD]
+-1
+2009
+6
+7
+0
+0
+
+d
+Óëè÷íûé_áîåö_-_Street_Fighter-(1994)-[1080p]_[BD_Remux]
+-1
+2009
+5
+4
+0
+0
+
+d
+×åãî_õîòÿò_æåíùèíû_-_What_Woman_Want-(2000)-[1080p]_[BD]
+-1
+2009
+3
+15
+0
+0
diff --git a/net/data/ftp/dir-listing-ls-22 b/net/data/ftp/dir-listing-ls-22 new file mode 100644 index 0000000..df44141 --- /dev/null +++ b/net/data/ftp/dir-listing-ls-22 @@ -0,0 +1,32 @@ +drwxrwxr-x 5 ftp ftp 12288 Oct 20 17:04 .
+drwxr-xr-x 7 ftp ftp 4096 Apr 03 2010 ..
+-rw-rw-rw- 1 ftp ftp 4931 Jun 08 15:23 _READ_ME.txt
+drwxrwxrwx 4 ftp ftp 4096 May 31 16:26 _-_Avatar-(2009)-[1080p]_[BD]
+-rwxrwxr-x 1 ftp ftp 17577705968 Mar 08 2009 __1_[_]_-_American_Pie_1_[Unrated_Edition]-(1999)-[1080p]_[BD_remux].ts
+-rwxrwxr-x 1 ftp ftp 15512934868 Mar 16 2009 __-_Snatch-(2000)-[1080i]_[HDTV].ts
+drwxrwxrwx 2 ftp ftp 4096 Jun 03 19:07 __-_Snatch-(2000)-[1080p]_[BD_Remux]
+-rwxrwxr-x 1 ftp ftp 8900589105 Mar 24 2009 __-_War_of_the_Worlds-(2005)-[720p]_[HDTV].mkv
+-rwxrwxr-x 1 ftp ftp 27728321654 Mar 09 2009 _-_American_Gangster-(2007)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 31731782861 Mar 09 2009 _[_]_-_American_Gangster_[Unrated_Edition]-(2007)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 5009104014 Mar 24 2009 __-_Road_Trip-(2000)-[720p]_[HDTV_Rip].mkv
+-rwxrwxr-x 1 ftp ftp 21410583980 Mar 11 2009 _-_2-__-_Star_Wars-Episode_2-Attack_of_the_Clones-(2002)-[1080i]_[HDTV].ts
+-rwxrwxr-x 1 ftp ftp 19858181688 Mar 11 2009 _-_3-__-_Star_Wars-Episode_3-Revenge_of_the_Sith-(2005)-[1080i]_[HDTV].ts
+-rwxrwxr-x 1 ftp ftp 29026065728 Mar 16 2009 __-_Starship_Troopers-(1997)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 22169179449 Mar 16 2009 _[_]_-_Mirrors_[Unrated_Edition]-(2008)-[1080p]_[BD_remux].mkv
+drwxrwxrwx 4 ftp ftp 4096 Jun 15 14:56 -_-_Ninja_Assassin-(2009)-[1080p]_[BD]
+-rwxrwxr-x 1 ftp ftp 19717173247 Mar 11 2009 __3_-_Resident_Evil-Extinction-(2007)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 18660904388 Mar 11 2009 _-_Pathology-(2008)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 16476154520 Mar 05 2009 _1_[_]_-_Saw_I_[Director's_Cut]-(2004)-[1080p]_[HDDVD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 19917510515 Mar 05 2009 _2_[_]_-_Saw_II_[Director's_Cut]-(2005)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 18085592265 Mar 05 2009 _3_[_]_-_Saw_III_[Director's_Cut]-(2006)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 3473582701 Mar 05 2009 _4_[_]_-_Saw_IV_[Director's_Cut]-(2007)-[1080p]_[BD_remux].flac
+-rwxrwxr-x 1 ftp ftp 15263958421 Mar 05 2009 _4_[_]_-_Saw_IV_[Director's_Cut]-(2007)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 19944605507 Mar 16 2009 _5_[_]_-_Saw_V_[Director's_Cut]-(2008)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 3024333064 Mar 24 2009 __-___!_-_The_Madagascar_Penguins_in_A_Christmas_Caper-(2005)-[1080p]_[BD_remux].ts
+-rwxrwxr-x 1 ftp ftp 125961 Mar 05 2009 __[_]_-_Bad_Santa_[Unrated_Edition]-(2003)-[1080p]_[BD_remux].srt
+-rwxrwxr-x 1 ftp ftp 19908695408 Mar 05 2009 __[_]_-_Bad_Santa_[Unrated_Edition]-(2003)-[1080p]_[BD_remux].ts
+-rwxrwxr-x 1 ftp ftp 23185439267 Mar 11 2009 ___-_The_Shawshank_Redemption-(1994)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 19567287274 Mar 16 2009 ____[_]_-_Dumb_and_Dumber_[Unrated_Edition]-(1994)-[1080p]_[BD_remux].mkv
+-rwxrwxr-x 1 ftp ftp 14773061093 Mar 16 2009 _-_The_Hurricane-(1999)-[1080p]_[HDDVD_Rip].mkv
+-rwxrwxr-x 1 ftp ftp 22411268500 Mar 11 2009 _2_[_]_-_Hostel_2_[Director's_Cut]-(2007)-[1080p]_[BD_remux].ts
+-rwxrwxr-x 1 ftp ftp 23712519861 Mar 11 2009 ___[_]_-_Alien_vs_Predator_[Unrated_Edition]-(2004)-[1080p]_[BD_remux].mkv
diff --git a/net/data/ftp/dir-listing-ls-22.expected b/net/data/ftp/dir-listing-ls-22.expected new file mode 100644 index 0000000..c5a02b7 --- /dev/null +++ b/net/data/ftp/dir-listing-ls-22.expected @@ -0,0 +1,287 @@ +d
+.
+-1
+1994
+10
+20
+17
+4
+
+d
+..
+-1
+2010
+4
+3
+0
+0
+
+-
+_READ_ME.txt
+4931
+1994
+6
+8
+15
+23
+
+d
+Àâàòàð_-_Avatar-(2009)-[1080p]_[BD]
+-1
+1994
+5
+31
+16
+26
+
+-
+Àìåðèêàíñêèé_ïèðîã_1_[Ðàñøèðåííàÿ_âåðñèÿ]_-_American_Pie_1_[Unrated_Edition]-(1999)-[1080p]_[BD_remux].ts
+17577705968
+2009
+3
+8
+0
+0
+
+-
+Áîëüøîé_êóø_-_Snatch-(2000)-[1080i]_[HDTV].ts
+15512934868
+2009
+3
+16
+0
+0
+
+d
+Áîëüøîé_êóø_-_Snatch-(2000)-[1080p]_[BD_Remux]
+-1
+1994
+6
+3
+19
+7
+
+-
+Âîéíà_ìèðîâ_-_War_of_the_Worlds-(2005)-[720p]_[HDTV].mkv
+8900589105
+2009
+3
+24
+0
+0
+
+-
+Ãàíãñòåð_-_American_Gangster-(2007)-[1080p]_[BD_remux].mkv
+27728321654
+2009
+3
+9
+0
+0
+
+-
+Ãàíãñòåð_[Ðàñøèðåííàÿ_âåðñèÿ]_-_American_Gangster_[Unrated_Edition]-(2007)-[1080p]_[BD_remux].mkv
+31731782861
+2009
+3
+9
+0
+0
+
+-
+Äîðîæíîå_ïðèêëþ÷åíèå_-_Road_Trip-(2000)-[720p]_[HDTV_Rip].mkv
+5009104014
+2009
+3
+24
+0
+0
+
+-
+Çâ¸çäíûå_âîéíû-Ýïèçîä_2-Àòàêà_êëîíîâ_-_Star_Wars-Episode_2-Attack_of_the_Clones-(2002)-[1080i]_[HDTV].ts
+21410583980
+2009
+3
+11
+0
+0
+
+-
+Çâ¸çäíûå_âîéíû-Ýïèçîä_3-Ìåñòü_Ñèòõîâ_-_Star_Wars-Episode_3-Revenge_of_the_Sith-(2005)-[1080i]_[HDTV].ts
+19858181688
+2009
+3
+11
+0
+0
+
+-
+Çâ¸çäíûé_äåñàíò_-_Starship_Troopers-(1997)-[1080p]_[BD_remux].mkv
+29026065728
+2009
+3
+16
+0
+0
+
+-
+Çåðêàëà_[Ðàñøèðåííàÿ_âåðñèÿ]_-_Mirrors_[Unrated_Edition]-(2008)-[1080p]_[BD_remux].mkv
+22169179449
+2009
+3
+16
+0
+0
+
+d
+Íèíäçÿ-óáèéöà_-_Ninja_Assassin-(2009)-[1080p]_[BD]
+-1
+1994
+6
+15
+14
+56
+
+-
+Îáèòåëü_çëà_3_-_Resident_Evil-Extinction-(2007)-[1080p]_[BD_remux].mkv
+19717173247
+2009
+3
+11
+0
+0
+
+-
+Ïàòîëîãèÿ_-_Pathology-(2008)-[1080p]_[BD_remux].mkv
+18660904388
+2009
+3
+11
+0
+0
+
+-
+Ïèëà_1_[Ðåæèññ¸ðñêàÿ_âåðñèÿ]_-_Saw_I_[Director's_Cut]-(2004)-[1080p]_[HDDVD_remux].mkv
+16476154520
+2009
+3
+5
+0
+0
+
+-
+Ïèëà_2_[Ðåæèññ¸ðñêàÿ_âåðñèÿ]_-_Saw_II_[Director's_Cut]-(2005)-[1080p]_[BD_remux].mkv
+19917510515
+2009
+3
+5
+0
+0
+
+-
+Ïèëà_3_[Ðåæèññ¸ðñêàÿ_âåðñèÿ]_-_Saw_III_[Director's_Cut]-(2006)-[1080p]_[BD_remux].mkv
+18085592265
+2009
+3
+5
+0
+0
+
+-
+Ïèëà_4_[Ðåæèññ¸ðñêàÿ_âåðñèÿ]_-_Saw_IV_[Director's_Cut]-(2007)-[1080p]_[BD_remux].flac
+3473582701
+2009
+3
+5
+0
+0
+
+-
+Ïèëà_4_[Ðåæèññ¸ðñêàÿ_âåðñèÿ]_-_Saw_IV_[Director's_Cut]-(2007)-[1080p]_[BD_remux].mkv
+15263958421
+2009
+3
+5
+0
+0
+
+-
+Ïèëà_5_[Ðåæèññ¸ðñêàÿ_âåðñèÿ]_-_Saw_V_[Director's_Cut]-(2008)-[1080p]_[BD_remux].mkv
+19944605507
+2009
+3
+16
+0
+0
+
+-
+Ïèíãâèíû_èç_Ìàäàãàñêàðà-Îïåðàöèÿ_Ñ_Íîâûì_Ãîäîì!_-_The_Madagascar_Penguins_in_A_Christmas_Caper-(2005)-[1080p]_[BD_remux].ts
+3024333064
+2009
+3
+24
+0
+0
+
+-
+Ïëîõîé_Ñàíòà_[Ðàñøèðåííàÿ_âåðñèÿ]_-_Bad_Santa_[Unrated_Edition]-(2003)-[1080p]_[BD_remux].srt
+125961
+2009
+3
+5
+0
+0
+
+-
+Ïëîõîé_Ñàíòà_[Ðàñøèðåííàÿ_âåðñèÿ]_-_Bad_Santa_[Unrated_Edition]-(2003)-[1080p]_[BD_remux].ts
+19908695408
+2009
+3
+5
+0
+0
+
+-
+Ïîáåã_èç_Øîóøåíêà_-_The_Shawshank_Redemption-(1994)-[1080p]_[BD_remux].mkv
+23185439267
+2009
+3
+11
+0
+0
+
+-
+Òóïîé_è_åùå_òóïåå_[Ðàñøèðåííàÿ_âåðñèÿ]_-_Dumb_and_Dumber_[Unrated_Edition]-(1994)-[1080p]_[BD_remux].mkv
+19567287274
+2009
+3
+16
+0
+0
+
+-
+Óðàãàí_-_The_Hurricane-(1999)-[1080p]_[HDDVD_Rip].mkv
+14773061093
+2009
+3
+16
+0
+0
+
+-
+Õîñòåë_2_[Ðåæèññ¸ðñêàÿ_âåðñèÿ]_-_Hostel_2_[Director's_Cut]-(2007)-[1080p]_[BD_remux].ts
+22411268500
+2009
+3
+11
+0
+0
+
+-
+×óæîé_ïðîòèâ_Õèùíèêà_[Ðàñøèðåííàÿ_âåðñèÿ]_-_Alien_vs_Predator_[Unrated_Edition]-(2004)-[1080p]_[BD_remux].mkv
+23712519861
+2009
+3
+11
+0
+0
diff --git a/net/ftp/ftp_directory_listing_buffer.cc b/net/ftp/ftp_directory_listing_buffer.cc index a173399..f6e8748 100644 --- a/net/ftp/ftp_directory_listing_buffer.cc +++ b/net/ftp/ftp_directory_listing_buffer.cc @@ -37,7 +37,7 @@ int FtpDirectoryListingBuffer::ConsumeData(const char* data, int data_length) { buffer_.append(data, data_length); if (!encoding_.empty() || buffer_.length() > 1024) { - int rv = ExtractFullLinesFromBuffer(); + int rv = ConsumeBuffer(); if (rv != OK) return rv; } @@ -46,11 +46,12 @@ int FtpDirectoryListingBuffer::ConsumeData(const char* data, int data_length) { } int FtpDirectoryListingBuffer::ProcessRemainingData() { - int rv = ExtractFullLinesFromBuffer(); + int rv = ConsumeBuffer(); if (rv != OK) return rv; - if (!buffer_.empty()) + DCHECK(buffer_.empty()); + if (!converted_buffer_.empty()) return ERR_INVALID_RESPONSE; rv = ParseLines(); @@ -77,38 +78,62 @@ FtpServerType FtpDirectoryListingBuffer::GetServerType() const { return (current_parser_ ? current_parser_->GetServerType() : SERVER_UNKNOWN); } -bool FtpDirectoryListingBuffer::ConvertToDetectedEncoding( - const std::string& from, string16* to) { - std::string encoding(encoding_.empty() ? "ascii" : encoding_); - return base::CodepageToUTF16(from, encoding.c_str(), - base::OnStringConversionError::FAIL, to); +int FtpDirectoryListingBuffer::DecodeBufferUsingEncoding( + const std::string& encoding) { + string16 converted; + if (!base::CodepageToUTF16(buffer_, + encoding.c_str(), + base::OnStringConversionError::FAIL, + &converted)) + return ERR_ENCODING_CONVERSION_FAILED; + + buffer_.clear(); + converted_buffer_ += converted; + return OK; } -int FtpDirectoryListingBuffer::ExtractFullLinesFromBuffer() { +int FtpDirectoryListingBuffer::ConvertBufferToUTF16() { if (encoding_.empty()) { - if (!base::DetectEncoding(buffer_, &encoding_)) + std::vector<std::string> encodings; + if (!base::DetectAllEncodings(buffer_, &encodings)) return ERR_ENCODING_DETECTION_FAILED; + + // Use first encoding that can be used to decode the buffer. + for (size_t i = 0; i < encodings.size(); i++) { + if (DecodeBufferUsingEncoding(encodings[i]) == OK) { + encoding_ = encodings[i]; + return OK; + } + } + + return ERR_ENCODING_DETECTION_FAILED; } + return DecodeBufferUsingEncoding(encoding_); +} + +void FtpDirectoryListingBuffer::ExtractFullLinesFromBuffer() { int cut_pos = 0; // TODO(phajdan.jr): This code accepts all endlines matching \r*\n. Should it // be more strict, or enforce consistent line endings? - for (size_t i = 0; i < buffer_.length(); ++i) { - if (buffer_[i] != '\n') + for (size_t i = 0; i < converted_buffer_.length(); ++i) { + if (converted_buffer_[i] != '\n') continue; int line_length = i - cut_pos; - if (i >= 1 && buffer_[i - 1] == '\r') + if (i >= 1 && converted_buffer_[i - 1] == '\r') line_length--; - std::string line(buffer_.substr(cut_pos, line_length)); + lines_.push_back(converted_buffer_.substr(cut_pos, line_length)); cut_pos = i + 1; - string16 line_converted; - if (!ConvertToDetectedEncoding(line, &line_converted)) { - buffer_.erase(0, cut_pos); - return ERR_ENCODING_CONVERSION_FAILED; - } - lines_.push_back(line_converted); } - buffer_.erase(0, cut_pos); + converted_buffer_.erase(0, cut_pos); +} + +int FtpDirectoryListingBuffer::ConsumeBuffer() { + int rv = ConvertBufferToUTF16(); + if (rv != OK) + return rv; + + ExtractFullLinesFromBuffer(); return OK; } diff --git a/net/ftp/ftp_directory_listing_buffer.h b/net/ftp/ftp_directory_listing_buffer.h index 0a25fff..ea68932 100644 --- a/net/ftp/ftp_directory_listing_buffer.h +++ b/net/ftp/ftp_directory_listing_buffer.h @@ -51,13 +51,20 @@ class FtpDirectoryListingBuffer { private: typedef std::set<FtpDirectoryListingParser*> ParserSet; - // Converts the string |from| to detected encoding and stores it in |to|. - // Returns true on success. - bool ConvertToDetectedEncoding(const std::string& from, string16* to); + // Decodes the raw buffer using specified |encoding|. On success + // clears the raw buffer and appends data to |converted_buffer_|. + // Returns network error code. + int DecodeBufferUsingEncoding(const std::string& encoding); - // Tries to extract full lines from the raw buffer, converting them to the - // detected encoding. Returns network error code. - int ExtractFullLinesFromBuffer(); + // Converts the raw buffer to UTF-16. Returns network error code. + int ConvertBufferToUTF16(); + + // Extracts lines from the converted buffer, and puts them in |lines_|. + void ExtractFullLinesFromBuffer(); + + // Consumes the raw buffer (i.e. does the character set conversion + // and line splitting). Returns network error code. + int ConsumeBuffer(); // Tries to parse full lines stored in |lines_|. Returns network error code. int ParseLines(); @@ -66,12 +73,15 @@ class FtpDirectoryListingBuffer { // parsers. Returns network error code. int OnEndOfInput(); - // Detected encoding of the response (empty if unknown or ASCII). + // Detected encoding of the response (empty if unknown). std::string encoding_; - // Buffer to keep not-yet-split data. + // Buffer to keep data before character set conversion. std::string buffer_; + // Buffer to keep data before line splitting. + string16 converted_buffer_; + // CRLF-delimited lines, without the CRLF, not yet consumed by parser. std::deque<string16> lines_; diff --git a/net/ftp/ftp_directory_listing_buffer_unittest.cc b/net/ftp/ftp_directory_listing_buffer_unittest.cc index 683e2f7..ceddfc4 100644 --- a/net/ftp/ftp_directory_listing_buffer_unittest.cc +++ b/net/ftp/ftp_directory_listing_buffer_unittest.cc @@ -42,6 +42,9 @@ TEST(FtpDirectoryListingBufferTest, Parse) { "dir-listing-ls-17", "dir-listing-ls-18", "dir-listing-ls-19", + "dir-listing-ls-20", // TODO(phajdan.jr): should use windows-1251 encoding. + "dir-listing-ls-21", // TODO(phajdan.jr): should use windows-1251 encoding. + "dir-listing-ls-22", // TODO(phajdan.jr): should use windows-1251 encoding. "dir-listing-mlsd-1", "dir-listing-mlsd-2", "dir-listing-netware-1", |