diff options
author | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-11 09:19:30 +0000 |
---|---|---|
committer | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-11 09:19:30 +0000 |
commit | 193c35145799c33bde24a470f460587954d70aba (patch) | |
tree | 330c97462719bb266940cec9cd40ea70531d8500 /webkit | |
parent | 587f4ff6420df90d4e6160aeb78918fcf55e3599 (diff) | |
download | chromium_src-193c35145799c33bde24a470f460587954d70aba.zip chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.gz chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.bz2 |
FTP: fix navigating to files whose names contain non-ASCII characters
We need to convert the file name back to the server's encoding.
BUG=38016
TEST=see bug
Review URL: http://codereview.chromium.org/1857002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@46900 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'webkit')
-rw-r--r-- | webkit/glue/ftp_directory_listing_response_delegate.cc | 101 | ||||
-rw-r--r-- | webkit/glue/ftp_directory_listing_response_delegate.h | 7 | ||||
-rw-r--r-- | webkit/glue/webkit_glue.gypi | 1 |
3 files changed, 53 insertions, 56 deletions
diff --git a/webkit/glue/ftp_directory_listing_response_delegate.cc b/webkit/glue/ftp_directory_listing_response_delegate.cc index d27b7ad..80737db 100644 --- a/webkit/glue/ftp_directory_listing_response_delegate.cc +++ b/webkit/glue/ftp_directory_listing_response_delegate.cc @@ -6,6 +6,7 @@ #include <vector> +#include "base/i18n/icu_encoding_detection.h" #include "base/i18n/icu_string_conversions.h" #include "base/logging.h" #include "base/string_util.h" @@ -17,7 +18,6 @@ #include "net/base/net_util.h" #include "net/ftp/ftp_directory_listing_parser.h" #include "net/ftp/ftp_server_type_histograms.h" -#include "unicode/ucsdet.h" #include "third_party/WebKit/WebKit/chromium/public/WebURL.h" #include "third_party/WebKit/WebKit/chromium/public/WebURLLoaderClient.h" @@ -29,42 +29,26 @@ using WebKit::WebURLResponse; namespace { -// A very simple-minded character encoding detection. -// TODO(jungshik): We can apply more heuristics here (e.g. using various hints -// like TLD, the UI language/default encoding of a client, etc). In that case, -// this should be pulled out of here and moved somewhere in base because there -// can be other use cases. -std::string DetectEncoding(const std::string& text) { - if (IsStringASCII(text)) - return std::string(); - UErrorCode status = U_ZERO_ERROR; - UCharsetDetector* detector = ucsdet_open(&status); - ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()), - &status); - const UCharsetMatch* match = ucsdet_detect(detector, &status); - const char* encoding = ucsdet_getName(match, &status); - ucsdet_close(detector); - // Should we check the quality of the match? A rather arbitrary number is - // assigned by ICU and it's hard to come up with a lower limit. - if (U_FAILURE(status)) - return std::string(); - return encoding; -} +string16 ConvertPathToUTF16(const std::string& path) { + // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII, + // but many old FTP servers use legacy encodings. 
Try UTF-8 first. + if (IsStringUTF8(path)) + return UTF8ToUTF16(path); + + // Try detecting the encoding. The sample is rather small though, so it may + // fail. + std::string encoding; + if (base::DetectEncoding(path, &encoding) && !encoding.empty()) { + string16 path_utf16; + if (base::CodepageToUTF16(path, encoding.c_str(), + base::OnStringConversionError::SUBSTITUTE, + &path_utf16)) { + return path_utf16; + } + } -string16 RawByteSequenceToFilename(const char* raw_filename, - const std::string& encoding) { - if (encoding.empty()) - return ASCIIToUTF16(raw_filename); - - // Try the detected encoding before falling back to the native codepage. - // Using the native codepage does not make much sense, but we don't have - // much else to resort to. - string16 filename; - if (!base::CodepageToUTF16(raw_filename, encoding.c_str(), - base::OnStringConversionError::SUBSTITUTE, - &filename)) - filename = WideToUTF16Hack(base::SysNativeMBToWide(raw_filename)); - return filename; + // Use system native encoding as the last resort. + return WideToUTF16Hack(base::SysNativeMBToWide(path)); } } // namespace @@ -111,24 +95,8 @@ void FtpDirectoryListingResponseDelegate::Init() { UnescapeRule::URL_SPECIAL_CHARS; std::string unescaped_path = UnescapeURLComponent(response_url.path(), unescape_rules); - string16 path_utf16; - // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII, - // but many old FTP servers use legacy encodings. Try UTF-8 first and - // detect the encoding. - if (IsStringUTF8(unescaped_path)) { - path_utf16 = UTF8ToUTF16(unescaped_path); - } else { - std::string encoding = DetectEncoding(unescaped_path); - // Try the detected encoding. If it fails, resort to the - // OS native encoding. 
- if (encoding.empty() || - !base::CodepageToUTF16(unescaped_path, encoding.c_str(), - base::OnStringConversionError::SUBSTITUTE, - &path_utf16)) - path_utf16 = WideToUTF16Hack(base::SysNativeMBToWide(unescaped_path)); - } - - SendDataToClient(net::GetDirectoryListingHeader(path_utf16)); + SendDataToClient(net::GetDirectoryListingHeader( + ConvertPathToUTF16(unescaped_path))); // If this isn't top level directory (i.e. the path isn't "/",) // add a link to the parent directory. @@ -138,6 +106,18 @@ void FtpDirectoryListingResponseDelegate::Init() { } } +bool FtpDirectoryListingResponseDelegate::ConvertToServerEncoding( + const string16& filename, std::string* raw_bytes) const { + if (buffer_.encoding().empty()) { + *raw_bytes = std::string(); + return true; + } + + return base::UTF16ToCodepage(filename, buffer_.encoding().c_str(), + base::OnStringConversionError::FAIL, + raw_bytes); +} + void FtpDirectoryListingResponseDelegate::ProcessReceivedEntries() { if (!updated_histograms_ && buffer_.EntryAvailable()) { // Only log the server type if we got enough data to reliably detect it. @@ -157,8 +137,17 @@ void FtpDirectoryListingResponseDelegate::ProcessReceivedEntries() { int64 size = entry.size; if (entry.type != FtpDirectoryListingEntry::FILE) size = 0; - SendDataToClient(net::GetDirectoryListingEntry( - entry.name, std::string(), is_directory, size, entry.last_modified)); + std::string raw_bytes; + if (ConvertToServerEncoding(entry.name, &raw_bytes)) { + SendDataToClient(net::GetDirectoryListingEntry( + entry.name, raw_bytes, is_directory, size, entry.last_modified)); + } else { + // Consider an encoding problem a non-fatal error. The server's support + // for non-ASCII characters might be buggy. Display an error message, + // but keep trying to display the rest of the listing (most file names + // are ASCII anyway, we could be just unlucky with this one). 
+ had_parsing_error_ = true; + } } } diff --git a/webkit/glue/ftp_directory_listing_response_delegate.h b/webkit/glue/ftp_directory_listing_response_delegate.h index 86b5c436..1218da9 100644 --- a/webkit/glue/ftp_directory_listing_response_delegate.h +++ b/webkit/glue/ftp_directory_listing_response_delegate.h @@ -33,6 +33,13 @@ class FtpDirectoryListingResponseDelegate { private: void Init(); + // Converts |filename| to detected server encoding and puts the result + // in |raw_bytes| (if no conversion is necessary, an empty string is used). + // Returns true on success. + bool ConvertToServerEncoding(const string16& filename, + std::string* raw_bytes) const; + + // Fetches the listing entries from the buffer and sends them to the client. void ProcessReceivedEntries(); void SendDataToClient(const std::string& data); diff --git a/webkit/glue/webkit_glue.gypi b/webkit/glue/webkit_glue.gypi index a0e4437..6cbf8ab 100644 --- a/webkit/glue/webkit_glue.gypi +++ b/webkit/glue/webkit_glue.gypi @@ -110,6 +110,7 @@ 'msvs_guid': 'C66B126D-0ECE-4CA2-B6DC-FA780AFBBF09', 'dependencies': [ '<(DEPTH)/app/app.gyp:app_base', + '<(DEPTH)/base/base.gyp:base_i18n', '<(DEPTH)/net/net.gyp:net', '<(DEPTH)/skia/skia.gyp:skia', '<(DEPTH)/third_party/icu/icu.gyp:icui18n', |