diff options
author | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-11 09:19:30 +0000 |
---|---|---|
committer | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-11 09:19:30 +0000 |
commit | 193c35145799c33bde24a470f460587954d70aba (patch) | |
tree | 330c97462719bb266940cec9cd40ea70531d8500 /net | |
parent | 587f4ff6420df90d4e6160aeb78918fcf55e3599 (diff) | |
download | chromium_src-193c35145799c33bde24a470f460587954d70aba.zip chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.gz chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.bz2 |
FTP: fix navigating to files listed under non-ASCII characters
We need to convert the file name back to the server's encoding.
BUG=38016
TEST=see bug
Review URL: http://codereview.chromium.org/1857002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@46900 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/base/net_error_list.h | 3 | ||||
-rw-r--r-- | net/base/net_util.h | 10 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_buffer.cc | 34 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_buffer.h | 2 |
4 files changed, 16 insertions, 33 deletions
diff --git a/net/base/net_error_list.h b/net/base/net_error_list.h index c42549d..3338229 100644 --- a/net/base/net_error_list.h +++ b/net/base/net_error_list.h @@ -335,6 +335,9 @@ NET_ERROR(INVALID_AUTH_CREDENTIALS, -338) // machine. NET_ERROR(UNSUPPORTED_AUTH_SCHEME, -339) +// Detecting the encoding of the response failed. +NET_ERROR(ENCODING_DETECTION_FAILED, -340) + // The cache does not have the requested entry. NET_ERROR(CACHE_MISS, -400) diff --git a/net/base/net_util.h b/net/base/net_util.h index 77bd69a..5c3e37e 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -210,10 +210,12 @@ std::string GetDirectoryListingHeader(const string16& title); // Currently, it's a script tag containing a call to a Javascript function // |addRow|. // -// Its 1st parameter is derived from |name| and is the Javascript-string -// escaped form of |name| (i.e \uXXXX). The 2nd parameter is the url-escaped -// |raw_bytes| if it's not empty. If empty, the 2nd parameter is the -// url-escaped |name| in UTF-8. +// |name| is the file name to be displayed. |raw_bytes| will be used +// as the actual target of the link (so for example, ftp links should use +// server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name| +// will be used. +// +// Both |name| and |raw_bytes| are escaped internally. 
std::string GetDirectoryListingEntry(const string16& name, const std::string& raw_bytes, bool is_dir, int64 size, diff --git a/net/ftp/ftp_directory_listing_buffer.cc b/net/ftp/ftp_directory_listing_buffer.cc index bc2db9c..41daeb4 100644 --- a/net/ftp/ftp_directory_listing_buffer.cc +++ b/net/ftp/ftp_directory_listing_buffer.cc @@ -4,6 +4,7 @@ #include "net/ftp/ftp_directory_listing_buffer.h" +#include "base/i18n/icu_encoding_detection.h" #include "base/i18n/icu_string_conversions.h" #include "base/stl_util-inl.h" #include "base/string_util.h" @@ -13,33 +14,6 @@ #include "net/ftp/ftp_directory_listing_parser_netware.h" #include "net/ftp/ftp_directory_listing_parser_vms.h" #include "net/ftp/ftp_directory_listing_parser_windows.h" -#include "unicode/ucsdet.h" - -namespace { - -// A very simple-minded character encoding detection. -// TODO(jungshik): We can apply more heuristics here (e.g. using various hints -// like TLD, the UI language/default encoding of a client, etc). In that case, -// this should be pulled out of here and moved somewhere in base because there -// can be other use cases. -std::string DetectEncoding(const std::string& text) { - if (IsStringASCII(text)) - return std::string(); - UErrorCode status = U_ZERO_ERROR; - UCharsetDetector* detector = ucsdet_open(&status); - ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()), - &status); - const UCharsetMatch* match = ucsdet_detect(detector, &status); - const char* encoding = ucsdet_getName(match, &status); - ucsdet_close(detector); - // Should we check the quality of the match? A rather arbitrary number is - // assigned by ICU and it's hard to come up with a lower limit. 
- if (U_FAILURE(status)) - return std::string(); - return encoding; -} - -} // namespace namespace net { @@ -109,8 +83,10 @@ bool FtpDirectoryListingBuffer::ConvertToDetectedEncoding( } int FtpDirectoryListingBuffer::ExtractFullLinesFromBuffer() { - if (encoding_.empty()) - encoding_ = DetectEncoding(buffer_); + if (encoding_.empty()) { + if (!base::DetectEncoding(buffer_, &encoding_)) + return ERR_ENCODING_DETECTION_FAILED; + } int cut_pos = 0; // TODO(phajdan.jr): This code accepts all endlines matching \r*\n. Should it diff --git a/net/ftp/ftp_directory_listing_buffer.h b/net/ftp/ftp_directory_listing_buffer.h index 7aead49..4123cf0 100644 --- a/net/ftp/ftp_directory_listing_buffer.h +++ b/net/ftp/ftp_directory_listing_buffer.h @@ -45,6 +45,8 @@ class FtpDirectoryListingBuffer { // time, although it will return SERVER_UNKNOWN if it doesn't know the answer. FtpServerType GetServerType() const; + const std::string& encoding() const { return encoding_; } + private: typedef std::set<FtpDirectoryListingParser*> ParserSet; |