summary | refs | log | tree | commit | diff | stats
path: root/webkit
diff options
context:
space:
mode:
author phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-05-11 09:19:30 +0000
committer phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-05-11 09:19:30 +0000
commit 193c35145799c33bde24a470f460587954d70aba (patch)
tree 330c97462719bb266940cec9cd40ea70531d8500 /webkit
parent 587f4ff6420df90d4e6160aeb78918fcf55e3599 (diff)
download chromium_src-193c35145799c33bde24a470f460587954d70aba.zip
chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.gz
chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.bz2
FTP: fix navigating to files listed under non-ASCII characters
We need to convert the file name back to server encoding.

BUG=38016
TEST=see bug
Review URL: http://codereview.chromium.org/1857002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@46900 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'webkit')
-rw-r--r-- webkit/glue/ftp_directory_listing_response_delegate.cc 101
-rw-r--r-- webkit/glue/ftp_directory_listing_response_delegate.h 7
-rw-r--r-- webkit/glue/webkit_glue.gypi 1
3 files changed, 53 insertions, 56 deletions
diff --git a/webkit/glue/ftp_directory_listing_response_delegate.cc b/webkit/glue/ftp_directory_listing_response_delegate.cc
index d27b7ad..80737db 100644
--- a/webkit/glue/ftp_directory_listing_response_delegate.cc
+++ b/webkit/glue/ftp_directory_listing_response_delegate.cc
@@ -6,6 +6,7 @@
#include <vector>
+#include "base/i18n/icu_encoding_detection.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/logging.h"
#include "base/string_util.h"
@@ -17,7 +18,6 @@
#include "net/base/net_util.h"
#include "net/ftp/ftp_directory_listing_parser.h"
#include "net/ftp/ftp_server_type_histograms.h"
-#include "unicode/ucsdet.h"
#include "third_party/WebKit/WebKit/chromium/public/WebURL.h"
#include "third_party/WebKit/WebKit/chromium/public/WebURLLoaderClient.h"
@@ -29,42 +29,26 @@ using WebKit::WebURLResponse;
namespace {
-// A very simple-minded character encoding detection.
-// TODO(jungshik): We can apply more heuristics here (e.g. using various hints
-// like TLD, the UI language/default encoding of a client, etc). In that case,
-// this should be pulled out of here and moved somewhere in base because there
-// can be other use cases.
-std::string DetectEncoding(const std::string& text) {
- if (IsStringASCII(text))
- return std::string();
- UErrorCode status = U_ZERO_ERROR;
- UCharsetDetector* detector = ucsdet_open(&status);
- ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()),
- &status);
- const UCharsetMatch* match = ucsdet_detect(detector, &status);
- const char* encoding = ucsdet_getName(match, &status);
- ucsdet_close(detector);
- // Should we check the quality of the match? A rather arbitrary number is
- // assigned by ICU and it's hard to come up with a lower limit.
- if (U_FAILURE(status))
- return std::string();
- return encoding;
-}
+string16 ConvertPathToUTF16(const std::string& path) {
+ // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII,
+ // but many old FTP servers use legacy encodings. Try UTF-8 first.
+ if (IsStringUTF8(path))
+ return UTF8ToUTF16(path);
+
+ // Try detecting the encoding. The sample is rather small though, so it may
+ // fail.
+ std::string encoding;
+ if (base::DetectEncoding(path, &encoding) && !encoding.empty()) {
+ string16 path_utf16;
+ if (base::CodepageToUTF16(path, encoding.c_str(),
+ base::OnStringConversionError::SUBSTITUTE,
+ &path_utf16)) {
+ return path_utf16;
+ }
+ }
-string16 RawByteSequenceToFilename(const char* raw_filename,
- const std::string& encoding) {
- if (encoding.empty())
- return ASCIIToUTF16(raw_filename);
-
- // Try the detected encoding before falling back to the native codepage.
- // Using the native codepage does not make much sense, but we don't have
- // much else to resort to.
- string16 filename;
- if (!base::CodepageToUTF16(raw_filename, encoding.c_str(),
- base::OnStringConversionError::SUBSTITUTE,
- &filename))
- filename = WideToUTF16Hack(base::SysNativeMBToWide(raw_filename));
- return filename;
+ // Use system native encoding as the last resort.
+ return WideToUTF16Hack(base::SysNativeMBToWide(path));
}
} // namespace
@@ -111,24 +95,8 @@ void FtpDirectoryListingResponseDelegate::Init() {
UnescapeRule::URL_SPECIAL_CHARS;
std::string unescaped_path = UnescapeURLComponent(response_url.path(),
unescape_rules);
- string16 path_utf16;
- // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII,
- // but many old FTP servers use legacy encodings. Try UTF-8 first and
- // detect the encoding.
- if (IsStringUTF8(unescaped_path)) {
- path_utf16 = UTF8ToUTF16(unescaped_path);
- } else {
- std::string encoding = DetectEncoding(unescaped_path);
- // Try the detected encoding. If it fails, resort to the
- // OS native encoding.
- if (encoding.empty() ||
- !base::CodepageToUTF16(unescaped_path, encoding.c_str(),
- base::OnStringConversionError::SUBSTITUTE,
- &path_utf16))
- path_utf16 = WideToUTF16Hack(base::SysNativeMBToWide(unescaped_path));
- }
-
- SendDataToClient(net::GetDirectoryListingHeader(path_utf16));
+ SendDataToClient(net::GetDirectoryListingHeader(
+ ConvertPathToUTF16(unescaped_path)));
// If this isn't top level directory (i.e. the path isn't "/",)
// add a link to the parent directory.
@@ -138,6 +106,18 @@ void FtpDirectoryListingResponseDelegate::Init() {
}
}
+bool FtpDirectoryListingResponseDelegate::ConvertToServerEncoding(
+ const string16& filename, std::string* raw_bytes) const {
+ if (buffer_.encoding().empty()) {
+ *raw_bytes = std::string();
+ return true;
+ }
+
+ return base::UTF16ToCodepage(filename, buffer_.encoding().c_str(),
+ base::OnStringConversionError::FAIL,
+ raw_bytes);
+}
+
void FtpDirectoryListingResponseDelegate::ProcessReceivedEntries() {
if (!updated_histograms_ && buffer_.EntryAvailable()) {
// Only log the server type if we got enough data to reliably detect it.
@@ -157,8 +137,17 @@ void FtpDirectoryListingResponseDelegate::ProcessReceivedEntries() {
int64 size = entry.size;
if (entry.type != FtpDirectoryListingEntry::FILE)
size = 0;
- SendDataToClient(net::GetDirectoryListingEntry(
- entry.name, std::string(), is_directory, size, entry.last_modified));
+ std::string raw_bytes;
+ if (ConvertToServerEncoding(entry.name, &raw_bytes)) {
+ SendDataToClient(net::GetDirectoryListingEntry(
+ entry.name, raw_bytes, is_directory, size, entry.last_modified));
+ } else {
+ // Consider an encoding problem a non-fatal error. The server's support
+ // for non-ASCII characters might be buggy. Display an error message,
+ // but keep trying to display the rest of the listing (most file names
+ // are ASCII anyway, we could be just unlucky with this one).
+ had_parsing_error_ = true;
+ }
}
}
diff --git a/webkit/glue/ftp_directory_listing_response_delegate.h b/webkit/glue/ftp_directory_listing_response_delegate.h
index 86b5c436..1218da9 100644
--- a/webkit/glue/ftp_directory_listing_response_delegate.h
+++ b/webkit/glue/ftp_directory_listing_response_delegate.h
@@ -33,6 +33,13 @@ class FtpDirectoryListingResponseDelegate {
private:
void Init();
+ // Converts |filename| to detected server encoding and puts the result
+ // in |raw_bytes| (if no conversion is necessary, an empty string is used).
+ // Returns true on success.
+ bool ConvertToServerEncoding(const string16& filename,
+ std::string* raw_bytes) const;
+
+ // Fetches the listing entries from the buffer and sends them to the client.
void ProcessReceivedEntries();
void SendDataToClient(const std::string& data);
diff --git a/webkit/glue/webkit_glue.gypi b/webkit/glue/webkit_glue.gypi
index a0e4437..6cbf8ab 100644
--- a/webkit/glue/webkit_glue.gypi
+++ b/webkit/glue/webkit_glue.gypi
@@ -110,6 +110,7 @@
'msvs_guid': 'C66B126D-0ECE-4CA2-B6DC-FA780AFBBF09',
'dependencies': [
'<(DEPTH)/app/app.gyp:app_base',
+ '<(DEPTH)/base/base.gyp:base_i18n',
'<(DEPTH)/net/net.gyp:net',
'<(DEPTH)/skia/skia.gyp:skia',
'<(DEPTH)/third_party/icu/icu.gyp:icui18n',