diff options
author | estade@chromium.org <estade@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-06-11 00:08:35 +0000 |
---|---|---|
committer | estade@chromium.org <estade@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-06-11 00:08:35 +0000 |
commit | c82ab8a9ae4f81478d68a274e59ee030d272d1c3 (patch) | |
tree | 05529d78717874730b50ef6a9753229cece469db /net | |
parent | f45213772a95b2b20609546482702504c1f6acb9 (diff) | |
download | chromium_src-c82ab8a9ae4f81478d68a274e59ee030d272d1c3.zip chromium_src-c82ab8a9ae4f81478d68a274e59ee030d272d1c3.tar.gz chromium_src-c82ab8a9ae4f81478d68a274e59ee030d272d1c3.tar.bz2 |
Download filename encoding fix [try2]:
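Fall back on other charsets when using the URL to suggest a filename. The URL's path should be escaped UTF-8, but may use another encoding instead, so a non-ASCII unescaped filename is now passed through DecodeWord, which tries UTF-8, the referrer charset and the native OS default charset in turn.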
Return early from DecodeWord when the encoded word is empty, after resetting is_rfc2047 and clearing the output (avoids a crash).
BUG=44301
TEST=see bug. Also, net_unittests.
Review URL: http://codereview.chromium.org/2733005
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@49475 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/base/net_util.cc | 24 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 17 |
2 files changed, 37 insertions, 4 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 60f626d4..5eb2e68 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -272,6 +272,11 @@ bool DecodeWord(const std::string& encoded_word, const std::string& referrer_charset, bool* is_rfc2047, std::string* output) { + *is_rfc2047 = false; + output->clear(); + if (encoded_word.empty()) + return true; + if (!IsStringASCII(encoded_word)) { // Try UTF-8, referrer_charset and the native OS default charset in turn. if (IsStringUTF8(encoded_word)) { @@ -287,7 +292,7 @@ bool DecodeWord(const std::string& encoded_word, *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); } } - *is_rfc2047 = false; + return true; } @@ -1081,7 +1086,7 @@ FilePath GetSuggestedFilename(const GURL& url, } const std::string filename_from_cd = GetFileNameFromCD(content_disposition, - referrer_charset); + referrer_charset); #if defined(OS_WIN) FilePath::StringType filename = UTF8ToWide(filename_from_cd); #elif defined(OS_POSIX) @@ -1102,10 +1107,21 @@ FilePath GetSuggestedFilename(const GURL& url, const std::string unescaped_url_filename = UnescapeURLComponent( url.ExtractFileName(), UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); + + // The URL's path should be escaped UTF-8, but may not be. + std::string decoded_filename = unescaped_url_filename; + if (!IsStringASCII(decoded_filename)) { + bool ignore; + // TODO(jshin): this is probably not robust enough. To be sure, we + // need encoding detection. 
+ DecodeWord(unescaped_url_filename, referrer_charset, &ignore, + &decoded_filename); + } + #if defined(OS_WIN) - filename = UTF8ToWide(unescaped_url_filename); + filename = UTF8ToWide(decoded_filename); #elif defined(OS_POSIX) - filename = unescaped_url_filename; + filename = decoded_filename; #endif } } diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index dff3d1c..9bc5d58 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -1060,6 +1060,23 @@ TEST(NetUtilTest, GetSuggestedFilename) { "", L"", L"test"}, + // The filename encoding is specified by the referrer charset. + {"http://example.com/V%FDvojov%E1%20psychologie.doc", + "", + "iso-8859-1", + L"", + L"V\u00fdvojov\u00e1 psychologie.doc"}, + // The filename encoding doesn't match the referrer charset, the + // system charset, or UTF-8. + // TODO(jshin): we need to handle this case. +#if 0 + {"http://example.com/V%FDvojov%E1%20psychologie.doc", + "", + "utf-8", + L"", + L"V\u00fdvojov\u00e1 psychologie.doc", + }, +#endif }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { #if defined(OS_WIN) |