diff options
Diffstat (limited to 'net/base')
-rw-r--r-- | net/base/net_util.cc | 14 | ||||
-rw-r--r-- | net/base/net_util.h | 25 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 66 | ||||
-rw-r--r-- | net/base/net_util_win.cc | 34 |
4 files changed, 83 insertions, 56 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 2e6292c..00beb4e 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -860,7 +860,7 @@ std::string CanonicalizeHost(const std::wstring& host, return CanonicalizeHost(converted_host, host_info); } -std::string GetDirectoryListingHeader(const std::string& title) { +std::string GetDirectoryListingHeader(const string16& title) { static const StringPiece header(NetModule::GetResource(IDR_DIR_HEADER_HTML)); if (header.empty()) { NOTREACHED() << "expected resource not found"; @@ -874,15 +874,21 @@ std::string GetDirectoryListingHeader(const std::string& title) { return result; } -std::string GetDirectoryListingEntry(const std::string& name, +std::string GetDirectoryListingEntry(const string16& name, + const std::string& raw_bytes, bool is_dir, int64 size, - const Time& modified) { + Time modified) { std::string result; result.append("<script>addRow("); string_escape::JsonDoubleQuote(name, true, &result); result.append(","); - string_escape::JsonDoubleQuote(EscapePath(name), true, &result); + if (raw_bytes.empty()) { + string_escape::JsonDoubleQuote(EscapePath(UTF16ToUTF8(name)), + true, &result); + } else { + string_escape::JsonDoubleQuote(EscapePath(raw_bytes), true, &result); + } if (is_dir) { result.append(",1,"); } else { diff --git a/net/base/net_util.h b/net/base/net_util.h index 40df770..4320e1c 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -14,6 +14,7 @@ #include <string> #include "base/basictypes.h" +#include "base/string16.h" #include "net/base/escape.h" struct addrinfo; @@ -147,12 +148,24 @@ std::string CanonicalizeHost(const std::string& host, std::string CanonicalizeHost(const std::wstring& host, url_canon::CanonHostInfo* host_info); -// Call these functions to get the html for a directory listing. -// They will pass non-7bit-ascii characters unescaped, allowing -// the browser to interpret the encoding (utf8, etc). 
-std::string GetDirectoryListingHeader(const std::string& title); -std::string GetDirectoryListingEntry(const std::string& name, bool is_dir, - int64 size, const base::Time& modified); +// Call these functions to get the html snippet for a directory listing. +// The return values of both functions are in UTF-8. +std::string GetDirectoryListingHeader(const string16& title); + +// Given the name of a file in a directory (ftp or local) and +// other information (is_dir, size, modification time), it returns +// the html snippet to add the entry for the file to the directory listing. +// Currently, it's a script tag containing a call to a Javascript function +// |addRow|. +// +// Its 1st parameter is derived from |name| and is the Javascript-string +// escaped form of |name| (i.e \uXXXX). The 2nd parameter is the url-escaped +// |raw_bytes| if it's not empty. If empty, the 2nd parameter is the +// url-escaped |name| in UTF-8. +std::string GetDirectoryListingEntry(const string16& name, + const std::string& raw_bytes, + bool is_dir, int64 size, + base::Time modified); // If text starts with "www." it is removed, otherwise text is returned // unmodified. diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 78f7ab9..f346e92 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -407,18 +407,32 @@ TEST(NetUtilTest, FileURLConversion) { "file://some%20computer/foo/bar.txt"}, // UNC {L"D:\\Name;with%some symbols*#", "file:///D:/Name%3Bwith%25some%20symbols*%23"}, + // issue 14153: To be tested with the OS default codepage other than 1252. 
+ {L"D:\\latin1\\caf\x00E9\x00DD.txt", + "file:///D:/latin1/caf%C3%A9%C3%9D.txt"}, + {L"D:\\otherlatin\\caf\x0119.txt", + "file:///D:/otherlatin/caf%C4%99.txt"}, + {L"D:\\greek\\\x03B1\x03B2\x03B3.txt", + "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"}, {L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc", "file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91" "%E9%A1%B5.doc"}, + {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt", // Math alphabet "AB" + "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"}, #elif defined(OS_POSIX) {L"/foo/bar.txt", "file:///foo/bar.txt"}, {L"/foo/BAR.txt", "file:///foo/BAR.txt"}, {L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"}, {L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"}, {L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"}, + {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"}, + {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"}, + {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"}, {L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc", "file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD" "%91%E9%A1%B5.doc"}, + {L"/plane1/\x1D400\x1D401.txt", // Math alphabet "AB" + "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"}, #endif }; @@ -474,21 +488,6 @@ TEST(NetUtilTest, FileURLConversion) { EXPECT_EQ(url_cases[i].file, output.ToWStringHack()); } - // Here, we test that UTF-8 encoded strings get decoded properly, even when - // they might be stored with wide characters. On posix systems, just treat - // this as a stream of bytes. 
- const wchar_t utf8[] = L"file:///d:/Chinese/\xe6\x89\x80\xe6\x9c\x89\xe4\xb8" - L"\xad\xe6\x96\x87\xe7\xbd\x91\xe9\xa1\xb5.doc"; -#if defined(OS_WIN) - const wchar_t wide[] = - L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc"; -#elif defined(OS_POSIX) - const wchar_t wide[] = L"/d:/Chinese/\xe6\x89\x80\xe6\x9c\x89\xe4\xb8\xad\xe6" - L"\x96\x87\xe7\xbd\x91\xe9\xa1\xb5.doc"; -#endif - EXPECT_TRUE(net::FileURLToFilePath(GURL(WideToUTF8(utf8)), &output)); - EXPECT_EQ(wide, output.ToWStringHack()); - // Unfortunately, UTF8ToWide discards invalid UTF8 input. #ifdef BUG_878908_IS_FIXED // Test that no conversion happens if the UTF-8 input is invalid, and that @@ -862,7 +861,8 @@ TEST(NetUtilTest, GetSuggestedFilename) { namespace { struct GetDirectoryListingEntryCase { - const char* name; + const wchar_t* name; + const char* raw_bytes; bool is_dir; int64 filesize; base::Time time; @@ -872,22 +872,50 @@ struct GetDirectoryListingEntryCase { } // namespace TEST(NetUtilTest, GetDirectoryListingEntry) { const GetDirectoryListingEntryCase test_cases[] = { - {"Foo", + {L"Foo", + "", false, 10000, base::Time(), "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"}, - {"quo\"tes", + {L"quo\"tes", + "", + false, + 10000, + base::Time(), + "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" + "\n"}, + {L"quo\"tes", + "quo\"tes", false, 10000, base::Time(), "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" "\n"}, + // U+D55C U+AE00. raw_bytes is empty (either a local file with + // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8). + {L"\xD55C\xAE00.txt", + "", + false, + 10000, + base::Time(), + "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%ED%95%9C%EA%B8%80.txt\"" + ",0,\"9.8 kB\",\"\");</script>\n"}, + // U+D55C U+AE00. raw_bytes is the corresponding EUC-KR sequence: + // a local or remote file in EUC-KR. 
+ {L"\xD55C\xAE00.txt", + "\xC7\xD1\xB1\xDB.txt", + false, + 10000, + base::Time(), + "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%C7%D1%B1%DB.txt\"" + ",0,\"9.8 kB\",\"\");</script>\n"}, }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { const std::string results = net::GetDirectoryListingEntry( - test_cases[i].name, + WideToUTF16(test_cases[i].name), + test_cases[i].raw_bytes, test_cases[i].is_dir, test_cases[i].filesize, test_cases[i].time); diff --git a/net/base/net_util_win.cc b/net/base/net_util_win.cc index effb212..244f4ad 100644 --- a/net/base/net_util_win.cc +++ b/net/base/net_util_win.cc @@ -57,33 +57,13 @@ bool FileURLToFilePath(const GURL& url, FilePath* file_path) { } file_path_str.assign(UTF8ToWide(path)); - // Now we have an unescaped filename, but are still not sure about its - // encoding. For example, each character could be part of a UTF-8 string. - if (file_path_str.empty() || !IsString8Bit(file_path_str)) { - // assume our 16-bit encoding is correct if it won't fit into an 8-bit - // string - return true; - } - - // Convert our narrow string into the native wide path. - std::string narrow; - if (!WideToLatin1(file_path_str, &narrow)) { - NOTREACHED() << "Should have filtered out non-8-bit strings above."; - return false; - } - if (IsStringUTF8(narrow)) { - // Our string actually looks like it could be UTF-8, convert to 8-bit - // UTF-8 and then to the corresponding wide string. - file_path_str = UTF8ToWide(narrow); - } else { - // Our wide string contains only 8-bit characters and it's not UTF-8, so - // we assume it's in the native codepage. - file_path_str = base::SysNativeMBToWide(narrow); - } - - // Fail if 8-bit -> wide conversion failed and gave us an empty string back - // (we already filtered out empty strings above). - return !file_path_str.empty(); + // We used to try too hard and see if |path| made up entirely of + // the 1st 256 characters in the Unicode was a zero-extended UTF-16. 
+ // If so, we converted it to 'Latin-1' and checked if the result was UTF-8. + // If the check passed, we decoded the result as UTF-8 into a wide string. + // Otherwise, we treated the result as the native OS encoding. + // However, that led to http://crbug.com/4619 and http://crbug.com/14153. + return true; } } // namespace net |