summaryrefslogtreecommitdiffstats
path: root/net/base
diff options
context:
space:
mode:
Diffstat (limited to 'net/base')
-rw-r--r--net/base/net_util.cc14
-rw-r--r--net/base/net_util.h25
-rw-r--r--net/base/net_util_unittest.cc66
-rw-r--r--net/base/net_util_win.cc34
4 files changed, 83 insertions, 56 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 2e6292c..00beb4e 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -860,7 +860,7 @@ std::string CanonicalizeHost(const std::wstring& host,
return CanonicalizeHost(converted_host, host_info);
}
-std::string GetDirectoryListingHeader(const std::string& title) {
+std::string GetDirectoryListingHeader(const string16& title) {
static const StringPiece header(NetModule::GetResource(IDR_DIR_HEADER_HTML));
if (header.empty()) {
NOTREACHED() << "expected resource not found";
@@ -874,15 +874,21 @@ std::string GetDirectoryListingHeader(const std::string& title) {
return result;
}
-std::string GetDirectoryListingEntry(const std::string& name,
+std::string GetDirectoryListingEntry(const string16& name,
+ const std::string& raw_bytes,
bool is_dir,
int64 size,
- const Time& modified) {
+ Time modified) {
std::string result;
result.append("<script>addRow(");
string_escape::JsonDoubleQuote(name, true, &result);
result.append(",");
- string_escape::JsonDoubleQuote(EscapePath(name), true, &result);
+ if (raw_bytes.empty()) {
+ string_escape::JsonDoubleQuote(EscapePath(UTF16ToUTF8(name)),
+ true, &result);
+ } else {
+ string_escape::JsonDoubleQuote(EscapePath(raw_bytes), true, &result);
+ }
if (is_dir) {
result.append(",1,");
} else {
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 40df770..4320e1c 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -14,6 +14,7 @@
#include <string>
#include "base/basictypes.h"
+#include "base/string16.h"
#include "net/base/escape.h"
struct addrinfo;
@@ -147,12 +148,24 @@ std::string CanonicalizeHost(const std::string& host,
std::string CanonicalizeHost(const std::wstring& host,
url_canon::CanonHostInfo* host_info);
-// Call these functions to get the html for a directory listing.
-// They will pass non-7bit-ascii characters unescaped, allowing
-// the browser to interpret the encoding (utf8, etc).
-std::string GetDirectoryListingHeader(const std::string& title);
-std::string GetDirectoryListingEntry(const std::string& name, bool is_dir,
- int64 size, const base::Time& modified);
+// Call these functions to get the html snippet for a directory listing.
+// The return values of both functions are in UTF-8.
+std::string GetDirectoryListingHeader(const string16& title);
+
+// Given the name of a file in a directory (ftp or local) and
+// other information (is_dir, size, modification time), it returns
+// the html snippet to add the entry for the file to the directory listing.
+// Currently, it's a script tag containing a call to a Javascript function
+// |addRow|.
+//
+// Its 1st parameter is derived from |name| and is the Javascript-string
+// escaped form of |name| (i.e. \uXXXX). The 2nd parameter is the url-escaped
+// |raw_bytes| if it's not empty. If empty, the 2nd parameter is the
+// url-escaped |name| in UTF-8.
+std::string GetDirectoryListingEntry(const string16& name,
+ const std::string& raw_bytes,
+ bool is_dir, int64 size,
+ base::Time modified);
// If text starts with "www." it is removed, otherwise text is returned
// unmodified.
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 78f7ab9..f346e92 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -407,18 +407,32 @@ TEST(NetUtilTest, FileURLConversion) {
"file://some%20computer/foo/bar.txt"}, // UNC
{L"D:\\Name;with%some symbols*#",
"file:///D:/Name%3Bwith%25some%20symbols*%23"},
+ // issue 14153: To be tested with the OS default codepage other than 1252.
+ {L"D:\\latin1\\caf\x00E9\x00DD.txt",
+ "file:///D:/latin1/caf%C3%A9%C3%9D.txt"},
+ {L"D:\\otherlatin\\caf\x0119.txt",
+ "file:///D:/otherlatin/caf%C4%99.txt"},
+ {L"D:\\greek\\\x03B1\x03B2\x03B3.txt",
+ "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"},
{L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
"file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91"
"%E9%A1%B5.doc"},
+ {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt", // Math alphabet "AB"
+ "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
#elif defined(OS_POSIX)
{L"/foo/bar.txt", "file:///foo/bar.txt"},
{L"/foo/BAR.txt", "file:///foo/BAR.txt"},
{L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"},
{L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"},
{L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"},
+ {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"},
+ {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"},
+ {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"},
{L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
"file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD"
"%91%E9%A1%B5.doc"},
+ {L"/plane1/\x1D400\x1D401.txt", // Math alphabet "AB"
+ "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
#endif
};
@@ -474,21 +488,6 @@ TEST(NetUtilTest, FileURLConversion) {
EXPECT_EQ(url_cases[i].file, output.ToWStringHack());
}
- // Here, we test that UTF-8 encoded strings get decoded properly, even when
- // they might be stored with wide characters. On posix systems, just treat
- // this as a stream of bytes.
- const wchar_t utf8[] = L"file:///d:/Chinese/\xe6\x89\x80\xe6\x9c\x89\xe4\xb8"
- L"\xad\xe6\x96\x87\xe7\xbd\x91\xe9\xa1\xb5.doc";
-#if defined(OS_WIN)
- const wchar_t wide[] =
- L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc";
-#elif defined(OS_POSIX)
- const wchar_t wide[] = L"/d:/Chinese/\xe6\x89\x80\xe6\x9c\x89\xe4\xb8\xad\xe6"
- L"\x96\x87\xe7\xbd\x91\xe9\xa1\xb5.doc";
-#endif
- EXPECT_TRUE(net::FileURLToFilePath(GURL(WideToUTF8(utf8)), &output));
- EXPECT_EQ(wide, output.ToWStringHack());
-
// Unfortunately, UTF8ToWide discards invalid UTF8 input.
#ifdef BUG_878908_IS_FIXED
// Test that no conversion happens if the UTF-8 input is invalid, and that
@@ -862,7 +861,8 @@ TEST(NetUtilTest, GetSuggestedFilename) {
namespace {
struct GetDirectoryListingEntryCase {
- const char* name;
+ const wchar_t* name;
+ const char* raw_bytes;
bool is_dir;
int64 filesize;
base::Time time;
@@ -872,22 +872,50 @@ struct GetDirectoryListingEntryCase {
} // namespace
TEST(NetUtilTest, GetDirectoryListingEntry) {
const GetDirectoryListingEntryCase test_cases[] = {
- {"Foo",
+ {L"Foo",
+ "",
false,
10000,
base::Time(),
"<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
- {"quo\"tes",
+ {L"quo\"tes",
+ "",
+ false,
+ 10000,
+ base::Time(),
+ "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
+ "\n"},
+ {L"quo\"tes",
+ "quo\"tes",
false,
10000,
base::Time(),
"<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
"\n"},
+ // U+D55C U+AE00. raw_bytes is empty (either a local file with
+ // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8).
+ {L"\xD55C\xAE00.txt",
+ "",
+ false,
+ 10000,
+ base::Time(),
+ "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%ED%95%9C%EA%B8%80.txt\""
+ ",0,\"9.8 kB\",\"\");</script>\n"},
+ // U+D55C U+AE00. raw_bytes is the corresponding EUC-KR sequence:
+ // a local or remote file in EUC-KR.
+ {L"\xD55C\xAE00.txt",
+ "\xC7\xD1\xB1\xDB.txt",
+ false,
+ 10000,
+ base::Time(),
+ "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%C7%D1%B1%DB.txt\""
+ ",0,\"9.8 kB\",\"\");</script>\n"},
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
const std::string results = net::GetDirectoryListingEntry(
- test_cases[i].name,
+ WideToUTF16(test_cases[i].name),
+ test_cases[i].raw_bytes,
test_cases[i].is_dir,
test_cases[i].filesize,
test_cases[i].time);
diff --git a/net/base/net_util_win.cc b/net/base/net_util_win.cc
index effb212..244f4ad 100644
--- a/net/base/net_util_win.cc
+++ b/net/base/net_util_win.cc
@@ -57,33 +57,13 @@ bool FileURLToFilePath(const GURL& url, FilePath* file_path) {
}
file_path_str.assign(UTF8ToWide(path));
- // Now we have an unescaped filename, but are still not sure about its
- // encoding. For example, each character could be part of a UTF-8 string.
- if (file_path_str.empty() || !IsString8Bit(file_path_str)) {
- // assume our 16-bit encoding is correct if it won't fit into an 8-bit
- // string
- return true;
- }
-
- // Convert our narrow string into the native wide path.
- std::string narrow;
- if (!WideToLatin1(file_path_str, &narrow)) {
- NOTREACHED() << "Should have filtered out non-8-bit strings above.";
- return false;
- }
- if (IsStringUTF8(narrow)) {
- // Our string actually looks like it could be UTF-8, convert to 8-bit
- // UTF-8 and then to the corresponding wide string.
- file_path_str = UTF8ToWide(narrow);
- } else {
- // Our wide string contains only 8-bit characters and it's not UTF-8, so
- // we assume it's in the native codepage.
- file_path_str = base::SysNativeMBToWide(narrow);
- }
-
- // Fail if 8-bit -> wide conversion failed and gave us an empty string back
- // (we already filtered out empty strings above).
- return !file_path_str.empty();
+ // We used to try too hard: if |path| consisted entirely of the first 256
+ // Unicode characters, we treated it as zero-extended 8-bit data, converted
+ // it to 'Latin-1' and checked whether the result was valid UTF-8.
+ // If the check passed, we converted the result from UTF-8 to a wide string.
+ // Otherwise, we treated the result as being in the native OS encoding.
+ // However, that led to http://crbug.com/4619 and http://crbug.com/14153
+ return true;
}
} // namespace net