4 files changed, 83 insertions, 56 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 2e6292c..00beb4e 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -860,7 +860,7 @@ std::string CanonicalizeHost(const std::wstring& host,
   return CanonicalizeHost(converted_host, host_info);
 }
 
-std::string GetDirectoryListingHeader(const std::string& title) {
+std::string GetDirectoryListingHeader(const string16& title) {
   static const StringPiece header(NetModule::GetResource(IDR_DIR_HEADER_HTML));
   if (header.empty()) {
     NOTREACHED() << "expected resource not found";
@@ -874,15 +874,21 @@ std::string GetDirectoryListingHeader(const std::string& title) {
   return result;
 }
 
-std::string GetDirectoryListingEntry(const std::string& name,
+std::string GetDirectoryListingEntry(const string16& name,
+                                     const std::string& raw_bytes,
                                      bool is_dir,
                                      int64 size,
-                                     const Time& modified) {
+                                     Time modified) {
   std::string result;
   result.append("<script>addRow(");
   string_escape::JsonDoubleQuote(name, true, &result);
   result.append(",");
-  string_escape::JsonDoubleQuote(EscapePath(name), true, &result);
+  if (raw_bytes.empty()) {
+    string_escape::JsonDoubleQuote(EscapePath(UTF16ToUTF8(name)),
+                                   true, &result);
+  } else {
+    string_escape::JsonDoubleQuote(EscapePath(raw_bytes), true, &result);
+  }
   if (is_dir) {
     result.append(",1,");
   } else {
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 40df770..4320e1c 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -14,6 +14,7 @@
 #include <string>
 
 #include "base/basictypes.h"
+#include "base/string16.h"
 #include "net/base/escape.h"
 
 struct addrinfo;
@@ -147,12 +148,24 @@ std::string CanonicalizeHost(const std::string& host,
 std::string CanonicalizeHost(const std::wstring& host,
                              url_canon::CanonHostInfo* host_info);
 
-// Call these functions to get the html for a directory listing.
-// They will pass non-7bit-ascii characters unescaped, allowing
-// the browser to interpret the encoding (utf8, etc).
-std::string GetDirectoryListingHeader(const std::string& title);
-std::string GetDirectoryListingEntry(const std::string& name, bool is_dir,
-                                     int64 size, const base::Time& modified);
+// Call these functions to get the html snippet for a directory listing.
+// The return values of both functions are in UTF-8.
+std::string GetDirectoryListingHeader(const string16& title);
+
+// Given the name of a file in a directory (ftp or local) and
+// other information (is_dir, size, modification time), it returns
+// the html snippet to add the entry for the file to the directory listing.
+// Currently, it's a script tag containing a call to a Javascript function
+// |addRow|.
+//
+// The 1st argument to |addRow| is derived from |name| and is the
+// Javascript-string escaped form of |name| (i.e. \uXXXX). The 2nd argument
+// is the url-escaped |raw_bytes| if it's not empty. If empty, the 2nd
+// argument is the url-escaped |name| in UTF-8.
+std::string GetDirectoryListingEntry(const string16& name,
+                                     const std::string& raw_bytes,
+                                     bool is_dir, int64 size,
+                                     base::Time modified);
 
 // If text starts with "www." it is removed, otherwise text is returned
 // unmodified.
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 78f7ab9..f346e92 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -407,18 +407,32 @@ TEST(NetUtilTest, FileURLConversion) {
      "file://some%20computer/foo/bar.txt"}, // UNC
     {L"D:\\Name;with%some symbols*#",
      "file:///D:/Name%3Bwith%25some%20symbols*%23"},
+    // issue 14153: To be tested with the OS default codepage other than 1252.
+    {L"D:\\latin1\\caf\x00E9\x00DD.txt",
+     "file:///D:/latin1/caf%C3%A9%C3%9D.txt"},
+    {L"D:\\otherlatin\\caf\x0119.txt",
+     "file:///D:/otherlatin/caf%C4%99.txt"},
+    {L"D:\\greek\\\x03B1\x03B2\x03B3.txt",
+     "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"},
     {L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
      "file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91"
          "%E9%A1%B5.doc"},
+    {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt",  // Math alphabet "AB"
+     "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
 #elif defined(OS_POSIX)
     {L"/foo/bar.txt", "file:///foo/bar.txt"},
     {L"/foo/BAR.txt", "file:///foo/BAR.txt"},
     {L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"},
     {L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"},
     {L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"},
+    {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"},
+    {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"},
+    {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"},
     {L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
      "file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD"
          "%91%E9%A1%B5.doc"},
+    {L"/plane1/\x1D400\x1D401.txt",  // Math alphabet "AB"
+     "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
 #endif
   };
 
@@ -474,21 +488,6 @@ TEST(NetUtilTest, FileURLConversion) {
     EXPECT_EQ(url_cases[i].file, output.ToWStringHack());
   }
 
-  // Here, we test that UTF-8 encoded strings get decoded properly, even when
-  // they might be stored with wide characters.  On posix systems, just treat
-  // this as a stream of bytes.
-  const wchar_t utf8[] = L"file:///d:/Chinese/\xe6\x89\x80\xe6\x9c\x89\xe4\xb8"
-                         L"\xad\xe6\x96\x87\xe7\xbd\x91\xe9\xa1\xb5.doc";
-#if defined(OS_WIN)
-  const wchar_t wide[] =
-      L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc";
-#elif defined(OS_POSIX)
-  const wchar_t wide[] = L"/d:/Chinese/\xe6\x89\x80\xe6\x9c\x89\xe4\xb8\xad\xe6"
-                         L"\x96\x87\xe7\xbd\x91\xe9\xa1\xb5.doc";
-#endif
-  EXPECT_TRUE(net::FileURLToFilePath(GURL(WideToUTF8(utf8)), &output));
-  EXPECT_EQ(wide, output.ToWStringHack());
-
   // Unfortunately, UTF8ToWide discards invalid UTF8 input.
 #ifdef BUG_878908_IS_FIXED
   // Test that no conversion happens if the UTF-8 input is invalid, and that
@@ -862,7 +861,8 @@ TEST(NetUtilTest, GetSuggestedFilename) {
 namespace {
 
 struct GetDirectoryListingEntryCase {
-  const char* name;
+  const wchar_t* name;
+  const char* raw_bytes;
   bool is_dir;
   int64 filesize;
   base::Time time;
@@ -872,22 +872,50 @@ struct GetDirectoryListingEntryCase {
 }  // namespace
 TEST(NetUtilTest, GetDirectoryListingEntry) {
   const GetDirectoryListingEntryCase test_cases[] = {
-    {"Foo",
+    {L"Foo",
+     "",
      false,
      10000,
      base::Time(),
      "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
-    {"quo\"tes",
+    {L"quo\"tes",
+     "",
+     false,
+     10000,
+     base::Time(),
+     "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
+         "\n"},
+    {L"quo\"tes",
+     "quo\"tes",
      false,
      10000,
      base::Time(),
      "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
          "\n"},
+    // U+D55C U+AE00. raw_bytes is empty (either a local file with
+    // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8).
+    {L"\xD55C\xAE00.txt",
+     "",
+     false,
+     10000,
+     base::Time(),
+     "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%ED%95%9C%EA%B8%80.txt\""
+         ",0,\"9.8 kB\",\"\");</script>\n"},
+    // U+D55C U+AE00. raw_bytes is the corresponding EUC-KR sequence:
+    // a local or remote file in EUC-KR.
+    {L"\xD55C\xAE00.txt",
+     "\xC7\xD1\xB1\xDB.txt",
+     false,
+     10000,
+     base::Time(),
+     "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%C7%D1%B1%DB.txt\""
+         ",0,\"9.8 kB\",\"\");</script>\n"},
   };
 
   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
     const std::string results = net::GetDirectoryListingEntry(
-        test_cases[i].name,
+        WideToUTF16(test_cases[i].name),
+        test_cases[i].raw_bytes,
         test_cases[i].is_dir,
         test_cases[i].filesize,
         test_cases[i].time);
diff --git a/net/base/net_util_win.cc b/net/base/net_util_win.cc
index effb212..244f4ad 100644
--- a/net/base/net_util_win.cc
+++ b/net/base/net_util_win.cc
@@ -57,33 +57,13 @@ bool FileURLToFilePath(const GURL& url, FilePath* file_path) {
   }
   file_path_str.assign(UTF8ToWide(path));
 
-  // Now we have an unescaped filename, but are still not sure about its
-  // encoding. For example, each character could be part of a UTF-8 string.
-  if (file_path_str.empty() || !IsString8Bit(file_path_str)) {
-    // assume our 16-bit encoding is correct if it won't fit into an 8-bit
-    // string
-    return true;
-  }
-
-  // Convert our narrow string into the native wide path.
-  std::string narrow;
-  if (!WideToLatin1(file_path_str, &narrow)) {
-    NOTREACHED() << "Should have filtered out non-8-bit strings above.";
-    return false;
-  }
-  if (IsStringUTF8(narrow)) {
-    // Our string actually looks like it could be UTF-8, convert to 8-bit
-    // UTF-8 and then to the corresponding wide string.
-    file_path_str = UTF8ToWide(narrow);
-  } else {
-    // Our wide string contains only 8-bit characters and it's not UTF-8, so
-    // we assume it's in the native codepage.
-    file_path_str = base::SysNativeMBToWide(narrow);
-  }
-
-  // Fail if 8-bit -> wide conversion failed and gave us an empty string back
-  // (we already filtered out empty strings above).
-  return !file_path_str.empty();
+  // We used to try too hard: if |path| consisted entirely of the first 256
+  // Unicode code points, we treated it as zero-extended bytes, narrowed it
+  // to 'Latin-1', and checked whether the result was valid UTF-8. If so, we
+  // decoded it as UTF-8 into a wide string; otherwise, we treated it as
+  // being in the native OS encoding.
+  // However, that led to http://crbug.com/4619 and http://crbug.com/14153
+  return true;
 }
 
 }  // namespace net