summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorjshin@chromium.org <jshin@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-07-09 22:48:16 +0000
committerjshin@chromium.org <jshin@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-07-09 22:48:16 +0000
commit5420bc1e4fa6d861107a5c847843ac7bd25fb3c4 (patch)
tree7cf5fdfbbb128ec57462450e3c7167e017351bfd /net
parent8f82f9d9ae8dfd23ab63fb9e63c6246da71d29fd (diff)
downloadchromium_src-5420bc1e4fa6d861107a5c847843ac7bd25fb3c4.zip
chromium_src-5420bc1e4fa6d861107a5c847843ac7bd25fb3c4.tar.gz
chromium_src-5420bc1e4fa6d861107a5c847843ac7bd25fb3c4.tar.bz2
Fix the local directory listing, FTP directory listing and the local file handling (drag'n'drop and opening from the file list).
For the local file listing, use the OS file system encoding. For the FTP directory listing, use ICU's encoding detector. GetDirectoryListingEntry and GetDirectoryListingHeader were changed to accept string16 for file/directory names. To the former, a new parameter (|raw_bytes|) was added. It can be used to make an FTP request to a file with a non-ASCII name encoded in a legacy encoding. For the local file handling on Windows, get rid of the code for 'doubly converted' UTF-8 in FileURLToFilePath, which led to issue 4619, and add a few cases to the NetUtil*.FileURLConversion* test. In addition, add CodepageToUTF16 and UTF16ToCodepage along with a new unittest (ConvertBetweenCodepageAndUTF16) that shares the same set of cases as ConvertBetweenCodepageAndWide. The test cases were expanded and revised a bit. BUG=2939,13229,4619 http://crbug.com/2939 http://crbug.com/13229 http://crbug.com/4619 TEST=1. Pass URLRequest*.FTP* (net_unittests) 2. Pass StringUtilTest.ConvertBetweenCode* 3. Pass NetUtil*.GetDirectoryLis* (net_unittests) 4. Open a local directory containing files with non-ASCII names and check that they're displayed correctly in the directory list. On Windows and Mac OS X, it should always work. On Linux, your locale encoding (as returned by nl_langinfo(CODESET)) should match the actual encoding used in your filename. 5a. Pass NetUtil*.FileURL* (net_unittests) with the default codepage set to 1252 and 932. 5b. Make a file named 'café.txt' on Windows and see if it can be opened both by clicking in the directory listing page of Chrome and by drag'n'drop. Test this with the default OS code pages set to Windows-1252, Windows-1251 (Russian) and Windows-932 (Japanese). Review URL: http://codereview.chromium.org/151065 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@20331 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r--net/base/net_util.cc14
-rw-r--r--net/base/net_util.h25
-rw-r--r--net/base/net_util_unittest.cc66
-rw-r--r--net/base/net_util_win.cc34
-rw-r--r--net/url_request/url_request_file_dir_job.cc17
-rw-r--r--net/url_request/url_request_ftp_job.cc31
-rw-r--r--net/url_request/url_request_new_ftp_job.cc90
-rw-r--r--net/url_request/url_request_new_ftp_job.h1
8 files changed, 204 insertions, 74 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 2e6292c..00beb4e 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -860,7 +860,7 @@ std::string CanonicalizeHost(const std::wstring& host,
return CanonicalizeHost(converted_host, host_info);
}
-std::string GetDirectoryListingHeader(const std::string& title) {
+std::string GetDirectoryListingHeader(const string16& title) {
static const StringPiece header(NetModule::GetResource(IDR_DIR_HEADER_HTML));
if (header.empty()) {
NOTREACHED() << "expected resource not found";
@@ -874,15 +874,21 @@ std::string GetDirectoryListingHeader(const std::string& title) {
return result;
}
-std::string GetDirectoryListingEntry(const std::string& name,
+std::string GetDirectoryListingEntry(const string16& name,
+ const std::string& raw_bytes,
bool is_dir,
int64 size,
- const Time& modified) {
+ Time modified) {
std::string result;
result.append("<script>addRow(");
string_escape::JsonDoubleQuote(name, true, &result);
result.append(",");
- string_escape::JsonDoubleQuote(EscapePath(name), true, &result);
+ if (raw_bytes.empty()) {
+ string_escape::JsonDoubleQuote(EscapePath(UTF16ToUTF8(name)),
+ true, &result);
+ } else {
+ string_escape::JsonDoubleQuote(EscapePath(raw_bytes), true, &result);
+ }
if (is_dir) {
result.append(",1,");
} else {
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 40df770..4320e1c 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -14,6 +14,7 @@
#include <string>
#include "base/basictypes.h"
+#include "base/string16.h"
#include "net/base/escape.h"
struct addrinfo;
@@ -147,12 +148,24 @@ std::string CanonicalizeHost(const std::string& host,
std::string CanonicalizeHost(const std::wstring& host,
url_canon::CanonHostInfo* host_info);
-// Call these functions to get the html for a directory listing.
-// They will pass non-7bit-ascii characters unescaped, allowing
-// the browser to interpret the encoding (utf8, etc).
-std::string GetDirectoryListingHeader(const std::string& title);
-std::string GetDirectoryListingEntry(const std::string& name, bool is_dir,
- int64 size, const base::Time& modified);
+// Call these functions to get the html snippet for a directory listing.
+// The return values of both functions are in UTF-8.
+std::string GetDirectoryListingHeader(const string16& title);
+
+// Given the name of a file in a directory (ftp or local) and
+// other information (is_dir, size, modification time), it returns
+// the html snippet to add the entry for the file to the directory listing.
+// Currently, it's a script tag containing a call to a Javascript function
+// |addRow|.
+//
+// Its 1st parameter is derived from |name| and is the Javascript-string
+// escaped form of |name| (i.e. \uXXXX). The 2nd parameter is the url-escaped
+// |raw_bytes| if it's not empty. If empty, the 2nd parameter is the
+// url-escaped |name| in UTF-8.
+std::string GetDirectoryListingEntry(const string16& name,
+ const std::string& raw_bytes,
+ bool is_dir, int64 size,
+ base::Time modified);
// If text starts with "www." it is removed, otherwise text is returned
// unmodified.
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 78f7ab9..f346e92 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -407,18 +407,32 @@ TEST(NetUtilTest, FileURLConversion) {
"file://some%20computer/foo/bar.txt"}, // UNC
{L"D:\\Name;with%some symbols*#",
"file:///D:/Name%3Bwith%25some%20symbols*%23"},
+ // issue 14153: To be tested with the OS default codepage other than 1252.
+ {L"D:\\latin1\\caf\x00E9\x00DD.txt",
+ "file:///D:/latin1/caf%C3%A9%C3%9D.txt"},
+ {L"D:\\otherlatin\\caf\x0119.txt",
+ "file:///D:/otherlatin/caf%C4%99.txt"},
+ {L"D:\\greek\\\x03B1\x03B2\x03B3.txt",
+ "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"},
{L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
"file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91"
"%E9%A1%B5.doc"},
+ {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt", // Math alphabet "AB"
+ "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
#elif defined(OS_POSIX)
{L"/foo/bar.txt", "file:///foo/bar.txt"},
{L"/foo/BAR.txt", "file:///foo/BAR.txt"},
{L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"},
{L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"},
{L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"},
+ {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"},
+ {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"},
+ {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"},
{L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
"file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD"
"%91%E9%A1%B5.doc"},
+ {L"/plane1/\x1D400\x1D401.txt", // Math alphabet "AB"
+ "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
#endif
};
@@ -474,21 +488,6 @@ TEST(NetUtilTest, FileURLConversion) {
EXPECT_EQ(url_cases[i].file, output.ToWStringHack());
}
- // Here, we test that UTF-8 encoded strings get decoded properly, even when
- // they might be stored with wide characters. On posix systems, just treat
- // this as a stream of bytes.
- const wchar_t utf8[] = L"file:///d:/Chinese/\xe6\x89\x80\xe6\x9c\x89\xe4\xb8"
- L"\xad\xe6\x96\x87\xe7\xbd\x91\xe9\xa1\xb5.doc";
-#if defined(OS_WIN)
- const wchar_t wide[] =
- L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc";
-#elif defined(OS_POSIX)
- const wchar_t wide[] = L"/d:/Chinese/\xe6\x89\x80\xe6\x9c\x89\xe4\xb8\xad\xe6"
- L"\x96\x87\xe7\xbd\x91\xe9\xa1\xb5.doc";
-#endif
- EXPECT_TRUE(net::FileURLToFilePath(GURL(WideToUTF8(utf8)), &output));
- EXPECT_EQ(wide, output.ToWStringHack());
-
// Unfortunately, UTF8ToWide discards invalid UTF8 input.
#ifdef BUG_878908_IS_FIXED
// Test that no conversion happens if the UTF-8 input is invalid, and that
@@ -862,7 +861,8 @@ TEST(NetUtilTest, GetSuggestedFilename) {
namespace {
struct GetDirectoryListingEntryCase {
- const char* name;
+ const wchar_t* name;
+ const char* raw_bytes;
bool is_dir;
int64 filesize;
base::Time time;
@@ -872,22 +872,50 @@ struct GetDirectoryListingEntryCase {
} // namespace
TEST(NetUtilTest, GetDirectoryListingEntry) {
const GetDirectoryListingEntryCase test_cases[] = {
- {"Foo",
+ {L"Foo",
+ "",
false,
10000,
base::Time(),
"<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
- {"quo\"tes",
+ {L"quo\"tes",
+ "",
+ false,
+ 10000,
+ base::Time(),
+ "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
+ "\n"},
+ {L"quo\"tes",
+ "quo\"tes",
false,
10000,
base::Time(),
"<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
"\n"},
+ // U+D55C U+AE00. raw_bytes is empty (either a local file with
+ // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8).
+ {L"\xD55C\xAE00.txt",
+ "",
+ false,
+ 10000,
+ base::Time(),
+ "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%ED%95%9C%EA%B8%80.txt\""
+ ",0,\"9.8 kB\",\"\");</script>\n"},
+ // U+D55C U+AE00. raw_bytes is the corresponding EUC-KR sequence:
+ // a local or remote file in EUC-KR.
+ {L"\xD55C\xAE00.txt",
+ "\xC7\xD1\xB1\xDB.txt",
+ false,
+ 10000,
+ base::Time(),
+ "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%C7%D1%B1%DB.txt\""
+ ",0,\"9.8 kB\",\"\");</script>\n"},
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
const std::string results = net::GetDirectoryListingEntry(
- test_cases[i].name,
+ WideToUTF16(test_cases[i].name),
+ test_cases[i].raw_bytes,
test_cases[i].is_dir,
test_cases[i].filesize,
test_cases[i].time);
diff --git a/net/base/net_util_win.cc b/net/base/net_util_win.cc
index effb212..244f4ad 100644
--- a/net/base/net_util_win.cc
+++ b/net/base/net_util_win.cc
@@ -57,33 +57,13 @@ bool FileURLToFilePath(const GURL& url, FilePath* file_path) {
}
file_path_str.assign(UTF8ToWide(path));
- // Now we have an unescaped filename, but are still not sure about its
- // encoding. For example, each character could be part of a UTF-8 string.
- if (file_path_str.empty() || !IsString8Bit(file_path_str)) {
- // assume our 16-bit encoding is correct if it won't fit into an 8-bit
- // string
- return true;
- }
-
- // Convert our narrow string into the native wide path.
- std::string narrow;
- if (!WideToLatin1(file_path_str, &narrow)) {
- NOTREACHED() << "Should have filtered out non-8-bit strings above.";
- return false;
- }
- if (IsStringUTF8(narrow)) {
- // Our string actually looks like it could be UTF-8, convert to 8-bit
- // UTF-8 and then to the corresponding wide string.
- file_path_str = UTF8ToWide(narrow);
- } else {
- // Our wide string contains only 8-bit characters and it's not UTF-8, so
- // we assume it's in the native codepage.
- file_path_str = base::SysNativeMBToWide(narrow);
- }
-
- // Fail if 8-bit -> wide conversion failed and gave us an empty string back
- // (we already filtered out empty strings above).
- return !file_path_str.empty();
+ // We used to try too hard: if |path| consisted entirely of characters
+ // in the first 256 Unicode code points (i.e. looked like zero-extended
+ // bytes), we narrowed it to 'Latin-1' and checked if the result was UTF-8.
+ // If the check passed, we decoded the result as UTF-8.
+ // Otherwise, we treated the result as the native OS encoding.
+ // However, that led to http://crbug.com/4619 and http://crbug.com/14153
+ return true;
}
} // namespace net
diff --git a/net/url_request/url_request_file_dir_job.cc b/net/url_request/url_request_file_dir_job.cc
index c242ef9..ecdf014 100644
--- a/net/url_request/url_request_file_dir_job.cc
+++ b/net/url_request/url_request_file_dir_job.cc
@@ -7,6 +7,7 @@
#include "base/file_util.h"
#include "base/message_loop.h"
#include "base/string_util.h"
+#include "base/sys_string_conversions.h"
#include "base/time.h"
#include "googleurl/src/gurl.h"
#include "net/base/io_buffer.h"
@@ -104,9 +105,15 @@ void URLRequestFileDirJob::OnListFile(
// can catch errors from DirectoryLister and show an error page.
if (!wrote_header_) {
#if defined(OS_WIN)
- const std::string& title = WideToUTF8(dir_path_.value());
+ const string16& title = dir_path_.value();
#elif defined(OS_POSIX)
- const std::string& title = dir_path_.value();
+ // TODO(jungshik): Add SysNativeMBToUTF16 to sys_string_conversions.
+ // On Mac, need to add NFKC->NFC conversion either here or in file_path.
+ // On Linux, the file system encoding is not defined, but we assume that
+ // SysNativeMBToWide takes care of it at least for now. We can try something
+ // more sophisticated if necessary later.
+ const string16& title = WideToUTF16(
+ base::SysNativeMBToWide(dir_path_.value()));
#endif
data_.append(net::GetDirectoryListingHeader(title));
wrote_header_ = true;
@@ -119,14 +126,16 @@ void URLRequestFileDirJob::OnListFile(
data.nFileSizeLow;
data_.append(net::GetDirectoryListingEntry(
- WideToUTF8(data.cFileName),
+ data.cFileName, std::string(),
(data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ? true : false,
size,
base::Time::FromFileTime(local_time)));
#elif defined(OS_POSIX)
+ // TODO(jungshik): The same issue as for the directory name.
data_.append(net::GetDirectoryListingEntry(
- data.filename.c_str(),
+ WideToUTF16(base::SysNativeMBToWide(data.filename)),
+ data.filename,
S_ISDIR(data.stat.st_mode),
data.stat.st_size,
base::Time::FromTimeT(data.stat.st_mtime)));
diff --git a/net/url_request/url_request_ftp_job.cc b/net/url_request/url_request_ftp_job.cc
index bdfb0b3..c7cb333 100644
--- a/net/url_request/url_request_ftp_job.cc
+++ b/net/url_request/url_request_ftp_job.cc
@@ -9,6 +9,7 @@
#include "base/message_loop.h"
#include "base/string_util.h"
+#include "base/sys_string_conversions.h"
#include "base/time.h"
#include "net/base/auth.h"
#include "net/base/escape.h"
@@ -388,11 +389,21 @@ void URLRequestFtpJob::OnFindFile(DWORD last_error) {
(static_cast<unsigned __int64>(find_data_.nFileSizeHigh) << 32) |
find_data_.nFileSizeLow;
- // We don't know the encoding, and can't assume utf8, so pass the 8bit
- // directly to the browser for it to decide.
+ // We don't know the encoding used on an FTP server, but we
+ // use FtpFindFirstFileA, which I guess does NOT preserve
+ // the raw byte sequence because it's implemented in terms
+ // of FtpFindFirstFileW. Without the raw byte sequence, we
+ // can't apply the encoding detection or other heuristics
+ // to determine/guess the encoding. Nor can we rely on the UTF-8
+ // used by an RFC-2640-compliant FTP server. In some cases (e.g.
+ // the default code page is an SBCS with almost all bytes
+ // assigned or, if we're lucky, even a DBCS), it's possible
+ // to recover the raw byte sequence. We could do
+ // some more here, but it's not worth the effort because we're
+ // going to replace this class with URLRequestNewFtpJob.
string file_entry = net::GetDirectoryListingEntry(
- find_data_.cFileName, false, size,
- base::Time::FromFileTime(find_data_.ftLastWriteTime));
+ base::SysNativeMBToWide(find_data_.cFileName), std::string(),
+ false, size, base::Time::FromFileTime(find_data_.ftLastWriteTime));
WriteData(&file_entry, true);
FindNextFile();
@@ -407,14 +418,20 @@ void URLRequestFtpJob::OnStartDirectoryTraversal() {
state_ = GETTING_DIRECTORY;
// Unescape the URL path and pass the raw 8bit directly to the browser.
+ //
+ // Here we can try to detect the encoding although it may not be very
+ // reliable because it's not likely to be long enough. Because this class
+ // will be replaced by URLRequestNewFtpJob and is used only on Windows,
+ // we use SysNativeMBToWide as a stopgap measure.
string html = net::GetDirectoryListingHeader(
- UnescapeURLComponent(request_->url().path(),
- UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS));
+ base::SysNativeMBToWide(UnescapeURLComponent(request_->url().path(),
+ UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)));
// If this isn't top level directory (i.e. the path isn't "/",) add a link to
// the parent directory.
if (request_->url().path().length() > 1)
- html.append(net::GetDirectoryListingEntry("..", false, 0, base::Time()));
+ html.append(net::GetDirectoryListingEntry(L"..", std::string(),
+ false, 0, base::Time()));
WriteData(&html, true);
diff --git a/net/url_request/url_request_new_ftp_job.cc b/net/url_request/url_request_new_ftp_job.cc
index d3a0c3e..d9f1d27 100644
--- a/net/url_request/url_request_new_ftp_job.cc
+++ b/net/url_request/url_request_new_ftp_job.cc
@@ -7,6 +7,7 @@
#include "base/compiler_specific.h"
#include "base/file_version_info.h"
#include "base/message_loop.h"
+#include "base/sys_string_conversions.h"
#include "net/base/escape.h"
#include "net/base/net_errors.h"
#include "net/base/net_util.h"
@@ -16,6 +17,46 @@
#include "net/url_request/url_request.h"
#include "net/url_request/url_request_context.h"
#include "net/url_request/url_request_error_job.h"
+#include "unicode/ucsdet.h"
+
+namespace {
+
+// A very simple-minded character encoding detection.
+// TODO(jungshik): We can apply more heuristics here (e.g. using various hints
+// like TLD, the UI language/default encoding of a client, etc). In that case,
+// this should be pulled out of here and moved somewhere in base because there
+// can be other use cases.
+std::string DetectEncoding(const char*input, size_t len) {
+ if (IsStringASCII(std::string(input, len)))
+ return std::string();
+ UErrorCode status = U_ZERO_ERROR;
+ UCharsetDetector* detector = ucsdet_open(&status);
+ ucsdet_setText(detector, input, static_cast<int32_t>(len), &status);
+ const UCharsetMatch* match = ucsdet_detect(detector, &status);
+ const char* encoding = ucsdet_getName(match, &status);
+ // Should we check the quality of the match? A rather arbitrary number is
+ // assigned by ICU and it's hard to come up with a lower limit.
+ if (U_FAILURE(status))
+ return std::string();
+ return encoding;
+}
+
+string16 RawByteSequenceToFilename(const char* raw_filename,
+ const std::string& encoding) {
+ if (encoding.empty())
+ return ASCIIToUTF16(raw_filename);
+
+ // Try the detected encoding before falling back to the native codepage.
+ // Using the native codepage does not make much sense, but we don't have
+ // much else to resort to.
+ string16 filename;
+ if (!CodepageToUTF16(raw_filename, encoding.c_str(),
+ OnStringUtilConversionError::SUBSTITUTE, &filename))
+ filename = WideToUTF16Hack(base::SysNativeMBToWide(raw_filename));
+ return filename;
+}
+
+} // namespace
URLRequestNewFtpJob::URLRequestNewFtpJob(URLRequest* request)
: URLRequestJob(request),
@@ -69,17 +110,36 @@ bool URLRequestNewFtpJob::ReadRawData(net::IOBuffer* buf,
if (response_info_ == NULL) {
response_info_ = transaction_->GetResponseInfo();
if (response_info_->is_directory_listing) {
- // Unescape the URL path and pass the raw 8bit directly to the browser.
- directory_html_ = net::GetDirectoryListingHeader(
+ std::string escaped_path =
UnescapeURLComponent(request_->url().path(),
- UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS));
+ UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
+ string16 path_utf16;
+ // Per RFC 2640, FTP servers should use UTF-8 or its proper subset ASCII,
+ // but many old FTP servers use legacy encodings. Try UTF-8 first and,
+ // if the path is not valid UTF-8, detect the encoding.
+ if (IsStringUTF8(escaped_path)) {
+ path_utf16 = UTF8ToUTF16(escaped_path);
+ } else {
+ std::string encoding = DetectEncoding(escaped_path.c_str(),
+ escaped_path.size());
+ // Try the detected encoding. If it fails, resort to the
+ // OS native encoding.
+ if (encoding.empty() ||
+ !CodepageToUTF16(escaped_path, encoding.c_str(),
+ OnStringUtilConversionError::SUBSTITUTE,
+ &path_utf16))
+ path_utf16 = WideToUTF16Hack(base::SysNativeMBToWide(escaped_path));
+ }
+
+ directory_html_ = net::GetDirectoryListingHeader(path_utf16);
// If this isn't top level directory (i.e. the path isn't "/",)
// add a link to the parent directory.
if (request_->url().path().length() > 1)
- directory_html_.append(net::GetDirectoryListingEntry("..",
- false,
- 0,
- base::Time()));
+ directory_html_.append(
+ net::GetDirectoryListingEntry(ASCIIToUTF16(".."),
+ std::string(),
+ false, 0,
+ base::Time()));
}
}
if (!directory_html_.empty()) {
@@ -121,6 +181,20 @@ int URLRequestNewFtpJob::ProcessFtpDir(net::IOBuffer *buf,
std::string file_entry;
std::string line;
buf->data()[bytes_read] = 0;
+
+ // If all we've seen so far is ASCII, encoding_ is empty. Try to detect the
+ // encoding. We don't do the separate UTF-8 check here because the encoding
+ // detection with a longer chunk (as opposed to the relatively short path
+ // component of the url) is unlikely to mistake UTF-8 for a legacy encoding.
+ // If it turns out to be wrong, a separate UTF-8 check has to be added.
+ //
+ // TODO(jungshik): UTF-8 has to be 'enforced' without any heuristics when
+ // we're talking to an FTP server compliant to RFC 2640 (that is, its response
+ // to FEAT command includes 'UTF8').
+ // See http://wiki.filezilla-project.org/Character_Set
+ if (encoding_.empty())
+ encoding_ = DetectEncoding(buf->data(), bytes_read);
+
int64 file_size;
std::istringstream iss(buf->data());
while (getline(iss, line)) {
@@ -144,6 +218,7 @@ int URLRequestNewFtpJob::ProcessFtpDir(net::IOBuffer *buf,
et.day_of_week = result.fe_time.tm_wday;
file_entry.append(net::GetDirectoryListingEntry(
+ RawByteSequenceToFilename(result.fe_fname, encoding_),
result.fe_fname, true, 0, base::Time::FromLocalExploded(et)));
break;
case net::FTP_TYPE_FILE:
@@ -163,6 +238,7 @@ int URLRequestNewFtpJob::ProcessFtpDir(net::IOBuffer *buf,
// It returns wrong date/time (Differnce is 1 day and 17 Hours).
if (StringToInt64(result.fe_size, &file_size))
file_entry.append(net::GetDirectoryListingEntry(
+ RawByteSequenceToFilename(result.fe_fname, encoding_),
result.fe_fname, false, file_size,
base::Time::FromLocalExploded(et)));
break;
diff --git a/net/url_request/url_request_new_ftp_job.h b/net/url_request/url_request_new_ftp_job.h
index a74a265..69c1fef 100644
--- a/net/url_request/url_request_new_ftp_job.h
+++ b/net/url_request/url_request_new_ftp_job.h
@@ -59,6 +59,7 @@ class URLRequestNewFtpJob : public URLRequestJob {
std::string directory_html_;
bool read_in_progress_;
+ std::string encoding_;
// Keep a reference to the url request context to be sure it's not deleted
// before us.