From 5420bc1e4fa6d861107a5c847843ac7bd25fb3c4 Mon Sep 17 00:00:00 2001 From: "jshin@chromium.org" Date: Thu, 9 Jul 2009 22:48:16 +0000 Subject: Fix the local directory listing, FTP directory listing and the local file handling (drag'n'drop and opening from the file list). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the local file listing, use the OS file system encoding. For the FTP directory listing, use ICU's encoding detector. GetDirectoryListingEntry and GetDirectoryListingHeader were changed to accept string16 for file/directory names. To the former, a new parameter (|raw_bytes|) was added. It can be used to make an FTP request to a file with a non-ASCII name encoded in a legacy encoding. For the local file handling on Windows, get rid of the code for 'doubly converted' UTF-8 in FileURLToFilePath, which led to issue 4619, and add a few cases to NetUtil*.FileURLConversion* test. In addition, add CodepageToUTF16 and UTF16ToCodepage along with a new unittest (ConvertBetweenCodepageAndUTF16) that shares the same set of cases as ConvertBetweenCodepageAndWide. The test cases were expanded and revised a bit. BUG=2939,13229,4619 http://crbug.com/2939 http://crbug.com/13229 http://crbug.com/4619 TEST=1. Pass URLRequest*.FTP* (net_unittests) 2. Pass StringUtilTest.ConvertBetweenCode* 3. Pass NetUtil*.GetDirectoryLis* (net_unittests) 4. Open a local directory containing files with non-ASCII names and check that they're displayed correctly in the directory list. On Windows and Mac OS X, it should always work. On Linux, your locale encoding (as returned by nl_langinfo(CODESET)) should match the actual encoding used in your filename. 5a. Pass NetUtil*.FileURL* (net_unittests) with the default codepage set to 1252 and 932. 5b. Make a file named 'café.txt' on Windows and see if it can be opened both by clicking in the directory listing page of Chrome and by drag'n'drop. 
Test this with the default OS code pages set to Windows-1252, Windows-1251 (Russian) and Windows-932 (Japanese). Review URL: http://codereview.chromium.org/151065 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@20331 0039d316-1c4b-4281-b951-d872f2087c98 --- net/url_request/url_request_ftp_job.cc | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'net/url_request/url_request_ftp_job.cc') diff --git a/net/url_request/url_request_ftp_job.cc b/net/url_request/url_request_ftp_job.cc index bdfb0b3..c7cb333 100644 --- a/net/url_request/url_request_ftp_job.cc +++ b/net/url_request/url_request_ftp_job.cc @@ -9,6 +9,7 @@ #include "base/message_loop.h" #include "base/string_util.h" +#include "base/sys_string_conversions.h" #include "base/time.h" #include "net/base/auth.h" #include "net/base/escape.h" @@ -388,11 +389,21 @@ void URLRequestFtpJob::OnFindFile(DWORD last_error) { (static_cast(find_data_.nFileSizeHigh) << 32) | find_data_.nFileSizeLow; - // We don't know the encoding, and can't assume utf8, so pass the 8bit - // directly to the browser for it to decide. + // We don't know the encoding used on an FTP server, but we + // use FtpFindFirstFileA, which I guess does NOT preserve + // the raw byte sequence because it's implemented in terms + // of FtpFindFirstFileW. Without the raw byte sequence, we + // can't apply the encoding detection or other heuristics + // to determine/guess the encoding. Neither can we use UTF-8 + // used by a RFC-2640-compliant FTP server. In some cases (e.g. + // the default code page is an SBCS with almost all bytes assigned. + // In lucky cases, it's even possible with a DBCS), it's possible + // to recover the raw byte sequence in most cases. We can do + // some more here, but it's not worth the effort because we're + // going to replace this class with URLRequestNewFtpJob. 
string file_entry = net::GetDirectoryListingEntry( - find_data_.cFileName, false, size, - base::Time::FromFileTime(find_data_.ftLastWriteTime)); + base::SysNativeMBToWide(find_data_.cFileName), std::string(), + false, size, base::Time::FromFileTime(find_data_.ftLastWriteTime)); WriteData(&file_entry, true); FindNextFile(); @@ -407,14 +418,20 @@ void URLRequestFtpJob::OnStartDirectoryTraversal() { state_ = GETTING_DIRECTORY; // Unescape the URL path and pass the raw 8bit directly to the browser. + // + // Here we can try to detect the encoding although it may not be very + // reliable because it's not likely to be long enough. Because this class + // will be replaced by URLRequestNewFtpJob and is used only on Windows, + // we use SysNativeMBToWide as a stopgap measure. string html = net::GetDirectoryListingHeader( - UnescapeURLComponent(request_->url().path(), - UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); + base::SysNativeMBToWide(UnescapeURLComponent(request_->url().path(), + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS))); // If this isn't top level directory (i.e. the path isn't "/",) add a link to // the parent directory. if (request_->url().path().length() > 1) - html.append(net::GetDirectoryListingEntry("..", false, 0, base::Time())); + html.append(net::GetDirectoryListingEntry(L"..", std::string(), + false, 0, base::Time())); WriteData(&html, true); -- cgit v1.1