diff options
24 files changed, 257 insertions, 115 deletions
diff --git a/chrome/browser/download/download_manager.cc b/chrome/browser/download/download_manager.cc index 5a5649c..0fc444f 100644 --- a/chrome/browser/download/download_manager.cc +++ b/chrome/browser/download/download_manager.cc @@ -1085,8 +1085,10 @@ int DownloadManager::RemoveAllDownloads() { // download. void DownloadManager::DownloadUrl(const GURL& url, const GURL& referrer, + const std::string& referrer_charset, WebContents* web_contents) { DCHECK(web_contents); + request_context_->set_referrer_charset(referrer_charset); file_manager_->DownloadUrl(url, referrer, web_contents->process()->pid(), @@ -1179,6 +1181,7 @@ void DownloadManager::GenerateFilename(DownloadCreateInfo* info, *generated_name = FilePath::FromWStringHack( net::GetSuggestedFilename(GURL(info->url), info->content_disposition, + info->referrer_charset, L"download")); DCHECK(!generated_name->empty()); diff --git a/chrome/browser/download/download_manager.h b/chrome/browser/download/download_manager.h index 5cb22a4..50f4fa3 100644 --- a/chrome/browser/download/download_manager.h +++ b/chrome/browser/download/download_manager.h @@ -356,6 +356,7 @@ class DownloadManager : public base::RefCountedThreadSafe<DownloadManager>, // Download the object at the URL. Used in cases such as "Save Link As..." void DownloadUrl(const GURL& url, const GURL& referrer, + const std::string& referrer_encoding, WebContents* web_contents); // Allow objects to observe the download creation process. 
diff --git a/chrome/browser/download/download_manager_unittest.cc b/chrome/browser/download/download_manager_unittest.cc index f1fea31..01f4839 100644 --- a/chrome/browser/download/download_manager_unittest.cc +++ b/chrome/browser/download/download_manager_unittest.cc @@ -18,11 +18,13 @@ class DownloadManagerTest : public testing::Test { void GetGeneratedFilename(const std::string& content_disposition, const std::string& url, const std::string& mime_type, + const std::string& referrer_charset, std::wstring* generated_name_string) { DownloadCreateInfo info; info.content_disposition = content_disposition; info.url = GURL(url); info.mime_type = mime_type; + info.referrer_charset = referrer_charset; FilePath generated_name; download_manager_->GenerateFilename(&info, &generated_name); *generated_name_string = generated_name.ToWStringHack(); @@ -335,7 +337,6 @@ const struct { "application/x-tar", L"bar.bogus.tar"}, - // TODO(darin): Add some raw 8-bit Content-Disposition tests. }; } // namespace @@ -343,14 +344,35 @@ const struct { // Tests to ensure that the file names we generate from hints from the server // (content-disposition, URL name, etc) don't cause security holes. TEST_F(DownloadManagerTest, TestDownloadFilename) { + std::wstring file_name; for (int i = 0; i < arraysize(kGeneratedFiles); ++i) { - std::wstring file_name; GetGeneratedFilename(kGeneratedFiles[i].disposition, kGeneratedFiles[i].url, kGeneratedFiles[i].mime_type, + "", + &file_name); + EXPECT_EQ(kGeneratedFiles[i].expected_name, file_name); + GetGeneratedFilename(kGeneratedFiles[i].disposition, + kGeneratedFiles[i].url, + kGeneratedFiles[i].mime_type, + "GBK", &file_name); EXPECT_EQ(kGeneratedFiles[i].expected_name, file_name); } + + // A couple of cases with raw 8bit characters in C-D. 
+ GetGeneratedFilename("attachment; filename=caf\xc3\xa9.png", + "http://www.example.com/images?id=3", + "image/png", + "iso-8859-1", + &file_name); + EXPECT_EQ(L"caf\u00e9.png", file_name); + GetGeneratedFilename("attachment; filename=caf\xe5.png", + "http://www.example.com/images?id=3", + "image/png", + "windows-1253", + &file_name); + EXPECT_EQ(L"caf\u03b5.png", file_name); } namespace { diff --git a/chrome/browser/download/save_package.cc b/chrome/browser/download/save_package.cc index 3fc1738..19d7df6 100644 --- a/chrome/browser/download/save_package.cc +++ b/chrome/browser/download/save_package.cc @@ -296,8 +296,10 @@ bool SavePackage::GenerateFilename(const std::string& disposition, const GURL& url, bool need_html_ext, FilePath::StringType* generated_name) { + // TODO(jungshik): Figure out the referrer charset when having one + // makes sense and pass it to GetSuggestedFilename. FilePath file_path = FilePath::FromWStringHack( - net::GetSuggestedFilename(url, disposition, kDefaultSaveName)); + net::GetSuggestedFilename(url, disposition, "", kDefaultSaveName)); DCHECK(!file_path.empty()); FilePath::StringType pure_file_name = diff --git a/chrome/browser/history/download_types.h b/chrome/browser/history/download_types.h index 477a249..25e039a 100644 --- a/chrome/browser/history/download_types.h +++ b/chrome/browser/history/download_types.h @@ -69,6 +69,9 @@ struct DownloadCreateInfo { bool is_dangerous; // The original name for a dangerous download. FilePath original_name; + // The charset of the referring page where the download request comes from. + // It's used to construct a suggested filename. 
+ std::string referrer_charset; }; #endif // CHROME_BROWSER_DOWNLOAD_TYPES_H_ diff --git a/chrome/browser/net/chrome_url_request_context.cc b/chrome/browser/net/chrome_url_request_context.cc index ac5c3c3..b15db26 100644 --- a/chrome/browser/net/chrome_url_request_context.cc +++ b/chrome/browser/net/chrome_url_request_context.cc @@ -198,6 +198,20 @@ ChromeURLRequestContext::ChromeURLRequestContext(Profile* profile) accept_charset_ = net::HttpUtil::GenerateAcceptCharsetHeader( WideToASCII(prefs_->GetString(prefs::kDefaultCharset))); + // At this point, we don't know the charset of the referring page + // where a url request originates from. This is used to get a suggested + // filename from Content-Disposition header made of raw 8bit characters. + // Down the road, it can be overridden if it becomes known (for instance, + // when a download request is made through the context menu in a web page). + // At the moment, it'll remain undetermined when a user + // types a URL in the omnibox or clicks on a download link in a page. + // For the latter, we need a change on the webkit-side. + // We initialize it to the default charset here and a user will + // have an *arguably* better default charset for interpreting a raw 8bit + // C-D header field. It means the native OS codepage fallback in + // net_util::GetSuggestedFilename is unlikely to be taken. 
+ referrer_charset_ = accept_charset_; + cookie_policy_.SetType(net::CookiePolicy::FromInt( prefs_->GetInteger(prefs::kCookieBehavior))); diff --git a/chrome/browser/renderer_host/download_resource_handler.cc b/chrome/browser/renderer_host/download_resource_handler.cc index 9824d97..b100df3 100644 --- a/chrome/browser/renderer_host/download_resource_handler.cc +++ b/chrome/browser/renderer_host/download_resource_handler.cc @@ -8,6 +8,7 @@ #include "chrome/browser/download/download_manager.h" #include "chrome/browser/renderer_host/resource_dispatcher_host.h" #include "net/base/io_buffer.h" +#include "net/url_request/url_request_context.h" DownloadResourceHandler::DownloadResourceHandler(ResourceDispatcherHost* rdh, int render_process_host_id, @@ -64,6 +65,7 @@ bool DownloadResourceHandler::OnResponseStarted(int request_id, info->mime_type = response->response_head.mime_type; info->save_as = save_as_; info->is_dangerous = false; + info->referrer_charset = request_->context()->referrer_charset(); download_manager_->file_loop()->PostTask(FROM_HERE, NewRunnableMethod(download_manager_, &DownloadFileManager::StartDownload, diff --git a/chrome/browser/tab_contents/render_view_context_menu.cc b/chrome/browser/tab_contents/render_view_context_menu.cc index 6d45a73..c77d8d5 100644 --- a/chrome/browser/tab_contents/render_view_context_menu.cc +++ b/chrome/browser/tab_contents/render_view_context_menu.cc @@ -375,7 +375,8 @@ void RenderViewContextMenu::ExecuteItemCommand(int id) { params_.image_url); DownloadManager* dlm = source_web_contents_->profile()->GetDownloadManager(); - dlm->DownloadUrl(url, referrer, source_web_contents_); + dlm->DownloadUrl(url, referrer, params_.frame_charset, + source_web_contents_); break; } diff --git a/chrome/browser/tab_contents/tab_contents.cc b/chrome/browser/tab_contents/tab_contents.cc index a835423..37b370c 100644 --- a/chrome/browser/tab_contents/tab_contents.cc +++ b/chrome/browser/tab_contents/tab_contents.cc @@ -1091,7 +1091,7 @@ void 
TabContents::OnSavePage() { DownloadManager* dlm = profile()->GetDownloadManager(); const GURL& current_page_url = GetURL(); if (dlm && current_page_url.is_valid()) - dlm->DownloadUrl(current_page_url, GURL(), AsWC(this)); + dlm->DownloadUrl(current_page_url, GURL(), "", AsWC(this)); return; } diff --git a/chrome/browser/tab_contents/tab_contents_view_win.cc b/chrome/browser/tab_contents/tab_contents_view_win.cc index 5f47164..6fee9c3 100644 --- a/chrome/browser/tab_contents/tab_contents_view_win.cc +++ b/chrome/browser/tab_contents/tab_contents_view_win.cc @@ -133,7 +133,7 @@ void TabContentsViewWin::StartDragging(const WebDropData& drop_data) { if (file_name.value().empty()) { // Retrieve the name from the URL. file_name = FilePath::FromWStringHack( - net::GetSuggestedFilename(drop_data.url, L"", L"")); + net::GetSuggestedFilename(drop_data.url, "", "", L"")); } file_name = file_name.ReplaceExtension(drop_data.file_extension); data->SetFileContents(file_name.value(), drop_data.file_contents); diff --git a/chrome/common/os_exchange_data.cc b/chrome/common/os_exchange_data.cc index 2b131f2..fbedcb8 100644 --- a/chrome/common/os_exchange_data.cc +++ b/chrome/common/os_exchange_data.cc @@ -668,7 +668,7 @@ static void CreateValidFileNameFromTitle(const GURL& url, if (title.empty()) { if (url.is_valid()) { *validated = net::GetSuggestedFilename( - url, std::wstring(), std::wstring()); + url, std::string(), std::string(), std::wstring()); } else { // Nothing else can be done, just use a default. 
*validated = l10n_util::GetString(IDS_UNTITLED_SHORTCUT_FILE_NAME); diff --git a/chrome/common/render_messages.h b/chrome/common/render_messages.h index c689365..e187b7d 100644 --- a/chrome/common/render_messages.h +++ b/chrome/common/render_messages.h @@ -889,6 +889,7 @@ struct ParamTraits<ContextMenuParams> { WriteParam(m, p.spellcheck_enabled); WriteParam(m, p.edit_flags); WriteParam(m, p.security_info); + WriteParam(m, p.frame_charset); } static bool Read(const Message* m, void** iter, param_type* p) { return @@ -905,7 +906,8 @@ struct ParamTraits<ContextMenuParams> { ReadParam(m, iter, &p->dictionary_suggestions) && ReadParam(m, iter, &p->spellcheck_enabled) && ReadParam(m, iter, &p->edit_flags) && - ReadParam(m, iter, &p->security_info); + ReadParam(m, iter, &p->security_info) && + ReadParam(m, iter, &p->frame_charset); } static void Log(const param_type& p, std::wstring* l) { l->append(L"<ContextMenuParams>"); diff --git a/chrome/renderer/render_view.cc b/chrome/renderer/render_view.cc index a8a8c41..1236886 100644 --- a/chrome/renderer/render_view.cc +++ b/chrome/renderer/render_view.cc @@ -1994,7 +1994,8 @@ void RenderView::ShowContextMenu(WebView* webview, const std::wstring& selection_text, const std::wstring& misspelled_word, int edit_flags, - const std::string& security_info) { + const std::string& security_info, + const std::string& frame_charset) { ContextMenuParams params; params.node = node; params.x = x; @@ -2010,6 +2011,7 @@ void RenderView::ShowContextMenu(WebView* webview, webview->GetFocusedFrame()->SpellCheckEnabled(); params.edit_flags = edit_flags; params.security_info = security_info; + params.frame_charset = frame_charset; Send(new ViewHostMsg_ContextMenu(routing_id_, params)); } diff --git a/chrome/renderer/render_view.h b/chrome/renderer/render_view.h index b3d0f10..fa88cb1 100644 --- a/chrome/renderer/render_view.h +++ b/chrome/renderer/render_view.h @@ -266,7 +266,8 @@ class RenderView : public RenderWidget, const std::wstring& 
selection_text, const std::wstring& misspelled_word, int edit_flags, - const std::string& security_info); + const std::string& security_info, + const std::string& frame_charset); virtual void StartDragging(WebView* webview, const WebKit::WebDragData& drag_data); diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 9610e40..e5c53eb 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -247,17 +247,22 @@ bool DecodeBQEncoding(const std::string& part, RFC2047EncodingType enc_type, } bool DecodeWord(const std::string& encoded_word, + const std::string& referrer_charset, bool *is_rfc2047, std::string* output) { - // TODO(jungshik) : Revisit this later. Do we want to pass through non-ASCII - // strings which can be mozibake? WinHTTP converts a raw 8bit string - // UTF-16 assuming it's in the OS default encoding. if (!IsStringASCII(encoded_word)) { - // Try falling back to the NativeMB encoding if the raw input is not UTF-8. + // Try UTF-8, referrer_charset and the native OS default charset in turn. if (IsStringUTF8(encoded_word)) { *output = encoded_word; } else { - *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); + std::wstring wide_output; + if (!referrer_charset.empty() && + CodepageToWide(encoded_word, referrer_charset.c_str(), + OnStringUtilConversionError::FAIL, &wide_output)) { + *output = WideToUTF8(wide_output); + } else { + *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); + } } *is_rfc2047 = false; return true; @@ -357,7 +362,9 @@ bool DecodeWord(const std::string& encoded_word, return false; } -bool DecodeParamValue(const std::string& input, std::string* output) { +bool DecodeParamValue(const std::string& input, + const std::string& referrer_charset, + std::string* output) { std::string tmp; // Tokenize with whitespace characters. StringTokenizer t(input, " \t\n\r"); @@ -378,7 +385,8 @@ bool DecodeParamValue(const std::string& input, std::string* output) { // in a single encoded-word. 
Firefox/Thunderbird do not support // it, either. std::string decoded; - if (!DecodeWord(t.token(), &is_previous_token_rfc2047, &decoded)) + if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, + &decoded)) return false; tmp.append(decoded); } @@ -683,7 +691,8 @@ std::string GetSpecificHeader(const std::string& headers, return GetSpecificHeaderT(headers, name); } -std::wstring GetFileNameFromCD(const std::string& header) { +std::wstring GetFileNameFromCD(const std::string& header, + const std::string& referrer_charset) { std::string param_value = GetHeaderParamValue(header, "filename"); if (param_value.empty()) { // Some servers use 'name' parameter. @@ -692,7 +701,7 @@ std::wstring GetFileNameFromCD(const std::string& header) { if (param_value.empty()) return std::wstring(); std::string decoded; - if (DecodeParamValue(param_value, &decoded)) + if (DecodeParamValue(param_value, referrer_charset, &decoded)) return UTF8ToWide(decoded); return std::wstring(); } @@ -863,8 +872,10 @@ std::wstring StripWWW(const std::wstring& text) { std::wstring GetSuggestedFilename(const GURL& url, const std::string& content_disposition, + const std::string& referrer_charset, const std::wstring& default_name) { - std::wstring filename = GetFileNameFromCD(content_disposition); + std::wstring filename = GetFileNameFromCD(content_disposition, + referrer_charset); if (!filename.empty()) { // Remove any path information the server may have sent, take the name // only. 
@@ -901,13 +912,6 @@ std::wstring GetSuggestedFilename(const GURL& url, return filename; } -std::wstring GetSuggestedFilename(const GURL& url, - const std::wstring& content_disposition, - const std::wstring& default_name) { - return GetSuggestedFilename( - url, WideToUTF8(content_disposition), default_name); -} - bool IsPortAllowedByDefault(int port) { int array_size = arraysize(kRestrictedPorts); for (int i = 0; i < array_size; i++) { diff --git a/net/base/net_util.h b/net/base/net_util.h index 2ad81bf..9aba789 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -71,12 +71,20 @@ std::wstring GetHeaderParamValue(const std::wstring& field, std::string GetHeaderParamValue(const std::string& field, const std::string& param_name); -// Return the filename extracted from Content-Disposition header. Only two -// formats are supported: a. %-escaped UTF-8 b. RFC 2047. +// Return the filename extracted from Content-Disposition header. The following +// formats are tried in order listed below: // -// A non-ASCII param value is just returned as it is (assuming a NativeMB -// encoding). When a param value is ASCII, but is not in one of two forms -// supported, it is returned as it is unless it's pretty close to two supported +// 1. RFC 2047 +// 2. Raw-8bit-characters : +// a. UTF-8, b. referrer_charset, c. default os codepage. +// 3. %-escaped UTF-8. +// +// In step 2, if referrer_charset is empty(i.e. unknown), 2b is skipped. +// In step 3, the fallback charsets tried in step 2 are not tried. We +// can consider doing that later. +// +// When a param value is ASCII, but is not in format #1 or format #3 above, +// it is returned as it is unless it's pretty close to two supported // formats but not well-formed. In that case, an empty string is returned. // // In any case, a caller must check for the empty return value and resort to @@ -90,7 +98,8 @@ std::string GetHeaderParamValue(const std::string& field, // other caller is a unit test. 
Need to figure out expose this function only to // net_util_unittest. // -std::wstring GetFileNameFromCD(const std::string& header); +std::wstring GetFileNameFromCD(const std::string& header, + const std::string& referrer_charset); // Converts the given host name to unicode characters, APPENDING them to the // the given output string. This can be called for any host name, if the @@ -133,14 +142,12 @@ std::wstring StripWWW(const std::wstring& text); // Gets the filename from the raw Content-Disposition header (as read from the // network). Otherwise uses the last path component name or hostname from // |url|. Note: it's possible for the suggested filename to be empty (e.g., -// file:/// or view-cache:). +// file:/// or view-cache:). |referrer_charset| is used as one of the charsets +// to interpret a raw 8bit string in the C-D header (after interpreting +// as UTF-8 fails). See the comment for GetFileNameFromCD for more details. std::wstring GetSuggestedFilename(const GURL& url, const std::string& content_disposition, - const std::wstring& default_name); - -// DEPRECATED: Please use the above version of this method. 
-std::wstring GetSuggestedFilename(const GURL& url, - const std::wstring& content_disposition, + const std::string& referrer_charset, const std::wstring& default_name); // Checks the given port against a list of ports which are restricted by diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index f9bc7f7..1a3bcdc 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -39,6 +39,7 @@ struct HeaderParamCase { struct FileNameCDCase { const char* header_field; + const char* referrer_charset; const wchar_t* expected; }; @@ -58,7 +59,8 @@ struct IDNTestCase { struct SuggestedFilenameCase { const char* url; - const wchar_t* content_disp_header; + const char* content_disp_header; + const char* referrer_charset; const wchar_t* default_filename; const wchar_t* expected_filename; }; @@ -299,75 +301,96 @@ TEST(NetUtilTest, GetHeaderParamValue) { TEST(NetUtilTest, GetFileNameFromCD) { const FileNameCDCase tests[] = { // Test various forms of C-D header fields emitted by web servers. 
- {"content-disposition: inline; filename=\"abcde.pdf\"", L"abcde.pdf"}, - {"content-disposition: inline; name=\"abcde.pdf\"", L"abcde.pdf"}, - {"content-disposition: attachment; filename=abcde.pdf", L"abcde.pdf"}, - {"content-disposition: attachment; name=abcde.pdf", L"abcde.pdf"}, - {"content-disposition: attachment; filename=abc,de.pdf", L"abc,de.pdf"}, - {"content-disposition: filename=abcde.pdf", L"abcde.pdf"}, - {"content-disposition: filename= abcde.pdf", L"abcde.pdf"}, - {"content-disposition: filename =abcde.pdf", L"abcde.pdf"}, - {"content-disposition: filename = abcde.pdf", L"abcde.pdf"}, - {"content-disposition: filename\t=abcde.pdf", L"abcde.pdf"}, - {"content-disposition: filename \t\t =abcde.pdf", L"abcde.pdf"}, - {"content-disposition: name=abcde.pdf", L"abcde.pdf"}, - {"content-disposition: inline; filename=\"abc%20de.pdf\"", L"abc de.pdf"}, + {"content-disposition: inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"}, + {"content-disposition: inline; name=\"abcde.pdf\"", "", L"abcde.pdf"}, + {"content-disposition: attachment; filename=abcde.pdf", "", L"abcde.pdf"}, + {"content-disposition: attachment; name=abcde.pdf", "", L"abcde.pdf"}, + {"content-disposition: attachment; filename=abc,de.pdf", "", L"abc,de.pdf"}, + {"content-disposition: filename=abcde.pdf", "", L"abcde.pdf"}, + {"content-disposition: filename= abcde.pdf", "", L"abcde.pdf"}, + {"content-disposition: filename =abcde.pdf", "", L"abcde.pdf"}, + {"content-disposition: filename = abcde.pdf", "", L"abcde.pdf"}, + {"content-disposition: filename\t=abcde.pdf", "", L"abcde.pdf"}, + {"content-disposition: filename \t\t =abcde.pdf", "", L"abcde.pdf"}, + {"content-disposition: name=abcde.pdf", "", L"abcde.pdf"}, + {"content-disposition: inline; filename=\"abc%20de.pdf\"", "", + L"abc de.pdf"}, // Whitespaces are converted to a space. 
- {"content-disposition: inline; filename=\"abc \t\nde.pdf\"", + {"content-disposition: inline; filename=\"abc \t\nde.pdf\"", "", L"abc de.pdf"}, // %-escaped UTF-8 {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" - "%EC%98%88%EC%88%A0.jpg\"", L"\xc608\xc220 \xc608\xc220.jpg"}, + "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"}, {"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1" - "abc.jpg\"", L"\U00010330\U00010331abc.jpg"}, + "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"}, {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n" - "%EC%98%88%EC%88%A0.jpg\"", L"\xc608\xc220 \xc608\xc220.jpg"}, + "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"}, // RFC 2047 with various charsets and Q/B encodings {"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" - "D13=2Epng?=\"", L"\x82b8\x8853" L"3.png"}, + "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"}, {"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=", - L"\xc608\xc220 3.png"}, + "", L"\xc608\xc220 3.png"}, {"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8" - "=A1=93_3=2Epng?=", L"\x82b8\x8853 3.png"}, + "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"}, {"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0" - "_3=2Epng?=", L"\U00010330 3.png"}, - {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e3_=2epng?=\"", - L"caf\x00e3 .png"}, + "_3=2Epng?=", "", L"\U00010330 3.png"}, + {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"", + "", L"caf\x00e9 .png"}, // Space after an encode word should be removed. - {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E3_?= .png\"", - L"caf\x00e3 .png"}, + {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"", + "", L"caf\x00e9 .png"}, // Two encoded words with different charsets (not very likely to be emitted // by web servers in the wild). Spaces between them are removed. 
{"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?=" - " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", L"\xc608\xc220 3\xc608\xc220.png"}, - {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E3?=" - " =?iso-8859-7?b?4eI=?= .png\"", L"caf\x00e3\x03b1\x03b2.png"}, - // Non-ASCII string is passed through (and treated as UTF-8). - {"Content-Disposition: attachment; filename=caf\xc3\xa3.png", - L"caf\x00e3.png"}, + " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "", + L"\xc608\xc220 3\xc608\xc220.png"}, + {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E9?=" + " =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"}, + // Non-ASCII string is passed through and treated as UTF-8 as long as + // it's valid as UTF-8 and regardless of |referrer_charset|. + {"Content-Disposition: attachment; filename=caf\xc3\xa9.png", + "iso-8859-1", L"caf\x00e9.png"}, + {"Content-Disposition: attachment; filename=caf\xc3\xa9.png", + "", L"caf\x00e9.png"}, + // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset. + {"Content-Disposition: attachment; filename=caf\xe5.png", + "windows-1253", L"caf\x03b5.png"}, +#if 0 + // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage. + // TODO(jungshik): We need to set the OS default codepage + // to a specific value before testing. On Windows, we can use + // SetThreadLocale(). 
+ {"Content-Disposition: attachment; filename=\xb0\xa1\xb0\xa2.png", + "", L"\xac00\xac01.png"}, +#endif // Failure cases // Invalid hex-digit "G" - {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", L""}, + {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", "", + L""}, // Incomplete RFC 2047 encoded-word (missing '='' at the end) - {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", L""}, + {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", "", L""}, // Extra character at the end of an encoded word - {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==", L""}, + {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==", + "", L""}, // Extra token at the end of an encoded word - {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?", L""}, - {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=", L""}, + {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?", + "", L""}, + {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=", + "", L""}, // Incomplete hex-escaped chars {"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=", - L""}, - {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", L""}, + "", L""}, + {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", "", L""}, // %-escaped non-UTF-8 encoding is an "error" - {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", L""}, + {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", "", L""}, // Two RFC 2047 encoded words in a row without a space is an error. 
{"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?=" - "=?iso-8859-7?b?4eIucG5nCg==?=", L""}, + "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""}, }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) { EXPECT_EQ(tests[i].expected, - net::GetFileNameFromCD(tests[i].header_field)); + net::GetFileNameFromCD(tests[i].header_field, + tests[i].referrer_charset)); } } @@ -669,97 +692,132 @@ TEST(NetUtilTest, StripWWW) { TEST(NetUtilTest, GetSuggestedFilename) { const SuggestedFilenameCase test_cases[] = { {"http://www.google.com/", - L"Content-disposition: attachment; filename=test.html", + "Content-disposition: attachment; filename=test.html", + "", L"", L"test.html"}, {"http://www.google.com/", - L"Content-disposition: attachment; filename=\"test.html\"", + "Content-disposition: attachment; filename=\"test.html\"", + "", L"", L"test.html"}, {"http://www.google.com/path/test.html", - L"Content-disposition: attachment", + "Content-disposition: attachment", + "", L"", L"test.html"}, {"http://www.google.com/path/test.html", - L"Content-disposition: attachment;", + "Content-disposition: attachment;", + "", L"", L"test.html"}, {"http://www.google.com/", - L"", + "", + "", L"", L"www.google.com"}, {"http://www.google.com/test.html", - L"", + "", + "", L"", L"test.html"}, // Now that we use googleurl's ExtractFileName, this case falls back // to the hostname. If this behavior is not desirable, we'd better // change ExtractFileName (in url_parse). 
{"http://www.google.com/path/", - L"", + "", + "", L"", L"www.google.com"}, {"http://www.google.com/path", - L"", + "", + "", L"", L"path"}, {"file:///", - L"", + "", + "", L"", L"download"}, {"view-cache:", - L"", + "", + "", L"", L"download"}, {"http://www.google.com/", - L"Content-disposition: attachment; filename =\"test.html\"", + "Content-disposition: attachment; filename =\"test.html\"", + "", L"download", L"test.html"}, {"http://www.google.com/", - L"", + "", + "", L"download", L"download"}, {"http://www.google.com/", - L"Content-disposition: attachment; filename=\"../test.html\"", + "Content-disposition: attachment; filename=\"../test.html\"", + "", L"", L"test.html"}, {"http://www.google.com/", - L"Content-disposition: attachment; filename=\"..\"", + "Content-disposition: attachment; filename=\"..\"", + "", L"download", L"download"}, {"http://www.google.com/test.html", - L"Content-disposition: attachment; filename=\"..\"", + "Content-disposition: attachment; filename=\"..\"", + "", L"download", L"test.html"}, // Below is a small subset of cases taken from GetFileNameFromCD test above. 
{"http://www.google.com/", - L"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" - L"%EC%98%88%EC%88%A0.jpg\"", + "Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" + "%EC%98%88%EC%88%A0.jpg\"", + "", L"", L"\uc608\uc220 \uc608\uc220.jpg"}, {"http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg", - L"", + "", + "", L"download", L"\uc608\uc220 \uc608\uc220.jpg"}, {"http://www.google.com/", - L"Content-disposition: attachment;", + "Content-disposition: attachment;", + "", L"\uB2E4\uC6B4\uB85C\uB4DC", L"\uB2E4\uC6B4\uB85C\uB4DC"}, {"http://www.google.com/", - L"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" - L"D13=2Epng?=\"", + "Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" + "D13=2Epng?=\"", + "", L"download", L"\u82b8\u88533.png"}, + {"http://www.example.com/images?id=3", + "Content-Disposition: attachment; filename=caf\xc3\xa9.png", + "iso-8859-1", + L"", + L"caf\u00e9.png"}, + {"http://www.example.com/images?id=3", + "Content-Disposition: attachment; filename=caf\xe5.png", + "windows-1253", + L"", + L"caf\u03b5.png"}, + {"http://www.example.com/file?id=3", + "Content-Disposition: attachment; name=\xcf\xc2\xd4\xd8.zip", + "GBK", + L"", + L"\u4e0b\u8f7d.zip"}, // Invalid C-D header. Extracts filename from url. 
{"http://www.google.com/test.html", - L"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", + "Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", + "", L"", L"test.html"}, }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { std::wstring filename = net::GetSuggestedFilename( GURL(test_cases[i].url), test_cases[i].content_disp_header, - test_cases[i].default_filename); + test_cases[i].referrer_charset, test_cases[i].default_filename); EXPECT_EQ(std::wstring(test_cases[i].expected_filename), filename); } } diff --git a/net/url_request/url_request_context.h b/net/url_request/url_request_context.h index 8c09c90..29735ac 100644 --- a/net/url_request/url_request_context.h +++ b/net/url_request/url_request_context.h @@ -70,6 +70,13 @@ class URLRequestContext : return EmptyString(); } + // In general, referrer_charset is not known when URLRequestContext is + // constructed. So, we need a setter. + const std::string& referrer_charset() const { return referrer_charset_; } + void set_referrer_charset(const std::string& charset) { + referrer_charset_ = charset; + } + protected: friend class base::RefCountedThreadSafe<URLRequestContext>; @@ -85,6 +92,10 @@ class URLRequestContext : net::FtpAuthCache ftp_auth_cache_; std::string accept_language_; std::string accept_charset_; + // The charset of the referrer where this request comes from. It's not + // used in communication with a server but is used to construct a suggested + // filename for file download. + std::string referrer_charset_; private: DISALLOW_COPY_AND_ASSIGN(URLRequestContext); diff --git a/webkit/glue/context_menu.h b/webkit/glue/context_menu.h index 32a6925..69ea22f 100644 --- a/webkit/glue/context_menu.h +++ b/webkit/glue/context_menu.h @@ -111,6 +111,9 @@ struct ContextMenuParams { // The security info for the resource we are showing the menu on. std::string security_info; + + // The character encoding of the frame on which the menu is invoked. 
+ std::string frame_charset; }; #endif // WEBKIT_GLUE_CONTEXT_NODE_TYPES_H__ diff --git a/webkit/glue/context_menu_client_impl.cc b/webkit/glue/context_menu_client_impl.cc index 2947968..8fca4a6 100644 --- a/webkit/glue/context_menu_client_impl.cc +++ b/webkit/glue/context_menu_client_impl.cc @@ -177,7 +177,8 @@ WebCore::PlatformMenuDescription GURL page_url; std::string security_info; - std::wstring frame_encoding; + std::string frame_charset = WideToASCII( + webkit_glue::StringToStdWString(selected_frame->loader()->encoding())); // Send the frame and page URLs in any case. ContextNode frame_node = ContextNode(ContextNode::NONE); ContextNode page_node = @@ -188,8 +189,6 @@ WebCore::PlatformMenuDescription frame_node = GetTypeAndURLFromFrame(selected_frame, &frame_url, ContextNode(ContextNode::FRAME)); - frame_encoding = webkit_glue::StringToStdWString( - selected_frame->loader()->encoding()); } if (r.isSelected()) { @@ -253,7 +252,8 @@ WebCore::PlatformMenuDescription selection_text_string, misspelled_word_string, edit_flags, - security_info); + security_info, + frame_charset); } return NULL; } diff --git a/webkit/glue/resource_handle_impl.cc b/webkit/glue/resource_handle_impl.cc index fa3fb89..024836e 100644 --- a/webkit/glue/resource_handle_impl.cc +++ b/webkit/glue/resource_handle_impl.cc @@ -151,12 +151,14 @@ static ResourceResponse MakeResourceResponse( // TODO(darin): We should leverage HttpResponseHeaders for this, and this // should be using the same code as ResourceDispatcherHost. + // TODO(jungshik): Figure out the actual value of the referrer charset and + // pass it to GetSuggestedFilename. 
std::wstring suggested_filename; if (info.headers) { std::string disp_val; if (info.headers->EnumerateHeader(NULL, "content-disposition", &disp_val)) { suggested_filename = net::GetSuggestedFilename( - webkit_glue::KURLToGURL(kurl), disp_val, std::wstring()); + webkit_glue::KURLToGURL(kurl), disp_val, "", std::wstring()); } } diff --git a/webkit/glue/webview_delegate.h b/webkit/glue/webview_delegate.h index 02a8a0c..27036db 100644 --- a/webkit/glue/webview_delegate.h +++ b/webkit/glue/webview_delegate.h @@ -560,9 +560,10 @@ class WebViewDelegate : virtual public WebWidgetDelegate { // clicked on // @param misspelled_word The editable (possibily) misspelled word // in the Editor on which dictionary lookup for suggestions will be done. - // @param edit_flags Which edit operations the renderer believes are available - // @param frame_encoding Which indicates the encoding of current focused - // sub frame. + // @param edit_flags which edit operations the renderer believes are available + // @param security_info + // @param frame_charset which indicates the character encoding of + // the currently focused frame. virtual void ShowContextMenu(WebView* webview, ContextNode node, int x, @@ -574,7 +575,8 @@ class WebViewDelegate : virtual public WebWidgetDelegate { const std::wstring& selection_text, const std::wstring& misspelled_word, int edit_flags, - const std::string& security_info) { + const std::string& security_info, + const std::string& frame_charset) { } // Starts a drag session with the supplied contextual information. 
diff --git a/webkit/tools/test_shell/test_webview_delegate.cc b/webkit/tools/test_shell/test_webview_delegate.cc index aeae028..4bf5601 100755 --- a/webkit/tools/test_shell/test_webview_delegate.cc +++ b/webkit/tools/test_shell/test_webview_delegate.cc @@ -525,7 +525,8 @@ void TestWebViewDelegate::ShowContextMenu(WebView* webview, const std::wstring& selection_text, const std::wstring& misspelled_word, int edit_flags, - const std::string& security_info) { + const std::string& security_info, + const std::string& frame_charset) { CapturedContextMenuEvent context(node, x, y); captured_context_menu_events_.push_back(context); } diff --git a/webkit/tools/test_shell/test_webview_delegate.h b/webkit/tools/test_shell/test_webview_delegate.h index 413fe8d..79fd25d 100644 --- a/webkit/tools/test_shell/test_webview_delegate.h +++ b/webkit/tools/test_shell/test_webview_delegate.h @@ -119,7 +119,8 @@ class TestWebViewDelegate : public base::RefCounted<TestWebViewDelegate>, const std::wstring& selection_text, const std::wstring& misspelled_word, int edit_flags, - const std::string& security_info); + const std::string& security_info, + const std::string& frame_charset); virtual void DidStartProvisionalLoadForFrame( WebView* webview, WebFrame* frame, |