summaryrefslogtreecommitdiffstats
path: root/net/base/net_util_unittest.cc
diff options
context:
space:
mode:
authorjungshik@google.com <jungshik@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-05-01 22:51:50 +0000
committerjungshik@google.com <jungshik@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-05-01 22:51:50 +0000
commitc9825a4d401b014d89534020c8cf93302efd398c (patch)
treedcc654d12d5f538c2507f5f0b920cac091fe1a89 /net/base/net_util_unittest.cc
parent36c165e203d4ddf415c29b865b4c13f0b9f32d38 (diff)
downloadchromium_src-c9825a4d401b014d89534020c8cf93302efd398c.zip
chromium_src-c9825a4d401b014d89534020c8cf93302efd398c.tar.gz
chromium_src-c9825a4d401b014d89534020c8cf93302efd398c.tar.bz2
This CL makes Chrome on par with Firefox in terms of 'GetSuggestedFilename' for file download via context-menu.
For a download initiated with a click on a link in a web page, a webkit-side change is necessary, which will be done later. Add a field (referrer_charset) to URLRequestContext and DownloadCreateInfo. It's set to the character encoding of the document from which the download request originates, when that is known (a download initiated via "save as" in the context menu). If it's not known (a download initiated by clicking on a download link or typing a URL directly into the omnibox), it's initialized to the default character encoding in the user's preferences. I guess this is marginally better than leaving it empty (in that case, step 2b below will be skipped and step 2c will be taken) because the user has better control over how raw 8-bit characters in C-D are interpreted (especially on Windows, where a reboot is required to change the OS default codepage). This is later passed to GetSuggestedFilename and used as one of the fallback encodings (1. UTF-8, 2. referrer_charset, 3. default OS codepage). With this change, we support the following: 1. RFC 2047 2. Raw 8-bit characters: a. UTF-8, b. referrer_charset, c. default OS codepage. 3. %-escaped UTF-8. In this CL, for #3, I didn't add a fallback similar to the one used for #2. If necessary, it can be added easily. New entries are added to 3 existing tests. What was previously not covered (raw 8-bit Content-Disposition header) is now covered in all 3 tests. BUG=1148 TEST=net unit test: NetUtilTest.GetFileNameFromCD NetUtilTest.GetSuggestedFilename unittest : DownloadManagerTest.TestDownloadFilename Review URL: http://codereview.chromium.org/83002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15113 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base/net_util_unittest.cc')
-rw-r--r--net/base/net_util_unittest.cc188
1 files changed, 123 insertions, 65 deletions
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index f9bc7f7..1a3bcdc 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -39,6 +39,7 @@ struct HeaderParamCase {
struct FileNameCDCase {
const char* header_field;
+ const char* referrer_charset;
const wchar_t* expected;
};
@@ -58,7 +59,8 @@ struct IDNTestCase {
struct SuggestedFilenameCase {
const char* url;
- const wchar_t* content_disp_header;
+ const char* content_disp_header;
+ const char* referrer_charset;
const wchar_t* default_filename;
const wchar_t* expected_filename;
};
@@ -299,75 +301,96 @@ TEST(NetUtilTest, GetHeaderParamValue) {
TEST(NetUtilTest, GetFileNameFromCD) {
const FileNameCDCase tests[] = {
// Test various forms of C-D header fields emitted by web servers.
- {"content-disposition: inline; filename=\"abcde.pdf\"", L"abcde.pdf"},
- {"content-disposition: inline; name=\"abcde.pdf\"", L"abcde.pdf"},
- {"content-disposition: attachment; filename=abcde.pdf", L"abcde.pdf"},
- {"content-disposition: attachment; name=abcde.pdf", L"abcde.pdf"},
- {"content-disposition: attachment; filename=abc,de.pdf", L"abc,de.pdf"},
- {"content-disposition: filename=abcde.pdf", L"abcde.pdf"},
- {"content-disposition: filename= abcde.pdf", L"abcde.pdf"},
- {"content-disposition: filename =abcde.pdf", L"abcde.pdf"},
- {"content-disposition: filename = abcde.pdf", L"abcde.pdf"},
- {"content-disposition: filename\t=abcde.pdf", L"abcde.pdf"},
- {"content-disposition: filename \t\t =abcde.pdf", L"abcde.pdf"},
- {"content-disposition: name=abcde.pdf", L"abcde.pdf"},
- {"content-disposition: inline; filename=\"abc%20de.pdf\"", L"abc de.pdf"},
+ {"content-disposition: inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"},
+ {"content-disposition: inline; name=\"abcde.pdf\"", "", L"abcde.pdf"},
+ {"content-disposition: attachment; filename=abcde.pdf", "", L"abcde.pdf"},
+ {"content-disposition: attachment; name=abcde.pdf", "", L"abcde.pdf"},
+ {"content-disposition: attachment; filename=abc,de.pdf", "", L"abc,de.pdf"},
+ {"content-disposition: filename=abcde.pdf", "", L"abcde.pdf"},
+ {"content-disposition: filename= abcde.pdf", "", L"abcde.pdf"},
+ {"content-disposition: filename =abcde.pdf", "", L"abcde.pdf"},
+ {"content-disposition: filename = abcde.pdf", "", L"abcde.pdf"},
+ {"content-disposition: filename\t=abcde.pdf", "", L"abcde.pdf"},
+ {"content-disposition: filename \t\t =abcde.pdf", "", L"abcde.pdf"},
+ {"content-disposition: name=abcde.pdf", "", L"abcde.pdf"},
+ {"content-disposition: inline; filename=\"abc%20de.pdf\"", "",
+ L"abc de.pdf"},
// Whitespaces are converted to a space.
- {"content-disposition: inline; filename=\"abc \t\nde.pdf\"",
+ {"content-disposition: inline; filename=\"abc \t\nde.pdf\"", "",
L"abc de.pdf"},
// %-escaped UTF-8
{"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
- "%EC%98%88%EC%88%A0.jpg\"", L"\xc608\xc220 \xc608\xc220.jpg"},
+ "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
{"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1"
- "abc.jpg\"", L"\U00010330\U00010331abc.jpg"},
+ "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"},
{"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n"
- "%EC%98%88%EC%88%A0.jpg\"", L"\xc608\xc220 \xc608\xc220.jpg"},
+ "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
// RFC 2047 with various charsets and Q/B encodings
{"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
- "D13=2Epng?=\"", L"\x82b8\x8853" L"3.png"},
+ "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"},
{"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=",
- L"\xc608\xc220 3.png"},
+ "", L"\xc608\xc220 3.png"},
{"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8"
- "=A1=93_3=2Epng?=", L"\x82b8\x8853 3.png"},
+ "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"},
{"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0"
- "_3=2Epng?=", L"\U00010330 3.png"},
- {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e3_=2epng?=\"",
- L"caf\x00e3 .png"},
+ "_3=2Epng?=", "", L"\U00010330 3.png"},
+ {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"",
+ "", L"caf\x00e9 .png"},
// Space after an encode word should be removed.
- {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E3_?= .png\"",
- L"caf\x00e3 .png"},
+ {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"",
+ "", L"caf\x00e9 .png"},
// Two encoded words with different charsets (not very likely to be emitted
// by web servers in the wild). Spaces between them are removed.
{"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?="
- " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", L"\xc608\xc220 3\xc608\xc220.png"},
- {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E3?="
- " =?iso-8859-7?b?4eI=?= .png\"", L"caf\x00e3\x03b1\x03b2.png"},
- // Non-ASCII string is passed through (and treated as UTF-8).
- {"Content-Disposition: attachment; filename=caf\xc3\xa3.png",
- L"caf\x00e3.png"},
+ " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "",
+ L"\xc608\xc220 3\xc608\xc220.png"},
+ {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E9?="
+ " =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"},
+ // Non-ASCII string is passed through and treated as UTF-8 as long as
+ // it's valid as UTF-8 and regardless of |referrer_charset|.
+ {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
+ "iso-8859-1", L"caf\x00e9.png"},
+ {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
+ "", L"caf\x00e9.png"},
+ // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset.
+ {"Content-Disposition: attachment; filename=caf\xe5.png",
+ "windows-1253", L"caf\x03b5.png"},
+#if 0
+ // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage.
+ // TODO(jungshik): We need to set the OS default codepage
+ // to a specific value before testing. On Windows, we can use
+ // SetThreadLocale().
+ {"Content-Disposition: attachment; filename=\xb0\xa1\xb0\xa2.png",
+ "", L"\xac00\xac01.png"},
+#endif
// Failure cases
// Invalid hex-digit "G"
- {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", L""},
+ {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", "",
+ L""},
// Incomplete RFC 2047 encoded-word (missing '='' at the end)
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", L""},
+ {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", "", L""},
// Extra character at the end of an encoded word
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==", L""},
+ {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==",
+ "", L""},
// Extra token at the end of an encoded word
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?", L""},
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=", L""},
+ {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?",
+ "", L""},
+ {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=",
+ "", L""},
// Incomplete hex-escaped chars
{"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=",
- L""},
- {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", L""},
+ "", L""},
+ {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", "", L""},
// %-escaped non-UTF-8 encoding is an "error"
- {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", L""},
+ {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", "", L""},
// Two RFC 2047 encoded words in a row without a space is an error.
{"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?="
- "=?iso-8859-7?b?4eIucG5nCg==?=", L""},
+ "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""},
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
EXPECT_EQ(tests[i].expected,
- net::GetFileNameFromCD(tests[i].header_field));
+ net::GetFileNameFromCD(tests[i].header_field,
+ tests[i].referrer_charset));
}
}
@@ -669,97 +692,132 @@ TEST(NetUtilTest, StripWWW) {
TEST(NetUtilTest, GetSuggestedFilename) {
const SuggestedFilenameCase test_cases[] = {
{"http://www.google.com/",
- L"Content-disposition: attachment; filename=test.html",
+ "Content-disposition: attachment; filename=test.html",
+ "",
L"",
L"test.html"},
{"http://www.google.com/",
- L"Content-disposition: attachment; filename=\"test.html\"",
+ "Content-disposition: attachment; filename=\"test.html\"",
+ "",
L"",
L"test.html"},
{"http://www.google.com/path/test.html",
- L"Content-disposition: attachment",
+ "Content-disposition: attachment",
+ "",
L"",
L"test.html"},
{"http://www.google.com/path/test.html",
- L"Content-disposition: attachment;",
+ "Content-disposition: attachment;",
+ "",
L"",
L"test.html"},
{"http://www.google.com/",
- L"",
+ "",
+ "",
L"",
L"www.google.com"},
{"http://www.google.com/test.html",
- L"",
+ "",
+ "",
L"",
L"test.html"},
// Now that we use googleurl's ExtractFileName, this case falls back
// to the hostname. If this behavior is not desirable, we'd better
// change ExtractFileName (in url_parse).
{"http://www.google.com/path/",
- L"",
+ "",
+ "",
L"",
L"www.google.com"},
{"http://www.google.com/path",
- L"",
+ "",
+ "",
L"",
L"path"},
{"file:///",
- L"",
+ "",
+ "",
L"",
L"download"},
{"view-cache:",
- L"",
+ "",
+ "",
L"",
L"download"},
{"http://www.google.com/",
- L"Content-disposition: attachment; filename =\"test.html\"",
+ "Content-disposition: attachment; filename =\"test.html\"",
+ "",
L"download",
L"test.html"},
{"http://www.google.com/",
- L"",
+ "",
+ "",
L"download",
L"download"},
{"http://www.google.com/",
- L"Content-disposition: attachment; filename=\"../test.html\"",
+ "Content-disposition: attachment; filename=\"../test.html\"",
+ "",
L"",
L"test.html"},
{"http://www.google.com/",
- L"Content-disposition: attachment; filename=\"..\"",
+ "Content-disposition: attachment; filename=\"..\"",
+ "",
L"download",
L"download"},
{"http://www.google.com/test.html",
- L"Content-disposition: attachment; filename=\"..\"",
+ "Content-disposition: attachment; filename=\"..\"",
+ "",
L"download",
L"test.html"},
// Below is a small subset of cases taken from GetFileNameFromCD test above.
{"http://www.google.com/",
- L"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
- L"%EC%98%88%EC%88%A0.jpg\"",
+ "Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
+ "%EC%98%88%EC%88%A0.jpg\"",
+ "",
L"",
L"\uc608\uc220 \uc608\uc220.jpg"},
{"http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg",
- L"",
+ "",
+ "",
L"download",
L"\uc608\uc220 \uc608\uc220.jpg"},
{"http://www.google.com/",
- L"Content-disposition: attachment;",
+ "Content-disposition: attachment;",
+ "",
L"\uB2E4\uC6B4\uB85C\uB4DC",
L"\uB2E4\uC6B4\uB85C\uB4DC"},
{"http://www.google.com/",
- L"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
- L"D13=2Epng?=\"",
+ "Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
+ "D13=2Epng?=\"",
+ "",
L"download",
L"\u82b8\u88533.png"},
+ {"http://www.example.com/images?id=3",
+ "Content-Disposition: attachment; filename=caf\xc3\xa9.png",
+ "iso-8859-1",
+ L"",
+ L"caf\u00e9.png"},
+ {"http://www.example.com/images?id=3",
+ "Content-Disposition: attachment; filename=caf\xe5.png",
+ "windows-1253",
+ L"",
+ L"caf\u03b5.png"},
+ {"http://www.example.com/file?id=3",
+ "Content-Disposition: attachment; name=\xcf\xc2\xd4\xd8.zip",
+ "GBK",
+ L"",
+ L"\u4e0b\u8f7d.zip"},
// Invalid C-D header. Extracts filename from url.
{"http://www.google.com/test.html",
- L"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=",
+ "Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=",
+ "",
L"",
L"test.html"},
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
std::wstring filename = net::GetSuggestedFilename(
GURL(test_cases[i].url), test_cases[i].content_disp_header,
- test_cases[i].default_filename);
+ test_cases[i].referrer_charset, test_cases[i].default_filename);
EXPECT_EQ(std::wstring(test_cases[i].expected_filename), filename);
}
}