diff options
author | abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-01-27 06:03:16 +0000 |
---|---|---|
committer | abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-01-27 06:03:16 +0000 |
commit | 56eab2f216ce86217149753a407f981cb6d5de6c (patch) | |
tree | bf4b45a8fbb845b5ec177332df0ebafcfaedb98f /net | |
parent | 9b961c91eeb8ba43be7918563a07432cbcb1c4df (diff) | |
download | chromium_src-56eab2f216ce86217149753a407f981cb6d5de6c.zip chromium_src-56eab2f216ce86217149753a407f981cb6d5de6c.tar.gz chromium_src-56eab2f216ce86217149753a407f981cb6d5de6c.tar.bz2 |
Improve parsing of the Content-Disposition header
Previously, we were using GetHeaderParamValue to parse the Content-Disposition
header, which describes itself as a "quick and dirty implementation." After
this patch, we use more of our normal HTTP parsing machinery, making our
parsing much less quirky and better aligned with RFC 6266.
Some notes:
1) Many of the test cases for parsing the Content-Disposition header included
the string "Content-Disposition: " in the input. I've looked through all
of the callers of these functions, and that seems to be completely bogus.
The old parser wasn't careful enough to see that as a problem, but the new
one follows the spec more closely. I've updated the test cases to remove
this string.
2) After this patch, there's a bunch of code in net_util.cc that really should
be moved to http_content_disposition.cc. I didn't move that code in this
patch because I didn't want this patch to be too large. I'll move it in a
future patch.
3) In a future patch, I'll audit the codebase for callers of
GetHeaderParamValue. With any luck, we'll be able to remove them all and
delete this less-than-amazing function.
BUG=65423
Review URL: http://codereview.chromium.org/9234055
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@119378 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/base/net_util.cc | 115 | ||||
-rw-r--r-- | net/base/net_util.h | 32 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 360 | ||||
-rw-r--r-- | net/http/http_content_disposition.cc | 93 | ||||
-rw-r--r-- | net/http/http_content_disposition.h | 42 | ||||
-rw-r--r-- | net/http/http_content_disposition_unittest.cc | 201 | ||||
-rw-r--r-- | net/http/http_util.h | 8 | ||||
-rw-r--r-- | net/net.gyp | 3 |
8 files changed, 498 insertions, 356 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc index b4f1e579..d2fd1f5 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -63,6 +63,7 @@ #if defined(OS_WIN) #include "net/base/winsock_init.h" #endif +#include "net/http/http_content_disposition.h" #include "unicode/datefmt.h" #include "unicode/regex.h" #include "unicode/ucnv.h" @@ -357,38 +358,6 @@ bool DecodeWord(const std::string& encoded_word, return false; } -bool DecodeParamValue(const std::string& input, - const std::string& referrer_charset, - std::string* output) { - std::string tmp; - // Tokenize with whitespace characters. - StringTokenizer t(input, " \t\n\r"); - t.set_options(StringTokenizer::RETURN_DELIMS); - bool is_previous_token_rfc2047 = true; - while (t.GetNext()) { - if (t.token_is_delim()) { - // If the previous non-delimeter token is not RFC2047-encoded, - // put in a space in its place. Otheriwse, skip over it. - if (!is_previous_token_rfc2047) { - tmp.push_back(' '); - } - continue; - } - // We don't support a single multibyte character split into - // adjacent encoded words. Some broken mail clients emit headers - // with that problem, but most web servers usually encode a filename - // in a single encoded-word. Firefox/Thunderbird do not support - // it, either. - std::string decoded; - if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, - &decoded)) - return false; - tmp.append(decoded); - } - output->swap(tmp); - return true; -} - // Does some simple normalization of scripts so we can allow certain scripts // to exist together. 
// TODO(brettw) bug 880223: we should allow some other languages to be @@ -1217,39 +1186,57 @@ bool DecodeCharset(const std::string& input, return true; } -std::string GetFileNameFromCD(const std::string& header, - const std::string& referrer_charset) { - std::string decoded; - std::string param_value = GetHeaderParamValue(header, "filename*", - QuoteRule::KEEP_OUTER_QUOTES); - if (!param_value.empty()) { - if (param_value.find('"') == std::string::npos) { - std::string charset; - std::string value; - if (DecodeCharset(param_value, &charset, &value)) { - // RFC 5987 value should be ASCII-only. - if (!IsStringASCII(value)) - return std::string(); - std::string tmp = UnescapeURLComponent( - value, - UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); - if (base::ConvertToUtf8AndNormalize(tmp, charset, &decoded)) - return decoded; +bool DecodeFilenameValue(const std::string& input, + const std::string& referrer_charset, + std::string* output) { + std::string tmp; + // Tokenize with whitespace characters. + StringTokenizer t(input, " \t\n\r"); + t.set_options(StringTokenizer::RETURN_DELIMS); + bool is_previous_token_rfc2047 = true; + while (t.GetNext()) { + if (t.token_is_delim()) { + // If the previous non-delimeter token is not RFC2047-encoded, + // put in a space in its place. Otheriwse, skip over it. + if (!is_previous_token_rfc2047) { + tmp.push_back(' '); } + continue; } + // We don't support a single multibyte character split into + // adjacent encoded words. Some broken mail clients emit headers + // with that problem, but most web servers usually encode a filename + // in a single encoded-word. Firefox/Thunderbird do not support + // it, either. + std::string decoded; + if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, + &decoded)) + return false; + tmp.append(decoded); } - param_value = GetHeaderParamValue(header, "filename", - QuoteRule::REMOVE_OUTER_QUOTES); - if (param_value.empty()) { - // Some servers use 'name' parameter. 
- param_value = GetHeaderParamValue(header, "name", - QuoteRule::REMOVE_OUTER_QUOTES); + output->swap(tmp); + return true; +} + +bool DecodeExtValue(const std::string& param_value, std::string* decoded) { + if (param_value.find('"') != std::string::npos) + return false; + + std::string charset; + std::string value; + if (!DecodeCharset(param_value, &charset, &value)) + return false; + + // RFC 5987 value should be ASCII-only. + if (!IsStringASCII(value)) { + decoded->clear(); + return true; } - if (param_value.empty()) - return std::string(); - if (DecodeParamValue(param_value, referrer_charset, &decoded)) - return decoded; - return std::string(); + + std::string unescaped = UnescapeURLComponent(value, + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); + + return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); } // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm @@ -1472,8 +1459,10 @@ string16 GetSuggestedFilename(const GURL& url, bool overwrite_extension = false; // Try to extract a filename from content-disposition first. - if (!content_disposition.empty()) - filename = GetFileNameFromCD(content_disposition, referrer_charset); + if (!content_disposition.empty()) { + HttpContentDisposition header(content_disposition, referrer_charset); + filename = header.filename(); + } // Then try to use the suggested name. if (filename.empty() && !suggested_name.empty()) diff --git a/net/base/net_util.h b/net/base/net_util.h index f72938f..fed20f0 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -151,31 +151,11 @@ NET_EXPORT std::string GetHeaderParamValue(const std::string& header, const std::string& param_name, QuoteRule::Type quote_rule); -// Return the filename extracted from Content-Disposition header. The following -// formats are tried in order listed below: -// -// 1. RFC 5987 -// 2. RFC 2047 -// 3. Raw-8bit-characters : -// a. UTF-8, b. referrer_charset, c. default os codepage. -// 4. %-escaped UTF-8. -// -// In step 3, if referrer_charset is empty(i.e. unknown), 3b is skipped. -// In step 4, the fallback charsets tried in step 3 are not tried. We -// can consider doing that later. -// -// When a param value is ASCII, but is not in format #2 or format #4 above, -// it is returned as it is unless it's pretty close to two supported -// formats but not well-formed. In that case, an empty string is returned. -// -// In any case, a caller must check for the empty return value and resort to -// another means to get a filename (e.g. url). -// -// This function does not do any escaping and callers are responsible for -// escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit. -NET_EXPORT_PRIVATE std::string GetFileNameFromCD( - const std::string& header, - const std::string& referrer_charset); +// TODO(abarth): Move these functions to http_content_disposition.cc. +bool DecodeFilenameValue(const std::string& input, + const std::string& referrer_charset, + std::string* output); +bool DecodeExtValue(const std::string& value, std::string* output); // Converts the given host name to unicode characters. This can be called for // any host name, if the input is not IDN or is invalid in some way, we'll just diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 7fe39ed..f34748b 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -809,185 +809,6 @@ TEST(NetUtilTest, GetHeaderParamValueQuotes) { } } -TEST(NetUtilTest, GetFileNameFromCD) { - const FileNameCDCase tests[] = { - // Test various forms of C-D header fields emitted by web servers. - {"content-disposition: inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"}, - {"content-disposition: inline; name=\"abcde.pdf\"", "", L"abcde.pdf"}, - {"content-disposition: attachment; filename=abcde.pdf", "", L"abcde.pdf"}, - {"content-disposition: attachment; name=abcde.pdf", "", L"abcde.pdf"}, - {"content-disposition: attachment; filename=abc,de.pdf", "", L"abc,de.pdf"}, - {"content-disposition: filename=abcde.pdf", "", L"abcde.pdf"}, - {"content-disposition: filename= abcde.pdf", "", L"abcde.pdf"}, - {"content-disposition: filename =abcde.pdf", "", L"abcde.pdf"}, - {"content-disposition: filename = abcde.pdf", "", L"abcde.pdf"}, - {"content-disposition: filename\t=abcde.pdf", "", L"abcde.pdf"}, - {"content-disposition: filename \t\t =abcde.pdf", "", L"abcde.pdf"}, - {"content-disposition: name=abcde.pdf", "", L"abcde.pdf"}, - {"content-disposition: inline; filename=\"abc%20de.pdf\"", "", - L"abc de.pdf"}, - // Unbalanced quotation mark - {"content-disposition: filename=\"abcdef.pdf", "", L"abcdef.pdf"}, - // Whitespaces are converted to a space. 
- {"content-disposition: inline; filename=\"abc \t\nde.pdf\"", "", - L"abc de.pdf"}, - // %-escaped UTF-8 - {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" - "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"}, - {"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1" - "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"}, - {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n" - "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"}, - // RFC 2047 with various charsets and Q/B encodings - {"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" - "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"}, - {"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=", - "", L"\xc608\xc220 3.png"}, - {"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8" - "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"}, - {"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0" - "_3=2Epng?=", "", L"\U00010330 3.png"}, - {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"", - "", L"caf\x00e9 .png"}, - // Space after an encoded word should be removed. - {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"", - "", L"caf\x00e9 .png"}, - // Two encoded words with different charsets (not very likely to be emitted - // by web servers in the wild). Spaces between them are removed. - {"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?=" - " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "", - L"\xc608\xc220 3\xc608\xc220.png"}, - {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E9?=" - " =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"}, - // Non-ASCII string is passed through and treated as UTF-8 as long as - // it's valid as UTF-8 and regardless of |referrer_charset|. 
- {"Content-Disposition: attachment; filename=caf\xc3\xa9.png", - "iso-8859-1", L"caf\x00e9.png"}, - {"Content-Disposition: attachment; filename=caf\xc3\xa9.png", - "", L"caf\x00e9.png"}, - // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset. - {"Content-Disposition: attachment; filename=caf\xe5.png", - "windows-1253", L"caf\x03b5.png"}, -#if 0 - // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage. - // TODO(jungshik): We need to set the OS default codepage - // to a specific value before testing. On Windows, we can use - // SetThreadLocale(). - {"Content-Disposition: attachment; filename=\xb0\xa1\xb0\xa2.png", - "", L"\xac00\xac01.png"}, -#endif - // Failure cases - // Invalid hex-digit "G" - {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", "", - L""}, - // Incomplete RFC 2047 encoded-word (missing '='' at the end) - {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", "", L""}, - // Extra character at the end of an encoded word - {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==", - "", L""}, - // Extra token at the end of an encoded word - {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?", - "", L""}, - {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=", - "", L""}, - // Incomplete hex-escaped chars - {"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=", - "", L""}, - {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", "", L""}, - // %-escaped non-UTF-8 encoding is an "error" - {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", "", L""}, - // Two RFC 2047 encoded words in a row without a space is an error. 
- {"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?=" - "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""}, - - // RFC 5987 tests with Filename* : see http://tools.ietf.org/html/rfc5987 - {"Content-Disposition: attachment; filename*=foo.html", "", L""}, - {"Content-Disposition: attachment; filename*=foo'.html", "", L""}, - {"Content-Disposition: attachment; filename*=''foo'.html", "", L""}, - {"Content-Disposition: attachment; filename*=''foo.html'", "", L""}, - {"Content-Disposition: attachment; filename*=''f\"oo\".html'", "", L""}, - {"Content-Disposition: attachment; filename*=bogus_charset''foo.html'", - "", L""}, - {"Content-Disposition: attachment; filename*='en'foo.html'", "", L""}, - {"Content-Disposition: attachment; filename*=iso-8859-1'en'foo.html", "", - L"foo.html"}, - {"Content-Disposition: attachment; filename*=utf-8'en'foo.html", "", - L"foo.html"}, - // charset cannot be omitted. - {"Content-Disposition: attachment; filename*='es'f\xfa.html'", "", L""}, - // Non-ASCII bytes are not allowed. - {"Content-Disposition: attachment; filename*=iso-8859-1'es'f\xfa.html", "", - L""}, - {"Content-Disposition: attachment; filename*=utf-8'es'f\xce\xba.html", "", - L""}, - // TODO(jshin): Space should be %-encoded, but currently, we allow - // spaces. - {"Content-Disposition: inline; filename*=iso88591''cafe foo.png", "", - L"cafe foo.png"}, - - // Filename* tests converted from Q-encoded tests above. 
- {"Content-Disposition: attachment; filename*=EUC-JP''%B7%DD%BD%D13%2Epng", - "", L"\x82b8\x8853" L"3.png"}, - {"Content-Disposition: attachment; filename*=utf-8''" - "%E8%8A%B8%E8%A1%93%203%2Epng", "", L"\x82b8\x8853 3.png"}, - {"Content-Disposition: attachment; filename*=utf-8''%F0%90%8C%B0 3.png", "", - L"\U00010330 3.png"}, - {"Content-Disposition: inline; filename*=Euc-Kr'ko'%BF%B9%BC%FA%2Epng", "", - L"\xc608\xc220.png"}, - {"Content-Disposition: attachment; filename*=windows-1252''caf%E9.png", "", - L"caf\x00e9.png"}, - - // http://greenbytes.de/tech/tc2231/ filename* test cases. - // attwithisofn2231iso - {"Content-Disposition: attachment; filename*=iso-8859-1''foo-%E4.html", "", - L"foo-\xe4.html"}, - // attwithfn2231utf8 - {"Content-Disposition: attachment; filename*=" - "UTF-8''foo-%c3%a4-%e2%82%ac.html", "", L"foo-\xe4-\x20ac.html"}, - // attwithfn2231noc : no encoding specified but UTF-8 is used. - {"Content-Disposition: attachment; filename*=''foo-%c3%a4-%e2%82%ac.html", - "", L""}, - // attwithfn2231utf8comp - {"Content-Disposition: attachment; filename*=UTF-8''foo-a%cc%88.html", "", - L"foo-\xe4.html"}, -#ifdef ICU_SHOULD_FAIL_CONVERSION_ON_INVALID_CHARACTER - // This does not work because we treat ISO-8859-1 synonymous with - // Windows-1252 per HTML5. For HTTP, in theory, we're not - // supposed to. 
- // attwithfn2231utf8-bad - {"Content-Disposition: attachment; filename*=" - "iso-8859-1''foo-%c3%a4-%e2%82%ac.html", "", L""}, -#endif - // attwithfn2231ws1 - {"Content-Disposition: attachment; filename *=UTF-8''foo-%c3%a4.html", "", - L""}, - // attwithfn2231ws2 - {"Content-Disposition: attachment; filename*= UTF-8''foo-%c3%a4.html", "", - L"foo-\xe4.html"}, - // attwithfn2231ws3 - {"Content-Disposition: attachment; filename* =UTF-8''foo-%c3%a4.html", "", - L"foo-\xe4.html"}, - // attwithfn2231quot - {"Content-Disposition: attachment; filename*=\"UTF-8''foo-%c3%a4.html\"", - "", L""}, - // attfnboth - {"Content-Disposition: attachment; filename=\"foo-ae.html\"; " - "filename*=UTF-8''foo-%c3%a4.html", "", L"foo-\xe4.html"}, - // attfnboth2 - {"Content-Disposition: attachment; filename*=UTF-8''foo-%c3%a4.html; " - "filename=\"foo-ae.html\"", "", L"foo-\xe4.html"}, - // attnewandfn - {"Content-Disposition: attachment; foobar=x; filename=\"foo.html\"", "", - L"foo.html"}, - }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) { - EXPECT_EQ(tests[i].expected, - UTF8ToWide(GetFileNameFromCD(tests[i].header_field, - tests[i].referrer_charset))) - << "Failed on input: " << tests[i].header_field; - } -} - TEST(NetUtilTest, IDNToUnicodeFast) { for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) { for (size_t j = 0; j < arraysize(kLanguages); j++) { @@ -1233,7 +1054,7 @@ TEST(NetUtilTest, GenerateFileName) { const GenerateFilenameCase selection_tests[] = { { "http://www.google.com/", - "Content-disposition: attachment; filename=test.html", + "attachment; filename=test.html", "", "", "", @@ -1242,7 +1063,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.google.com/", - "Content-disposition: attachment; filename=\"test.html\"", + "attachment; filename=\"test.html\"", "", "", "", @@ -1251,7 +1072,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.google.com/", - "Content-disposition: attachment; filename= \"test.html\"", + "attachment; filename= 
\"test.html\"", "", "", "", @@ -1260,7 +1081,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.google.com/", - "Content-disposition: attachment; filename = \"test.html\"", + "attachment; filename = \"test.html\"", "", "", "", @@ -1269,7 +1090,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // filename is whitespace. Should failover to URL host "http://www.google.com/", - "Content-disposition: attachment; filename= ", + "attachment; filename= ", "", "", "", @@ -1278,7 +1099,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // No filename. "http://www.google.com/path/test.html", - "Content-disposition: attachment", + "attachment", "", "", "", @@ -1287,7 +1108,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Ditto "http://www.google.com/path/test.html", - "Content-disposition: attachment;", + "attachment;", "", "", "", @@ -1361,7 +1182,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // C-D should override default "http://www.google.com/", - "Content-disposition: attachment; filename =\"test.html\"", + "attachment; filename =\"test.html\"", "", "", "", @@ -1379,7 +1200,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.google.com/", - "Content-disposition: attachment; filename=\"../test.html\"", + "attachment; filename=\"../test.html\"", "", "", "", @@ -1388,7 +1209,16 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.google.com/", - "Content-disposition: attachment; filename=\"..\\test.html\"", + "attachment; filename=\"..\\test.html\"", + "", + "", + "", + L"", + L"test.html" + }, + { + "http://www.google.com/", + "attachment; filename=\"..\\\\test.html\"", "", "", "", @@ -1397,7 +1227,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Filename disappears after leading and trailing periods are removed. "http://www.google.com/", - "Content-disposition: attachment; filename=\"..\"", + "attachment; filename=\"..\"", "", "", "", @@ -1406,7 +1236,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // C-D specified filename disappears. 
Failover to final filename. "http://www.google.com/test.html", - "Content-disposition: attachment; filename=\"..\"", + "attachment; filename=\"..\"", "", "", "", @@ -1416,7 +1246,7 @@ TEST(NetUtilTest, GenerateFileName) { // Below is a small subset of cases taken from GetFileNameFromCD test above. { "http://www.google.com/", - "Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20" + "attachment; filename=\"%EC%98%88%EC%88%A0%20" "%EC%98%88%EC%88%A0.jpg\"", "", "", @@ -1435,7 +1265,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.google.com/", - "Content-disposition: attachment;", + "attachment;", "", "", "", @@ -1444,7 +1274,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.google.com/", - "Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" + "attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" "D13=2Epng?=\"", "", "", @@ -1454,7 +1284,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/images?id=3", - "Content-Disposition: attachment; filename=caf\xc3\xa9.png", + "attachment; filename=caf\xc3\xa9.png", "iso-8859-1", "", "", @@ -1463,7 +1293,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/images?id=3", - "Content-Disposition: attachment; filename=caf\xe5.png", + "attachment; filename=caf\xe5.png", "windows-1253", "", "", @@ -1472,7 +1302,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/file?id=3", - "Content-Disposition: attachment; name=\xcf\xc2\xd4\xd8.zip", + "attachment; name=\xcf\xc2\xd4\xd8.zip", "GBK", "", "", @@ -1481,7 +1311,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Invalid C-D header. Extracts filename from url. "http://www.google.com/test.html", - "Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", + "attachment; filename==?iiso88591?Q?caf=EG?=", "", "", "", @@ -1554,7 +1384,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // The content-disposition has higher precedence over the suggested name. 
"http://www.google.com/test", - "Content-disposition: attachment; filename=test.html", + "attachment; filename=test.html", "", "suggested", "", @@ -1577,7 +1407,7 @@ TEST(NetUtilTest, GenerateFileName) { // Raw 8bit characters in C-D { "http://www.example.com/images?id=3", - "Content-Disposition: attachment; filename=caf\xc3\xa9.png", + "attachment; filename=caf\xc3\xa9.png", "iso-8859-1", "", "image/png", @@ -1586,7 +1416,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/images?id=3", - "Content-Disposition: attachment; filename=caf\xe5.png", + "attachment; filename=caf\xe5.png", "windows-1253", "", "image/png", @@ -1595,7 +1425,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // No 'filename' keyword in the disposition, use the URL "http://www.evil.com/my_download.txt", - "Content-Dispostion: a_file_name.txt", + "a_file_name.txt", "", "", "text/plain", @@ -1604,7 +1434,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Spaces in the disposition file name "http://www.frontpagehacker.com/a_download.exe", - "Content-Dispostion: filename=My Downloaded File.exe", + "filename=My Downloaded File.exe", "", "", "application/octet-stream", @@ -1613,7 +1443,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // % encoded "http://www.examples.com/", - "Content-Dispostion: attachment; " + "attachment; " "filename=\"%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg\"", "", "", @@ -1623,7 +1453,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // name= parameter "http://www.examples.com/q.cgi?id=abc", - "Content-Dispostion: attachment; name=abc de.pdf", + "attachment; name=abc de.pdf", "", "", "application/octet-stream", @@ -1632,7 +1462,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/path", - "Content-Dispostion: filename=\"=?EUC-JP?Q?=B7=DD=BD=D13=2Epng?=\"", + "filename=\"=?EUC-JP?Q?=B7=DD=BD=D13=2Epng?=\"", "", "", "image/png", @@ -1642,7 +1472,7 @@ TEST(NetUtilTest, GenerateFileName) { { // The following two have invalid CD headers and 
filenames come from the // URL. "http://www.example.com/test%20123", - "Content-Dispostion: attachment; filename==?iiso88591?Q?caf=EG?=", + "attachment; filename==?iiso88591?Q?caf=EG?=", "", "", "image/jpeg", @@ -1651,7 +1481,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg", - "Content-Dispostion: malformed_disposition", + "malformed_disposition", "", "", "image/jpeg", @@ -1660,7 +1490,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Invalid C-D. No filename from URL. Falls back to 'download'. "http://www.google.com/path1/path2/", - "Content-Dispostion: attachment; filename==?iso88591?Q?caf=E3?", + "attachment; filename==?iso88591?Q?caf=E3?", "", "", "image/jpeg", @@ -1703,7 +1533,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Disposition has relative paths, remove directory separators "http://www.evil.com/my_download.txt", - "Content-Dispostion: filename=../../../../././../a_file_name.txt", + "filename=../../../../././../a_file_name.txt", "", "", "text/plain", @@ -1712,7 +1542,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Disposition has parent directories, remove directory separators "http://www.evil.com/my_download.txt", - "Content-Dispostion: filename=dir1/dir2/a_file_name.txt", + "filename=dir1/dir2/a_file_name.txt", "", "", "text/plain", @@ -1721,7 +1551,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Disposition has relative paths, remove directory separators "http://www.evil.com/my_download.txt", - "Content-Dispostion: filename=..\\..\\..\\..\\.\\.\\..\\a_file_name.txt", + "filename=..\\..\\..\\..\\.\\.\\..\\a_file_name.txt", "", "", "text/plain", @@ -1730,7 +1560,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Disposition has parent directories, remove directory separators "http://www.evil.com/my_download.txt", - "Content-Dispostion: filename=dir1\\dir2\\a_file_name.txt", + "filename=dir1\\dir2\\a_file_name.txt", "", "", "text/plain", @@ -1748,7 +1578,7 @@ TEST(NetUtilTest, 
GenerateFileName) { }, { // Filename looks like HTML? "http://www.evil.com/get/malware/here", - "Content-Disposition: filename=\"<blink>Hello kitty</blink>\"", + "filename=\"<blink>Hello kitty</blink>\"", "", "", "text/plain", @@ -1766,7 +1596,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Extension generation "http://www.example.com/my-cat", - "Content-Disposition: filename=my-cat", + "filename=my-cat", "", "", "image/jpeg", @@ -1775,7 +1605,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/my-cat", - "Content-Dispostion: filename=my-cat", + "filename=my-cat", "", "", "text/plain", @@ -1784,7 +1614,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/my-cat", - "Content-Dispostion: filename=my-cat", + "filename=my-cat", "", "", "text/html", @@ -1793,7 +1623,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Unknown MIME type "http://www.example.com/my-cat", - "Content-Dispostion: filename=my-cat", + "filename=my-cat", "", "", "dance/party", @@ -1802,7 +1632,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/my-cat.jpg", - "Content-Dispostion: filename=my-cat.jpg", + "filename=my-cat.jpg", "", "", "text/plain", @@ -1813,7 +1643,7 @@ TEST(NetUtilTest, GenerateFileName) { #if defined(OS_WIN) { "http://www.goodguy.com/evil.exe", - "Content-Dispostion: filename=evil.exe", + "filename=evil.exe", "", "", "image/jpeg", @@ -1822,7 +1652,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/ok.exe", - "Content-Dispostion: filename=ok.exe", + "filename=ok.exe", "", "", "binary/octet-stream", @@ -1831,7 +1661,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/evil.dll", - "Content-Dispostion: filename=evil.dll", + "filename=evil.dll", "", "", "dance/party", @@ -1840,7 +1670,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/evil.exe", - "Content-Dispostion: filename=evil", + "filename=evil", "", "", "application/rss+xml", @@ -1850,16 +1680,16 @@ 
TEST(NetUtilTest, GenerateFileName) { // Test truncation of trailing dots and spaces { "http://www.goodguy.com/evil.exe ", - "Content-Dispostion: filename=evil.exe ", + "filename=evil.exe ", "", "", "binary/octet-stream", L"download", - L"evil.exe-" + L"evil.exe" }, { "http://www.goodguy.com/evil.exe.", - "Content-Dispostion: filename=evil.exe.", + "filename=evil.exe.", "", "", "binary/octet-stream", @@ -1868,7 +1698,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/evil.exe. . .", - "Content-Dispostion: filename=evil.exe. . .", + "filename=evil.exe. . .", "", "", "binary/octet-stream", @@ -1877,7 +1707,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/evil.", - "Content-Dispostion: filename=evil.", + "filename=evil.", "", "", "binary/octet-stream", @@ -1886,7 +1716,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/. . . . .", - "Content-Dispostion: filename=. . . . .", + "filename=. . . . .", "", "", "binary/octet-stream", @@ -1905,7 +1735,7 @@ TEST(NetUtilTest, GenerateFileName) { #endif // OS_WIN { "http://www.goodguy.com/utils.js", - "Content-Dispostion: filename=utils.js", + "filename=utils.js", "", "", "application/x-javascript", @@ -1914,7 +1744,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/contacts.js", - "Content-Dispostion: filename=contacts.js", + "filename=contacts.js", "", "", "application/json", @@ -1923,7 +1753,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/utils.js", - "Content-Dispostion: filename=utils.js", + "filename=utils.js", "", "", "text/javascript", @@ -1932,7 +1762,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/utils.js", - "Content-Dispostion: filename=utils.js", + "filename=utils.js", "", "", "text/javascript;version=2", @@ -1941,7 +1771,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/utils.js", - "Content-Dispostion: filename=utils.js", + "filename=utils.js", "", "", 
"application/ecmascript", @@ -1950,7 +1780,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/utils.js", - "Content-Dispostion: filename=utils.js", + "filename=utils.js", "", "", "application/ecmascript;version=4", @@ -1959,7 +1789,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/program.exe", - "Content-Dispostion: filename=program.exe", + "filename=program.exe", "", "", "application/foo-bar", @@ -1968,7 +1798,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.evil.com/../foo.txt", - "Content-Dispostion: filename=../foo.txt", + "filename=../foo.txt", "", "", "text/plain", @@ -1977,7 +1807,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.evil.com/..\\foo.txt", - "Content-Dispostion: filename=..\\foo.txt", + "filename=..\\foo.txt", "", "", "text/plain", @@ -1986,7 +1816,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.evil.com/.hidden", - "Content-Dispostion: filename=.hidden", + "filename=.hidden", "", "", "text/plain", @@ -1995,7 +1825,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.evil.com/trailing.", - "Content-Disposition: filename=trailing.", + "filename=trailing.", "", "", "dance/party", @@ -2008,7 +1838,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.evil.com/trailing.", - "Content-Disposition: filename=trailing.", + "filename=trailing.", "", "", "text/plain", @@ -2021,7 +1851,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.evil.com/.", - "Content-Dispostion: filename=.", + "filename=.", "", "", "dance/party", @@ -2030,7 +1860,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.evil.com/..", - "Content-Dispostion: filename=..", + "filename=..", "", "", "dance/party", @@ -2039,7 +1869,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.evil.com/...", - "Content-Dispostion: filename=...", + "filename=...", "", "", "dance/party", @@ -2048,7 +1878,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Note that this one doesn't have 
"filename=" on it. "http://www.evil.com/", - "Content-Dispostion: a_file_name.txt", + "a_file_name.txt", "", "", "image/jpeg", @@ -2057,7 +1887,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.evil.com/", - "Content-Dispostion: filename=", + "filename=", "", "", "image/jpeg", @@ -2066,7 +1896,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/simple", - "Content-Dispostion: filename=simple", + "filename=simple", "", "", "application/octet-stream", @@ -2076,7 +1906,7 @@ TEST(NetUtilTest, GenerateFileName) { // Reserved words on Windows { "http://www.goodguy.com/COM1", - "Content-Dispostion: filename=COM1", + "filename=COM1", "", "", "application/foo-bar", @@ -2089,7 +1919,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/COM4.txt", - "Content-Dispostion: filename=COM4.txt", + "filename=COM4.txt", "", "", "text/plain", @@ -2102,7 +1932,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/lpt1.TXT", - "Content-Dispostion: filename=lpt1.TXT", + "filename=lpt1.TXT", "", "", "text/plain", @@ -2115,7 +1945,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/clock$.txt", - "Content-Dispostion: filename=clock$.txt", + "filename=clock$.txt", "", "", "text/plain", @@ -2128,7 +1958,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Validation should also apply to sugested name "http://www.goodguy.com/blah$.txt", - "Content-Dispostion: filename=clock$.txt", + "filename=clock$.txt", "", "clock$.txt", "text/plain", @@ -2141,7 +1971,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.goodguy.com/mycom1.foo", - "Content-Dispostion: filename=mycom1.foo", + "filename=mycom1.foo", "", "", "text/plain", @@ -2150,7 +1980,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.badguy.com/Setup.exe.local", - "Content-Dispostion: filename=Setup.exe.local", + "filename=Setup.exe.local", "", "", "application/foo-bar", @@ -2176,7 +2006,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { 
"http://www.badguy.com/Setup.exe.lnk", - "Content-Dispostion: filename=Setup.exe.lnk", + "filename=Setup.exe.lnk", "", "", "application/foo-bar", @@ -2189,7 +2019,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.badguy.com/Desktop.ini", - "Content-Dispostion: filename=Desktop.ini", + "filename=Desktop.ini", "", "", "application/foo-bar", @@ -2202,7 +2032,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.badguy.com/Thumbs.db", - "Content-Dispostion: filename=Thumbs.db", + "filename=Thumbs.db", "", "", "application/foo-bar", @@ -2215,7 +2045,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.hotmail.com", - "Content-Dispostion: filename=source.jpg", + "filename=source.jpg", "", "", "application/x-javascript", @@ -2224,7 +2054,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // http://crbug.com/5772. "http://www.example.com/foo.tar.gz", - "Content-Dispostion: ", + "", "", "", "application/x-tar", @@ -2233,7 +2063,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // http://crbug.com/52250. "http://www.example.com/foo.tgz", - "Content-Dispostion: ", + "", "", "", "application/x-tar", @@ -2242,7 +2072,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // http://crbug.com/7337. "http://maged.lordaeron.org/blank.reg", - "Content-Dispostion: ", + "", "", "", "text/x-registry", @@ -2251,7 +2081,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { "http://www.example.com/bar.tar", - "Content-Dispostion: ", + "", "", "", "application/x-tar", @@ -2269,7 +2099,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // http://crbug.com/20337 "http://www.example.com/.download.txt", - "Content-Dispostion: filename=.download.txt", + "filename=.download.txt", "", "", "text/plain", @@ -2296,7 +2126,7 @@ TEST(NetUtilTest, GenerateFileName) { }, { // Shouldn't overwrite C-D specified extension. 
"http://www.example.com/npdf.php?fn=foobar.pdf", - "Content-Disposition: filename=foobar.jpg", + "filename=foobar.jpg", "", "", "text/plain", diff --git a/net/http/http_content_disposition.cc b/net/http/http_content_disposition.cc new file mode 100644 index 0000000..4e5d94e --- /dev/null +++ b/net/http/http_content_disposition.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/http/http_content_disposition.h" + +#include "base/logging.h" +#include "base/string_util.h" +#include "net/base/net_util.h" +#include "net/http/http_util.h" + +namespace net { + +HttpContentDisposition::HttpContentDisposition( + const std::string& header, const std::string& referrer_charset) + : type_(INLINE) { + Parse(header, referrer_charset); +} + +HttpContentDisposition::~HttpContentDisposition() { +} + +std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( + std::string::const_iterator begin, std::string::const_iterator end) { + DCHECK(type_ == INLINE); + + std::string::const_iterator delimiter = std::find(begin, end, ';'); + + // If there's an '=' in before the first ';', then the Content-Disposition + // header is malformed, and we treat the first bytes as a parameter rather + // than a disposition-type. 
+ if (std::find(begin, delimiter, '=') != delimiter) + return begin; + + std::string::const_iterator type_begin = begin; + std::string::const_iterator type_end = delimiter; + HttpUtil::TrimLWS(&type_begin, &type_end); + if (!LowerCaseEqualsASCII(type_begin, type_end, "inline")) + type_ = ATTACHMENT; + return delimiter; +} + +// http://tools.ietf.org/html/rfc6266 +// +// content-disposition = "Content-Disposition" ":" +// disposition-type *( ";" disposition-parm ) +// +// disposition-type = "inline" | "attachment" | disp-ext-type +// ; case-insensitive +// disp-ext-type = token +// +// disposition-parm = filename-parm | disp-ext-parm +// +// filename-parm = "filename" "=" value +// | "filename*" "=" ext-value +// +// disp-ext-parm = token "=" value +// | ext-token "=" ext-value +// ext-token = <the characters in token, followed by "*"> +// +void HttpContentDisposition::Parse(const std::string& header, + const std::string& referrer_charset) { + DCHECK(type_ == INLINE); + DCHECK(filename_.empty()); + + std::string::const_iterator pos = header.begin(); + std::string::const_iterator end = header.end(); + pos = ConsumeDispositionType(pos, end); + + std::string filename; + std::string ext_filename; + + HttpUtil::NameValuePairsIterator iter(pos, end, ';'); + while (iter.GetNext()) { + if (LowerCaseEqualsASCII(iter.name_begin(), + iter.name_end(), + "filename")) { + DecodeFilenameValue(iter.value(), referrer_charset, &filename); + } else if (LowerCaseEqualsASCII(iter.name_begin(), + iter.name_end(), + "name")) { + DecodeFilenameValue(iter.value(), referrer_charset, &filename); + } else if (LowerCaseEqualsASCII(iter.name_begin(), + iter.name_end(), + "filename*")) { + DecodeExtValue(iter.raw_value(), &ext_filename); + } + } + + filename_ = ext_filename.empty() ? 
filename : ext_filename; +} + +} // namespace net diff --git a/net/http/http_content_disposition.h b/net/http/http_content_disposition.h new file mode 100644 index 0000000..c75610a --- /dev/null +++ b/net/http/http_content_disposition.h @@ -0,0 +1,42 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_HTTP_HTTP_CONTENT_DISPOSITION_H_ +#define NET_HTTP_HTTP_CONTENT_DISPOSITION_H_ +#pragma once + +#include <string> + +#include "base/basictypes.h" +#include "net/base/net_export.h" + +namespace net { + +class NET_EXPORT_PRIVATE HttpContentDisposition { + public: + enum Type { + INLINE, + ATTACHMENT, + }; + + HttpContentDisposition(const std::string& header, + const std::string& referrer_charset); + ~HttpContentDisposition(); + + const std::string& filename() const { return filename_; } + + private: + void Parse(const std::string& header, const std::string& referrer_charset); + std::string::const_iterator ConsumeDispositionType( + std::string::const_iterator begin, std::string::const_iterator end); + + Type type_; // TODO(abarth): Add an accessor and tests. + std::string filename_; + + DISALLOW_COPY_AND_ASSIGN(HttpContentDisposition); +}; + +} // namespace net + +#endif // NET_HTTP_HTTP_CONTENT_DISPOSITION_H_ diff --git a/net/http/http_content_disposition_unittest.cc b/net/http/http_content_disposition_unittest.cc new file mode 100644 index 0000000..3d8750e --- /dev/null +++ b/net/http/http_content_disposition_unittest.cc @@ -0,0 +1,201 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "net/http/http_content_disposition.h" + +#include "base/utf_string_conversions.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace net { + +namespace { + +struct FileNameCDCase { + const char* header; + const char* referrer_charset; + const wchar_t* expected; +}; + +} // anonymous namespace + +TEST(HttpContentDispositionTest, Filename) { + const FileNameCDCase tests[] = { + // Test various forms of C-D header fields emitted by web servers. + {"inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"}, + {"inline; name=\"abcde.pdf\"", "", L"abcde.pdf"}, + {"attachment; filename=abcde.pdf", "", L"abcde.pdf"}, + {"attachment; name=abcde.pdf", "", L"abcde.pdf"}, + {"attachment; filename=abc,de.pdf", "", L"abc,de.pdf"}, + {"filename=abcde.pdf", "", L"abcde.pdf"}, + {"filename= abcde.pdf", "", L"abcde.pdf"}, + {"filename =abcde.pdf", "", L"abcde.pdf"}, + {"filename = abcde.pdf", "", L"abcde.pdf"}, + {"filename\t=abcde.pdf", "", L"abcde.pdf"}, + {"filename \t\t =abcde.pdf", "", L"abcde.pdf"}, + {"name=abcde.pdf", "", L"abcde.pdf"}, + {"inline; filename=\"abc%20de.pdf\"", "", + L"abc de.pdf"}, + // Unbalanced quotation mark + {"filename=\"abcdef.pdf", "", L"abcdef.pdf"}, + // Whitespaces are converted to a space. 
+ {"inline; filename=\"abc \t\nde.pdf\"", "", + L"abc de.pdf"}, + // %-escaped UTF-8 + {"attachment; filename=\"%EC%98%88%EC%88%A0%20" + "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"}, + {"attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1" + "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"}, + {"attachment; filename=\"%EC%98%88%EC%88%A0 \n" + "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"}, + // RFC 2047 with various charsets and Q/B encodings + {"attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" + "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"}, + {"attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=", + "", L"\xc608\xc220 3.png"}, + {"attachment; filename==?utf-8?Q?=E8=8A=B8=E8" + "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"}, + {"attachment; filename==?utf-8?Q?=F0=90=8C=B0" + "_3=2Epng?=", "", L"\U00010330 3.png"}, + {"inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"", + "", L"caf\x00e9 .png"}, + // Space after an encoded word should be removed. + {"inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"", + "", L"caf\x00e9 .png"}, + // Two encoded words with different charsets (not very likely to be emitted + // by web servers in the wild). Spaces between them are removed. + {"inline; filename=\"=?euc-kr?b?v7m8+iAz?=" + " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "", + L"\xc608\xc220 3\xc608\xc220.png"}, + {"attachment; filename=\"=?windows-1252?Q?caf=E9?=" + " =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"}, + // Non-ASCII string is passed through and treated as UTF-8 as long as + // it's valid as UTF-8 and regardless of |referrer_charset|. + {"attachment; filename=caf\xc3\xa9.png", + "iso-8859-1", L"caf\x00e9.png"}, + {"attachment; filename=caf\xc3\xa9.png", + "", L"caf\x00e9.png"}, + // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset. + {"attachment; filename=caf\xe5.png", + "windows-1253", L"caf\x03b5.png"}, +#if 0 + // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage. 
+ // TODO(jungshik): We need to set the OS default codepage + // to a specific value before testing. On Windows, we can use + // SetThreadLocale(). + {"attachment; filename=\xb0\xa1\xb0\xa2.png", + "", L"\xac00\xac01.png"}, +#endif + // Failure cases + // Invalid hex-digit "G" + {"attachment; filename==?iiso88591?Q?caf=EG?=", "", + L""}, + // Incomplete RFC 2047 encoded-word (missing '=' at the end) + {"attachment; filename==?iso88591?Q?caf=E3?", "", L""}, + // Extra character at the end of an encoded word + {"attachment; filename==?iso88591?Q?caf=E3?==", + "", L""}, + // Extra token at the end of an encoded word + {"attachment; filename==?iso88591?Q?caf=E3?=?", + "", L""}, + {"attachment; filename==?iso88591?Q?caf=E3?=?=", + "", L""}, + // Incomplete hex-escaped chars + {"attachment; filename==?windows-1252?Q?=63=61=E?=", + "", L""}, + {"attachment; filename=%EC%98%88%EC%88%A", "", L""}, + // %-escaped non-UTF-8 encoding is an "error" + {"attachment; filename=%B7%DD%BD%D1.png", "", L""}, + // Two RFC 2047 encoded words in a row without a space is an error. + {"attachment; filename==?windows-1252?Q?caf=E3?=" + "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""}, + + // RFC 5987 tests with Filename* : see http://tools.ietf.org/html/rfc5987 + {"attachment; filename*=foo.html", "", L""}, + {"attachment; filename*=foo'.html", "", L""}, + {"attachment; filename*=''foo'.html", "", L""}, + {"attachment; filename*=''foo.html'", "", L""}, + {"attachment; filename*=''f\"oo\".html'", "", L""}, + {"attachment; filename*=bogus_charset''foo.html'", + "", L""}, + {"attachment; filename*='en'foo.html'", "", L""}, + {"attachment; filename*=iso-8859-1'en'foo.html", "", + L"foo.html"}, + {"attachment; filename*=utf-8'en'foo.html", "", + L"foo.html"}, + // charset cannot be omitted. + {"attachment; filename*='es'f\xfa.html'", "", L""}, + // Non-ASCII bytes are not allowed. 
+ {"attachment; filename*=iso-8859-1'es'f\xfa.html", "", + L""}, + {"attachment; filename*=utf-8'es'f\xce\xba.html", "", + L""}, + // TODO(jshin): Space should be %-encoded, but currently, we allow + // spaces. + {"inline; filename*=iso88591''cafe foo.png", "", + L"cafe foo.png"}, + + // Filename* tests converted from Q-encoded tests above. + {"attachment; filename*=EUC-JP''%B7%DD%BD%D13%2Epng", + "", L"\x82b8\x8853" L"3.png"}, + {"attachment; filename*=utf-8''" + "%E8%8A%B8%E8%A1%93%203%2Epng", "", L"\x82b8\x8853 3.png"}, + {"attachment; filename*=utf-8''%F0%90%8C%B0 3.png", "", + L"\U00010330 3.png"}, + {"inline; filename*=Euc-Kr'ko'%BF%B9%BC%FA%2Epng", "", + L"\xc608\xc220.png"}, + {"attachment; filename*=windows-1252''caf%E9.png", "", + L"caf\x00e9.png"}, + + // http://greenbytes.de/tech/tc2231/ filename* test cases. + // attwithisofn2231iso + {"attachment; filename*=iso-8859-1''foo-%E4.html", "", + L"foo-\xe4.html"}, + // attwithfn2231utf8 + {"attachment; filename*=" + "UTF-8''foo-%c3%a4-%e2%82%ac.html", "", L"foo-\xe4-\x20ac.html"}, + // attwithfn2231noc : no encoding specified but UTF-8 is used. + {"attachment; filename*=''foo-%c3%a4-%e2%82%ac.html", + "", L""}, + // attwithfn2231utf8comp + {"attachment; filename*=UTF-8''foo-a%cc%88.html", "", + L"foo-\xe4.html"}, +#ifdef ICU_SHOULD_FAIL_CONVERSION_ON_INVALID_CHARACTER + // This does not work because we treat ISO-8859-1 as synonymous with + // Windows-1252 per HTML5. For HTTP, in theory, we're not + // supposed to. 
+ // attwithfn2231utf8-bad + {"attachment; filename*=" + "iso-8859-1''foo-%c3%a4-%e2%82%ac.html", "", L""}, +#endif + // attwithfn2231ws1 + {"attachment; filename *=UTF-8''foo-%c3%a4.html", "", + L""}, + // attwithfn2231ws2 + {"attachment; filename*= UTF-8''foo-%c3%a4.html", "", + L"foo-\xe4.html"}, + // attwithfn2231ws3 + {"attachment; filename* =UTF-8''foo-%c3%a4.html", "", + L"foo-\xe4.html"}, + // attwithfn2231quot + {"attachment; filename*=\"UTF-8''foo-%c3%a4.html\"", + "", L""}, + // attfnboth + {"attachment; filename=\"foo-ae.html\"; " + "filename*=UTF-8''foo-%c3%a4.html", "", L"foo-\xe4.html"}, + // attfnboth2 + {"attachment; filename*=UTF-8''foo-%c3%a4.html; " + "filename=\"foo-ae.html\"", "", L"foo-\xe4.html"}, + // attnewandfn + {"attachment; foobar=x; filename=\"foo.html\"", "", + L"foo.html"}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) { + HttpContentDisposition header(tests[i].header, tests[i].referrer_charset); + EXPECT_EQ(tests[i].expected, + UTF8ToWide(header.filename())) + << "Failed on input: " << tests[i].header; + } +} + +} // namespace net diff --git a/net/http/http_util.h b/net/http/http_util.h index 9a4b8ce..a09377e 100644 --- a/net/http/http_util.h +++ b/net/http/http_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -307,9 +307,13 @@ class NET_EXPORT HttpUtil { } std::string value() const { return value_is_quoted_ ? unquoted_value_ : std::string(value_begin_, - value_end_); + value_end_); } + // The value before unquoting (if any). 
+ std::string raw_value() const { return std::string(value_begin_, + value_end_); } + private: HttpUtil::ValuesIterator props_; bool valid_; diff --git a/net/net.gyp b/net/net.gyp index 59a68b2..6c301a3 100644 --- a/net/net.gyp +++ b/net/net.gyp @@ -417,6 +417,8 @@ 'http/http_cache.h', 'http/http_cache_transaction.cc', 'http/http_cache_transaction.h', + 'http/http_content_disposition.cc', + 'http/http_content_disposition.h', 'http/http_chunked_decoder.cc', 'http/http_chunked_decoder.h', 'http/http_mac_signature.cc', @@ -1101,6 +1103,7 @@ 'http/http_byte_range_unittest.cc', 'http/http_cache_unittest.cc', 'http/http_chunked_decoder_unittest.cc', + 'http/http_content_disposition_unittest.cc', 'http/http_mac_signature_unittest.cc', 'http/http_network_layer_unittest.cc', 'http/http_network_transaction_unittest.cc', |