summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorabarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-01-27 06:03:16 +0000
committerabarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-01-27 06:03:16 +0000
commit56eab2f216ce86217149753a407f981cb6d5de6c (patch)
treebf4b45a8fbb845b5ec177332df0ebafcfaedb98f /net
parent9b961c91eeb8ba43be7918563a07432cbcb1c4df (diff)
downloadchromium_src-56eab2f216ce86217149753a407f981cb6d5de6c.zip
chromium_src-56eab2f216ce86217149753a407f981cb6d5de6c.tar.gz
chromium_src-56eab2f216ce86217149753a407f981cb6d5de6c.tar.bz2
Improve parsing of the Content-Disposition header
Previously, we were using GetHeaderParamValue to parse the Content-Disposition header, which describes itself as a "quick and dirty implementation." After this patch, we use more of our normal HTTP parsing machinery, making our parsing much less quirky and better aligned with RFC 6266. Some notes: 1) Many of the test cases for parsing the Content-Disposition header included the string "Content-Disposition: " in the input. I've looked through all of the callers of these functions, and that seems to be completely bogus. The old parser wasn't careful enough to see that as a problem, but the new one follows the spec more closely. I've updated the test cases to remove this string. 2) After this patch, there's a bunch of code in net_util.cc that really should be moved to http_content_disposition.cc. I didn't move that code in this patch because I didn't want this patch to be too large. I'll move it in a future patch. 3) In a future patch, I'll audit the codebase for callers of GetHeaderParamValue. With any luck, we'll be able to remove them all and delete this less-than-amazing function. BUG=65423 Review URL: http://codereview.chromium.org/9234055 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@119378 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r--net/base/net_util.cc115
-rw-r--r--net/base/net_util.h32
-rw-r--r--net/base/net_util_unittest.cc360
-rw-r--r--net/http/http_content_disposition.cc93
-rw-r--r--net/http/http_content_disposition.h42
-rw-r--r--net/http/http_content_disposition_unittest.cc201
-rw-r--r--net/http/http_util.h8
-rw-r--r--net/net.gyp3
8 files changed, 498 insertions, 356 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index b4f1e579..d2fd1f5 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -63,6 +63,7 @@
#if defined(OS_WIN)
#include "net/base/winsock_init.h"
#endif
+#include "net/http/http_content_disposition.h"
#include "unicode/datefmt.h"
#include "unicode/regex.h"
#include "unicode/ucnv.h"
@@ -357,38 +358,6 @@ bool DecodeWord(const std::string& encoded_word,
return false;
}
-bool DecodeParamValue(const std::string& input,
- const std::string& referrer_charset,
- std::string* output) {
- std::string tmp;
- // Tokenize with whitespace characters.
- StringTokenizer t(input, " \t\n\r");
- t.set_options(StringTokenizer::RETURN_DELIMS);
- bool is_previous_token_rfc2047 = true;
- while (t.GetNext()) {
- if (t.token_is_delim()) {
- // If the previous non-delimeter token is not RFC2047-encoded,
- // put in a space in its place. Otheriwse, skip over it.
- if (!is_previous_token_rfc2047) {
- tmp.push_back(' ');
- }
- continue;
- }
- // We don't support a single multibyte character split into
- // adjacent encoded words. Some broken mail clients emit headers
- // with that problem, but most web servers usually encode a filename
- // in a single encoded-word. Firefox/Thunderbird do not support
- // it, either.
- std::string decoded;
- if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,
- &decoded))
- return false;
- tmp.append(decoded);
- }
- output->swap(tmp);
- return true;
-}
-
// Does some simple normalization of scripts so we can allow certain scripts
// to exist together.
// TODO(brettw) bug 880223: we should allow some other languages to be
@@ -1217,39 +1186,57 @@ bool DecodeCharset(const std::string& input,
return true;
}
-std::string GetFileNameFromCD(const std::string& header,
- const std::string& referrer_charset) {
- std::string decoded;
- std::string param_value = GetHeaderParamValue(header, "filename*",
- QuoteRule::KEEP_OUTER_QUOTES);
- if (!param_value.empty()) {
- if (param_value.find('"') == std::string::npos) {
- std::string charset;
- std::string value;
- if (DecodeCharset(param_value, &charset, &value)) {
- // RFC 5987 value should be ASCII-only.
- if (!IsStringASCII(value))
- return std::string();
- std::string tmp = UnescapeURLComponent(
- value,
- UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
- if (base::ConvertToUtf8AndNormalize(tmp, charset, &decoded))
- return decoded;
+bool DecodeFilenameValue(const std::string& input,
+ const std::string& referrer_charset,
+ std::string* output) {
+ std::string tmp;
+ // Tokenize with whitespace characters.
+ StringTokenizer t(input, " \t\n\r");
+ t.set_options(StringTokenizer::RETURN_DELIMS);
+ bool is_previous_token_rfc2047 = true;
+ while (t.GetNext()) {
+ if (t.token_is_delim()) {
+ // If the previous non-delimeter token is not RFC2047-encoded,
+ // put in a space in its place. Otheriwse, skip over it.
+ if (!is_previous_token_rfc2047) {
+ tmp.push_back(' ');
}
+ continue;
}
+ // We don't support a single multibyte character split into
+ // adjacent encoded words. Some broken mail clients emit headers
+ // with that problem, but most web servers usually encode a filename
+ // in a single encoded-word. Firefox/Thunderbird do not support
+ // it, either.
+ std::string decoded;
+ if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,
+ &decoded))
+ return false;
+ tmp.append(decoded);
}
- param_value = GetHeaderParamValue(header, "filename",
- QuoteRule::REMOVE_OUTER_QUOTES);
- if (param_value.empty()) {
- // Some servers use 'name' parameter.
- param_value = GetHeaderParamValue(header, "name",
- QuoteRule::REMOVE_OUTER_QUOTES);
+ output->swap(tmp);
+ return true;
+}
+
+bool DecodeExtValue(const std::string& param_value, std::string* decoded) {
+ if (param_value.find('"') != std::string::npos)
+ return false;
+
+ std::string charset;
+ std::string value;
+ if (!DecodeCharset(param_value, &charset, &value))
+ return false;
+
+ // RFC 5987 value should be ASCII-only.
+ if (!IsStringASCII(value)) {
+ decoded->clear();
+ return true;
}
- if (param_value.empty())
- return std::string();
- if (DecodeParamValue(param_value, referrer_charset, &decoded))
- return decoded;
- return std::string();
+
+ std::string unescaped = UnescapeURLComponent(value,
+ UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
+
+ return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded);
}
// TODO(mpcomplete): This is a quick and dirty implementation for now. I'm
@@ -1472,8 +1459,10 @@ string16 GetSuggestedFilename(const GURL& url,
bool overwrite_extension = false;
// Try to extract a filename from content-disposition first.
- if (!content_disposition.empty())
- filename = GetFileNameFromCD(content_disposition, referrer_charset);
+ if (!content_disposition.empty()) {
+ HttpContentDisposition header(content_disposition, referrer_charset);
+ filename = header.filename();
+ }
// Then try to use the suggested name.
if (filename.empty() && !suggested_name.empty())
diff --git a/net/base/net_util.h b/net/base/net_util.h
index f72938f..fed20f0 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -151,31 +151,11 @@ NET_EXPORT std::string GetHeaderParamValue(const std::string& header,
const std::string& param_name,
QuoteRule::Type quote_rule);
-// Return the filename extracted from Content-Disposition header. The following
-// formats are tried in order listed below:
-//
-// 1. RFC 5987
-// 2. RFC 2047
-// 3. Raw-8bit-characters :
-// a. UTF-8, b. referrer_charset, c. default os codepage.
-// 4. %-escaped UTF-8.
-//
-// In step 3, if referrer_charset is empty(i.e. unknown), 3b is skipped.
-// In step 4, the fallback charsets tried in step 3 are not tried. We
-// can consider doing that later.
-//
-// When a param value is ASCII, but is not in format #2 or format #4 above,
-// it is returned as it is unless it's pretty close to two supported
-// formats but not well-formed. In that case, an empty string is returned.
-//
-// In any case, a caller must check for the empty return value and resort to
-// another means to get a filename (e.g. url).
-//
-// This function does not do any escaping and callers are responsible for
-// escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit.
-NET_EXPORT_PRIVATE std::string GetFileNameFromCD(
- const std::string& header,
- const std::string& referrer_charset);
+// TODO(abarth): Move these functions to http_content_disposition.cc.
+bool DecodeFilenameValue(const std::string& input,
+ const std::string& referrer_charset,
+ std::string* output);
+bool DecodeExtValue(const std::string& value, std::string* output);
// Converts the given host name to unicode characters. This can be called for
// any host name, if the input is not IDN or is invalid in some way, we'll just
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 7fe39ed..f34748b 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -809,185 +809,6 @@ TEST(NetUtilTest, GetHeaderParamValueQuotes) {
}
}
-TEST(NetUtilTest, GetFileNameFromCD) {
- const FileNameCDCase tests[] = {
- // Test various forms of C-D header fields emitted by web servers.
- {"content-disposition: inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"},
- {"content-disposition: inline; name=\"abcde.pdf\"", "", L"abcde.pdf"},
- {"content-disposition: attachment; filename=abcde.pdf", "", L"abcde.pdf"},
- {"content-disposition: attachment; name=abcde.pdf", "", L"abcde.pdf"},
- {"content-disposition: attachment; filename=abc,de.pdf", "", L"abc,de.pdf"},
- {"content-disposition: filename=abcde.pdf", "", L"abcde.pdf"},
- {"content-disposition: filename= abcde.pdf", "", L"abcde.pdf"},
- {"content-disposition: filename =abcde.pdf", "", L"abcde.pdf"},
- {"content-disposition: filename = abcde.pdf", "", L"abcde.pdf"},
- {"content-disposition: filename\t=abcde.pdf", "", L"abcde.pdf"},
- {"content-disposition: filename \t\t =abcde.pdf", "", L"abcde.pdf"},
- {"content-disposition: name=abcde.pdf", "", L"abcde.pdf"},
- {"content-disposition: inline; filename=\"abc%20de.pdf\"", "",
- L"abc de.pdf"},
- // Unbalanced quotation mark
- {"content-disposition: filename=\"abcdef.pdf", "", L"abcdef.pdf"},
- // Whitespaces are converted to a space.
- {"content-disposition: inline; filename=\"abc \t\nde.pdf\"", "",
- L"abc de.pdf"},
- // %-escaped UTF-8
- {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
- "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
- {"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1"
- "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"},
- {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n"
- "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
- // RFC 2047 with various charsets and Q/B encodings
- {"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
- "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"},
- {"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=",
- "", L"\xc608\xc220 3.png"},
- {"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8"
- "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"},
- {"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0"
- "_3=2Epng?=", "", L"\U00010330 3.png"},
- {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"",
- "", L"caf\x00e9 .png"},
- // Space after an encoded word should be removed.
- {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"",
- "", L"caf\x00e9 .png"},
- // Two encoded words with different charsets (not very likely to be emitted
- // by web servers in the wild). Spaces between them are removed.
- {"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?="
- " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "",
- L"\xc608\xc220 3\xc608\xc220.png"},
- {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E9?="
- " =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"},
- // Non-ASCII string is passed through and treated as UTF-8 as long as
- // it's valid as UTF-8 and regardless of |referrer_charset|.
- {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
- "iso-8859-1", L"caf\x00e9.png"},
- {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
- "", L"caf\x00e9.png"},
- // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset.
- {"Content-Disposition: attachment; filename=caf\xe5.png",
- "windows-1253", L"caf\x03b5.png"},
-#if 0
- // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage.
- // TODO(jungshik): We need to set the OS default codepage
- // to a specific value before testing. On Windows, we can use
- // SetThreadLocale().
- {"Content-Disposition: attachment; filename=\xb0\xa1\xb0\xa2.png",
- "", L"\xac00\xac01.png"},
-#endif
- // Failure cases
- // Invalid hex-digit "G"
- {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", "",
- L""},
- // Incomplete RFC 2047 encoded-word (missing '='' at the end)
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", "", L""},
- // Extra character at the end of an encoded word
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==",
- "", L""},
- // Extra token at the end of an encoded word
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?",
- "", L""},
- {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=",
- "", L""},
- // Incomplete hex-escaped chars
- {"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=",
- "", L""},
- {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", "", L""},
- // %-escaped non-UTF-8 encoding is an "error"
- {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", "", L""},
- // Two RFC 2047 encoded words in a row without a space is an error.
- {"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?="
- "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""},
-
- // RFC 5987 tests with Filename* : see http://tools.ietf.org/html/rfc5987
- {"Content-Disposition: attachment; filename*=foo.html", "", L""},
- {"Content-Disposition: attachment; filename*=foo'.html", "", L""},
- {"Content-Disposition: attachment; filename*=''foo'.html", "", L""},
- {"Content-Disposition: attachment; filename*=''foo.html'", "", L""},
- {"Content-Disposition: attachment; filename*=''f\"oo\".html'", "", L""},
- {"Content-Disposition: attachment; filename*=bogus_charset''foo.html'",
- "", L""},
- {"Content-Disposition: attachment; filename*='en'foo.html'", "", L""},
- {"Content-Disposition: attachment; filename*=iso-8859-1'en'foo.html", "",
- L"foo.html"},
- {"Content-Disposition: attachment; filename*=utf-8'en'foo.html", "",
- L"foo.html"},
- // charset cannot be omitted.
- {"Content-Disposition: attachment; filename*='es'f\xfa.html'", "", L""},
- // Non-ASCII bytes are not allowed.
- {"Content-Disposition: attachment; filename*=iso-8859-1'es'f\xfa.html", "",
- L""},
- {"Content-Disposition: attachment; filename*=utf-8'es'f\xce\xba.html", "",
- L""},
- // TODO(jshin): Space should be %-encoded, but currently, we allow
- // spaces.
- {"Content-Disposition: inline; filename*=iso88591''cafe foo.png", "",
- L"cafe foo.png"},
-
- // Filename* tests converted from Q-encoded tests above.
- {"Content-Disposition: attachment; filename*=EUC-JP''%B7%DD%BD%D13%2Epng",
- "", L"\x82b8\x8853" L"3.png"},
- {"Content-Disposition: attachment; filename*=utf-8''"
- "%E8%8A%B8%E8%A1%93%203%2Epng", "", L"\x82b8\x8853 3.png"},
- {"Content-Disposition: attachment; filename*=utf-8''%F0%90%8C%B0 3.png", "",
- L"\U00010330 3.png"},
- {"Content-Disposition: inline; filename*=Euc-Kr'ko'%BF%B9%BC%FA%2Epng", "",
- L"\xc608\xc220.png"},
- {"Content-Disposition: attachment; filename*=windows-1252''caf%E9.png", "",
- L"caf\x00e9.png"},
-
- // http://greenbytes.de/tech/tc2231/ filename* test cases.
- // attwithisofn2231iso
- {"Content-Disposition: attachment; filename*=iso-8859-1''foo-%E4.html", "",
- L"foo-\xe4.html"},
- // attwithfn2231utf8
- {"Content-Disposition: attachment; filename*="
- "UTF-8''foo-%c3%a4-%e2%82%ac.html", "", L"foo-\xe4-\x20ac.html"},
- // attwithfn2231noc : no encoding specified but UTF-8 is used.
- {"Content-Disposition: attachment; filename*=''foo-%c3%a4-%e2%82%ac.html",
- "", L""},
- // attwithfn2231utf8comp
- {"Content-Disposition: attachment; filename*=UTF-8''foo-a%cc%88.html", "",
- L"foo-\xe4.html"},
-#ifdef ICU_SHOULD_FAIL_CONVERSION_ON_INVALID_CHARACTER
- // This does not work because we treat ISO-8859-1 synonymous with
- // Windows-1252 per HTML5. For HTTP, in theory, we're not
- // supposed to.
- // attwithfn2231utf8-bad
- {"Content-Disposition: attachment; filename*="
- "iso-8859-1''foo-%c3%a4-%e2%82%ac.html", "", L""},
-#endif
- // attwithfn2231ws1
- {"Content-Disposition: attachment; filename *=UTF-8''foo-%c3%a4.html", "",
- L""},
- // attwithfn2231ws2
- {"Content-Disposition: attachment; filename*= UTF-8''foo-%c3%a4.html", "",
- L"foo-\xe4.html"},
- // attwithfn2231ws3
- {"Content-Disposition: attachment; filename* =UTF-8''foo-%c3%a4.html", "",
- L"foo-\xe4.html"},
- // attwithfn2231quot
- {"Content-Disposition: attachment; filename*=\"UTF-8''foo-%c3%a4.html\"",
- "", L""},
- // attfnboth
- {"Content-Disposition: attachment; filename=\"foo-ae.html\"; "
- "filename*=UTF-8''foo-%c3%a4.html", "", L"foo-\xe4.html"},
- // attfnboth2
- {"Content-Disposition: attachment; filename*=UTF-8''foo-%c3%a4.html; "
- "filename=\"foo-ae.html\"", "", L"foo-\xe4.html"},
- // attnewandfn
- {"Content-Disposition: attachment; foobar=x; filename=\"foo.html\"", "",
- L"foo.html"},
- };
- for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
- EXPECT_EQ(tests[i].expected,
- UTF8ToWide(GetFileNameFromCD(tests[i].header_field,
- tests[i].referrer_charset)))
- << "Failed on input: " << tests[i].header_field;
- }
-}
-
TEST(NetUtilTest, IDNToUnicodeFast) {
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
for (size_t j = 0; j < arraysize(kLanguages); j++) {
@@ -1233,7 +1054,7 @@ TEST(NetUtilTest, GenerateFileName) {
const GenerateFilenameCase selection_tests[] = {
{
"http://www.google.com/",
- "Content-disposition: attachment; filename=test.html",
+ "attachment; filename=test.html",
"",
"",
"",
@@ -1242,7 +1063,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.google.com/",
- "Content-disposition: attachment; filename=\"test.html\"",
+ "attachment; filename=\"test.html\"",
"",
"",
"",
@@ -1251,7 +1072,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.google.com/",
- "Content-disposition: attachment; filename= \"test.html\"",
+ "attachment; filename= \"test.html\"",
"",
"",
"",
@@ -1260,7 +1081,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.google.com/",
- "Content-disposition: attachment; filename = \"test.html\"",
+ "attachment; filename = \"test.html\"",
"",
"",
"",
@@ -1269,7 +1090,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // filename is whitespace. Should failover to URL host
"http://www.google.com/",
- "Content-disposition: attachment; filename= ",
+ "attachment; filename= ",
"",
"",
"",
@@ -1278,7 +1099,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // No filename.
"http://www.google.com/path/test.html",
- "Content-disposition: attachment",
+ "attachment",
"",
"",
"",
@@ -1287,7 +1108,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Ditto
"http://www.google.com/path/test.html",
- "Content-disposition: attachment;",
+ "attachment;",
"",
"",
"",
@@ -1361,7 +1182,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // C-D should override default
"http://www.google.com/",
- "Content-disposition: attachment; filename =\"test.html\"",
+ "attachment; filename =\"test.html\"",
"",
"",
"",
@@ -1379,7 +1200,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.google.com/",
- "Content-disposition: attachment; filename=\"../test.html\"",
+ "attachment; filename=\"../test.html\"",
"",
"",
"",
@@ -1388,7 +1209,16 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.google.com/",
- "Content-disposition: attachment; filename=\"..\\test.html\"",
+ "attachment; filename=\"..\\test.html\"",
+ "",
+ "",
+ "",
+ L"",
+ L"test.html"
+ },
+ {
+ "http://www.google.com/",
+ "attachment; filename=\"..\\\\test.html\"",
"",
"",
"",
@@ -1397,7 +1227,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Filename disappears after leading and trailing periods are removed.
"http://www.google.com/",
- "Content-disposition: attachment; filename=\"..\"",
+ "attachment; filename=\"..\"",
"",
"",
"",
@@ -1406,7 +1236,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // C-D specified filename disappears. Failover to final filename.
"http://www.google.com/test.html",
- "Content-disposition: attachment; filename=\"..\"",
+ "attachment; filename=\"..\"",
"",
"",
"",
@@ -1416,7 +1246,7 @@ TEST(NetUtilTest, GenerateFileName) {
// Below is a small subset of cases taken from GetFileNameFromCD test above.
{
"http://www.google.com/",
- "Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
+ "attachment; filename=\"%EC%98%88%EC%88%A0%20"
"%EC%98%88%EC%88%A0.jpg\"",
"",
"",
@@ -1435,7 +1265,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.google.com/",
- "Content-disposition: attachment;",
+ "attachment;",
"",
"",
"",
@@ -1444,7 +1274,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.google.com/",
- "Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
+ "attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
"D13=2Epng?=\"",
"",
"",
@@ -1454,7 +1284,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/images?id=3",
- "Content-Disposition: attachment; filename=caf\xc3\xa9.png",
+ "attachment; filename=caf\xc3\xa9.png",
"iso-8859-1",
"",
"",
@@ -1463,7 +1293,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/images?id=3",
- "Content-Disposition: attachment; filename=caf\xe5.png",
+ "attachment; filename=caf\xe5.png",
"windows-1253",
"",
"",
@@ -1472,7 +1302,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/file?id=3",
- "Content-Disposition: attachment; name=\xcf\xc2\xd4\xd8.zip",
+ "attachment; name=\xcf\xc2\xd4\xd8.zip",
"GBK",
"",
"",
@@ -1481,7 +1311,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Invalid C-D header. Extracts filename from url.
"http://www.google.com/test.html",
- "Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=",
+ "attachment; filename==?iiso88591?Q?caf=EG?=",
"",
"",
"",
@@ -1554,7 +1384,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // The content-disposition has higher precedence over the suggested name.
"http://www.google.com/test",
- "Content-disposition: attachment; filename=test.html",
+ "attachment; filename=test.html",
"",
"suggested",
"",
@@ -1577,7 +1407,7 @@ TEST(NetUtilTest, GenerateFileName) {
// Raw 8bit characters in C-D
{
"http://www.example.com/images?id=3",
- "Content-Disposition: attachment; filename=caf\xc3\xa9.png",
+ "attachment; filename=caf\xc3\xa9.png",
"iso-8859-1",
"",
"image/png",
@@ -1586,7 +1416,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/images?id=3",
- "Content-Disposition: attachment; filename=caf\xe5.png",
+ "attachment; filename=caf\xe5.png",
"windows-1253",
"",
"image/png",
@@ -1595,7 +1425,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // No 'filename' keyword in the disposition, use the URL
"http://www.evil.com/my_download.txt",
- "Content-Dispostion: a_file_name.txt",
+ "a_file_name.txt",
"",
"",
"text/plain",
@@ -1604,7 +1434,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Spaces in the disposition file name
"http://www.frontpagehacker.com/a_download.exe",
- "Content-Dispostion: filename=My Downloaded File.exe",
+ "filename=My Downloaded File.exe",
"",
"",
"application/octet-stream",
@@ -1613,7 +1443,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // % encoded
"http://www.examples.com/",
- "Content-Dispostion: attachment; "
+ "attachment; "
"filename=\"%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg\"",
"",
"",
@@ -1623,7 +1453,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // name= parameter
"http://www.examples.com/q.cgi?id=abc",
- "Content-Dispostion: attachment; name=abc de.pdf",
+ "attachment; name=abc de.pdf",
"",
"",
"application/octet-stream",
@@ -1632,7 +1462,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/path",
- "Content-Dispostion: filename=\"=?EUC-JP?Q?=B7=DD=BD=D13=2Epng?=\"",
+ "filename=\"=?EUC-JP?Q?=B7=DD=BD=D13=2Epng?=\"",
"",
"",
"image/png",
@@ -1642,7 +1472,7 @@ TEST(NetUtilTest, GenerateFileName) {
{ // The following two have invalid CD headers and filenames come from the
// URL.
"http://www.example.com/test%20123",
- "Content-Dispostion: attachment; filename==?iiso88591?Q?caf=EG?=",
+ "attachment; filename==?iiso88591?Q?caf=EG?=",
"",
"",
"image/jpeg",
@@ -1651,7 +1481,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg",
- "Content-Dispostion: malformed_disposition",
+ "malformed_disposition",
"",
"",
"image/jpeg",
@@ -1660,7 +1490,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Invalid C-D. No filename from URL. Falls back to 'download'.
"http://www.google.com/path1/path2/",
- "Content-Dispostion: attachment; filename==?iso88591?Q?caf=E3?",
+ "attachment; filename==?iso88591?Q?caf=E3?",
"",
"",
"image/jpeg",
@@ -1703,7 +1533,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Disposition has relative paths, remove directory separators
"http://www.evil.com/my_download.txt",
- "Content-Dispostion: filename=../../../../././../a_file_name.txt",
+ "filename=../../../../././../a_file_name.txt",
"",
"",
"text/plain",
@@ -1712,7 +1542,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Disposition has parent directories, remove directory separators
"http://www.evil.com/my_download.txt",
- "Content-Dispostion: filename=dir1/dir2/a_file_name.txt",
+ "filename=dir1/dir2/a_file_name.txt",
"",
"",
"text/plain",
@@ -1721,7 +1551,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Disposition has relative paths, remove directory separators
"http://www.evil.com/my_download.txt",
- "Content-Dispostion: filename=..\\..\\..\\..\\.\\.\\..\\a_file_name.txt",
+ "filename=..\\..\\..\\..\\.\\.\\..\\a_file_name.txt",
"",
"",
"text/plain",
@@ -1730,7 +1560,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Disposition has parent directories, remove directory separators
"http://www.evil.com/my_download.txt",
- "Content-Dispostion: filename=dir1\\dir2\\a_file_name.txt",
+ "filename=dir1\\dir2\\a_file_name.txt",
"",
"",
"text/plain",
@@ -1748,7 +1578,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Filename looks like HTML?
"http://www.evil.com/get/malware/here",
- "Content-Disposition: filename=\"<blink>Hello kitty</blink>\"",
+ "filename=\"<blink>Hello kitty</blink>\"",
"",
"",
"text/plain",
@@ -1766,7 +1596,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Extension generation
"http://www.example.com/my-cat",
- "Content-Disposition: filename=my-cat",
+ "filename=my-cat",
"",
"",
"image/jpeg",
@@ -1775,7 +1605,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/my-cat",
- "Content-Dispostion: filename=my-cat",
+ "filename=my-cat",
"",
"",
"text/plain",
@@ -1784,7 +1614,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/my-cat",
- "Content-Dispostion: filename=my-cat",
+ "filename=my-cat",
"",
"",
"text/html",
@@ -1793,7 +1623,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Unknown MIME type
"http://www.example.com/my-cat",
- "Content-Dispostion: filename=my-cat",
+ "filename=my-cat",
"",
"",
"dance/party",
@@ -1802,7 +1632,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/my-cat.jpg",
- "Content-Dispostion: filename=my-cat.jpg",
+ "filename=my-cat.jpg",
"",
"",
"text/plain",
@@ -1813,7 +1643,7 @@ TEST(NetUtilTest, GenerateFileName) {
#if defined(OS_WIN)
{
"http://www.goodguy.com/evil.exe",
- "Content-Dispostion: filename=evil.exe",
+ "filename=evil.exe",
"",
"",
"image/jpeg",
@@ -1822,7 +1652,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/ok.exe",
- "Content-Dispostion: filename=ok.exe",
+ "filename=ok.exe",
"",
"",
"binary/octet-stream",
@@ -1831,7 +1661,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/evil.dll",
- "Content-Dispostion: filename=evil.dll",
+ "filename=evil.dll",
"",
"",
"dance/party",
@@ -1840,7 +1670,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/evil.exe",
- "Content-Dispostion: filename=evil",
+ "filename=evil",
"",
"",
"application/rss+xml",
@@ -1850,16 +1680,16 @@ TEST(NetUtilTest, GenerateFileName) {
// Test truncation of trailing dots and spaces
{
"http://www.goodguy.com/evil.exe ",
- "Content-Dispostion: filename=evil.exe ",
+ "filename=evil.exe ",
"",
"",
"binary/octet-stream",
L"download",
- L"evil.exe-"
+ L"evil.exe"
},
{
"http://www.goodguy.com/evil.exe.",
- "Content-Dispostion: filename=evil.exe.",
+ "filename=evil.exe.",
"",
"",
"binary/octet-stream",
@@ -1868,7 +1698,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/evil.exe. . .",
- "Content-Dispostion: filename=evil.exe. . .",
+ "filename=evil.exe. . .",
"",
"",
"binary/octet-stream",
@@ -1877,7 +1707,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/evil.",
- "Content-Dispostion: filename=evil.",
+ "filename=evil.",
"",
"",
"binary/octet-stream",
@@ -1886,7 +1716,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/. . . . .",
- "Content-Dispostion: filename=. . . . .",
+ "filename=. . . . .",
"",
"",
"binary/octet-stream",
@@ -1905,7 +1735,7 @@ TEST(NetUtilTest, GenerateFileName) {
#endif // OS_WIN
{
"http://www.goodguy.com/utils.js",
- "Content-Dispostion: filename=utils.js",
+ "filename=utils.js",
"",
"",
"application/x-javascript",
@@ -1914,7 +1744,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/contacts.js",
- "Content-Dispostion: filename=contacts.js",
+ "filename=contacts.js",
"",
"",
"application/json",
@@ -1923,7 +1753,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/utils.js",
- "Content-Dispostion: filename=utils.js",
+ "filename=utils.js",
"",
"",
"text/javascript",
@@ -1932,7 +1762,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/utils.js",
- "Content-Dispostion: filename=utils.js",
+ "filename=utils.js",
"",
"",
"text/javascript;version=2",
@@ -1941,7 +1771,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/utils.js",
- "Content-Dispostion: filename=utils.js",
+ "filename=utils.js",
"",
"",
"application/ecmascript",
@@ -1950,7 +1780,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/utils.js",
- "Content-Dispostion: filename=utils.js",
+ "filename=utils.js",
"",
"",
"application/ecmascript;version=4",
@@ -1959,7 +1789,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/program.exe",
- "Content-Dispostion: filename=program.exe",
+ "filename=program.exe",
"",
"",
"application/foo-bar",
@@ -1968,7 +1798,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.evil.com/../foo.txt",
- "Content-Dispostion: filename=../foo.txt",
+ "filename=../foo.txt",
"",
"",
"text/plain",
@@ -1977,7 +1807,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.evil.com/..\\foo.txt",
- "Content-Dispostion: filename=..\\foo.txt",
+ "filename=..\\foo.txt",
"",
"",
"text/plain",
@@ -1986,7 +1816,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.evil.com/.hidden",
- "Content-Dispostion: filename=.hidden",
+ "filename=.hidden",
"",
"",
"text/plain",
@@ -1995,7 +1825,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.evil.com/trailing.",
- "Content-Disposition: filename=trailing.",
+ "filename=trailing.",
"",
"",
"dance/party",
@@ -2008,7 +1838,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.evil.com/trailing.",
- "Content-Disposition: filename=trailing.",
+ "filename=trailing.",
"",
"",
"text/plain",
@@ -2021,7 +1851,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.evil.com/.",
- "Content-Dispostion: filename=.",
+ "filename=.",
"",
"",
"dance/party",
@@ -2030,7 +1860,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.evil.com/..",
- "Content-Dispostion: filename=..",
+ "filename=..",
"",
"",
"dance/party",
@@ -2039,7 +1869,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.evil.com/...",
- "Content-Dispostion: filename=...",
+ "filename=...",
"",
"",
"dance/party",
@@ -2048,7 +1878,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Note that this one doesn't have "filename=" on it.
"http://www.evil.com/",
- "Content-Dispostion: a_file_name.txt",
+ "a_file_name.txt",
"",
"",
"image/jpeg",
@@ -2057,7 +1887,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.evil.com/",
- "Content-Dispostion: filename=",
+ "filename=",
"",
"",
"image/jpeg",
@@ -2066,7 +1896,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/simple",
- "Content-Dispostion: filename=simple",
+ "filename=simple",
"",
"",
"application/octet-stream",
@@ -2076,7 +1906,7 @@ TEST(NetUtilTest, GenerateFileName) {
// Reserved words on Windows
{
"http://www.goodguy.com/COM1",
- "Content-Dispostion: filename=COM1",
+ "filename=COM1",
"",
"",
"application/foo-bar",
@@ -2089,7 +1919,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/COM4.txt",
- "Content-Dispostion: filename=COM4.txt",
+ "filename=COM4.txt",
"",
"",
"text/plain",
@@ -2102,7 +1932,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/lpt1.TXT",
- "Content-Dispostion: filename=lpt1.TXT",
+ "filename=lpt1.TXT",
"",
"",
"text/plain",
@@ -2115,7 +1945,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/clock$.txt",
- "Content-Dispostion: filename=clock$.txt",
+ "filename=clock$.txt",
"",
"",
"text/plain",
@@ -2128,7 +1958,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Validation should also apply to sugested name
"http://www.goodguy.com/blah$.txt",
- "Content-Dispostion: filename=clock$.txt",
+ "filename=clock$.txt",
"",
"clock$.txt",
"text/plain",
@@ -2141,7 +1971,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.goodguy.com/mycom1.foo",
- "Content-Dispostion: filename=mycom1.foo",
+ "filename=mycom1.foo",
"",
"",
"text/plain",
@@ -2150,7 +1980,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.badguy.com/Setup.exe.local",
- "Content-Dispostion: filename=Setup.exe.local",
+ "filename=Setup.exe.local",
"",
"",
"application/foo-bar",
@@ -2176,7 +2006,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.badguy.com/Setup.exe.lnk",
- "Content-Dispostion: filename=Setup.exe.lnk",
+ "filename=Setup.exe.lnk",
"",
"",
"application/foo-bar",
@@ -2189,7 +2019,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.badguy.com/Desktop.ini",
- "Content-Dispostion: filename=Desktop.ini",
+ "filename=Desktop.ini",
"",
"",
"application/foo-bar",
@@ -2202,7 +2032,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.badguy.com/Thumbs.db",
- "Content-Dispostion: filename=Thumbs.db",
+ "filename=Thumbs.db",
"",
"",
"application/foo-bar",
@@ -2215,7 +2045,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.hotmail.com",
- "Content-Dispostion: filename=source.jpg",
+ "filename=source.jpg",
"",
"",
"application/x-javascript",
@@ -2224,7 +2054,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // http://crbug.com/5772.
"http://www.example.com/foo.tar.gz",
- "Content-Dispostion: ",
+ "",
"",
"",
"application/x-tar",
@@ -2233,7 +2063,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // http://crbug.com/52250.
"http://www.example.com/foo.tgz",
- "Content-Dispostion: ",
+ "",
"",
"",
"application/x-tar",
@@ -2242,7 +2072,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // http://crbug.com/7337.
"http://maged.lordaeron.org/blank.reg",
- "Content-Dispostion: ",
+ "",
"",
"",
"text/x-registry",
@@ -2251,7 +2081,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{
"http://www.example.com/bar.tar",
- "Content-Dispostion: ",
+ "",
"",
"",
"application/x-tar",
@@ -2269,7 +2099,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // http://crbug.com/20337
"http://www.example.com/.download.txt",
- "Content-Dispostion: filename=.download.txt",
+ "filename=.download.txt",
"",
"",
"text/plain",
@@ -2296,7 +2126,7 @@ TEST(NetUtilTest, GenerateFileName) {
},
{ // Shouldn't overwrite C-D specified extension.
"http://www.example.com/npdf.php?fn=foobar.pdf",
- "Content-Disposition: filename=foobar.jpg",
+ "filename=foobar.jpg",
"",
"",
"text/plain",
diff --git a/net/http/http_content_disposition.cc b/net/http/http_content_disposition.cc
new file mode 100644
index 0000000..4e5d94e
--- /dev/null
+++ b/net/http/http_content_disposition.cc
@@ -0,0 +1,93 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "net/http/http_content_disposition.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "net/base/net_util.h"
+#include "net/http/http_util.h"
+
+namespace net {
+
+// Builds the object by parsing |header|, the value of a Content-Disposition
+// header field. |referrer_charset| is handed to Parse() unchanged. The
+// disposition type starts out as INLINE, which is the precondition Parse()
+// DCHECKs before it runs.
+HttpContentDisposition::HttpContentDisposition(
+    const std::string& header,
+    const std::string& referrer_charset)
+    : type_(INLINE) {
+  Parse(header, referrer_charset);
+}
+
+// Nothing to release by hand: the only members are an enum and a std::string.
+HttpContentDisposition::~HttpContentDisposition() {}
+
+// Consumes the disposition-type at the start of [begin, end) and records it
+// in |type_|. Returns the position at which parameter parsing should start:
+// the first ';' (or |end| if there is none) when the leading bytes were
+// treated as a disposition-type, or |begin| when they look like a parameter.
+std::string::const_iterator HttpContentDisposition::ConsumeDispositionType(
+    std::string::const_iterator begin, std::string::const_iterator end) {
+  DCHECK(type_ == INLINE);
+
+  std::string::const_iterator delimiter = std::find(begin, end, ';');
+
+  // If there's an '=' before the first ';', then the Content-Disposition
+  // header is malformed, and we treat the first bytes as a parameter rather
+  // than a disposition-type.
+  if (std::find(begin, delimiter, '=') != delimiter)
+    return begin;
+
+  std::string::const_iterator type_begin = begin;
+  std::string::const_iterator type_end = delimiter;
+  HttpUtil::TrimLWS(&type_begin, &type_end);
+
+  // An empty disposition-type (empty header, or nothing but whitespace before
+  // the first ';') names no type at all, so keep the default rather than
+  // reporting ATTACHMENT for a header that never asked for one.
+  if (type_begin == type_end)
+    return delimiter;
+
+  // Per RFC 6266 section 4.2, any type other than "inline" (matched
+  // case-insensitively) is handled like "attachment".
+  if (!LowerCaseEqualsASCII(type_begin, type_end, "inline"))
+    type_ = ATTACHMENT;
+  return delimiter;
+}
+
+// Parses |header| according to the grammar below and stores the resulting
+// filename in |filename_|. A non-empty filename* result wins over the
+// filename / name parameters regardless of the order they appear in.
+//
+// http://tools.ietf.org/html/rfc6266
+//
+// content-disposition = "Content-Disposition" ":"
+// disposition-type *( ";" disposition-parm )
+//
+// disposition-type = "inline" | "attachment" | disp-ext-type
+// ; case-insensitive
+// disp-ext-type = token
+//
+// disposition-parm = filename-parm | disp-ext-parm
+//
+// filename-parm = "filename" "=" value
+// | "filename*" "=" ext-value
+//
+// disp-ext-parm = token "=" value
+// | ext-token "=" ext-value
+// ext-token = <the characters in token, followed by "*">
+//
+void HttpContentDisposition::Parse(const std::string& header,
+                                   const std::string& referrer_charset) {
+  DCHECK(type_ == INLINE);
+  DCHECK(filename_.empty());
+
+  // Whatever follows the disposition-type is a ';'-separated parameter list.
+  std::string::const_iterator params_end = header.end();
+  std::string::const_iterator params_begin =
+      ConsumeDispositionType(header.begin(), params_end);
+
+  std::string plain_filename;
+  std::string extended_filename;
+
+  HttpUtil::NameValuePairsIterator params(params_begin, params_end, ';');
+  while (params.GetNext()) {
+    // "name" is accepted as an alias of "filename"; both feed the same
+    // output, so the one seen last is the one that sticks.
+    const bool is_plain_filename =
+        LowerCaseEqualsASCII(params.name_begin(), params.name_end(),
+                             "filename") ||
+        LowerCaseEqualsASCII(params.name_begin(), params.name_end(), "name");
+    if (is_plain_filename) {
+      DecodeFilenameValue(params.value(), referrer_charset, &plain_filename);
+      continue;
+    }
+
+    // The extended form is decoded from the raw (not unquoted) value.
+    if (LowerCaseEqualsASCII(params.name_begin(), params.name_end(),
+                             "filename*")) {
+      DecodeExtValue(params.raw_value(), &extended_filename);
+    }
+  }
+
+  if (extended_filename.empty())
+    filename_ = plain_filename;
+  else
+    filename_ = extended_filename;
+}
+
+} // namespace net
diff --git a/net/http/http_content_disposition.h b/net/http/http_content_disposition.h
new file mode 100644
index 0000000..c75610a
--- /dev/null
+++ b/net/http/http_content_disposition.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef NET_HTTP_HTTP_CONTENT_DISPOSITION_H_
+#define NET_HTTP_HTTP_CONTENT_DISPOSITION_H_
+#pragma once
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "net/base/net_export.h"
+
+namespace net {
+
+// Parses the value of a Content-Disposition header field (grammar: RFC 6266)
+// at construction time and exposes the filename it carries.
+class NET_EXPORT_PRIVATE HttpContentDisposition {
+ public:
+ // Disposition type of the header. INLINE is the initial value; ATTACHMENT is
+ // recorded when the header names a disposition-type other than "inline"
+ // (compared case-insensitively).
+ enum Type {
+ INLINE,
+ ATTACHMENT,
+ };
+
+ // |header| is the header field value to parse. |referrer_charset| is
+ // forwarded to the decoder used for the filename / name parameters.
+ HttpContentDisposition(const std::string& header,
+ const std::string& referrer_charset);
+ ~HttpContentDisposition();
+
+ // Returns the filename found by the parser: the filename* parameter when it
+ // decoded to a non-empty string, otherwise the result of the filename /
+ // name parameter. May be empty.
+ const std::string& filename() const { return filename_; }
+
+ private:
+ // Parses |header| and fills in |type_| and |filename_|.
+ void Parse(const std::string& header, const std::string& referrer_charset);
+ // Consumes the leading disposition-type of [begin, end), updating |type_|,
+ // and returns the position from which the parameters should be parsed.
+ std::string::const_iterator ConsumeDispositionType(
+ std::string::const_iterator begin, std::string::const_iterator end);
+
+ Type type_; // TODO(abarth): Add an accessor and tests.
+ std::string filename_;
+
+ DISALLOW_COPY_AND_ASSIGN(HttpContentDisposition);
+};
+
+} // namespace net
+
+#endif // NET_HTTP_HTTP_CONTENT_DISPOSITION_H_
diff --git a/net/http/http_content_disposition_unittest.cc b/net/http/http_content_disposition_unittest.cc
new file mode 100644
index 0000000..3d8750e
--- /dev/null
+++ b/net/http/http_content_disposition_unittest.cc
@@ -0,0 +1,201 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "net/http/http_content_disposition.h"
+
+#include "base/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace net {
+
+namespace {
+
+// One row of the table-driven Filename test.
+struct FileNameCDCase {
+ // Header value passed to the HttpContentDisposition constructor.
+ const char* header;
+ // Referrer charset passed to the HttpContentDisposition constructor.
+ const char* referrer_charset;
+ // Expected filename(), compared after conversion with UTF8ToWide().
+ const wchar_t* expected;
+};
+
+} // anonymous namespace
+
+// Table-driven check of HttpContentDisposition::filename(). Each case builds
+// a parser from |header| and |referrer_charset| and compares the resulting
+// filename, widened with UTF8ToWide(), against |expected|. An expected value
+// of L"" means no filename should be produced.
+TEST(HttpContentDispositionTest, Filename) {
+ const FileNameCDCase tests[] = {
+ // Test various forms of C-D header fields emitted by web servers.
+ {"inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"},
+ {"inline; name=\"abcde.pdf\"", "", L"abcde.pdf"},
+ {"attachment; filename=abcde.pdf", "", L"abcde.pdf"},
+ {"attachment; name=abcde.pdf", "", L"abcde.pdf"},
+ {"attachment; filename=abc,de.pdf", "", L"abc,de.pdf"},
+ {"filename=abcde.pdf", "", L"abcde.pdf"},
+ {"filename= abcde.pdf", "", L"abcde.pdf"},
+ {"filename =abcde.pdf", "", L"abcde.pdf"},
+ {"filename = abcde.pdf", "", L"abcde.pdf"},
+ {"filename\t=abcde.pdf", "", L"abcde.pdf"},
+ {"filename \t\t =abcde.pdf", "", L"abcde.pdf"},
+ {"name=abcde.pdf", "", L"abcde.pdf"},
+ {"inline; filename=\"abc%20de.pdf\"", "",
+ L"abc de.pdf"},
+ // Unbalanced quotation mark
+ {"filename=\"abcdef.pdf", "", L"abcdef.pdf"},
+ // Whitespaces are converted to a space.
+ {"inline; filename=\"abc \t\nde.pdf\"", "",
+ L"abc de.pdf"},
+ // %-escaped UTF-8
+ {"attachment; filename=\"%EC%98%88%EC%88%A0%20"
+ "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
+ {"attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1"
+ "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"},
+ {"attachment; filename=\"%EC%98%88%EC%88%A0 \n"
+ "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
+ // RFC 2047 with various charsets and Q/B encodings
+ {"attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
+ "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"},
+ {"attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=",
+ "", L"\xc608\xc220 3.png"},
+ {"attachment; filename==?utf-8?Q?=E8=8A=B8=E8"
+ "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"},
+ {"attachment; filename==?utf-8?Q?=F0=90=8C=B0"
+ "_3=2Epng?=", "", L"\U00010330 3.png"},
+ {"inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"",
+ "", L"caf\x00e9 .png"},
+ // Space after an encoded word should be removed.
+ {"inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"",
+ "", L"caf\x00e9 .png"},
+ // Two encoded words with different charsets (not very likely to be emitted
+ // by web servers in the wild). Spaces between them are removed.
+ {"inline; filename=\"=?euc-kr?b?v7m8+iAz?="
+ " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "",
+ L"\xc608\xc220 3\xc608\xc220.png"},
+ {"attachment; filename=\"=?windows-1252?Q?caf=E9?="
+ " =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"},
+ // Non-ASCII string is passed through and treated as UTF-8 as long as
+ // it's valid as UTF-8 and regardless of |referrer_charset|.
+ {"attachment; filename=caf\xc3\xa9.png",
+ "iso-8859-1", L"caf\x00e9.png"},
+ {"attachment; filename=caf\xc3\xa9.png",
+ "", L"caf\x00e9.png"},
+ // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset.
+ {"attachment; filename=caf\xe5.png",
+ "windows-1253", L"caf\x03b5.png"},
+#if 0
+ // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage.
+ // TODO(jungshik): We need to set the OS default codepage
+ // to a specific value before testing. On Windows, we can use
+ // SetThreadLocale().
+ {"attachment; filename=\xb0\xa1\xb0\xa2.png",
+ "", L"\xac00\xac01.png"},
+#endif
+ // Failure cases
+ // Invalid hex-digit "G"
+ // NOTE(review): the charset label here is spelled "iiso88591" while the
+ // other cases use "iso88591". If that is a typo, this case may be rejected
+ // because of the charset rather than the hex digit -- confirm.
+ {"attachment; filename==?iiso88591?Q?caf=EG?=", "",
+ L""},
+ // Incomplete RFC 2047 encoded-word (missing '=' at the end)
+ {"attachment; filename==?iso88591?Q?caf=E3?", "", L""},
+ // Extra character at the end of an encoded word
+ {"attachment; filename==?iso88591?Q?caf=E3?==",
+ "", L""},
+ // Extra token at the end of an encoded word
+ {"attachment; filename==?iso88591?Q?caf=E3?=?",
+ "", L""},
+ {"attachment; filename==?iso88591?Q?caf=E3?=?=",
+ "", L""},
+ // Incomplete hex-escaped chars
+ {"attachment; filename==?windows-1252?Q?=63=61=E?=",
+ "", L""},
+ {"attachment; filename=%EC%98%88%EC%88%A", "", L""},
+ // %-escaped non-UTF-8 encoding is an "error"
+ {"attachment; filename=%B7%DD%BD%D1.png", "", L""},
+ // Two RFC 2047 encoded words in a row without a space is an error.
+ {"attachment; filename==?windows-1252?Q?caf=E3?="
+ "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""},
+
+ // RFC 5987 tests with Filename* : see http://tools.ietf.org/html/rfc5987
+ {"attachment; filename*=foo.html", "", L""},
+ {"attachment; filename*=foo'.html", "", L""},
+ {"attachment; filename*=''foo'.html", "", L""},
+ {"attachment; filename*=''foo.html'", "", L""},
+ {"attachment; filename*=''f\"oo\".html'", "", L""},
+ {"attachment; filename*=bogus_charset''foo.html'",
+ "", L""},
+ {"attachment; filename*='en'foo.html'", "", L""},
+ {"attachment; filename*=iso-8859-1'en'foo.html", "",
+ L"foo.html"},
+ {"attachment; filename*=utf-8'en'foo.html", "",
+ L"foo.html"},
+ // charset cannot be omitted.
+ {"attachment; filename*='es'f\xfa.html'", "", L""},
+ // Non-ASCII bytes are not allowed.
+ {"attachment; filename*=iso-8859-1'es'f\xfa.html", "",
+ L""},
+ {"attachment; filename*=utf-8'es'f\xce\xba.html", "",
+ L""},
+ // TODO(jshin): Space should be %-encoded, but currently, we allow
+ // spaces.
+ {"inline; filename*=iso88591''cafe foo.png", "",
+ L"cafe foo.png"},
+
+ // Filename* tests converted from Q-encoded tests above.
+ {"attachment; filename*=EUC-JP''%B7%DD%BD%D13%2Epng",
+ "", L"\x82b8\x8853" L"3.png"},
+ {"attachment; filename*=utf-8''"
+ "%E8%8A%B8%E8%A1%93%203%2Epng", "", L"\x82b8\x8853 3.png"},
+ {"attachment; filename*=utf-8''%F0%90%8C%B0 3.png", "",
+ L"\U00010330 3.png"},
+ {"inline; filename*=Euc-Kr'ko'%BF%B9%BC%FA%2Epng", "",
+ L"\xc608\xc220.png"},
+ {"attachment; filename*=windows-1252''caf%E9.png", "",
+ L"caf\x00e9.png"},
+
+ // http://greenbytes.de/tech/tc2231/ filename* test cases.
+ // attwithisofn2231iso
+ {"attachment; filename*=iso-8859-1''foo-%E4.html", "",
+ L"foo-\xe4.html"},
+ // attwithfn2231utf8
+ {"attachment; filename*="
+ "UTF-8''foo-%c3%a4-%e2%82%ac.html", "", L"foo-\xe4-\x20ac.html"},
+ // attwithfn2231noc : no encoding specified but UTF-8 is used.
+ {"attachment; filename*=''foo-%c3%a4-%e2%82%ac.html",
+ "", L""},
+ // attwithfn2231utf8comp
+ {"attachment; filename*=UTF-8''foo-a%cc%88.html", "",
+ L"foo-\xe4.html"},
+#ifdef ICU_SHOULD_FAIL_CONVERSION_ON_INVALID_CHARACTER
+ // This does not work because we treat ISO-8859-1 as synonymous with
+ // Windows-1252 per HTML5. For HTTP, in theory, we're not
+ // supposed to.
+ // attwithfn2231utf8-bad
+ {"attachment; filename*="
+ "iso-8859-1''foo-%c3%a4-%e2%82%ac.html", "", L""},
+#endif
+ // attwithfn2231ws1
+ {"attachment; filename *=UTF-8''foo-%c3%a4.html", "",
+ L""},
+ // attwithfn2231ws2
+ {"attachment; filename*= UTF-8''foo-%c3%a4.html", "",
+ L"foo-\xe4.html"},
+ // attwithfn2231ws3
+ {"attachment; filename* =UTF-8''foo-%c3%a4.html", "",
+ L"foo-\xe4.html"},
+ // attwithfn2231quot
+ {"attachment; filename*=\"UTF-8''foo-%c3%a4.html\"",
+ "", L""},
+ // attfnboth
+ {"attachment; filename=\"foo-ae.html\"; "
+ "filename*=UTF-8''foo-%c3%a4.html", "", L"foo-\xe4.html"},
+ // attfnboth2
+ {"attachment; filename*=UTF-8''foo-%c3%a4.html; "
+ "filename=\"foo-ae.html\"", "", L"foo-\xe4.html"},
+ // attnewandfn
+ {"attachment; foobar=x; filename=\"foo.html\"", "",
+ L"foo.html"},
+ };
+ // Run every row; the failure message echoes the offending header value.
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
+ HttpContentDisposition header(tests[i].header, tests[i].referrer_charset);
+ EXPECT_EQ(tests[i].expected,
+ UTF8ToWide(header.filename()))
+ << "Failed on input: " << tests[i].header;
+ }
+}
+
+} // namespace net
diff --git a/net/http/http_util.h b/net/http/http_util.h
index 9a4b8ce..a09377e 100644
--- a/net/http/http_util.h
+++ b/net/http/http_util.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -307,9 +307,13 @@ class NET_EXPORT HttpUtil {
}
std::string value() const {
return value_is_quoted_ ? unquoted_value_ : std::string(value_begin_,
- value_end_);
+ value_end_);
}
+    // Returns the [value_begin_, value_end_) range as-is, i.e. without the
+    // substitution of |unquoted_value_| that value() performs for quoted
+    // values.
+    std::string raw_value() const {
+      return std::string(value_begin_, value_end_);
+    }
+
private:
HttpUtil::ValuesIterator props_;
bool valid_;
diff --git a/net/net.gyp b/net/net.gyp
index 59a68b2..6c301a3 100644
--- a/net/net.gyp
+++ b/net/net.gyp
@@ -417,6 +417,8 @@
'http/http_cache.h',
'http/http_cache_transaction.cc',
'http/http_cache_transaction.h',
+ 'http/http_content_disposition.cc',
+ 'http/http_content_disposition.h',
'http/http_chunked_decoder.cc',
'http/http_chunked_decoder.h',
'http/http_mac_signature.cc',
@@ -1101,6 +1103,7 @@
'http/http_byte_range_unittest.cc',
'http/http_cache_unittest.cc',
'http/http_chunked_decoder_unittest.cc',
+ 'http/http_content_disposition_unittest.cc',
'http/http_mac_signature_unittest.cc',
'http/http_network_layer_unittest.cc',
'http/http_network_transaction_unittest.cc',