diff options
author | asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-12-17 00:16:54 +0000 |
---|---|---|
committer | asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-12-17 00:16:54 +0000 |
commit | a7206e77788f434a825828b804bf6446d797f8a8 (patch) | |
tree | f0ac52f8e41cbca071cdac1ed70bf0a8c38dc33b /net | |
parent | b3dbcb5e2445baec0ceca5e57de9bf07621679ab (diff) | |
download | chromium_src-a7206e77788f434a825828b804bf6446d797f8a8.zip chromium_src-a7206e77788f434a825828b804bf6446d797f8a8.tar.gz chromium_src-a7206e77788f434a825828b804bf6446d797f8a8.tar.bz2 |
Add UMA for measuring Content-Disposition header use and abuse.
BUG=162815
Review URL: https://chromiumcodereview.appspot.com/11478034
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@173403 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/http/http_content_disposition.cc | 68 | ||||
-rw-r--r-- | net/http/http_content_disposition.h | 35 | ||||
-rw-r--r-- | net/http/http_content_disposition_unittest.cc | 75 |
3 files changed, 158 insertions, 20 deletions
diff --git a/net/http/http_content_disposition.cc b/net/http/http_content_disposition.cc index 0726e93..35ace84 100644 --- a/net/http/http_content_disposition.cc +++ b/net/http/http_content_disposition.cc @@ -95,7 +95,8 @@ bool DecodeBQEncoding(const std::string& part, bool DecodeWord(const std::string& encoded_word, const std::string& referrer_charset, bool* is_rfc2047, - std::string* output) { + std::string* output, + int* parse_result_flags) { *is_rfc2047 = false; output->clear(); if (encoded_word.empty()) @@ -117,6 +118,7 @@ bool DecodeWord(const std::string& encoded_word, } } + *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; return true; } @@ -125,7 +127,7 @@ bool DecodeWord(const std::string& encoded_word, // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. // We don't care about the length restriction (72 bytes) because // many web servers generate encoded words longer than the limit. - std::string tmp; + std::string decoded_word; *is_rfc2047 = true; int part_index = 0; std::string charset; @@ -158,7 +160,7 @@ bool DecodeWord(const std::string& encoded_word, ++part_index; break; case 3: - *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp); + *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word); if (!*is_rfc2047) { // Last minute failure. Invalid B/Q encoding. Rather than // passing it through, return now. @@ -186,7 +188,9 @@ bool DecodeWord(const std::string& encoded_word, if (*is_rfc2047) { if (*(encoded_word.end() - 1) == '=') { - output->swap(tmp); + output->swap(decoded_word); + *parse_result_flags |= + net::HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS; return true; } // encoded_word ending prematurelly with '?' or extra '?' @@ -199,9 +203,13 @@ bool DecodeWord(const std::string& encoded_word, // web browser. // What IE6/7 does: %-escaped UTF-8. 
- tmp = net::UnescapeURLComponent(encoded_word, net::UnescapeRule::SPACES); - if (IsStringUTF8(tmp)) { - output->swap(tmp); + decoded_word = net::UnescapeURLComponent(encoded_word, + net::UnescapeRule::SPACES); + if (decoded_word != encoded_word) + *parse_result_flags |= + net::HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS; + if (IsStringUTF8(decoded_word)) { + output->swap(decoded_word); return true; // We can try either the OS default charset or 'origin charset' here, // As far as I can tell, IE does not support it. However, I've seen @@ -221,19 +229,21 @@ bool DecodeWord(const std::string& encoded_word, // strings. Non-ASCII strings are interpreted based on |referrer_charset|. bool DecodeFilenameValue(const std::string& input, const std::string& referrer_charset, - std::string* output) { - std::string tmp; + std::string* output, + int* parse_result_flags) { + int current_parse_result_flags = 0; + std::string decoded_value; + bool is_previous_token_rfc2047 = true; + // Tokenize with whitespace characters. StringTokenizer t(input, " \t\n\r"); t.set_options(StringTokenizer::RETURN_DELIMS); - bool is_previous_token_rfc2047 = true; while (t.GetNext()) { if (t.token_is_delim()) { // If the previous non-delimeter token is not RFC2047-encoded, // put in a space in its place. Otheriwse, skip over it. - if (!is_previous_token_rfc2047) { - tmp.push_back(' '); - } + if (!is_previous_token_rfc2047) + decoded_value.push_back(' '); continue; } // We don't support a single multibyte character split into @@ -243,11 +253,13 @@ bool DecodeFilenameValue(const std::string& input, // it, either. 
std::string decoded; if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, - &decoded)) + &decoded, &current_parse_result_flags)) return false; - tmp.append(decoded); + decoded_value.append(decoded); } - output->swap(tmp); + output->swap(decoded_value); + if (parse_result_flags && !output->empty()) + *parse_result_flags |= current_parse_result_flags; return true; } @@ -339,7 +351,8 @@ namespace net { HttpContentDisposition::HttpContentDisposition( const std::string& header, const std::string& referrer_charset) - : type_(INLINE) { + : type_(INLINE), + parse_result_flags_(INVALID) { Parse(header, referrer_charset); } @@ -361,10 +374,18 @@ std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( if (!HttpUtil::IsToken(type_begin, type_end)) return begin; + parse_result_flags_ |= HAS_DISPOSITION_TYPE; + DCHECK(std::find(type_begin, type_end, '=') == type_end); - if (!LowerCaseEqualsASCII(type_begin, type_end, "inline")) + if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) { + type_ = INLINE; + } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) { + type_ = ATTACHMENT; + } else { + parse_result_flags_ |= HAS_UNKNOWN_DISPOSITION_TYPE; type_ = ATTACHMENT; + } return delimiter; } @@ -404,15 +425,22 @@ void HttpContentDisposition::Parse(const std::string& header, if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), iter.name_end(), "filename")) { - DecodeFilenameValue(iter.value(), referrer_charset, &filename); + DecodeFilenameValue(iter.value(), referrer_charset, &filename, + &parse_result_flags_); + if (!filename.empty()) + parse_result_flags_ |= HAS_FILENAME; } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), iter.name_end(), "name")) { - DecodeFilenameValue(iter.value(), referrer_charset, &name); + DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); + if (!name.empty()) + parse_result_flags_ |= HAS_NAME; } else if (ext_filename.empty() && 
LowerCaseEqualsASCII(iter.name_begin(), iter.name_end(), "filename*")) { DecodeExtValue(iter.raw_value(), &ext_filename); + if (!ext_filename.empty()) + parse_result_flags_ |= HAS_EXT_FILENAME; } } diff --git a/net/http/http_content_disposition.h b/net/http/http_content_disposition.h index f3573a9..2b4ca70 100644 --- a/net/http/http_content_disposition.h +++ b/net/http/http_content_disposition.h @@ -19,6 +19,37 @@ class NET_EXPORT HttpContentDisposition { ATTACHMENT, }; + // Properties of the Content-Disposition header. Used for UMA. + enum ParseResultFlags { + INVALID = 0, + + // A valid disposition-type is present. + HAS_DISPOSITION_TYPE = 1 << 0, + + // The disposition-type is not 'inline' or 'attachment'. + HAS_UNKNOWN_DISPOSITION_TYPE = 1 << 1, + + // Has a valid non-empty 'name' attribute. + HAS_NAME = 1 << 2, + + // Has a valid non-empty 'filename' attribute. + HAS_FILENAME = 1 << 3, + + // Has a valid non-empty 'filename*' attribute. + HAS_EXT_FILENAME = 1 << 4, + + // The following fields are properties of the 'filename' attribute: + + // Quoted-string contains non-ASCII characters. + HAS_NON_ASCII_STRINGS = 1 << 5, + + // Quoted-string contains percent-encoding. + HAS_PERCENT_ENCODED_STRINGS = 1 << 6, + + // Quoted-string contains RFC 2047 encoded words. + HAS_RFC2047_ENCODED_STRINGS = 1 << 7 + }; + HttpContentDisposition(const std::string& header, const std::string& referrer_charset); ~HttpContentDisposition(); @@ -28,6 +59,9 @@ class NET_EXPORT HttpContentDisposition { Type type() const { return type_; } const std::string& filename() const { return filename_; } + // A combination of ParseResultFlags values. 
+ int parse_result_flags() const { return parse_result_flags_; } + private: void Parse(const std::string& header, const std::string& referrer_charset); std::string::const_iterator ConsumeDispositionType( @@ -35,6 +69,7 @@ class NET_EXPORT HttpContentDisposition { Type type_; std::string filename_; + int parse_result_flags_; DISALLOW_COPY_AND_ASSIGN(HttpContentDisposition); }; diff --git a/net/http/http_content_disposition_unittest.cc b/net/http/http_content_disposition_unittest.cc index 240c699..66c1a7f 100644 --- a/net/http/http_content_disposition_unittest.cc +++ b/net/http/http_content_disposition_unittest.cc @@ -512,4 +512,79 @@ TEST(HttpContentDispositionTest, tc2231) { } } +TEST(HttpContentDispositionTest, ParseResult) { + const struct ParseResultTestCase { + const char* header; + int expected_flags; + } kTestCases[] = { + // Basic feature tests + { "", HttpContentDisposition::INVALID }, + { "example=x", HttpContentDisposition::INVALID }, + { "attachment; filename=", HttpContentDisposition::HAS_DISPOSITION_TYPE }, + { "attachment; name=", HttpContentDisposition::HAS_DISPOSITION_TYPE }, + { "attachment; filename*=", HttpContentDisposition::HAS_DISPOSITION_TYPE }, + { "attachment; filename==?utf-8?Q?\?=", + HttpContentDisposition::HAS_DISPOSITION_TYPE }, + { "filename=x", HttpContentDisposition::HAS_FILENAME }, + { "example; filename=x", + HttpContentDisposition::HAS_DISPOSITION_TYPE | + HttpContentDisposition::HAS_UNKNOWN_DISPOSITION_TYPE | + HttpContentDisposition::HAS_FILENAME}, + { "attachment; filename=x", + HttpContentDisposition::HAS_DISPOSITION_TYPE | + HttpContentDisposition::HAS_FILENAME }, + { "attachment; filename=x; name=y", + HttpContentDisposition::HAS_DISPOSITION_TYPE | + HttpContentDisposition::HAS_FILENAME | + HttpContentDisposition::HAS_NAME }, + { "attachment; name=y; filename*=utf-8''foo; name=x", + HttpContentDisposition::HAS_DISPOSITION_TYPE | + HttpContentDisposition::HAS_EXT_FILENAME | + HttpContentDisposition::HAS_NAME }, + + // 
Feature tests for 'filename' attribute. + { "filename=foo\xcc\x88", + HttpContentDisposition::HAS_FILENAME | + HttpContentDisposition::HAS_NON_ASCII_STRINGS }, + { "filename=foo%cc%88", + HttpContentDisposition::HAS_FILENAME | + HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS }, + { "filename==?utf-8?Q?foo?=", + HttpContentDisposition::HAS_FILENAME | + HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS }, + { "filename=\"=?utf-8?Q?foo?=\"", + HttpContentDisposition::HAS_FILENAME | + HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS }, + { "filename==?utf-8?Q?foo?", HttpContentDisposition::INVALID }, + { "name=foo\xcc\x88", + HttpContentDisposition::HAS_NAME }, + + // Shouldn't set |has_non_ascii_strings| based on 'name' attribute. + { "filename=x; name=foo\xcc\x88", + HttpContentDisposition::HAS_FILENAME | + HttpContentDisposition::HAS_NAME }, + { "filename=foo\xcc\x88 foo%cc%88 =?utf-8?Q?foo?=", + HttpContentDisposition::HAS_FILENAME | + HttpContentDisposition::HAS_NON_ASCII_STRINGS | + HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS | + HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS }, + + // If 'filename' attribute is invalid, shouldn't set any flags based on it. + { "filename=foo\xcc\x88 foo%cc%88 =?utf-8?Q?foo?", + HttpContentDisposition::INVALID }, + { "filename=foo\xcc\x88 foo%cc%88 =?utf-8?Q?foo?; name=x", + HttpContentDisposition::HAS_NAME }, + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { + const ParseResultTestCase& test_case = kTestCases[i]; + HttpContentDisposition content_disposition(test_case.header, "utf-8"); + int result = content_disposition.parse_result_flags(); + + SCOPED_TRACE(testing::Message() << "Test case " << i + << " with header " << test_case.header); + EXPECT_EQ(test_case.expected_flags, result); + } +} + } // namespace net |