Add UMA for measuring Content-Disposition header use and abuse.

BUG=162815 Review URL: https://chromiumcodereview.appspot.com/11478034 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@173403 0039d316-1c4b-4281-b951-d872f2087c98
author: asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-12-17 00:16:54 +0000
committer: asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-12-17 00:16:54 +0000
commit: a7206e77788f434a825828b804bf6446d797f8a8 (patch)
tree: f0ac52f8e41cbca071cdac1ed70bf0a8c38dc33b /net
parent: b3dbcb5e2445baec0ceca5e57de9bf07621679ab (diff)
download: chromium_src-a7206e77788f434a825828b804bf6446d797f8a8.zip
chromium_src-a7206e77788f434a825828b804bf6446d797f8a8.tar.gz
chromium_src-a7206e77788f434a825828b804bf6446d797f8a8.tar.bz2
3 files changed, 158 insertions, 20 deletions
diff --git a/net/http/http_content_disposition.cc b/net/http/http_content_disposition.cc
index 0726e93..35ace84 100644
--- a/net/http/http_content_disposition.cc
+++ b/net/http/http_content_disposition.cc
@@ -95,7 +95,8 @@ bool DecodeBQEncoding(const std::string& part,
 bool DecodeWord(const std::string& encoded_word,
                 const std::string& referrer_charset,
                 bool* is_rfc2047,
-                std::string* output) {
+                std::string* output,
+                int* parse_result_flags) {
   *is_rfc2047 = false;
   output->clear();
   if (encoded_word.empty())
@@ -117,6 +118,7 @@ bool DecodeWord(const std::string& encoded_word,
       }
     }
 
+    *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS;
     return true;
   }
 
@@ -125,7 +127,7 @@ bool DecodeWord(const std::string& encoded_word,
   // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.
   // We don't care about the length restriction (72 bytes) because
   // many web servers generate encoded words longer than the limit.
-  std::string tmp;
+  std::string decoded_word;
   *is_rfc2047 = true;
   int part_index = 0;
   std::string charset;
@@ -158,7 +160,7 @@ bool DecodeWord(const std::string& encoded_word,
         ++part_index;
         break;
       case 3:
-        *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp);
+        *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word);
         if (!*is_rfc2047) {
           // Last minute failure. Invalid B/Q encoding. Rather than
           // passing it through, return now.
@@ -186,7 +188,9 @@ bool DecodeWord(const std::string& encoded_word,
 
   if (*is_rfc2047) {
     if (*(encoded_word.end() - 1) == '=') {
-      output->swap(tmp);
+      output->swap(decoded_word);
+      *parse_result_flags |=
+          net::HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS;
       return true;
     }
     // encoded_word ending prematurelly with '?' or extra '?'
@@ -199,9 +203,13 @@ bool DecodeWord(const std::string& encoded_word,
   // web browser.
 
   // What IE6/7 does: %-escaped UTF-8.
-  tmp = net::UnescapeURLComponent(encoded_word, net::UnescapeRule::SPACES);
-  if (IsStringUTF8(tmp)) {
-    output->swap(tmp);
+  decoded_word = net::UnescapeURLComponent(encoded_word,
+                                           net::UnescapeRule::SPACES);
+  if (decoded_word != encoded_word)
+    *parse_result_flags |=
+        net::HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS;
+  if (IsStringUTF8(decoded_word)) {
+    output->swap(decoded_word);
     return true;
     // We can try either the OS default charset or 'origin charset' here,
     // As far as I can tell, IE does not support it. However, I've seen
@@ -221,19 +229,21 @@ bool DecodeWord(const std::string& encoded_word,
 // strings. Non-ASCII strings are interpreted based on |referrer_charset|.
 bool DecodeFilenameValue(const std::string& input,
                          const std::string& referrer_charset,
-                         std::string* output) {
-  std::string tmp;
+                         std::string* output,
+                         int* parse_result_flags) {
+  int current_parse_result_flags = 0;
+  std::string decoded_value;
+  bool is_previous_token_rfc2047 = true;
+
   // Tokenize with whitespace characters.
   StringTokenizer t(input, " \t\n\r");
   t.set_options(StringTokenizer::RETURN_DELIMS);
-  bool is_previous_token_rfc2047 = true;
   while (t.GetNext()) {
     if (t.token_is_delim()) {
       // If the previous non-delimeter token is not RFC2047-encoded,
       // put in a space in its place. Otheriwse, skip over it.
-      if (!is_previous_token_rfc2047) {
-        tmp.push_back(' ');
-      }
+      if (!is_previous_token_rfc2047)
+        decoded_value.push_back(' ');
       continue;
     }
     // We don't support a single multibyte character split into
@@ -243,11 +253,13 @@ bool DecodeFilenameValue(const std::string& input,
     // it, either.
     std::string decoded;
     if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,
-                    &decoded))
+                    &decoded, &current_parse_result_flags))
       return false;
-    tmp.append(decoded);
+    decoded_value.append(decoded);
   }
-  output->swap(tmp);
+  output->swap(decoded_value);
+  if (parse_result_flags && !output->empty())
+    *parse_result_flags |= current_parse_result_flags;
   return true;
 }
 
@@ -339,7 +351,8 @@ namespace net {
 
 HttpContentDisposition::HttpContentDisposition(
     const std::string& header, const std::string& referrer_charset)
-  : type_(INLINE) {
+  : type_(INLINE),
+    parse_result_flags_(INVALID) {
   Parse(header, referrer_charset);
 }
 
@@ -361,10 +374,18 @@ std::string::const_iterator HttpContentDisposition::ConsumeDispositionType(
   if (!HttpUtil::IsToken(type_begin, type_end))
     return begin;
 
+  parse_result_flags_ |= HAS_DISPOSITION_TYPE;
+
   DCHECK(std::find(type_begin, type_end, '=') == type_end);
 
-  if (!LowerCaseEqualsASCII(type_begin, type_end, "inline"))
+  if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) {
+    type_ = INLINE;
+  } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) {
+    type_ = ATTACHMENT;
+  } else {
+    parse_result_flags_ |= HAS_UNKNOWN_DISPOSITION_TYPE;
     type_ = ATTACHMENT;
+  }
   return delimiter;
 }
 
@@ -404,15 +425,22 @@ void HttpContentDisposition::Parse(const std::string& header,
     if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(),
                                                  iter.name_end(),
                                                  "filename")) {
-      DecodeFilenameValue(iter.value(), referrer_charset, &filename);
+      DecodeFilenameValue(iter.value(), referrer_charset, &filename,
+                          &parse_result_flags_);
+      if (!filename.empty())
+        parse_result_flags_ |= HAS_FILENAME;
     } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(),
                                                     iter.name_end(),
                                                     "name")) {
-      DecodeFilenameValue(iter.value(), referrer_charset, &name);
+      DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL);
+      if (!name.empty())
+        parse_result_flags_ |= HAS_NAME;
     } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(),
                                                             iter.name_end(),
                                                             "filename*")) {
       DecodeExtValue(iter.raw_value(), &ext_filename);
+      if (!ext_filename.empty())
+        parse_result_flags_ |= HAS_EXT_FILENAME;
     }
   }
 
diff --git a/net/http/http_content_disposition.h b/net/http/http_content_disposition.h
index f3573a9..2b4ca70 100644
--- a/net/http/http_content_disposition.h
+++ b/net/http/http_content_disposition.h
@@ -19,6 +19,37 @@ class NET_EXPORT HttpContentDisposition {
     ATTACHMENT,
   };
 
+  // Properties of the Content-Disposition header. Used for UMA.
+  enum ParseResultFlags {
+    INVALID                      = 0,
+
+    // A valid disposition-type is present.
+    HAS_DISPOSITION_TYPE         = 1 << 0,
+
+    // The disposition-type is not 'inline' or 'attachment'.
+    HAS_UNKNOWN_DISPOSITION_TYPE = 1 << 1,
+
+    // Has a valid non-empty 'name' attribute.
+    HAS_NAME                     = 1 << 2,
+
+    // Has a valid non-empty 'filename' attribute.
+    HAS_FILENAME                 = 1 << 3,
+
+    // Has a valid non-empty 'filename*' attribute.
+    HAS_EXT_FILENAME             = 1 << 4,
+
+    // The following fields are properties of the 'filename' attribute:
+
+    // Quoted-string contains non-ASCII characters.
+    HAS_NON_ASCII_STRINGS        = 1 << 5,
+
+    // Quoted-string contains percent-encoding.
+    HAS_PERCENT_ENCODED_STRINGS  = 1 << 6,
+
+    // Quoted-string contains RFC 2047 encoded words.
+    HAS_RFC2047_ENCODED_STRINGS  = 1 << 7
+  };
+
   HttpContentDisposition(const std::string& header,
                          const std::string& referrer_charset);
   ~HttpContentDisposition();
@@ -28,6 +59,9 @@ class NET_EXPORT HttpContentDisposition {
   Type type() const { return type_; }
   const std::string& filename() const { return filename_; }
 
+  // A combination of ParseResultFlags values.
+  int parse_result_flags() const { return parse_result_flags_; }
+
  private:
   void Parse(const std::string& header, const std::string& referrer_charset);
   std::string::const_iterator ConsumeDispositionType(
@@ -35,6 +69,7 @@ class NET_EXPORT HttpContentDisposition {
 
   Type type_;
   std::string filename_;
+  int parse_result_flags_;
 
   DISALLOW_COPY_AND_ASSIGN(HttpContentDisposition);
 };
diff --git a/net/http/http_content_disposition_unittest.cc b/net/http/http_content_disposition_unittest.cc
index 240c699..66c1a7f 100644
--- a/net/http/http_content_disposition_unittest.cc
+++ b/net/http/http_content_disposition_unittest.cc
@@ -512,4 +512,79 @@ TEST(HttpContentDispositionTest, tc2231) {
   }
 }
 
+TEST(HttpContentDispositionTest, ParseResult) {
+  const struct ParseResultTestCase {
+    const char* header;
+    int expected_flags;
+  } kTestCases[] = {
+    // Basic feature tests
+    { "", HttpContentDisposition::INVALID },
+    { "example=x", HttpContentDisposition::INVALID },
+    { "attachment; filename=", HttpContentDisposition::HAS_DISPOSITION_TYPE },
+    { "attachment; name=", HttpContentDisposition::HAS_DISPOSITION_TYPE },
+    { "attachment; filename*=", HttpContentDisposition::HAS_DISPOSITION_TYPE },
+    { "attachment; filename==?utf-8?Q?\?=",
+      HttpContentDisposition::HAS_DISPOSITION_TYPE },
+    { "filename=x", HttpContentDisposition::HAS_FILENAME },
+    { "example; filename=x",
+      HttpContentDisposition::HAS_DISPOSITION_TYPE |
+      HttpContentDisposition::HAS_UNKNOWN_DISPOSITION_TYPE |
+      HttpContentDisposition::HAS_FILENAME},
+    { "attachment; filename=x",
+      HttpContentDisposition::HAS_DISPOSITION_TYPE |
+      HttpContentDisposition::HAS_FILENAME },
+    { "attachment; filename=x; name=y",
+      HttpContentDisposition::HAS_DISPOSITION_TYPE |
+      HttpContentDisposition::HAS_FILENAME |
+      HttpContentDisposition::HAS_NAME },
+    { "attachment; name=y; filename*=utf-8''foo; name=x",
+      HttpContentDisposition::HAS_DISPOSITION_TYPE |
+      HttpContentDisposition::HAS_EXT_FILENAME |
+      HttpContentDisposition::HAS_NAME },
+
+    // Feature tests for 'filename' attribute.
+    { "filename=foo\xcc\x88",
+      HttpContentDisposition::HAS_FILENAME |
+      HttpContentDisposition::HAS_NON_ASCII_STRINGS },
+    { "filename=foo%cc%88",
+      HttpContentDisposition::HAS_FILENAME |
+      HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS },
+    { "filename==?utf-8?Q?foo?=",
+      HttpContentDisposition::HAS_FILENAME |
+      HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS },
+    { "filename=\"=?utf-8?Q?foo?=\"",
+      HttpContentDisposition::HAS_FILENAME |
+      HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS },
+    { "filename==?utf-8?Q?foo?", HttpContentDisposition::INVALID },
+    { "name=foo\xcc\x88",
+      HttpContentDisposition::HAS_NAME },
+
+    // Shouldn't set HAS_NON_ASCII_STRINGS based on 'name' attribute.
+    { "filename=x; name=foo\xcc\x88",
+      HttpContentDisposition::HAS_FILENAME |
+      HttpContentDisposition::HAS_NAME },
+    { "filename=foo\xcc\x88 foo%cc%88 =?utf-8?Q?foo?=",
+      HttpContentDisposition::HAS_FILENAME |
+      HttpContentDisposition::HAS_NON_ASCII_STRINGS |
+      HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS |
+      HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS },
+
+    // If 'filename' attribute is invalid, shouldn't set any flags based on it.
+    { "filename=foo\xcc\x88 foo%cc%88 =?utf-8?Q?foo?",
+      HttpContentDisposition::INVALID },
+    { "filename=foo\xcc\x88 foo%cc%88 =?utf-8?Q?foo?; name=x",
+      HttpContentDisposition::HAS_NAME },
+  };
+
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
+    const ParseResultTestCase& test_case = kTestCases[i];
+    HttpContentDisposition content_disposition(test_case.header, "utf-8");
+    int result = content_disposition.parse_result_flags();
+
+    SCOPED_TRACE(testing::Message() << "Test case " << i
+                                    << " with header " << test_case.header);
+    EXPECT_EQ(test_case.expected_flags, result);
+  }
+}
+
 }  // namespace net
author	asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-12-17 00:16:54 +0000
committer	asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-12-17 00:16:54 +0000
commit	a7206e77788f434a825828b804bf6446d797f8a8 (patch)
tree	f0ac52f8e41cbca071cdac1ed70bf0a8c38dc33b /net
parent	b3dbcb5e2445baec0ceca5e57de9bf07621679ab (diff)
download	chromium_src-a7206e77788f434a825828b804bf6446d797f8a8.zip chromium_src-a7206e77788f434a825828b804bf6446d797f8a8.tar.gz chromium_src-a7206e77788f434a825828b804bf6446d797f8a8.tar.bz2