Delete net::GetHeaderParamValue

This function is a trap. It's a quick-and-dirty parser that has many nutty quirks. There's only one caller left, and that caller should really be using a Content-Type-specific parser anyway. Review URL: http://codereview.chromium.org/9296005 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@119790 0039d316-1c4b-4281-b951-d872f2087c98
author: abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-01-31 01:00:49 +0000
committer: abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-01-31 01:00:49 +0000
commit: b1fd7195a16a473e8446b24701579a3347a31b3b (patch)
tree: c1f28b37482d154b78c5f87f6fc0790a0bf3125a /net/http
parent: 5ae7f230fb22a9cb8f7f23432b251bbbfbc0bb73 (diff)
download: chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.zip
chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.tar.gz
chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.tar.bz2
4 files changed, 133 insertions, 38 deletions
diff --git a/net/http/http_response_headers.cc b/net/http/http_response_headers.cc
index 98bd052..61fed43 100644
--- a/net/http/http_response_headers.cc
+++ b/net/http/http_response_headers.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -844,7 +844,7 @@ void HttpResponseHeaders::GetMimeTypeAndCharset(std::string* mime_type,
 
   void* iter = NULL;
   while (EnumerateHeader(&iter, name, &value))
-    HttpUtil::ParseContentType(value, mime_type, charset, &had_charset);
+    HttpUtil::ParseContentType(value, mime_type, charset, &had_charset, NULL);
 }
 
 bool HttpResponseHeaders::GetMimeType(std::string* mime_type) const {
diff --git a/net/http/http_util.cc b/net/http/http_util.cc
index 1642d4f..db37dfb 100644
--- a/net/http/http_util.cc
+++ b/net/http/http_util.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -90,8 +90,12 @@ size_t HttpUtil::FindDelimiter(const string& line, size_t search_start,
 
 // static
 void HttpUtil::ParseContentType(const string& content_type_str,
-                                string* mime_type, string* charset,
-                                bool *had_charset) {
+                                string* mime_type,
+                                string* charset,
+                                bool* had_charset,
+                                string* boundary) {
+  const string::const_iterator begin = content_type_str.begin();
+
   // Trim leading and trailing whitespace from type.  We include '(' in
   // the trailing trim set to catch media-type comments, which are not at all
   // standard, but may occur in rare cases.
@@ -103,34 +107,40 @@ void HttpUtil::ParseContentType(const string& content_type_str,
 
   size_t charset_val = 0;
   size_t charset_end = 0;
+  bool type_has_charset = false;
 
   // Iterate over parameters
-  bool type_has_charset = false;
   size_t param_start = content_type_str.find_first_of(';', type_end);
   if (param_start != string::npos) {
-    // We have parameters.  Iterate over them.
-    size_t cur_param_start = param_start + 1;
-    do {
-      size_t cur_param_end =
-          FindDelimiter(content_type_str, cur_param_start, ';');
-
-      size_t param_name_start = content_type_str.find_first_not_of(
-          HTTP_LWS, cur_param_start);
-      param_name_start = std::min(param_name_start, cur_param_end);
-
-      static const char charset_str[] = "charset=";
-      size_t charset_end_offset = std::min(
-          param_name_start + sizeof(charset_str) - 1, cur_param_end);
-      if (LowerCaseEqualsASCII(
-              content_type_str.begin() + param_name_start,
-              content_type_str.begin() + charset_end_offset, charset_str)) {
-        charset_val = param_name_start + sizeof(charset_str) - 1;
-        charset_end = cur_param_end;
+    StringTokenizer tokenizer(begin + param_start, content_type_str.end(),
+                              ";");
+    tokenizer.set_quote_chars("\"");
+    while (tokenizer.GetNext()) {
+      string::const_iterator equals_sign =
+          std::find(tokenizer.token_begin(), tokenizer.token_end(), '=');
+      if (equals_sign == tokenizer.token_end())
+        continue;
+
+      string::const_iterator param_name_begin = tokenizer.token_begin();
+      string::const_iterator param_name_end = equals_sign;
+      TrimLWS(&param_name_begin, &param_name_end);
+
+      string::const_iterator param_value_begin = equals_sign + 1;
+      string::const_iterator param_value_end = tokenizer.token_end();
+      DCHECK(param_value_begin <= tokenizer.token_end());
+      TrimLWS(&param_value_begin, &param_value_end);
+
+      if (LowerCaseEqualsASCII(param_name_begin, param_name_end, "charset")) {
+        // TODO(abarth): Refactor this function to consistently use iterators.
+        charset_val = param_value_begin - begin;
+        charset_end = param_value_end - begin;
         type_has_charset = true;
+      } else if (LowerCaseEqualsASCII(param_name_begin, param_name_end,
+                                      "boundary")) {
+        if (boundary)
+          boundary->assign(param_value_begin, param_value_end);
       }
-
-      cur_param_start = cur_param_end + 1;
-    } while (cur_param_start < content_type_str.length());
+    }
   }
 
   if (type_has_charset) {
@@ -162,19 +172,16 @@ void HttpUtil::ParseContentType(const string& content_type_str,
       content_type_str != "*/*" &&
       content_type_str.find_first_of('/') != string::npos) {
     // Common case here is that mime_type is empty
-    bool eq = !mime_type->empty() &&
-              LowerCaseEqualsASCII(content_type_str.begin() + type_val,
-                                   content_type_str.begin() + type_end,
-                                   mime_type->data());
+    bool eq = !mime_type->empty() && LowerCaseEqualsASCII(begin + type_val,
+                                                          begin + type_end,
+                                                          mime_type->data());
     if (!eq) {
-      mime_type->assign(content_type_str.begin() + type_val,
-                        content_type_str.begin() + type_end);
+      mime_type->assign(begin + type_val, begin + type_end);
       StringToLowerASCII(mime_type);
     }
     if ((!eq && *had_charset) || type_has_charset) {
       *had_charset = true;
-      charset->assign(content_type_str.begin() + charset_val,
-                      content_type_str.begin() + charset_end);
+      charset->assign(begin + charset_val, begin + charset_end);
       StringToLowerASCII(charset);
     }
   }
diff --git a/net/http/http_util.h b/net/http/http_util.h
index a09377e..41f2713 100644
--- a/net/http/http_util.h
+++ b/net/http/http_util.h
@@ -43,11 +43,14 @@ class NET_EXPORT HttpUtil {
   // Parses the value of a Content-Type header.  The resulting mime_type and
   // charset values are normalized to lowercase.  The mime_type and charset
   // output values are only modified if the content_type_str contains a mime
-  // type and charset value, respectively.
+  // type and charset value, respectively.  The boundary output value is
+  // optional and will be assigned the (quoted) value of the boundary
+  // parameter, if any.
   static void ParseContentType(const std::string& content_type_str,
                                std::string* mime_type,
                                std::string* charset,
-                               bool* had_charset);
+                               bool* had_charset,
+                               std::string* boundary);
 
   // Scans the headers and look for the first "Range" header in |headers|,
   // if "Range" exists and the first one of it is well formatted then returns
diff --git a/net/http/http_util_unittest.cc b/net/http/http_util_unittest.cc
index 7da4fc9..8293c80 100644
--- a/net/http/http_util_unittest.cc
+++ b/net/http/http_util_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -632,6 +632,91 @@ TEST(HttpUtilTest, GenerateAcceptCharsetHeader) {
             HttpUtil::GenerateAcceptCharsetHeader("EUC-JP"));
 }
 
+// HttpResponseHeadersTest.GetMimeType also tests ParseContentType.
+TEST(HttpUtilTest, ParseContentType) {
+  const struct {
+    const char* content_type;
+    const char* expected_mime_type;
+    const char* expected_charset;
+    const bool expected_had_charset;
+    const char* expected_boundary;
+  } tests[] = {
+    { "text/html; charset=utf-8",
+      "text/html",
+      "utf-8",
+      true,
+      ""
+    },
+    { "text/html; charset =utf-8",
+      "text/html",
+      "utf-8",
+      true,
+      ""
+    },
+    { "text/html; charset= utf-8",
+      "text/html",
+      "utf-8",
+      true,
+      ""
+    },
+    { "text/html; charset=utf-8 ",
+      "text/html",
+      "utf-8",
+      true,
+      ""
+    },
+    { "text/html; boundary=\"WebKit-ada-df-dsf-adsfadsfs\"",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs\""
+    },
+    { "text/html; boundary =\"WebKit-ada-df-dsf-adsfadsfs\"",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs\""
+    },
+    { "text/html; boundary= \"WebKit-ada-df-dsf-adsfadsfs\"",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs\""
+    },
+    { "text/html; boundary= \"WebKit-ada-df-dsf-adsfadsfs\"   ",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs\""
+    },
+    { "text/html; boundary=\"WebKit-ada-df-dsf-adsfadsfs  \"",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs  \""
+    },
+    { "text/html; boundary=WebKit-ada-df-dsf-adsfadsfs",
+      "text/html",
+      "",
+      false,
+      "WebKit-ada-df-dsf-adsfadsfs"
+    },
+    // TODO(abarth): Add more interesting test cases.
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
+    std::string mime_type;
+    std::string charset;
+    bool had_charset = false;
+    std::string boundary;
+    net::HttpUtil::ParseContentType(tests[i].content_type, &mime_type,
+                                    &charset, &had_charset, &boundary);
+    EXPECT_EQ(tests[i].expected_mime_type, mime_type) << "i=" << i;
+    EXPECT_EQ(tests[i].expected_charset, charset) << "i=" << i;
+    EXPECT_EQ(tests[i].expected_had_charset, had_charset) << "i=" << i;
+    EXPECT_EQ(tests[i].expected_boundary, boundary) << "i=" << i;
+  }
+}
+
 TEST(HttpUtilTest, ParseRanges) {
   const struct {
     const char* headers;
author	abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-01-31 01:00:49 +0000
committer	abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-01-31 01:00:49 +0000
commit	b1fd7195a16a473e8446b24701579a3347a31b3b (patch)
tree	c1f28b37482d154b78c5f87f6fc0790a0bf3125a /net/http
parent	5ae7f230fb22a9cb8f7f23432b251bbbfbc0bb73 (diff)
download	chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.zip chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.tar.gz chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.tar.bz2