Delete net::GetHeaderParamValue

This function is a trap. It's a quick-and-dirty parser that has many nutty quirks. There's only one caller left, and that callers should really be using a Content-Type-specific parser anyway. Review URL: http://codereview.chromium.org/9296005 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@119790 0039d316-1c4b-4281-b951-d872f2087c98
author: abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-01-31 01:00:49 +0000
committer: abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-01-31 01:00:49 +0000
commit: b1fd7195a16a473e8446b24701579a3347a31b3b (patch)
tree: c1f28b37482d154b78c5f87f6fc0790a0bf3125a /net
parent: 5ae7f230fb22a9cb8f7f23432b251bbbfbc0bb73 (diff)
download: chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.zip
chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.tar.gz
chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.tar.bz2
7 files changed, 133 insertions, 162 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index d2fd1f5..5ecf740 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -1239,50 +1239,6 @@ bool DecodeExtValue(const std::string& param_value, std::string* decoded) {
   return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded);
 }
 
-// TODO(mpcomplete): This is a quick and dirty implementation for now.  I'm
-// sure this doesn't properly handle all (most?) cases.
-std::string GetHeaderParamValue(const std::string& header,
-                                const std::string& param_name,
-                                QuoteRule::Type quote_rule) {
-  // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".
-  std::string::const_iterator param_begin =
-      std::search(header.begin(), header.end(), param_name.begin(),
-                  param_name.end(), base::CaseInsensitiveCompareASCII<char>());
-
-  if (param_begin == header.end())
-    return std::string();
-  param_begin += param_name.length();
-
-  std::string whitespace(" \t");
-  size_t equals_offset =
-      header.find_first_not_of(whitespace, param_begin - header.begin());
-  if (equals_offset == std::string::npos || header[equals_offset] != '=')
-    return std::string();
-
-  size_t param_value_offset =
-      header.find_first_not_of(whitespace, equals_offset + 1);
-  if (param_value_offset == std::string::npos)
-    return std::string();
-
-  param_begin = header.begin() + param_value_offset;
-  if (param_begin == header.end())
-    return std::string();
-
-  std::string::const_iterator param_end;
-  if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {
-    ++param_begin;  // skip past the quote.
-    param_end = std::find(param_begin, header.end(), '"');
-    // If the closing quote is missing, we will treat the rest of the
-    // string as the parameter.  We can't set |param_end| to the
-    // location of the separator (';'), since the separator is
-    // technically quoted. See: http://crbug.com/58840
-  } else {
-    param_end = std::find(param_begin + 1, header.end(), ';');
-  }
-
-  return std::string(param_begin, param_end);
-}
-
 string16 IDNToUnicode(const std::string& host,
                       const std::string& languages) {
   return IDNToUnicodeWithOffsets(host, languages, NULL);
diff --git a/net/base/net_util.h b/net/base/net_util.h
index fed20f0..757086a 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -48,18 +48,6 @@ namespace net {
 typedef uint32 FormatUrlType;
 typedef uint32 FormatUrlTypes;
 
-// Used by GetHeaderParamValue to determine how to handle quotes in the value.
-class QuoteRule {
- public:
-  enum Type {
-    KEEP_OUTER_QUOTES,
-    REMOVE_OUTER_QUOTES,
-  };
-
- private:
-  QuoteRule();
-};
-
 // Nothing is ommitted.
 NET_EXPORT extern const FormatUrlType kFormatUrlOmitNothing;
 
@@ -144,13 +132,6 @@ NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url);
 NET_EXPORT std::string GetSpecificHeader(const std::string& headers,
                                          const std::string& name);
 
-// Return the value of the HTTP response header field's parameter named
-// 'param_name'.  Returns the empty string if the parameter is not found or is
-// improperly formatted.
-NET_EXPORT std::string GetHeaderParamValue(const std::string& header,
-                                           const std::string& param_name,
-                                           QuoteRule::Type quote_rule);
-
 // TODO(abarth): Move these functions to http_content_disposition.cc.
 bool DecodeFilenameValue(const std::string& input,
                          const std::string& referrer_charset,
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index f34748b..0577989 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -748,67 +748,6 @@ TEST(NetUtilTest, GetSpecificHeader) {
   }
 }
 
-TEST(NetUtilTest, GetHeaderParamValue) {
-  const HeaderParamCase tests[] = {
-    {"Content-type", "charset", "utf-8"},
-    {"content-disposition", "filename", "download.pdf"},
-    {"Content-Type", "badparam", ""},
-    {"X-Malformed", "arg", "test\""},
-    {"X-Malformed2", "arg", ""},
-    {"X-Test", "arg1", "val1"},
-    {"X-Test", "arg2", "val2"},
-    {"Bad-Header", "badparam", ""},
-    {"Bad-Header", "", ""},
-    {"", "badparam", ""},
-    {"", "", ""},
-  };
-  // TODO(mpcomplete): add tests for other formats of headers.
-
-  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
-    std::string header_value =
-        GetSpecificHeader(google_headers, tests[i].header_name);
-    std::string result =
-        GetHeaderParamValue(header_value, tests[i].param_name,
-                            QuoteRule::REMOVE_OUTER_QUOTES);
-    EXPECT_EQ(result, tests[i].expected);
-  }
-
-  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
-    std::string header_value =
-      GetSpecificHeader(std::string(), tests[i].header_name);
-    std::string result =
-        GetHeaderParamValue(header_value, tests[i].param_name,
-                            QuoteRule::REMOVE_OUTER_QUOTES);
-    EXPECT_EQ(result, std::string());
-  }
-}
-
-TEST(NetUtilTest, GetHeaderParamValueQuotes) {
-  struct {
-    const char* header;
-    const char* expected_with_quotes;
-    const char* expected_without_quotes;
-  } tests[] = {
-    {"filename=foo", "foo", "foo"},
-    {"filename=\"foo\"", "\"foo\"", "foo"},
-    {"filename=foo\"", "foo\"", "foo\""},
-    {"filename=fo\"o", "fo\"o", "fo\"o"},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
-    std::string actual_with_quotes =
-        GetHeaderParamValue(tests[i].header, "filename",
-                            QuoteRule::KEEP_OUTER_QUOTES);
-    std::string actual_without_quotes =
-        GetHeaderParamValue(tests[i].header, "filename",
-                            QuoteRule::REMOVE_OUTER_QUOTES);
-    EXPECT_EQ(tests[i].expected_with_quotes, actual_with_quotes)
-        << "Failed while processing: " << tests[i].header;
-    EXPECT_EQ(tests[i].expected_without_quotes, actual_without_quotes)
-        << "Failed while processing: " << tests[i].header;
-  }
-}
-
 TEST(NetUtilTest, IDNToUnicodeFast) {
   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
     for (size_t j = 0; j < arraysize(kLanguages); j++) {
diff --git a/net/http/http_response_headers.cc b/net/http/http_response_headers.cc
index 98bd052..61fed43 100644
--- a/net/http/http_response_headers.cc
+++ b/net/http/http_response_headers.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -844,7 +844,7 @@ void HttpResponseHeaders::GetMimeTypeAndCharset(std::string* mime_type,
 
   void* iter = NULL;
   while (EnumerateHeader(&iter, name, &value))
-    HttpUtil::ParseContentType(value, mime_type, charset, &had_charset);
+    HttpUtil::ParseContentType(value, mime_type, charset, &had_charset, NULL);
 }
 
 bool HttpResponseHeaders::GetMimeType(std::string* mime_type) const {
diff --git a/net/http/http_util.cc b/net/http/http_util.cc
index 1642d4f..db37dfb 100644
--- a/net/http/http_util.cc
+++ b/net/http/http_util.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -90,8 +90,12 @@ size_t HttpUtil::FindDelimiter(const string& line, size_t search_start,
 
 // static
 void HttpUtil::ParseContentType(const string& content_type_str,
-                                string* mime_type, string* charset,
-                                bool *had_charset) {
+                                string* mime_type,
+                                string* charset,
+                                bool* had_charset,
+                                string* boundary) {
+  const string::const_iterator begin = content_type_str.begin();
+
   // Trim leading and trailing whitespace from type.  We include '(' in
   // the trailing trim set to catch media-type comments, which are not at all
   // standard, but may occur in rare cases.
@@ -103,34 +107,40 @@ void HttpUtil::ParseContentType(const string& content_type_str,
 
   size_t charset_val = 0;
   size_t charset_end = 0;
+  bool type_has_charset = false;
 
   // Iterate over parameters
-  bool type_has_charset = false;
   size_t param_start = content_type_str.find_first_of(';', type_end);
   if (param_start != string::npos) {
-    // We have parameters.  Iterate over them.
-    size_t cur_param_start = param_start + 1;
-    do {
-      size_t cur_param_end =
-          FindDelimiter(content_type_str, cur_param_start, ';');
-
-      size_t param_name_start = content_type_str.find_first_not_of(
-          HTTP_LWS, cur_param_start);
-      param_name_start = std::min(param_name_start, cur_param_end);
-
-      static const char charset_str[] = "charset=";
-      size_t charset_end_offset = std::min(
-          param_name_start + sizeof(charset_str) - 1, cur_param_end);
-      if (LowerCaseEqualsASCII(
-              content_type_str.begin() + param_name_start,
-              content_type_str.begin() + charset_end_offset, charset_str)) {
-        charset_val = param_name_start + sizeof(charset_str) - 1;
-        charset_end = cur_param_end;
+    StringTokenizer tokenizer(begin + param_start, content_type_str.end(),
+                              ";");
+    tokenizer.set_quote_chars("\"");
+    while (tokenizer.GetNext()) {
+      string::const_iterator equals_sign =
+          std::find(tokenizer.token_begin(), tokenizer.token_end(), '=');
+      if (equals_sign == tokenizer.token_end())
+        continue;
+
+      string::const_iterator param_name_begin = tokenizer.token_begin();
+      string::const_iterator param_name_end = equals_sign;
+      TrimLWS(&param_name_begin, &param_name_end);
+
+      string::const_iterator param_value_begin = equals_sign + 1;
+      string::const_iterator param_value_end = tokenizer.token_end();
+      DCHECK(param_value_begin <= tokenizer.token_end());
+      TrimLWS(&param_value_begin, &param_value_end);
+
+      if (LowerCaseEqualsASCII(param_name_begin, param_name_end, "charset")) {
+        // TODO(abarth): Refactor this function to consistently use iterators.
+        charset_val = param_value_begin - begin;
+        charset_end = param_value_end - begin;
         type_has_charset = true;
+      } else if (LowerCaseEqualsASCII(param_name_begin, param_name_end,
+                                      "boundary")) {
+        if (boundary)
+          boundary->assign(param_value_begin, param_value_end);
       }
-
-      cur_param_start = cur_param_end + 1;
-    } while (cur_param_start < content_type_str.length());
+    }
   }
 
   if (type_has_charset) {
@@ -162,19 +172,16 @@ void HttpUtil::ParseContentType(const string& content_type_str,
       content_type_str != "*/*" &&
       content_type_str.find_first_of('/') != string::npos) {
     // Common case here is that mime_type is empty
-    bool eq = !mime_type->empty() &&
-              LowerCaseEqualsASCII(content_type_str.begin() + type_val,
-                                   content_type_str.begin() + type_end,
-                                   mime_type->data());
+    bool eq = !mime_type->empty() && LowerCaseEqualsASCII(begin + type_val,
+                                                          begin + type_end,
+                                                          mime_type->data());
     if (!eq) {
-      mime_type->assign(content_type_str.begin() + type_val,
-                        content_type_str.begin() + type_end);
+      mime_type->assign(begin + type_val, begin + type_end);
       StringToLowerASCII(mime_type);
     }
     if ((!eq && *had_charset) || type_has_charset) {
       *had_charset = true;
-      charset->assign(content_type_str.begin() + charset_val,
-                      content_type_str.begin() + charset_end);
+      charset->assign(begin + charset_val, begin + charset_end);
       StringToLowerASCII(charset);
     }
   }
diff --git a/net/http/http_util.h b/net/http/http_util.h
index a09377e..41f2713 100644
--- a/net/http/http_util.h
+++ b/net/http/http_util.h
@@ -43,11 +43,14 @@ class NET_EXPORT HttpUtil {
   // Parses the value of a Content-Type header.  The resulting mime_type and
   // charset values are normalized to lowercase.  The mime_type and charset
   // output values are only modified if the content_type_str contains a mime
-  // type and charset value, respectively.
+  // type and charset value, respectively.  The boundary output value is
+  // optional and will be assigned the (quoted) value of the boundary
+  // paramter, if any.
   static void ParseContentType(const std::string& content_type_str,
                                std::string* mime_type,
                                std::string* charset,
-                               bool* had_charset);
+                               bool* had_charset,
+                               std::string* boundary);
 
   // Scans the headers and look for the first "Range" header in |headers|,
   // if "Range" exists and the first one of it is well formatted then returns
diff --git a/net/http/http_util_unittest.cc b/net/http/http_util_unittest.cc
index 7da4fc9..8293c80 100644
--- a/net/http/http_util_unittest.cc
+++ b/net/http/http_util_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -632,6 +632,91 @@ TEST(HttpUtilTest, GenerateAcceptCharsetHeader) {
             HttpUtil::GenerateAcceptCharsetHeader("EUC-JP"));
 }
 
+// HttpResponseHeadersTest.GetMimeType also tests ParseContentType.
+TEST(HttpUtilTest, ParseContentType) {
+  const struct {
+    const char* content_type;
+    const char* expected_mime_type;
+    const char* expected_charset;
+    const bool expected_had_charset;
+    const char* expected_boundary;
+  } tests[] = {
+    { "text/html; charset=utf-8",
+      "text/html",
+      "utf-8",
+      true,
+      ""
+    },
+    { "text/html; charset =utf-8",
+      "text/html",
+      "utf-8",
+      true,
+      ""
+    },
+    { "text/html; charset= utf-8",
+      "text/html",
+      "utf-8",
+      true,
+      ""
+    },
+    { "text/html; charset=utf-8 ",
+      "text/html",
+      "utf-8",
+      true,
+      ""
+    },
+    { "text/html; boundary=\"WebKit-ada-df-dsf-adsfadsfs\"",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs\""
+    },
+    { "text/html; boundary =\"WebKit-ada-df-dsf-adsfadsfs\"",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs\""
+    },
+    { "text/html; boundary= \"WebKit-ada-df-dsf-adsfadsfs\"",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs\""
+    },
+    { "text/html; boundary= \"WebKit-ada-df-dsf-adsfadsfs\"   ",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs\""
+    },
+    { "text/html; boundary=\"WebKit-ada-df-dsf-adsfadsfs  \"",
+      "text/html",
+      "",
+      false,
+      "\"WebKit-ada-df-dsf-adsfadsfs  \""
+    },
+    { "text/html; boundary=WebKit-ada-df-dsf-adsfadsfs",
+      "text/html",
+      "",
+      false,
+      "WebKit-ada-df-dsf-adsfadsfs"
+    },
+    // TODO(abarth): Add more interesting test cases.
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
+    std::string mime_type;
+    std::string charset;
+    bool had_charset = false;
+    std::string boundary;
+    net::HttpUtil::ParseContentType(tests[i].content_type, &mime_type,
+                                    &charset, &had_charset, &boundary);
+    EXPECT_EQ(tests[i].expected_mime_type, mime_type) << "i=" << i;
+    EXPECT_EQ(tests[i].expected_charset, charset) << "i=" << i;
+    EXPECT_EQ(tests[i].expected_had_charset, had_charset) << "i=" << i;
+    EXPECT_EQ(tests[i].expected_boundary, boundary) << "i=" << i;
+  }
+}
+
 TEST(HttpUtilTest, ParseRanges) {
   const struct {
     const char* headers;
author	abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-01-31 01:00:49 +0000
committer	abarth@chromium.org <abarth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-01-31 01:00:49 +0000
commit	b1fd7195a16a473e8446b24701579a3347a31b3b (patch)
tree	c1f28b37482d154b78c5f87f6fc0790a0bf3125a /net
parent	5ae7f230fb22a9cb8f7f23432b251bbbfbc0bb73 (diff)
download	chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.zip chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.tar.gz chromium_src-b1fd7195a16a473e8446b24701579a3347a31b3b.tar.bz2