4 files changed, 341 insertions, 296 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 2b11c4d..5f321c6 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -25,7 +25,6 @@
 #include <netinet/in.h>
 #endif
 
-#include "base/base64.h"
 #include "base/basictypes.h"
 #include "base/file_path.h"
 #include "base/file_util.h"
@@ -71,7 +70,6 @@
 #include "net/http/http_content_disposition.h"
 #include "unicode/datefmt.h"
 #include "unicode/regex.h"
-#include "unicode/ucnv.h"
 #include "unicode/uidna.h"
 #include "unicode/ulocdata.h"
 #include "unicode/uniset.h"
@@ -175,196 +173,6 @@ std::string::size_type CountTrailingChars(
 }
 #endif
 
-// Similar to Base64Decode. Decodes a Q-encoded string to a sequence
-// of bytes. If input is invalid, return false.
-bool QPDecode(const std::string& input, std::string* output) {
-  std::string temp;
-  temp.reserve(input.size());
-  for (std::string::const_iterator it = input.begin(); it != input.end();
-       ++it) {
-    if (*it == '_') {
-      temp.push_back(' ');
-    } else if (*it == '=') {
-      if ((input.end() - it < 3) ||
-          !IsHexDigit(static_cast<unsigned char>(*(it + 1))) ||
-          !IsHexDigit(static_cast<unsigned char>(*(it + 2))))
-        return false;
-      unsigned char ch = HexDigitToInt(*(it + 1)) * 16 +
-                         HexDigitToInt(*(it + 2));
-      temp.push_back(static_cast<char>(ch));
-      ++it;
-      ++it;
-    } else if (0x20 < *it && *it < 0x7F) {
-      // In a Q-encoded word, only printable ASCII characters
-      // represent themselves. Besides, space, '=', '_' and '?' are
-      // not allowed, but they're already filtered out.
-      DCHECK_NE('=', *it);
-      DCHECK_NE('?', *it);
-      DCHECK_NE('_', *it);
-      temp.push_back(*it);
-    } else {
-      return false;
-    }
-  }
-  output->swap(temp);
-  return true;
-}
-
-enum RFC2047EncodingType {Q_ENCODING, B_ENCODING};
-bool DecodeBQEncoding(const std::string& part,
-                      RFC2047EncodingType enc_type,
-                      const std::string& charset,
-                      std::string* output) {
-  std::string decoded;
-  if (!((enc_type == B_ENCODING) ?
-      base::Base64Decode(part, &decoded) : QPDecode(part, &decoded)))
-    return false;
-
-  if (decoded.empty()) {
-    output->clear();
-    return true;
-  }
-
-  UErrorCode err = U_ZERO_ERROR;
-  UConverter* converter(ucnv_open(charset.c_str(), &err));
-  if (U_FAILURE(err))
-    return false;
-
-  // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8.
-  // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes
-  // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a
-  // trailing '\0'.
-  size_t output_length = decoded.length() * 3 + 1;
-  char* buf = WriteInto(output, output_length);
-  output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length,
-                                     decoded.data(), decoded.length(), &err);
-  ucnv_close(converter);
-  if (U_FAILURE(err))
-    return false;
-  output->resize(output_length);
-  return true;
-}
-
-bool DecodeWord(const std::string& encoded_word,
-                const std::string& referrer_charset,
-                bool* is_rfc2047,
-                std::string* output) {
-  *is_rfc2047 = false;
-  output->clear();
-  if (encoded_word.empty())
-    return true;
-
-  if (!IsStringASCII(encoded_word)) {
-    // Try UTF-8, referrer_charset and the native OS default charset in turn.
-    if (IsStringUTF8(encoded_word)) {
-      *output = encoded_word;
-    } else {
-      string16 utf16_output;
-      if (!referrer_charset.empty() &&
-          base::CodepageToUTF16(encoded_word, referrer_charset.c_str(),
-                                base::OnStringConversionError::FAIL,
-                                &utf16_output)) {
-        *output = UTF16ToUTF8(utf16_output);
-      } else {
-        *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));
-      }
-    }
-
-    return true;
-  }
-
-  // RFC 2047 : one of encoding methods supported by Firefox and relatively
-  // widely used by web servers.
-  // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.
-  // We don't care about the length restriction (72 bytes) because
-  // many web servers generate encoded words longer than the limit.
-  std::string tmp;
-  *is_rfc2047 = true;
-  int part_index = 0;
-  std::string charset;
-  StringTokenizer t(encoded_word, "?");
-  RFC2047EncodingType enc_type = Q_ENCODING;
-  while (*is_rfc2047 && t.GetNext()) {
-    std::string part = t.token();
-    switch (part_index) {
-      case 0:
-        if (part != "=") {
-          *is_rfc2047 = false;
-          break;
-        }
-        ++part_index;
-        break;
-      case 1:
-        // Do we need charset validity check here?
-        charset = part;
-        ++part_index;
-        break;
-      case 2:
-        if (part.size() > 1 ||
-            part.find_first_of("bBqQ") == std::string::npos) {
-          *is_rfc2047 = false;
-          break;
-        }
-        if (part[0] == 'b' || part[0] == 'B') {
-          enc_type = B_ENCODING;
-        }
-        ++part_index;
-        break;
-      case 3:
-        *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp);
-        if (!*is_rfc2047) {
-          // Last minute failure. Invalid B/Q encoding. Rather than
-          // passing it through, return now.
-          return false;
-        }
-        ++part_index;
-        break;
-      case 4:
-        if (part != "=") {
-          // Another last minute failure !
-          // Likely to be a case of two encoded-words in a row or
-          // an encoded word followed by a non-encoded word. We can be
-          // generous, but it does not help much in terms of compatibility,
-          // I believe. Return immediately.
-          *is_rfc2047 = false;
-          return false;
-        }
-        ++part_index;
-        break;
-      default:
-        *is_rfc2047 = false;
-        return false;
-    }
-  }
-
-  if (*is_rfc2047) {
-    if (*(encoded_word.end() - 1) == '=') {
-      output->swap(tmp);
-      return true;
-    }
-    // encoded_word ending prematurelly with '?' or extra '?'
-    *is_rfc2047 = false;
-    return false;
-  }
-
-  // We're not handling 'especial' characters quoted with '\', but
-  // it should be Ok because we're not an email client but a
-  // web browser.
-
-  // What IE6/7 does: %-escaped UTF-8.
-  tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES);
-  if (IsStringUTF8(tmp)) {
-    output->swap(tmp);
-    return true;
-    // We can try either the OS default charset or 'origin charset' here,
-    // As far as I can tell, IE does not support it. However, I've seen
-    // web servers emit %-escaped string in a legacy encoding (usually
-    // origin charset).
-    // TODO(jungshik) : Test IE further and consider adding a fallback here.
-  }
-  return false;
-}
-
 // Does some simple normalization of scripts so we can allow certain scripts
 // to exist together.
 // TODO(brettw) bug 880223: we should allow some other languages to be
@@ -939,12 +747,20 @@ std::string GetFileNameFromURL(const GURL& url,
 
   // The URL's path should be escaped UTF-8, but may not be.
   std::string decoded_filename = unescaped_url_filename;
-  if (!IsStringASCII(decoded_filename)) {
-    bool ignore;
+  if (!IsStringUTF8(decoded_filename)) {
     // TODO(jshin): this is probably not robust enough. To be sure, we need
     // encoding detection.
-    DecodeWord(unescaped_url_filename, referrer_charset, &ignore,
-               &decoded_filename);
+    string16 utf16_output;
+    if (!referrer_charset.empty() &&
+        base::CodepageToUTF16(unescaped_url_filename,
+                              referrer_charset.c_str(),
+                              base::OnStringConversionError::FAIL,
+                              &utf16_output)) {
+      decoded_filename = UTF16ToUTF8(utf16_output);
+    } else {
+      decoded_filename = WideToUTF8(
+          base::SysNativeMBToWide(unescaped_url_filename));
+    }
   }
   // If the URL contains a (possibly empty) query, assume it is a generator, and
   // allow the determined extension to be overwritten.
@@ -1158,96 +974,6 @@ std::string GetSpecificHeader(const std::string& headers,
   return ret;
 }
 
-bool DecodeCharset(const std::string& input,
-                   std::string* decoded_charset,
-                   std::string* value) {
-  StringTokenizer t(input, "'");
-  t.set_options(StringTokenizer::RETURN_DELIMS);
-  std::string temp_charset;
-  std::string temp_value;
-  int numDelimsSeen = 0;
-  while (t.GetNext()) {
-    if (t.token_is_delim()) {
-      ++numDelimsSeen;
-      continue;
-    } else {
-      switch (numDelimsSeen) {
-        case 0:
-          temp_charset = t.token();
-          break;
-        case 1:
-          // Language is ignored.
-          break;
-        case 2:
-          temp_value = t.token();
-          break;
-        default:
-          return false;
-      }
-    }
-  }
-  if (numDelimsSeen != 2)
-    return false;
-  if (temp_charset.empty() || temp_value.empty())
-    return false;
-  decoded_charset->swap(temp_charset);
-  value->swap(temp_value);
-  return true;
-}
-
-bool DecodeFilenameValue(const std::string& input,
-                         const std::string& referrer_charset,
-                         std::string* output) {
-  std::string tmp;
-  // Tokenize with whitespace characters.
-  StringTokenizer t(input, " \t\n\r");
-  t.set_options(StringTokenizer::RETURN_DELIMS);
-  bool is_previous_token_rfc2047 = true;
-  while (t.GetNext()) {
-    if (t.token_is_delim()) {
-      // If the previous non-delimeter token is not RFC2047-encoded,
-      // put in a space in its place. Otheriwse, skip over it.
-      if (!is_previous_token_rfc2047) {
-        tmp.push_back(' ');
-      }
-      continue;
-    }
-    // We don't support a single multibyte character split into
-    // adjacent encoded words. Some broken mail clients emit headers
-    // with that problem, but most web servers usually encode a filename
-    // in a single encoded-word. Firefox/Thunderbird do not support
-    // it, either.
-    std::string decoded;
-    if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,
-                    &decoded))
-      return false;
-    tmp.append(decoded);
-  }
-  output->swap(tmp);
-  return true;
-}
-
-bool DecodeExtValue(const std::string& param_value, std::string* decoded) {
-  if (param_value.find('"') != std::string::npos)
-    return false;
-
-  std::string charset;
-  std::string value;
-  if (!DecodeCharset(param_value, &charset, &value))
-    return false;
-
-  // RFC 5987 value should be ASCII-only.
-  if (!IsStringASCII(value)) {
-    decoded->clear();
-    return true;
-  }
-
-  std::string unescaped = UnescapeURLComponent(value,
-      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
-
-  return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded);
-}
-
 string16 IDNToUnicode(const std::string& host,
                       const std::string& languages) {
   return IDNToUnicodeWithOffsets(host, languages, NULL);
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 444a547..874f3e2 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -173,12 +173,6 @@ NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url);
 NET_EXPORT std::string GetSpecificHeader(const std::string& headers,
                                          const std::string& name);
 
-// TODO(abarth): Move these functions to http_content_disposition.cc.
-bool DecodeFilenameValue(const std::string& input,
-                         const std::string& referrer_charset,
-                         std::string* output);
-bool DecodeExtValue(const std::string& value, std::string* output);
-
 // Converts the given host name to unicode characters. This can be called for
 // any host name, if the input is not IDN or is invalid in some way, we'll just
 // return the ASCII source so it is still usable.
@@ -252,9 +246,8 @@ NET_EXPORT string16 StripWWWFromHost(const GURL& url);
 // Generates a filename using the first successful method from the following (in
 // order):
 //
-// 1) The raw Content-Disposition header in |content_disposition| (as read from
-//    the network.  |referrer_charset| is used as described in the comment for
-//    GetFileNameFromCD().
+// 1) The raw Content-Disposition header in |content_disposition| as read from
+//    the network.  |referrer_charset| is used to decode non-ASCII strings.
 // 2) |suggested_name| if specified.  |suggested_name| is assumed to be in
 //    UTF-8.
 // 3) The filename extracted from the |url|.  |referrer_charset| will be used to
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 53e32f3..e4e181b 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -1179,7 +1179,7 @@ TEST(NetUtilTest, GenerateFileName) {
       L"default",
       L"default"
     },
-    // Below is a small subset of cases taken from GetFileNameFromCD test above.
+    // Below is a small subset of cases taken from HttpContentDisposition tests.
     {
       "http://www.google.com/",
       "attachment; filename=\"%EC%98%88%EC%88%A0%20"
diff --git a/net/http/http_content_disposition.cc b/net/http/http_content_disposition.cc
index 52d9f4f..0726e93 100644
--- a/net/http/http_content_disposition.cc
+++ b/net/http/http_content_disposition.cc
@@ -4,10 +4,336 @@
 
 #include "net/http/http_content_disposition.h"
 
+#include "base/base64.h"
+#include "base/i18n/icu_string_conversions.h"
 #include "base/logging.h"
 #include "base/string_util.h"
+#include "base/sys_string_conversions.h"
+#include "base/utf_string_conversions.h"
 #include "net/base/net_util.h"
 #include "net/http/http_util.h"
+#include "unicode/ucnv.h"
+
+namespace {
+
+enum RFC2047EncodingType {  // The two RFC 2047 encoded-word encodings.
+  Q_ENCODING,  // "Q" text, decoded by DecodeQEncoding().
+  B_ENCODING   // "B" text, decoded by base::Base64Decode().
+};
+
+// Decodes RFC 2047 section 4.2 "Q" encoded text: '_' becomes a space and "=XX"
+// is the byte 0xXX. Returns false, leaving |output| untouched, if invalid.
+bool DecodeQEncoding(const std::string& input, std::string* output) {
+  std::string temp;
+  temp.reserve(input.size());
+  for (std::string::const_iterator it = input.begin(); it != input.end();
+       ++it) {
+    if (*it == '_') {
+      temp.push_back(' ');
+    } else if (*it == '=') {
+      if ((input.end() - it < 3) ||  // '=' must be followed by two hex digits.
+          !IsHexDigit(static_cast<unsigned char>(*(it + 1))) ||
+          !IsHexDigit(static_cast<unsigned char>(*(it + 2))))
+        return false;
+      unsigned char ch = HexDigitToInt(*(it + 1)) * 16 +
+                         HexDigitToInt(*(it + 2));
+      temp.push_back(static_cast<char>(ch));
+      ++it;  // Advance to the second hex digit; the loop's own ++it
+      ++it;  // then moves past it.
+    } else if (0x20 < *it && *it < 0x7F && *it != '?') {
+      // In a Q-encoded word, only printable ASCII characters
+      // represent themselves. Space, '=', '_' and '?' never get here:
+      // the branches above and this branch's condition exclude them.
+      DCHECK_NE('=', *it);
+      DCHECK_NE('?', *it);
+      DCHECK_NE('_', *it);
+      temp.push_back(*it);
+    } else {
+      return false;
+    }
+  }
+  output->swap(temp);
+  return true;
+}
+
+// Decodes |part| as RFC 2047 "B" or "Q" text (per |enc_type|) and stores it in
+// |output|, converted from |charset| to UTF-8. Returns false on any failure.
+bool DecodeBQEncoding(const std::string& part,
+                      RFC2047EncodingType enc_type,
+                      const std::string& charset,
+                      std::string* output) {
+  std::string decoded;
+  if (!((enc_type == B_ENCODING) ?
+        base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded)))
+    return false;
+
+  if (decoded.empty()) {  // Nothing to convert.
+    output->clear();
+    return true;
+  }
+
+  UErrorCode err = U_ZERO_ERROR;
+  UConverter* converter(ucnv_open(charset.c_str(), &err));
+  if (U_FAILURE(err))
+    return false;
+
+  // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8.
+  // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes
+  // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a
+  // trailing '\0'.
+  size_t output_length = decoded.length() * 3 + 1;
+  char* buf = WriteInto(output, output_length);
+  output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length,
+                                     decoded.data(), decoded.length(), &err);
+  ucnv_close(converter);  // Released before the error check below.
+  if (U_FAILURE(err))
+    return false;
+  output->resize(output_length);  // Keep only the length ICU returned.
+  return true;
+}
+
+bool DecodeWord(const std::string& encoded_word,  // one token of the value
+                const std::string& referrer_charset,  // non-UTF-8 fallback
+                bool* is_rfc2047,  // out: parsed as =?charset?E?text?=
+                std::string* output) {  // out: decoded text, UTF-8
+  *is_rfc2047 = false;
+  output->clear();
+  if (encoded_word.empty())
+    return true;
+
+  if (!IsStringASCII(encoded_word)) {
+    // Try UTF-8, referrer_charset and the native OS default charset in turn.
+    if (IsStringUTF8(encoded_word)) {
+      *output = encoded_word;
+    } else {
+      string16 utf16_output;
+      if (!referrer_charset.empty() &&
+          base::CodepageToUTF16(encoded_word, referrer_charset.c_str(),
+                                base::OnStringConversionError::FAIL,
+                                &utf16_output)) {
+        *output = UTF16ToUTF8(utf16_output);
+      } else {
+        *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));
+      }
+    }
+
+    return true;  // Raw non-ASCII input is never treated as RFC 2047.
+  }
+
+  // RFC 2047 : one of encoding methods supported by Firefox and relatively
+  // widely used by web servers.
+  // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.
+  // We don't care about the length restriction (72 bytes) because
+  // many web servers generate encoded words longer than the limit.
+  std::string tmp;
+  *is_rfc2047 = true;
+  int part_index = 0;
+  std::string charset;
+  StringTokenizer t(encoded_word, "?");
+  RFC2047EncodingType enc_type = Q_ENCODING;
+  while (*is_rfc2047 && t.GetNext()) {
+    std::string part = t.token();
+    switch (part_index) {
+      case 0:  // Leading "=".
+        if (part != "=") {
+          *is_rfc2047 = false;  // Ends the loop; handled as %-escaped below.
+          break;
+        }
+        ++part_index;
+        break;
+      case 1:  // Charset name.
+        // Do we need charset validity check here?
+        charset = part;
+        ++part_index;
+        break;
+      case 2:  // Encoding: a single B or Q, either case.
+        if (part.size() > 1 ||
+            part.find_first_of("bBqQ") == std::string::npos) {
+          *is_rfc2047 = false;  // Ends the loop; handled as %-escaped below.
+          break;
+        }
+        if (part[0] == 'b' || part[0] == 'B') {
+          enc_type = B_ENCODING;
+        }
+        ++part_index;
+        break;
+      case 3:  // Encoded text.
+        *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp);
+        if (!*is_rfc2047) {
+          // Last minute failure. Invalid B/Q encoding. Rather than
+          // passing it through, return now.
+          return false;
+        }
+        ++part_index;
+        break;
+      case 4:  // Trailing "=".
+        if (part != "=") {
+          // Another last minute failure !
+          // Likely to be a case of two encoded-words in a row or
+          // an encoded word followed by a non-encoded word. We can be
+          // generous, but it does not help much in terms of compatibility,
+          // I believe. Return immediately.
+          *is_rfc2047 = false;
+          return false;
+        }
+        ++part_index;
+        break;
+      default:  // Extra parts after the trailing "=".
+        *is_rfc2047 = false;
+        return false;
+    }
+  }
+
+  if (*is_rfc2047) {  // No part was rejected; tokens may have run out early.
+    if (*(encoded_word.end() - 1) == '=') {
+      output->swap(tmp);
+      return true;
+    }
+    // encoded_word ending prematurely with '?' or extra '?'
+    *is_rfc2047 = false;
+    return false;
+  }
+
+  // We're not handling 'especial' characters quoted with '\', but
+  // it should be Ok because we're not an email client but a
+  // web browser.
+
+  // What IE6/7 does: %-escaped UTF-8.
+  tmp = net::UnescapeURLComponent(encoded_word, net::UnescapeRule::SPACES);
+  if (IsStringUTF8(tmp)) {
+    output->swap(tmp);
+    return true;
+    // We can try either the OS default charset or 'origin charset' here,
+    // As far as I can tell, IE does not support it. However, I've seen
+    // web servers emit %-escaped string in a legacy encoding (usually
+    // origin charset).
+    // TODO(jungshik) : Test IE further and consider adding a fallback here.
+  }
+  return false;
+}
+
+// Decodes the value of a 'filename' or 'name' parameter given as |input|. The
+// value is supposed to be of the form:
+//
+//   value                   = token | quoted-string
+//
+// However we currently also allow RFC 2047 encoding and non-ASCII
+// strings. Non-ASCII strings are interpreted based on |referrer_charset|.
+bool DecodeFilenameValue(const std::string& input,
+                         const std::string& referrer_charset,
+                         std::string* output) {
+  std::string tmp;
+  // Tokenize with whitespace characters.
+  StringTokenizer t(input, " \t\n\r");
+  t.set_options(StringTokenizer::RETURN_DELIMS);
+  bool is_previous_token_rfc2047 = true;  // True, so leading space is dropped.
+  while (t.GetNext()) {
+    if (t.token_is_delim()) {
+      // If the previous non-delimiter token is not RFC2047-encoded,
+      // put in a space in its place. Otherwise, skip over it.
+      if (!is_previous_token_rfc2047) {
+        tmp.push_back(' ');
+      }
+      continue;
+    }
+    // We don't support a single multibyte character split into
+    // adjacent encoded words. Some broken mail clients emit headers
+    // with that problem, but most web servers usually encode a filename
+    // in a single encoded-word. Firefox/Thunderbird do not support
+    // it, either.
+    std::string decoded;
+    if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,
+                    &decoded))
+      return false;  // One bad word rejects the whole value.
+    tmp.append(decoded);
+  }
+  output->swap(tmp);
+  return true;
+}
+
+// Parses the charset and value-chars out of an ext-value string.
+//
+//  ext-value     = charset  "'" [ language ] "'" value-chars
+bool ParseExtValueComponents(const std::string& input,
+                             std::string* charset,
+                             std::string* value_chars) {
+  StringTokenizer t(input, "'");
+  t.set_options(StringTokenizer::RETURN_DELIMS);
+  std::string temp_charset;
+  std::string temp_value;
+  int numDelimsSeen = 0;  // Number of "'" delimiters seen so far.
+  while (t.GetNext()) {
+    if (t.token_is_delim()) {
+      ++numDelimsSeen;
+      continue;
+    } else {
+      switch (numDelimsSeen) {
+        case 0:
+          temp_charset = t.token();
+          break;
+        case 1:
+          // Language is ignored.
+          break;
+        case 2:
+          temp_value = t.token();
+          break;
+        default:  // Text found after a third "'".
+          return false;
+      }
+    }
+  }
+  if (numDelimsSeen != 2)  // Exactly two "'" separators are required.
+    return false;
+  if (temp_charset.empty() || temp_value.empty())
+    return false;  // Charset and value-chars are both required.
+  charset->swap(temp_charset);  // Outputs are only written on success.
+  value_chars->swap(temp_value);
+  return true;
+}
+
+// http://tools.ietf.org/html/rfc5987#section-3.2
+//
+//  ext-value     = charset  "'" [ language ] "'" value-chars
+//
+//  charset       = "UTF-8" / "ISO-8859-1" / mime-charset
+//
+//  mime-charset  = 1*mime-charsetc
+//  mime-charsetc = ALPHA / DIGIT
+//                 / "!" / "#" / "$" / "%" / "&"
+//                 / "+" / "-" / "^" / "_" / "`"
+//                 / "{" / "}" / "~"
+//
+//  language      = <Language-Tag, defined in [RFC5646], Section 2.1>
+//
+//  value-chars   = *( pct-encoded / attr-char )
+//
+//  pct-encoded   = "%" HEXDIG HEXDIG
+//
+//  attr-char     = ALPHA / DIGIT
+//                 / "!" / "#" / "$" / "&" / "+" / "-" / "."
+//                 / "^" / "_" / "`" / "|" / "~"
+bool DecodeExtValue(const std::string& param_value, std::string* decoded) {
+  if (param_value.find('"') != std::string::npos)
+    return false;  // The ext-value grammar above has no '"'.
+
+  std::string charset;
+  std::string value;
+  if (!ParseExtValueComponents(param_value, &charset, &value))
+    return false;
+
+  // RFC 5987 value should be ASCII-only.
+  if (!IsStringASCII(value)) {
+    decoded->clear();
+    return true;  // Reported as success, but with an empty result.
+  }
+
+  std::string unescaped = net::UnescapeURLComponent(
+      value, net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
+
+  return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded);
+}
+
+} // namespace
 
 namespace net {