Don't unescape characters that can change the meaning of a URL. I removed the set of characters labeled PASS in url_canon_path.cc from the characters we'll unescape for URLs. These characters have no canonical representation in a URL (they are accepted both escaped and unescaped), so unescaping them changes the URL, and this transformation can cause problems for certain servers.

I updated the escape tests to use a different character so the tests work as before (I no longer unescape the semicolon). When I fixed this, I realized I had broken the Content-Disposition filename unescaping. It was using URL unescaping rules, which I think is incorrect for filenames in headers, so I added the "also unescape URL special characters" flag to the unescape call that this function uses.

TEST=updated unit tests
BUG=64732
Review URL: http://codereview.chromium.org/5376007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@68011 0039d316-1c4b-4281-b951-d872f2087c98
author: brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2010-12-02 15:45:24 +0000
committer: brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2010-12-02 15:45:24 +0000
commit: 84d64a355095f53c5c6ed4c2c8c9760124928029 (patch)
tree: 27fd02906059c122ff4717090b5689d5749ca795 /net
parent: d1b0115733bb0d3687c65d810d3396f3470dd83b (diff)
download: chromium_src-84d64a355095f53c5c6ed4c2c8c9760124928029.zip
chromium_src-84d64a355095f53c5c6ed4c2c8c9760124928029.tar.gz
chromium_src-84d64a355095f53c5c6ed4c2c8c9760124928029.tar.bz2
3 files changed, 40 insertions, 31 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 3c39f95..d6eef5e 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -72,18 +72,25 @@ const std::string Escape(const std::string& text, const Charmap& charmap,
 // The basic rule is that we can't unescape anything that would changing parsing
 // like # or ?. We also can't unescape &, =, or + since that could be part of a
 // query and that could change the server's parsing of the query.
+//
+// Lastly, we can't unescape anything that doesn't have a canonical
+// representation in a URL. This means that unescaping will change the URL, and
+// you could get different behavior if you copy and paste the URL, or press
+// enter in the URL bar. The list of characters that fall into this category
+// are the ones labeled PASS (allow either escaped or unescaped) in the big
+// lookup table at the top of googleurl/src/url_canon_path.cc
 const char kUrlUnescape[128] = {
 //   NULL, control chars...
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 //  ' ' !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /
-     0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
+     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
 //   0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?
-     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,
+     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
 //   @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 //   P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _
-     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
 //   `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 //   p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  <NBSP>
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
index 01cd9e1..04a040e 100644
--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -159,17 +159,17 @@ TEST(EscapeTest, UnescapeURLComponentASCII) {
     {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
     {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
     {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
-    {"Some%20random text %25%3bOK", UnescapeRule::NONE,
-     "Some%20random text %25%3bOK"},
-    {"Some%20random text %25%3bOK", UnescapeRule::NORMAL,
-     "Some%20random text %25;OK"},
-    {"Some%20random text %25%3bOK", UnescapeRule::SPACES,
-     "Some random text %25;OK"},
-    {"Some%20random text %25%3bOK", UnescapeRule::URL_SPECIAL_CHARS,
-     "Some%20random text %;OK"},
-    {"Some%20random text %25%3bOK",
+    {"Some%20random text %25%2dOK", UnescapeRule::NONE,
+     "Some%20random text %25%2dOK"},
+    {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
+     "Some%20random text %25-OK"},
+    {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
+     "Some random text %25-OK"},
+    {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
+     "Some%20random text %-OK"},
+    {"Some%20random text %25%2dOK",
      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
-     "Some random text %;OK"},
+     "Some random text %-OK"},
     {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
     {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
     // Certain URL-sensitive characters should not be unescaped unless asked.
@@ -220,17 +220,17 @@ TEST(EscapeTest, UnescapeURLComponent) {
     {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
     {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
     {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
-    {L"Some%20random text %25%3bOK", UnescapeRule::NONE,
-     L"Some%20random text %25%3bOK"},
-    {L"Some%20random text %25%3bOK", UnescapeRule::NORMAL,
-     L"Some%20random text %25;OK"},
-    {L"Some%20random text %25%3bOK", UnescapeRule::SPACES,
-     L"Some random text %25;OK"},
-    {L"Some%20random text %25%3bOK", UnescapeRule::URL_SPECIAL_CHARS,
-     L"Some%20random text %;OK"},
-    {L"Some%20random text %25%3bOK",
+    {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
+     L"Some%20random text %25%2dOK"},
+    {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
+     L"Some%20random text %25-OK"},
+    {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
+     L"Some random text %25-OK"},
+    {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
+     L"Some%20random text %-OK"},
+    {L"Some%20random text %25%2dOK",
      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
-     L"Some random text %;OK"},
+     L"Some random text %-OK"},
     {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
     {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
     // Certain URL-sensitive characters should not be unescaped unless asked.
@@ -307,10 +307,10 @@ TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
       "+Invalid %escape %2+",
       " Invalid %escape %2 ",
      L"+Invalid %escape %2+"},
-    { "Some random text %25%3BOK",
-      "Some random text %25;OK",
-      "Some random text %25;OK",
-     L"Some random text %25;OK"},
+    { "Some random text %25%2dOK",
+      "Some random text %25-OK",
+      "Some random text %25-OK",
+     L"Some random text %25-OK"},
     { "%01%02%03%04%05%06%07%08%09",
       "%01%02%03%04%05%06%07%08%09",
       "%01%02%03%04%05%06%07%08%09",
@@ -349,9 +349,9 @@ TEST(EscapeTest, AdjustOffset) {
     {"test", 2, 2},
     {"test", 4, std::wstring::npos},
     {"test", std::wstring::npos, std::wstring::npos},
-    {"%3Btest", 6, 4},
-    {"%3Btest", 2, std::wstring::npos},
-    {"test%3B", 2, 2},
+    {"%2dtest", 6, 4},
+    {"%2dtest", 2, std::wstring::npos},
+    {"test%2d", 2, 2},
     {"%E4%BD%A0+%E5%A5%BD", 9, 1},
     {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos},
     {"%ED%B0%80+%E5%A5%BD", 6, 6},
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 06432d9..d032d8a 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -1140,7 +1140,9 @@ std::string GetFileNameFromCD(const std::string& header,
         // RFC 5987 value should be ASCII-only.
         if (!IsStringASCII(value))
           return std::string();
-        std::string tmp = UnescapeURLComponent(value, UnescapeRule::SPACES);
+        std::string tmp = UnescapeURLComponent(
+            value,
+            UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
         if (base::ConvertToUtf8AndNormalize(tmp, charset, &decoded))
           return decoded;
       }
author	brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2010-12-02 15:45:24 +0000
committer	brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2010-12-02 15:45:24 +0000
commit	84d64a355095f53c5c6ed4c2c8c9760124928029 (patch)
tree	27fd02906059c122ff4717090b5689d5749ca795 /net
parent	d1b0115733bb0d3687c65d810d3396f3470dd83b (diff)
download	chromium_src-84d64a355095f53c5c6ed4c2c8c9760124928029.zip chromium_src-84d64a355095f53c5c6ed4c2c8c9760124928029.tar.gz chromium_src-84d64a355095f53c5c6ed4c2c8c9760124928029.tar.bz2