Local text file with spaces in filename is urlencoded in tab title

When viewing a local text file with spaces in its filename, the name shown in the tab title is still URL-encoded. The filename should be displayed with spaces, not with percent-escapes; that would be more user-friendly. Since net::FormatUrl is already implemented, using it would be great. But it doesn't unescape SPACES, just NORMAL; it doesn't even unescape Unicode. I plumbed an unescape rule through it so that callers can choose whether or not spaces are converted. BUG=8775 (http://crbug.com/8775) TEST=Tested whether the input is escaped in the navigational context and ran the net tests. New Review: http://codereview.chromium.org/118059 Review URL: http://codereview.chromium.org/56053 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@17462 0039d316-1c4b-4281-b951-d872f2087c98
author: mhm@chromium.org <mhm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-06-03 02:16:32 +0000
committer: mhm@chromium.org <mhm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-06-03 02:16:32 +0000
commit: a23de85783d944cbb75f09737eb29c60ea87481d (patch)
tree: 30c4732b42443259134a3f83f574bfaa2a506292 /net
parent: 1b5237ecc1fea39e51e1634acbcdf11bd7ef57b0 (diff)
download: chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.zip
chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.tar.gz
chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.tar.bz2
6 files changed, 86 insertions, 56 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 54174fc..f836afb6 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -107,6 +107,10 @@ const char kUrlUnescape[128] = {
 
 std::string UnescapeURLImpl(const std::string& escaped_text,
                             UnescapeRule::Type rules) {
+  // Do not unescape anything, return the |escaped_text| text.
+  if (rules == UnescapeRule::NONE)
+    return escaped_text;
+
   // The output of the unescaping is always smaller than the input, so we can
   // reserve the input size to make sure we have enough buffer and don't have
   // to allocate in the loop below.
diff --git a/net/base/escape.h b/net/base/escape.h
index 1502b56..17f8646 100644
--- a/net/base/escape.h
+++ b/net/base/escape.h
@@ -41,32 +41,36 @@ class UnescapeRule {
   typedef uint32 Type;
 
   enum {
+    // Don't unescape anything at all.
+    NONE = 0,
+
     // Don't unescape anything special, but all normal unescaping will happen.
     // This is a placeholder and can't be combined with other flags (since it's
-    // just the absense of them). Things like escaped letters, digits, and most
-    // symbols will get unescaped with this mode.
-    NORMAL = 0,
+    // just the absence of them). All other unescape rules imply "normal" in
+    // addition to their special meaning. Things like escaped letters, digits,
+    // and most symbols will get unescaped with this mode.
+    NORMAL = 1,
 
     // Convert %20 to spaces. In some places where we're showing URLs, we may
     // want this. In places where the URL may be copied and pasted out, then
     // you wouldn't want this since it might not be interpreted in one piece
     // by other applications.
-    SPACES = 1,
+    SPACES = 2,
 
     // Unescapes various characters that will change the meaning of URLs,
-    // including '%', '+', '&', '/', '#'. If we unescaped these charaters, the
+    // including '%', '+', '&', '/', '#'. If we unescaped these characters, the
     // resulting URL won't be the same as the source one. This flag is used when
     // generating final output like filenames for URLs where we won't be
     // interpreting as a URL and want to do as much unescaping as possible.
-    URL_SPECIAL_CHARS = 2,
+    URL_SPECIAL_CHARS = 4,
 
-    // Unescapes control characters such as %01. This INCLUDES NULLs!. This is
+    // Unescapes control characters such as %01. This INCLUDES NULLs. This is
     // used for rare cases such as data: URL decoding where the result is binary
     // data. You should not use this for normal URLs!
-    CONTROL_CHARS = 4,
+    CONTROL_CHARS = 8,
 
     // URL queries use "+" for space. This flag controls that replacement.
-    REPLACE_PLUS_WITH_SPACE = 8,
+    REPLACE_PLUS_WITH_SPACE = 16,
   };
 };
 
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
index c1bcbd3..8c31d41 100644
--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -114,6 +114,8 @@ TEST(Escape, UnescapeURLComponent) {
     {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
     {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
     {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
+    {"Some%20random text %25%3bOK", UnescapeRule::NONE,
+     "Some%20random text %25%3bOK"},
     {"Some%20random text %25%3bOK", UnescapeRule::NORMAL,
      "Some%20random text %25;OK"},
     {"Some%20random text %25%3bOK", UnescapeRule::SPACES,
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index bfc2cea..3139acb 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -656,11 +656,11 @@ void IDNToUnicodeOneComponent(const char16* comp,
 namespace net {
 
 // Appends the substring |in_component| inside of the URL |spec| to |output|,
-// and the resulting range will be filled into |out_component|.  Calls the
-// unescaper for the substring if |unescape| is true.
+// and the resulting range will be filled into |out_component|. |unescape_rules|
+// defines how to clean the URL for human readability.
 static void AppendFormattedComponent(const std::string& spec,
                                      const url_parse::Component& in_component,
-                                     bool unescape,
+                                     UnescapeRule::Type unescape_rules,
                                      std::wstring* output,
                                      url_parse::Component* out_component);
 
@@ -1084,18 +1084,18 @@ void AppendFormattedHost(const GURL& url,
 /* static */
 void AppendFormattedComponent(const std::string& spec,
                               const url_parse::Component& in_component,
-                              bool unescape,
+                              UnescapeRule::Type unescape_rules,
                               std::wstring* output,
                               url_parse::Component* out_component) {
   if (in_component.is_nonempty()) {
     out_component->begin = static_cast<int>(output->length());
-    if (unescape) {
-      output->append(UnescapeAndDecodeUTF8URLComponent(
-          spec.substr(in_component.begin, in_component.len),
-          UnescapeRule::NORMAL));
-    } else {
+    if (unescape_rules == UnescapeRule::NONE) {
       output->append(UTF8ToWide(spec.substr(
           in_component.begin, in_component.len)));
+    } else {
+      output->append(UnescapeAndDecodeUTF8URLComponent(
+          spec.substr(in_component.begin, in_component.len),
+          unescape_rules));
     }
     out_component->len =
         static_cast<int>(output->length()) - out_component->begin;
@@ -1104,9 +1104,12 @@ void AppendFormattedComponent(const std::string& spec,
   }
 }
 
-std::wstring FormatUrl(
-    const GURL& url, const std::wstring& languages, bool omit_username_password,
-    bool unescape, url_parse::Parsed* new_parsed, size_t* prefix_end) {
+std::wstring FormatUrl(const GURL& url,
+                       const std::wstring& languages,
+                       bool omit_username_password,
+                       UnescapeRule::Type unescape_rules,
+                       url_parse::Parsed* new_parsed,
+                       size_t* prefix_end) {
   url_parse::Parsed parsed_temp;
   if (!new_parsed)
     new_parsed = &parsed_temp;
@@ -1140,12 +1143,14 @@ std::wstring FormatUrl(
     new_parsed->password.reset();
   } else {
     AppendFormattedComponent(
-        spec, parsed.username, unescape, &url_string, &new_parsed->username);
+        spec, parsed.username, unescape_rules,
+        &url_string, &new_parsed->username);
     if (parsed.password.is_valid()) {
       url_string.push_back(':');
     }
     AppendFormattedComponent(
-        spec, parsed.password, unescape, &url_string, &new_parsed->password);
+        spec, parsed.password, unescape_rules,
+        &url_string, &new_parsed->password);
     if (parsed.username.is_valid() || parsed.password.is_valid()) {
       url_string.push_back('@');
     }
@@ -1169,11 +1174,13 @@ std::wstring FormatUrl(
 
   // Path and query both get the same general unescape & convert treatment.
   AppendFormattedComponent(
-      spec, parsed.path, unescape, &url_string, &new_parsed->path);
+      spec, parsed.path, unescape_rules, &url_string,
+      &new_parsed->path);
   if (parsed.query.is_valid())
     url_string.push_back('?');
   AppendFormattedComponent(
-      spec, parsed.query, unescape, &url_string, &new_parsed->query);
+      spec, parsed.query, unescape_rules, &url_string,
+      &new_parsed->query);
 
   // Reference is stored in valid, unescaped UTF-8, so we can just convert.
   if (parsed.ref.is_valid()) {
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 2ab6101..d7f1de7 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -14,6 +14,7 @@
 #include <string>
 
 #include "base/basictypes.h"
+#include "net/base/escape.h"
 
 struct addrinfo;
 class FilePath;
@@ -184,23 +185,25 @@ void AppendFormattedHost(const GURL& url, const std::wstring& languages,
 // Creates a string representation of |url|. The IDN host name may
 // be in Unicode if |languages| accepts the Unicode representation.
 // If |omit_username_password| is true, the username and the password are
-// omitted. If |unescape| is true and the path part and the query part seem to
-// be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8.
-// |new_parsed| will have parsing parameters of the resultant URL. |prefix_end|
-// will be the length before the hostname of the resultant URL. |new_parsed|
-// and |prefix_end| may be NULL.
+// omitted. |unescape_rules| defines how to clean the URL for human readability.
+// You will generally want |UnescapeRule::SPACES| for display to the user if you
+// can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the
+// query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and
+// UTF-8. |new_parsed| will have parsing parameters of the resultant URL.
+// |prefix_end| will be the length before the hostname of the resultant URL.
+// |new_parsed| and |prefix_end| may be NULL.
 std::wstring FormatUrl(const GURL& url,
                        const std::wstring& languages,
                        bool omit_username_password,
-                       bool unescape,
+                       UnescapeRule::Type unescape_rules,
                        url_parse::Parsed* new_parsed,
                        size_t* prefix_end);
 
 // Creates a string representation of |url| for display to the user.
 // This is a shorthand of the above function with omit_username_password=true,
-// unescape=true, new_parsed=NULL, and prefix_end=NULL.
+// unescape_rules=UnescapeRule::SPACES, new_parsed=NULL, and prefix_end=NULL.
 inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) {
-  return FormatUrl(url, languages, true, true, NULL, NULL);
+  return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL);
 }
 
 }  // namespace net
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 5f9ed4e..29b8f702 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -7,6 +7,7 @@
 #include "base/string_util.h"
 #include "base/time.h"
 #include "googleurl/src/gurl.h"
+#include "net/base/escape.h"
 #include "net/base/net_util.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
@@ -342,7 +343,7 @@ struct UrlTestData {
   const char* input;
   const std::wstring languages;
   bool omit;
-  bool unescape;
+  UnescapeRule::Type escape_rules;
   const std::wstring output;
   size_t prefix_len;
 };
@@ -1021,81 +1022,90 @@ TEST(NetUtilTest, GetHostName) {
 
 TEST(NetUtilTest, FormatUrl) {
   const UrlTestData tests[] = {
-    {"Empty URL", "", L"", true, true, L"", 0},
+    {"Empty URL", "", L"", true, UnescapeRule::NORMAL, L"", 0},
 
     {"Simple URL",
-     "http://www.google.com/", L"", true, true,
+     "http://www.google.com/", L"", true, UnescapeRule::NORMAL,
      L"http://www.google.com/", 7},
 
     {"With a port number and a reference",
-     "http://www.google.com:8080/#\xE3\x82\xB0", L"", true, true,
+     "http://www.google.com:8080/#\xE3\x82\xB0", L"", true,
+     UnescapeRule::NORMAL,
      L"http://www.google.com:8080/#\x30B0", 7},
 
     // -------- IDN tests --------
     {"Japanese IDN with ja",
-     "http://xn--l8jvb1ey91xtjb.jp", L"ja", true, true,
+     "http://xn--l8jvb1ey91xtjb.jp", L"ja", true, UnescapeRule::NORMAL,
      L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
 
     {"Japanese IDN with en",
-     "http://xn--l8jvb1ey91xtjb.jp", L"en", true, true,
+     "http://xn--l8jvb1ey91xtjb.jp", L"en", true, UnescapeRule::NORMAL,
      L"http://xn--l8jvb1ey91xtjb.jp/", 7},
 
     {"Japanese IDN without any languages",
-     "http://xn--l8jvb1ey91xtjb.jp", L"", true, true,
+     "http://xn--l8jvb1ey91xtjb.jp", L"", true, UnescapeRule::NORMAL,
      // Single script is safe for empty languages.
      L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
 
     {"mailto: with Japanese IDN",
-     "mailto:foo@xn--l8jvb1ey91xtjb.jp", L"ja", true, true,
+     "mailto:foo@xn--l8jvb1ey91xtjb.jp", L"ja", true, UnescapeRule::NORMAL,
      // GURL doesn't assume an email address's domain part as a host name.
      L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
 
     {"file: with Japanese IDN",
-     "file://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true,
+     "file://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true,
+     UnescapeRule::NORMAL,
      L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
 
     {"ftp: with Japanese IDN",
-     "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true,
+     "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true,
+     UnescapeRule::NORMAL,
      L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
 
     // -------- omit_username_password flag tests --------
     {"With username and password, omit_username_password=false",
-     "http://user:passwd@example.com/foo", L"", false, true,
+     "http://user:passwd@example.com/foo", L"", false, UnescapeRule::NORMAL,
      L"http://user:passwd@example.com/foo", 19},
 
     {"With username and password, omit_username_password=true",
-     "http://user:passwd@example.com/foo", L"", true, true,
+     "http://user:passwd@example.com/foo", L"", true, UnescapeRule::NORMAL,
      L"http://example.com/foo", 7},
 
     {"With username and no password",
-     "http://user@example.com/foo", L"", true, true,
+     "http://user@example.com/foo", L"", true, UnescapeRule::NORMAL,
      L"http://example.com/foo", 7},
 
     {"Just '@' without username and password",
-     "http://@example.com/foo", L"", true, true,
+     "http://@example.com/foo", L"", true, UnescapeRule::NORMAL,
      L"http://example.com/foo", 7},
 
     // GURL doesn't think local-part of an email address is username for URL.
     {"mailto:, omit_username_password=true",
-     "mailto:foo@example.com", L"", true, true,
+     "mailto:foo@example.com", L"", true, UnescapeRule::NORMAL,
      L"mailto:foo@example.com", 7},
 
     // -------- unescape flag tests --------
-    {"unescape=false",
+    {"Do not unescape",
      "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
      "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
-     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, false,
+     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true,
+     UnescapeRule::NONE,
      // GURL parses %-encoded hostnames into Punycode.
      L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
      L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
 
-    {"unescape=true",
+    {"Unescape normally",
      "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
      "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
-     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, true,
+     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true,
+     UnescapeRule::NORMAL,
      L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
      L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
 
+    {"Unescape normally including unescape spaces",
+     "http://www.google.com/search?q=Hello%20World", L"en", true,
+     UnescapeRule::SPACES,
+     L"http://www.google.com/search?q=Hello World", 7}
     /*
     {"unescape=true with some special characters",
     "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", L"", false, true,
@@ -1108,7 +1118,7 @@ TEST(NetUtilTest, FormatUrl) {
     size_t prefix_len;
     std::wstring formatted = net::FormatUrl(
         GURL(tests[i].input), tests[i].languages, tests[i].omit,
-        tests[i].unescape, NULL, &prefix_len);
+        tests[i].escape_rules, NULL, &prefix_len);
     EXPECT_EQ(tests[i].output, formatted) << tests[i].description;
     EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
   }
@@ -1120,7 +1130,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
   std::wstring formatted = net::FormatUrl(
       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
-      L"ja", false, false, &parsed, NULL);
+      L"ja", false, UnescapeRule::NONE, &parsed, NULL);
   EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
       L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted);
   EXPECT_EQ(L"%E3%82%B0",
@@ -1140,7 +1150,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
   formatted = net::FormatUrl(
       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
-      L"ja", false, true, &parsed, NULL);
+      L"ja", false, UnescapeRule::NORMAL, &parsed, NULL);
   EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
       L"/\x30B0/?q=\x30B0#\x30B0", formatted);
   EXPECT_EQ(L"\x30B0",
@@ -1159,7 +1169,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
   formatted = net::FormatUrl(
       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
-      L"ja", true, true, &parsed, NULL);
+      L"ja", true, UnescapeRule::NORMAL, &parsed, NULL);
   EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
       L"/\x30B0/?q=\x30B0#\x30B0", formatted);
   EXPECT_FALSE(parsed.username.is_valid());
author	mhm@chromium.org <mhm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-06-03 02:16:32 +0000
committer	mhm@chromium.org <mhm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-06-03 02:16:32 +0000
commit	a23de85783d944cbb75f09737eb29c60ea87481d (patch)
tree	30c4732b42443259134a3f83f574bfaa2a506292 /net
parent	1b5237ecc1fea39e51e1634acbcdf11bd7ef57b0 (diff)
download	chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.zip chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.tar.gz chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.tar.bz2