diff options
author | mhm@chromium.org <mhm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-06-03 02:16:32 +0000 |
---|---|---|
committer | mhm@chromium.org <mhm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-06-03 02:16:32 +0000 |
commit | a23de85783d944cbb75f09737eb29c60ea87481d (patch) | |
tree | 30c4732b42443259134a3f83f574bfaa2a506292 /net | |
parent | 1b5237ecc1fea39e51e1634acbcdf11bd7ef57b0 (diff) | |
download | chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.zip chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.tar.gz chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.tar.bz2 |
Local text file with spaces in filename is urlencoded in tab title
When viewing a local text file with spaces in its filename, the tab title still shows the URL-encoded filename. The filename should be displayed with spaces rather than URL-encoded, which would be more user-friendly.
Since net::FormatUrl is already implemented, it makes sense to use it. However, it only applies UnescapeRule::NORMAL, not UnescapeRule::SPACES, so it doesn't unescape spaces; it doesn't even unescape Unicode. This change plumbs an UnescapeRule::Type parameter through FormatUrl so that callers can choose whether or not escaped spaces are converted.
BUG=8775 (http://crbug.com/8775)
TEST=Tested whether the input is escaped in the navigational context and ran the net tests
New Review: http://codereview.chromium.org/118059
Review URL: http://codereview.chromium.org/56053
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@17462 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/base/escape.cc | 4 | ||||
-rw-r--r-- | net/base/escape.h | 22 | ||||
-rw-r--r-- | net/base/escape_unittest.cc | 2 | ||||
-rw-r--r-- | net/base/net_util.cc | 39 | ||||
-rw-r--r-- | net/base/net_util.h | 19 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 56 |
6 files changed, 86 insertions, 56 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc index 54174fc..f836afb6 100644 --- a/net/base/escape.cc +++ b/net/base/escape.cc @@ -107,6 +107,10 @@ const char kUrlUnescape[128] = { std::string UnescapeURLImpl(const std::string& escaped_text, UnescapeRule::Type rules) { + // Do not unescape anything, return the |escaped_text| text. + if (rules == UnescapeRule::NONE) + return escaped_text; + // The output of the unescaping is always smaller than the input, so we can // reserve the input size to make sure we have enough buffer and don't have // to allocate in the loop below. diff --git a/net/base/escape.h b/net/base/escape.h index 1502b56..17f8646 100644 --- a/net/base/escape.h +++ b/net/base/escape.h @@ -41,32 +41,36 @@ class UnescapeRule { typedef uint32 Type; enum { + // Don't unescape anything at all. + NONE = 0, + // Don't unescape anything special, but all normal unescaping will happen. // This is a placeholder and can't be combined with other flags (since it's - // just the absense of them). Things like escaped letters, digits, and most - // symbols will get unescaped with this mode. - NORMAL = 0, + // just the absence of them). All other unescape rules imply "normal" in + // addition to their special meaning. Things like escaped letters, digits, + // and most symbols will get unescaped with this mode. + NORMAL = 1, // Convert %20 to spaces. In some places where we're showing URLs, we may // want this. In places where the URL may be copied and pasted out, then // you wouldn't want this since it might not be interpreted in one piece // by other applications. - SPACES = 1, + SPACES = 2, // Unescapes various characters that will change the meaning of URLs, - // including '%', '+', '&', '/', '#'. If we unescaped these charaters, the + // including '%', '+', '&', '/', '#'. If we unescaped these characters, the // resulting URL won't be the same as the source one. 
This flag is used when // generating final output like filenames for URLs where we won't be // interpreting as a URL and want to do as much unescaping as possible. - URL_SPECIAL_CHARS = 2, + URL_SPECIAL_CHARS = 4, - // Unescapes control characters such as %01. This INCLUDES NULLs!. This is + // Unescapes control characters such as %01. This INCLUDES NULLs. This is // used for rare cases such as data: URL decoding where the result is binary // data. You should not use this for normal URLs! - CONTROL_CHARS = 4, + CONTROL_CHARS = 8, // URL queries use "+" for space. This flag controls that replacement. - REPLACE_PLUS_WITH_SPACE = 8, + REPLACE_PLUS_WITH_SPACE = 16, }; }; diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc index c1bcbd3..8c31d41 100644 --- a/net/base/escape_unittest.cc +++ b/net/base/escape_unittest.cc @@ -114,6 +114,8 @@ TEST(Escape, UnescapeURLComponent) { {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"}, {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"}, {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"}, + {"Some%20random text %25%3bOK", UnescapeRule::NONE, + "Some%20random text %25%3bOK"}, {"Some%20random text %25%3bOK", UnescapeRule::NORMAL, "Some%20random text %25;OK"}, {"Some%20random text %25%3bOK", UnescapeRule::SPACES, diff --git a/net/base/net_util.cc b/net/base/net_util.cc index bfc2cea..3139acb 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -656,11 +656,11 @@ void IDNToUnicodeOneComponent(const char16* comp, namespace net { // Appends the substring |in_component| inside of the URL |spec| to |output|, -// and the resulting range will be filled into |out_component|. Calls the -// unescaper for the substring if |unescape| is true. +// and the resulting range will be filled into |out_component|. |unescape_rules| +// defines how to clean the URL for human readability. 
static void AppendFormattedComponent(const std::string& spec, const url_parse::Component& in_component, - bool unescape, + UnescapeRule::Type unescape_rules, std::wstring* output, url_parse::Component* out_component); @@ -1084,18 +1084,18 @@ void AppendFormattedHost(const GURL& url, /* static */ void AppendFormattedComponent(const std::string& spec, const url_parse::Component& in_component, - bool unescape, + UnescapeRule::Type unescape_rules, std::wstring* output, url_parse::Component* out_component) { if (in_component.is_nonempty()) { out_component->begin = static_cast<int>(output->length()); - if (unescape) { - output->append(UnescapeAndDecodeUTF8URLComponent( - spec.substr(in_component.begin, in_component.len), - UnescapeRule::NORMAL)); - } else { + if (unescape_rules == UnescapeRule::NONE) { output->append(UTF8ToWide(spec.substr( in_component.begin, in_component.len))); + } else { + output->append(UnescapeAndDecodeUTF8URLComponent( + spec.substr(in_component.begin, in_component.len), + unescape_rules)); } out_component->len = static_cast<int>(output->length()) - out_component->begin; @@ -1104,9 +1104,12 @@ void AppendFormattedComponent(const std::string& spec, } } -std::wstring FormatUrl( - const GURL& url, const std::wstring& languages, bool omit_username_password, - bool unescape, url_parse::Parsed* new_parsed, size_t* prefix_end) { +std::wstring FormatUrl(const GURL& url, + const std::wstring& languages, + bool omit_username_password, + UnescapeRule::Type unescape_rules, + url_parse::Parsed* new_parsed, + size_t* prefix_end) { url_parse::Parsed parsed_temp; if (!new_parsed) new_parsed = &parsed_temp; @@ -1140,12 +1143,14 @@ std::wstring FormatUrl( new_parsed->password.reset(); } else { AppendFormattedComponent( - spec, parsed.username, unescape, &url_string, &new_parsed->username); + spec, parsed.username, unescape_rules, + &url_string, &new_parsed->username); if (parsed.password.is_valid()) { url_string.push_back(':'); } AppendFormattedComponent( - spec, 
parsed.password, unescape, &url_string, &new_parsed->password); + spec, parsed.password, unescape_rules, + &url_string, &new_parsed->password); if (parsed.username.is_valid() || parsed.password.is_valid()) { url_string.push_back('@'); } @@ -1169,11 +1174,13 @@ std::wstring FormatUrl( // Path and query both get the same general unescape & convert treatment. AppendFormattedComponent( - spec, parsed.path, unescape, &url_string, &new_parsed->path); + spec, parsed.path, unescape_rules, &url_string, + &new_parsed->path); if (parsed.query.is_valid()) url_string.push_back('?'); AppendFormattedComponent( - spec, parsed.query, unescape, &url_string, &new_parsed->query); + spec, parsed.query, unescape_rules, &url_string, + &new_parsed->query); // Reference is stored in valid, unescaped UTF-8, so we can just convert. if (parsed.ref.is_valid()) { diff --git a/net/base/net_util.h b/net/base/net_util.h index 2ab6101..d7f1de7 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -14,6 +14,7 @@ #include <string> #include "base/basictypes.h" +#include "net/base/escape.h" struct addrinfo; class FilePath; @@ -184,23 +185,25 @@ void AppendFormattedHost(const GURL& url, const std::wstring& languages, // Creates a string representation of |url|. The IDN host name may // be in Unicode if |languages| accepts the Unicode representation. // If |omit_username_password| is true, the username and the password are -// omitted. If |unescape| is true and the path part and the query part seem to -// be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8. -// |new_parsed| will have parsing parameters of the resultant URL. |prefix_end| -// will be the length before the hostname of the resultant URL. |new_parsed| -// and |prefix_end| may be NULL. +// omitted. |unescape_rules| defines how to clean the URL for human readability. +// You will generally want |UnescapeRule::SPACES| for display to the user if you +// can handle spaces, or |UnescapeRule::NORMAL| if not. 
If the path part and the +// query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and +// UTF-8. |new_parsed| will have parsing parameters of the resultant URL. +// |prefix_end| will be the length before the hostname of the resultant URL. +// |new_parsed| and |prefix_end| may be NULL. std::wstring FormatUrl(const GURL& url, const std::wstring& languages, bool omit_username_password, - bool unescape, + UnescapeRule::Type unescape_rules, url_parse::Parsed* new_parsed, size_t* prefix_end); // Creates a string representation of |url| for display to the user. // This is a shorthand of the above function with omit_username_password=true, -// unescape=true, new_parsed=NULL, and prefix_end=NULL. +// unescape=SPACES, new_parsed=NULL, and prefix_end=NULL. inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) { - return FormatUrl(url, languages, true, true, NULL, NULL); + return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL); } } // namespace net diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 5f9ed4e..29b8f702 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -7,6 +7,7 @@ #include "base/string_util.h" #include "base/time.h" #include "googleurl/src/gurl.h" +#include "net/base/escape.h" #include "net/base/net_util.h" #include "testing/gtest/include/gtest/gtest.h" @@ -342,7 +343,7 @@ struct UrlTestData { const char* input; const std::wstring languages; bool omit; - bool unescape; + UnescapeRule::Type escape_rules; const std::wstring output; size_t prefix_len; }; @@ -1021,81 +1022,90 @@ TEST(NetUtilTest, GetHostName) { TEST(NetUtilTest, FormatUrl) { const UrlTestData tests[] = { - {"Empty URL", "", L"", true, true, L"", 0}, + {"Empty URL", "", L"", true, UnescapeRule::NORMAL, L"", 0}, {"Simple URL", - "http://www.google.com/", L"", true, true, + "http://www.google.com/", L"", true, UnescapeRule::NORMAL, L"http://www.google.com/", 7}, {"With a port number 
and a reference", - "http://www.google.com:8080/#\xE3\x82\xB0", L"", true, true, + "http://www.google.com:8080/#\xE3\x82\xB0", L"", true, + UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, // -------- IDN tests -------- {"Japanese IDN with ja", - "http://xn--l8jvb1ey91xtjb.jp", L"ja", true, true, + "http://xn--l8jvb1ey91xtjb.jp", L"ja", true, UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, {"Japanese IDN with en", - "http://xn--l8jvb1ey91xtjb.jp", L"en", true, true, + "http://xn--l8jvb1ey91xtjb.jp", L"en", true, UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7}, {"Japanese IDN without any languages", - "http://xn--l8jvb1ey91xtjb.jp", L"", true, true, + "http://xn--l8jvb1ey91xtjb.jp", L"", true, UnescapeRule::NORMAL, // Single script is safe for empty languages. L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, {"mailto: with Japanese IDN", - "mailto:foo@xn--l8jvb1ey91xtjb.jp", L"ja", true, true, + "mailto:foo@xn--l8jvb1ey91xtjb.jp", L"ja", true, UnescapeRule::NORMAL, // GURL doesn't assume an email address's domain part as a host name. 
L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, {"file: with Japanese IDN", - "file://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true, + "file://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, + UnescapeRule::NORMAL, L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, {"ftp: with Japanese IDN", - "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true, + "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, + UnescapeRule::NORMAL, L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, // -------- omit_username_password flag tests -------- {"With username and password, omit_username_password=false", - "http://user:passwd@example.com/foo", L"", false, true, + "http://user:passwd@example.com/foo", L"", false, UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, {"With username and password, omit_username_password=true", - "http://user:passwd@example.com/foo", L"", true, true, + "http://user:passwd@example.com/foo", L"", true, UnescapeRule::NORMAL, L"http://example.com/foo", 7}, {"With username and no password", - "http://user@example.com/foo", L"", true, true, + "http://user@example.com/foo", L"", true, UnescapeRule::NORMAL, L"http://example.com/foo", 7}, {"Just '@' without username and password", - "http://@example.com/foo", L"", true, true, + "http://@example.com/foo", L"", true, UnescapeRule::NORMAL, L"http://example.com/foo", 7}, // GURL doesn't think local-part of an email address is username for URL. 
{"mailto:, omit_username_password=true", - "mailto:foo@example.com", L"", true, true, + "mailto:foo@example.com", L"", true, UnescapeRule::NORMAL, L"mailto:foo@example.com", 7}, // -------- unescape flag tests -------- - {"unescape=false", + {"Do not unescape", "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" - "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, false, + "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, + UnescapeRule::NONE, // GURL parses %-encoded hostnames into Punycode. L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7}, - {"unescape=true", + {"Unescape normally", "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" - "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, true, + "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, + UnescapeRule::NORMAL, L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" L"?q=\x30B0\x30FC\x30B0\x30EB", 7}, + {"Unescape normally including unescape spaces", + "http://www.google.com/search?q=Hello%20World", L"en", true, + UnescapeRule::SPACES, + L"http://www.google.com/search?q=Hello World", 7} /* {"unescape=true with some special characters", "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", L"", false, true, @@ -1108,7 +1118,7 @@ TEST(NetUtilTest, FormatUrl) { size_t prefix_len; std::wstring formatted = net::FormatUrl( GURL(tests[i].input), tests[i].languages, tests[i].omit, - tests[i].unescape, NULL, &prefix_len); + tests[i].escape_rules, NULL, &prefix_len); EXPECT_EQ(tests[i].output, formatted) << tests[i].description; EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; } @@ -1120,7 +1130,7 @@ TEST(NetUtilTest, FormatUrlParsed) { std::wstring formatted = net::FormatUrl( GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - L"ja", false, false, &parsed, NULL); + L"ja", false, 
UnescapeRule::NONE, &parsed, NULL); EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted); EXPECT_EQ(L"%E3%82%B0", @@ -1140,7 +1150,7 @@ TEST(NetUtilTest, FormatUrlParsed) { formatted = net::FormatUrl( GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - L"ja", false, true, &parsed, NULL); + L"ja", false, UnescapeRule::NORMAL, &parsed, NULL); EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" L"/\x30B0/?q=\x30B0#\x30B0", formatted); EXPECT_EQ(L"\x30B0", @@ -1159,7 +1169,7 @@ TEST(NetUtilTest, FormatUrlParsed) { formatted = net::FormatUrl( GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), - L"ja", true, true, &parsed, NULL); + L"ja", true, UnescapeRule::NORMAL, &parsed, NULL); EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" L"/\x30B0/?q=\x30B0#\x30B0", formatted); EXPECT_FALSE(parsed.username.is_valid()); |