summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authormhm@chromium.org <mhm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-06-03 02:16:32 +0000
committermhm@chromium.org <mhm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-06-03 02:16:32 +0000
commita23de85783d944cbb75f09737eb29c60ea87481d (patch)
tree30c4732b42443259134a3f83f574bfaa2a506292 /net
parent1b5237ecc1fea39e51e1634acbcdf11bd7ef57b0 (diff)
downloadchromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.zip
chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.tar.gz
chromium_src-a23de85783d944cbb75f09737eb29c60ea87481d.tar.bz2
Local text file with spaces in filename is urlencoded in tab title
When viewing a local text file with spaces in its filename, the tab title still shows the filename URL-encoded. The filename should be displayed with spaces rather than percent-escapes, which is more user-friendly. net::FormatUrl already exists, so using it is the natural fix, but it only applied the NORMAL unescape rule, not SPACES; it doesn't even unescape unicode. I plumbed an UnescapeRule parameter through it so that callers can choose whether or not spaces are converted. BUG=8775 (http://crbug.com/8775) TEST=Tested whether the input is escaped in the navigational context and ran the net tests New Review: http://codereview.chromium.org/118059 Review URL: http://codereview.chromium.org/56053 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@17462 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r--net/base/escape.cc4
-rw-r--r--net/base/escape.h22
-rw-r--r--net/base/escape_unittest.cc2
-rw-r--r--net/base/net_util.cc39
-rw-r--r--net/base/net_util.h19
-rw-r--r--net/base/net_util_unittest.cc56
6 files changed, 86 insertions, 56 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 54174fc..f836afb6 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -107,6 +107,10 @@ const char kUrlUnescape[128] = {
std::string UnescapeURLImpl(const std::string& escaped_text,
UnescapeRule::Type rules) {
+ // Do not unescape anything, return the |escaped_text| text.
+ if (rules == UnescapeRule::NONE)
+ return escaped_text;
+
// The output of the unescaping is always smaller than the input, so we can
// reserve the input size to make sure we have enough buffer and don't have
// to allocate in the loop below.
diff --git a/net/base/escape.h b/net/base/escape.h
index 1502b56..17f8646 100644
--- a/net/base/escape.h
+++ b/net/base/escape.h
@@ -41,32 +41,36 @@ class UnescapeRule {
typedef uint32 Type;
enum {
+ // Don't unescape anything at all.
+ NONE = 0,
+
// Don't unescape anything special, but all normal unescaping will happen.
// This is a placeholder and can't be combined with other flags (since it's
- // just the absense of them). Things like escaped letters, digits, and most
- // symbols will get unescaped with this mode.
- NORMAL = 0,
+ // just the absence of them). All other unescape rules imply "normal" in
+ // addition to their special meaning. Things like escaped letters, digits,
+ // and most symbols will get unescaped with this mode.
+ NORMAL = 1,
// Convert %20 to spaces. In some places where we're showing URLs, we may
// want this. In places where the URL may be copied and pasted out, then
// you wouldn't want this since it might not be interpreted in one piece
// by other applications.
- SPACES = 1,
+ SPACES = 2,
// Unescapes various characters that will change the meaning of URLs,
- // including '%', '+', '&', '/', '#'. If we unescaped these charaters, the
+ // including '%', '+', '&', '/', '#'. If we unescaped these characters, the
// resulting URL won't be the same as the source one. This flag is used when
// generating final output like filenames for URLs where we won't be
// interpreting as a URL and want to do as much unescaping as possible.
- URL_SPECIAL_CHARS = 2,
+ URL_SPECIAL_CHARS = 4,
- // Unescapes control characters such as %01. This INCLUDES NULLs!. This is
+ // Unescapes control characters such as %01. This INCLUDES NULLs. This is
// used for rare cases such as data: URL decoding where the result is binary
// data. You should not use this for normal URLs!
- CONTROL_CHARS = 4,
+ CONTROL_CHARS = 8,
// URL queries use "+" for space. This flag controls that replacement.
- REPLACE_PLUS_WITH_SPACE = 8,
+ REPLACE_PLUS_WITH_SPACE = 16,
};
};
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
index c1bcbd3..8c31d41 100644
--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -114,6 +114,8 @@ TEST(Escape, UnescapeURLComponent) {
{"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
{"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
{"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
+ {"Some%20random text %25%3bOK", UnescapeRule::NONE,
+ "Some%20random text %25%3bOK"},
{"Some%20random text %25%3bOK", UnescapeRule::NORMAL,
"Some%20random text %25;OK"},
{"Some%20random text %25%3bOK", UnescapeRule::SPACES,
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index bfc2cea..3139acb 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -656,11 +656,11 @@ void IDNToUnicodeOneComponent(const char16* comp,
namespace net {
// Appends the substring |in_component| inside of the URL |spec| to |output|,
-// and the resulting range will be filled into |out_component|. Calls the
-// unescaper for the substring if |unescape| is true.
+// and the resulting range will be filled into |out_component|. |unescape_rules|
+// defines how to clean the URL for human readability.
static void AppendFormattedComponent(const std::string& spec,
const url_parse::Component& in_component,
- bool unescape,
+ UnescapeRule::Type unescape_rules,
std::wstring* output,
url_parse::Component* out_component);
@@ -1084,18 +1084,18 @@ void AppendFormattedHost(const GURL& url,
/* static */
void AppendFormattedComponent(const std::string& spec,
const url_parse::Component& in_component,
- bool unescape,
+ UnescapeRule::Type unescape_rules,
std::wstring* output,
url_parse::Component* out_component) {
if (in_component.is_nonempty()) {
out_component->begin = static_cast<int>(output->length());
- if (unescape) {
- output->append(UnescapeAndDecodeUTF8URLComponent(
- spec.substr(in_component.begin, in_component.len),
- UnescapeRule::NORMAL));
- } else {
+ if (unescape_rules == UnescapeRule::NONE) {
output->append(UTF8ToWide(spec.substr(
in_component.begin, in_component.len)));
+ } else {
+ output->append(UnescapeAndDecodeUTF8URLComponent(
+ spec.substr(in_component.begin, in_component.len),
+ unescape_rules));
}
out_component->len =
static_cast<int>(output->length()) - out_component->begin;
@@ -1104,9 +1104,12 @@ void AppendFormattedComponent(const std::string& spec,
}
}
-std::wstring FormatUrl(
- const GURL& url, const std::wstring& languages, bool omit_username_password,
- bool unescape, url_parse::Parsed* new_parsed, size_t* prefix_end) {
+std::wstring FormatUrl(const GURL& url,
+ const std::wstring& languages,
+ bool omit_username_password,
+ UnescapeRule::Type unescape_rules,
+ url_parse::Parsed* new_parsed,
+ size_t* prefix_end) {
url_parse::Parsed parsed_temp;
if (!new_parsed)
new_parsed = &parsed_temp;
@@ -1140,12 +1143,14 @@ std::wstring FormatUrl(
new_parsed->password.reset();
} else {
AppendFormattedComponent(
- spec, parsed.username, unescape, &url_string, &new_parsed->username);
+ spec, parsed.username, unescape_rules,
+ &url_string, &new_parsed->username);
if (parsed.password.is_valid()) {
url_string.push_back(':');
}
AppendFormattedComponent(
- spec, parsed.password, unescape, &url_string, &new_parsed->password);
+ spec, parsed.password, unescape_rules,
+ &url_string, &new_parsed->password);
if (parsed.username.is_valid() || parsed.password.is_valid()) {
url_string.push_back('@');
}
@@ -1169,11 +1174,13 @@ std::wstring FormatUrl(
// Path and query both get the same general unescape & convert treatment.
AppendFormattedComponent(
- spec, parsed.path, unescape, &url_string, &new_parsed->path);
+ spec, parsed.path, unescape_rules, &url_string,
+ &new_parsed->path);
if (parsed.query.is_valid())
url_string.push_back('?');
AppendFormattedComponent(
- spec, parsed.query, unescape, &url_string, &new_parsed->query);
+ spec, parsed.query, unescape_rules, &url_string,
+ &new_parsed->query);
// Reference is stored in valid, unescaped UTF-8, so we can just convert.
if (parsed.ref.is_valid()) {
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 2ab6101..d7f1de7 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -14,6 +14,7 @@
#include <string>
#include "base/basictypes.h"
+#include "net/base/escape.h"
struct addrinfo;
class FilePath;
@@ -184,23 +185,25 @@ void AppendFormattedHost(const GURL& url, const std::wstring& languages,
// Creates a string representation of |url|. The IDN host name may
// be in Unicode if |languages| accepts the Unicode representation.
// If |omit_username_password| is true, the username and the password are
-// omitted. If |unescape| is true and the path part and the query part seem to
-// be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8.
-// |new_parsed| will have parsing parameters of the resultant URL. |prefix_end|
-// will be the length before the hostname of the resultant URL. |new_parsed|
-// and |prefix_end| may be NULL.
+// omitted. |unescape_rules| defines how to clean the URL for human readability.
+// You will generally want |UnescapeRule::SPACES| for display to the user if you
+// can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the
+// query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and
+// UTF-8. |new_parsed| will have parsing parameters of the resultant URL.
+// |prefix_end| will be the length before the hostname of the resultant URL.
+// |new_parsed| and |prefix_end| may be NULL.
std::wstring FormatUrl(const GURL& url,
const std::wstring& languages,
bool omit_username_password,
- bool unescape,
+ UnescapeRule::Type unescape_rules,
url_parse::Parsed* new_parsed,
size_t* prefix_end);
// Creates a string representation of |url| for display to the user.
// This is a shorthand of the above function with omit_username_password=true,
-// unescape=true, new_parsed=NULL, and prefix_end=NULL.
+// unescape_rules=UnescapeRule::SPACES, new_parsed=NULL, and prefix_end=NULL.
inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) {
- return FormatUrl(url, languages, true, true, NULL, NULL);
+ return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL);
}
} // namespace net
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 5f9ed4e..29b8f702 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -7,6 +7,7 @@
#include "base/string_util.h"
#include "base/time.h"
#include "googleurl/src/gurl.h"
+#include "net/base/escape.h"
#include "net/base/net_util.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -342,7 +343,7 @@ struct UrlTestData {
const char* input;
const std::wstring languages;
bool omit;
- bool unescape;
+ UnescapeRule::Type escape_rules;
const std::wstring output;
size_t prefix_len;
};
@@ -1021,81 +1022,90 @@ TEST(NetUtilTest, GetHostName) {
TEST(NetUtilTest, FormatUrl) {
const UrlTestData tests[] = {
- {"Empty URL", "", L"", true, true, L"", 0},
+ {"Empty URL", "", L"", true, UnescapeRule::NORMAL, L"", 0},
{"Simple URL",
- "http://www.google.com/", L"", true, true,
+ "http://www.google.com/", L"", true, UnescapeRule::NORMAL,
L"http://www.google.com/", 7},
{"With a port number and a reference",
- "http://www.google.com:8080/#\xE3\x82\xB0", L"", true, true,
+ "http://www.google.com:8080/#\xE3\x82\xB0", L"", true,
+ UnescapeRule::NORMAL,
L"http://www.google.com:8080/#\x30B0", 7},
// -------- IDN tests --------
{"Japanese IDN with ja",
- "http://xn--l8jvb1ey91xtjb.jp", L"ja", true, true,
+ "http://xn--l8jvb1ey91xtjb.jp", L"ja", true, UnescapeRule::NORMAL,
L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
{"Japanese IDN with en",
- "http://xn--l8jvb1ey91xtjb.jp", L"en", true, true,
+ "http://xn--l8jvb1ey91xtjb.jp", L"en", true, UnescapeRule::NORMAL,
L"http://xn--l8jvb1ey91xtjb.jp/", 7},
{"Japanese IDN without any languages",
- "http://xn--l8jvb1ey91xtjb.jp", L"", true, true,
+ "http://xn--l8jvb1ey91xtjb.jp", L"", true, UnescapeRule::NORMAL,
// Single script is safe for empty languages.
L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
{"mailto: with Japanese IDN",
- "mailto:foo@xn--l8jvb1ey91xtjb.jp", L"ja", true, true,
+ "mailto:foo@xn--l8jvb1ey91xtjb.jp", L"ja", true, UnescapeRule::NORMAL,
// GURL doesn't assume an email address's domain part as a host name.
L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
{"file: with Japanese IDN",
- "file://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true,
+ "file://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true,
+ UnescapeRule::NORMAL,
L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
{"ftp: with Japanese IDN",
- "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true,
+ "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true,
+ UnescapeRule::NORMAL,
L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
// -------- omit_username_password flag tests --------
{"With username and password, omit_username_password=false",
- "http://user:passwd@example.com/foo", L"", false, true,
+ "http://user:passwd@example.com/foo", L"", false, UnescapeRule::NORMAL,
L"http://user:passwd@example.com/foo", 19},
{"With username and password, omit_username_password=true",
- "http://user:passwd@example.com/foo", L"", true, true,
+ "http://user:passwd@example.com/foo", L"", true, UnescapeRule::NORMAL,
L"http://example.com/foo", 7},
{"With username and no password",
- "http://user@example.com/foo", L"", true, true,
+ "http://user@example.com/foo", L"", true, UnescapeRule::NORMAL,
L"http://example.com/foo", 7},
{"Just '@' without username and password",
- "http://@example.com/foo", L"", true, true,
+ "http://@example.com/foo", L"", true, UnescapeRule::NORMAL,
L"http://example.com/foo", 7},
// GURL doesn't think local-part of an email address is username for URL.
{"mailto:, omit_username_password=true",
- "mailto:foo@example.com", L"", true, true,
+ "mailto:foo@example.com", L"", true, UnescapeRule::NORMAL,
L"mailto:foo@example.com", 7},
// -------- unescape flag tests --------
- {"unescape=false",
+ {"Do not unescape",
"http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
"%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
- "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, false,
+ "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true,
+ UnescapeRule::NONE,
// GURL parses %-encoded hostnames into Punycode.
L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
- {"unescape=true",
+ {"Unescape normally",
"http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
"%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
- "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, true,
+ "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true,
+ UnescapeRule::NORMAL,
L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
+ {"Unescape normally including unescape spaces",
+ "http://www.google.com/search?q=Hello%20World", L"en", true,
+ UnescapeRule::SPACES,
+ L"http://www.google.com/search?q=Hello World", 7}
/*
{"unescape=true with some special characters",
"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", L"", false, true,
@@ -1108,7 +1118,7 @@ TEST(NetUtilTest, FormatUrl) {
size_t prefix_len;
std::wstring formatted = net::FormatUrl(
GURL(tests[i].input), tests[i].languages, tests[i].omit,
- tests[i].unescape, NULL, &prefix_len);
+ tests[i].escape_rules, NULL, &prefix_len);
EXPECT_EQ(tests[i].output, formatted) << tests[i].description;
EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
}
@@ -1120,7 +1130,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
std::wstring formatted = net::FormatUrl(
GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
"%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- L"ja", false, false, &parsed, NULL);
+ L"ja", false, UnescapeRule::NONE, &parsed, NULL);
EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted);
EXPECT_EQ(L"%E3%82%B0",
@@ -1140,7 +1150,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
formatted = net::FormatUrl(
GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
"%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- L"ja", false, true, &parsed, NULL);
+ L"ja", false, UnescapeRule::NORMAL, &parsed, NULL);
EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
L"/\x30B0/?q=\x30B0#\x30B0", formatted);
EXPECT_EQ(L"\x30B0",
@@ -1159,7 +1169,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
formatted = net::FormatUrl(
GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
"%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
- L"ja", true, true, &parsed, NULL);
+ L"ja", true, UnescapeRule::NORMAL, &parsed, NULL);
EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
L"/\x30B0/?q=\x30B0#\x30B0", formatted);
EXPECT_FALSE(parsed.username.is_valid());