summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorbrettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-05-22 18:15:24 +0000
committerbrettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-05-22 18:15:24 +0000
commitf9fe8630a0ceba09f1bfcc4af7a52048be0d133c (patch)
treec3a75daafa2d78e70e17bf24fb91502e8f94171c /net
parent03ce2f5bf335b39ad24306a3a962823e46305cc4 (diff)
downloadchromium_src-f9fe8630a0ceba09f1bfcc4af7a52048be0d133c.zip
chromium_src-f9fe8630a0ceba09f1bfcc4af7a52048be0d133c.tar.gz
chromium_src-f9fe8630a0ceba09f1bfcc4af7a52048be0d133c.tar.bz2
Shows Unicode IDN instead of Punycode in the following:
- Bookmark Manager
- Edit Bookmark dialog opened by Bookmark Manager
- Edit Bookmark dialog opened by the star on the left of the address bar

Introduces a new function, net::FormatUrl(), which has the following parameters in addition to those of gfx::GetCleanStringFromUrl():
- bool omit_username_password
- bool unescape
and moves gfx::GetCleanStringFromUrl() to the net:: namespace, removing its last two parameters.

BUG=3991
Checked in for tkent
Original review = http://codereview.chromium.org/115346

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16761 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r--net/base/net_util.cc144
-rw-r--r--net/base/net_util.h33
-rw-r--r--net/base/net_util_unittest.cc164
3 files changed, 341 insertions, 0 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 06b43be..bfc2cea 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -655,6 +655,15 @@ void IDNToUnicodeOneComponent(const char16* comp,
namespace net {
+// Appends the |in_component| substring of the URL |spec| to |output| and
+// stores the appended range in |out_component| (reset if the component is
+// empty). The substring is unescaped first when |unescape| is true.
+static void AppendFormattedComponent(const std::string& spec,
+ const url_parse::Component& in_component,
+ bool unescape,
+ std::wstring* output,
+ url_parse::Component* out_component);
+
GURL FilePathToFileURL(const FilePath& path) {
// Produce a URL like "file:///C:/foo" for a regular file, or
// "file://///server/path" for UNC. The URL canonicalizer will fix up the
@@ -1045,4 +1054,139 @@ std::string GetHostName() {
return std::string(buffer);
}
+// Appends the host of |url| to |output|, passing it through IDNToUnicode()
+// together with |languages|. When |new_parsed| is non-null its host component
+// is set to the range just appended, or reset if |url| has no host.
+void AppendFormattedHost(const GURL& url,
+                         const std::wstring& languages,
+                         std::wstring* output,
+                         url_parse::Parsed* new_parsed) {
+  const url_parse::Component& host =
+      url.parsed_for_possibly_invalid_spec().host;
+
+  // Nothing to append: only record the absence of a host.
+  if (!host.is_nonempty()) {
+    if (new_parsed)
+      new_parsed->host.reset();
+    return;
+  }
+
+  // Remember where the host starts so its range can be reported afterwards.
+  const int host_start = static_cast<int>(output->length());
+
+  // Handle possible IDN in the host name.
+  const std::string& spec = url.possibly_invalid_spec();
+  DCHECK(host.begin >= 0 &&
+         ((spec.length() == 0 && host.begin == 0) ||
+          host.begin < static_cast<int>(spec.length())));
+  net::IDNToUnicode(&spec[host.begin], host.len, languages, output);
+
+  if (new_parsed) {
+    new_parsed->host.begin = host_start;
+    new_parsed->host.len = static_cast<int>(output->length()) - host_start;
+  }
+}
+
+// Appends the |in_component| range of |spec| to |output| as wide text and
+// reports the appended range through |out_component|. An empty or invalid
+// component appends nothing and resets |out_component|.
+/* static */
+void AppendFormattedComponent(const std::string& spec,
+                              const url_parse::Component& in_component,
+                              bool unescape,
+                              std::wstring* output,
+                              url_parse::Component* out_component) {
+  if (!in_component.is_nonempty()) {
+    out_component->reset();
+    return;
+  }
+
+  const int start = static_cast<int>(output->length());
+  const std::string piece =
+      spec.substr(in_component.begin, in_component.len);
+
+  // Either unescape and decode the piece, or only widen it from UTF-8.
+  if (unescape) {
+    output->append(
+        UnescapeAndDecodeUTF8URLComponent(piece, UnescapeRule::NORMAL));
+  } else {
+    output->append(UTF8ToWide(piece));
+  }
+
+  out_component->begin = start;
+  out_component->len = static_cast<int>(output->length()) - start;
+}
+
+// Builds a wide-string representation of |url|.
+//
+// |languages| is forwarded to AppendFormattedHost() for the host name.
+// |omit_username_password| drops the username and password from the result.
+// |unescape| is forwarded to AppendFormattedComponent() for the username,
+// password, path and query.
+// |new_parsed|, if non-NULL, receives the range of every component inside the
+// returned string; components absent from the result are reset, so the
+// structure can safely be reused across calls.
+// |prefix_end|, if non-NULL, receives the length of the text that precedes
+// the host name (0 for an empty URL).
+std::wstring FormatUrl(
+    const GURL& url, const std::wstring& languages, bool omit_username_password,
+    bool unescape, url_parse::Parsed* new_parsed, size_t* prefix_end) {
+  url_parse::Parsed parsed_temp;
+  if (!new_parsed)
+    new_parsed = &parsed_temp;
+
+  std::wstring url_string;
+
+  // An empty URL formats to an empty string. Clear every component so that a
+  // |new_parsed| reused from an earlier call does not keep ranges that do not
+  // exist in the empty result.
+  if (url.is_empty()) {
+    new_parsed->scheme.reset();
+    new_parsed->username.reset();
+    new_parsed->password.reset();
+    new_parsed->host.reset();
+    new_parsed->port.reset();
+    new_parsed->path.reset();
+    new_parsed->query.reset();
+    new_parsed->ref.reset();
+    if (prefix_end)
+      *prefix_end = 0;
+    return url_string;
+  }
+
+  // We handle both valid and invalid URLs (this will give us the spec
+  // regardless of validity).
+  const std::string& spec = url.possibly_invalid_spec();
+  const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
+
+  // Copy everything before the username (the scheme and the separators.)
+  // These are ASCII.
+  const int pre_end =
+      parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true);
+  for (int i = 0; i < pre_end; ++i)
+    url_string.push_back(spec[i]);
+  new_parsed->scheme = parsed.scheme;
+
+  if (omit_username_password) {
+    // Remove the username and password fields. We don't want to display those
+    // to the user since they can be used for attacks,
+    // e.g. "http://google.com:search@evil.ru/"
+    new_parsed->username.reset();
+    new_parsed->password.reset();
+  } else {
+    AppendFormattedComponent(
+        spec, parsed.username, unescape, &url_string, &new_parsed->username);
+    if (parsed.password.is_valid())
+      url_string.push_back(':');
+    AppendFormattedComponent(
+        spec, parsed.password, unescape, &url_string, &new_parsed->password);
+    if (parsed.username.is_valid() || parsed.password.is_valid())
+      url_string.push_back('@');
+  }
+  if (prefix_end)
+    *prefix_end = static_cast<size_t>(url_string.length());
+
+  AppendFormattedHost(url, languages, &url_string, new_parsed);
+
+  // Port: copied verbatim after a ':' separator.
+  if (parsed.port.is_nonempty()) {
+    url_string.push_back(':');
+    const int port_begin = static_cast<int>(url_string.length());
+    for (int i = parsed.port.begin; i < parsed.port.end(); ++i)
+      url_string.push_back(spec[i]);
+    new_parsed->port.begin = port_begin;
+    new_parsed->port.len =
+        static_cast<int>(url_string.length()) - port_begin;
+  } else {
+    new_parsed->port.reset();
+  }
+
+  // Path and query both get the same general unescape & convert treatment.
+  AppendFormattedComponent(
+      spec, parsed.path, unescape, &url_string, &new_parsed->path);
+  if (parsed.query.is_valid())
+    url_string.push_back('?');
+  AppendFormattedComponent(
+      spec, parsed.query, unescape, &url_string, &new_parsed->query);
+
+  // Reference is stored in valid, unescaped UTF-8, so we can just convert.
+  if (parsed.ref.is_valid()) {
+    url_string.push_back('#');
+    const int ref_begin = static_cast<int>(url_string.length());
+    if (parsed.ref.len > 0) {
+      url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin],
+                                               parsed.ref.len)));
+    }
+    new_parsed->ref.begin = ref_begin;
+    new_parsed->ref.len = static_cast<int>(url_string.length()) - ref_begin;
+  } else {
+    // Like every other component, report "no reference" explicitly instead of
+    // leaving whatever the caller's structure held before.
+    new_parsed->ref.reset();
+  }
+
+  return url_string;
+}
+
} // namespace net
diff --git a/net/base/net_util.h b/net/base/net_util.h
index e64cb88..2ab6101 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -23,6 +23,10 @@ namespace base {
class Time;
}
+namespace url_parse {
+struct Parsed;
+}
+
namespace net {
// Given the full path to a file name, creates a file: URL. The returned URL
@@ -170,6 +174,35 @@ bool IsPortAllowedByFtp(int port);
// Set socket to non-blocking mode
int SetNonBlocking(int fd);
+// Appends the host of |url| to |output|, formatted for the user (the host is
+// run through IDNToUnicode with |languages|, the same accept-languages value
+// ElideURL takes). If |new_parsed| is non-null, its host component is set to
+// the appended range, or reset when |url| has no host.
+void AppendFormattedHost(const GURL& url, const std::wstring& languages,
+ std::wstring* output, url_parse::Parsed* new_parsed);
+
+// Creates a string representation of |url|. The IDN host name may
+// be in Unicode if |languages| accepts the Unicode representation.
+// If |omit_username_password| is true, the username and the password are
+// omitted. If |unescape| is true, the username, password, path and query
+// are %-unescaped and decoded as UTF-8; otherwise they are only widened.
+// |new_parsed| receives the component ranges within the returned string, and
+// |prefix_end| the length of the text preceding the hostname. |new_parsed|
+// and |prefix_end| may be NULL.
+std::wstring FormatUrl(const GURL& url,
+ const std::wstring& languages,
+ bool omit_username_password,
+ bool unescape,
+ url_parse::Parsed* new_parsed,
+ size_t* prefix_end);
+
+// Creates a string representation of |url| for display to the user.
+// This is shorthand for the above function with omit_username_password=true,
+// unescape=true, new_parsed=NULL, and prefix_end=NULL.
+inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) {
+ return FormatUrl(url, languages, true, true, NULL, NULL);
+}
+
} // namespace net
#endif // NET_BASE_NET_UTIL_H__
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 79c1138..5f9ed4e 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -337,6 +337,16 @@ struct SuggestedFilenameCase {
const wchar_t* expected_filename;
};
+struct UrlTestData {  // One table row for the FormatUrl test.
+ const char* description;  // Label appended to failure messages.
+ const char* input;  // URL text handed to the GURL constructor.
+ const std::wstring languages;  // |languages| argument for FormatUrl().
+ bool omit;  // |omit_username_password| argument for FormatUrl().
+ bool unescape;  // |unescape| argument for FormatUrl().
+ const std::wstring output;  // Expected return value of FormatUrl().
+ size_t prefix_len;  // Expected value written through |prefix_end|.
+};
+
// Returns an addrinfo for the given 32-bit address (IPv4.)
// The result lives in static storage, so don't delete it.
const struct addrinfo* GetIPv4Address(const uint8 bytes[4]) {
@@ -1008,3 +1018,157 @@ TEST(NetUtilTest, GetHostName) {
std::string hostname = net::GetHostName();
EXPECT_FALSE(hostname.empty());
}
+
+TEST(NetUtilTest, FormatUrl) {
+ const UrlTestData tests[] = {  // description, input, languages, omit, unescape, output, prefix_len
+ {"Empty URL", "", L"", true, true, L"", 0},
+
+ {"Simple URL",
+ "http://www.google.com/", L"", true, true,
+ L"http://www.google.com/", 7},
+
+ {"With a port number and a reference",
+ "http://www.google.com:8080/#\xE3\x82\xB0", L"", true, true,
+ L"http://www.google.com:8080/#\x30B0", 7},
+
+ // -------- IDN tests --------
+ {"Japanese IDN with ja",
+ "http://xn--l8jvb1ey91xtjb.jp", L"ja", true, true,
+ L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
+
+ {"Japanese IDN with en",
+ "http://xn--l8jvb1ey91xtjb.jp", L"en", true, true,
+ L"http://xn--l8jvb1ey91xtjb.jp/", 7},
+
+ {"Japanese IDN without any languages",
+ "http://xn--l8jvb1ey91xtjb.jp", L"", true, true,
+ // Single script is safe for empty languages.
+ L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
+
+ {"mailto: with Japanese IDN",
+ "mailto:foo@xn--l8jvb1ey91xtjb.jp", L"ja", true, true,
+ // GURL doesn't assume an email address's domain part as a host name.
+ L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
+
+ {"file: with Japanese IDN",
+ "file://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true,
+ L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
+
+ {"ftp: with Japanese IDN",
+ "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true,
+ L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
+
+ // -------- omit_username_password flag tests --------
+ {"With username and password, omit_username_password=false",
+ "http://user:passwd@example.com/foo", L"", false, true,
+ L"http://user:passwd@example.com/foo", 19},
+
+ {"With username and password, omit_username_password=true",
+ "http://user:passwd@example.com/foo", L"", true, true,
+ L"http://example.com/foo", 7},
+
+ {"With username and no password",
+ "http://user@example.com/foo", L"", true, true,
+ L"http://example.com/foo", 7},
+
+ {"Just '@' without username and password",
+ "http://@example.com/foo", L"", true, true,
+ L"http://example.com/foo", 7},
+
+ // GURL doesn't think local-part of an email address is username for URL.
+ {"mailto:, omit_username_password=true",
+ "mailto:foo@example.com", L"", true, true,
+ L"mailto:foo@example.com", 7},
+
+ // -------- unescape flag tests --------
+ {"unescape=false",
+ "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
+ "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
+ "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, false,
+ // GURL parses %-encoded hostnames into Punycode.
+ L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
+ L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
+
+ {"unescape=true",
+ "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
+ "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
+ "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, true,
+ L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
+ L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
+
+ /*
+ {"unescape=true with some special characters",
+ "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", L"", false, true,
+ L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
+ */
+ // The case above is disabled: the resultant URL becomes "...user%253A:%2540passwd...".
+ };
+
+ for (size_t i = 0; i < arraysize(tests); ++i) {
+ size_t prefix_len;  // Written by FormatUrl() through |prefix_end|.
+ std::wstring formatted = net::FormatUrl(
+ GURL(tests[i].input), tests[i].languages, tests[i].omit,
+ tests[i].unescape, NULL, &prefix_len);
+ EXPECT_EQ(tests[i].output, formatted) << tests[i].description;
+ EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
+ }
+}
+
+TEST(NetUtilTest, FormatUrlParsed) {
+ // unescape=false: username, password, path and query stay %-escaped.
+ url_parse::Parsed parsed;  // Reused by all three FormatUrl() calls below.
+ std::wstring formatted = net::FormatUrl(
+ GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
+ "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
+ L"ja", false, false, &parsed, NULL);
+ EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
+ L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted);
+ EXPECT_EQ(L"%E3%82%B0",
+ formatted.substr(parsed.username.begin, parsed.username.len));
+ EXPECT_EQ(L"%E3%83%BC",
+ formatted.substr(parsed.password.begin, parsed.password.len));
+ EXPECT_EQ(L"\x30B0\x30FC\x30B0\x30EB.jp",
+ formatted.substr(parsed.host.begin, parsed.host.len));
+ EXPECT_EQ(L"8080", formatted.substr(parsed.port.begin, parsed.port.len));
+ EXPECT_EQ(L"/%E3%82%B0/",
+ formatted.substr(parsed.path.begin, parsed.path.len));
+ EXPECT_EQ(L"q=%E3%82%B0",
+ formatted.substr(parsed.query.begin, parsed.query.len));
+ EXPECT_EQ(L"\x30B0", formatted.substr(parsed.ref.begin, parsed.ref.len));
+
+ // unescape=true: the same URL with every component shown as Unicode text.
+ formatted = net::FormatUrl(
+ GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
+ "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
+ L"ja", false, true, &parsed, NULL);
+ EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
+ L"/\x30B0/?q=\x30B0#\x30B0", formatted);
+ EXPECT_EQ(L"\x30B0",
+ formatted.substr(parsed.username.begin, parsed.username.len));
+ EXPECT_EQ(L"\x30FC",
+ formatted.substr(parsed.password.begin, parsed.password.len));
+ EXPECT_EQ(L"\x30B0\x30FC\x30B0\x30EB.jp",
+ formatted.substr(parsed.host.begin, parsed.host.len));
+ EXPECT_EQ(L"8080", formatted.substr(parsed.port.begin, parsed.port.len));
+ EXPECT_EQ(L"/\x30B0/", formatted.substr(parsed.path.begin, parsed.path.len));
+ EXPECT_EQ(L"q=\x30B0",
+ formatted.substr(parsed.query.begin, parsed.query.len));
+ EXPECT_EQ(L"\x30B0", formatted.substr(parsed.ref.begin, parsed.ref.len));
+
+ // omit_username_password=true with unescape=true: both become invalid.
+ formatted = net::FormatUrl(
+ GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
+ "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
+ L"ja", true, true, &parsed, NULL);
+ EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
+ L"/\x30B0/?q=\x30B0#\x30B0", formatted);
+ EXPECT_FALSE(parsed.username.is_valid());
+ EXPECT_FALSE(parsed.password.is_valid());
+ EXPECT_EQ(L"\x30B0\x30FC\x30B0\x30EB.jp",
+ formatted.substr(parsed.host.begin, parsed.host.len));
+ EXPECT_EQ(L"8080", formatted.substr(parsed.port.begin, parsed.port.len));
+ EXPECT_EQ(L"/\x30B0/", formatted.substr(parsed.path.begin, parsed.path.len));
+ EXPECT_EQ(L"q=\x30B0",
+ formatted.substr(parsed.query.begin, parsed.query.len));
+ EXPECT_EQ(L"\x30B0", formatted.substr(parsed.ref.begin, parsed.ref.len));
+}