diff options
author | brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-22 18:15:24 +0000 |
---|---|---|
committer | brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-22 18:15:24 +0000 |
commit | f9fe8630a0ceba09f1bfcc4af7a52048be0d133c (patch) | |
tree | c3a75daafa2d78e70e17bf24fb91502e8f94171c /net | |
parent | 03ce2f5bf335b39ad24306a3a962823e46305cc4 (diff) | |
download | chromium_src-f9fe8630a0ceba09f1bfcc4af7a52048be0d133c.zip chromium_src-f9fe8630a0ceba09f1bfcc4af7a52048be0d133c.tar.gz chromium_src-f9fe8630a0ceba09f1bfcc4af7a52048be0d133c.tar.bz2 |
Shows Unicode IDN instead of Punycode in the following places:
- Bookmark Manager
- Edit Bookmark dialog opened by Bookmark Manager
- Edit Bookmark dialog opened by the star on the left of the address bar
Introduces a new function, net::FormatUrl(), which takes the following
parameters in addition to those of gfx::GetCleanStringFromUrl():
- bool omit_username_password
- bool unescape
Also moves gfx::GetCleanStringFromUrl() to the net:: namespace and removes
its last two parameters.
BUG=3991
Checked in for tkent
Original review = http://codereview.chromium.org/115346
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16761 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/base/net_util.cc | 144 | ||||
-rw-r--r-- | net/base/net_util.h | 33 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 164 |
3 files changed, 341 insertions, 0 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 06b43be..bfc2cea 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -655,6 +655,15 @@ void IDNToUnicodeOneComponent(const char16* comp, namespace net { +// Appends the substring |in_component| inside of the URL |spec| to |output|, +// and the resulting range will be filled into |out_component|. Calls the +// unescaper for the substring if |unescape| is true. +static void AppendFormattedComponent(const std::string& spec, + const url_parse::Component& in_component, + bool unescape, + std::wstring* output, + url_parse::Component* out_component); + GURL FilePathToFileURL(const FilePath& path) { // Produce a URL like "file:///C:/foo" for a regular file, or // "file://///server/path" for UNC. The URL canonicalizer will fix up the @@ -1045,4 +1054,139 @@ std::string GetHostName() { return std::string(buffer); } +void AppendFormattedHost(const GURL& url, + const std::wstring& languages, + std::wstring* output, + url_parse::Parsed* new_parsed) { + const url_parse::Component& host = + url.parsed_for_possibly_invalid_spec().host; + + if (host.is_nonempty()) { + // Handle possible IDN in the host name. 
+ if (new_parsed) + new_parsed->host.begin = static_cast<int>(output->length()); + + const std::string& spec = url.possibly_invalid_spec(); + DCHECK(host.begin >= 0 && + ((spec.length() == 0 && host.begin == 0) || + host.begin < static_cast<int>(spec.length()))); + net::IDNToUnicode(&spec[host.begin], host.len, languages, output); + + if (new_parsed) { + new_parsed->host.len = + static_cast<int>(output->length()) - new_parsed->host.begin; + } + } else if (new_parsed) { + new_parsed->host.reset(); + } +} + +/* static */ +void AppendFormattedComponent(const std::string& spec, + const url_parse::Component& in_component, + bool unescape, + std::wstring* output, + url_parse::Component* out_component) { + if (in_component.is_nonempty()) { + out_component->begin = static_cast<int>(output->length()); + if (unescape) { + output->append(UnescapeAndDecodeUTF8URLComponent( + spec.substr(in_component.begin, in_component.len), + UnescapeRule::NORMAL)); + } else { + output->append(UTF8ToWide(spec.substr( + in_component.begin, in_component.len))); + } + out_component->len = + static_cast<int>(output->length()) - out_component->begin; + } else { + out_component->reset(); + } +} + +std::wstring FormatUrl( + const GURL& url, const std::wstring& languages, bool omit_username_password, + bool unescape, url_parse::Parsed* new_parsed, size_t* prefix_end) { + url_parse::Parsed parsed_temp; + if (!new_parsed) + new_parsed = &parsed_temp; + + std::wstring url_string; + + // Check for empty URLs or 0 available text width. + if (url.is_empty()) { + if (prefix_end) + *prefix_end = 0; + return url_string; + } + + // We handle both valid and invalid URLs (this will give us the spec + // regardless of validity). + const std::string& spec = url.possibly_invalid_spec(); + const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); + + // Copy everything before the username (the scheme and the separators.) + // These are ASCII. 
+ int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true); + for (int i = 0; i < pre_end; ++i) + url_string.push_back(spec[i]); + new_parsed->scheme = parsed.scheme; + + if (omit_username_password) { + // Remove the username and password fields. We don't want to display those + // to the user since they can be used for attacks, + // e.g. "http://google.com:search@evil.ru/" + new_parsed->username.reset(); + new_parsed->password.reset(); + } else { + AppendFormattedComponent( + spec, parsed.username, unescape, &url_string, &new_parsed->username); + if (parsed.password.is_valid()) { + url_string.push_back(':'); + } + AppendFormattedComponent( + spec, parsed.password, unescape, &url_string, &new_parsed->password); + if (parsed.username.is_valid() || parsed.password.is_valid()) { + url_string.push_back('@'); + } + } + if (prefix_end) + *prefix_end = static_cast<size_t>(url_string.length()); + + AppendFormattedHost(url, languages, &url_string, new_parsed); + + // Port. + if (parsed.port.is_nonempty()) { + url_string.push_back(':'); + int begin = url_string.length(); + for (int i = parsed.port.begin; i < parsed.port.end(); ++i) + url_string.push_back(spec[i]); + new_parsed->port.begin = begin; + new_parsed->port.len = url_string.length() - begin; + } else { + new_parsed->port.reset(); + } + + // Path and query both get the same general unescape & convert treatment. + AppendFormattedComponent( + spec, parsed.path, unescape, &url_string, &new_parsed->path); + if (parsed.query.is_valid()) + url_string.push_back('?'); + AppendFormattedComponent( + spec, parsed.query, unescape, &url_string, &new_parsed->query); + + // Reference is stored in valid, unescaped UTF-8, so we can just convert. 
+ if (parsed.ref.is_valid()) { + url_string.push_back('#'); + int begin = url_string.length(); + if (parsed.ref.len > 0) + url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin], + parsed.ref.len))); + new_parsed->ref.begin = begin; + new_parsed->ref.len = url_string.length() - begin; + } + + return url_string; +} + } // namespace net diff --git a/net/base/net_util.h b/net/base/net_util.h index e64cb88..2ab6101 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -23,6 +23,10 @@ namespace base { class Time; } +namespace url_parse { +struct Parsed; +} + namespace net { // Given the full path to a file name, creates a file: URL. The returned URL @@ -170,6 +174,35 @@ bool IsPortAllowedByFtp(int port); // Set socket to non-blocking mode int SetNonBlocking(int fd); +// Appends the given part of the original URL to the output string formatted for +// the user. The given parsed structure will be updated. The host name formatter +// also takes the same accept languages component as ElideURL. |new_parsed| may +// be null. +void AppendFormattedHost(const GURL& url, const std::wstring& languages, + std::wstring* output, url_parse::Parsed* new_parsed); + +// Creates a string representation of |url|. The IDN host name may +// be in Unicode if |languages| accepts the Unicode representation. +// If |omit_username_password| is true, the username and the password are +// omitted. If |unescape| is true and the path part and the query part seem to +// be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8. +// |new_parsed| will have parsing parameters of the resultant URL. |prefix_end| +// will be the length before the hostname of the resultant URL. |new_parsed| +// and |prefix_end| may be NULL. +std::wstring FormatUrl(const GURL& url, + const std::wstring& languages, + bool omit_username_password, + bool unescape, + url_parse::Parsed* new_parsed, + size_t* prefix_end); + +// Creates a string representation of |url| for display to the user. 
+// This is a shorthand of the above function with omit_username_password=true, +// unescape=true, new_parsed=NULL, and prefix_end=NULL. +inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) { + return FormatUrl(url, languages, true, true, NULL, NULL); +} + } // namespace net #endif // NET_BASE_NET_UTIL_H__ diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 79c1138..5f9ed4e 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -337,6 +337,16 @@ struct SuggestedFilenameCase { const wchar_t* expected_filename; }; +struct UrlTestData { + const char* description; + const char* input; + const std::wstring languages; + bool omit; + bool unescape; + const std::wstring output; + size_t prefix_len; +}; + // Returns an addrinfo for the given 32-bit address (IPv4.) // The result lives in static storage, so don't delete it. const struct addrinfo* GetIPv4Address(const uint8 bytes[4]) { @@ -1008,3 +1018,157 @@ TEST(NetUtilTest, GetHostName) { std::string hostname = net::GetHostName(); EXPECT_FALSE(hostname.empty()); } + +TEST(NetUtilTest, FormatUrl) { + const UrlTestData tests[] = { + {"Empty URL", "", L"", true, true, L"", 0}, + + {"Simple URL", + "http://www.google.com/", L"", true, true, + L"http://www.google.com/", 7}, + + {"With a port number and a reference", + "http://www.google.com:8080/#\xE3\x82\xB0", L"", true, true, + L"http://www.google.com:8080/#\x30B0", 7}, + + // -------- IDN tests -------- + {"Japanese IDN with ja", + "http://xn--l8jvb1ey91xtjb.jp", L"ja", true, true, + L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, + + {"Japanese IDN with en", + "http://xn--l8jvb1ey91xtjb.jp", L"en", true, true, + L"http://xn--l8jvb1ey91xtjb.jp/", 7}, + + {"Japanese IDN without any languages", + "http://xn--l8jvb1ey91xtjb.jp", L"", true, true, + // Single script is safe for empty languages. 
+ L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, + + {"mailto: with Japanese IDN", + "mailto:foo@xn--l8jvb1ey91xtjb.jp", L"ja", true, true, + // GURL doesn't assume an email address's domain part as a host name. + L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, + + {"file: with Japanese IDN", + "file://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true, + L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, + + {"ftp: with Japanese IDN", + "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true, + L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, + + // -------- omit_username_password flag tests -------- + {"With username and password, omit_username_password=false", + "http://user:passwd@example.com/foo", L"", false, true, + L"http://user:passwd@example.com/foo", 19}, + + {"With username and password, omit_username_password=true", + "http://user:passwd@example.com/foo", L"", true, true, + L"http://example.com/foo", 7}, + + {"With username and no password", + "http://user@example.com/foo", L"", true, true, + L"http://example.com/foo", 7}, + + {"Just '@' without username and password", + "http://@example.com/foo", L"", true, true, + L"http://example.com/foo", 7}, + + // GURL doesn't think local-part of an email address is username for URL. + {"mailto:, omit_username_password=true", + "mailto:foo@example.com", L"", true, true, + L"mailto:foo@example.com", 7}, + + // -------- unescape flag tests -------- + {"unescape=false", + "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" + "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" + "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, false, + // GURL parses %-encoded hostnames into Punycode. 
+ L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" + L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7}, + + {"unescape=true", + "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" + "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" + "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, true, + L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" + L"?q=\x30B0\x30FC\x30B0\x30EB", 7}, + + /* + {"unescape=true with some special characters", + "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", L"", false, true, + L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, + */ + // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". + }; + + for (size_t i = 0; i < arraysize(tests); ++i) { + size_t prefix_len; + std::wstring formatted = net::FormatUrl( + GURL(tests[i].input), tests[i].languages, tests[i].omit, + tests[i].unescape, NULL, &prefix_len); + EXPECT_EQ(tests[i].output, formatted) << tests[i].description; + EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; + } +} + +TEST(NetUtilTest, FormatUrlParsed) { + // No unescape case. 
+ url_parse::Parsed parsed; + std::wstring formatted = net::FormatUrl( + GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" + "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), + L"ja", false, false, &parsed, NULL); + EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" + L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted); + EXPECT_EQ(L"%E3%82%B0", + formatted.substr(parsed.username.begin, parsed.username.len)); + EXPECT_EQ(L"%E3%83%BC", + formatted.substr(parsed.password.begin, parsed.password.len)); + EXPECT_EQ(L"\x30B0\x30FC\x30B0\x30EB.jp", + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(L"8080", formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(L"/%E3%82%B0/", + formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(L"q=%E3%82%B0", + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(L"\x30B0", formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // Unescape case. + formatted = net::FormatUrl( + GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" + "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), + L"ja", false, true, &parsed, NULL); + EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" + L"/\x30B0/?q=\x30B0#\x30B0", formatted); + EXPECT_EQ(L"\x30B0", + formatted.substr(parsed.username.begin, parsed.username.len)); + EXPECT_EQ(L"\x30FC", + formatted.substr(parsed.password.begin, parsed.password.len)); + EXPECT_EQ(L"\x30B0\x30FC\x30B0\x30EB.jp", + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(L"8080", formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(L"/\x30B0/", formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(L"q=\x30B0", + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(L"\x30B0", formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // Omit_username_password + unescape case. 
+ formatted = net::FormatUrl( + GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" + "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), + L"ja", true, true, &parsed, NULL); + EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" + L"/\x30B0/?q=\x30B0#\x30B0", formatted); + EXPECT_FALSE(parsed.username.is_valid()); + EXPECT_FALSE(parsed.password.is_valid()); + EXPECT_EQ(L"\x30B0\x30FC\x30B0\x30EB.jp", + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(L"8080", formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(L"/\x30B0/", formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(L"q=\x30B0", + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(L"\x30B0", formatted.substr(parsed.ref.begin, parsed.ref.len)); +} |