diff options
author | ericroman@google.com <ericroman@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-06-23 22:52:42 +0000 |
---|---|---|
committer | ericroman@google.com <ericroman@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-06-23 22:52:42 +0000 |
commit | 01dbd931735c7b7497b7b83664462512f272bf58 (patch) | |
tree | 39eaf9d56354fa8b45aadedd6faaf03aec000b98 | |
parent | 99224582c3e56f3cb354f633baf06caa5b98e751 (diff) | |
download | chromium_src-01dbd931735c7b7497b7b83664462512f272bf58.zip chromium_src-01dbd931735c7b7497b7b83664462512f272bf58.tar.gz chromium_src-01dbd931735c7b7497b7b83664462512f272bf58.tar.bz2 |
Original patch by pmarks@google.com (see http://codereview.chromium.org/113944)
- Pull in googleurl r107, which includes the new CanonicalizeHostVerbose()
function:
http://code.google.com/p/google-url/source/detail?r=107
- Atomically update Chromium to make use of this new function. This allows us
to extract better information about IP addresses using fewer and cleaner calls
to googleurl.
- Also, change a call to CanonicalizeIPAddress() to stay compatible with r107.
The upshot of all this is that Chrome will no longer try to connect to IPv4
addresses with overflow (e.g. "http://192.168.0.257"), or to hostnames
surrounded by square brackets (e.g. "http://[google.com]").
BUG=none
TEST={unit_tests,googleurl_unittests,net_unittests}
Review URL: http://codereview.chromium.org/146053
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@19076 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | DEPS | 2 | ||||
-rw-r--r-- | chrome/browser/autocomplete/autocomplete.cc | 28 | ||||
-rw-r--r-- | chrome/browser/autocomplete/autocomplete_unittest.cc | 2 | ||||
-rw-r--r-- | net/base/cookie_monster.cc | 3 | ||||
-rw-r--r-- | net/base/net_util.cc | 47 | ||||
-rw-r--r-- | net/base/net_util.h | 14 | ||||
-rw-r--r-- | net/base/registry_controlled_domain.cc | 24 | ||||
-rw-r--r-- | net/proxy/proxy_config.cc | 8 |
8 files changed, 61 insertions, 67 deletions
@@ -13,7 +13,7 @@ deps = { "/trunk/deps/support@18977", "src/googleurl": - "http://google-url.googlecode.com/svn/trunk@106", + "http://google-url.googlecode.com/svn/trunk@107", "src/sdch/open-vcdiff": "http://open-vcdiff.googlecode.com/svn/trunk@26", diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index 4c94637..7678fc7 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -164,7 +164,7 @@ AutocompleteInput::Type AutocompleteInput::Parse( const size_t registry_length = net::RegistryControlledDomainService::GetRegistryLength(host, false); if (registry_length == std::wstring::npos) - return QUERY; // It's not clear to me that we can reach this... + return QUERY; // Could be a broken IP address, etc. // A space in the "host" means this is a query. (Technically, IE and GURL // allow hostnames with spaces for wierd intranet machines, but it's supposed @@ -180,27 +180,21 @@ AutocompleteInput::Type AutocompleteInput::Parse( return URL; // See if the host is an IP address. - bool is_ip_address; - const std::string canon_host(net::CanonicalizeHost(host, &is_ip_address)); - if (is_ip_address) { - // If the user typed a valid IPv6 address, treat it as a URL. - if (canon_host[0] == '[') - return URL; - + url_canon::CanonHostInfo host_info; + net::CanonicalizeHost(host, &host_info); + if (host_info.family == url_canon::CanonHostInfo::IPV4) { // If the user originally typed a host that looks like an IP address (a // dotted quad), they probably want to open it. If the original input was // something else (like a single number), they probably wanted to search for // it. This is true even if the URL appears to have a path: "1.2/45" is // more likely a search (for the answer to a math problem) than a URL. 
- url_parse::Component components[4]; - const bool found_ipv4 = - url_canon::FindIPv4Components(WideToUTF8(text).c_str(), - parts->host, components); - DCHECK(found_ipv4); - for (size_t i = 0; i < arraysize(components); ++i) { - if (!components[i].is_nonempty()) - return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; - } + if (host_info.num_ipv4_components == 4) + return URL; + return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; + } + + if (host_info.family == url_canon::CanonHostInfo::IPV6) { + // If the user typed a valid bracketed IPv6 address, treat it as a URL. return URL; } diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc index 500d79d3..5115e2f 100644 --- a/chrome/browser/autocomplete/autocomplete_unittest.cc +++ b/chrome/browser/autocomplete/autocomplete_unittest.cc @@ -239,6 +239,8 @@ TEST(AutocompleteTest, InputType) { { L"\u6d4b\u8bd5", AutocompleteInput::UNKNOWN }, { L"[2001:]", AutocompleteInput::QUERY }, // Not a valid IP { L"[2001:dB8::1]", AutocompleteInput::URL }, + { L"192.168.0.256", AutocompleteInput::QUERY }, // Invalid IPv4 literal. + { L"[foo.com]", AutocompleteInput::QUERY }, // Invalid IPv6 literal. }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) { diff --git a/net/base/cookie_monster.cc b/net/base/cookie_monster.cc index 00696fe..31d55dc 100644 --- a/net/base/cookie_monster.cc +++ b/net/base/cookie_monster.cc @@ -285,7 +285,8 @@ static bool GetCookieDomainKey(const GURL& url, // domain=.my.domain.com -- for compatibility we do the same here. Firefox // also treats domain=.....my.domain.com like domain=.my.domain.com, but // neither IE nor Safari do this, and we don't either. 
- std::string cookie_domain(net::CanonicalizeHost(pc.Domain(), NULL)); + url_canon::CanonHostInfo ignored; + std::string cookie_domain(net::CanonicalizeHost(pc.Domain(), &ignored)); if (cookie_domain.empty()) return false; if (cookie_domain[0] != '.') diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 0b2f243..d7fa844 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -829,43 +829,34 @@ void IDNToUnicode(const char* host, #endif } -std::string CanonicalizeHost(const std::string& host, bool* is_ip_address) { +std::string CanonicalizeHost(const std::string& host, + url_canon::CanonHostInfo* host_info) { // Try to canonicalize the host. - const url_parse::Component raw_host_component(0, - static_cast<int>(host.length())); + const url_parse::Component raw_host_component( + 0, static_cast<int>(host.length())); std::string canon_host; url_canon::StdStringCanonOutput canon_host_output(&canon_host); - url_parse::Component canon_host_component; - if (!url_canon::CanonicalizeHost(host.c_str(), raw_host_component, - &canon_host_output, &canon_host_component)) { - if (is_ip_address) - *is_ip_address = false; - return std::string(); - } - canon_host_output.Complete(); - - if (is_ip_address) { - // See if the host is an IP address. - url_canon::RawCanonOutputT<char, 128> ignored_output; - url_parse::Component ignored_component; - *is_ip_address = url_canon::CanonicalizeIPAddress(canon_host.c_str(), - canon_host_component, - &ignored_output, - &ignored_component); + url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, + &canon_host_output, host_info); + + if (host_info->out_host.is_nonempty() && + host_info->family != url_canon::CanonHostInfo::BROKEN) { + // Success! Assert that there's no extra garbage. + canon_host_output.Complete(); + DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); + } else { + // Empty host, or canonicalization failed. We'll return empty. 
+ canon_host.clear(); } - // Return the host as a string, stripping any unnecessary bits off the ends. - if ((canon_host_component.begin == 0) && - (static_cast<size_t>(canon_host_component.len) == canon_host.length())) - return canon_host; - return canon_host.substr(canon_host_component.begin, - canon_host_component.len); + return canon_host; } -std::string CanonicalizeHost(const std::wstring& host, bool* is_ip_address) { +std::string CanonicalizeHost(const std::wstring& host, + url_canon::CanonHostInfo* host_info) { std::string converted_host; WideToUTF8(host.c_str(), host.length(), &converted_host); - return CanonicalizeHost(converted_host, is_ip_address); + return CanonicalizeHost(converted_host, host_info); } std::string GetDirectoryListingHeader(const std::string& title) { diff --git a/net/base/net_util.h b/net/base/net_util.h index d7f1de7..40df770 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -24,6 +24,10 @@ namespace base { class Time; } +namespace url_canon { +struct CanonHostInfo; +} + namespace url_parse { struct Parsed; } @@ -136,10 +140,12 @@ void IDNToUnicode(const char* host, const std::wstring& languages, std::wstring* out); -// Canonicalizes |host| and returns it. If |is_ip_address| is non-NULL, sets it -// to true if |host| is an IP address. -std::string CanonicalizeHost(const std::string& host, bool* is_ip_address); -std::string CanonicalizeHost(const std::wstring& host, bool* is_ip_address); +// Canonicalizes |host| and returns it. Also fills |host_info| with +// IP address information. |host_info| must not be NULL. +std::string CanonicalizeHost(const std::string& host, + url_canon::CanonHostInfo* host_info); +std::string CanonicalizeHost(const std::wstring& host, + url_canon::CanonHostInfo* host_info); // Call these functions to get the html for a directory listing. 
// They will pass non-7bit-ascii characters unescaped, allowing diff --git a/net/base/registry_controlled_domain.cc b/net/base/registry_controlled_domain.cc index 7ea0a18..2bcfba5 100644 --- a/net/base/registry_controlled_domain.cc +++ b/net/base/registry_controlled_domain.cc @@ -63,9 +63,9 @@ std::string RegistryControlledDomainService::GetDomainAndRegistry( // static std::string RegistryControlledDomainService::GetDomainAndRegistry( const std::string& host) { - bool is_ip_address; - const std::string canon_host(net::CanonicalizeHost(host, &is_ip_address)); - if (canon_host.empty() || is_ip_address) + url_canon::CanonHostInfo host_info; + const std::string canon_host(net::CanonicalizeHost(host, &host_info)); + if (canon_host.empty() || host_info.IsIPAddress()) return std::string(); return GetDomainAndRegistryImpl(canon_host); } @@ -73,9 +73,9 @@ std::string RegistryControlledDomainService::GetDomainAndRegistry( // static std::string RegistryControlledDomainService::GetDomainAndRegistry( const std::wstring& host) { - bool is_ip_address; - const std::string canon_host(net::CanonicalizeHost(host, &is_ip_address)); - if (canon_host.empty() || is_ip_address) + url_canon::CanonHostInfo host_info; + const std::string canon_host(net::CanonicalizeHost(host, &host_info)); + if (canon_host.empty() || host_info.IsIPAddress()) return std::string(); return GetDomainAndRegistryImpl(canon_host); } @@ -121,11 +121,11 @@ size_t RegistryControlledDomainService::GetRegistryLength( size_t RegistryControlledDomainService::GetRegistryLength( const std::string& host, bool allow_unknown_registries) { - bool is_ip_address; - const std::string canon_host(net::CanonicalizeHost(host, &is_ip_address)); + url_canon::CanonHostInfo host_info; + const std::string canon_host(net::CanonicalizeHost(host, &host_info)); if (canon_host.empty()) return std::string::npos; - if (is_ip_address) + if (host_info.IsIPAddress()) return 0; return GetInstance()->GetRegistryLengthImpl(canon_host, 
allow_unknown_registries); @@ -135,11 +135,11 @@ size_t RegistryControlledDomainService::GetRegistryLength( size_t RegistryControlledDomainService::GetRegistryLength( const std::wstring& host, bool allow_unknown_registries) { - bool is_ip_address; - const std::string canon_host(net::CanonicalizeHost(host, &is_ip_address)); + url_canon::CanonHostInfo host_info; + const std::string canon_host(net::CanonicalizeHost(host, &host_info)); if (canon_host.empty()) return std::string::npos; - if (is_ip_address) + if (host_info.IsIPAddress()) return 0; return GetInstance()->GetRegistryLengthImpl(canon_host, allow_unknown_registries); diff --git a/net/proxy/proxy_config.cc b/net/proxy/proxy_config.cc index f2ff8ce..8ae4baa 100644 --- a/net/proxy/proxy_config.cc +++ b/net/proxy/proxy_config.cc @@ -97,11 +97,11 @@ namespace { bool IsIPAddress(const std::string& domain) { // From GURL::HostIsIPAddress() url_canon::RawCanonOutputT<char, 128> ignored_output; - url_parse::Component ignored_component; + url_canon::CanonHostInfo host_info; url_parse::Component domain_comp(0, domain.size()); - return url_canon::CanonicalizeIPAddress(domain.c_str(), domain_comp, - &ignored_output, - &ignored_component); + url_canon::CanonicalizeIPAddress(domain.c_str(), domain_comp, + &ignored_output, &host_info); + return host_info.IsIPAddress(); } } // namespace |