diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-16 19:49:22 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-16 19:49:22 +0000 |
commit | 96be43e90d30140bc5266cb8f667fbf03a8a7d0e (patch) | |
tree | 3c4fd679967fc2ee1cf9c51028a15b4eee2751d9 /chrome/browser/autocomplete | |
parent | 878d73010d3e415857730205d0892ad6ecf1a26d (diff) | |
download | chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.zip chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.tar.gz chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.tar.bz2 |
Classify more types of input as queries.
This adds an oracle that can determine if a hostname is RFC 1738 compliant; if we get a hostname that's not an IP address and not compliant here, we almost certainly have a query.
This also rejects "ports" that don't consist of an integer between 0 and 65535, inclusive (e.g. port "abc" or port "123456").
BUG=18817
TEST=The string "foo: bar" (with quotes) should default to search, not navigate, in the address bar.
Review URL: http://codereview.chromium.org/271116
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@29317 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autocomplete')
-rw-r--r-- | chrome/browser/autocomplete/autocomplete.cc | 38 | ||||
-rw-r--r-- | chrome/browser/autocomplete/autocomplete_unittest.cc | 4 |
2 files changed, 27 insertions, 15 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index 45b9285..492ce3c 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -168,22 +168,33 @@ AutocompleteInput::Type AutocompleteInput::Parse( if (registry_length == std::wstring::npos) return QUERY; // Could be a broken IP address, etc. - // A space in the "host" means this is a query. (Technically, IE and GURL - // allow hostnames with spaces for wierd intranet machines, but it's supposed - // to be illegal and I'm not worried about users trying to type these in.) - if (host.find(' ') != std::wstring::npos) + // See if the hostname is valid per RFC 1738. While IE and GURL allow + // hostnames to contain many other characters (perhaps for weird intranet + // machines), it's extremely unlikely that a user would be trying to type + // those in for anything other than a search query. + url_canon::CanonHostInfo host_info; + const std::string canonicalized_host(net::CanonicalizeHost(host, &host_info)); + if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && + !net::IsCanonicalizedHostRFC1738Compliant(canonicalized_host)) return QUERY; - // Presence of a password/port mean this is almost certainly a URL. We don't - // treat usernames (without passwords) as indicating a URL, because this could - // be an email address like "user@mail.com" which is more likely a search than - // an HTTP auth login attempt. - if (parts->password.is_nonempty() || parts->port.is_nonempty()) + // Presence of a port means this is likely a URL, if the port is really a port + // number. If it's just garbage after a colon, this is a query. + if (parts->port.is_nonempty()) { + int port; + return (StringToInt(WideToUTF16( + text.substr(parts->port.begin, parts->port.len)), &port) && + (port >= 0) && (port <= 65535)) ? URL : QUERY; + } + + // Presence of a password means this is likely a URL. 
We don't treat + // usernames (without passwords) as indicating a URL, because this could be an + // email address like "user@mail.com" which is more likely a search than an + // HTTP auth login attempt. + if (parts->password.is_nonempty()) return URL; // See if the host is an IP address. - url_canon::CanonHostInfo host_info; - net::CanonicalizeHost(host, &host_info); if (host_info.family == url_canon::CanonHostInfo::IPV4) { // If the user originally typed a host that looks like an IP address (a // dotted quad), they probably want to open it. If the original input was @@ -194,11 +205,8 @@ AutocompleteInput::Type AutocompleteInput::Parse( return URL; return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; } - - if (host_info.family == url_canon::CanonHostInfo::IPV6) { - // If the user typed a valid bracketed IPv6 address, treat it as a URL. + if (host_info.family == url_canon::CanonHostInfo::IPV6) return URL; - } // The host doesn't look like a number, so see if the user's given us a path. 
if (parts->path.is_nonempty()) { diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc index 40e6ecf..4ab6ea4 100644 --- a/chrome/browser/autocomplete/autocomplete_unittest.cc +++ b/chrome/browser/autocomplete/autocomplete_unittest.cc @@ -211,13 +211,17 @@ TEST(AutocompleteTest, InputType) { { L"?http://foo.com/bar", AutocompleteInput::FORCED_QUERY }, { L"foo", AutocompleteInput::UNKNOWN }, { L"foo.com", AutocompleteInput::URL }, + { L"-.com", AutocompleteInput::QUERY }, { L"foo/bar", AutocompleteInput::URL }, { L"foo/bar baz", AutocompleteInput::UNKNOWN }, { L"http://foo/bar baz", AutocompleteInput::URL }, { L"foo bar", AutocompleteInput::QUERY }, + { L"\"foo:bar\"", AutocompleteInput::QUERY }, { L"link:foo.com", AutocompleteInput::UNKNOWN }, { L"www.foo.com:81", AutocompleteInput::URL }, { L"localhost:8080", AutocompleteInput::URL }, + { L"foo.com:123456", AutocompleteInput::QUERY }, + { L"foo.com:abc", AutocompleteInput::QUERY }, { L"en.wikipedia.org/wiki/James Bond", AutocompleteInput::URL }, // In Chrome itself, mailto: will get handled by ShellExecute, but in // unittest mode, we don't have the data loaded in the external protocol |