summary | refs | log | tree | commit | diff | stats
path: root/chrome/browser/autocomplete
diff options
context:
space:
mode:
author pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-16 19:49:22 +0000
committer pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-16 19:49:22 +0000
commit 96be43e90d30140bc5266cb8f667fbf03a8a7d0e (patch)
tree 3c4fd679967fc2ee1cf9c51028a15b4eee2751d9 /chrome/browser/autocomplete
parent 878d73010d3e415857730205d0892ad6ecf1a26d (diff)
download chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.zip
chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.tar.gz
chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.tar.bz2
Classify more types of input as queries.
This adds an oracle that can determine if a hostname is RFC 1738 compliant; if we get a hostname that's not an IP address and not compliant here, we almost certainly have a query. This also rejects "ports" that don't consist of a number between 0 and 65535 (e.g. port "abc" or port "123456").

BUG=18817
TEST=The string "foo: bar" (with quotes) should default to search, not navigate, in the address bar.
Review URL: http://codereview.chromium.org/271116

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@29317 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autocomplete')
-rw-r--r-- chrome/browser/autocomplete/autocomplete.cc | 38
-rw-r--r-- chrome/browser/autocomplete/autocomplete_unittest.cc | 4
2 files changed, 27 insertions, 15 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index 45b9285..492ce3c 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -168,22 +168,33 @@ AutocompleteInput::Type AutocompleteInput::Parse(
if (registry_length == std::wstring::npos)
return QUERY; // Could be a broken IP address, etc.
- // A space in the "host" means this is a query. (Technically, IE and GURL
- // allow hostnames with spaces for wierd intranet machines, but it's supposed
- // to be illegal and I'm not worried about users trying to type these in.)
- if (host.find(' ') != std::wstring::npos)
+ // See if the hostname is valid per RFC 1738. While IE and GURL allow
+ // hostnames to contain many other characters (perhaps for weird intranet
+ // machines), it's extremely unlikely that a user would be trying to type
+ // those in for anything other than a search query.
+ url_canon::CanonHostInfo host_info;
+ const std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
+ if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) &&
+ !net::IsCanonicalizedHostRFC1738Compliant(canonicalized_host))
return QUERY;
- // Presence of a password/port mean this is almost certainly a URL. We don't
- // treat usernames (without passwords) as indicating a URL, because this could
- // be an email address like "user@mail.com" which is more likely a search than
- // an HTTP auth login attempt.
- if (parts->password.is_nonempty() || parts->port.is_nonempty())
+ // Presence of a port means this is likely a URL, if the port is really a port
+ // number. If it's just garbage after a colon, this is a query.
+ if (parts->port.is_nonempty()) {
+ int port;
+ return (StringToInt(WideToUTF16(
+ text.substr(parts->port.begin, parts->port.len)), &port) &&
+ (port >= 0) && (port <= 65535)) ? URL : QUERY;
+ }
+
+ // Presence of a password means this is likely a URL. We don't treat
+ // usernames (without passwords) as indicating a URL, because this could be an
+ // email address like "user@mail.com" which is more likely a search than an
+ // HTTP auth login attempt.
+ if (parts->password.is_nonempty())
return URL;
// See if the host is an IP address.
- url_canon::CanonHostInfo host_info;
- net::CanonicalizeHost(host, &host_info);
if (host_info.family == url_canon::CanonHostInfo::IPV4) {
// If the user originally typed a host that looks like an IP address (a
// dotted quad), they probably want to open it. If the original input was
@@ -194,11 +205,8 @@ AutocompleteInput::Type AutocompleteInput::Parse(
return URL;
return desired_tld.empty() ? UNKNOWN : REQUESTED_URL;
}
-
- if (host_info.family == url_canon::CanonHostInfo::IPV6) {
- // If the user typed a valid bracketed IPv6 address, treat it as a URL.
+ if (host_info.family == url_canon::CanonHostInfo::IPV6)
return URL;
- }
// The host doesn't look like a number, so see if the user's given us a path.
if (parts->path.is_nonempty()) {
diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc
index 40e6ecf..4ab6ea4 100644
--- a/chrome/browser/autocomplete/autocomplete_unittest.cc
+++ b/chrome/browser/autocomplete/autocomplete_unittest.cc
@@ -211,13 +211,17 @@ TEST(AutocompleteTest, InputType) {
{ L"?http://foo.com/bar", AutocompleteInput::FORCED_QUERY },
{ L"foo", AutocompleteInput::UNKNOWN },
{ L"foo.com", AutocompleteInput::URL },
+ { L"-.com", AutocompleteInput::QUERY },
{ L"foo/bar", AutocompleteInput::URL },
{ L"foo/bar baz", AutocompleteInput::UNKNOWN },
{ L"http://foo/bar baz", AutocompleteInput::URL },
{ L"foo bar", AutocompleteInput::QUERY },
+ { L"\"foo:bar\"", AutocompleteInput::QUERY },
{ L"link:foo.com", AutocompleteInput::UNKNOWN },
{ L"www.foo.com:81", AutocompleteInput::URL },
{ L"localhost:8080", AutocompleteInput::URL },
+ { L"foo.com:123456", AutocompleteInput::QUERY },
+ { L"foo.com:abc", AutocompleteInput::QUERY },
{ L"en.wikipedia.org/wiki/James Bond", AutocompleteInput::URL },
// In Chrome itself, mailto: will get handled by ShellExecute, but in
// unittest mode, we don't have the data loaded in the external protocol