summaryrefslogtreecommitdiffstats
path: root/chrome/browser/autocomplete
diff options
context:
space:
mode:
authorpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-19 20:37:48 +0000
committerpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-19 20:37:48 +0000
commit258e8970224599686f320fe75044e1043ef87222 (patch)
treeaafe9c28f36d2260842f6631c6f411b70ddb5794 /chrome/browser/autocomplete
parent6f509ac2faf29c7fbe3b57c0f2bf8f3cb4d25ca0 (diff)
downloadchromium_src-258e8970224599686f320fe75044e1043ef87222.zip
chromium_src-258e8970224599686f320fe75044e1043ef87222.tar.gz
chromium_src-258e8970224599686f320fe75044e1043ef87222.tar.bz2
Parse input with explicit schemes better. Before, if the user typed "http://..." we always parsed as a URL. Now we parse more like we would without the scheme, so that we can reject various kinds of invalid inputs.
BUG=none TEST=Input "http://foo:bar" (without quotes) in the address bar, and the default action should be search, not navigate. Review URL: http://codereview.chromium.org/292003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@29450 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autocomplete')
-rw-r--r--chrome/browser/autocomplete/autocomplete.cc64
-rw-r--r--chrome/browser/autocomplete/autocomplete_unittest.cc21
2 files changed, 59 insertions, 26 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index 492ce3c..7db2966 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -114,8 +114,14 @@ AutocompleteInput::Type AutocompleteInput::Parse(
return URL;
}
- // If the user typed a scheme, determine our available actions based on that.
- if (parts->scheme.is_valid()) {
+ // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it
+ // well enough that we can fall through to the heuristics below. If it's
+ // something else, we can just determine our action based on what we do with
+ // any input of this scheme. In theory we could do better with some schemes
+ // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that
+ // until I run into some cases that really need it.
+ if (parts->scheme.is_nonempty() &&
+ (parsed_scheme != L"http") && (parsed_scheme != L"https")) {
// See if we know how to handle the URL internally.
if (URLRequest::IsHandledProtocol(WideToASCII(parsed_scheme)))
return URL;
@@ -153,15 +159,16 @@ AutocompleteInput::Type AutocompleteInput::Parse(
}
}
- // The user didn't type a scheme. Assume that this is either an HTTP URL or
- // not a URL at all; try to determine which.
+ // Either the user didn't type a scheme, in which case we need to distinguish
+ // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which
+ // case we should reject invalid formulations.
- // It's not clear that we can reach here with an empty "host" (maybe on some
- // kinds of garbage input?), but if we did, it couldn't be a URL.
+ // If we have an empty host it can't be a URL.
if (!parts->host.is_nonempty())
return QUERY;
- // (We use the registry length later below but ask for it here so we can check
- // the host's validity at this point.)
+
+ // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also
+ // use the registry length later below.)
const std::wstring host(text.substr(parts->host.begin, parts->host.len));
const size_t registry_length =
net::RegistryControlledDomainService::GetRegistryLength(host, false);
@@ -187,10 +194,16 @@ AutocompleteInput::Type AutocompleteInput::Parse(
(port >= 0) && (port <= 65535)) ? URL : QUERY;
}
- // Presence of a password means this is likely a URL. We don't treat
- // usernames (without passwords) as indicating a URL, because this could be an
- // email address like "user@mail.com" which is more likely a search than an
- // HTTP auth login attempt.
+ // Presence of a username could either indicate a URL or an email address
+ // ("user@mail.com"). E-mail addresses are likely queries so we only open
+ // this as a URL if the user explicitly typed a scheme.
+ if (parts->username.is_nonempty() && parts->scheme.is_nonempty())
+ return URL;
+
+ // Presence of a password means this is likely a URL. Note that unless the
+ // user has typed an explicit "http://" or similar, we'll probably think that
+ // the username is some unknown scheme, and bail out in the scheme-handling
+ // code above.
if (parts->password.is_nonempty())
return URL;
@@ -199,9 +212,10 @@ AutocompleteInput::Type AutocompleteInput::Parse(
// If the user originally typed a host that looks like an IP address (a
// dotted quad), they probably want to open it. If the original input was
// something else (like a single number), they probably wanted to search for
- // it. This is true even if the URL appears to have a path: "1.2/45" is
- // more likely a search (for the answer to a math problem) than a URL.
- if (host_info.num_ipv4_components == 4)
+ // it, unless they explicitly typed a scheme. This is true even if the URL
+ // appears to have a path: "1.2/45" is more likely a search (for the answer
+ // to a math problem) than a URL.
+ if ((host_info.num_ipv4_components == 4) || parts->scheme.is_nonempty())
return URL;
return desired_tld.empty() ? UNKNOWN : REQUESTED_URL;
}
@@ -211,13 +225,13 @@ AutocompleteInput::Type AutocompleteInput::Parse(
// The host doesn't look like a number, so see if the user's given us a path.
if (parts->path.is_nonempty()) {
// Most inputs with paths are URLs, even ones without known registries (e.g.
- // intranet URLs). However, if there's no known registry, and the path has
- // a space, this is more likely a query with a slash in the first term (e.g.
- // "ps/2 games") than a URL. We can still open URLs with spaces in the path
- // by escaping the space, and we will still inline autocomplete them if
- // users have typed them in the past, but we default to searching since
- // that's the common case.
- return ((registry_length == 0) &&
+ // intranet URLs). However, if the user didn't type a scheme, there's no
+ // known registry, and the path has a space, this is more likely a query
+ // with a slash in the first term (e.g. "ps/2 games") than a URL. We can
+ // still open URLs with spaces in the path by escaping the space, and we
+ // will still inline autocomplete them if users have typed them in the past,
+ // but we default to searching since that's the common case.
+ return (!parts->scheme.is_nonempty() && (registry_length == 0) &&
(text.substr(parts->path.begin, parts->path.len).find(' ') !=
std::wstring::npos)) ? UNKNOWN : URL;
}
@@ -228,9 +242,9 @@ AutocompleteInput::Type AutocompleteInput::Parse(
if (parts->username.is_nonempty())
return UNKNOWN;
- // We have a bare host string. See if it has a known TLD. If so, it's
- // probably a URL.
- if (registry_length != 0)
+ // We have a bare host string. See if it has a known TLD or the user typed a
+ // scheme. If so, it's probably a URL.
+ if (parts->scheme.is_nonempty() || (registry_length != 0))
return URL;
// No TLD that we know about. This could be:
diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc
index 4ab6ea4..572beb5 100644
--- a/chrome/browser/autocomplete/autocomplete_unittest.cc
+++ b/chrome/browser/autocomplete/autocomplete_unittest.cc
@@ -210,6 +210,7 @@ TEST(AutocompleteTest, InputType) {
{ L"?foo bar", AutocompleteInput::FORCED_QUERY },
{ L"?http://foo.com/bar", AutocompleteInput::FORCED_QUERY },
{ L"foo", AutocompleteInput::UNKNOWN },
+ { L"foo.c", AutocompleteInput::UNKNOWN },
{ L"foo.com", AutocompleteInput::URL },
{ L"-.com", AutocompleteInput::QUERY },
{ L"foo/bar", AutocompleteInput::URL },
@@ -222,6 +223,11 @@ TEST(AutocompleteTest, InputType) {
{ L"localhost:8080", AutocompleteInput::URL },
{ L"foo.com:123456", AutocompleteInput::QUERY },
{ L"foo.com:abc", AutocompleteInput::QUERY },
+ { L"user@foo.com", AutocompleteInput::UNKNOWN },
+ { L"user:pass@foo.com", AutocompleteInput::UNKNOWN },
+ { L"1.2", AutocompleteInput::UNKNOWN },
+ { L"1.2/45", AutocompleteInput::UNKNOWN },
+ { L"ps/2 games", AutocompleteInput::UNKNOWN },
{ L"en.wikipedia.org/wiki/James Bond", AutocompleteInput::URL },
// In Chrome itself, mailto: will get handled by ShellExecute, but in
// unittest mode, we don't have the data loaded in the external protocol
@@ -233,7 +239,20 @@ TEST(AutocompleteTest, InputType) {
{ L"C:\\Program Files", AutocompleteInput::URL },
{ L"\\\\Server\\Folder\\File", AutocompleteInput::URL },
#endif // defined(OS_WIN)
- { L"http://foo.com/", AutocompleteInput::URL },
+ { L"http:foo", AutocompleteInput::URL },
+ { L"http://foo", AutocompleteInput::URL },
+ { L"http://foo.c", AutocompleteInput::URL },
+ { L"http://foo.com", AutocompleteInput::URL },
+ { L"http://-.com", AutocompleteInput::QUERY },
+ { L"http://foo.com:abc", AutocompleteInput::QUERY },
+ { L"http://foo.com:123456", AutocompleteInput::QUERY },
+ { L"http:user@foo.com", AutocompleteInput::URL },
+ { L"http://user@foo.com", AutocompleteInput::URL },
+ { L"http://user:pass@foo.com", AutocompleteInput::URL },
+ { L"http://1.2", AutocompleteInput::URL },
+ { L"http://1.2/45", AutocompleteInput::URL },
+ { L"http:ps/2 games", AutocompleteInput::URL },
+ { L"http://ps/2 games", AutocompleteInput::URL },
{ L"127.0.0.1", AutocompleteInput::URL },
{ L"127.0.1", AutocompleteInput::UNKNOWN },
{ L"127.0.1/", AutocompleteInput::UNKNOWN },