diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-19 20:37:48 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-19 20:37:48 +0000 |
commit | 258e8970224599686f320fe75044e1043ef87222 (patch) | |
tree | aafe9c28f36d2260842f6631c6f411b70ddb5794 /chrome/browser/autocomplete | |
parent | 6f509ac2faf29c7fbe3b57c0f2bf8f3cb4d25ca0 (diff) | |
download | chromium_src-258e8970224599686f320fe75044e1043ef87222.zip chromium_src-258e8970224599686f320fe75044e1043ef87222.tar.gz chromium_src-258e8970224599686f320fe75044e1043ef87222.tar.bz2 |
Parse input with explicit schemes better. Before, if the user typed "http://..." we always parsed as a URL. Now we parse more like we would without the scheme, so that we can reject various kinds of invalid inputs.
BUG=none
TEST=Input "http://foo:bar" (without quotes) in the address bar, and the default action should be search, not navigate.
Review URL: http://codereview.chromium.org/292003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@29450 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autocomplete')
-rw-r--r-- | chrome/browser/autocomplete/autocomplete.cc | 64 | ||||
-rw-r--r-- | chrome/browser/autocomplete/autocomplete_unittest.cc | 21 |
2 files changed, 59 insertions, 26 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index 492ce3c..7db2966 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -114,8 +114,14 @@ AutocompleteInput::Type AutocompleteInput::Parse( return URL; } - // If the user typed a scheme, determine our available actions based on that. - if (parts->scheme.is_valid()) { + // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it + // well enough that we can fall through to the heuristics below. If it's + // something else, we can just determine our action based on what we do with + // any input of this scheme. In theory we could do better with some schemes + // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that + // until I run into some cases that really need it. + if (parts->scheme.is_nonempty() && + (parsed_scheme != L"http") && (parsed_scheme != L"https")) { // See if we know how to handle the URL internally. if (URLRequest::IsHandledProtocol(WideToASCII(parsed_scheme))) return URL; @@ -153,15 +159,16 @@ AutocompleteInput::Type AutocompleteInput::Parse( } } - // The user didn't type a scheme. Assume that this is either an HTTP URL or - // not a URL at all; try to determine which. + // Either the user didn't type a scheme, in which case we need to distinguish + // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which + // case we should reject invalid formulations. - // It's not clear that we can reach here with an empty "host" (maybe on some - // kinds of garbage input?), but if we did, it couldn't be a URL. + // If we have an empty host it can't be a URL. if (!parts->host.is_nonempty()) return QUERY; - // (We use the registry length later below but ask for it here so we can check - // the host's validity at this point.) + + // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also + // use the registry length later below.) const std::wstring host(text.substr(parts->host.begin, parts->host.len)); const size_t registry_length = net::RegistryControlledDomainService::GetRegistryLength(host, false); @@ -187,10 +194,16 @@ AutocompleteInput::Type AutocompleteInput::Parse( (port >= 0) && (port <= 65535)) ? URL : QUERY; } - // Presence of a password means this is likely a URL. We don't treat - // usernames (without passwords) as indicating a URL, because this could be an - // email address like "user@mail.com" which is more likely a search than an - // HTTP auth login attempt. + // Presence of a username could either indicate a URL or an email address + // ("user@mail.com"). E-mail addresses are likely queries so we only open + // this as a URL if the user explicitly typed a scheme. + if (parts->username.is_nonempty() && parts->scheme.is_nonempty()) + return URL; + + // Presence of a password means this is likely a URL. Note that unless the + // user has typed an explicit "http://" or similar, we'll probably think that + // the username is some unknown scheme, and bail out in the scheme-handling + // code above. if (parts->password.is_nonempty()) return URL; @@ -199,9 +212,10 @@ AutocompleteInput::Type AutocompleteInput::Parse( // If the user originally typed a host that looks like an IP address (a // dotted quad), they probably want to open it. If the original input was // something else (like a single number), they probably wanted to search for - // it. This is true even if the URL appears to have a path: "1.2/45" is - // more likely a search (for the answer to a math problem) than a URL. - if (host_info.num_ipv4_components == 4) + // it, unless they explicitly typed a scheme. This is true even if the URL + // appears to have a path: "1.2/45" is more likely a search (for the answer + // to a math problem) than a URL. + if ((host_info.num_ipv4_components == 4) || parts->scheme.is_nonempty()) return URL; return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; } @@ -211,13 +225,13 @@ AutocompleteInput::Type AutocompleteInput::Parse( // The host doesn't look like a number, so see if the user's given us a path. if (parts->path.is_nonempty()) { // Most inputs with paths are URLs, even ones without known registries (e.g. - // intranet URLs). However, if there's no known registry, and the path has - // a space, this is more likely a query with a slash in the first term (e.g. - // "ps/2 games") than a URL. We can still open URLs with spaces in the path - // by escaping the space, and we will still inline autocomplete them if - // users have typed them in the past, but we default to searching since - // that's the common case. - return ((registry_length == 0) && + // intranet URLs). However, if the user didn't type a scheme, there's no + // known registry, and the path has a space, this is more likely a query + // with a slash in the first term (e.g. "ps/2 games") than a URL. We can + // still open URLs with spaces in the path by escaping the space, and we + // will still inline autocomplete them if users have typed them in the past, + // but we default to searching since that's the common case. + return (!parts->scheme.is_nonempty() && (registry_length == 0) && (text.substr(parts->path.begin, parts->path.len).find(' ') != std::wstring::npos)) ? UNKNOWN : URL; } @@ -228,9 +242,9 @@ AutocompleteInput::Type AutocompleteInput::Parse( if (parts->username.is_nonempty()) return UNKNOWN; - // We have a bare host string. See if it has a known TLD. If so, it's - // probably a URL. - if (registry_length != 0) + // We have a bare host string. See if it has a known TLD or the user typed a + // scheme. If so, it's probably a URL. + if (parts->scheme.is_nonempty() || (registry_length != 0)) return URL; // No TLD that we know about. This could be: diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc index 4ab6ea4..572beb5 100644 --- a/chrome/browser/autocomplete/autocomplete_unittest.cc +++ b/chrome/browser/autocomplete/autocomplete_unittest.cc @@ -210,6 +210,7 @@ TEST(AutocompleteTest, InputType) { { L"?foo bar", AutocompleteInput::FORCED_QUERY }, { L"?http://foo.com/bar", AutocompleteInput::FORCED_QUERY }, { L"foo", AutocompleteInput::UNKNOWN }, + { L"foo.c", AutocompleteInput::UNKNOWN }, { L"foo.com", AutocompleteInput::URL }, { L"-.com", AutocompleteInput::QUERY }, { L"foo/bar", AutocompleteInput::URL }, @@ -222,6 +223,11 @@ TEST(AutocompleteTest, InputType) { { L"localhost:8080", AutocompleteInput::URL }, { L"foo.com:123456", AutocompleteInput::QUERY }, { L"foo.com:abc", AutocompleteInput::QUERY }, + { L"user@foo.com", AutocompleteInput::UNKNOWN }, + { L"user:pass@foo.com", AutocompleteInput::UNKNOWN }, + { L"1.2", AutocompleteInput::UNKNOWN }, + { L"1.2/45", AutocompleteInput::UNKNOWN }, + { L"ps/2 games", AutocompleteInput::UNKNOWN }, { L"en.wikipedia.org/wiki/James Bond", AutocompleteInput::URL }, // In Chrome itself, mailto: will get handled by ShellExecute, but in // unittest mode, we don't have the data loaded in the external protocol @@ -233,7 +239,20 @@ TEST(AutocompleteTest, InputType) { { L"C:\\Program Files", AutocompleteInput::URL }, { L"\\\\Server\\Folder\\File", AutocompleteInput::URL }, #endif // defined(OS_WIN) - { L"http://foo.com/", AutocompleteInput::URL }, + { L"http:foo", AutocompleteInput::URL }, + { L"http://foo", AutocompleteInput::URL }, + { L"http://foo.c", AutocompleteInput::URL }, + { L"http://foo.com", AutocompleteInput::URL }, + { L"http://-.com", AutocompleteInput::QUERY }, + { L"http://foo.com:abc", AutocompleteInput::QUERY }, + { L"http://foo.com:123456", AutocompleteInput::QUERY }, + { L"http:user@foo.com", AutocompleteInput::URL }, + { L"http://user@foo.com", AutocompleteInput::URL }, + { L"http://user:pass@foo.com", AutocompleteInput::URL }, + { L"http://1.2", AutocompleteInput::URL }, + { L"http://1.2/45", AutocompleteInput::URL }, + { L"http:ps/2 games", AutocompleteInput::URL }, + { L"http://ps/2 games", AutocompleteInput::URL }, { L"127.0.0.1", AutocompleteInput::URL }, { L"127.0.1", AutocompleteInput::UNKNOWN }, { L"127.0.1/", AutocompleteInput::UNKNOWN }, |