diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-13 20:37:25 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-13 20:37:25 +0000 |
commit | ab237e4b349273aa73e9cb0b831e999cac4b980f (patch) | |
tree | 7dfd3c0fe46dda695017db61216016855c9f05db /chrome/browser/autocomplete | |
parent | 3ce0dd658406a9bdf4db487bf9d0f1790dffee53 (diff) | |
download | chromium_src-ab237e4b349273aa73e9cb0b831e999cac4b980f.zip chromium_src-ab237e4b349273aa73e9cb0b831e999cac4b980f.tar.gz chromium_src-ab237e4b349273aa73e9cb0b831e999cac4b980f.tar.bz2 |
Tweak omnibox parsing heuristics more:
* UNKNOWN inputs with at least one non-host component get displayed as "possible navigations" (by setting |have_what_you_typed_match| true in HistoryURLProvider::DoAutocomplete()).
* Inputs with at least two non-host components (generally) get treated as URLs by AutocompleteInput::Parse(). Technically these could be searches but intranet URLs are much more likely.
* Allow more cases to be REQUESTED_URL, such as "user@host" + ctrl. I'm not sure these were ever intentionally excluded from the ctrl-enter handling, and I don't see why they should be.
Also use url_parse::ParsePort(), which didn't use to exist (I think?) to replace some code in AutocompleteInput::Parse().
BUG=99131
TEST=Covered by unittests
Review URL: http://codereview.chromium.org/8258004
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@105363 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/autocomplete')
-rw-r--r-- | chrome/browser/autocomplete/autocomplete.cc | 87 | ||||
-rw-r--r-- | chrome/browser/autocomplete/autocomplete.h | 3 | ||||
-rw-r--r-- | chrome/browser/autocomplete/autocomplete_unittest.cc | 35 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_url_provider.cc | 3 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_url_provider_unittest.cc | 46 |
5 files changed, 109 insertions, 65 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index 3cfe87f..6dbb9a1 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -324,13 +324,9 @@ AutocompleteInput::Type AutocompleteInput::Parse( // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save // handling legal port numbers until after the "IP address" determination // below. - if (parts->port.is_nonempty()) { - int port; - if (!base::StringToInt(text.substr(parts->port.begin, parts->port.len), - &port) || - (port < 0) || (port > 65535)) - return QUERY; - } + if (url_parse::ParsePort(text.c_str(), parts->port) == + url_parse::PORT_INVALID) + return QUERY; // Now that we've ruled out all schemes other than http or https and done a // little more sanity checking, the presence of a scheme means this is likely @@ -339,19 +335,21 @@ AutocompleteInput::Type AutocompleteInput::Parse( return URL; // See if the host is an IP address. - if (host_info.family == url_canon::CanonHostInfo::IPV4) { - // If the user originally typed a host that looks like an IP address (a - // dotted quad), they probably want to open it. If the original input was - // something else (like a single number), they probably wanted to search for - // it, unless they explicitly typed a scheme. This is true even if the URL - // appears to have a path: "1.2/45" is more likely a search (for the answer - // to a math problem) than a URL. - if (host_info.num_ipv4_components == 4) - return URL; - return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; - } if (host_info.family == url_canon::CanonHostInfo::IPV6) return URL; + // If the user originally typed a host that looks like an IP address (a + // dotted quad), they probably want to open it. If the original input was + // something else (like a single number), they probably wanted to search for + // it, unless they explicitly typed a scheme. This is true even if the URL + // appears to have a path: "1.2/45" is more likely a search (for the answer + // to a math problem) than a URL. However, if there are more non-host + // components, then maybe this really was intended to be a navigation. For + // this reason we only check the dotted-quad case here, and save the "other + // IP addresses" case for after we check the number of non-host components + // below. + if ((host_info.family == url_canon::CanonHostInfo::IPV4) && + (host_info.num_ipv4_components == 4)) + return URL; // Presence of a password means this is likely a URL. Note that unless the // user has typed an explicit "http://" or similar, we'll probably think that @@ -364,27 +362,36 @@ AutocompleteInput::Type AutocompleteInput::Parse( if (parts->path.len == 1) return URL; - // If we reach here with a username, but no port or path, our input looks like - // "user@host". Because there is no scheme explicitly specified, we think - // this is more likely an email address than an HTTP auth attempt. Hence, we - // search by default and let users correct us on a case-by-case basis. - if (parts->username.is_nonempty() && !parts->port.is_nonempty() && - !parts->path.is_nonempty()) - return UNKNOWN; - - // If the host has a known TLD, it's probably a URL. Also special-case - // "localhost" as a known hostname. - if ((registry_length != 0) || (host == ASCIIToUTF16("localhost"))) + // If there is more than one recognized non-host component, this is likely to + // be a URL, even if the TLD is unknown (in which case this is likely an + // intranet URL). + if (NumNonHostComponents(*parts) > 1) return URL; + // If the host has a known TLD, it's probably a URL, with the following + // exceptions: + // * Any "IP addresses" that make it here are more likely searches + // (see above). + // * If we reach here with a username, our input looks like "user@host[.tld]". + // Because there is no scheme explicitly specified, we think this is more + // likely an email address than an HTTP auth attempt. Hence, we search by + // default and let users correct us on a case-by-case basis. + // Note that we special-case "localhost" as a known hostname. + if ((host_info.family != url_canon::CanonHostInfo::IPV4) && + ((registry_length != 0) || (host == ASCIIToUTF16("localhost")))) + return parts->username.is_nonempty() ? UNKNOWN : URL; + // If we reach this point, we know there's no known TLD on the input, so if // the user wishes to add a desired_tld, the fixup code will oblige; thus this // is a URL. if (!desired_tld.empty()) return REQUESTED_URL; - // No scheme, username, password, port, path, and no known TLD on the host. + // No scheme, password, port, path, and no known TLD on the host. // This could be: + // * An "incomplete IP address"; likely a search (see above). + // * An email-like input like "user@host", where "host" has no known TLD. + // It's not clear what the user means here and searching seems reasonable. // * A single word "foo"; possibly an intranet site, but more likely a search. // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code // catch our mistakes. @@ -456,6 +463,26 @@ string16 AutocompleteInput::FormattedStringWithEquivalentMeaning( formatted_url : url_with_path; } +// static +int AutocompleteInput::NumNonHostComponents(const url_parse::Parsed& parts) { + int num_nonhost_components = 0; + if (parts.scheme.is_nonempty()) + ++num_nonhost_components; + if (parts.username.is_nonempty()) + ++num_nonhost_components; + if (parts.password.is_nonempty()) + ++num_nonhost_components; + if (parts.port.is_nonempty()) + ++num_nonhost_components; + if (parts.path.is_nonempty()) + ++num_nonhost_components; + if (parts.query.is_nonempty()) + ++num_nonhost_components; + if (parts.ref.is_nonempty()) + ++num_nonhost_components; + return num_nonhost_components; +} + void AutocompleteInput::UpdateText(const string16& text, const url_parse::Parsed& parts) { text_ = text; diff --git a/chrome/browser/autocomplete/autocomplete.h b/chrome/browser/autocomplete/autocomplete.h index c481bed..02dc34a 100644 --- a/chrome/browser/autocomplete/autocomplete.h +++ b/chrome/browser/autocomplete/autocomplete.h @@ -264,6 +264,9 @@ class AutocompleteInput { const GURL& url, const string16& formatted_url); + // Returns the number of non-empty components in |parts| besides the host. + static int NumNonHostComponents(const url_parse::Parsed& parts); + // User-provided text to be completed. const string16& text() const { return text_; } diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc index 8e683af..e5e95fe 100644 --- a/chrome/browser/autocomplete/autocomplete_unittest.cc +++ b/chrome/browser/autocomplete/autocomplete_unittest.cc @@ -291,6 +291,7 @@ TEST_F(AutocompleteTest, InputType) { { ASCIIToUTF16("foo-.com"), AutocompleteInput::UNKNOWN }, { ASCIIToUTF16("foo.-com"), AutocompleteInput::QUERY }, { ASCIIToUTF16("foo/bar"), AutocompleteInput::UNKNOWN }, + { ASCIIToUTF16("foo.com/bar"), AutocompleteInput::URL }, { ASCIIToUTF16("foo;bar"), AutocompleteInput::QUERY }, { ASCIIToUTF16("foo/bar baz"), AutocompleteInput::UNKNOWN }, { ASCIIToUTF16("foo bar.com"), AutocompleteInput::QUERY }, @@ -306,8 +307,8 @@ TEST_F(AutocompleteTest, InputType) { { ASCIIToUTF16("foo.com:abc"), AutocompleteInput::QUERY }, { ASCIIToUTF16("1.2.3.4:abc"), AutocompleteInput::QUERY }, { ASCIIToUTF16("user@foo.com"), AutocompleteInput::UNKNOWN }, - { ASCIIToUTF16("user@foo/z"), AutocompleteInput::UNKNOWN }, - { ASCIIToUTF16("user@foo/z z"), AutocompleteInput::UNKNOWN }, + { ASCIIToUTF16("user@foo/z"), AutocompleteInput::URL }, + { ASCIIToUTF16("user@foo/z z"), AutocompleteInput::URL }, { ASCIIToUTF16("user@foo.com/z"), AutocompleteInput::URL }, { ASCIIToUTF16("user:pass@"), AutocompleteInput::UNKNOWN }, { ASCIIToUTF16("user:pass@!foo.com"), AutocompleteInput::UNKNOWN }, @@ -319,9 +320,13 @@ TEST_F(AutocompleteTest, InputType) { { ASCIIToUTF16("1.2"), AutocompleteInput::UNKNOWN }, { ASCIIToUTF16("1.2/45"), AutocompleteInput::UNKNOWN }, { ASCIIToUTF16("1.2:45"), AutocompleteInput::UNKNOWN }, - { ASCIIToUTF16("user@1.2:45"), AutocompleteInput::UNKNOWN }, - { ASCIIToUTF16("user@foo:45"), AutocompleteInput::UNKNOWN }, + { ASCIIToUTF16("user@1.2:45"), AutocompleteInput::URL }, + { ASCIIToUTF16("user@foo:45"), AutocompleteInput::URL }, { ASCIIToUTF16("user:pass@1.2:45"), AutocompleteInput::URL }, + { ASCIIToUTF16("host?query"), AutocompleteInput::UNKNOWN }, + { ASCIIToUTF16("host#ref"), AutocompleteInput::UNKNOWN }, + { ASCIIToUTF16("host/path?query"), AutocompleteInput::URL }, + { ASCIIToUTF16("host/path#ref"), AutocompleteInput::URL }, { ASCIIToUTF16("en.wikipedia.org/wiki/James Bond"), AutocompleteInput::URL }, // In Chrome itself, mailto: will get handled by ShellExecute, but in @@ -358,7 +363,7 @@ TEST_F(AutocompleteTest, InputType) { { ASCIIToUTF16("https://foo.com"), AutocompleteInput::URL }, { ASCIIToUTF16("127.0.0.1"), AutocompleteInput::URL }, { ASCIIToUTF16("127.0.1"), AutocompleteInput::UNKNOWN }, - { ASCIIToUTF16("127.0.1/"), AutocompleteInput::UNKNOWN }, + { ASCIIToUTF16("127.0.1/"), AutocompleteInput::URL }, { ASCIIToUTF16("browser.tabs.closeButtons"), AutocompleteInput::UNKNOWN }, { WideToUTF16(L"\u6d4b\u8bd5"), AutocompleteInput::UNKNOWN }, { ASCIIToUTF16("[2001:]"), AutocompleteInput::QUERY }, // Not a valid IP @@ -384,14 +389,15 @@ TEST_F(AutocompleteTest, InputTypeWithDesiredTLD) { } input_cases[] = { { ASCIIToUTF16("401k"), AutocompleteInput::REQUESTED_URL }, { ASCIIToUTF16("999999999999999"), AutocompleteInput::REQUESTED_URL }, - { ASCIIToUTF16("x@y/z z"), AutocompleteInput::REQUESTED_URL }, + { ASCIIToUTF16("x@y"), AutocompleteInput::REQUESTED_URL }, + { ASCIIToUTF16("y/z z"), AutocompleteInput::REQUESTED_URL }, }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) { + SCOPED_TRACE(input_cases[i].input); AutocompleteInput input(input_cases[i].input, ASCIIToUTF16("com"), true, false, true, AutocompleteInput::ALL_MATCHES); - EXPECT_EQ(input_cases[i].type, input.type()) << "Input: " << - input_cases[i].input; + EXPECT_EQ(input_cases[i].type, input.type()); } } @@ -463,6 +469,7 @@ TEST(AutocompleteInput, ParseForEmphasizeComponent) { }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) { + SCOPED_TRACE(input_cases[i].input); Component scheme, host; AutocompleteInput::ParseForEmphasizeComponents(input_cases[i].input, string16(), @@ -470,14 +477,10 @@ TEST(AutocompleteInput, ParseForEmphasizeComponent) { &host); AutocompleteInput input(input_cases[i].input, string16(), true, false, true, AutocompleteInput::ALL_MATCHES); - EXPECT_EQ(input_cases[i].scheme.begin, scheme.begin) << "Input: " << - input_cases[i].input; - EXPECT_EQ(input_cases[i].scheme.len, scheme.len) << "Input: " << - input_cases[i].input; - EXPECT_EQ(input_cases[i].host.begin, host.begin) << "Input: " << - input_cases[i].input; - EXPECT_EQ(input_cases[i].host.len, host.len) << "Input: " << - input_cases[i].input; + EXPECT_EQ(input_cases[i].scheme.begin, scheme.begin); + EXPECT_EQ(input_cases[i].scheme.len, scheme.len); + EXPECT_EQ(input_cases[i].host.begin, host.begin); + EXPECT_EQ(input_cases[i].host.len, host.len); } } diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc index 1851456..d644d66 100644 --- a/chrome/browser/autocomplete/history_url_provider.cc +++ b/chrome/browser/autocomplete/history_url_provider.cc @@ -381,8 +381,7 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, ((params->input.type() != AutocompleteInput::UNKNOWN) || (classifier.type() == VisitClassifier::UNVISITED_INTRANET) || !params->trim_http || - url_util::FindAndCompareScheme(UTF16ToUTF8(params->input.text()), - chrome::kHttpsScheme, NULL)); + (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0)); AutocompleteMatch what_you_typed_match(SuggestExactInput(params->input, params->trim_http)); diff --git a/chrome/browser/autocomplete/history_url_provider_unittest.cc b/chrome/browser/autocomplete/history_url_provider_unittest.cc index 1b75a7f..1eab02c 100644 --- a/chrome/browser/autocomplete/history_url_provider_unittest.cc +++ b/chrome/browser/autocomplete/history_url_provider_unittest.cc @@ -445,7 +445,9 @@ TEST_F(HistoryURLProviderTest, Fixup) { RunTest(ASCIIToUTF16("\\"), string16(), false, NULL, 0); RunTest(ASCIIToUTF16("#"), string16(), false, NULL, 0); RunTest(ASCIIToUTF16("%20"), string16(), false, NULL, 0); - RunTest(WideToUTF16(L"\uff65@s"), string16(), false, NULL, 0); + const std::string fixup_crash[] = {"http://%EF%BD%A5@s/"}; + RunTest(WideToUTF16(L"\uff65@s"), string16(), false, fixup_crash, + arraysize(fixup_crash)); RunTest(WideToUTF16(L"\u2015\u2015@ \uff7c"), string16(), false, NULL, 0); // Fixing up "file:" should result in an inline autocomplete offset of just @@ -548,30 +550,40 @@ TEST_F(HistoryURLProviderTest, DontAutocompleteOnTrailingWhitespace) { TEST_F(HistoryURLProviderTest, TreatEmailsAsSearches) { // Visiting foo.com should not make this string be treated as a navigation. - RunTest(ASCIIToUTF16("user@foo.com"), string16(), false, NULL, 0); + // That means the result should be scored at 1200 ("what you typed") and not + // 1400+. + const std::string expected[] = {"http://user@foo.com/"}; + ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("user@foo.com"), string16(), + false, expected, arraysize(expected))); + EXPECT_EQ(1200, matches_[0].relevance); } TEST_F(HistoryURLProviderTest, IntranetURLsWithPaths) { struct TestCase { const char* input; - bool has_output; + int relevance; } test_cases[] = { - { "fooey", false }, - { "fooey/", true }, - { "fooey/a", false }, - { "fooey/a b", false }, - { "gooey", true }, - { "gooey/", true }, - { "gooey/a", true }, - { "gooey/a b", true }, + { "fooey", 0 }, + { "fooey/", 1200 }, // 1200 for URL would still navigate by default. + { "fooey/a", 1200 }, // 1200 for UNKNOWN would not. + { "fooey/a b", 1200 }, // Also UNKNOWN. + { "gooey", 1410 }, + { "gooey/", 1410 }, + { "gooey/a", 1400 }, + { "gooey/a b", 1400 }, }; for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { - const std::string output[1] = { - URLFixerUpper::FixupURL(test_cases[i].input, std::string()).spec() - }; - RunTest(ASCIIToUTF16(test_cases[i].input), string16(), false, - test_cases[i].has_output ? output : NULL, - test_cases[i].has_output ? 1 : 0); + SCOPED_TRACE(test_cases[i].input); + if (test_cases[i].relevance == 0) { + RunTest(ASCIIToUTF16(test_cases[i].input), string16(), false, NULL, 0); + } else { + const std::string output[] = { + URLFixerUpper::FixupURL(test_cases[i].input, std::string()).spec() + }; + ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16(test_cases[i].input), + string16(), false, output, arraysize(output))); + EXPECT_EQ(test_cases[i].relevance, matches_[0].relevance); + } } } |