summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-10-13 20:37:25 +0000
committerpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-10-13 20:37:25 +0000
commitab237e4b349273aa73e9cb0b831e999cac4b980f (patch)
tree7dfd3c0fe46dda695017db61216016855c9f05db
parent3ce0dd658406a9bdf4db487bf9d0f1790dffee53 (diff)
downloadchromium_src-ab237e4b349273aa73e9cb0b831e999cac4b980f.zip
chromium_src-ab237e4b349273aa73e9cb0b831e999cac4b980f.tar.gz
chromium_src-ab237e4b349273aa73e9cb0b831e999cac4b980f.tar.bz2
Tweak omnibox parsing heuristics more:
* UNKNOWN inputs with at least one non-host component get displayed as "possible navigations" (by setting |have_what_you_typed_match| true in HistoryURLProvider::DoAutocomplete()). * Inputs with at least two non-host components (generally) get treated as URLs by AutocompleteInput::Parse(). Technically these could be searches but intranet URLs are much more likely. * Allow more cases to be REQUESTED_URL, such as "user@host" + ctrl. I'm not sure these were ever intentionally excluded from the ctrl-enter handling, and I don't see why they should be. Also use url_parse::ParsePort(), which didn't use to exist (I think?) to replace some code in AutocompleteInput::Parse(). BUG=99131 TEST=Covered by unittests Review URL: http://codereview.chromium.org/8258004 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@105363 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--chrome/browser/autocomplete/autocomplete.cc87
-rw-r--r--chrome/browser/autocomplete/autocomplete.h3
-rw-r--r--chrome/browser/autocomplete/autocomplete_unittest.cc35
-rw-r--r--chrome/browser/autocomplete/history_url_provider.cc3
-rw-r--r--chrome/browser/autocomplete/history_url_provider_unittest.cc46
5 files changed, 109 insertions, 65 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index 3cfe87f..6dbb9a1 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -324,13 +324,9 @@ AutocompleteInput::Type AutocompleteInput::Parse(
// thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save
// handling legal port numbers until after the "IP address" determination
// below.
- if (parts->port.is_nonempty()) {
- int port;
- if (!base::StringToInt(text.substr(parts->port.begin, parts->port.len),
- &port) ||
- (port < 0) || (port > 65535))
- return QUERY;
- }
+ if (url_parse::ParsePort(text.c_str(), parts->port) ==
+ url_parse::PORT_INVALID)
+ return QUERY;
// Now that we've ruled out all schemes other than http or https and done a
// little more sanity checking, the presence of a scheme means this is likely
@@ -339,19 +335,21 @@ AutocompleteInput::Type AutocompleteInput::Parse(
return URL;
// See if the host is an IP address.
- if (host_info.family == url_canon::CanonHostInfo::IPV4) {
- // If the user originally typed a host that looks like an IP address (a
- // dotted quad), they probably want to open it. If the original input was
- // something else (like a single number), they probably wanted to search for
- // it, unless they explicitly typed a scheme. This is true even if the URL
- // appears to have a path: "1.2/45" is more likely a search (for the answer
- // to a math problem) than a URL.
- if (host_info.num_ipv4_components == 4)
- return URL;
- return desired_tld.empty() ? UNKNOWN : REQUESTED_URL;
- }
if (host_info.family == url_canon::CanonHostInfo::IPV6)
return URL;
+ // If the user originally typed a host that looks like an IP address (a
+ // dotted quad), they probably want to open it. If the original input was
+ // something else (like a single number), they probably wanted to search for
+ // it, unless they explicitly typed a scheme. This is true even if the URL
+ // appears to have a path: "1.2/45" is more likely a search (for the answer
+ // to a math problem) than a URL. However, if there are more non-host
+ // components, then maybe this really was intended to be a navigation. For
+ // this reason we only check the dotted-quad case here, and save the "other
+ // IP addresses" case for after we check the number of non-host components
+ // below.
+ if ((host_info.family == url_canon::CanonHostInfo::IPV4) &&
+ (host_info.num_ipv4_components == 4))
+ return URL;
// Presence of a password means this is likely a URL. Note that unless the
// user has typed an explicit "http://" or similar, we'll probably think that
@@ -364,27 +362,36 @@ AutocompleteInput::Type AutocompleteInput::Parse(
if (parts->path.len == 1)
return URL;
- // If we reach here with a username, but no port or path, our input looks like
- // "user@host". Because there is no scheme explicitly specified, we think
- // this is more likely an email address than an HTTP auth attempt. Hence, we
- // search by default and let users correct us on a case-by-case basis.
- if (parts->username.is_nonempty() && !parts->port.is_nonempty() &&
- !parts->path.is_nonempty())
- return UNKNOWN;
-
- // If the host has a known TLD, it's probably a URL. Also special-case
- // "localhost" as a known hostname.
- if ((registry_length != 0) || (host == ASCIIToUTF16("localhost")))
+ // If there is more than one recognized non-host component, this is likely to
+ // be a URL, even if the TLD is unknown (in which case this is likely an
+ // intranet URL).
+ if (NumNonHostComponents(*parts) > 1)
return URL;
+ // If the host has a known TLD, it's probably a URL, with the following
+ // exceptions:
+ // * Any "IP addresses" that make it here are more likely searches
+ // (see above).
+ // * If we reach here with a username, our input looks like "user@host[.tld]".
+ // Because there is no scheme explicitly specified, we think this is more
+ // likely an email address than an HTTP auth attempt. Hence, we search by
+ // default and let users correct us on a case-by-case basis.
+ // Note that we special-case "localhost" as a known hostname.
+ if ((host_info.family != url_canon::CanonHostInfo::IPV4) &&
+ ((registry_length != 0) || (host == ASCIIToUTF16("localhost"))))
+ return parts->username.is_nonempty() ? UNKNOWN : URL;
+
// If we reach this point, we know there's no known TLD on the input, so if
// the user wishes to add a desired_tld, the fixup code will oblige; thus this
// is a URL.
if (!desired_tld.empty())
return REQUESTED_URL;
- // No scheme, username, password, port, path, and no known TLD on the host.
+ // No scheme, password, port, path, and no known TLD on the host.
// This could be:
+ // * An "incomplete IP address"; likely a search (see above).
+ // * An email-like input like "user@host", where "host" has no known TLD.
+ // It's not clear what the user means here and searching seems reasonable.
// * A single word "foo"; possibly an intranet site, but more likely a search.
// This is ideally an UNKNOWN, and we can let the Alternate Nav URL code
// catch our mistakes.
@@ -456,6 +463,26 @@ string16 AutocompleteInput::FormattedStringWithEquivalentMeaning(
formatted_url : url_with_path;
}
+// static
+int AutocompleteInput::NumNonHostComponents(const url_parse::Parsed& parts) {
+ int num_nonhost_components = 0;
+ if (parts.scheme.is_nonempty())
+ ++num_nonhost_components;
+ if (parts.username.is_nonempty())
+ ++num_nonhost_components;
+ if (parts.password.is_nonempty())
+ ++num_nonhost_components;
+ if (parts.port.is_nonempty())
+ ++num_nonhost_components;
+ if (parts.path.is_nonempty())
+ ++num_nonhost_components;
+ if (parts.query.is_nonempty())
+ ++num_nonhost_components;
+ if (parts.ref.is_nonempty())
+ ++num_nonhost_components;
+ return num_nonhost_components;
+}
+
void AutocompleteInput::UpdateText(const string16& text,
const url_parse::Parsed& parts) {
text_ = text;
diff --git a/chrome/browser/autocomplete/autocomplete.h b/chrome/browser/autocomplete/autocomplete.h
index c481bed..02dc34a 100644
--- a/chrome/browser/autocomplete/autocomplete.h
+++ b/chrome/browser/autocomplete/autocomplete.h
@@ -264,6 +264,9 @@ class AutocompleteInput {
const GURL& url,
const string16& formatted_url);
+ // Returns the number of non-empty components in |parts| besides the host.
+ static int NumNonHostComponents(const url_parse::Parsed& parts);
+
// User-provided text to be completed.
const string16& text() const { return text_; }
diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc
index 8e683af..e5e95fe 100644
--- a/chrome/browser/autocomplete/autocomplete_unittest.cc
+++ b/chrome/browser/autocomplete/autocomplete_unittest.cc
@@ -291,6 +291,7 @@ TEST_F(AutocompleteTest, InputType) {
{ ASCIIToUTF16("foo-.com"), AutocompleteInput::UNKNOWN },
{ ASCIIToUTF16("foo.-com"), AutocompleteInput::QUERY },
{ ASCIIToUTF16("foo/bar"), AutocompleteInput::UNKNOWN },
+ { ASCIIToUTF16("foo.com/bar"), AutocompleteInput::URL },
{ ASCIIToUTF16("foo;bar"), AutocompleteInput::QUERY },
{ ASCIIToUTF16("foo/bar baz"), AutocompleteInput::UNKNOWN },
{ ASCIIToUTF16("foo bar.com"), AutocompleteInput::QUERY },
@@ -306,8 +307,8 @@ TEST_F(AutocompleteTest, InputType) {
{ ASCIIToUTF16("foo.com:abc"), AutocompleteInput::QUERY },
{ ASCIIToUTF16("1.2.3.4:abc"), AutocompleteInput::QUERY },
{ ASCIIToUTF16("user@foo.com"), AutocompleteInput::UNKNOWN },
- { ASCIIToUTF16("user@foo/z"), AutocompleteInput::UNKNOWN },
- { ASCIIToUTF16("user@foo/z z"), AutocompleteInput::UNKNOWN },
+ { ASCIIToUTF16("user@foo/z"), AutocompleteInput::URL },
+ { ASCIIToUTF16("user@foo/z z"), AutocompleteInput::URL },
{ ASCIIToUTF16("user@foo.com/z"), AutocompleteInput::URL },
{ ASCIIToUTF16("user:pass@"), AutocompleteInput::UNKNOWN },
{ ASCIIToUTF16("user:pass@!foo.com"), AutocompleteInput::UNKNOWN },
@@ -319,9 +320,13 @@ TEST_F(AutocompleteTest, InputType) {
{ ASCIIToUTF16("1.2"), AutocompleteInput::UNKNOWN },
{ ASCIIToUTF16("1.2/45"), AutocompleteInput::UNKNOWN },
{ ASCIIToUTF16("1.2:45"), AutocompleteInput::UNKNOWN },
- { ASCIIToUTF16("user@1.2:45"), AutocompleteInput::UNKNOWN },
- { ASCIIToUTF16("user@foo:45"), AutocompleteInput::UNKNOWN },
+ { ASCIIToUTF16("user@1.2:45"), AutocompleteInput::URL },
+ { ASCIIToUTF16("user@foo:45"), AutocompleteInput::URL },
{ ASCIIToUTF16("user:pass@1.2:45"), AutocompleteInput::URL },
+ { ASCIIToUTF16("host?query"), AutocompleteInput::UNKNOWN },
+ { ASCIIToUTF16("host#ref"), AutocompleteInput::UNKNOWN },
+ { ASCIIToUTF16("host/path?query"), AutocompleteInput::URL },
+ { ASCIIToUTF16("host/path#ref"), AutocompleteInput::URL },
{ ASCIIToUTF16("en.wikipedia.org/wiki/James Bond"),
AutocompleteInput::URL },
// In Chrome itself, mailto: will get handled by ShellExecute, but in
@@ -358,7 +363,7 @@ TEST_F(AutocompleteTest, InputType) {
{ ASCIIToUTF16("https://foo.com"), AutocompleteInput::URL },
{ ASCIIToUTF16("127.0.0.1"), AutocompleteInput::URL },
{ ASCIIToUTF16("127.0.1"), AutocompleteInput::UNKNOWN },
- { ASCIIToUTF16("127.0.1/"), AutocompleteInput::UNKNOWN },
+ { ASCIIToUTF16("127.0.1/"), AutocompleteInput::URL },
{ ASCIIToUTF16("browser.tabs.closeButtons"), AutocompleteInput::UNKNOWN },
{ WideToUTF16(L"\u6d4b\u8bd5"), AutocompleteInput::UNKNOWN },
{ ASCIIToUTF16("[2001:]"), AutocompleteInput::QUERY }, // Not a valid IP
@@ -384,14 +389,15 @@ TEST_F(AutocompleteTest, InputTypeWithDesiredTLD) {
} input_cases[] = {
{ ASCIIToUTF16("401k"), AutocompleteInput::REQUESTED_URL },
{ ASCIIToUTF16("999999999999999"), AutocompleteInput::REQUESTED_URL },
- { ASCIIToUTF16("x@y/z z"), AutocompleteInput::REQUESTED_URL },
+ { ASCIIToUTF16("x@y"), AutocompleteInput::REQUESTED_URL },
+ { ASCIIToUTF16("y/z z"), AutocompleteInput::REQUESTED_URL },
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) {
+ SCOPED_TRACE(input_cases[i].input);
AutocompleteInput input(input_cases[i].input, ASCIIToUTF16("com"), true,
false, true, AutocompleteInput::ALL_MATCHES);
- EXPECT_EQ(input_cases[i].type, input.type()) << "Input: " <<
- input_cases[i].input;
+ EXPECT_EQ(input_cases[i].type, input.type());
}
}
@@ -463,6 +469,7 @@ TEST(AutocompleteInput, ParseForEmphasizeComponent) {
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) {
+ SCOPED_TRACE(input_cases[i].input);
Component scheme, host;
AutocompleteInput::ParseForEmphasizeComponents(input_cases[i].input,
string16(),
@@ -470,14 +477,10 @@ TEST(AutocompleteInput, ParseForEmphasizeComponent) {
&host);
AutocompleteInput input(input_cases[i].input, string16(), true, false,
true, AutocompleteInput::ALL_MATCHES);
- EXPECT_EQ(input_cases[i].scheme.begin, scheme.begin) << "Input: " <<
- input_cases[i].input;
- EXPECT_EQ(input_cases[i].scheme.len, scheme.len) << "Input: " <<
- input_cases[i].input;
- EXPECT_EQ(input_cases[i].host.begin, host.begin) << "Input: " <<
- input_cases[i].input;
- EXPECT_EQ(input_cases[i].host.len, host.len) << "Input: " <<
- input_cases[i].input;
+ EXPECT_EQ(input_cases[i].scheme.begin, scheme.begin);
+ EXPECT_EQ(input_cases[i].scheme.len, scheme.len);
+ EXPECT_EQ(input_cases[i].host.begin, host.begin);
+ EXPECT_EQ(input_cases[i].host.len, host.len);
}
}
diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc
index 1851456..d644d66 100644
--- a/chrome/browser/autocomplete/history_url_provider.cc
+++ b/chrome/browser/autocomplete/history_url_provider.cc
@@ -381,8 +381,7 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend,
((params->input.type() != AutocompleteInput::UNKNOWN) ||
(classifier.type() == VisitClassifier::UNVISITED_INTRANET) ||
!params->trim_http ||
- url_util::FindAndCompareScheme(UTF16ToUTF8(params->input.text()),
- chrome::kHttpsScheme, NULL));
+ (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0));
AutocompleteMatch what_you_typed_match(SuggestExactInput(params->input,
params->trim_http));
diff --git a/chrome/browser/autocomplete/history_url_provider_unittest.cc b/chrome/browser/autocomplete/history_url_provider_unittest.cc
index 1b75a7f..1eab02c 100644
--- a/chrome/browser/autocomplete/history_url_provider_unittest.cc
+++ b/chrome/browser/autocomplete/history_url_provider_unittest.cc
@@ -445,7 +445,9 @@ TEST_F(HistoryURLProviderTest, Fixup) {
RunTest(ASCIIToUTF16("\\"), string16(), false, NULL, 0);
RunTest(ASCIIToUTF16("#"), string16(), false, NULL, 0);
RunTest(ASCIIToUTF16("%20"), string16(), false, NULL, 0);
- RunTest(WideToUTF16(L"\uff65@s"), string16(), false, NULL, 0);
+ const std::string fixup_crash[] = {"http://%EF%BD%A5@s/"};
+ RunTest(WideToUTF16(L"\uff65@s"), string16(), false, fixup_crash,
+ arraysize(fixup_crash));
RunTest(WideToUTF16(L"\u2015\u2015@ \uff7c"), string16(), false, NULL, 0);
// Fixing up "file:" should result in an inline autocomplete offset of just
@@ -548,30 +550,40 @@ TEST_F(HistoryURLProviderTest, DontAutocompleteOnTrailingWhitespace) {
TEST_F(HistoryURLProviderTest, TreatEmailsAsSearches) {
// Visiting foo.com should not make this string be treated as a navigation.
- RunTest(ASCIIToUTF16("user@foo.com"), string16(), false, NULL, 0);
+ // That means the result should be scored at 1200 ("what you typed") and not
+ // 1400+.
+ const std::string expected[] = {"http://user@foo.com/"};
+ ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("user@foo.com"), string16(),
+ false, expected, arraysize(expected)));
+ EXPECT_EQ(1200, matches_[0].relevance);
}
TEST_F(HistoryURLProviderTest, IntranetURLsWithPaths) {
struct TestCase {
const char* input;
- bool has_output;
+ int relevance;
} test_cases[] = {
- { "fooey", false },
- { "fooey/", true },
- { "fooey/a", false },
- { "fooey/a b", false },
- { "gooey", true },
- { "gooey/", true },
- { "gooey/a", true },
- { "gooey/a b", true },
+ { "fooey", 0 },
+ { "fooey/", 1200 }, // 1200 for URL would still navigate by default.
+ { "fooey/a", 1200 }, // 1200 for UNKNOWN would not.
+ { "fooey/a b", 1200 }, // Also UNKNOWN.
+ { "gooey", 1410 },
+ { "gooey/", 1410 },
+ { "gooey/a", 1400 },
+ { "gooey/a b", 1400 },
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
- const std::string output[1] = {
- URLFixerUpper::FixupURL(test_cases[i].input, std::string()).spec()
- };
- RunTest(ASCIIToUTF16(test_cases[i].input), string16(), false,
- test_cases[i].has_output ? output : NULL,
- test_cases[i].has_output ? 1 : 0);
+ SCOPED_TRACE(test_cases[i].input);
+ if (test_cases[i].relevance == 0) {
+ RunTest(ASCIIToUTF16(test_cases[i].input), string16(), false, NULL, 0);
+ } else {
+ const std::string output[] = {
+ URLFixerUpper::FixupURL(test_cases[i].input, std::string()).spec()
+ };
+ ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16(test_cases[i].input),
+ string16(), false, output, arraysize(output)));
+ EXPECT_EQ(test_cases[i].relevance, matches_[0].relevance);
+ }
}
}