summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--chrome/browser/autocomplete/autocomplete.cc38
-rw-r--r--chrome/browser/autocomplete/autocomplete_unittest.cc4
-rw-r--r--net/base/net_util.cc48
-rw-r--r--net/base/net_util.h11
-rw-r--r--net/base/net_util_unittest.cc31
5 files changed, 117 insertions, 15 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index 45b9285..492ce3c 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -168,22 +168,33 @@ AutocompleteInput::Type AutocompleteInput::Parse(
if (registry_length == std::wstring::npos)
return QUERY; // Could be a broken IP address, etc.
- // A space in the "host" means this is a query. (Technically, IE and GURL
- // allow hostnames with spaces for wierd intranet machines, but it's supposed
- // to be illegal and I'm not worried about users trying to type these in.)
- if (host.find(' ') != std::wstring::npos)
+ // See if the hostname is valid per RFC 1738. While IE and GURL allow
+ // hostnames to contain many other characters (perhaps for weird intranet
+ // machines), it's extremely unlikely that a user would be trying to type
+ // those in for anything other than a search query.
+ url_canon::CanonHostInfo host_info;
+ const std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
+ if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) &&
+ !net::IsCanonicalizedHostRFC1738Compliant(canonicalized_host))
return QUERY;
- // Presence of a password/port mean this is almost certainly a URL. We don't
- // treat usernames (without passwords) as indicating a URL, because this could
- // be an email address like "user@mail.com" which is more likely a search than
- // an HTTP auth login attempt.
- if (parts->password.is_nonempty() || parts->port.is_nonempty())
+ // Presence of a port means this is likely a URL, if the port is really a port
+ // number. If it's just garbage after a colon, this is a query.
+ if (parts->port.is_nonempty()) {
+ int port;
+ return (StringToInt(WideToUTF16(
+ text.substr(parts->port.begin, parts->port.len)), &port) &&
+ (port >= 0) && (port <= 65535)) ? URL : QUERY;
+ }
+
+ // Presence of a password means this is likely a URL. We don't treat
+ // usernames (without passwords) as indicating a URL, because this could be an
+ // email address like "user@mail.com" which is more likely a search than an
+ // HTTP auth login attempt.
+ if (parts->password.is_nonempty())
return URL;
// See if the host is an IP address.
- url_canon::CanonHostInfo host_info;
- net::CanonicalizeHost(host, &host_info);
if (host_info.family == url_canon::CanonHostInfo::IPV4) {
// If the user originally typed a host that looks like an IP address (a
// dotted quad), they probably want to open it. If the original input was
@@ -194,11 +205,8 @@ AutocompleteInput::Type AutocompleteInput::Parse(
return URL;
return desired_tld.empty() ? UNKNOWN : REQUESTED_URL;
}
-
- if (host_info.family == url_canon::CanonHostInfo::IPV6) {
- // If the user typed a valid bracketed IPv6 address, treat it as a URL.
+ if (host_info.family == url_canon::CanonHostInfo::IPV6)
return URL;
- }
// The host doesn't look like a number, so see if the user's given us a path.
if (parts->path.is_nonempty()) {
diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc
index 40e6ecf..4ab6ea4 100644
--- a/chrome/browser/autocomplete/autocomplete_unittest.cc
+++ b/chrome/browser/autocomplete/autocomplete_unittest.cc
@@ -211,13 +211,17 @@ TEST(AutocompleteTest, InputType) {
{ L"?http://foo.com/bar", AutocompleteInput::FORCED_QUERY },
{ L"foo", AutocompleteInput::UNKNOWN },
{ L"foo.com", AutocompleteInput::URL },
+ { L"-.com", AutocompleteInput::QUERY },
{ L"foo/bar", AutocompleteInput::URL },
{ L"foo/bar baz", AutocompleteInput::UNKNOWN },
{ L"http://foo/bar baz", AutocompleteInput::URL },
{ L"foo bar", AutocompleteInput::QUERY },
+ { L"\"foo:bar\"", AutocompleteInput::QUERY },
{ L"link:foo.com", AutocompleteInput::UNKNOWN },
{ L"www.foo.com:81", AutocompleteInput::URL },
{ L"localhost:8080", AutocompleteInput::URL },
+ { L"foo.com:123456", AutocompleteInput::QUERY },
+ { L"foo.com:abc", AutocompleteInput::QUERY },
{ L"en.wikipedia.org/wiki/James Bond", AutocompleteInput::URL },
// In Chrome itself, mailto: will get handled by ShellExecute, but in
// unittest mode, we don't have the data loaded in the external protocol
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 1d7d558..05f5841 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -939,6 +939,54 @@ std::string GetDirectoryListingHeader(const string16& title) {
return result;
}
+// Returns true if |c| is a lowercase ASCII letter.
+inline bool IsHostCharAlpha(char c) {
+  // We can just check lowercase because uppercase characters have already been
+  // normalized (callers are expected to pass a canonicalized host).
+  return (c >= 'a') && (c <= 'z');
+}
+
+// Returns true if |c| is an ASCII decimal digit.
+inline bool IsHostCharDigit(char c) {
+  return (c >= '0') && (c <= '9');
+}
+
+// Returns true if |host| is made of one or more '.'-separated components where
+// every component begins and ends with a letter or digit, contains only
+// letters, digits and '-', and the last component begins with a letter.
+// Returns false for an empty |host|.
+bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) {
+  if (host.empty())
+    return false;
+
+  // Tracks whether we are inside a component and, if so, what kind of
+  // character that component started with.
+  enum State {
+    NOT_IN_COMPONENT,
+    IN_COMPONENT_STARTED_DIGIT,
+    IN_COMPONENT_STARTED_ALPHA
+  } state = NOT_IN_COMPONENT;
+  bool last_char_was_hyphen = false;
+
+  for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) {
+    const char c = *i;
+    if (state == NOT_IN_COMPONENT) {
+      // First character of a component: must be alphanumeric.
+      if (IsHostCharDigit(c))
+        state = IN_COMPONENT_STARTED_DIGIT;
+      else if (IsHostCharAlpha(c))
+        state = IN_COMPONENT_STARTED_ALPHA;
+      else
+        return false;
+    } else {
+      if (c == '.') {
+        // A component may not end with '-'.
+        if (last_char_was_hyphen)
+          return false;
+        state = NOT_IN_COMPONENT;
+      } else if (IsHostCharAlpha(c) || IsHostCharDigit(c)) {
+        last_char_was_hyphen = false;
+      } else if (c == '-') {
+        last_char_was_hyphen = true;
+      } else {
+        return false;
+      }
+    }
+  }
+
+  // The host must end inside a component that started with a letter (this
+  // also rejects a trailing '.'), and that final component, like every other
+  // one, must not end with '-'.
+  return (state == IN_COMPONENT_STARTED_ALPHA) && !last_char_was_hyphen;
+}
+
std::string GetDirectoryListingEntry(const string16& name,
const std::string& raw_bytes,
bool is_dir,
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 4d7e0aa..302a55f 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -158,6 +158,17 @@ std::string CanonicalizeHost(const std::string& host,
std::string CanonicalizeHost(const std::wstring& host,
url_canon::CanonHostInfo* host_info);
+// Returns true if |host| is RFC 1738-compliant (and not an IP address). The
+// rules are:
+// * One or more components separated by '.'
+// * Each component begins and ends with an alphanumeric character
+// * Each component contains only alphanumeric characters and '-'
+// * The last component does not begin with a digit
+//
+// NOTE: You should only pass in hosts that have been returned from
+// CanonicalizeHost(), or you may not get accurate results.
+bool IsCanonicalizedHostRFC1738Compliant(const std::string& host);
+
// Call these functions to get the html snippet for a directory listing.
// The return values of both functions are in UTF-8.
std::string GetDirectoryListingHeader(const string16& title);
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 6a01ec9..22d467c 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -344,6 +344,11 @@ const IDNTestCase idn_cases[] = {
#endif
};
+// One row of the table driving the RFC1738 test: a host string and the value
+// net::IsCanonicalizedHostRFC1738Compliant() is expected to return for it.
+struct RFC1738Case {
+  // Host string handed to the checker.
+  const char* host;
+  // Result the checker is expected to produce for |host|.
+  bool expected_output;
+};
+
struct SuggestedFilenameCase {
const char* url;
const char* content_disp_header;
@@ -810,6 +815,32 @@ TEST(NetUtilTest, IDNToUnicodeSlow) {
}
}
+// Runs net::IsCanonicalizedHostRFC1738Compliant() over a table of hosts and
+// compares each result with the expected value recorded in the table.
+TEST(NetUtilTest, RFC1738) {
+  const RFC1738Case kCases[] = {
+    // Empty, single-character and trailing-dot hosts.
+    {"", false},
+    {"a", true},
+    {"-", false},
+    {".", false},
+    {"a.", false},
+    // Two components; the last one must not begin with a digit.
+    {"a.a", true},
+    {"9.a", true},
+    {"a.9", false},
+    {"a.a9", true},
+    {"a.9a", false},
+    // Characters other than alphanumerics, '-' and '.'.
+    {"a+9a", false},
+    // Hyphen placement inside components.
+    {"1-.a-b", false},
+    {"1-2.a-b", true},
+    // Longer hosts.
+    {"a.b.c.d.e", true},
+    {"1.2.3.4.e", true},
+    {"a.b.c.d.5", false},
+  };
+
+  const size_t case_count = ARRAYSIZE_UNSAFE(kCases);
+  for (size_t index = 0; index != case_count; ++index) {
+    const RFC1738Case& current = kCases[index];
+    EXPECT_EQ(current.expected_output,
+              net::IsCanonicalizedHostRFC1738Compliant(current.host));
+  }
+}
+
TEST(NetUtilTest, StripWWW) {
EXPECT_EQ(L"", net::StripWWW(L""));
EXPECT_EQ(L"", net::StripWWW(L"www."));