diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-27 21:06:11 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-27 21:06:11 +0000 |
commit | 833fa265a5239fca9c675fb029cd2ef53b5c5aa7 (patch) | |
tree | 807d05a9aa7f87b72b38f7d0a9328297e85d9500 | |
parent | e33972ead0b1acd54d1614ba794e5db2152527ba (diff) | |
download | chromium_src-833fa265a5239fca9c675fb029cd2ef53b5c5aa7.zip chromium_src-833fa265a5239fca9c675fb029cd2ef53b5c5aa7.tar.gz chromium_src-833fa265a5239fca9c675fb029cd2ef53b5c5aa7.tar.bz2 |
Loosen RFC 1738 compliance check to allow underscores where we already allowed hyphens, to match real-world needs.
I don't believe further loosening will be required but that data will hopefully be coming soon. In the meantime people are asking for this fix.
BUG=25714
TEST=Entering "a_b.com" in the omnibox should default to navigate, not search
Review URL: http://codereview.chromium.org/339017
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@30245 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/autocomplete/autocomplete.cc | 10 | ||||
-rw-r--r-- | chrome/browser/autocomplete/autocomplete_unittest.cc | 2 | ||||
-rw-r--r-- | net/base/net_util.cc | 12 | ||||
-rw-r--r-- | net/base/net_util.h | 9 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 15 |
5 files changed, 26 insertions, 22 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index 7db2966..db663d3 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -175,14 +175,14 @@ AutocompleteInput::Type AutocompleteInput::Parse( if (registry_length == std::wstring::npos) return QUERY; // Could be a broken IP address, etc. - // See if the hostname is valid per RFC 1738. While IE and GURL allow - // hostnames to contain many other characters (perhaps for weird intranet - // machines), it's extremely unlikely that a user would be trying to type - // those in for anything other than a search query. + // See if the hostname is valid. While IE and GURL allow hostnames to contain + // many other characters (perhaps for weird intranet machines), it's extremely + // unlikely that a user would be trying to type those in for anything other + // than a search query. url_canon::CanonHostInfo host_info; const std::string canonicalized_host(net::CanonicalizeHost(host, &host_info)); if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && - !net::IsCanonicalizedHostRFC1738Compliant(canonicalized_host)) + !net::IsCanonicalizedHostCompliant(canonicalized_host)) return QUERY; // Presence of a port means this is likely a URL, if the port is really a port diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc index 572beb5..45a9fe7 100644 --- a/chrome/browser/autocomplete/autocomplete_unittest.cc +++ b/chrome/browser/autocomplete/autocomplete_unittest.cc @@ -243,7 +243,9 @@ TEST(AutocompleteTest, InputType) { { L"http://foo", AutocompleteInput::URL }, { L"http://foo.c", AutocompleteInput::URL }, { L"http://foo.com", AutocompleteInput::URL }, + { L"http://foo_bar.com", AutocompleteInput::URL }, { L"http://-.com", AutocompleteInput::QUERY }, + { L"http://_foo_.com", AutocompleteInput::QUERY }, { L"http://foo.com:abc", AutocompleteInput::QUERY 
}, { L"http://foo.com:123456", AutocompleteInput::QUERY }, { L"http:user@foo.com", AutocompleteInput::URL }, diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 5d2089d..5c91a68 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -961,7 +961,7 @@ inline bool IsHostCharDigit(char c) { return (c >= '0') && (c <= '9'); } -bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) { +bool IsCanonicalizedHostCompliant(const std::string& host) { if (host.empty()) return false; @@ -970,7 +970,7 @@ bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) { IN_COMPONENT_STARTED_DIGIT, IN_COMPONENT_STARTED_ALPHA } state = NOT_IN_COMPONENT; - bool last_char_was_hyphen = false; + bool last_char_was_hyphen_or_underscore = false; for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) { const char c = *i; @@ -983,13 +983,13 @@ bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) { return false; } else { if (c == '.') { - if (last_char_was_hyphen) + if (last_char_was_hyphen_or_underscore) return false; state = NOT_IN_COMPONENT; } else if (IsHostCharAlpha(c) || IsHostCharDigit(c)) { - last_char_was_hyphen = false; - } else if (c == '-') { - last_char_was_hyphen = true; + last_char_was_hyphen_or_underscore = false; + } else if ((c == '-') || (c == '_')) { + last_char_was_hyphen_or_underscore = true; } else { return false; } diff --git a/net/base/net_util.h b/net/base/net_util.h index 1ad4ac2..0d78598 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -158,16 +158,17 @@ std::string CanonicalizeHost(const std::string& host, std::string CanonicalizeHost(const std::wstring& host, url_canon::CanonHostInfo* host_info); -// Returns true if |host| is RFC 1738-compliant (and not an IP address). The -// rules are: +// Returns true if |host| is not an IP address and is compliant with a set of +// rules based on RFC 1738 and tweaked to be compatible with the real world. 
+// The rules are: // * One or more components separated by '.' // * Each component begins and ends with an alphanumeric character -// * Each component contains only alphanumeric characters and '-' +// * Each component contains only alphanumeric characters and '-' or '_' // * The last component does not begin with a digit // // NOTE: You should only pass in hosts that have been returned from // CanonicalizeHost(), or you may not get accurate results. -bool IsCanonicalizedHostRFC1738Compliant(const std::string& host); +bool IsCanonicalizedHostCompliant(const std::string& host); // Call these functions to get the html snippet for a directory listing. // The return values of both functions are in UTF-8. diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index f8faedf..23376d5 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -344,7 +344,7 @@ const IDNTestCase idn_cases[] = { #endif }; -struct RFC1738Case { +struct CompliantHostCase { const char* host; bool expected_output; }; @@ -815,8 +815,8 @@ TEST(NetUtilTest, IDNToUnicodeSlow) { } } -TEST(NetUtilTest, RFC1738) { - const RFC1738Case rfc1738_cases[] = { +TEST(NetUtilTest, CompliantHost) { + const CompliantHostCase compliant_host_cases[] = { {"", false}, {"a", true}, {"-", false}, @@ -825,19 +825,20 @@ TEST(NetUtilTest, RFC1738) { {"a.a", true}, {"9.a", true}, {"a.9", false}, + {"_9a", false}, {"a.a9", true}, {"a.9a", false}, {"a+9a", false}, {"1-.a-b", false}, - {"1-2.a-b", true}, + {"1-2.a_b", true}, {"a.b.c.d.e", true}, {"1.2.3.4.e", true}, {"a.b.c.d.5", false}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(rfc1738_cases); ++i) { - EXPECT_EQ(rfc1738_cases[i].expected_output, - net::IsCanonicalizedHostRFC1738Compliant(rfc1738_cases[i].host)); + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(compliant_host_cases); ++i) { + EXPECT_EQ(compliant_host_cases[i].expected_output, + net::IsCanonicalizedHostCompliant(compliant_host_cases[i].host)); } } |