diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-27 21:06:11 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-27 21:06:11 +0000 |
commit | 833fa265a5239fca9c675fb029cd2ef53b5c5aa7 (patch) | |
tree | 807d05a9aa7f87b72b38f7d0a9328297e85d9500 /net/base | |
parent | e33972ead0b1acd54d1614ba794e5db2152527ba (diff) | |
download | chromium_src-833fa265a5239fca9c675fb029cd2ef53b5c5aa7.zip chromium_src-833fa265a5239fca9c675fb029cd2ef53b5c5aa7.tar.gz chromium_src-833fa265a5239fca9c675fb029cd2ef53b5c5aa7.tar.bz2 |
Loosen RFC 1738 compliance check to allow underscores where we already allowed hyphens, to match real-world needs.
I don't believe further loosening will be required, but that data will hopefully be coming soon. In the meantime, people are asking for this fix.
BUG=25714
TEST=Entering "a_b.com" in the omnibox should default to navigate, not search
Review URL: http://codereview.chromium.org/339017
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@30245 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base')
-rw-r--r-- | net/base/net_util.cc | 12 | ||||
-rw-r--r-- | net/base/net_util.h | 9 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 15 |
3 files changed, 19 insertions, 17 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 5d2089d..5c91a68 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -961,7 +961,7 @@ inline bool IsHostCharDigit(char c) { return (c >= '0') && (c <= '9'); } -bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) { +bool IsCanonicalizedHostCompliant(const std::string& host) { if (host.empty()) return false; @@ -970,7 +970,7 @@ bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) { IN_COMPONENT_STARTED_DIGIT, IN_COMPONENT_STARTED_ALPHA } state = NOT_IN_COMPONENT; - bool last_char_was_hyphen = false; + bool last_char_was_hyphen_or_underscore = false; for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) { const char c = *i; @@ -983,13 +983,13 @@ bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) { return false; } else { if (c == '.') { - if (last_char_was_hyphen) + if (last_char_was_hyphen_or_underscore) return false; state = NOT_IN_COMPONENT; } else if (IsHostCharAlpha(c) || IsHostCharDigit(c)) { - last_char_was_hyphen = false; - } else if (c == '-') { - last_char_was_hyphen = true; + last_char_was_hyphen_or_underscore = false; + } else if ((c == '-') || (c == '_')) { + last_char_was_hyphen_or_underscore = true; } else { return false; } diff --git a/net/base/net_util.h b/net/base/net_util.h index 1ad4ac2..0d78598 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -158,16 +158,17 @@ std::string CanonicalizeHost(const std::string& host, std::string CanonicalizeHost(const std::wstring& host, url_canon::CanonHostInfo* host_info); -// Returns true if |host| is RFC 1738-compliant (and not an IP address). The -// rules are: +// Returns true if |host| is not an IP address and is compliant with a set of +// rules based on RFC 1738 and tweaked to be compatible with the real world. +// The rules are: // * One or more components separated by '.' 
// * Each component begins and ends with an alphanumeric character -// * Each component contains only alphanumeric characters and '-' +// * Each component contains only alphanumeric characters and '-' or '_' // * The last component does not begin with a digit // // NOTE: You should only pass in hosts that have been returned from // CanonicalizeHost(), or you may not get accurate results. -bool IsCanonicalizedHostRFC1738Compliant(const std::string& host); +bool IsCanonicalizedHostCompliant(const std::string& host); // Call these functions to get the html snippet for a directory listing. // The return values of both functions are in UTF-8. diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index f8faedf..23376d5 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -344,7 +344,7 @@ const IDNTestCase idn_cases[] = { #endif }; -struct RFC1738Case { +struct CompliantHostCase { const char* host; bool expected_output; }; @@ -815,8 +815,8 @@ TEST(NetUtilTest, IDNToUnicodeSlow) { } } -TEST(NetUtilTest, RFC1738) { - const RFC1738Case rfc1738_cases[] = { +TEST(NetUtilTest, CompliantHost) { + const CompliantHostCase compliant_host_cases[] = { {"", false}, {"a", true}, {"-", false}, @@ -825,19 +825,20 @@ TEST(NetUtilTest, RFC1738) { {"a.a", true}, {"9.a", true}, {"a.9", false}, + {"_9a", false}, {"a.a9", true}, {"a.9a", false}, {"a+9a", false}, {"1-.a-b", false}, - {"1-2.a-b", true}, + {"1-2.a_b", true}, {"a.b.c.d.e", true}, {"1.2.3.4.e", true}, {"a.b.c.d.5", false}, }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(rfc1738_cases); ++i) { - EXPECT_EQ(rfc1738_cases[i].expected_output, - net::IsCanonicalizedHostRFC1738Compliant(rfc1738_cases[i].host)); + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(compliant_host_cases); ++i) { + EXPECT_EQ(compliant_host_cases[i].expected_output, + net::IsCanonicalizedHostCompliant(compliant_host_cases[i].host)); } } |