summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-27 21:06:11 +0000
committerpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-27 21:06:11 +0000
commit833fa265a5239fca9c675fb029cd2ef53b5c5aa7 (patch)
tree807d05a9aa7f87b72b38f7d0a9328297e85d9500
parente33972ead0b1acd54d1614ba794e5db2152527ba (diff)
downloadchromium_src-833fa265a5239fca9c675fb029cd2ef53b5c5aa7.zip
chromium_src-833fa265a5239fca9c675fb029cd2ef53b5c5aa7.tar.gz
chromium_src-833fa265a5239fca9c675fb029cd2ef53b5c5aa7.tar.bz2
Loosen RFC 1738 compliance check to allow underscores where we already allowed hyphens, to match real-world needs.
I don't believe further loosening will be required, but data on that will hopefully be coming soon. In the meantime people are asking for this fix. BUG=25714 TEST=Entering "a_b.com" in the omnibox should default to navigate, not search Review URL: http://codereview.chromium.org/339017 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@30245 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--chrome/browser/autocomplete/autocomplete.cc10
-rw-r--r--chrome/browser/autocomplete/autocomplete_unittest.cc2
-rw-r--r--net/base/net_util.cc12
-rw-r--r--net/base/net_util.h9
-rw-r--r--net/base/net_util_unittest.cc15
5 files changed, 26 insertions, 22 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index 7db2966..db663d3 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -175,14 +175,14 @@ AutocompleteInput::Type AutocompleteInput::Parse(
if (registry_length == std::wstring::npos)
return QUERY; // Could be a broken IP address, etc.
- // See if the hostname is valid per RFC 1738. While IE and GURL allow
- // hostnames to contain many other characters (perhaps for weird intranet
- // machines), it's extremely unlikely that a user would be trying to type
- // those in for anything other than a search query.
+ // See if the hostname is valid. While IE and GURL allow hostnames to contain
+ // many other characters (perhaps for weird intranet machines), it's extremely
+ // unlikely that a user would be trying to type those in for anything other
+ // than a search query.
url_canon::CanonHostInfo host_info;
const std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) &&
- !net::IsCanonicalizedHostRFC1738Compliant(canonicalized_host))
+ !net::IsCanonicalizedHostCompliant(canonicalized_host))
return QUERY;
// Presence of a port means this is likely a URL, if the port is really a port
diff --git a/chrome/browser/autocomplete/autocomplete_unittest.cc b/chrome/browser/autocomplete/autocomplete_unittest.cc
index 572beb5..45a9fe7 100644
--- a/chrome/browser/autocomplete/autocomplete_unittest.cc
+++ b/chrome/browser/autocomplete/autocomplete_unittest.cc
@@ -243,7 +243,9 @@ TEST(AutocompleteTest, InputType) {
{ L"http://foo", AutocompleteInput::URL },
{ L"http://foo.c", AutocompleteInput::URL },
{ L"http://foo.com", AutocompleteInput::URL },
+ { L"http://foo_bar.com", AutocompleteInput::URL },
{ L"http://-.com", AutocompleteInput::QUERY },
+ { L"http://_foo_.com", AutocompleteInput::QUERY },
{ L"http://foo.com:abc", AutocompleteInput::QUERY },
{ L"http://foo.com:123456", AutocompleteInput::QUERY },
{ L"http:user@foo.com", AutocompleteInput::URL },
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 5d2089d..5c91a68 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -961,7 +961,7 @@ inline bool IsHostCharDigit(char c) {
return (c >= '0') && (c <= '9');
}
-bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) {
+bool IsCanonicalizedHostCompliant(const std::string& host) {
if (host.empty())
return false;
@@ -970,7 +970,7 @@ bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) {
IN_COMPONENT_STARTED_DIGIT,
IN_COMPONENT_STARTED_ALPHA
} state = NOT_IN_COMPONENT;
- bool last_char_was_hyphen = false;
+ bool last_char_was_hyphen_or_underscore = false;
for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) {
const char c = *i;
@@ -983,13 +983,13 @@ bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) {
return false;
} else {
if (c == '.') {
- if (last_char_was_hyphen)
+ if (last_char_was_hyphen_or_underscore)
return false;
state = NOT_IN_COMPONENT;
} else if (IsHostCharAlpha(c) || IsHostCharDigit(c)) {
- last_char_was_hyphen = false;
- } else if (c == '-') {
- last_char_was_hyphen = true;
+ last_char_was_hyphen_or_underscore = false;
+ } else if ((c == '-') || (c == '_')) {
+ last_char_was_hyphen_or_underscore = true;
} else {
return false;
}
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 1ad4ac2..0d78598 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -158,16 +158,17 @@ std::string CanonicalizeHost(const std::string& host,
std::string CanonicalizeHost(const std::wstring& host,
url_canon::CanonHostInfo* host_info);
-// Returns true if |host| is RFC 1738-compliant (and not an IP address). The
-// rules are:
+// Returns true if |host| is not an IP address and is compliant with a set of
+// rules based on RFC 1738 and tweaked to be compatible with the real world.
+// The rules are:
// * One or more components separated by '.'
// * Each component begins and ends with an alphanumeric character
-// * Each component contains only alphanumeric characters and '-'
+// * Each component contains only alphanumeric characters and '-' or '_'
// * The last component does not begin with a digit
//
// NOTE: You should only pass in hosts that have been returned from
// CanonicalizeHost(), or you may not get accurate results.
-bool IsCanonicalizedHostRFC1738Compliant(const std::string& host);
+bool IsCanonicalizedHostCompliant(const std::string& host);
// Call these functions to get the html snippet for a directory listing.
// The return values of both functions are in UTF-8.
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index f8faedf..23376d5 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -344,7 +344,7 @@ const IDNTestCase idn_cases[] = {
#endif
};
-struct RFC1738Case {
+struct CompliantHostCase {
const char* host;
bool expected_output;
};
@@ -815,8 +815,8 @@ TEST(NetUtilTest, IDNToUnicodeSlow) {
}
}
-TEST(NetUtilTest, RFC1738) {
- const RFC1738Case rfc1738_cases[] = {
+TEST(NetUtilTest, CompliantHost) {
+ const CompliantHostCase compliant_host_cases[] = {
{"", false},
{"a", true},
{"-", false},
@@ -825,19 +825,20 @@ TEST(NetUtilTest, RFC1738) {
{"a.a", true},
{"9.a", true},
{"a.9", false},
+ {"_9a", false},
{"a.a9", true},
{"a.9a", false},
{"a+9a", false},
{"1-.a-b", false},
- {"1-2.a-b", true},
+ {"1-2.a_b", true},
{"a.b.c.d.e", true},
{"1.2.3.4.e", true},
{"a.b.c.d.5", false},
};
- for (size_t i = 0; i < ARRAYSIZE_UNSAFE(rfc1738_cases); ++i) {
- EXPECT_EQ(rfc1738_cases[i].expected_output,
- net::IsCanonicalizedHostRFC1738Compliant(rfc1738_cases[i].host));
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(compliant_host_cases); ++i) {
+ EXPECT_EQ(compliant_host_cases[i].expected_output,
+ net::IsCanonicalizedHostCompliant(compliant_host_cases[i].host));
}
}