diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-16 19:49:22 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-16 19:49:22 +0000 |
commit | 96be43e90d30140bc5266cb8f667fbf03a8a7d0e (patch) | |
tree | 3c4fd679967fc2ee1cf9c51028a15b4eee2751d9 /net | |
parent | 878d73010d3e415857730205d0892ad6ecf1a26d (diff) | |
download | chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.zip chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.tar.gz chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.tar.bz2 |
Classify more types of input as queries.
This adds an oracle that can determine if a hostname is RFC 1738 compliant; if we get a hostname that's not an IP address and not compliant here, we almost certainly have a query.
This also rejects "ports" that don't consist of a number between 0 and 65535 (e.g. port "abc" or port "123456").
BUG=18817
TEST=The string "foo: bar" (with quotes) should default to search, not navigate, in the address bar.
Review URL: http://codereview.chromium.org/271116
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@29317 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/base/net_util.cc | 48 | ||||
-rw-r--r-- | net/base/net_util.h | 11 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 31 |
3 files changed, 90 insertions, 0 deletions
// net/base/net_util.cc

// Returns true if |c| is a lowercase ASCII letter.  Only lowercase is checked
// because uppercase characters have already been normalized away by host
// canonicalization.
inline bool IsHostCharAlpha(char c) {
  return (c >= 'a') && (c <= 'z');
}

// Returns true if |c| is an ASCII decimal digit.
inline bool IsHostCharDigit(char c) {
  return (c >= '0') && (c <= '9');
}

// Returns true if |host| satisfies all of the following:
//   * It is non-empty and made of one or more components separated by '.'
//     (so no empty components, and no leading or trailing '.').
//   * Every component begins and ends with an alphanumeric character.
//   * Every component contains only alphanumeric characters and '-'.
//   * The last component begins with a letter, not a digit.
// Returns false otherwise.
//
// |host| is expected to be canonicalized already: uppercase letters are
// treated as invalid characters here.
bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) {
  if (host.empty())
    return false;

  // Tracks whether we are inside a component and, if so, what kind of
  // character the component started with.  Only the start of the *last*
  // component matters for the final verdict.
  enum State {
    NOT_IN_COMPONENT,
    IN_COMPONENT_STARTED_DIGIT,
    IN_COMPONENT_STARTED_ALPHA
  } state = NOT_IN_COMPONENT;
  bool last_char_was_hyphen = false;

  for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) {
    const char c = *i;
    if (state == NOT_IN_COMPONENT) {
      // First character of a component must be alphanumeric.
      if (IsHostCharDigit(c))
        state = IN_COMPONENT_STARTED_DIGIT;
      else if (IsHostCharAlpha(c))
        state = IN_COMPONENT_STARTED_ALPHA;
      else
        return false;
    } else {
      if (c == '.') {
        // A component may not end with '-'.
        if (last_char_was_hyphen)
          return false;
        state = NOT_IN_COMPONENT;
      } else if (IsHostCharAlpha(c) || IsHostCharDigit(c)) {
        last_char_was_hyphen = false;
      } else if (c == '-') {
        last_char_was_hyphen = true;
      } else {
        return false;
      }
    }
  }

  // The host must end inside a component that started with a letter.  The
  // final component has no '.' after it, so the "may not end with '-'" rule
  // has to be enforced here as well; otherwise hosts such as "a-" would be
  // accepted.
  return (state == IN_COMPONENT_STARTED_ALPHA) && !last_char_was_hyphen;
}

// net/base/net_util.h

// Returns true if |host| is RFC 1738-compliant (and not an IP address).  The
// rules are:
// * One or more components separated by '.'
+// * Each component begins and ends with an alphanumeric character +// * Each component contains only alphanumeric characters and '-' +// * The last component does not begin with a digit +// +// NOTE: You should only pass in hosts that have been returned from +// CanonicalizeHost(), or you may not get accurate results. +bool IsCanonicalizedHostRFC1738Compliant(const std::string& host); + // Call these functions to get the html snippet for a directory listing. // The return values of both functions are in UTF-8. std::string GetDirectoryListingHeader(const string16& title); diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 6a01ec9..22d467c 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -344,6 +344,11 @@ const IDNTestCase idn_cases[] = { #endif }; +struct RFC1738Case { + const char* host; + bool expected_output; +}; + struct SuggestedFilenameCase { const char* url; const char* content_disp_header; @@ -810,6 +815,32 @@ TEST(NetUtilTest, IDNToUnicodeSlow) { } } +TEST(NetUtilTest, RFC1738) { + const RFC1738Case rfc1738_cases[] = { + {"", false}, + {"a", true}, + {"-", false}, + {".", false}, + {"a.", false}, + {"a.a", true}, + {"9.a", true}, + {"a.9", false}, + {"a.a9", true}, + {"a.9a", false}, + {"a+9a", false}, + {"1-.a-b", false}, + {"1-2.a-b", true}, + {"a.b.c.d.e", true}, + {"1.2.3.4.e", true}, + {"a.b.c.d.5", false}, + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(rfc1738_cases); ++i) { + EXPECT_EQ(rfc1738_cases[i].expected_output, + net::IsCanonicalizedHostRFC1738Compliant(rfc1738_cases[i].host)); + } +} + TEST(NetUtilTest, StripWWW) { EXPECT_EQ(L"", net::StripWWW(L"")); EXPECT_EQ(L"", net::StripWWW(L"www.")); |