summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-16 19:49:22 +0000
committerpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-16 19:49:22 +0000
commit96be43e90d30140bc5266cb8f667fbf03a8a7d0e (patch)
tree3c4fd679967fc2ee1cf9c51028a15b4eee2751d9 /net
parent878d73010d3e415857730205d0892ad6ecf1a26d (diff)
downloadchromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.zip
chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.tar.gz
chromium_src-96be43e90d30140bc5266cb8f667fbf03a8a7d0e.tar.bz2
Classify more types of input as queries.
This adds an oracle that can determine if a hostname is RFC 1738 compliant; if we get a hostname that's not an IP address and not compliant here, we almost certainly have a query. This also rejects "ports" that don't consist of a number between 0 and 65535 (e.g. port "abc" or port "123456").

BUG=18817
TEST=The string "foo: bar" (with quotes) should default to search, not navigate, in the address bar.
Review URL: http://codereview.chromium.org/271116

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@29317 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r--net/base/net_util.cc48
-rw-r--r--net/base/net_util.h11
-rw-r--r--net/base/net_util_unittest.cc31
3 files changed, 90 insertions, 0 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 1d7d558..05f5841 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -939,6 +939,54 @@ std::string GetDirectoryListingHeader(const string16& title) {
return result;
}
+// Returns true when |c| is an ASCII lowercase letter. Uppercase letters are
+// deliberately not matched: hosts have already been normalized to lowercase
+// by the time they are examined here.
+inline bool IsHostCharAlpha(char c) {
+  return !(c < 'a' || c > 'z');
+}
+
+// Returns true when |c| is one of the ASCII decimal digits '0' through '9'.
+inline bool IsHostCharDigit(char c) {
+  return !(c < '0' || c > '9');
+}
+
+// Checks |host| against the RFC 1738 hostname rules in a single pass.
+//
+// A small state machine tracks whether we are currently inside a
+// '.'-separated component and, if so, whether that component began with a
+// digit or with a letter. Returns true only if:
+//  * |host| is non-empty,
+//  * every component begins and ends with an alphanumeric character,
+//  * every component contains only alphanumeric characters and '-', and
+//  * the last component begins with a letter.
+bool IsCanonicalizedHostRFC1738Compliant(const std::string& host) {
+  if (host.empty())
+    return false;
+
+  enum State {
+    NOT_IN_COMPONENT,
+    IN_COMPONENT_STARTED_DIGIT,
+    IN_COMPONENT_STARTED_ALPHA
+  } state = NOT_IN_COMPONENT;
+  bool last_char_was_hyphen = false;
+
+  for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) {
+    const char c = *i;
+    if (state == NOT_IN_COMPONENT) {
+      // The first character of a component must be alphanumeric; this also
+      // rejects a leading '.', a leading '-', and empty components ("a..b").
+      if (IsHostCharDigit(c))
+        state = IN_COMPONENT_STARTED_DIGIT;
+      else if (IsHostCharAlpha(c))
+        state = IN_COMPONENT_STARTED_ALPHA;
+      else
+        return false;
+    } else {
+      if (c == '.') {
+        // A component may not end with '-'.
+        if (last_char_was_hyphen)
+          return false;
+        state = NOT_IN_COMPONENT;
+      } else if (IsHostCharAlpha(c) || IsHostCharDigit(c)) {
+        last_char_was_hyphen = false;
+      } else if (c == '-') {
+        last_char_was_hyphen = true;
+      } else {
+        return false;
+      }
+    }
+  }
+
+  // We must finish inside a component that started with a letter (this
+  // rejects a trailing '.' and a last component that begins with a digit).
+  // The "no trailing '-'" rule applies to the last component too, and no '.'
+  // follows it to trigger the check inside the loop, so test it here.
+  return (state == IN_COMPONENT_STARTED_ALPHA) && !last_char_was_hyphen;
+}
+
std::string GetDirectoryListingEntry(const string16& name,
const std::string& raw_bytes,
bool is_dir,
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 4d7e0aa..302a55f 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -158,6 +158,17 @@ std::string CanonicalizeHost(const std::string& host,
std::string CanonicalizeHost(const std::wstring& host,
url_canon::CanonHostInfo* host_info);
+// Returns true if |host| is RFC 1738-compliant (and not an IP address). The
+// rules are:
+// * One or more components separated by '.'
+// * Each component begins and ends with an alphanumeric character
+// * Each component contains only alphanumeric characters and '-'
+// * The last component does not begin with a digit
+//
+// An empty |host| is never compliant. Only lowercase ASCII letters are
+// treated as alphabetic, which is why the input must already be
+// canonicalized. For example, "a.a9" and "1.2.3.4.e" are accepted, while
+// "a.9a", "a." and "a+9a" are rejected.
+//
+// NOTE: You should only pass in hosts that have been returned from
+// CanonicalizeHost(), or you may not get accurate results.
+bool IsCanonicalizedHostRFC1738Compliant(const std::string& host);
+
// Call these functions to get the html snippet for a directory listing.
// The return values of both functions are in UTF-8.
std::string GetDirectoryListingHeader(const string16& title);
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 6a01ec9..22d467c 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -344,6 +344,11 @@ const IDNTestCase idn_cases[] = {
#endif
};
+// One row of the table driving the RFC1738 test: a host string and the result
+// net::IsCanonicalizedHostRFC1738Compliant() is expected to return for it.
+struct RFC1738Case {
+ const char* host;  // Host passed to the function under test.
+ bool expected_output;  // Expected return value for |host|.
+};
+
struct SuggestedFilenameCase {
const char* url;
const char* content_disp_header;
@@ -810,6 +815,32 @@ TEST(NetUtilTest, IDNToUnicodeSlow) {
}
}
+// Runs net::IsCanonicalizedHostRFC1738Compliant() over a table of hosts and
+// checks the verdict for each one.
+TEST(NetUtilTest, RFC1738) {
+  const RFC1738Case rfc1738_cases[] = {
+    {"", false},
+    {"a", true},
+    {"-", false},
+    {".", false},
+    {"a.", false},
+    {"a.a", true},
+    {"9.a", true},
+    {"a.9", false},
+    {"a.a9", true},
+    {"a.9a", false},
+    {"a+9a", false},
+    {"1-.a-b", false},
+    {"1-2.a-b", true},
+    {"a.b.c.d.e", true},
+    {"1.2.3.4.e", true},
+    {"a.b.c.d.5", false},
+  };
+
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(rfc1738_cases); ++i) {
+    // Attach the host to the failure output: on its own EXPECT_EQ only prints
+    // the two booleans, which does not identify the offending table row.
+    EXPECT_EQ(rfc1738_cases[i].expected_output,
+              net::IsCanonicalizedHostRFC1738Compliant(rfc1738_cases[i].host))
+        << "host: \"" << rfc1738_cases[i].host << "\"";
+  }
+}
+
TEST(NetUtilTest, StripWWW) {
EXPECT_EQ(L"", net::StripWWW(L""));
EXPECT_EQ(L"", net::StripWWW(L"www."));