diff options
author | qyearsley <qyearsley@chromium.org> | 2015-08-03 00:03:49 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-08-03 07:04:22 +0000 |
commit | 7ffaa682fba595ffb50c7865eba2410532f3a2cf (patch) | |
tree | e98fdddf7ef01e722b7a4e8577f2be22c85abfb1 | |
parent | af5d69366c913e915e235285fc4f4926e4923f6b (diff) | |
download | chromium_src-7ffaa682fba595ffb50c7865eba2410532f3a2cf.zip chromium_src-7ffaa682fba595ffb50c7865eba2410532f3a2cf.tar.gz chromium_src-7ffaa682fba595ffb50c7865eba2410532f3a2cf.tar.bz2 |
Change GURL::DomainIs to take a StringPiece.
This CL also changes the comments, adds tests, and updates any callers of DomainIs that passed a length along with the domain.
Review URL: https://codereview.chromium.org/1259263003
Cr-Commit-Position: refs/heads/master@{#341486}
-rw-r--r-- | chrome/browser/safe_browsing/incident_reporting/off_domain_inclusion_detector.cc | 2 | ||||
-rw-r--r-- | content/browser/frame_host/debug_urls.cc | 4 | ||||
-rw-r--r-- | content/renderer/render_frame_impl.cc | 2 | ||||
-rw-r--r-- | net/base/sdch_dictionary.cc | 2 | ||||
-rw-r--r-- | url/gurl.cc | 41 | ||||
-rw-r--r-- | url/gurl.h | 25 | ||||
-rw-r--r-- | url/gurl_unittest.cc | 71 |
7 files changed, 76 insertions, 71 deletions
diff --git a/chrome/browser/safe_browsing/incident_reporting/off_domain_inclusion_detector.cc b/chrome/browser/safe_browsing/incident_reporting/off_domain_inclusion_detector.cc index 54cec01..ed258e7 100644 --- a/chrome/browser/safe_browsing/incident_reporting/off_domain_inclusion_detector.cc +++ b/chrome/browser/safe_browsing/incident_reporting/off_domain_inclusion_detector.cc @@ -203,7 +203,7 @@ void OffDomainInclusionDetector::BeginAnalysis( } else if (!main_frame_is_ip && !request_is_ip) { // Neither are: compare as domains is_off_domain = !off_domain_inclusion_info->request_url.DomainIs( - off_domain_inclusion_info->main_frame_domain.c_str()); + off_domain_inclusion_info->main_frame_domain); } else { // Just one is an IP is_off_domain = true; diff --git a/content/browser/frame_host/debug_urls.cc b/content/browser/frame_host/debug_urls.cc index 6cb45ad..21e6bd2 100644 --- a/content/browser/frame_host/debug_urls.cc +++ b/content/browser/frame_host/debug_urls.cc @@ -68,7 +68,7 @@ void HandlePpapiFlashDebugURL(const GURL& url) { bool IsKaskoDebugURL(const GURL& url) { #if defined(KASKO) return (url.is_valid() && url.SchemeIs(kChromeUIScheme) && - url.DomainIs(kKaskoCrashDomain, sizeof(kKaskoCrashDomain) - 1) && + url.DomainIs(kKaskoCrashDomain) && url.path() == kKaskoSendReport); #else return false; @@ -101,7 +101,7 @@ bool IsAsanDebugURL(const GURL& url) { #endif if (!(url.is_valid() && url.SchemeIs(kChromeUIScheme) && - url.DomainIs(kAsanCrashDomain, sizeof(kAsanCrashDomain) - 1) && + url.DomainIs(kAsanCrashDomain) && url.has_path())) { return false; } diff --git a/content/renderer/render_frame_impl.cc b/content/renderer/render_frame_impl.cc index acf0c52..afa9f3d 100644 --- a/content/renderer/render_frame_impl.cc +++ b/content/renderer/render_frame_impl.cc @@ -338,7 +338,7 @@ NOINLINE void MaybeTriggerAsanError(const GURL& url) { const char kCorruptHeap[] = "/corrupt-heap"; #endif - if (!url.DomainIs(kCrashDomain, sizeof(kCrashDomain) - 1)) + if (!url.DomainIs(kCrashDomain)) return; if (!url.has_path()) diff --git a/net/base/sdch_dictionary.cc b/net/base/sdch_dictionary.cc index 6e4abe1..b8faf55 100644 --- a/net/base/sdch_dictionary.cc +++ b/net/base/sdch_dictionary.cc @@ -13,7 +13,7 @@ namespace { bool DomainMatch(const GURL& gurl, const std::string& restriction) { // TODO(jar): This is not precisely a domain match definition. - return gurl.DomainIs(restriction.data(), restriction.size()); + return gurl.DomainIs(restriction); } } // namespace diff --git a/url/gurl.cc b/url/gurl.cc index 2547a95..31e8c75 100644 --- a/url/gurl.cc +++ b/url/gurl.cc @@ -14,6 +14,7 @@ #include "url/gurl.h" #include "base/logging.h" +#include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "url/url_canon_stdstring.h" #include "url/url_util.h" @@ -481,47 +482,45 @@ const GURL& GURL::EmptyGURL() { #endif // WIN32 -bool GURL::DomainIs(const char* lower_ascii_domain, - int domain_len) const { - // Return false if this URL is not valid or domain is empty. - if (!is_valid_ || !domain_len) +bool GURL::DomainIs(base::StringPiece lower_ascii_domain) const { + if (!is_valid_ || lower_ascii_domain.empty()) return false; // FileSystem URLs have empty parsed_.host, so check this first. if (SchemeIsFileSystem() && inner_url_) - return inner_url_->DomainIs(lower_ascii_domain, domain_len); + return inner_url_->DomainIs(lower_ascii_domain); if (!parsed_.host.is_nonempty()) return false; - // Check whether the host name is end with a dot. If yes, treat it - // the same as no-dot unless the input comparison domain is end - // with dot. - const char* last_pos = spec_.data() + parsed_.host.end() - 1; + // If the host name ends with a dot but the input domain doesn't, + // then we ignore the dot in the host name. + const char* host_last_pos = spec_.data() + parsed_.host.end() - 1; int host_len = parsed_.host.len; - if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) { - last_pos--; + int domain_len = lower_ascii_domain.length(); + if ('.' == *host_last_pos && '.' != lower_ascii_domain[domain_len - 1]) { + host_last_pos--; host_len--; } - // Return false if host's length is less than domain's length. if (host_len < domain_len) return false; - // Compare this url whether belong specific domain. - const char* start_pos = spec_.data() + parsed_.host.begin + - host_len - domain_len; + // |host_first_pos| is the start of the compared part of the host name, not + // start of the whole host name. + const char* host_first_pos = spec_.data() + parsed_.host.begin + + host_len - domain_len; if (!base::LowerCaseEqualsASCII( - base::StringPiece(start_pos, last_pos - start_pos + 1), - base::StringPiece(lower_ascii_domain, domain_len))) + base::StringPiece(host_first_pos, domain_len), lower_ascii_domain)) return false; - // Check whether host has right domain start with dot, make sure we got - // right domain range. For example www.google.com has domain - // "google.com" but www.iamnotgoogle.com does not. + // Make sure there aren't extra characters in host before the compared part; + // if the host name is longer than the input domain name, then the character + // immediately before the compared part should be a dot. For example, + // www.google.com has domain "google.com", but www.iamnotgoogle.com does not. if ('.' != lower_ascii_domain[0] && host_len > domain_len && - '.' != *(start_pos - 1)) + '.' != *(host_first_pos - 1)) return false; return true; @@ -10,6 +10,7 @@ #include "base/memory/scoped_ptr.h" #include "base/strings/string16.h" +#include "base/strings/string_piece.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" #include "url/url_canon_stdstring.h" @@ -332,27 +333,19 @@ class URL_EXPORT GURL { std::string PathForRequest() const; // Returns the host, excluding the square brackets surrounding IPv6 address - // literals. This can be useful for passing to getaddrinfo(). + // literals. This can be useful for passing to getaddrinfo(). std::string HostNoBrackets() const; // Returns true if this URL's host matches or is in the same domain as - // the given input string. For example if this URL was "www.google.com", - // this would match "com", "google.com", and "www.google.com - // (input domain should be lower-case ASCII to match the canonicalized - // scheme). This call is more efficient than getting the host and check + // the given input string. For example, if the hostname of the URL is + // "www.google.com", this will return true for "com", "google.com", and + // "www.google.com". + // + // The input domain should be lower-case ASCII to match the canonicalized + // scheme. This call is more efficient than getting the host and check // whether host has the specific domain or not because no copies or // object constructions are done. - // - // If function DomainIs has parameter domain_len, which means the parameter - // lower_ascii_domain does not gurantee to terminate with NULL character. - bool DomainIs(const char* lower_ascii_domain, int domain_len) const; - - // If function DomainIs only has parameter lower_ascii_domain, which means - // domain string should be terminate with NULL character. - bool DomainIs(const char* lower_ascii_domain) const { - return DomainIs(lower_ascii_domain, - static_cast<int>(strlen(lower_ascii_domain))); - } + bool DomainIs(base::StringPiece lower_ascii_domain) const; // Swaps the contents of this GURL object with the argument without doing // any memory allocations. diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc index 1a8088b..788c3d8 100644 --- a/url/gurl_unittest.cc +++ b/url/gurl_unittest.cc @@ -561,43 +561,56 @@ TEST(GURLTest, HostNoBrackets) { } TEST(GURLTest, DomainIs) { - const char google_domain[] = "google.com"; + GURL url_1("http://google.com/foo"); + EXPECT_TRUE(url_1.DomainIs("google.com")); - GURL url_1("http://www.google.com:99/foo"); - EXPECT_TRUE(url_1.DomainIs(google_domain)); + // Subdomain and port are ignored. + GURL url_2("http://www.google.com:99/foo"); + EXPECT_TRUE(url_2.DomainIs("google.com")); - GURL url_2("http://google.com:99/foo"); - EXPECT_TRUE(url_2.DomainIs(google_domain)); + // Different top-level domain. + GURL url_3("http://www.google.com.cn/foo"); + EXPECT_FALSE(url_3.DomainIs("google.com")); - GURL url_3("http://google.com./foo"); - EXPECT_TRUE(url_3.DomainIs(google_domain)); + // Different host name. + GURL url_4("http://www.iamnotgoogle.com/foo"); + EXPECT_FALSE(url_4.DomainIs("google.com")); - GURL url_4("http://google.com/foo"); - EXPECT_FALSE(url_4.DomainIs("google.com.")); + // The input must be lower-cased otherwise DomainIs returns false. + GURL url_5("http://www.google.com/foo"); + EXPECT_FALSE(url_5.DomainIs("Google.com")); - GURL url_5("http://google.com./foo"); - EXPECT_TRUE(url_5.DomainIs("google.com.")); - - GURL url_6("http://www.google.com./foo"); - EXPECT_TRUE(url_6.DomainIs(".com.")); - - GURL url_7("http://www.balabala.com/foo"); - EXPECT_FALSE(url_7.DomainIs(google_domain)); - - GURL url_8("http://www.google.com.cn/foo"); - EXPECT_FALSE(url_8.DomainIs(google_domain)); - - GURL url_9("http://www.iamnotgoogle.com/foo"); - EXPECT_FALSE(url_9.DomainIs(google_domain)); + // If the URL is invalid, DomainIs returns false. + GURL invalid_url("google.com"); + EXPECT_FALSE(invalid_url.is_valid()); + EXPECT_FALSE(invalid_url.DomainIs("google.com")); +} - GURL url_10("http://www.iamnotgoogle.com../foo"); - EXPECT_FALSE(url_10.DomainIs(".com")); +TEST(GURLTest, DomainIsTerminatingDotBehavior) { + // If the host part ends with a dot, it matches input domains + // with or without a dot. + GURL url_with_dot("http://www.google.com./foo"); + EXPECT_TRUE(url_with_dot.DomainIs("google.com")); + EXPECT_TRUE(url_with_dot.DomainIs("google.com.")); + EXPECT_TRUE(url_with_dot.DomainIs(".com")); + EXPECT_TRUE(url_with_dot.DomainIs(".com.")); + + // But, if the host name doesn't end with a dot and the input + // domain does, then it's considered to not match. + GURL url_without_dot("http://google.com/foo"); + EXPECT_FALSE(url_without_dot.DomainIs("google.com.")); + + // If the URL ends with two dots, it doesn't match. + GURL url_with_two_dots("http://www.google.com../foo"); + EXPECT_FALSE(url_with_two_dots.DomainIs("google.com")); +} - GURL url_11("filesystem:http://www.google.com:99/foo/"); - EXPECT_TRUE(url_11.DomainIs(google_domain)); +TEST(GURLTest, DomainIsWithFilesystemScheme) { + GURL url_1("filesystem:http://www.google.com:99/foo/"); + EXPECT_TRUE(url_1.DomainIs("google.com")); - GURL url_12("filesystem:http://www.iamnotgoogle.com/foo/"); - EXPECT_FALSE(url_12.DomainIs(google_domain)); + GURL url_2("filesystem:http://www.iamnotgoogle.com/foo/"); + EXPECT_FALSE(url_2.DomainIs("google.com")); } // Newlines should be stripped from inputs. |