diff options
author | aa@chromium.org <aa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-11-13 20:27:42 +0000 |
---|---|---|
committer | aa@chromium.org <aa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-11-13 20:27:42 +0000 |
commit | 885c0e94fd5e3d81bb258191d720b7b22ca683df (patch) | |
tree | 27022b02d52dda9e1cb8b86cde08c86220615f56 /extensions | |
parent | bf4d4efcd977d9f4438725d77e6bc9e02aac4b1c (diff) | |
download | chromium_src-885c0e94fd5e3d81bb258191d720b7b22ca683df.zip chromium_src-885c0e94fd5e3d81bb258191d720b7b22ca683df.tar.gz chromium_src-885c0e94fd5e3d81bb258191d720b7b22ca683df.tar.bz2 |
Establish /extensions directory and move URLPattern there.
This doesn't create an extensions gyp target, or a module with a public API.
It does set up DEPS rules and seems like it will make it easier for people to
write correctly layered code today, while we are in transition.
BUG=159265
TBR=ben@chromium.org
Review URL: https://codereview.chromium.org/11410015
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@167454 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'extensions')
-rw-r--r-- | extensions/DEPS | 4 | ||||
-rw-r--r-- | extensions/README | 3 | ||||
-rw-r--r-- | extensions/common/DEPS | 3 | ||||
-rw-r--r-- | extensions/common/constants.cc | 11 | ||||
-rw-r--r-- | extensions/common/constants.h | 15 | ||||
-rw-r--r-- | extensions/common/url_pattern.cc | 524 | ||||
-rw-r--r-- | extensions/common/url_pattern.h | 236 | ||||
-rw-r--r-- | extensions/common/url_pattern_unittest.cc | 659 |
8 files changed, 1455 insertions, 0 deletions
diff --git a/extensions/DEPS b/extensions/DEPS new file mode 100644 index 0000000..fd6200f --- /dev/null +++ b/extensions/DEPS @@ -0,0 +1,4 @@ +include_rules = [ + "+base", + "+googleurl" +] diff --git a/extensions/README b/extensions/README new file mode 100644 index 0000000..c994549 --- /dev/null +++ b/extensions/README @@ -0,0 +1,3 @@ +This will become a reusable extensions module. It implements the core parts of +Chrome's extension system, and can be used with any host of the 'content' +module. diff --git a/extensions/common/DEPS b/extensions/common/DEPS new file mode 100644 index 0000000..630953d --- /dev/null +++ b/extensions/common/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+content/public/common" +] diff --git a/extensions/common/constants.cc b/extensions/common/constants.cc new file mode 100644 index 0000000..ecafa43 --- /dev/null +++ b/extensions/common/constants.cc @@ -0,0 +1,11 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extensions/common/constants.h" + +namespace extensions { + +const char kExtensionScheme[] = "chrome-extension"; + +} // namespace extensions diff --git a/extensions/common/constants.h b/extensions/common/constants.h new file mode 100644 index 0000000..f601b38 --- /dev/null +++ b/extensions/common/constants.h @@ -0,0 +1,15 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef EXTENSIONS_COMMON_CONSTANTS_H_ +#define EXTENSIONS_COMMON_CONSTANTS_H_ + +namespace extensions { + +// Scheme we serve extension content from. 
+extern const char kExtensionScheme[]; + +} // namespace extensions + +#endif // EXTENSIONS_COMMON_CONSTANTS_H_ diff --git a/extensions/common/url_pattern.cc b/extensions/common/url_pattern.cc new file mode 100644 index 0000000..73159b3 --- /dev/null +++ b/extensions/common/url_pattern.cc @@ -0,0 +1,524 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extensions/common/url_pattern.h" + +#include "base/string_number_conversions.h" +#include "base/string_piece.h" +#include "base/string_split.h" +#include "base/string_util.h" +#include "content/public/common/url_constants.h" +#include "extensions/common/constants.h" +#include "googleurl/src/gurl.h" +#include "googleurl/src/url_util.h" + +const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; + +namespace { + +// TODO(aa): What about more obscure schemes like data: and javascript: ? +// Note: keep this array in sync with kValidSchemeMasks. 
+const char* kValidSchemes[] = { + chrome::kHttpScheme, + chrome::kHttpsScheme, + chrome::kFileScheme, + chrome::kFtpScheme, + chrome::kChromeUIScheme, + extensions::kExtensionScheme, + chrome::kFileSystemScheme, +}; + +const int kValidSchemeMasks[] = { + URLPattern::SCHEME_HTTP, + URLPattern::SCHEME_HTTPS, + URLPattern::SCHEME_FILE, + URLPattern::SCHEME_FTP, + URLPattern::SCHEME_CHROMEUI, + URLPattern::SCHEME_EXTENSION, + URLPattern::SCHEME_FILESYSTEM, +}; + +COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks), + must_keep_these_arrays_in_sync); + +const char kParseSuccess[] = "Success."; +const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator."; +const char kParseErrorInvalidScheme[] = "Invalid scheme."; +const char kParseErrorWrongSchemeType[] = "Wrong scheme type."; +const char kParseErrorEmptyHost[] = "Host can not be empty."; +const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard."; +const char kParseErrorEmptyPath[] = "Empty path."; +const char kParseErrorInvalidPort[] = "Invalid port."; + +// Message explaining each URLPattern::ParseResult. +const char* const kParseResultMessages[] = { + kParseSuccess, + kParseErrorMissingSchemeSeparator, + kParseErrorInvalidScheme, + kParseErrorWrongSchemeType, + kParseErrorEmptyHost, + kParseErrorInvalidHostWildcard, + kParseErrorEmptyPath, + kParseErrorInvalidPort, +}; + +COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), + must_add_message_for_each_parse_result); + +const char kPathSeparator[] = "/"; + +bool IsStandardScheme(const std::string& scheme) { + // "*" gets the same treatment as a standard scheme. + if (scheme == "*") + return true; + + return url_util::IsStandard(scheme.c_str(), + url_parse::Component(0, static_cast<int>(scheme.length()))); +} + +bool IsValidPortForScheme(const std::string& scheme, const std::string& port) { + if (port == "*") + return true; + + // Only accept non-wildcard ports if the scheme uses ports. 
+ if (url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length()) == + url_parse::PORT_UNSPECIFIED) { + return false; + } + + int parsed_port = url_parse::PORT_UNSPECIFIED; + if (!base::StringToInt(port, &parsed_port)) + return false; + return (parsed_port >= 0) && (parsed_port < 65536); +} + +} // namespace + +URLPattern::URLPattern() + : valid_schemes_(SCHEME_NONE), + match_all_urls_(false), + match_subdomains_(false), + port_("*") {} + +URLPattern::URLPattern(int valid_schemes) + : valid_schemes_(valid_schemes), + match_all_urls_(false), + match_subdomains_(false), + port_("*") {} + +URLPattern::URLPattern(int valid_schemes, const std::string& pattern) + // Strict error checking is used, because this constructor is only + // appropriate when we know |pattern| is valid. + : valid_schemes_(valid_schemes), + match_all_urls_(false), + match_subdomains_(false), + port_("*") { + if (PARSE_SUCCESS != Parse(pattern)) + NOTREACHED() << "URLPattern is invalid: " << pattern; +} + +URLPattern::~URLPattern() { +} + +bool URLPattern::operator<(const URLPattern& other) const { + return GetAsString() < other.GetAsString(); +} + +bool URLPattern::operator==(const URLPattern& other) const { + return GetAsString() == other.GetAsString(); +} + +URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) { + spec_.clear(); + SetMatchAllURLs(false); + SetMatchSubdomains(false); + SetPort("*"); + + // Special case pattern to match every valid URL. + if (pattern == kAllUrlsPattern) { + SetMatchAllURLs(true); + return PARSE_SUCCESS; + } + + // Parse out the scheme. + size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator); + bool has_standard_scheme_separator = true; + + // Some urls also use ':' alone as the scheme separator. 
+ if (scheme_end_pos == std::string::npos) { + scheme_end_pos = pattern.find(':'); + has_standard_scheme_separator = false; + } + + if (scheme_end_pos == std::string::npos) + return PARSE_ERROR_MISSING_SCHEME_SEPARATOR; + + if (!SetScheme(pattern.substr(0, scheme_end_pos))) + return PARSE_ERROR_INVALID_SCHEME; + + bool standard_scheme = IsStandardScheme(scheme_); + if (standard_scheme != has_standard_scheme_separator) + return PARSE_ERROR_WRONG_SCHEME_SEPARATOR; + + // Advance past the scheme separator. + scheme_end_pos += + (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1); + if (scheme_end_pos >= pattern.size()) + return PARSE_ERROR_EMPTY_HOST; + + // Parse out the host and path. + size_t host_start_pos = scheme_end_pos; + size_t path_start_pos = 0; + + if (!standard_scheme) { + path_start_pos = host_start_pos; + } else if (scheme_ == chrome::kFileScheme) { + size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); + if (host_end_pos == std::string::npos) { + // Allow hostname omission. + // e.g. file://* is interpreted as file:///*, + // file://foo* is interpreted as file:///foo*. + path_start_pos = host_start_pos - 1; + } else { + // Ignore hostname if scheme is file://. + // e.g. file://localhost/foo is equal to file:///foo. + path_start_pos = host_end_pos; + } + } else { + size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); + + // Host is required. + if (host_start_pos == host_end_pos) + return PARSE_ERROR_EMPTY_HOST; + + if (host_end_pos == std::string::npos) + return PARSE_ERROR_EMPTY_PATH; + + host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos); + + // The first component can optionally be '*' to match all subdomains. 
+ std::vector<std::string> host_components; + base::SplitString(host_, '.', &host_components); + if (host_components[0] == "*") { + match_subdomains_ = true; + host_components.erase(host_components.begin(), + host_components.begin() + 1); + } + host_ = JoinString(host_components, '.'); + + path_start_pos = host_end_pos; + } + + SetPath(pattern.substr(path_start_pos)); + + size_t port_pos = host_.find(':'); + if (port_pos != std::string::npos) { + if (!SetPort(host_.substr(port_pos + 1))) + return PARSE_ERROR_INVALID_PORT; + host_ = host_.substr(0, port_pos); + } + + // No other '*' can occur in the host, though. This isn't necessary, but is + // done as a convenience to developers who might otherwise be confused and + // think '*' works as a glob in the host. + if (host_.find('*') != std::string::npos) + return PARSE_ERROR_INVALID_HOST_WILDCARD; + + return PARSE_SUCCESS; +} + +void URLPattern::SetValidSchemes(int valid_schemes) { + spec_.clear(); + valid_schemes_ = valid_schemes; +} + +void URLPattern::SetHost(const std::string& host) { + spec_.clear(); + host_ = host; +} + +void URLPattern::SetMatchAllURLs(bool val) { + spec_.clear(); + match_all_urls_ = val; + + if (val) { + match_subdomains_ = true; + scheme_ = "*"; + host_.clear(); + SetPath("/*"); + } +} + +void URLPattern::SetMatchSubdomains(bool val) { + spec_.clear(); + match_subdomains_ = val; +} + +bool URLPattern::SetScheme(const std::string& scheme) { + spec_.clear(); + scheme_ = scheme; + if (scheme_ == "*") { + valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); + } else if (!IsValidScheme(scheme_)) { + return false; + } + return true; +} + +bool URLPattern::IsValidScheme(const std::string& scheme) const { + if (valid_schemes_ == SCHEME_ALL) + return true; + + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) + return true; + } + + return false; +} + +void URLPattern::SetPath(const std::string& path) { + spec_.clear(); + 
path_ = path; + path_escaped_ = path_; + ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); + ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); +} + +bool URLPattern::SetPort(const std::string& port) { + spec_.clear(); + if (IsValidPortForScheme(scheme_, port)) { + port_ = port; + return true; + } + return false; +} + +bool URLPattern::MatchesURL(const GURL& test) const { + const GURL* test_url = &test; + bool has_inner_url = test.inner_url() != NULL; + + if (has_inner_url) { + if (!test.SchemeIsFileSystem()) + return false; // The only nested URLs we handle are filesystem URLs. + test_url = test.inner_url(); + } + + if (!MatchesScheme(test_url->scheme())) + return false; + + if (match_all_urls_) + return true; + + std::string path_for_request = test.PathForRequest(); + if (has_inner_url) + path_for_request = test_url->path() + path_for_request; + + return MatchesSecurityOriginHelper(*test_url) && + MatchesPath(path_for_request); +} + +bool URLPattern::MatchesSecurityOrigin(const GURL& test) const { + const GURL* test_url = &test; + bool has_inner_url = test.inner_url() != NULL; + + if (has_inner_url) { + if (!test.SchemeIsFileSystem()) + return false; // The only nested URLs we handle are filesystem URLs. + test_url = test.inner_url(); + } + + if (!MatchesScheme(test_url->scheme())) + return false; + + if (match_all_urls_) + return true; + + return MatchesSecurityOriginHelper(*test_url); +} + +bool URLPattern::MatchesScheme(const std::string& test) const { + if (!IsValidScheme(test)) + return false; + + return scheme_ == "*" || test == scheme_; +} + +bool URLPattern::MatchesHost(const std::string& host) const { + std::string test(chrome::kHttpScheme); + test += content::kStandardSchemeSeparator; + test += host; + test += "/"; + return MatchesHost(GURL(test)); +} + +bool URLPattern::MatchesHost(const GURL& test) const { + // If the hosts are exactly equal, we have a match. 
+ if (test.host() == host_) + return true; + + // If we're matching subdomains, and we have no host in the match pattern, + // that means that we're matching all hosts, which means we have a match no + // matter what the test host is. + if (match_subdomains_ && host_.empty()) + return true; + + // Otherwise, we can only match if our match pattern matches subdomains. + if (!match_subdomains_) + return false; + + // We don't do subdomain matching against IP addresses, so we can give up now + // if the test host is an IP address. + if (test.HostIsIPAddress()) + return false; + + // Check if the test host is a subdomain of our host. + if (test.host().length() <= (host_.length() + 1)) + return false; + + if (test.host().compare(test.host().length() - host_.length(), + host_.length(), host_) != 0) + return false; + + return test.host()[test.host().length() - host_.length() - 1] == '.'; +} + +bool URLPattern::MatchesPath(const std::string& test) const { + if (!MatchPattern(test, path_escaped_)) + return false; + + return true; +} + +bool URLPattern::MatchesPort(int port) const { + if (port == url_parse::PORT_INVALID) + return false; + + return port_ == "*" || port_ == base::IntToString(port); +} + + +const std::string& URLPattern::GetAsString() const { + if (!spec_.empty()) + return spec_; + + if (match_all_urls_) { + spec_ = kAllUrlsPattern; + return spec_; + } + + bool standard_scheme = IsStandardScheme(scheme_); + + std::string spec = scheme_ + + (standard_scheme ? 
content::kStandardSchemeSeparator : ":"); + + if (scheme_ != chrome::kFileScheme && standard_scheme) { + if (match_subdomains_) { + spec += "*"; + if (!host_.empty()) + spec += "."; + } + + if (!host_.empty()) + spec += host_; + + if (port_ != "*") { + spec += ":"; + spec += port_; + } + } + + if (!path_.empty()) + spec += path_; + + spec_ = spec; + return spec_; +} + +bool URLPattern::OverlapsWith(const URLPattern& other) const { + if (!MatchesAnyScheme(other.GetExplicitSchemes()) && + !other.MatchesAnyScheme(GetExplicitSchemes())) { + return false; + } + + if (!MatchesHost(other.host()) && !other.MatchesHost(host_)) + return false; + + if (port_ != "*" && other.port() != "*" && port_ != other.port()) + return false; + + // We currently only use OverlapsWith() for the patterns inside + // URLPatternSet. In those cases, we know that the path will have only a + // single wildcard at the end. This makes figuring out overlap much easier. It + // seems like there is probably a computer-sciency way to solve the general + // case, but we don't need that yet. + DCHECK(path_.find('*') == path_.size() - 1); + DCHECK(other.path().find('*') == other.path().size() - 1); + + if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) && + !other.MatchesPath(path_.substr(0, path_.size() - 1))) + return false; + + return true; +} + +bool URLPattern::MatchesAnyScheme( + const std::vector<std::string>& schemes) const { + for (std::vector<std::string>::const_iterator i = schemes.begin(); + i != schemes.end(); ++i) { + if (MatchesScheme(*i)) + return true; + } + + return false; +} + +bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const { + // Ignore hostname if scheme is file://. 
+ if (scheme_ != chrome::kFileScheme && !MatchesHost(test)) + return false; + + if (!MatchesPort(test.EffectiveIntPort())) + return false; + + return true; +} + +std::vector<std::string> URLPattern::GetExplicitSchemes() const { + std::vector<std::string> result; + + if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { + result.push_back(scheme_); + return result; + } + + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (MatchesScheme(kValidSchemes[i])) { + result.push_back(kValidSchemes[i]); + } + } + + return result; +} + +std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const { + std::vector<std::string> explicit_schemes = GetExplicitSchemes(); + std::vector<URLPattern> result; + + for (std::vector<std::string>::const_iterator i = explicit_schemes.begin(); + i != explicit_schemes.end(); ++i) { + URLPattern temp = *this; + temp.SetScheme(*i); + temp.SetMatchAllURLs(false); + result.push_back(temp); + } + + return result; +} + +// static +const char* URLPattern::GetParseResultString( + URLPattern::ParseResult parse_result) { + return kParseResultMessages[parse_result]; +} diff --git a/extensions/common/url_pattern.h b/extensions/common/url_pattern.h new file mode 100644 index 0000000..42188a5 --- /dev/null +++ b/extensions/common/url_pattern.h @@ -0,0 +1,236 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#ifndef EXTENSIONS_COMMON_URL_PATTERN_H_ +#define EXTENSIONS_COMMON_URL_PATTERN_H_ + +#include <functional> +#include <string> +#include <vector> + +class GURL; + +// A pattern that can be used to match URLs. A URLPattern is a very restricted +// subset of URL syntax: +// +// <url-pattern> := <scheme>://<host><port><path> | '<all_urls>' +// <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' | +// 'chrome-extension' | 'filesystem' +// <host> := '*' | '*.' 
<anychar except '/' and '*'>+ +// <port> := [':' ('*' | <port number between 0 and 65535>)] +// <path> := '/' <any chars> +// +// * Host is not used when the scheme is 'file'. +// * The path can have embedded '*' characters which act as glob wildcards. +// * '<all_urls>' is a special pattern that matches any URL that contains a +// valid scheme (as specified by valid_schemes_). +// * The '*' scheme pattern excludes file URLs. +// +// Examples of valid patterns: +// - http://*/* +// - http://*/foo* +// - https://*.google.com/foo*bar +// - file://monkey* +// - http://127.0.0.1/* +// +// Examples of invalid patterns: +// - http://* -- path not specified +// - http://*foo/bar -- * not allowed as substring of host component +// - http://foo.*.bar/baz -- * must be first component +// - http:/bar -- scheme separator not found +// - foo://* -- invalid scheme +// - chrome:// -- we don't support chrome internal URLs +class URLPattern { + public: + // A collection of scheme bitmasks for use with valid_schemes. + enum SchemeMasks { + SCHEME_NONE = 0, + SCHEME_HTTP = 1 << 0, + SCHEME_HTTPS = 1 << 1, + SCHEME_FILE = 1 << 2, + SCHEME_FTP = 1 << 3, + SCHEME_CHROMEUI = 1 << 4, + SCHEME_EXTENSION = 1 << 5, + SCHEME_FILESYSTEM = 1 << 6, + + // IMPORTANT! + // SCHEME_ALL will match every scheme, including chrome://, chrome- + // extension://, about:, etc. Because this has lots of security + // implications, third-party extensions should usually not be able to get + // access to URL patterns initialized this way. If there is a reason + // for violating this general rule, document why this it safe. + SCHEME_ALL = -1, + }; + + // Error codes returned from Parse(). + enum ParseResult { + PARSE_SUCCESS = 0, + PARSE_ERROR_MISSING_SCHEME_SEPARATOR, + PARSE_ERROR_INVALID_SCHEME, + PARSE_ERROR_WRONG_SCHEME_SEPARATOR, + PARSE_ERROR_EMPTY_HOST, + PARSE_ERROR_INVALID_HOST_WILDCARD, + PARSE_ERROR_EMPTY_PATH, + PARSE_ERROR_INVALID_PORT, + NUM_PARSE_RESULTS + }; + + // The <all_urls> string pattern. 
+ static const char kAllUrlsPattern[]; + + explicit URLPattern(int valid_schemes); + + // Convenience to construct a URLPattern from a string. If the string is not + // known ahead of time, use Parse() instead, which returns success or failure. + URLPattern(int valid_schemes, const std::string& pattern); + + URLPattern(); + ~URLPattern(); + + bool operator<(const URLPattern& other) const; + bool operator==(const URLPattern& other) const; + + // Initializes this instance by parsing the provided string. Returns + // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On + // failure, this instance will have some intermediate values and is in an + // invalid state. + ParseResult Parse(const std::string& pattern_str); + + // Gets the bitmask of valid schemes. + int valid_schemes() const { return valid_schemes_; } + void SetValidSchemes(int valid_schemes); + + // Gets the host the pattern matches. This can be an empty string if the + // pattern matches all hosts (the input was <scheme>://*/<whatever>). + const std::string& host() const { return host_; } + void SetHost(const std::string& host); + + // Gets whether to match subdomains of host(). + bool match_subdomains() const { return match_subdomains_; } + void SetMatchSubdomains(bool val); + + // Gets the path the pattern matches with the leading slash. This can have + // embedded asterisks which are interpreted using glob rules. + const std::string& path() const { return path_; } + void SetPath(const std::string& path); + + // Returns true if this pattern matches all urls. + bool match_all_urls() const { return match_all_urls_; } + void SetMatchAllURLs(bool val); + + // Sets the scheme for pattern matches. This can be a single '*' if the + // pattern matches all valid schemes (as defined by the valid_schemes_ + // property). Returns false on failure (if the scheme is not valid). 
+ bool SetScheme(const std::string& scheme); + // Note: You should use MatchesScheme() instead of this getter unless you + // absolutely need the exact scheme. This is exposed for testing. + const std::string& scheme() const { return scheme_; } + + // Returns true if the specified scheme can be used in this URL pattern, and + // false otherwise. Uses valid_schemes_ to determine validity. + bool IsValidScheme(const std::string& scheme) const; + + // Returns true if this instance matches the specified URL. + bool MatchesURL(const GURL& test) const; + + // Returns true if this instance matches the specified security origin. + bool MatchesSecurityOrigin(const GURL& test) const; + + // Returns true if |test| matches our scheme. + // Note that if test is "filesystem", this may fail whereas MatchesURL + // may succeed. MatchesURL is smart enough to look at the inner_url instead + // of the outer "filesystem:" part. + bool MatchesScheme(const std::string& test) const; + + // Returns true if |test| matches our host. + bool MatchesHost(const std::string& test) const; + bool MatchesHost(const GURL& test) const; + + // Returns true if |test| matches our path. + bool MatchesPath(const std::string& test) const; + + // Returns true if |port| matches our port. + bool MatchesPort(int port) const; + + // Sets the port. Returns false if the port is invalid. + bool SetPort(const std::string& port); + const std::string& port() const { return port_; } + + // Returns a string representing this instance. + const std::string& GetAsString() const; + + // Determine whether there is a URL that would match this instance and another + // instance. This method is symmetrical: Calling other.OverlapsWith(this) + // would result in the same answer. + bool OverlapsWith(const URLPattern& other) const; + + // Convert this URLPattern into an equivalent set of URLPatterns that don't + // use a wildcard in the scheme component. 
If this URLPattern doesn't use a + // wildcard scheme, then the returned set will contain one element that is + // equivalent to this instance. + std::vector<URLPattern> ConvertToExplicitSchemes() const; + + static bool EffectiveHostCompare(const URLPattern& a, const URLPattern& b) { + if (a.match_all_urls_ && b.match_all_urls_) + return false; + return a.host_.compare(b.host_) < 0; + }; + + // Used for origin comparisons in a std::set. + class EffectiveHostCompareFunctor { + public: + bool operator()(const URLPattern& a, const URLPattern& b) const { + return EffectiveHostCompare(a, b); + }; + }; + + // Get an error string for a ParseResult. + static const char* GetParseResultString(URLPattern::ParseResult parse_result); + + private: + // Returns true if any of the |schemes| items matches our scheme. + bool MatchesAnyScheme(const std::vector<std::string>& schemes) const; + + bool MatchesSecurityOriginHelper(const GURL& test) const; + + // If the URLPattern contains a wildcard scheme, returns a list of + // equivalent literal schemes, otherwise returns the current scheme. + std::vector<std::string> GetExplicitSchemes() const; + + // A bitmask containing the schemes which are considered valid for this + // pattern. Parse() uses this to decide whether a pattern contains a valid + // scheme. MatchesScheme uses this to decide whether a wildcard scheme_ + // matches a given test scheme. + int valid_schemes_; + + // True if this is a special-case "<all_urls>" pattern. + bool match_all_urls_; + + // The scheme for the pattern. + std::string scheme_; + + // The host without any leading "*" components. + std::string host_; + + // Whether we should match subdomains of the host. This is true if the first + // component of the pattern's host was "*". + bool match_subdomains_; + + // The port. + std::string port_; + + // The path to match. This is everything after the host of the URL, or + // everything after the scheme in the case of file:// URLs. 
+ std::string path_; + + // The path with "?" and "\" characters escaped for use with the + // MatchPattern() function. + std::string path_escaped_; + + // A string representing this URLPattern. + mutable std::string spec_; +}; + +typedef std::vector<URLPattern> URLPatternList; + +#endif // EXTENSIONS_COMMON_URL_PATTERN_H_ diff --git a/extensions/common/url_pattern_unittest.cc b/extensions/common/url_pattern_unittest.cc new file mode 100644 index 0000000..c37b73d --- /dev/null +++ b/extensions/common/url_pattern_unittest.cc @@ -0,0 +1,659 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/memory/scoped_ptr.h" +#include "extensions/common/url_pattern.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "googleurl/src/gurl.h" + +// See url_pattern.h for examples of valid and invalid patterns. + +static const int kAllSchemes = + URLPattern::SCHEME_HTTP | + URLPattern::SCHEME_HTTPS | + URLPattern::SCHEME_FILE | + URLPattern::SCHEME_FTP | + URLPattern::SCHEME_CHROMEUI | + URLPattern::SCHEME_EXTENSION | + URLPattern::SCHEME_FILESYSTEM; + +TEST(ExtensionURLPatternTest, ParseInvalid) { + const struct { + const char* pattern; + URLPattern::ParseResult expected_result; + } kInvalidPatterns[] = { + { "http", URLPattern::PARSE_ERROR_MISSING_SCHEME_SEPARATOR }, + { "http:", URLPattern::PARSE_ERROR_WRONG_SCHEME_SEPARATOR }, + { "http:/", URLPattern::PARSE_ERROR_WRONG_SCHEME_SEPARATOR }, + { "about://", URLPattern::PARSE_ERROR_WRONG_SCHEME_SEPARATOR }, + { "http://", URLPattern::PARSE_ERROR_EMPTY_HOST }, + { "http:///", URLPattern::PARSE_ERROR_EMPTY_HOST }, + { "http://*foo/bar", URLPattern::PARSE_ERROR_INVALID_HOST_WILDCARD }, + { "http://foo.*.bar/baz", URLPattern::PARSE_ERROR_INVALID_HOST_WILDCARD }, + { "http://fo.*.ba:123/baz", URLPattern::PARSE_ERROR_INVALID_HOST_WILDCARD }, + { "http:/bar", 
URLPattern::PARSE_ERROR_WRONG_SCHEME_SEPARATOR }, + { "http://bar", URLPattern::PARSE_ERROR_EMPTY_PATH }, + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kInvalidPatterns); ++i) { + URLPattern pattern(URLPattern::SCHEME_ALL); + EXPECT_EQ(kInvalidPatterns[i].expected_result, + pattern.Parse(kInvalidPatterns[i].pattern)) + << kInvalidPatterns[i].pattern; + } +}; + +TEST(ExtensionURLPatternTest, Ports) { + const struct { + const char* pattern; + URLPattern::ParseResult expected_result; + const char* expected_port; + } kTestPatterns[] = { + { "http://foo:1234/", URLPattern::PARSE_SUCCESS, "1234" }, + { "http://foo:1234/bar", URLPattern::PARSE_SUCCESS, "1234" }, + { "http://*.foo:1234/", URLPattern::PARSE_SUCCESS, "1234" }, + { "http://*.foo:1234/bar", URLPattern::PARSE_SUCCESS,"1234" }, + { "http://:1234/", URLPattern::PARSE_SUCCESS, "1234" }, + { "http://foo:/", URLPattern::PARSE_ERROR_INVALID_PORT, "*" }, + { "http://foo:*/", URLPattern::PARSE_SUCCESS, "*" }, + { "http://*.foo:/", URLPattern::PARSE_ERROR_INVALID_PORT, "*" }, + { "http://foo:com/", URLPattern::PARSE_ERROR_INVALID_PORT, "*" }, + { "http://foo:123456/", URLPattern::PARSE_ERROR_INVALID_PORT, "*" }, + { "http://foo:80:80/monkey", URLPattern::PARSE_ERROR_INVALID_PORT, "*" }, + { "file://foo:1234/bar", URLPattern::PARSE_SUCCESS, "*" }, + { "chrome://foo:1234/bar", URLPattern::PARSE_ERROR_INVALID_PORT, "*" }, + + // Port-like strings in the path should not trigger a warning. 
+ { "http://*/:1234", URLPattern::PARSE_SUCCESS, "*" }, + { "http://*.foo/bar:1234", URLPattern::PARSE_SUCCESS, "*" }, + { "http://foo/bar:1234/path", URLPattern::PARSE_SUCCESS,"*" }, + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestPatterns); ++i) { + URLPattern pattern(URLPattern::SCHEME_ALL); + EXPECT_EQ(kTestPatterns[i].expected_result, + pattern.Parse(kTestPatterns[i].pattern)) + << "Got unexpected result for URL pattern: " + << kTestPatterns[i].pattern; + EXPECT_EQ(kTestPatterns[i].expected_port, pattern.port()) + << "Got unexpected port for URL pattern: " << kTestPatterns[i].pattern; + } +}; + +// all pages for a given scheme +TEST(ExtensionURLPatternTest, Match1) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("http://*/*")); + EXPECT_EQ("http", pattern.scheme()); + EXPECT_EQ("", pattern.host()); + EXPECT_TRUE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/*", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://google.com"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://yahoo.com"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://google.com/foo"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("https://google.com"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://74.125.127.100/search"))); +} + +// all domains +TEST(ExtensionURLPatternTest, Match2) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("https://*/foo*")); + EXPECT_EQ("https", pattern.scheme()); + EXPECT_EQ("", pattern.host()); + EXPECT_TRUE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/foo*", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("https://www.google.com/foo"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("https://www.google.com/foobar"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("http://www.google.com/foo"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("https://www.google.com/"))); + EXPECT_TRUE(pattern.MatchesURL( + 
GURL("filesystem:https://www.google.com/foobar/"))); +} + +// subdomains +TEST(URLPatternTest, Match3) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, + pattern.Parse("http://*.google.com/foo*bar")); + EXPECT_EQ("http", pattern.scheme()); + EXPECT_EQ("google.com", pattern.host()); + EXPECT_TRUE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/foo*bar", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://google.com/foobar"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://www.google.com/foo?bar"))); + EXPECT_TRUE(pattern.MatchesURL( + GURL("http://monkey.images.google.com/foooobar"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("http://yahoo.com/foobar"))); + EXPECT_TRUE(pattern.MatchesURL( + GURL("filesystem:http://google.com/foo/bar"))); + EXPECT_FALSE(pattern.MatchesURL( + GURL("filesystem:http://google.com/temporary/foobar"))); +} + +// glob escaping +TEST(ExtensionURLPatternTest, Match5) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("file:///foo?bar\\*baz")); + EXPECT_EQ("file", pattern.scheme()); + EXPECT_EQ("", pattern.host()); + EXPECT_FALSE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/foo?bar\\*baz", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("file:///foo?bar\\hellobaz"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("file:///fooXbar\\hellobaz"))); +} + +// ip addresses +TEST(ExtensionURLPatternTest, Match6) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("http://127.0.0.1/*")); + EXPECT_EQ("http", pattern.scheme()); + EXPECT_EQ("127.0.0.1", pattern.host()); + EXPECT_FALSE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/*", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://127.0.0.1"))); +} + +// subdomain matching with ip addresses +TEST(ExtensionURLPatternTest, Match7) { + URLPattern pattern(kAllSchemes); 
+ // allowed, but useless + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("http://*.0.0.1/*")); + EXPECT_EQ("http", pattern.scheme()); + EXPECT_EQ("0.0.1", pattern.host()); + EXPECT_TRUE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/*", pattern.path()); + // Subdomain matching is never done if the argument has an IP address host. + EXPECT_FALSE(pattern.MatchesURL(GURL("http://127.0.0.1"))); +}; + +// unicode +TEST(ExtensionURLPatternTest, Match8) { + URLPattern pattern(kAllSchemes); + // The below is the ASCII encoding of the following URL: + // http://*.\xe1\x80\xbf/a\xc2\x81\xe1* + EXPECT_EQ(URLPattern::PARSE_SUCCESS, + pattern.Parse("http://*.xn--gkd/a%C2%81%E1*")); + EXPECT_EQ("http", pattern.scheme()); + EXPECT_EQ("xn--gkd", pattern.host()); + EXPECT_TRUE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/a%C2%81%E1*", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL( + GURL("http://abc.\xe1\x80\xbf/a\xc2\x81\xe1xyz"))); + EXPECT_TRUE(pattern.MatchesURL( + GURL("http://\xe1\x80\xbf/a\xc2\x81\xe1\xe1"))); +}; + +// chrome:// +TEST(ExtensionURLPatternTest, Match9) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("chrome://favicon/*")); + EXPECT_EQ("chrome", pattern.scheme()); + EXPECT_EQ("favicon", pattern.host()); + EXPECT_FALSE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/*", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("chrome://favicon/http://google.com"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("chrome://favicon/https://google.com"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("chrome://history"))); +}; + +// *:// +TEST(ExtensionURLPatternTest, Match10) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("*://*/*")); + EXPECT_TRUE(pattern.MatchesScheme("http")); + EXPECT_TRUE(pattern.MatchesScheme("https")); + EXPECT_FALSE(pattern.MatchesScheme("chrome")); 
+ EXPECT_FALSE(pattern.MatchesScheme("file")); + EXPECT_FALSE(pattern.MatchesScheme("ftp")); + EXPECT_TRUE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/*", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://127.0.0.1"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("chrome://favicon/http://google.com"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("file:///foo/bar"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("file://localhost/foo/bar"))); +}; + +// <all_urls> +TEST(ExtensionURLPatternTest, Match11) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("<all_urls>")); + EXPECT_TRUE(pattern.MatchesScheme("chrome")); + EXPECT_TRUE(pattern.MatchesScheme("http")); + EXPECT_TRUE(pattern.MatchesScheme("https")); + EXPECT_TRUE(pattern.MatchesScheme("file")); + EXPECT_TRUE(pattern.MatchesScheme("filesystem")); + EXPECT_TRUE(pattern.MatchesScheme("chrome-extension")); + EXPECT_TRUE(pattern.match_subdomains()); + EXPECT_TRUE(pattern.match_all_urls()); + EXPECT_EQ("/*", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("chrome://favicon/http://google.com"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://127.0.0.1"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file:///foo/bar"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file://localhost/foo/bar"))); + + // Make sure the properties are the same when creating an <all_urls> pattern + // via SetMatchAllURLs and by parsing <all_urls>. + URLPattern pattern2(kAllSchemes); + pattern2.SetMatchAllURLs(true); + + EXPECT_EQ(pattern.valid_schemes(), pattern2.valid_schemes()); + EXPECT_EQ(pattern.match_subdomains(), pattern2.match_subdomains()); + EXPECT_EQ(pattern.path(), pattern2.path()); + EXPECT_EQ(pattern.match_all_urls(), pattern2.match_all_urls()); + EXPECT_EQ(pattern.scheme(), pattern2.scheme()); + EXPECT_EQ(pattern.port(), pattern2.port()); + EXPECT_EQ(pattern.GetAsString(), pattern2.GetAsString()); +}; + +// SCHEME_ALL matches all schemes. 
+TEST(ExtensionURLPatternTest, Match12) { + URLPattern pattern(URLPattern::SCHEME_ALL); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("<all_urls>")); + EXPECT_TRUE(pattern.MatchesScheme("chrome")); + EXPECT_TRUE(pattern.MatchesScheme("http")); + EXPECT_TRUE(pattern.MatchesScheme("https")); + EXPECT_TRUE(pattern.MatchesScheme("file")); + EXPECT_TRUE(pattern.MatchesScheme("filesystem")); + EXPECT_TRUE(pattern.MatchesScheme("javascript")); + EXPECT_TRUE(pattern.MatchesScheme("data")); + EXPECT_TRUE(pattern.MatchesScheme("about")); + EXPECT_TRUE(pattern.MatchesScheme("chrome-extension")); + EXPECT_TRUE(pattern.match_subdomains()); + EXPECT_TRUE(pattern.match_all_urls()); + EXPECT_EQ("/*", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("chrome://favicon/http://google.com"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://127.0.0.1"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file:///foo/bar"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file://localhost/foo/bar"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("chrome://newtab"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("about:blank"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("about:version"))); + EXPECT_TRUE(pattern.MatchesURL( + GURL("data:text/html;charset=utf-8,<html>asdf</html>"))); +}; + +static const struct MatchPatterns { + const char* pattern; + const char* matches; +} kMatch13UrlPatternTestCases[] = { + {"about:*", "about:blank"}, + {"about:blank", "about:blank"}, + {"about:*", "about:version"}, + {"chrome-extension://*/*", "chrome-extension://FTW"}, + {"data:*", "data:monkey"}, + {"javascript:*", "javascript:atemyhomework"}, +}; + +// SCHEME_ALL and specific schemes. 
+TEST(ExtensionURLPatternTest, Match13) { + for (size_t i = 0; i < arraysize(kMatch13UrlPatternTestCases); ++i) { + URLPattern pattern(URLPattern::SCHEME_ALL); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, + pattern.Parse(kMatch13UrlPatternTestCases[i].pattern)) + << " while parsing " << kMatch13UrlPatternTestCases[i].pattern; + EXPECT_TRUE(pattern.MatchesURL( + GURL(kMatch13UrlPatternTestCases[i].matches))) + << " while matching " << kMatch13UrlPatternTestCases[i].matches; + } + + // Negative test. + URLPattern pattern(URLPattern::SCHEME_ALL); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("data:*")); + EXPECT_FALSE(pattern.MatchesURL(GURL("about:blank"))); +}; + +// file scheme with empty hostname +TEST(ExtensionURLPatternTest, Match14) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("file:///foo*")); + EXPECT_EQ("file", pattern.scheme()); + EXPECT_EQ("", pattern.host()); + EXPECT_FALSE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/foo*", pattern.path()); + EXPECT_FALSE(pattern.MatchesURL(GURL("file://foo"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("file://foobar"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file:///foo"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file:///foobar"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file://localhost/foo"))); +} + +// file scheme without hostname part +TEST(ExtensionURLPatternTest, Match15) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("file://foo*")); + EXPECT_EQ("file", pattern.scheme()); + EXPECT_EQ("", pattern.host()); + EXPECT_FALSE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/foo*", pattern.path()); + EXPECT_FALSE(pattern.MatchesURL(GURL("file://foo"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("file://foobar"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file:///foo"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file:///foobar"))); + 
EXPECT_TRUE(pattern.MatchesURL(GURL("file://localhost/foo"))); +} + +// file scheme with hostname +TEST(ExtensionURLPatternTest, Match16) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse("file://localhost/foo*")); + EXPECT_EQ("file", pattern.scheme()); + // Since hostname is ignored for file://. + EXPECT_EQ("", pattern.host()); + EXPECT_FALSE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/foo*", pattern.path()); + EXPECT_FALSE(pattern.MatchesURL(GURL("file://foo"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("file://foobar"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file:///foo"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file:///foobar"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("file://localhost/foo"))); +} + +// Specific port +TEST(ExtensionURLPatternTest, Match17) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, + pattern.Parse("http://www.example.com:80/foo")); + EXPECT_EQ("http", pattern.scheme()); + EXPECT_EQ("www.example.com", pattern.host()); + EXPECT_FALSE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/foo", pattern.path()); + EXPECT_EQ("80", pattern.port()); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://www.example.com:80/foo"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://www.example.com/foo"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("http://www.example.com:8080/foo"))); + EXPECT_FALSE(pattern.MatchesURL( + GURL("filesystem:http://www.example.com:8080/foo/"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("filesystem:http://www.example.com/f/foo"))); +} + +// Explicit port wildcard +TEST(ExtensionURLPatternTest, Match18) { + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, + pattern.Parse("http://www.example.com:*/foo")); + EXPECT_EQ("http", pattern.scheme()); + EXPECT_EQ("www.example.com", pattern.host()); + EXPECT_FALSE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + 
EXPECT_EQ("/foo", pattern.path()); + EXPECT_EQ("*", pattern.port()); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://www.example.com:80/foo"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://www.example.com/foo"))); + EXPECT_TRUE(pattern.MatchesURL(GURL("http://www.example.com:8080/foo"))); + EXPECT_FALSE(pattern.MatchesURL( + GURL("filesystem:http://www.example.com:8080/foo/"))); +} + +// chrome-extension:// +TEST(ExtensionURLPatternTest, Match19) { + URLPattern pattern(URLPattern::SCHEME_EXTENSION); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, + pattern.Parse("chrome-extension://ftw/*")); + EXPECT_EQ("chrome-extension", pattern.scheme()); + EXPECT_EQ("ftw", pattern.host()); + EXPECT_FALSE(pattern.match_subdomains()); + EXPECT_FALSE(pattern.match_all_urls()); + EXPECT_EQ("/*", pattern.path()); + EXPECT_TRUE(pattern.MatchesURL(GURL("chrome-extension://ftw"))); + EXPECT_TRUE(pattern.MatchesURL( + GURL("chrome-extension://ftw/http://google.com"))); + EXPECT_TRUE(pattern.MatchesURL( + GURL("chrome-extension://ftw/https://google.com"))); + EXPECT_FALSE(pattern.MatchesURL(GURL("chrome-extension://foobar"))); + EXPECT_TRUE(pattern.MatchesURL( + GURL("filesystem:chrome-extension://ftw/t/file.txt"))); +}; + +static const struct GetAsStringPatterns { + const char* pattern; +} kGetAsStringTestCases[] = { + { "http://www/" }, + { "http://*/*" }, + { "chrome://*/*" }, + { "chrome://newtab/" }, + { "about:*" }, + { "about:blank" }, + { "chrome-extension://*/*" }, + { "chrome-extension://FTW/" }, + { "data:*" }, + { "data:monkey" }, + { "javascript:*" }, + { "javascript:atemyhomework" }, + { "http://www.example.com:8080/foo" }, +}; + +TEST(ExtensionURLPatternTest, GetAsString) { + for (size_t i = 0; i < arraysize(kGetAsStringTestCases); ++i) { + URLPattern pattern(URLPattern::SCHEME_ALL); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, + pattern.Parse(kGetAsStringTestCases[i].pattern)) + << "Error parsing " << kGetAsStringTestCases[i].pattern; + EXPECT_EQ(kGetAsStringTestCases[i].pattern, + 
pattern.GetAsString()); + } +} + +void TestPatternOverlap(const URLPattern& pattern1, const URLPattern& pattern2, + bool expect_overlap) { + EXPECT_EQ(expect_overlap, pattern1.OverlapsWith(pattern2)) + << pattern1.GetAsString() << ", " << pattern2.GetAsString(); + EXPECT_EQ(expect_overlap, pattern2.OverlapsWith(pattern1)) + << pattern2.GetAsString() << ", " << pattern1.GetAsString(); +} + +TEST(ExtensionURLPatternTest, OverlapsWith) { + URLPattern pattern1(kAllSchemes, "http://www.google.com/foo/*"); + URLPattern pattern2(kAllSchemes, "https://www.google.com/foo/*"); + URLPattern pattern3(kAllSchemes, "http://*.google.com/foo/*"); + URLPattern pattern4(kAllSchemes, "http://*.yahooo.com/foo/*"); + URLPattern pattern5(kAllSchemes, "http://www.yahooo.com/bar/*"); + URLPattern pattern6(kAllSchemes, + "http://www.yahooo.com/bar/baz/*"); + URLPattern pattern7(kAllSchemes, "file:///*"); + URLPattern pattern8(kAllSchemes, "*://*/*"); + URLPattern pattern9(URLPattern::SCHEME_HTTPS, "*://*/*"); + URLPattern pattern10(kAllSchemes, "<all_urls>"); + + TestPatternOverlap(pattern1, pattern1, true); + TestPatternOverlap(pattern1, pattern2, false); + TestPatternOverlap(pattern1, pattern3, true); + TestPatternOverlap(pattern1, pattern4, false); + TestPatternOverlap(pattern3, pattern4, false); + TestPatternOverlap(pattern4, pattern5, false); + TestPatternOverlap(pattern5, pattern6, true); + + // Test that scheme restrictions work. + TestPatternOverlap(pattern1, pattern8, true); + TestPatternOverlap(pattern1, pattern9, false); + TestPatternOverlap(pattern1, pattern10, true); + + // Test that '<all_urls>' includes file URLs, while scheme '*' does not. + TestPatternOverlap(pattern7, pattern8, false); + TestPatternOverlap(pattern7, pattern10, true); + + // Test that wildcard schemes are handled correctly, especially when compared + // to each-other. 
+ URLPattern pattern11(kAllSchemes, "http://example.com/*"); + URLPattern pattern12(kAllSchemes, "*://example.com/*"); + URLPattern pattern13(kAllSchemes, "*://example.com/foo/*"); + URLPattern pattern14(kAllSchemes, "*://google.com/*"); + TestPatternOverlap(pattern8, pattern12, true); + TestPatternOverlap(pattern9, pattern12, true); + TestPatternOverlap(pattern10, pattern12, true); + TestPatternOverlap(pattern11, pattern12, true); + TestPatternOverlap(pattern12, pattern13, true); + TestPatternOverlap(pattern11, pattern13, true); + TestPatternOverlap(pattern14, pattern12, false); + TestPatternOverlap(pattern14, pattern13, false); +} + +TEST(ExtensionURLPatternTest, ConvertToExplicitSchemes) { + URLPatternList all_urls(URLPattern( + kAllSchemes, + "<all_urls>").ConvertToExplicitSchemes()); + + URLPatternList all_schemes(URLPattern( + kAllSchemes, + "*://google.com/foo").ConvertToExplicitSchemes()); + + URLPatternList monkey(URLPattern( + URLPattern::SCHEME_HTTP | URLPattern::SCHEME_HTTPS | + URLPattern::SCHEME_FTP, + "http://google.com/monkey").ConvertToExplicitSchemes()); + + ASSERT_EQ(7u, all_urls.size()); + ASSERT_EQ(2u, all_schemes.size()); + ASSERT_EQ(1u, monkey.size()); + + EXPECT_EQ("http://*/*", all_urls[0].GetAsString()); + EXPECT_EQ("https://*/*", all_urls[1].GetAsString()); + EXPECT_EQ("file:///*", all_urls[2].GetAsString()); + EXPECT_EQ("ftp://*/*", all_urls[3].GetAsString()); + EXPECT_EQ("chrome://*/*", all_urls[4].GetAsString()); + + EXPECT_EQ("http://google.com/foo", all_schemes[0].GetAsString()); + EXPECT_EQ("https://google.com/foo", all_schemes[1].GetAsString()); + + EXPECT_EQ("http://google.com/monkey", monkey[0].GetAsString()); +} + +TEST(ExtensionURLPatternTest, IgnorePorts) { + std::string pattern_str = "http://www.example.com:8080/foo"; + GURL url("http://www.example.com:1234/foo"); + + URLPattern pattern(kAllSchemes); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern.Parse(pattern_str)); + + EXPECT_EQ(pattern_str, pattern.GetAsString()); + 
EXPECT_FALSE(pattern.MatchesURL(url)); +} + +TEST(ExtensionURLPatternTest, Equals) { + const struct { + const char* pattern1; + const char* pattern2; + bool expected_equal; + } kEqualsTestCases[] = { + // schemes + { "http://en.google.com/blah/*/foo", + "https://en.google.com/blah/*/foo", + false + }, + { "https://en.google.com/blah/*/foo", + "https://en.google.com/blah/*/foo", + true + }, + { "https://en.google.com/blah/*/foo", + "ftp://en.google.com/blah/*/foo", + false + }, + + // subdomains + { "https://en.google.com/blah/*/foo", + "https://fr.google.com/blah/*/foo", + false + }, + { "https://www.google.com/blah/*/foo", + "https://*.google.com/blah/*/foo", + false + }, + { "https://*.google.com/blah/*/foo", + "https://*.google.com/blah/*/foo", + true + }, + + // domains + { "http://en.example.com/blah/*/foo", + "http://en.google.com/blah/*/foo", + false + }, + + // ports + { "http://en.google.com:8000/blah/*/foo", + "http://en.google.com/blah/*/foo", + false + }, + { "http://fr.google.com:8000/blah/*/foo", + "http://fr.google.com:8000/blah/*/foo", + true + }, + { "http://en.google.com:8000/blah/*/foo", + "http://en.google.com:8080/blah/*/foo", + false + }, + + // paths + { "http://en.google.com/blah/*/foo", + "http://en.google.com/blah/*", + false + }, + { "http://en.google.com/*", + "http://en.google.com/", + false + }, + { "http://en.google.com/*", + "http://en.google.com/*", + true + }, + + // all_urls + { "<all_urls>", + "<all_urls>", + true + }, + { "<all_urls>", + "http://*/*", + false + } + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEqualsTestCases); ++i) { + std::string message = kEqualsTestCases[i].pattern1; + message += " "; + message += kEqualsTestCases[i].pattern2; + + URLPattern pattern1(URLPattern::SCHEME_ALL); + URLPattern pattern2(URLPattern::SCHEME_ALL); + + pattern1.Parse(kEqualsTestCases[i].pattern1); + pattern2.Parse(kEqualsTestCases[i].pattern2); + EXPECT_EQ(kEqualsTestCases[i].expected_equal, pattern1 == pattern2) + << message; + } +} 
+ +TEST(ExtensionURLPatternTest, CanReusePatternWithParse) { + URLPattern pattern1(URLPattern::SCHEME_ALL); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern1.Parse("http://aa.com/*")); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern1.Parse("http://bb.com/*")); + + EXPECT_TRUE(pattern1.MatchesURL(GURL("http://bb.com/path"))); + EXPECT_FALSE(pattern1.MatchesURL(GURL("http://aa.com/path"))); + + URLPattern pattern2(URLPattern::SCHEME_ALL, URLPattern::kAllUrlsPattern); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern2.Parse("http://aa.com/*")); + + EXPECT_FALSE(pattern2.MatchesURL(GURL("http://bb.com/path"))); + EXPECT_TRUE(pattern2.MatchesURL(GURL("http://aa.com/path"))); + EXPECT_FALSE(pattern2.MatchesURL(GURL("http://sub.aa.com/path"))); + + URLPattern pattern3(URLPattern::SCHEME_ALL, "http://aa.com/*"); + EXPECT_EQ(URLPattern::PARSE_SUCCESS, pattern3.Parse("http://aa.com:88/*")); + EXPECT_FALSE(pattern3.MatchesURL(GURL("http://aa.com/path"))); + EXPECT_TRUE(pattern3.MatchesURL(GURL("http://aa.com:88/path"))); +} |