diff options
Diffstat (limited to 'extensions/common/url_pattern.cc')
-rw-r--r-- | extensions/common/url_pattern.cc | 524 |
1 files changed, 524 insertions, 0 deletions
diff --git a/extensions/common/url_pattern.cc b/extensions/common/url_pattern.cc new file mode 100644 index 0000000..73159b3 --- /dev/null +++ b/extensions/common/url_pattern.cc @@ -0,0 +1,524 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extensions/common/url_pattern.h" + +#include "base/string_number_conversions.h" +#include "base/string_piece.h" +#include "base/string_split.h" +#include "base/string_util.h" +#include "content/public/common/url_constants.h" +#include "extensions/common/constants.h" +#include "googleurl/src/gurl.h" +#include "googleurl/src/url_util.h" + +const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; + +namespace { + +// TODO(aa): What about more obscure schemes like data: and javascript: ? +// Note: keep this array in sync with kValidSchemeMasks. +const char* kValidSchemes[] = { + chrome::kHttpScheme, + chrome::kHttpsScheme, + chrome::kFileScheme, + chrome::kFtpScheme, + chrome::kChromeUIScheme, + extensions::kExtensionScheme, + chrome::kFileSystemScheme, +}; + +const int kValidSchemeMasks[] = { + URLPattern::SCHEME_HTTP, + URLPattern::SCHEME_HTTPS, + URLPattern::SCHEME_FILE, + URLPattern::SCHEME_FTP, + URLPattern::SCHEME_CHROMEUI, + URLPattern::SCHEME_EXTENSION, + URLPattern::SCHEME_FILESYSTEM, +}; + +COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks), + must_keep_these_arrays_in_sync); + +const char kParseSuccess[] = "Success."; +const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator."; +const char kParseErrorInvalidScheme[] = "Invalid scheme."; +const char kParseErrorWrongSchemeType[] = "Wrong scheme type."; +const char kParseErrorEmptyHost[] = "Host can not be empty."; +const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard."; +const char kParseErrorEmptyPath[] = "Empty path."; +const char kParseErrorInvalidPort[] = "Invalid port."; + +// Message explaining each URLPattern::ParseResult. +const char* const kParseResultMessages[] = { + kParseSuccess, + kParseErrorMissingSchemeSeparator, + kParseErrorInvalidScheme, + kParseErrorWrongSchemeType, + kParseErrorEmptyHost, + kParseErrorInvalidHostWildcard, + kParseErrorEmptyPath, + kParseErrorInvalidPort, +}; + +COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), + must_add_message_for_each_parse_result); + +const char kPathSeparator[] = "/"; + +bool IsStandardScheme(const std::string& scheme) { + // "*" gets the same treatment as a standard scheme. + if (scheme == "*") + return true; + + return url_util::IsStandard(scheme.c_str(), + url_parse::Component(0, static_cast<int>(scheme.length()))); +} + +bool IsValidPortForScheme(const std::string& scheme, const std::string& port) { + if (port == "*") + return true; + + // Only accept non-wildcard ports if the scheme uses ports. + if (url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length()) == + url_parse::PORT_UNSPECIFIED) { + return false; + } + + int parsed_port = url_parse::PORT_UNSPECIFIED; + if (!base::StringToInt(port, &parsed_port)) + return false; + return (parsed_port >= 0) && (parsed_port < 65536); +} + +} // namespace + +URLPattern::URLPattern() + : valid_schemes_(SCHEME_NONE), + match_all_urls_(false), + match_subdomains_(false), + port_("*") {} + +URLPattern::URLPattern(int valid_schemes) + : valid_schemes_(valid_schemes), + match_all_urls_(false), + match_subdomains_(false), + port_("*") {} + +URLPattern::URLPattern(int valid_schemes, const std::string& pattern) + // Strict error checking is used, because this constructor is only + // appropriate when we know |pattern| is valid. + : valid_schemes_(valid_schemes), + match_all_urls_(false), + match_subdomains_(false), + port_("*") { + if (PARSE_SUCCESS != Parse(pattern)) + NOTREACHED() << "URLPattern is invalid: " << pattern; +} + +URLPattern::~URLPattern() { +} + +bool URLPattern::operator<(const URLPattern& other) const { + return GetAsString() < other.GetAsString(); +} + +bool URLPattern::operator==(const URLPattern& other) const { + return GetAsString() == other.GetAsString(); +} + +URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) { + spec_.clear(); + SetMatchAllURLs(false); + SetMatchSubdomains(false); + SetPort("*"); + + // Special case pattern to match every valid URL. + if (pattern == kAllUrlsPattern) { + SetMatchAllURLs(true); + return PARSE_SUCCESS; + } + + // Parse out the scheme. + size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator); + bool has_standard_scheme_separator = true; + + // Some urls also use ':' alone as the scheme separator. + if (scheme_end_pos == std::string::npos) { + scheme_end_pos = pattern.find(':'); + has_standard_scheme_separator = false; + } + + if (scheme_end_pos == std::string::npos) + return PARSE_ERROR_MISSING_SCHEME_SEPARATOR; + + if (!SetScheme(pattern.substr(0, scheme_end_pos))) + return PARSE_ERROR_INVALID_SCHEME; + + bool standard_scheme = IsStandardScheme(scheme_); + if (standard_scheme != has_standard_scheme_separator) + return PARSE_ERROR_WRONG_SCHEME_SEPARATOR; + + // Advance past the scheme separator. + scheme_end_pos += + (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1); + if (scheme_end_pos >= pattern.size()) + return PARSE_ERROR_EMPTY_HOST; + + // Parse out the host and path. + size_t host_start_pos = scheme_end_pos; + size_t path_start_pos = 0; + + if (!standard_scheme) { + path_start_pos = host_start_pos; + } else if (scheme_ == chrome::kFileScheme) { + size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); + if (host_end_pos == std::string::npos) { + // Allow hostname omission. + // e.g. file://* is interpreted as file:///*, + // file://foo* is interpreted as file:///foo*. + path_start_pos = host_start_pos - 1; + } else { + // Ignore hostname if scheme is file://. + // e.g. file://localhost/foo is equal to file:///foo. + path_start_pos = host_end_pos; + } + } else { + size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); + + // Host is required. + if (host_start_pos == host_end_pos) + return PARSE_ERROR_EMPTY_HOST; + + if (host_end_pos == std::string::npos) + return PARSE_ERROR_EMPTY_PATH; + + host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos); + + // The first component can optionally be '*' to match all subdomains. + std::vector<std::string> host_components; + base::SplitString(host_, '.', &host_components); + if (host_components[0] == "*") { + match_subdomains_ = true; + host_components.erase(host_components.begin(), + host_components.begin() + 1); + } + host_ = JoinString(host_components, '.'); + + path_start_pos = host_end_pos; + } + + SetPath(pattern.substr(path_start_pos)); + + size_t port_pos = host_.find(':'); + if (port_pos != std::string::npos) { + if (!SetPort(host_.substr(port_pos + 1))) + return PARSE_ERROR_INVALID_PORT; + host_ = host_.substr(0, port_pos); + } + + // No other '*' can occur in the host, though. This isn't necessary, but is + // done as a convenience to developers who might otherwise be confused and + // think '*' works as a glob in the host. + if (host_.find('*') != std::string::npos) + return PARSE_ERROR_INVALID_HOST_WILDCARD; + + return PARSE_SUCCESS; +} + +void URLPattern::SetValidSchemes(int valid_schemes) { + spec_.clear(); + valid_schemes_ = valid_schemes; +} + +void URLPattern::SetHost(const std::string& host) { + spec_.clear(); + host_ = host; +} + +void URLPattern::SetMatchAllURLs(bool val) { + spec_.clear(); + match_all_urls_ = val; + + if (val) { + match_subdomains_ = true; + scheme_ = "*"; + host_.clear(); + SetPath("/*"); + } +} + +void URLPattern::SetMatchSubdomains(bool val) { + spec_.clear(); + match_subdomains_ = val; +} + +bool URLPattern::SetScheme(const std::string& scheme) { + spec_.clear(); + scheme_ = scheme; + if (scheme_ == "*") { + valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); + } else if (!IsValidScheme(scheme_)) { + return false; + } + return true; +} + +bool URLPattern::IsValidScheme(const std::string& scheme) const { + if (valid_schemes_ == SCHEME_ALL) + return true; + + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) + return true; + } + + return false; +} + +void URLPattern::SetPath(const std::string& path) { + spec_.clear(); + path_ = path; + path_escaped_ = path_; + ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); + ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); +} + +bool URLPattern::SetPort(const std::string& port) { + spec_.clear(); + if (IsValidPortForScheme(scheme_, port)) { + port_ = port; + return true; + } + return false; +} + +bool URLPattern::MatchesURL(const GURL& test) const { + const GURL* test_url = &test; + bool has_inner_url = test.inner_url() != NULL; + + if (has_inner_url) { + if (!test.SchemeIsFileSystem()) + return false; // The only nested URLs we handle are filesystem URLs. + test_url = test.inner_url(); + } + + if (!MatchesScheme(test_url->scheme())) + return false; + + if (match_all_urls_) + return true; + + std::string path_for_request = test.PathForRequest(); + if (has_inner_url) + path_for_request = test_url->path() + path_for_request; + + return MatchesSecurityOriginHelper(*test_url) && + MatchesPath(path_for_request); +} + +bool URLPattern::MatchesSecurityOrigin(const GURL& test) const { + const GURL* test_url = &test; + bool has_inner_url = test.inner_url() != NULL; + + if (has_inner_url) { + if (!test.SchemeIsFileSystem()) + return false; // The only nested URLs we handle are filesystem URLs. + test_url = test.inner_url(); + } + + if (!MatchesScheme(test_url->scheme())) + return false; + + if (match_all_urls_) + return true; + + return MatchesSecurityOriginHelper(*test_url); +} + +bool URLPattern::MatchesScheme(const std::string& test) const { + if (!IsValidScheme(test)) + return false; + + return scheme_ == "*" || test == scheme_; +} + +bool URLPattern::MatchesHost(const std::string& host) const { + std::string test(chrome::kHttpScheme); + test += content::kStandardSchemeSeparator; + test += host; + test += "/"; + return MatchesHost(GURL(test)); +} + +bool URLPattern::MatchesHost(const GURL& test) const { + // If the hosts are exactly equal, we have a match. + if (test.host() == host_) + return true; + + // If we're matching subdomains, and we have no host in the match pattern, + // that means that we're matching all hosts, which means we have a match no + // matter what the test host is. + if (match_subdomains_ && host_.empty()) + return true; + + // Otherwise, we can only match if our match pattern matches subdomains. + if (!match_subdomains_) + return false; + + // We don't do subdomain matching against IP addresses, so we can give up now + // if the test host is an IP address. + if (test.HostIsIPAddress()) + return false; + + // Check if the test host is a subdomain of our host. + if (test.host().length() <= (host_.length() + 1)) + return false; + + if (test.host().compare(test.host().length() - host_.length(), + host_.length(), host_) != 0) + return false; + + return test.host()[test.host().length() - host_.length() - 1] == '.'; +} + +bool URLPattern::MatchesPath(const std::string& test) const { + if (!MatchPattern(test, path_escaped_)) + return false; + + return true; +} + +bool URLPattern::MatchesPort(int port) const { + if (port == url_parse::PORT_INVALID) + return false; + + return port_ == "*" || port_ == base::IntToString(port); +} + + +const std::string& URLPattern::GetAsString() const { + if (!spec_.empty()) + return spec_; + + if (match_all_urls_) { + spec_ = kAllUrlsPattern; + return spec_; + } + + bool standard_scheme = IsStandardScheme(scheme_); + + std::string spec = scheme_ + + (standard_scheme ? content::kStandardSchemeSeparator : ":"); + + if (scheme_ != chrome::kFileScheme && standard_scheme) { + if (match_subdomains_) { + spec += "*"; + if (!host_.empty()) + spec += "."; + } + + if (!host_.empty()) + spec += host_; + + if (port_ != "*") { + spec += ":"; + spec += port_; + } + } + + if (!path_.empty()) + spec += path_; + + spec_ = spec; + return spec_; +} + +bool URLPattern::OverlapsWith(const URLPattern& other) const { + if (!MatchesAnyScheme(other.GetExplicitSchemes()) && + !other.MatchesAnyScheme(GetExplicitSchemes())) { + return false; + } + + if (!MatchesHost(other.host()) && !other.MatchesHost(host_)) + return false; + + if (port_ != "*" && other.port() != "*" && port_ != other.port()) + return false; + + // We currently only use OverlapsWith() for the patterns inside + // URLPatternSet. In those cases, we know that the path will have only a + // single wildcard at the end. This makes figuring out overlap much easier. It + // seems like there is probably a computer-sciency way to solve the general + // case, but we don't need that yet. + DCHECK(path_.find('*') == path_.size() - 1); + DCHECK(other.path().find('*') == other.path().size() - 1); + + if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) && + !other.MatchesPath(path_.substr(0, path_.size() - 1))) + return false; + + return true; +} + +bool URLPattern::MatchesAnyScheme( + const std::vector<std::string>& schemes) const { + for (std::vector<std::string>::const_iterator i = schemes.begin(); + i != schemes.end(); ++i) { + if (MatchesScheme(*i)) + return true; + } + + return false; +} + +bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const { + // Ignore hostname if scheme is file://. + if (scheme_ != chrome::kFileScheme && !MatchesHost(test)) + return false; + + if (!MatchesPort(test.EffectiveIntPort())) + return false; + + return true; +} + +std::vector<std::string> URLPattern::GetExplicitSchemes() const { + std::vector<std::string> result; + + if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { + result.push_back(scheme_); + return result; + } + + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (MatchesScheme(kValidSchemes[i])) { + result.push_back(kValidSchemes[i]); + } + } + + return result; +} + +std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const { + std::vector<std::string> explicit_schemes = GetExplicitSchemes(); + std::vector<URLPattern> result; + + for (std::vector<std::string>::const_iterator i = explicit_schemes.begin(); + i != explicit_schemes.end(); ++i) { + URLPattern temp = *this; + temp.SetScheme(*i); + temp.SetMatchAllURLs(false); + result.push_back(temp); + } + + return result; +} + +// static +const char* URLPattern::GetParseResultString( + URLPattern::ParseResult parse_result) { + return kParseResultMessages[parse_result]; +} |