summary | refs | log | tree | commit | diff | stats
path: root/extensions/common/url_pattern.cc
diff options
context:
space:
mode:
Diffstat (limited to 'extensions/common/url_pattern.cc')
-rw-r--r-- extensions/common/url_pattern.cc 524
1 files changed, 524 insertions, 0 deletions
diff --git a/extensions/common/url_pattern.cc b/extensions/common/url_pattern.cc
new file mode 100644
index 0000000..73159b3
--- /dev/null
+++ b/extensions/common/url_pattern.cc
@@ -0,0 +1,524 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "extensions/common/url_pattern.h"
+
+#include "base/string_number_conversions.h"
+#include "base/string_piece.h"
+#include "base/string_split.h"
+#include "base/string_util.h"
+#include "content/public/common/url_constants.h"
+#include "extensions/common/constants.h"
+#include "googleurl/src/gurl.h"
+#include "googleurl/src/url_util.h"
+
+const char URLPattern::kAllUrlsPattern[] = "<all_urls>";  // Special pattern that Parse() treats as matching every valid URL.
+
+namespace {
+
+// Scheme names that IsValidScheme() and GetExplicitSchemes() iterate over.
+// Each entry pairs, by index, with the URLPattern::SCHEME_* bit stored at the
+// same position in kValidSchemeMasks.
+//
+// The pointers themselves are const (matching kParseResultMessages) so the
+// table cannot be re-pointed at runtime.
+//
+// TODO(aa): What about more obscure schemes like data: and javascript: ?
+// Note: keep this array in sync with kValidSchemeMasks.
+const char* const kValidSchemes[] = {
+  chrome::kHttpScheme,
+  chrome::kHttpsScheme,
+  chrome::kFileScheme,
+  chrome::kFtpScheme,
+  chrome::kChromeUIScheme,
+  extensions::kExtensionScheme,
+  chrome::kFileSystemScheme,
+};
+
+const int kValidSchemeMasks[] = {  // URLPattern::SCHEME_* bit for the kValidSchemes entry at the same index.
+ URLPattern::SCHEME_HTTP,
+ URLPattern::SCHEME_HTTPS,
+ URLPattern::SCHEME_FILE,
+ URLPattern::SCHEME_FTP,
+ URLPattern::SCHEME_CHROMEUI,
+ URLPattern::SCHEME_EXTENSION,
+ URLPattern::SCHEME_FILESYSTEM,
+};
+
+COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
+ must_keep_these_arrays_in_sync);  // Both tables are walked with one shared index, so their lengths must agree.
+
+const char kParseSuccess[] = "Success.";  // Human-readable text for each parse outcome; collected in kParseResultMessages below.
+const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator.";
+const char kParseErrorInvalidScheme[] = "Invalid scheme.";
+const char kParseErrorWrongSchemeType[] = "Wrong scheme type.";
+const char kParseErrorEmptyHost[] = "Host can not be empty.";
+const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard.";
+const char kParseErrorEmptyPath[] = "Empty path.";
+const char kParseErrorInvalidPort[] = "Invalid port.";
+
+// Message explaining each URLPattern::ParseResult; GetParseResultString() indexes this table with the enum value.
+const char* const kParseResultMessages[] = {
+ kParseSuccess,
+ kParseErrorMissingSchemeSeparator,
+ kParseErrorInvalidScheme,
+ kParseErrorWrongSchemeType,
+ kParseErrorEmptyHost,
+ kParseErrorInvalidHostWildcard,
+ kParseErrorEmptyPath,
+ kParseErrorInvalidPort,
+};
+
+COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
+ must_add_message_for_each_parse_result);  // The table needs exactly NUM_PARSE_RESULTS entries.
+
+const char kPathSeparator[] = "/";  // Parse() searches for this to find where the host ends and the path begins.
+
+// Returns true when |scheme| should be treated as a standard scheme. The "*"
+// wildcard gets the same treatment as a standard scheme; every other value is
+// decided by url_util::IsStandard().
+bool IsStandardScheme(const std::string& scheme) {
+  const bool is_wildcard = (scheme == "*");
+  if (is_wildcard)
+    return true;
+
+  // The component spans the whole of |scheme|.
+  const url_parse::Component whole_scheme(
+      0, static_cast<int>(scheme.length()));
+  return url_util::IsStandard(scheme.c_str(), whole_scheme);
+}
+
+// Returns true if |port| may be used in a pattern whose scheme is |scheme|.
+// The "*" wildcard is accepted for every scheme. A concrete port is accepted
+// only when url_canon::DefaultPortForScheme() reports a default port for
+// |scheme| and |port| parses as an integer in the range [0, 65535].
+bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
+  if (port == "*")
+    return true;
+
+  // Only accept non-wildcard ports if the scheme uses ports.
+  const int default_port =
+      url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length());
+  if (default_port == url_parse::PORT_UNSPECIFIED)
+    return false;
+
+  int port_number = url_parse::PORT_UNSPECIFIED;
+  const bool parsed = base::StringToInt(port, &port_number);
+  return parsed && port_number >= 0 && port_number < 65536;
+}
+
+} // namespace
+
+// Creates an empty pattern: valid_schemes_ starts as SCHEME_NONE, neither the
+// match-all nor the subdomain flag is set, and the port is the "*" wildcard.
+URLPattern::URLPattern()
+    : valid_schemes_(SCHEME_NONE),
+      match_all_urls_(false),
+      match_subdomains_(false),
+      port_("*") {
+}
+
+// Creates an empty pattern restricted to the schemes in the |valid_schemes|
+// bit mask. Nothing is parsed; the port starts as the "*" wildcard.
+URLPattern::URLPattern(int valid_schemes)
+    : valid_schemes_(valid_schemes),
+      match_all_urls_(false),
+      match_subdomains_(false),
+      port_("*") {
+}
+
+// Creates a pattern restricted to |valid_schemes| and immediately parses
+// |pattern|. Strict error checking is used, because this constructor is only
+// appropriate when we know |pattern| is valid: any result other than
+// PARSE_SUCCESS hits NOTREACHED().
+URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
+    : valid_schemes_(valid_schemes),
+      match_all_urls_(false),
+      match_subdomains_(false),
+      port_("*") {
+  const ParseResult parse_result = Parse(pattern);
+  if (parse_result != PARSE_SUCCESS)
+    NOTREACHED() << "URLPattern is invalid: " << pattern;
+}
+
+// Nothing to release explicitly; the destructor body is empty.
+URLPattern::~URLPattern() {}
+
+// Orders patterns by comparing their GetAsString() forms.
+bool URLPattern::operator<(const URLPattern& other) const {
+  const std::string& lhs_spec = GetAsString();
+  const std::string& rhs_spec = other.GetAsString();
+  return lhs_spec < rhs_spec;
+}
+
+// Two patterns are equal when their GetAsString() forms are identical.
+bool URLPattern::operator==(const URLPattern& other) const {
+  const std::string& lhs_spec = GetAsString();
+  const std::string& rhs_spec = other.GetAsString();
+  return lhs_spec == rhs_spec;
+}
+
+// Parses |pattern| and replaces this object's matching state with the result.
+//
+// Forms handled below:
+//   - kAllUrlsPattern, which matches every valid URL.
+//   - <scheme>://<host><path> for standard schemes. <host> may start with a
+//     "*" component to match subdomains and may end in ":<port>".
+//   - file://[<ignored host>]<path>; a missing path separator is tolerated.
+//   - <scheme>:<path> for non-standard schemes.
+//
+// Returns PARSE_SUCCESS, or the ParseResult for the first problem found. On
+// failure the object may be left partially updated.
+URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
+  spec_.clear();
+  // Drop any host left over from an earlier Parse()/SetHost(). The file: and
+  // non-standard-scheme branches below never assign host_, so without this a
+  // re-parsed pattern would keep matching against the previous host.
+  host_.clear();
+  SetMatchAllURLs(false);
+  SetMatchSubdomains(false);
+  SetPort("*");
+
+  // Special case pattern to match every valid URL.
+  if (pattern == kAllUrlsPattern) {
+    SetMatchAllURLs(true);
+    return PARSE_SUCCESS;
+  }
+
+  // Parse out the scheme.
+  size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator);
+  bool has_standard_scheme_separator = true;
+
+  // Some urls also use ':' alone as the scheme separator.
+  if (scheme_end_pos == std::string::npos) {
+    scheme_end_pos = pattern.find(':');
+    has_standard_scheme_separator = false;
+  }
+
+  if (scheme_end_pos == std::string::npos)
+    return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
+
+  if (!SetScheme(pattern.substr(0, scheme_end_pos)))
+    return PARSE_ERROR_INVALID_SCHEME;
+
+  // A standard scheme must use the standard separator, and a non-standard
+  // scheme must use the bare ':' form.
+  bool standard_scheme = IsStandardScheme(scheme_);
+  if (standard_scheme != has_standard_scheme_separator)
+    return PARSE_ERROR_WRONG_SCHEME_SEPARATOR;
+
+  // Advance past the scheme separator.
+  scheme_end_pos +=
+      (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1);
+  if (scheme_end_pos >= pattern.size())
+    return PARSE_ERROR_EMPTY_HOST;
+
+  // Parse out the host and path.
+  size_t host_start_pos = scheme_end_pos;
+  size_t path_start_pos = 0;
+
+  if (!standard_scheme) {
+    // Non-standard schemes have no host: everything after ':' is the path.
+    path_start_pos = host_start_pos;
+  } else if (scheme_ == chrome::kFileScheme) {
+    size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
+    if (host_end_pos == std::string::npos) {
+      // Allow hostname omission.
+      // e.g. file://* is interpreted as file:///*,
+      // file://foo* is interpreted as file:///foo*.
+      // Backing up one character reuses the separator's trailing '/' as the
+      // leading '/' of the path.
+      path_start_pos = host_start_pos - 1;
+    } else {
+      // Ignore hostname if scheme is file://.
+      // e.g. file://localhost/foo is equal to file:///foo.
+      path_start_pos = host_end_pos;
+    }
+  } else {
+    size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
+
+    // Host is required.
+    if (host_start_pos == host_end_pos)
+      return PARSE_ERROR_EMPTY_HOST;
+
+    if (host_end_pos == std::string::npos)
+      return PARSE_ERROR_EMPTY_PATH;
+
+    host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
+
+    // The first component can optionally be '*' to match all subdomains.
+    std::vector<std::string> host_components;
+    base::SplitString(host_, '.', &host_components);
+    if (host_components[0] == "*") {
+      match_subdomains_ = true;
+      host_components.erase(host_components.begin(),
+                            host_components.begin() + 1);
+    }
+    host_ = JoinString(host_components, '.');
+
+    path_start_pos = host_end_pos;
+  }
+
+  SetPath(pattern.substr(path_start_pos));
+
+  // Split an optional ":<port>" suffix off the host.
+  size_t port_pos = host_.find(':');
+  if (port_pos != std::string::npos) {
+    if (!SetPort(host_.substr(port_pos + 1)))
+      return PARSE_ERROR_INVALID_PORT;
+    host_ = host_.substr(0, port_pos);
+  }
+
+  // No other '*' can occur in the host, though. This isn't necessary, but is
+  // done as a convenience to developers who might otherwise be confused and
+  // think '*' works as a glob in the host.
+  if (host_.find('*') != std::string::npos)
+    return PARSE_ERROR_INVALID_HOST_WILDCARD;
+
+  return PARSE_SUCCESS;
+}
+
+// Replaces the bit mask of permitted schemes and drops the cached string form
+// so GetAsString() rebuilds it.
+void URLPattern::SetValidSchemes(int valid_schemes) {
+  valid_schemes_ = valid_schemes;
+  spec_.clear();
+}
+
+// Stores |host| verbatim and drops the cached string form so GetAsString()
+// rebuilds it.
+void URLPattern::SetHost(const std::string& host) {
+  host_ = host;
+  spec_.clear();
+}
+
+// Turns the match-everything flag on or off. Turning it on also rewrites the
+// other fields to their wildcard forms: subdomain matching enabled, scheme
+// "*", empty host and path "/*". Turning it off leaves those fields alone.
+void URLPattern::SetMatchAllURLs(bool val) {
+  spec_.clear();
+  match_all_urls_ = val;
+  if (!val)
+    return;
+
+  match_subdomains_ = true;
+  scheme_ = "*";
+  host_.clear();
+  SetPath("/*");
+}
+
+// Sets whether subdomains of the host are matched and drops the cached string
+// form so GetAsString() rebuilds it.
+void URLPattern::SetMatchSubdomains(bool val) {
+  match_subdomains_ = val;
+  spec_.clear();
+}
+
+// Stores |scheme| and reports whether it is acceptable. The "*" wildcard is
+// always accepted and narrows valid_schemes_ to its HTTP and HTTPS bits. Any
+// other value is accepted only if IsValidScheme() allows it. Note that
+// scheme_ is assigned even when false is returned.
+bool URLPattern::SetScheme(const std::string& scheme) {
+  spec_.clear();
+  scheme_ = scheme;
+
+  if (scheme_ != "*")
+    return IsValidScheme(scheme_);
+
+  valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
+  return true;
+}
+
+// Returns true if |scheme| is permitted by valid_schemes_. SCHEME_ALL permits
+// every scheme; otherwise |scheme| must appear in kValidSchemes and its
+// corresponding kValidSchemeMasks bit must be set in valid_schemes_.
+bool URLPattern::IsValidScheme(const std::string& scheme) const {
+  if (valid_schemes_ == SCHEME_ALL)
+    return true;
+
+  for (size_t index = 0; index < arraysize(kValidSchemes); ++index) {
+    const bool mask_enabled = (valid_schemes_ & kValidSchemeMasks[index]) != 0;
+    if (mask_enabled && scheme == kValidSchemes[index])
+      return true;
+  }
+
+  return false;
+}
+
+// Stores |path| and also a copy in path_escaped_ with every backslash doubled
+// and every '?' prefixed by a backslash; MatchesPath() matches against that
+// escaped copy. Backslashes must be escaped first so the ones inserted for
+// '?' are not doubled afterwards.
+void URLPattern::SetPath(const std::string& path) {
+  spec_.clear();
+  path_ = path;
+
+  std::string escaped(path);
+  ReplaceSubstringsAfterOffset(&escaped, 0, "\\", "\\\\");
+  ReplaceSubstringsAfterOffset(&escaped, 0, "?", "\\?");
+  path_escaped_ = escaped;
+}
+
+// Stores |port| if IsValidPortForScheme() accepts it for the current scheme
+// and returns whether it was stored. The cached string form is dropped in
+// both cases.
+bool URLPattern::SetPort(const std::string& port) {
+  spec_.clear();
+
+  if (!IsValidPortForScheme(scheme_, port))
+    return false;
+
+  port_ = port;
+  return true;
+}
+
+// Returns true if |test| matches this pattern's scheme, host, port and path.
+// For a URL with an inner URL (only filesystem: URLs are handled), the scheme,
+// host and port checks use the inner URL, and the path that is matched is the
+// inner URL's path followed by the outer URL's request path.
+bool URLPattern::MatchesURL(const GURL& test) const {
+  const bool is_nested = test.inner_url() != NULL;
+
+  // The only nested URLs we handle are filesystem URLs.
+  if (is_nested && !test.SchemeIsFileSystem())
+    return false;
+
+  const GURL& effective_url = is_nested ? *test.inner_url() : test;
+
+  if (!MatchesScheme(effective_url.scheme()))
+    return false;
+
+  if (match_all_urls_)
+    return true;
+
+  std::string request_path = test.PathForRequest();
+  if (is_nested)
+    request_path = effective_url.path() + request_path;
+
+  if (!MatchesSecurityOriginHelper(effective_url))
+    return false;
+
+  return MatchesPath(request_path);
+}
+
+// Like MatchesURL() but ignores the path: returns true if the scheme, host and
+// port of |test| (or of its inner URL, for filesystem: URLs) match this
+// pattern.
+bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
+  const bool is_nested = test.inner_url() != NULL;
+
+  // The only nested URLs we handle are filesystem URLs.
+  if (is_nested && !test.SchemeIsFileSystem())
+    return false;
+
+  const GURL& effective_url = is_nested ? *test.inner_url() : test;
+
+  if (!MatchesScheme(effective_url.scheme()))
+    return false;
+
+  return match_all_urls_ || MatchesSecurityOriginHelper(effective_url);
+}
+
+// Returns true if |test| is permitted by IsValidScheme() and either this
+// pattern's scheme is the "*" wildcard or it equals |test| exactly.
+bool URLPattern::MatchesScheme(const std::string& test) const {
+  const bool scheme_accepted = (scheme_ == "*") || (test == scheme_);
+  return IsValidScheme(test) && scheme_accepted;
+}
+
+// Convenience overload: wraps |host| in an http URL with path "/" and defers
+// to the GURL overload.
+bool URLPattern::MatchesHost(const std::string& host) const {
+  const std::string url_spec = std::string(chrome::kHttpScheme) +
+                               content::kStandardSchemeSeparator + host + "/";
+  return MatchesHost(GURL(url_spec));
+}
+
+// Returns true if the host of |test| matches this pattern's host, either
+// exactly or, when subdomain matching is enabled, as a subdomain of it.
+bool URLPattern::MatchesHost(const GURL& test) const {
+  const std::string test_host = test.host();
+
+  // If the hosts are exactly equal, we have a match.
+  if (test_host == host_)
+    return true;
+
+  // Beyond an exact match, only subdomain patterns can match.
+  if (!match_subdomains_)
+    return false;
+
+  // Matching subdomains with no host in the pattern means matching all hosts,
+  // whatever the test host is.
+  if (host_.empty())
+    return true;
+
+  // We don't do subdomain matching against IP addresses.
+  if (test.HostIsIPAddress())
+    return false;
+
+  // A subdomain needs at least one character and a '.' in front of host_.
+  if (test_host.length() <= host_.length() + 1)
+    return false;
+
+  // The test host must end in host_, with a '.' immediately before it.
+  const size_t suffix_start = test_host.length() - host_.length();
+  const bool ends_with_host =
+      test_host.compare(suffix_start, host_.length(), host_) == 0;
+  return ends_with_host && test_host[suffix_start - 1] == '.';
+}
+
+// Returns true if |test| matches the escaped path pattern built by SetPath(),
+// as judged by MatchPattern().
+bool URLPattern::MatchesPath(const std::string& test) const {
+  return MatchPattern(test, path_escaped_);
+}
+
+// Returns true if |port| is acceptable to this pattern: never for
+// url_parse::PORT_INVALID, always when the pattern's port is "*", and
+// otherwise only when the decimal form of |port| equals the stored port.
+bool URLPattern::MatchesPort(int port) const {
+  if (port == url_parse::PORT_INVALID)
+    return false;
+
+  if (port_ == "*")
+    return true;
+
+  return base::IntToString(port) == port_;
+}
+
+
+
+// Returns the pattern in string form. The result is cached in spec_; the
+// setters clear spec_, which makes the next call rebuild it.
+const std::string& URLPattern::GetAsString() const {
+  if (!spec_.empty())
+    return spec_;
+
+  if (match_all_urls_) {
+    spec_ = kAllUrlsPattern;
+    return spec_;
+  }
+
+  const bool is_standard = IsStandardScheme(scheme_);
+
+  std::string built(scheme_);
+  built += is_standard ? content::kStandardSchemeSeparator : ":";
+
+  // Host and port are emitted only for standard schemes other than file.
+  const bool emits_host_and_port =
+      is_standard && scheme_ != chrome::kFileScheme;
+  if (emits_host_and_port) {
+    if (match_subdomains_)
+      built += host_.empty() ? "*" : "*.";
+
+    built += host_;
+
+    if (port_ != "*") {
+      built += ":";
+      built += port_;
+    }
+  }
+
+  built += path_;
+
+  spec_ = built;
+  return spec_;
+}
+
+// Returns true if this pattern and |other| can overlap on scheme, host, port
+// and path. Each check passes if it holds in either direction.
+bool URLPattern::OverlapsWith(const URLPattern& other) const {
+  const bool schemes_overlap =
+      MatchesAnyScheme(other.GetExplicitSchemes()) ||
+      other.MatchesAnyScheme(GetExplicitSchemes());
+  if (!schemes_overlap)
+    return false;
+
+  const bool hosts_overlap =
+      MatchesHost(other.host()) || other.MatchesHost(host_);
+  if (!hosts_overlap)
+    return false;
+
+  // Two explicit ports must agree; a "*" on either side matches anything.
+  const bool both_ports_explicit = port_ != "*" && other.port() != "*";
+  if (both_ports_explicit && port_ != other.port())
+    return false;
+
+  // We currently only use OverlapsWith() for the patterns inside
+  // URLPatternSet. In those cases, we know that the path will have only a
+  // single wildcard at the end. This makes figuring out overlap much easier. It
+  // seems like there is probably a computer-sciency way to solve the general
+  // case, but we don't need that yet.
+  DCHECK(path_.find('*') == path_.size() - 1);
+  DCHECK(other.path().find('*') == other.path().size() - 1);
+
+  // Compare each path, minus its trailing wildcard, against the other pattern.
+  return MatchesPath(other.path().substr(0, other.path().size() - 1)) ||
+         other.MatchesPath(path_.substr(0, path_.size() - 1));
+}
+
+// Returns true if MatchesScheme() accepts at least one entry of |schemes|;
+// false for an empty list.
+bool URLPattern::MatchesAnyScheme(
+    const std::vector<std::string>& schemes) const {
+  for (size_t index = 0; index < schemes.size(); ++index) {
+    if (MatchesScheme(schemes[index]))
+      return true;
+  }
+
+  return false;
+}
+
+// Shared host and port check used by MatchesURL() and MatchesSecurityOrigin().
+// The host is not checked when this pattern's scheme is file.
+bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
+  // Ignore hostname if scheme is file://.
+  const bool host_is_checked = scheme_ != chrome::kFileScheme;
+  if (host_is_checked && !MatchesHost(test))
+    return false;
+
+  return MatchesPort(test.EffectiveIntPort());
+}
+
+// Returns the concrete schemes this pattern can match. A pattern with a
+// specific, valid scheme that is not match-all yields just that scheme.
+// Otherwise the result is every kValidSchemes entry that MatchesScheme()
+// accepts.
+std::vector<std::string> URLPattern::GetExplicitSchemes() const {
+  std::vector<std::string> schemes;
+
+  const bool has_concrete_scheme =
+      scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_);
+  if (has_concrete_scheme) {
+    schemes.push_back(scheme_);
+    return schemes;
+  }
+
+  for (size_t index = 0; index < arraysize(kValidSchemes); ++index) {
+    const char* candidate = kValidSchemes[index];
+    if (MatchesScheme(candidate))
+      schemes.push_back(candidate);
+  }
+
+  return schemes;
+}
+
+// Returns one copy of this pattern per scheme from GetExplicitSchemes(). Each
+// copy has its scheme set to that scheme and its match-all flag turned off.
+std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
+  const std::vector<std::string> schemes = GetExplicitSchemes();
+  std::vector<URLPattern> patterns;
+
+  for (size_t index = 0; index < schemes.size(); ++index) {
+    URLPattern converted(*this);
+    converted.SetScheme(schemes[index]);
+    converted.SetMatchAllURLs(false);
+    patterns.push_back(converted);
+  }
+
+  return patterns;
+}
+
+// static
+// Looks up the message for |parse_result| in kParseResultMessages. The value
+// is used directly as the table index, with no range check.
+const char* URLPattern::GetParseResultString(
+    URLPattern::ParseResult parse_result) {
+  const char* const message = kParseResultMessages[parse_result];
+  return message;
+}