summary refs log tree commit diff stats
path: root/extensions/common/url_pattern.h
diff options
context:
space:
mode:
author aa@chromium.org <aa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-11-13 20:27:42 +0000
committer aa@chromium.org <aa@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-11-13 20:27:42 +0000
commit 885c0e94fd5e3d81bb258191d720b7b22ca683df (patch)
tree 27022b02d52dda9e1cb8b86cde08c86220615f56 /extensions/common/url_pattern.h
parent bf4d4efcd977d9f4438725d77e6bc9e02aac4b1c (diff)
download chromium_src-885c0e94fd5e3d81bb258191d720b7b22ca683df.zip
chromium_src-885c0e94fd5e3d81bb258191d720b7b22ca683df.tar.gz
chromium_src-885c0e94fd5e3d81bb258191d720b7b22ca683df.tar.bz2
Establish /extensions directory and move URLPattern there.
This doesn't create an extensions gyp target, or a module with a public API. It does set up DEPS rules and seems like it will make it easier for people to write correctly layered code today, while we are in transition. BUG=159265 TBR=ben@chromium.org Review URL: https://codereview.chromium.org/11410015 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@167454 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'extensions/common/url_pattern.h')
-rw-r--r-- extensions/common/url_pattern.h 236
1 files changed, 236 insertions, 0 deletions
diff --git a/extensions/common/url_pattern.h b/extensions/common/url_pattern.h
new file mode 100644
index 0000000..42188a5
--- /dev/null
+++ b/extensions/common/url_pattern.h
@@ -0,0 +1,236 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#ifndef EXTENSIONS_COMMON_URL_PATTERN_H_
+#define EXTENSIONS_COMMON_URL_PATTERN_H_
+
+#include <functional>
+#include <string>
+#include <vector>
+
+class GURL;
+
+// A pattern that can be used to match URLs. A URLPattern is a very restricted
+// subset of URL syntax:
+//
+// <url-pattern> := <scheme>://<host><port><path> | '<all_urls>'
+// <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' |
+// 'chrome-extension' | 'filesystem'
+// <host> := '*' | ['*.'] <anychar except '/' and '*'>+
+// <port> := [':' ('*' | <port number between 0 and 65535>)]
+// <path> := '/' <any chars>
+//
+// * Host is not used when the scheme is 'file'.
+// * The path can have embedded '*' characters which act as glob wildcards.
+// * '<all_urls>' is a special pattern that matches any URL that contains a
+// valid scheme (as specified by valid_schemes_).
+// * The '*' scheme pattern excludes file URLs.
+//
+// Examples of valid patterns:
+// - http://*/*
+// - http://*/foo*
+// - https://*.google.com/foo*bar
+// - file://monkey*
+// - http://127.0.0.1/*
+//
+// Examples of invalid patterns:
+// - http://* -- path not specified
+// - http://*foo/bar -- * not allowed as substring of host component
+// - http://foo.*.bar/baz -- * must be first component
+// - http:/bar -- scheme separator not found
+// - foo://* -- invalid scheme
+// - chrome:// -- we don't support chrome internal URLs
+class URLPattern {
+ public:
+ // A collection of scheme bitmasks for use with valid_schemes.
+ enum SchemeMasks {
+ SCHEME_NONE = 0,
+ SCHEME_HTTP = 1 << 0,
+ SCHEME_HTTPS = 1 << 1,
+ SCHEME_FILE = 1 << 2,
+ SCHEME_FTP = 1 << 3,
+ SCHEME_CHROMEUI = 1 << 4,
+ SCHEME_EXTENSION = 1 << 5,
+ SCHEME_FILESYSTEM = 1 << 6,
+
+ // IMPORTANT!
+ // SCHEME_ALL will match every scheme, including chrome://, chrome-
+ // extension://, about:, etc. Because this has lots of security
+ // implications, third-party extensions should usually not be able to get
+ // access to URL patterns initialized this way. If there is a reason
+ // for violating this general rule, document why this is safe.
+ SCHEME_ALL = -1,
+ };
+
+ // Error codes returned from Parse().
+ enum ParseResult {
+ PARSE_SUCCESS = 0,
+ PARSE_ERROR_MISSING_SCHEME_SEPARATOR,
+ PARSE_ERROR_INVALID_SCHEME,
+ PARSE_ERROR_WRONG_SCHEME_SEPARATOR,
+ PARSE_ERROR_EMPTY_HOST,
+ PARSE_ERROR_INVALID_HOST_WILDCARD,
+ PARSE_ERROR_EMPTY_PATH,
+ PARSE_ERROR_INVALID_PORT,
+ NUM_PARSE_RESULTS
+ };
+
+ // The <all_urls> string pattern.
+ static const char kAllUrlsPattern[];
+
+ explicit URLPattern(int valid_schemes);
+
+ // Convenience to construct a URLPattern from a string. If the string is not
+ // known ahead of time, use Parse() instead, which returns success or failure.
+ URLPattern(int valid_schemes, const std::string& pattern);
+
+ URLPattern();
+ ~URLPattern();
+
+ bool operator<(const URLPattern& other) const;
+ bool operator==(const URLPattern& other) const;
+
+ // Initializes this instance by parsing the provided string. Returns
+ // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
+ // failure, this instance will have some intermediate values and is in an
+ // invalid state.
+ ParseResult Parse(const std::string& pattern_str);
+
+ // Gets the bitmask of valid schemes.
+ int valid_schemes() const { return valid_schemes_; }
+ void SetValidSchemes(int valid_schemes);
+
+ // Gets the host the pattern matches. This can be an empty string if the
+ // pattern matches all hosts (the input was <scheme>://*/<whatever>).
+ const std::string& host() const { return host_; }
+ void SetHost(const std::string& host);
+
+ // Gets whether to match subdomains of host().
+ bool match_subdomains() const { return match_subdomains_; }
+ void SetMatchSubdomains(bool val);
+
+ // Gets the path the pattern matches with the leading slash. This can have
+ // embedded asterisks which are interpreted using glob rules.
+ const std::string& path() const { return path_; }
+ void SetPath(const std::string& path);
+
+ // Returns true if this pattern matches all urls.
+ bool match_all_urls() const { return match_all_urls_; }
+ void SetMatchAllURLs(bool val);
+
+ // Sets the scheme for pattern matches. This can be a single '*' if the
+ // pattern matches all valid schemes (as defined by the valid_schemes_
+ // property). Returns false on failure (if the scheme is not valid).
+ bool SetScheme(const std::string& scheme);
+ // Note: You should use MatchesScheme() instead of this getter unless you
+ // absolutely need the exact scheme. This is exposed for testing.
+ const std::string& scheme() const { return scheme_; }
+
+ // Returns true if the specified scheme can be used in this URL pattern, and
+ // false otherwise. Uses valid_schemes_ to determine validity.
+ bool IsValidScheme(const std::string& scheme) const;
+
+ // Returns true if this instance matches the specified URL.
+ bool MatchesURL(const GURL& test) const;
+
+ // Returns true if this instance matches the specified security origin.
+ bool MatchesSecurityOrigin(const GURL& test) const;
+
+ // Returns true if |test| matches our scheme.
+ // Note that if test is "filesystem", this may fail whereas MatchesURL
+ // may succeed. MatchesURL is smart enough to look at the inner_url instead
+ // of the outer "filesystem:" part.
+ bool MatchesScheme(const std::string& test) const;
+
+ // Returns true if |test| matches our host.
+ bool MatchesHost(const std::string& test) const;
+ bool MatchesHost(const GURL& test) const;
+
+ // Returns true if |test| matches our path.
+ bool MatchesPath(const std::string& test) const;
+
+ // Returns true if |port| matches our port.
+ bool MatchesPort(int port) const;
+
+ // Sets the port. Returns false if the port is invalid.
+ bool SetPort(const std::string& port);
+ const std::string& port() const { return port_; }
+
+ // Returns a string representing this instance.
+ const std::string& GetAsString() const;
+
+ // Determine whether there is a URL that would match this instance and another
+ // instance. This method is symmetrical: Calling other.OverlapsWith(*this)
+ // would result in the same answer.
+ bool OverlapsWith(const URLPattern& other) const;
+
+ // Convert this URLPattern into an equivalent set of URLPatterns that don't
+ // use a wildcard in the scheme component. If this URLPattern doesn't use a
+ // wildcard scheme, then the returned set will contain one element that is
+ // equivalent to this instance.
+ std::vector<URLPattern> ConvertToExplicitSchemes() const;
+
+ static bool EffectiveHostCompare(const URLPattern& a, const URLPattern& b) {
+ if (a.match_all_urls_ && b.match_all_urls_)
+ return false;
+ return a.host_.compare(b.host_) < 0;
+ };
+
+ // Used for origin comparisons in a std::set.
+ class EffectiveHostCompareFunctor {
+ public:
+ bool operator()(const URLPattern& a, const URLPattern& b) const {
+ return EffectiveHostCompare(a, b);
+ };
+ };
+
+ // Get an error string for a ParseResult.
+ static const char* GetParseResultString(URLPattern::ParseResult parse_result);
+
+ private:
+ // Returns true if any of the |schemes| items matches our scheme.
+ bool MatchesAnyScheme(const std::vector<std::string>& schemes) const;
+
+ bool MatchesSecurityOriginHelper(const GURL& test) const;
+
+ // If the URLPattern contains a wildcard scheme, returns a list of
+ // equivalent literal schemes, otherwise returns the current scheme.
+ std::vector<std::string> GetExplicitSchemes() const;
+
+ // A bitmask containing the schemes which are considered valid for this
+ // pattern. Parse() uses this to decide whether a pattern contains a valid
+ // scheme. MatchesScheme uses this to decide whether a wildcard scheme_
+ // matches a given test scheme.
+ int valid_schemes_;
+
+ // True if this is a special-case "<all_urls>" pattern.
+ bool match_all_urls_;
+
+ // The scheme for the pattern.
+ std::string scheme_;
+
+ // The host without any leading "*" components.
+ std::string host_;
+
+ // Whether we should match subdomains of the host. This is true if the first
+ // component of the pattern's host was "*".
+ bool match_subdomains_;
+
+ // The port.
+ std::string port_;
+
+ // The path to match. This is everything after the host of the URL, or
+ // everything after the scheme in the case of file:// URLs.
+ std::string path_;
+
+ // The path with "?" and "\" characters escaped for use with the
+ // MatchPattern() function.
+ std::string path_escaped_;
+
+ // A string representing this URLPattern.
+ mutable std::string spec_;
+};
+
+typedef std::vector<URLPattern> URLPatternList;
+
+#endif // EXTENSIONS_COMMON_URL_PATTERN_H_