diff options
author | battre@chromium.org <battre@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-04-11 20:04:01 +0000 |
---|---|---|
committer | battre@chromium.org <battre@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-04-11 20:04:01 +0000 |
commit | 2280dc815204af92079733bd445d5c30f4d3d680 (patch) | |
tree | 124c15f37d01ce59ddc1e55c46c2bc8d1b51b653 /extensions | |
parent | 22295741c015370083104061682bb72ec495be8f (diff) | |
download | chromium_src-2280dc815204af92079733bd445d5c30f4d3d680.zip chromium_src-2280dc815204af92079733bd445d5c30f4d3d680.tar.gz chromium_src-2280dc815204af92079733bd445d5c30f4d3d680.tar.bz2 |
Provide a mechanism for the declarative WebRequest API to match URLs without the query against a RegEx
If you look for URLs that contain example[1-9].com, you do not want to trigger
actions for http://foobar.com?url=example1.com, where the pattern occurs only
in the query string. For this reason, this CL introduces a new mechanism that
evaluates a RegEx only on the URL without the query string.
BUG=228852
Review URL: https://chromiumcodereview.appspot.com/13699007
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@193714 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'extensions')
-rw-r--r-- | extensions/common/matcher/regex_set_matcher.cc | 4 | ||||
-rw-r--r-- | extensions/common/matcher/regex_set_matcher.h | 2 | ||||
-rw-r--r-- | extensions/common/matcher/url_matcher.cc | 94 | ||||
-rw-r--r-- | extensions/common/matcher/url_matcher.h | 12 | ||||
-rw-r--r-- | extensions/common/matcher/url_matcher_constants.cc | 1 | ||||
-rw-r--r-- | extensions/common/matcher/url_matcher_constants.h | 1 | ||||
-rw-r--r-- | extensions/common/matcher/url_matcher_factory.cc | 2 | ||||
-rw-r--r-- | extensions/common/matcher/url_matcher_unittest.cc | 46 |
8 files changed, 147 insertions, 15 deletions
diff --git a/extensions/common/matcher/regex_set_matcher.cc b/extensions/common/matcher/regex_set_matcher.cc index c402f2e..3052a63 100644 --- a/extensions/common/matcher/regex_set_matcher.cc +++ b/extensions/common/matcher/regex_set_matcher.cc @@ -61,6 +61,10 @@ bool RegexSetMatcher::Match(const std::string& text, return old_number_of_matches != matches->size(); } +bool RegexSetMatcher::IsEmpty() const { + return regexes_.empty(); +} + std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches( const std::string& text) const { std::set<int> atoms_set; diff --git a/extensions/common/matcher/regex_set_matcher.h b/extensions/common/matcher/regex_set_matcher.h index 8d9de1c..906ab9c 100644 --- a/extensions/common/matcher/regex_set_matcher.h +++ b/extensions/common/matcher/regex_set_matcher.h @@ -43,6 +43,8 @@ class RegexSetMatcher { bool Match(const std::string& text, std::set<StringPattern::ID>* matches) const; + bool IsEmpty() const; + private: typedef int RE2ID; typedef std::map<StringPattern::ID, const StringPattern*> RegexMap; diff --git a/extensions/common/matcher/url_matcher.cc b/extensions/common/matcher/url_matcher.cc index b599293..4aa83d6 100644 --- a/extensions/common/matcher/url_matcher.cc +++ b/extensions/common/matcher/url_matcher.cc @@ -146,6 +146,10 @@ bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) { return criterion == URLMatcherCondition::URL_MATCHES; } +bool IsOriginAndPathRegexCriterion(URLMatcherCondition::Criterion criterion) { + return criterion == URLMatcherCondition::ORIGIN_AND_PATH_MATCHES; +} + } // namespace // @@ -209,6 +213,10 @@ bool URLMatcherCondition::IsRegexCondition() const { return IsRegexCriterion(criterion_); } +bool URLMatcherCondition::IsOriginAndPathRegexCondition() const { + return IsOriginAndPathRegexCriterion(criterion_); +} + bool URLMatcherCondition::IsMatch( const std::set<StringPattern::ID>& matching_patterns, const GURL& url) const { @@ -251,6 +259,7 @@ 
URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} URLMatcherConditionFactory::~URLMatcherConditionFactory() { STLDeleteElements(&substring_pattern_singletons_); STLDeleteElements(&regex_pattern_singletons_); + STLDeleteElements(&origin_and_path_regex_pattern_singletons_); } std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( @@ -380,12 +389,15 @@ std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( kEndOfURL; } -std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches( - const GURL& url) const { +static std::string CanonicalizeURLForRegexSearchesHelper( + const GURL& url, + bool clear_query) { GURL::Replacements replacements; replacements.ClearPassword(); replacements.ClearUsername(); replacements.ClearRef(); + if (clear_query) + replacements.ClearQuery(); // Clear port if it is implicit from scheme. if (url.has_port()) { const std::string& port = url.scheme(); @@ -397,6 +409,17 @@ std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches( return url.ReplaceComponents(replacements).spec(); } +std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches( + const GURL& url) const { + return CanonicalizeURLForRegexSearchesHelper(url, false); +} + +std::string +URLMatcherConditionFactory::CanonicalizeURLForOriginAndPathRegexSearches( + const GURL& url) const { + return CanonicalizeURLForRegexSearchesHelper(url, true); +} + URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( const std::string& prefix) { return CreateCondition(URLMatcherCondition::URL_PREFIX, @@ -424,11 +447,17 @@ URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition( return CreateCondition(URLMatcherCondition::URL_MATCHES, regex); } +URLMatcherCondition +URLMatcherConditionFactory::CreateOriginAndPathMatchesCondition( + const std::string& regex) { + return CreateCondition(URLMatcherCondition::ORIGIN_AND_PATH_MATCHES, regex); +} + void 
URLMatcherConditionFactory::ForgetUnusedPatterns( const std::set<StringPattern::ID>& used_patterns) { PatternSingletons::iterator i = substring_pattern_singletons_.begin(); while (i != substring_pattern_singletons_.end()) { - if (used_patterns.find((*i)->id()) != used_patterns.end()) { + if (ContainsKey(used_patterns, (*i)->id())) { ++i; } else { delete *i; @@ -437,27 +466,41 @@ void URLMatcherConditionFactory::ForgetUnusedPatterns( } i = regex_pattern_singletons_.begin(); while (i != regex_pattern_singletons_.end()) { - if (used_patterns.find((*i)->id()) != used_patterns.end()) { + if (ContainsKey(used_patterns, (*i)->id())) { ++i; } else { delete *i; regex_pattern_singletons_.erase(i++); } } + i = origin_and_path_regex_pattern_singletons_.begin(); + while (i != origin_and_path_regex_pattern_singletons_.end()) { + if (ContainsKey(used_patterns, (*i)->id())) { + ++i; + } else { + delete *i; + origin_and_path_regex_pattern_singletons_.erase(i++); + } + } } bool URLMatcherConditionFactory::IsEmpty() const { return substring_pattern_singletons_.empty() && - regex_pattern_singletons_.empty(); + regex_pattern_singletons_.empty() && + origin_and_path_regex_pattern_singletons_.empty(); } URLMatcherCondition URLMatcherConditionFactory::CreateCondition( URLMatcherCondition::Criterion criterion, const std::string& pattern) { StringPattern search_pattern(pattern, 0); - PatternSingletons* pattern_singletons = - IsRegexCriterion(criterion) ? 
&regex_pattern_singletons_ - : &substring_pattern_singletons_; + PatternSingletons* pattern_singletons = NULL; + if (IsRegexCriterion(criterion)) + pattern_singletons = &regex_pattern_singletons_; + else if (IsOriginAndPathRegexCriterion(criterion)) + pattern_singletons = &origin_and_path_regex_pattern_singletons_; + else + pattern_singletons = &substring_pattern_singletons_; PatternSingletons::const_iterator iter = pattern_singletons->find(&search_pattern); @@ -618,12 +661,23 @@ std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL( // See URLMatcherConditionFactory for the canonicalization of URLs and the // distinction between full url searches and url component searches. std::set<StringPattern::ID> matches; - full_url_matcher_.Match( - condition_factory_.CanonicalizeURLForFullSearches(url), &matches); - url_component_matcher_.Match( - condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); - regex_set_matcher_.Match( - condition_factory_.CanonicalizeURLForRegexSearches(url), &matches); + if (!full_url_matcher_.IsEmpty()) { + full_url_matcher_.Match( + condition_factory_.CanonicalizeURLForFullSearches(url), &matches); + } + if (!url_component_matcher_.IsEmpty()) { + url_component_matcher_.Match( + condition_factory_.CanonicalizeURLForComponentSearches(url), &matches); + } + if (!regex_set_matcher_.IsEmpty()) { + regex_set_matcher_.Match( + condition_factory_.CanonicalizeURLForRegexSearches(url), &matches); + } + if (!origin_and_path_regex_set_matcher_.IsEmpty()) { + origin_and_path_regex_set_matcher_.Match( + condition_factory_.CanonicalizeURLForOriginAndPathRegexSearches(url), + &matches); + } // Calculate all URLMatcherConditionSets for which all URLMatcherConditions // were fulfilled. 
@@ -659,6 +713,8 @@ bool URLMatcher::IsEmpty() const { substring_match_triggers_.empty() && full_url_matcher_.IsEmpty() && url_component_matcher_.IsEmpty() && + regex_set_matcher_.IsEmpty() && + origin_and_path_regex_set_matcher_.IsEmpty() && registered_full_url_patterns_.empty() && registered_url_component_patterns_.empty(); } @@ -683,6 +739,7 @@ void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { // If we are called to process Full URL searches, ignore others, and // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.) if (!condition_iter->IsRegexCondition() && + !condition_iter->IsOriginAndPathRegexCondition() && full_url_conditions == condition_iter->IsFullURLCondition()) new_patterns.insert(condition_iter->string_pattern()); } @@ -722,6 +779,7 @@ void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { void URLMatcher::UpdateRegexSetMatcher() { std::vector<const StringPattern*> new_patterns; + std::vector<const StringPattern*> new_origin_and_path_patterns; for (URLMatcherConditionSets::const_iterator condition_set_iter = url_matcher_condition_sets_.begin(); @@ -732,8 +790,12 @@ void URLMatcher::UpdateRegexSetMatcher() { for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = conditions.begin(); condition_iter != conditions.end(); ++condition_iter) { - if (condition_iter->IsRegexCondition()) + if (condition_iter->IsRegexCondition()) { new_patterns.push_back(condition_iter->string_pattern()); + } else if (condition_iter->IsOriginAndPathRegexCondition()) { + new_origin_and_path_patterns.push_back( + condition_iter->string_pattern()); + } } } @@ -741,6 +803,8 @@ void URLMatcher::UpdateRegexSetMatcher() { // FilteredRE2 backend doesn't support incremental updates. 
regex_set_matcher_.ClearPatterns(); regex_set_matcher_.AddPatterns(new_patterns); + origin_and_path_regex_set_matcher_.ClearPatterns(); + origin_and_path_regex_set_matcher_.AddPatterns(new_origin_and_path_patterns); } void URLMatcher::UpdateTriggers() { diff --git a/extensions/common/matcher/url_matcher.h b/extensions/common/matcher/url_matcher.h index fb7ace0f..d93a606 100644 --- a/extensions/common/matcher/url_matcher.h +++ b/extensions/common/matcher/url_matcher.h @@ -53,6 +53,7 @@ class URLMatcherCondition { URL_CONTAINS, URL_EQUALS, URL_MATCHES, + ORIGIN_AND_PATH_MATCHES, // Matches the URL minus its query string. }; URLMatcherCondition(); @@ -77,6 +78,10 @@ class URLMatcherCondition { // handled by a regex matcher instead of a substring matcher. bool IsRegexCondition() const; + // Returns whether this URLMatcherCondition is a regular expression that shall + // be evaluated on the URL without the query parameter. + bool IsOriginAndPathRegexCondition() const; + // Returns whether this condition is fulfilled according to // |matching_patterns| and |url|. bool IsMatch(const std::set<StringPattern::ID>& matching_patterns, @@ -155,6 +160,9 @@ class URLMatcherConditionFactory { // Canonicalizes a URL for "CreateURLMatchesCondition" searches. std::string CanonicalizeURLForRegexSearches(const GURL& url) const; + // Canonicalizes a URL for "CreateOriginAndPathMatchesCondition" searches. 
+ std::string CanonicalizeURLForOriginAndPathRegexSearches( + const GURL& url) const; URLMatcherCondition CreateURLPrefixCondition(const std::string& prefix); URLMatcherCondition CreateURLSuffixCondition(const std::string& suffix); @@ -162,6 +170,8 @@ class URLMatcherConditionFactory { URLMatcherCondition CreateURLEqualsCondition(const std::string& str); URLMatcherCondition CreateURLMatchesCondition(const std::string& regex); + URLMatcherCondition CreateOriginAndPathMatchesCondition( + const std::string& regex); // Removes all patterns from |pattern_singletons_| that are not listed in // |used_patterns|. These patterns are not referenced any more and get @@ -197,6 +207,7 @@ class URLMatcherConditionFactory { PatternSingletons; PatternSingletons substring_pattern_singletons_; PatternSingletons regex_pattern_singletons_; + PatternSingletons origin_and_path_regex_pattern_singletons_; DISALLOW_COPY_AND_ASSIGN(URLMatcherConditionFactory); }; @@ -332,6 +343,7 @@ class URLMatcher { SubstringSetMatcher full_url_matcher_; SubstringSetMatcher url_component_matcher_; RegexSetMatcher regex_set_matcher_; + RegexSetMatcher origin_and_path_regex_set_matcher_; std::set<const StringPattern*> registered_full_url_patterns_; std::set<const StringPattern*> registered_url_component_patterns_; diff --git a/extensions/common/matcher/url_matcher_constants.cc b/extensions/common/matcher/url_matcher_constants.cc index 4ae1347..5b23dea 100644 --- a/extensions/common/matcher/url_matcher_constants.cc +++ b/extensions/common/matcher/url_matcher_constants.cc @@ -15,6 +15,7 @@ const char kHostEqualsKey[] = "hostEquals"; const char kHostPrefixKey[] = "hostPrefix"; const char kHostSuffixKey[] = "hostSuffix"; const char kHostSuffixPathPrefixKey[] = "hostSuffixPathPrefix"; +const char kOriginAndPathMatchesKey[] = "originAndPathMatches"; const char kPathContainsKey[] = "pathContains"; const char kPathEqualsKey[] = "pathEquals"; const char kPathPrefixKey[] = "pathPrefix"; diff --git 
a/extensions/common/matcher/url_matcher_constants.h b/extensions/common/matcher/url_matcher_constants.h index aed5c6a..0618a52 100644 --- a/extensions/common/matcher/url_matcher_constants.h +++ b/extensions/common/matcher/url_matcher_constants.h @@ -18,6 +18,7 @@ extern const char kHostEqualsKey[]; extern const char kHostPrefixKey[]; extern const char kHostSuffixKey[]; extern const char kHostSuffixPathPrefixKey[]; +extern const char kOriginAndPathMatchesKey[]; extern const char kPathContainsKey[]; extern const char kPathEqualsKey[]; extern const char kPathPrefixKey[]; diff --git a/extensions/common/matcher/url_matcher_factory.cc b/extensions/common/matcher/url_matcher_factory.cc index d82b399..80e27f6 100644 --- a/extensions/common/matcher/url_matcher_factory.cc +++ b/extensions/common/matcher/url_matcher_factory.cc @@ -43,6 +43,8 @@ class URLMatcherConditionFactoryMethods { factory_methods_[keys::kHostEqualsKey] = &F::CreateHostEqualsCondition; factory_methods_[keys::kHostPrefixKey] = &F::CreateHostPrefixCondition; factory_methods_[keys::kHostSuffixKey] = &F::CreateHostSuffixCondition; + factory_methods_[keys::kOriginAndPathMatchesKey] = + &F::CreateOriginAndPathMatchesCondition; factory_methods_[keys::kPathContainsKey] = &F::CreatePathContainsCondition; factory_methods_[keys::kPathEqualsKey] = &F::CreatePathEqualsCondition; factory_methods_[keys::kPathPrefixKey] = &F::CreatePathPrefixCondition; diff --git a/extensions/common/matcher/url_matcher_unittest.cc b/extensions/common/matcher/url_matcher_unittest.cc index b17c13c..7ccc058 100644 --- a/extensions/common/matcher/url_matcher_unittest.cc +++ b/extensions/common/matcher/url_matcher_unittest.cc @@ -489,6 +489,16 @@ TEST(URLMatcherConditionSetTest, Matching) { EXPECT_FALSE(condition_set6->IsMatch(matching_patterns, url1)); matching_patterns.insert(m1.string_pattern()->id()); EXPECT_TRUE(condition_set6->IsMatch(matching_patterns, url1)); + + matching_patterns.clear(); + regex_conditions.clear(); + 
URLMatcherCondition r2 = factory.CreateOriginAndPathMatchesCondition("b[a]r"); + regex_conditions.insert(r2); + scoped_refptr<URLMatcherConditionSet> condition_set7( + new URLMatcherConditionSet(1, regex_conditions)); + EXPECT_FALSE(condition_set7->IsMatch(matching_patterns, url1)); + matching_patterns.insert(r2.string_pattern()->id()); + EXPECT_TRUE(condition_set7->IsMatch(matching_patterns, url1)); } @@ -631,4 +641,40 @@ TEST(URLMatcherTest, TestComponentsImplyContains) { EXPECT_EQ(1u, matcher.MatchURL(url).size()); } +// Check that matches in everything but the query are found. +TEST(URLMatcherTest, TestOriginAndPathRegExPositive) { + GURL url("https://www.google.com:1234/webhp?test=val&a=b"); + + URLMatcher matcher; + URLMatcherConditionFactory* factory = matcher.condition_factory(); + + URLMatcherConditionSet::Conditions conditions; + + conditions.insert(factory->CreateOriginAndPathMatchesCondition("w..hp")); + const int kConditionSetId = 1; + URLMatcherConditionSet::Vector insert; + insert.push_back(make_scoped_refptr( + new URLMatcherConditionSet(kConditionSetId, conditions))); + matcher.AddConditionSets(insert); + EXPECT_EQ(1u, matcher.MatchURL(url).size()); +} + +// Check that matches in the query are ignored. +TEST(URLMatcherTest, TestOriginAndPathRegExNegative) { + GURL url("https://www.google.com:1234/webhp?test=val&a=b"); + + URLMatcher matcher; + URLMatcherConditionFactory* factory = matcher.condition_factory(); + + URLMatcherConditionSet::Conditions conditions; + + conditions.insert(factory->CreateOriginAndPathMatchesCondition("val")); + const int kConditionSetId = 1; + URLMatcherConditionSet::Vector insert; + insert.push_back(make_scoped_refptr( + new URLMatcherConditionSet(kConditionSetId, conditions))); + matcher.AddConditionSets(insert); + EXPECT_EQ(0u, matcher.MatchURL(url).size()); +} + } // namespace extensions |