diff options
author | joaodasilva@chromium.org <joaodasilva@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-12-13 20:36:53 +0000 |
---|---|---|
committer | joaodasilva@chromium.org <joaodasilva@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-12-13 20:36:53 +0000 |
commit | 716c016d95025a8f4d42baab6639b9dc90498f2d (patch) | |
tree | 9efb703e070ecbfb1b73bfac9b350a3b81af14f6 /components/url_matcher/regex_set_matcher.cc | |
parent | 32c90a98f03fa68da4ba3d97a8e56ca70e92a07d (diff) | |
download | chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.zip chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.tar.gz chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.tar.bz2 |
Move extensions/common/matcher into components/url_matcher.
This allows using that code in builds that don't include extensions without
having to introduce layering exceptions. This is meant for inclusion on the
iOS build.
BUG=271392
TBR=brettw@chromium.org
Review URL: https://codereview.chromium.org/113903002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@240736 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'components/url_matcher/regex_set_matcher.cc')
-rw-r--r-- | components/url_matcher/regex_set_matcher.cc | 113 |
1 files changed, 113 insertions, 0 deletions
diff --git a/components/url_matcher/regex_set_matcher.cc b/components/url_matcher/regex_set_matcher.cc new file mode 100644 index 0000000..c0b977c --- /dev/null +++ b/components/url_matcher/regex_set_matcher.cc @@ -0,0 +1,113 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/url_matcher/regex_set_matcher.h" + +#include "base/logging.h" +#include "base/stl_util.h" +#include "base/strings/string_util.h" +#include "components/url_matcher/substring_set_matcher.h" +#include "third_party/re2/re2/filtered_re2.h" +#include "third_party/re2/re2/re2.h" + +namespace url_matcher { + +RegexSetMatcher::RegexSetMatcher() {} + +RegexSetMatcher::~RegexSetMatcher() { + DeleteSubstringPatterns(); +} + +void RegexSetMatcher::AddPatterns( + const std::vector<const StringPattern*>& regex_list) { + if (regex_list.empty()) + return; + for (size_t i = 0; i < regex_list.size(); ++i) { + regexes_[regex_list[i]->id()] = regex_list[i]; + } + + RebuildMatcher(); +} + +void RegexSetMatcher::ClearPatterns() { + regexes_.clear(); + RebuildMatcher(); +} + +bool RegexSetMatcher::Match(const std::string& text, + std::set<StringPattern::ID>* matches) const { + size_t old_number_of_matches = matches->size(); + if (regexes_.empty()) + return false; + if (!filtered_re2_.get()) { + LOG(ERROR) << "RegexSetMatcher was not initialized"; + return false; + } + + // FilteredRE2 expects lowercase for prefiltering, but we still + // match case-sensitively. + std::vector<RE2ID> atoms(FindSubstringMatches( + StringToLowerASCII(text))); + + std::vector<RE2ID> re2_ids; + filtered_re2_->AllMatches(text, atoms, &re2_ids); + + for (size_t i = 0; i < re2_ids.size(); ++i) { + StringPattern::ID id = re2_id_map_[re2_ids[i]]; + matches->insert(id); + } + return old_number_of_matches != matches->size(); +} + +bool RegexSetMatcher::IsEmpty() const { + return regexes_.empty(); +} + +std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches( + const std::string& text) const { + std::set<int> atoms_set; + substring_matcher_->Match(text, &atoms_set); + return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end()); +} + +void RegexSetMatcher::RebuildMatcher() { + re2_id_map_.clear(); + filtered_re2_.reset(new re2::FilteredRE2()); + if (regexes_.empty()) + return; + + for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) { + RE2ID re2_id; + RE2::ErrorCode error = filtered_re2_->Add( + it->second->pattern(), RE2::DefaultOptions, &re2_id); + if (error == RE2::NoError) { + DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id); + re2_id_map_.push_back(it->first); + } else { + // Unparseable regexes should have been rejected already in + // URLMatcherFactory::CreateURLMatchesCondition. + LOG(ERROR) << "Could not parse regex (id=" << it->first << ", " + << it->second->pattern() << ")"; + } + } + + std::vector<std::string> strings_to_match; + filtered_re2_->Compile(&strings_to_match); + + substring_matcher_.reset(new SubstringSetMatcher); + DeleteSubstringPatterns(); + // Build SubstringSetMatcher from |strings_to_match|. + // SubstringSetMatcher doesn't own its strings. + for (size_t i = 0; i < strings_to_match.size(); ++i) { + substring_patterns_.push_back( + new StringPattern(strings_to_match[i], i)); + } + substring_matcher_->RegisterPatterns(substring_patterns_); +} + +void RegexSetMatcher::DeleteSubstringPatterns() { + STLDeleteElements(&substring_patterns_); +} + +} // namespace url_matcher |