summaryrefslogtreecommitdiffstats
path: root/components/url_matcher/regex_set_matcher.cc
diff options
context:
space:
mode:
authorjoaodasilva@chromium.org <joaodasilva@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-12-13 20:36:53 +0000
committerjoaodasilva@chromium.org <joaodasilva@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-12-13 20:36:53 +0000
commit716c016d95025a8f4d42baab6639b9dc90498f2d (patch)
tree9efb703e070ecbfb1b73bfac9b350a3b81af14f6 /components/url_matcher/regex_set_matcher.cc
parent32c90a98f03fa68da4ba3d97a8e56ca70e92a07d (diff)
downloadchromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.zip
chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.tar.gz
chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.tar.bz2
Move extensions/common/matcher into components/url_matcher.
This allows using that code in builds that don't include extensions without having to introduce layering exceptions. This is meant for inclusion on the iOS build. BUG=271392 TBR=brettw@chromium.org Review URL: https://codereview.chromium.org/113903002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@240736 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'components/url_matcher/regex_set_matcher.cc')
-rw-r--r--components/url_matcher/regex_set_matcher.cc113
1 files changed, 113 insertions, 0 deletions
diff --git a/components/url_matcher/regex_set_matcher.cc b/components/url_matcher/regex_set_matcher.cc
new file mode 100644
index 0000000..c0b977c
--- /dev/null
+++ b/components/url_matcher/regex_set_matcher.cc
@@ -0,0 +1,113 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/url_matcher/regex_set_matcher.h"
+
+#include "base/logging.h"
+#include "base/stl_util.h"
+#include "base/strings/string_util.h"
+#include "components/url_matcher/substring_set_matcher.h"
+#include "third_party/re2/re2/filtered_re2.h"
+#include "third_party/re2/re2/re2.h"
+
+namespace url_matcher {
+
+RegexSetMatcher::RegexSetMatcher() {}  // All members are default-constructed.
+
+RegexSetMatcher::~RegexSetMatcher() {
+ DeleteSubstringPatterns();  // Frees patterns new'ed by RebuildMatcher().
+}
+
+// Registers every pattern in |regex_list| under its ID (a pattern whose ID is
+// already present replaces the earlier entry) and then recompiles the matcher.
+// An empty |regex_list| is a no-op and does not trigger a rebuild.
+void RegexSetMatcher::AddPatterns(
+    const std::vector<const StringPattern*>& regex_list) {
+  if (!regex_list.empty()) {
+    typedef std::vector<const StringPattern*>::const_iterator PatternIterator;
+    for (PatternIterator pattern = regex_list.begin();
+         pattern != regex_list.end(); ++pattern) {
+      regexes_[(*pattern)->id()] = *pattern;
+    }
+
+    RebuildMatcher();
+  }
+}
+
+void RegexSetMatcher::ClearPatterns() {  // Drops all registered regexes.
+ regexes_.clear();
+ RebuildMatcher();  // With no regexes: resets re2_id_map_ and filtered_re2_.
+}
+
+// Runs the registered regexes against |text| and inserts the IDs of the
+// matching patterns into |matches|. Returns true only if |matches| grew, i.e.
+// at least one ID that was not already in the set was added. Returns false
+// without modifying |matches| when no regexes are registered or when the
+// FilteredRE2 instance has not been created yet.
+bool RegexSetMatcher::Match(const std::string& text,
+                            std::set<StringPattern::ID>* matches) const {
+  const size_t initial_match_count = matches->size();
+  if (regexes_.empty())
+    return false;
+  if (!filtered_re2_.get()) {
+    LOG(ERROR) << "RegexSetMatcher was not initialized";
+    return false;
+  }
+
+  // The prefilter atoms are searched for in a lowercased copy of |text|, as
+  // FilteredRE2 expects; the regexes themselves still run case-sensitively
+  // against the unmodified |text|.
+  const std::string lowercase_text = StringToLowerASCII(text);
+  const std::vector<RE2ID> matched_atoms =
+      FindSubstringMatches(lowercase_text);
+
+  std::vector<RE2ID> matched_re2_ids;
+  filtered_re2_->AllMatches(text, matched_atoms, &matched_re2_ids);
+
+  // The IDs reported by FilteredRE2 index into |re2_id_map_|, which yields the
+  // corresponding StringPattern IDs.
+  for (std::vector<RE2ID>::const_iterator re2_id = matched_re2_ids.begin();
+       re2_id != matched_re2_ids.end(); ++re2_id) {
+    matches->insert(re2_id_map_[*re2_id]);
+  }
+  return matches->size() != initial_match_count;
+}
+
+bool RegexSetMatcher::IsEmpty() const {
+ return regexes_.empty();  // True when no regex patterns are registered.
+}
+
+// Returns the IDs of all prefilter substrings that |substring_matcher_| finds
+// in |text|, as a vector in ascending order without duplicates.
+std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches(
+    const std::string& text) const {
+  std::set<int> found_atoms;
+  substring_matcher_->Match(text, &found_atoms);
+
+  std::vector<RE2ID> atom_ids;
+  atom_ids.assign(found_atoms.begin(), found_atoms.end());
+  return atom_ids;
+}
+
+// Recompiles all matching state from |regexes_|: a fresh FilteredRE2 holding
+// every regex that parses, |re2_id_map_| translating FilteredRE2 IDs back to
+// StringPattern IDs, and a SubstringSetMatcher over the prefilter strings that
+// FilteredRE2::Compile() reports. When |regexes_| is empty only the ID map and
+// the FilteredRE2 instance are reset.
+void RegexSetMatcher::RebuildMatcher() {
+  re2_id_map_.clear();
+  filtered_re2_.reset(new re2::FilteredRE2());
+  if (regexes_.empty())
+    return;
+
+  for (RegexMap::iterator entry = regexes_.begin(); entry != regexes_.end();
+       ++entry) {
+    RE2ID assigned_id;
+    const RE2::ErrorCode status = filtered_re2_->Add(
+        entry->second->pattern(), RE2::DefaultOptions, &assigned_id);
+    if (status != RE2::NoError) {
+      // Unparseable regexes should have been rejected already in
+      // URLMatcherFactory::CreateURLMatchesCondition.
+      LOG(ERROR) << "Could not parse regex (id=" << entry->first << ", "
+                 << entry->second->pattern() << ")";
+      continue;
+    }
+    // The ID handed out by FilteredRE2 is expected to equal the next free
+    // index of |re2_id_map_|, so it can be used directly as a lookup index.
+    DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), assigned_id);
+    re2_id_map_.push_back(entry->first);
+  }
+
+  std::vector<std::string> prefilter_strings;
+  filtered_re2_->Compile(&prefilter_strings);
+
+  // SubstringSetMatcher doesn't own its patterns: the previous matcher is
+  // replaced first, and only then are the old patterns deleted.
+  substring_matcher_.reset(new SubstringSetMatcher);
+  DeleteSubstringPatterns();
+  // Each prefilter string becomes a StringPattern whose ID is its index in
+  // |prefilter_strings|.
+  const size_t prefilter_count = prefilter_strings.size();
+  for (size_t index = 0; index < prefilter_count; ++index) {
+    substring_patterns_.push_back(
+        new StringPattern(prefilter_strings[index], index));
+  }
+  substring_matcher_->RegisterPatterns(substring_patterns_);
+}
+
+void RegexSetMatcher::DeleteSubstringPatterns() {
+ STLDeleteElements(&substring_patterns_);  // Frees RebuildMatcher()'s patterns.
+}
+
+} // namespace url_matcher