summaryrefslogtreecommitdiffstats
path: root/components/url_matcher/regex_set_matcher.h
diff options
context:
space:
mode:
authorjoaodasilva@chromium.org <joaodasilva@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-12-13 20:36:53 +0000
committerjoaodasilva@chromium.org <joaodasilva@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-12-13 20:36:53 +0000
commit716c016d95025a8f4d42baab6639b9dc90498f2d (patch)
tree9efb703e070ecbfb1b73bfac9b350a3b81af14f6 /components/url_matcher/regex_set_matcher.h
parent32c90a98f03fa68da4ba3d97a8e56ca70e92a07d (diff)
downloadchromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.zip
chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.tar.gz
chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.tar.bz2
Move extensions/common/matcher into components/url_matcher.
This allows using that code in builds that don't include extensions without having to introduce layering exceptions. This is meant for inclusion on the iOS build. BUG=271392 TBR=brettw@chromium.org Review URL: https://codereview.chromium.org/113903002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@240736 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'components/url_matcher/regex_set_matcher.h')
-rw-r--r--components/url_matcher/regex_set_matcher.h83
1 files changed, 83 insertions, 0 deletions
diff --git a/components/url_matcher/regex_set_matcher.h b/components/url_matcher/regex_set_matcher.h
new file mode 100644
index 0000000..e4eb1dc
--- /dev/null
+++ b/components/url_matcher/regex_set_matcher.h
@@ -0,0 +1,83 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_
+#define COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/memory/scoped_ptr.h"
+#include "components/url_matcher/string_pattern.h"
+#include "components/url_matcher/substring_set_matcher.h"
+#include "components/url_matcher/url_matcher_export.h"
+
+namespace re2 {
+class FilteredRE2;
+}
+
+namespace url_matcher {
+
+// Efficiently matches URLs against a collection of regular expressions,
+// using FilteredRE2 to reduce the number of regexes that must be matched
+// by pre-filtering with substring matching. See:
+// http://swtch.com/~rsc/regexp/regexp3.html#analysis
+class URL_MATCHER_EXPORT RegexSetMatcher {
+ public:
+ RegexSetMatcher();
+ virtual ~RegexSetMatcher();
+
+ // Adds the regex patterns in |regex_list| to the matcher. Also rebuilds
+ // the FilteredRE2 matcher; thus, for efficiency, prefer adding multiple
+ // patterns at once.
+ // Ownership of the patterns remains with the caller.
+ void AddPatterns(const std::vector<const StringPattern*>& regex_list);
+
+ // Removes all regex patterns.
+ void ClearPatterns();
+
+ // Appends the IDs of regular expressions in our set that match the |text|
+ // to |matches|.
+ bool Match(const std::string& text,
+ std::set<StringPattern::ID>* matches) const;
+
+ bool IsEmpty() const;
+
+ private:
+ typedef int RE2ID;
+ typedef std::map<StringPattern::ID, const StringPattern*> RegexMap;
+ typedef std::vector<StringPattern::ID> RE2IDMap;
+
+ // Use Aho-Corasick SubstringSetMatcher to find which literal patterns
+ // match the |text|.
+ std::vector<RE2ID> FindSubstringMatches(const std::string& text) const;
+
+ // Rebuild FilteredRE2 from scratch. Needs to be called whenever
+ // our set of regexes changes.
+ // TODO(yoz): investigate if it could be done incrementally;
+ // apparently not supported by FilteredRE2.
+ void RebuildMatcher();
+
+ // Clean up StringPatterns in |substring_patterns_|.
+ void DeleteSubstringPatterns();
+
+ // Mapping of regex StringPattern::IDs to regexes.
+ RegexMap regexes_;
+ // Mapping of RE2IDs from FilteredRE2 (which are assigned in order)
+ // to regex StringPattern::IDs.
+ RE2IDMap re2_id_map_;
+
+ scoped_ptr<re2::FilteredRE2> filtered_re2_;
+ scoped_ptr<SubstringSetMatcher> substring_matcher_;
+
+ // The substring patterns from FilteredRE2, which are used in
+ // |substring_matcher_| but whose lifetime is managed here.
+ std::vector<const StringPattern*> substring_patterns_;
+};
+
+} // namespace url_matcher
+
+#endif // COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_