diff options
author | joaodasilva@chromium.org <joaodasilva@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-12-13 20:36:53 +0000 |
---|---|---|
committer | joaodasilva@chromium.org <joaodasilva@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-12-13 20:36:53 +0000 |
commit | 716c016d95025a8f4d42baab6639b9dc90498f2d (patch) | |
tree | 9efb703e070ecbfb1b73bfac9b350a3b81af14f6 /components/url_matcher/regex_set_matcher.h | |
parent | 32c90a98f03fa68da4ba3d97a8e56ca70e92a07d (diff) | |
download | chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.zip chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.tar.gz chromium_src-716c016d95025a8f4d42baab6639b9dc90498f2d.tar.bz2 |
Move extensions/common/matcher into components/url_matcher.
This allows using that code in builds that don't include extensions without
having to introduce layering exceptions. This is meant for inclusion on the
iOS build.
BUG=271392
TBR=brettw@chromium.org
Review URL: https://codereview.chromium.org/113903002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@240736 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'components/url_matcher/regex_set_matcher.h')
-rw-r--r-- | components/url_matcher/regex_set_matcher.h | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/components/url_matcher/regex_set_matcher.h b/components/url_matcher/regex_set_matcher.h new file mode 100644 index 0000000..e4eb1dc --- /dev/null +++ b/components/url_matcher/regex_set_matcher.h @@ -0,0 +1,83 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_ +#define COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_ + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/memory/scoped_ptr.h" +#include "components/url_matcher/string_pattern.h" +#include "components/url_matcher/substring_set_matcher.h" +#include "components/url_matcher/url_matcher_export.h" + +namespace re2 { +class FilteredRE2; +} + +namespace url_matcher { + +// Efficiently matches URLs against a collection of regular expressions, +// using FilteredRE2 to reduce the number of regexes that must be matched +// by pre-filtering with substring matching. See: +// http://swtch.com/~rsc/regexp/regexp3.html#analysis +class URL_MATCHER_EXPORT RegexSetMatcher { + public: + RegexSetMatcher(); + virtual ~RegexSetMatcher(); + + // Adds the regex patterns in |regex_list| to the matcher. Also rebuilds + // the FilteredRE2 matcher; thus, for efficiency, prefer adding multiple + // patterns at once. + // Ownership of the patterns remains with the caller. + void AddPatterns(const std::vector<const StringPattern*>& regex_list); + + // Removes all regex patterns. + void ClearPatterns(); + + // Appends the IDs of regular expressions in our set that match the |text| + // to |matches|. + // NOTE(review): the meaning of the bool result is not documented in this + // header; presumably it reports whether anything matched. Confirm against + // the implementation in regex_set_matcher.cc. 
+ bool Match(const std::string& text, + std::set<StringPattern::ID>* matches) const; + + bool IsEmpty() const; + + private: + typedef int RE2ID; + typedef std::map<StringPattern::ID, const StringPattern*> RegexMap; + typedef std::vector<StringPattern::ID> RE2IDMap; + + // Use Aho-Corasick SubstringSetMatcher to find which literal patterns + // match the |text|. + // The result is returned as a vector of RE2ID values. + std::vector<RE2ID> FindSubstringMatches(const std::string& text) const; + + // Rebuild FilteredRE2 from scratch. Needs to be called whenever + // our set of regexes changes. + // TODO(yoz): investigate if it could be done incrementally; + // apparently not supported by FilteredRE2. + void RebuildMatcher(); + + // Clean up StringPatterns in |substring_patterns_|. + void DeleteSubstringPatterns(); + + // Mapping of regex StringPattern::IDs to regexes. + RegexMap regexes_; + // Mapping of RE2IDs from FilteredRE2 (which are assigned in order) + // to regex StringPattern::IDs. + RE2IDMap re2_id_map_; + + // The FilteredRE2 instance (see RebuildMatcher()) and the + // SubstringSetMatcher used for the substring pre-filtering step + // (see FindSubstringMatches()). + scoped_ptr<re2::FilteredRE2> filtered_re2_; + scoped_ptr<SubstringSetMatcher> substring_matcher_; + + // The substring patterns from FilteredRE2, which are used in + // |substring_matcher_| but whose lifetime is managed here. + std::vector<const StringPattern*> substring_patterns_; +}; + +} // namespace url_matcher + +#endif // COMPONENTS_URL_MATCHER_REGEX_SET_MATCHER_H_ |