diff options
author | battre@chromium.org <battre@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-04-23 08:12:27 +0000 |
---|---|---|
committer | battre@chromium.org <battre@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-04-23 08:12:27 +0000 |
commit | 342a79f2385be0c176cc9a175709061ab91a35ad (patch) | |
tree | 741cd7dbf74db03ebb15e07be0438b2cea17066e /third_party | |
parent | 50833ddb63f44b1b4e4661b713940c9d17ecfd64 (diff) | |
download | chromium_src-342a79f2385be0c176cc9a175709061ab91a35ad.zip chromium_src-342a79f2385be0c176cc9a175709061ab91a35ad.tar.gz chromium_src-342a79f2385be0c176cc9a175709061ab91a35ad.tar.bz2 |
Use std::map<int, int> instead of SparseArray<int> to save memory.
The PrefilterTree used a SparseArray<int> for pointers to "parents" (think of
this as pointers to ancestors). Despite its name, each SparseArray that is
allocated in such a way that is can store up to N element actually consumes
O(N) memory. Always! Regardless of its utilization! Given that in a tree with
N nodes, each node could have up to O(N) ancestors, this makes the
PrefilterTree consume O(N^2) memory for N nodes. In practice PrefilterTree
nodes have only few ancestor pointers ("parents"). Therefore, it is more
economic to use a std::map<int, int> which consumes less memory.
BUG=112155
NOTRY=true
Review URL: https://chromiumcodereview.appspot.com/14120010
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@195762 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party')
-rw-r--r-- | third_party/re2/README.chromium | 2 | ||||
-rw-r--r-- | third_party/re2/patches/re2-memory-optimization.patch | 133 | ||||
-rw-r--r-- | third_party/re2/re2/prefilter_tree.cc | 36 | ||||
-rw-r--r-- | third_party/re2/re2/prefilter_tree.h | 5 |
4 files changed, 158 insertions, 18 deletions
diff --git a/third_party/re2/README.chromium b/third_party/re2/README.chromium index c781629..3617102 100644 --- a/third_party/re2/README.chromium +++ b/third_party/re2/README.chromium @@ -21,3 +21,5 @@ Local Modifications (to be applied in this order): - Rename POSIX configuration (patches/rename-posix-option.patch) - Support libcxx (patches/re2-libcxx.patch) https://code.google.com/p/re2/issues/detail?id=76 +- Memory optimization for filtered trees + (patches/re2-memory-optimization.patch) diff --git a/third_party/re2/patches/re2-memory-optimization.patch b/third_party/re2/patches/re2-memory-optimization.patch new file mode 100644 index 0000000..05299b3 --- /dev/null +++ b/third_party/re2/patches/re2-memory-optimization.patch @@ -0,0 +1,133 @@ +diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc +--- a/re2/prefilter_tree.cc ++++ b/re2/prefilter_tree.cc +@@ -107,21 +107,23 @@ void PrefilterTree::Compile(vector<string>* atom_vec) { + // not miss out on any regexps triggering by getting rid of a + // prefilter node. + for (int i = 0; i < entries_.size(); i++) { +- IntMap* parents = entries_[i].parents; ++ StdIntMap* parents = entries_[i].parents; + if (parents->size() > 8) { + // This one triggers too many things. If all the parents are AND + // nodes and have other things guarding them, then get rid of + // this trigger. TODO(vsri): Adjust the threshold appropriately, + // make it a function of total number of nodes? + bool have_other_guard = true; +- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it) ++ for (StdIntMap::iterator it = parents->begin(); ++ it != parents->end(); ++it) { + have_other_guard = have_other_guard && +- (entries_[it->index()].propagate_up_at_count > 1); ++ (entries_[it->first].propagate_up_at_count > 1); ++ } + + if (have_other_guard) { +- for (IntMap::iterator it = parents->begin(); ++ for (StdIntMap::iterator it = parents->begin(); + it != parents->end(); ++it) +- entries_[it->index()].propagate_up_at_count -= 1; ++ entries_[it->first].propagate_up_at_count -= 1; + + parents->clear(); // Forget the parents + } +@@ -213,7 +215,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { + } + entries_.resize(node_map_.size()); + +- // Create parent IntMap for the entries. ++ // Create parent StdIntMap for the entries. + for (int i = v.size() - 1; i >= 0; i--) { + Prefilter* prefilter = v[i]; + if (prefilter == NULL) +@@ -223,7 +225,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { + continue; + + Entry* entry = &entries_[prefilter->unique_id()]; +- entry->parents = new IntMap(node_map_.size()); ++ entry->parents = new StdIntMap(); + } + + // Fill the entries. +@@ -249,7 +251,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { + + case Prefilter::OR: + case Prefilter::AND: { +- IntMap uniq_child(node_map_.size()); ++ std::set<int> uniq_child; + for (int j = 0; j < prefilter->subs()->size() ; j++) { + Prefilter* child = (*prefilter->subs())[j]; + Prefilter* canonical = CanonicalNode(child); +@@ -258,12 +260,12 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { + return; + } + int child_id = canonical->unique_id(); +- if (!uniq_child.has_index(child_id)) +- uniq_child.set_new(child_id, 1); ++ uniq_child.insert(child_id); + // To the child, we want to add to parent indices. + Entry* child_entry = &entries_[child_id]; +- if (!child_entry->parents->has_index(prefilter->unique_id())) +- child_entry->parents->set_new(prefilter->unique_id(), 1); ++ if (child_entry->parents->find(prefilter->unique_id()) == ++ child_entry->parents->end()) ++ (*child_entry->parents)[prefilter->unique_id()] = 1; + } + entry->propagate_up_at_count = + prefilter->op() == Prefilter::AND ? uniq_child.size() : 1; +@@ -329,10 +331,10 @@ void PrefilterTree::PropagateMatch(const vector<int>& atom_ids, + } + int c; + // Pass trigger up to parents. +- for (IntMap::iterator it = entry.parents->begin(); ++ for (StdIntMap::iterator it = entry.parents->begin(); + it != entry.parents->end(); + ++it) { +- int j = it->index(); ++ int j = it->first; + const Entry& parent = entries_[j]; + VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count; + // Delay until all the children have succeeded. +@@ -364,12 +366,12 @@ void PrefilterTree::PrintDebugInfo() { + VLOG(10) << "#Unique Nodes: " << entries_.size(); + + for (int i = 0; i < entries_.size(); ++i) { +- IntMap* parents = entries_[i].parents; ++ StdIntMap* parents = entries_[i].parents; + const vector<int>& regexps = entries_[i].regexps; + VLOG(10) << "EntryId: " << i + << " N: " << parents->size() << " R: " << regexps.size(); +- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it) +- VLOG(10) << it->index(); ++ for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) ++ VLOG(10) << it->first; + } + VLOG(10) << "Map:"; + for (map<string, Prefilter*>::const_iterator iter = node_map_.begin(); +diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h +--- a/re2/prefilter_tree.h ++++ b/re2/prefilter_tree.h +@@ -16,12 +16,15 @@ + #ifndef RE2_PREFILTER_TREE_H_ + #define RE2_PREFILTER_TREE_H_ + ++#include <map> ++ + #include "util/util.h" + #include "util/sparse_array.h" + + namespace re2 { + + typedef SparseArray<int> IntMap; ++typedef std::map<int, int> StdIntMap; + + class Prefilter; + +@@ -71,7 +74,7 @@ class PrefilterTree { + // are two different nodes, but they share the atom 'def'. So when + // 'def' matches, it triggers two parents, corresponding to the two + // different OR nodes. +- IntMap* parents; ++ StdIntMap* parents; + + // When this node is ready to trigger the parent, what are the + // regexps that are triggered. diff --git a/third_party/re2/re2/prefilter_tree.cc b/third_party/re2/re2/prefilter_tree.cc index cdcf77e..e5c465b 100644 --- a/third_party/re2/re2/prefilter_tree.cc +++ b/third_party/re2/re2/prefilter_tree.cc @@ -107,21 +107,23 @@ void PrefilterTree::Compile(vector<string>* atom_vec) { // not miss out on any regexps triggering by getting rid of a // prefilter node. for (int i = 0; i < entries_.size(); i++) { - IntMap* parents = entries_[i].parents; + StdIntMap* parents = entries_[i].parents; if (parents->size() > 8) { // This one triggers too many things. If all the parents are AND // nodes and have other things guarding them, then get rid of // this trigger. TODO(vsri): Adjust the threshold appropriately, // make it a function of total number of nodes? bool have_other_guard = true; - for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it) + for (StdIntMap::iterator it = parents->begin(); + it != parents->end(); ++it) { have_other_guard = have_other_guard && - (entries_[it->index()].propagate_up_at_count > 1); + (entries_[it->first].propagate_up_at_count > 1); + } if (have_other_guard) { - for (IntMap::iterator it = parents->begin(); + for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) - entries_[it->index()].propagate_up_at_count -= 1; + entries_[it->first].propagate_up_at_count -= 1; parents->clear(); // Forget the parents } @@ -213,7 +215,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { } entries_.resize(node_map_.size()); - // Create parent IntMap for the entries. + // Create parent StdIntMap for the entries. for (int i = v.size() - 1; i >= 0; i--) { Prefilter* prefilter = v[i]; if (prefilter == NULL) @@ -223,7 +225,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { continue; Entry* entry = &entries_[prefilter->unique_id()]; - entry->parents = new IntMap(node_map_.size()); + entry->parents = new StdIntMap(); } // Fill the entries. @@ -249,7 +251,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { case Prefilter::OR: case Prefilter::AND: { - IntMap uniq_child(node_map_.size()); + std::set<int> uniq_child; for (int j = 0; j < prefilter->subs()->size() ; j++) { Prefilter* child = (*prefilter->subs())[j]; Prefilter* canonical = CanonicalNode(child); @@ -258,12 +260,12 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { return; } int child_id = canonical->unique_id(); - if (!uniq_child.has_index(child_id)) - uniq_child.set_new(child_id, 1); + uniq_child.insert(child_id); // To the child, we want to add to parent indices. Entry* child_entry = &entries_[child_id]; - if (!child_entry->parents->has_index(prefilter->unique_id())) - child_entry->parents->set_new(prefilter->unique_id(), 1); + if (child_entry->parents->find(prefilter->unique_id()) == + child_entry->parents->end()) + (*child_entry->parents)[prefilter->unique_id()] = 1; } entry->propagate_up_at_count = prefilter->op() == Prefilter::AND ? uniq_child.size() : 1; @@ -329,10 +331,10 @@ void PrefilterTree::PropagateMatch(const vector<int>& atom_ids, } int c; // Pass trigger up to parents. - for (IntMap::iterator it = entry.parents->begin(); + for (StdIntMap::iterator it = entry.parents->begin(); it != entry.parents->end(); ++it) { - int j = it->index(); + int j = it->first; const Entry& parent = entries_[j]; VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count; // Delay until all the children have succeeded. @@ -364,12 +366,12 @@ void PrefilterTree::PrintDebugInfo() { VLOG(10) << "#Unique Nodes: " << entries_.size(); for (int i = 0; i < entries_.size(); ++i) { - IntMap* parents = entries_[i].parents; + StdIntMap* parents = entries_[i].parents; const vector<int>& regexps = entries_[i].regexps; VLOG(10) << "EntryId: " << i << " N: " << parents->size() << " R: " << regexps.size(); - for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it) - VLOG(10) << it->index(); + for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) + VLOG(10) << it->first; } VLOG(10) << "Map:"; for (map<string, Prefilter*>::const_iterator iter = node_map_.begin(); diff --git a/third_party/re2/re2/prefilter_tree.h b/third_party/re2/re2/prefilter_tree.h index 596b734..94eb183 100644 --- a/third_party/re2/re2/prefilter_tree.h +++ b/third_party/re2/re2/prefilter_tree.h @@ -16,12 +16,15 @@ #ifndef RE2_PREFILTER_TREE_H_ #define RE2_PREFILTER_TREE_H_ +#include <map> + #include "util/util.h" #include "util/sparse_array.h" namespace re2 { typedef SparseArray<int> IntMap; +typedef std::map<int, int> StdIntMap; class Prefilter; @@ -71,7 +74,7 @@ class PrefilterTree { // are two different nodes, but they share the atom 'def'. So when // 'def' matches, it triggers two parents, corresponding to the two // different OR nodes. - IntMap* parents; + StdIntMap* parents; // When this node is ready to trigger the parent, what are the // regexps that are triggered. |