summaryrefslogtreecommitdiffstats
path: root/third_party
diff options
context:
space:
mode:
authorbattre@chromium.org <battre@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-04-23 08:12:27 +0000
committerbattre@chromium.org <battre@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-04-23 08:12:27 +0000
commit342a79f2385be0c176cc9a175709061ab91a35ad (patch)
tree741cd7dbf74db03ebb15e07be0438b2cea17066e /third_party
parent50833ddb63f44b1b4e4661b713940c9d17ecfd64 (diff)
downloadchromium_src-342a79f2385be0c176cc9a175709061ab91a35ad.zip
chromium_src-342a79f2385be0c176cc9a175709061ab91a35ad.tar.gz
chromium_src-342a79f2385be0c176cc9a175709061ab91a35ad.tar.bz2
Use std::map<int, int> instead of SparseArray<int> to save memory.
The PrefilterTree used a SparseArray<int> for pointers to "parents" (think of this as pointers to ancestors). Despite its name, each SparseArray that is allocated in such a way that is can store up to N element actually consumes O(N) memory. Always! Regardless of its utilization! Given that in a tree with N nodes, each node could have up to O(N) ancestors, this makes the PrefilterTree consume O(N^2) memory for N nodes. In practice PrefilterTree nodes have only few ancestor pointers ("parents"). Therefore, it is more economic to use a std::map<int, int> which consumes less memory. BUG=112155 NOTRY=true Review URL: https://chromiumcodereview.appspot.com/14120010 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@195762 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party')
-rw-r--r--third_party/re2/README.chromium2
-rw-r--r--third_party/re2/patches/re2-memory-optimization.patch133
-rw-r--r--third_party/re2/re2/prefilter_tree.cc36
-rw-r--r--third_party/re2/re2/prefilter_tree.h5
4 files changed, 158 insertions, 18 deletions
diff --git a/third_party/re2/README.chromium b/third_party/re2/README.chromium
index c781629..3617102 100644
--- a/third_party/re2/README.chromium
+++ b/third_party/re2/README.chromium
@@ -21,3 +21,5 @@ Local Modifications (to be applied in this order):
- Rename POSIX configuration (patches/rename-posix-option.patch)
- Support libcxx (patches/re2-libcxx.patch)
https://code.google.com/p/re2/issues/detail?id=76
+- Memory optimization for filtered trees
+ (patches/re2-memory-optimization.patch)
diff --git a/third_party/re2/patches/re2-memory-optimization.patch b/third_party/re2/patches/re2-memory-optimization.patch
new file mode 100644
index 0000000..05299b3
--- /dev/null
+++ b/third_party/re2/patches/re2-memory-optimization.patch
@@ -0,0 +1,133 @@
+diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
+--- a/re2/prefilter_tree.cc
++++ b/re2/prefilter_tree.cc
+@@ -107,21 +107,23 @@ void PrefilterTree::Compile(vector<string>* atom_vec) {
+ // not miss out on any regexps triggering by getting rid of a
+ // prefilter node.
+ for (int i = 0; i < entries_.size(); i++) {
+- IntMap* parents = entries_[i].parents;
++ StdIntMap* parents = entries_[i].parents;
+ if (parents->size() > 8) {
+ // This one triggers too many things. If all the parents are AND
+ // nodes and have other things guarding them, then get rid of
+ // this trigger. TODO(vsri): Adjust the threshold appropriately,
+ // make it a function of total number of nodes?
+ bool have_other_guard = true;
+- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
++ for (StdIntMap::iterator it = parents->begin();
++ it != parents->end(); ++it) {
+ have_other_guard = have_other_guard &&
+- (entries_[it->index()].propagate_up_at_count > 1);
++ (entries_[it->first].propagate_up_at_count > 1);
++ }
+
+ if (have_other_guard) {
+- for (IntMap::iterator it = parents->begin();
++ for (StdIntMap::iterator it = parents->begin();
+ it != parents->end(); ++it)
+- entries_[it->index()].propagate_up_at_count -= 1;
++ entries_[it->first].propagate_up_at_count -= 1;
+
+ parents->clear(); // Forget the parents
+ }
+@@ -213,7 +215,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
+ }
+ entries_.resize(node_map_.size());
+
+- // Create parent IntMap for the entries.
++ // Create parent StdIntMap for the entries.
+ for (int i = v.size() - 1; i >= 0; i--) {
+ Prefilter* prefilter = v[i];
+ if (prefilter == NULL)
+@@ -223,7 +225,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
+ continue;
+
+ Entry* entry = &entries_[prefilter->unique_id()];
+- entry->parents = new IntMap(node_map_.size());
++ entry->parents = new StdIntMap();
+ }
+
+ // Fill the entries.
+@@ -249,7 +251,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
+
+ case Prefilter::OR:
+ case Prefilter::AND: {
+- IntMap uniq_child(node_map_.size());
++ std::set<int> uniq_child;
+ for (int j = 0; j < prefilter->subs()->size() ; j++) {
+ Prefilter* child = (*prefilter->subs())[j];
+ Prefilter* canonical = CanonicalNode(child);
+@@ -258,12 +260,12 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
+ return;
+ }
+ int child_id = canonical->unique_id();
+- if (!uniq_child.has_index(child_id))
+- uniq_child.set_new(child_id, 1);
++ uniq_child.insert(child_id);
+ // To the child, we want to add to parent indices.
+ Entry* child_entry = &entries_[child_id];
+- if (!child_entry->parents->has_index(prefilter->unique_id()))
+- child_entry->parents->set_new(prefilter->unique_id(), 1);
++ if (child_entry->parents->find(prefilter->unique_id()) ==
++ child_entry->parents->end())
++ (*child_entry->parents)[prefilter->unique_id()] = 1;
+ }
+ entry->propagate_up_at_count =
+ prefilter->op() == Prefilter::AND ? uniq_child.size() : 1;
+@@ -329,10 +331,10 @@ void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,
+ }
+ int c;
+ // Pass trigger up to parents.
+- for (IntMap::iterator it = entry.parents->begin();
++ for (StdIntMap::iterator it = entry.parents->begin();
+ it != entry.parents->end();
+ ++it) {
+- int j = it->index();
++ int j = it->first;
+ const Entry& parent = entries_[j];
+ VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;
+ // Delay until all the children have succeeded.
+@@ -364,12 +366,12 @@ void PrefilterTree::PrintDebugInfo() {
+ VLOG(10) << "#Unique Nodes: " << entries_.size();
+
+ for (int i = 0; i < entries_.size(); ++i) {
+- IntMap* parents = entries_[i].parents;
++ StdIntMap* parents = entries_[i].parents;
+ const vector<int>& regexps = entries_[i].regexps;
+ VLOG(10) << "EntryId: " << i
+ << " N: " << parents->size() << " R: " << regexps.size();
+- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
+- VLOG(10) << it->index();
++ for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
++ VLOG(10) << it->first;
+ }
+ VLOG(10) << "Map:";
+ for (map<string, Prefilter*>::const_iterator iter = node_map_.begin();
+diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h
+--- a/re2/prefilter_tree.h
++++ b/re2/prefilter_tree.h
+@@ -16,12 +16,15 @@
+ #ifndef RE2_PREFILTER_TREE_H_
+ #define RE2_PREFILTER_TREE_H_
+
++#include <map>
++
+ #include "util/util.h"
+ #include "util/sparse_array.h"
+
+ namespace re2 {
+
+ typedef SparseArray<int> IntMap;
++typedef std::map<int, int> StdIntMap;
+
+ class Prefilter;
+
+@@ -71,7 +74,7 @@ class PrefilterTree {
+ // are two different nodes, but they share the atom 'def'. So when
+ // 'def' matches, it triggers two parents, corresponding to the two
+ // different OR nodes.
+- IntMap* parents;
++ StdIntMap* parents;
+
+ // When this node is ready to trigger the parent, what are the
+ // regexps that are triggered.
diff --git a/third_party/re2/re2/prefilter_tree.cc b/third_party/re2/re2/prefilter_tree.cc
index cdcf77e..e5c465b 100644
--- a/third_party/re2/re2/prefilter_tree.cc
+++ b/third_party/re2/re2/prefilter_tree.cc
@@ -107,21 +107,23 @@ void PrefilterTree::Compile(vector<string>* atom_vec) {
// not miss out on any regexps triggering by getting rid of a
// prefilter node.
for (int i = 0; i < entries_.size(); i++) {
- IntMap* parents = entries_[i].parents;
+ StdIntMap* parents = entries_[i].parents;
if (parents->size() > 8) {
// This one triggers too many things. If all the parents are AND
// nodes and have other things guarding them, then get rid of
// this trigger. TODO(vsri): Adjust the threshold appropriately,
// make it a function of total number of nodes?
bool have_other_guard = true;
- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
+ for (StdIntMap::iterator it = parents->begin();
+ it != parents->end(); ++it) {
have_other_guard = have_other_guard &&
- (entries_[it->index()].propagate_up_at_count > 1);
+ (entries_[it->first].propagate_up_at_count > 1);
+ }
if (have_other_guard) {
- for (IntMap::iterator it = parents->begin();
+ for (StdIntMap::iterator it = parents->begin();
it != parents->end(); ++it)
- entries_[it->index()].propagate_up_at_count -= 1;
+ entries_[it->first].propagate_up_at_count -= 1;
parents->clear(); // Forget the parents
}
@@ -213,7 +215,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
}
entries_.resize(node_map_.size());
- // Create parent IntMap for the entries.
+ // Create parent StdIntMap for the entries.
for (int i = v.size() - 1; i >= 0; i--) {
Prefilter* prefilter = v[i];
if (prefilter == NULL)
@@ -223,7 +225,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
continue;
Entry* entry = &entries_[prefilter->unique_id()];
- entry->parents = new IntMap(node_map_.size());
+ entry->parents = new StdIntMap();
}
// Fill the entries.
@@ -249,7 +251,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
case Prefilter::OR:
case Prefilter::AND: {
- IntMap uniq_child(node_map_.size());
+ std::set<int> uniq_child;
for (int j = 0; j < prefilter->subs()->size() ; j++) {
Prefilter* child = (*prefilter->subs())[j];
Prefilter* canonical = CanonicalNode(child);
@@ -258,12 +260,12 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
return;
}
int child_id = canonical->unique_id();
- if (!uniq_child.has_index(child_id))
- uniq_child.set_new(child_id, 1);
+ uniq_child.insert(child_id);
// To the child, we want to add to parent indices.
Entry* child_entry = &entries_[child_id];
- if (!child_entry->parents->has_index(prefilter->unique_id()))
- child_entry->parents->set_new(prefilter->unique_id(), 1);
+ if (child_entry->parents->find(prefilter->unique_id()) ==
+ child_entry->parents->end())
+ (*child_entry->parents)[prefilter->unique_id()] = 1;
}
entry->propagate_up_at_count =
prefilter->op() == Prefilter::AND ? uniq_child.size() : 1;
@@ -329,10 +331,10 @@ void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,
}
int c;
// Pass trigger up to parents.
- for (IntMap::iterator it = entry.parents->begin();
+ for (StdIntMap::iterator it = entry.parents->begin();
it != entry.parents->end();
++it) {
- int j = it->index();
+ int j = it->first;
const Entry& parent = entries_[j];
VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;
// Delay until all the children have succeeded.
@@ -364,12 +366,12 @@ void PrefilterTree::PrintDebugInfo() {
VLOG(10) << "#Unique Nodes: " << entries_.size();
for (int i = 0; i < entries_.size(); ++i) {
- IntMap* parents = entries_[i].parents;
+ StdIntMap* parents = entries_[i].parents;
const vector<int>& regexps = entries_[i].regexps;
VLOG(10) << "EntryId: " << i
<< " N: " << parents->size() << " R: " << regexps.size();
- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
- VLOG(10) << it->index();
+ for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
+ VLOG(10) << it->first;
}
VLOG(10) << "Map:";
for (map<string, Prefilter*>::const_iterator iter = node_map_.begin();
diff --git a/third_party/re2/re2/prefilter_tree.h b/third_party/re2/re2/prefilter_tree.h
index 596b734..94eb183 100644
--- a/third_party/re2/re2/prefilter_tree.h
+++ b/third_party/re2/re2/prefilter_tree.h
@@ -16,12 +16,15 @@
#ifndef RE2_PREFILTER_TREE_H_
#define RE2_PREFILTER_TREE_H_
+#include <map>
+
#include "util/util.h"
#include "util/sparse_array.h"
namespace re2 {
typedef SparseArray<int> IntMap;
+typedef std::map<int, int> StdIntMap;
class Prefilter;
@@ -71,7 +74,7 @@ class PrefilterTree {
// are two different nodes, but they share the atom 'def'. So when
// 'def' matches, it triggers two parents, corresponding to the two
// different OR nodes.
- IntMap* parents;
+ StdIntMap* parents;
// When this node is ready to trigger the parent, what are the
// regexps that are triggered.