summaryrefslogtreecommitdiffstats
path: root/chrome
diff options
context:
space:
mode:
authorpaulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-27 22:49:22 +0000
committerpaulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-27 22:49:22 +0000
commit1fc19e8bb6afa4a771c92eaa407c5838c01ab8c4 (patch)
treebce98e703a09fcf978cdee725fdc080fb65c49e7 /chrome
parentdca5cbb44a2665ce7248d99fae0007ed344734cf (diff)
downloadchromium_src-1fc19e8bb6afa4a771c92eaa407c5838c01ab8c4.zip
chromium_src-1fc19e8bb6afa4a771c92eaa407c5838c01ab8c4.tar.gz
chromium_src-1fc19e8bb6afa4a771c92eaa407c5838c01ab8c4.tar.bz2
Clean up the chunk processing pipeline. We are no longer
asynchronous, so we can greatly simplify the code. Refactor BuildBloomFilter() to smaller worker functions. Review URL: http://codereview.chromium.org/8605 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@4032 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_bloom.cc482
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_bloom.h60
2 files changed, 216 insertions, 326 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
index d04826d..e42af0d 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
@@ -43,7 +43,6 @@ static const int kMaxStalenessMinutes = 45;
SafeBrowsingDatabaseBloom::SafeBrowsingDatabaseBloom()
: db_(NULL),
- transaction_count_(0),
init_(false),
chunk_inserted_callback_(NULL),
ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)),
@@ -110,25 +109,10 @@ bool SafeBrowsingDatabaseBloom::Close() {
if (!db_)
return true;
- if (!pending_add_del_.empty()) {
- while (!pending_add_del_.empty())
- pending_add_del_.pop();
-
- EndTransaction();
- }
-
- while (!pending_chunks_.empty()) {
- std::deque<SBChunk>* chunks = pending_chunks_.front();
- safe_browsing_util::FreeChunks(chunks);
- delete chunks;
- pending_chunks_.pop();
- EndTransaction();
- }
-
statement_cache_.reset(); // Must free statements before closing DB.
- transaction_.reset();
bool result = sqlite3_close(db_) == SQLITE_OK;
db_ = NULL;
+
return result;
}
@@ -251,9 +235,7 @@ bool SafeBrowsingDatabaseBloom::CreateTables() {
// running are queued up and run once the reset is done.
bool SafeBrowsingDatabaseBloom::ResetDatabase() {
hash_cache_.clear();
- add_chunk_cache_.clear();
- sub_chunk_cache_.clear();
- prefix_miss_cache_.clear();
+ ClearUpdateCaches();
bool rv = Close();
DCHECK(rv);
@@ -288,6 +270,14 @@ bool SafeBrowsingDatabaseBloom::CheckCompatibleVersion() {
return statement->column_int(0) == kDatabaseVersion;
}
+void SafeBrowsingDatabaseBloom::ClearUpdateCaches() {
+ add_del_cache_.clear();
+ sub_del_cache_.clear();
+ add_chunk_cache_.clear();
+ sub_chunk_cache_.clear();
+ prefix_miss_cache_.clear();
+}
+
bool SafeBrowsingDatabaseBloom::ContainsUrl(
const GURL& url,
std::string* matching_list,
@@ -363,14 +353,6 @@ bool SafeBrowsingDatabaseBloom::NeedToCheckUrl(const GURL& url) {
return true;
}
-void SafeBrowsingDatabaseBloom::ProcessPendingWork() {
- BeginTransaction();
- prefix_miss_cache_.clear();
- ProcessChunks();
- ProcessAddDel();
- EndTransaction();
-}
-
void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name,
std::deque<SBChunk>* chunks) {
// We've going to be updating the bloom filter, so delete the on-disk
@@ -381,82 +363,60 @@ void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name,
// I think we need some way to indicate that the bloom filter is out of date
// and needs to be rebuilt, but we shouldn't delete it.
// DeleteBloomFilter();
-
- int list_id = GetListID(list_name);
- std::deque<SBChunk>::iterator i = chunks->begin();
- for (; i != chunks->end(); ++i) {
- SBChunk& chunk = (*i);
- chunk.list_id = list_id;
- std::deque<SBChunkHost>::iterator j = chunk.hosts.begin();
- for (; j != chunk.hosts.end(); ++j) {
- j->entry->set_list_id(list_id);
- if (j->entry->IsAdd())
- j->entry->set_chunk_id(chunk.chunk_number);
- }
- }
-
- pending_chunks_.push(chunks);
-
- ProcessPendingWork();
-}
-
-void SafeBrowsingDatabaseBloom::UpdateFinished(bool update_succeeded) {
- if (update_succeeded)
- BuildBloomFilter();
-
- // We won't need the chunk caches until the next update (which will read them
- // from the database), so free their memory as they may contain thousands of
- // chunk numbers.
- add_chunk_cache_.clear();
- sub_chunk_cache_.clear();
-}
-
-void SafeBrowsingDatabaseBloom::ProcessChunks() {
- if (pending_chunks_.empty())
+ if (chunks->empty())
return;
- while (!pending_chunks_.empty()) {
- std::deque<SBChunk>* chunks = pending_chunks_.front();
- if (chunks->front().is_add) {
- ProcessAddChunks(chunks);
- } else {
- ProcessSubChunks(chunks);
- }
-
- delete chunks;
- pending_chunks_.pop();
- }
-
- if (chunk_inserted_callback_)
- chunk_inserted_callback_->Run();
-}
+ int list_id = GetListID(list_name);
+ ChunkType chunk_type = chunks->front().is_add ? ADD_CHUNK : SUB_CHUNK;
-void SafeBrowsingDatabaseBloom::ProcessAddChunks(std::deque<SBChunk>* chunks) {
while (!chunks->empty()) {
SBChunk& chunk = chunks->front();
- int list_id = chunk.list_id;
+ chunk.list_id = list_id;
int chunk_id = chunk.chunk_number;
// The server can give us a chunk that we already have because it's part of
// a range. Don't add it again.
- if (!ChunkExists(list_id, ADD_CHUNK, chunk_id)) {
+ if (!ChunkExists(list_id, chunk_type, chunk_id)) {
while (!chunk.hosts.empty()) {
- WaitAfterResume();
// Read the existing record for this host, if it exists.
SBPrefix host = chunk.hosts.front().host;
SBEntry* entry = chunk.hosts.front().entry;
-
- AddEntry(host, entry);
+ entry->set_list_id(list_id);
+ if (chunk_type == ADD_CHUNK) {
+ entry->set_chunk_id(chunk_id);
+ AddEntry(host, entry);
+ } else {
+ AddSub(chunk_id, host, entry);
+ }
entry->Destroy();
chunk.hosts.pop_front();
}
+
int encoded = EncodeChunkId(chunk_id, list_id);
- add_chunk_cache_.insert(encoded);
+ if (chunk_type == ADD_CHUNK)
+ add_chunk_cache_.insert(encoded);
+ else
+ sub_chunk_cache_.insert(encoded);
}
chunks->pop_front();
}
+
+ delete chunks;
+
+ if (chunk_inserted_callback_)
+ chunk_inserted_callback_->Run();
+}
+
+void SafeBrowsingDatabaseBloom::UpdateFinished(bool update_succeeded) {
+ if (update_succeeded)
+ BuildBloomFilter();
+
+ // We won't need the chunk caches until the next update (which will read them
+ // from the database), so free their memory as they may contain thousands of
+ // entries.
+ ClearUpdateCaches();
}
void SafeBrowsingDatabaseBloom::AddEntry(SBPrefix host, SBEntry* entry) {
@@ -615,128 +575,27 @@ int SafeBrowsingDatabaseBloom::GetAddPrefixCount() {
return add_count;
}
-void SafeBrowsingDatabaseBloom::ProcessSubChunks(std::deque<SBChunk>* chunks) {
- while (!chunks->empty()) {
- SBChunk& chunk = chunks->front();
- int list_id = chunk.list_id;
- int chunk_id = chunk.chunk_number;
-
- if (!ChunkExists(list_id, SUB_CHUNK, chunk_id)) {
- while (!chunk.hosts.empty()) {
- WaitAfterResume();
-
- SBPrefix host = chunk.hosts.front().host;
- SBEntry* entry = chunk.hosts.front().entry;
- AddSub(chunk_id, host, entry);
-
- entry->Destroy();
- chunk.hosts.pop_front();
- }
-
- int encoded = EncodeChunkId(chunk_id, list_id);
- sub_chunk_cache_.insert(encoded);
- }
-
- chunks->pop_front();
- }
-}
-
void SafeBrowsingDatabaseBloom::DeleteChunks(
std::vector<SBChunkDelete>* chunk_deletes) {
- BeginTransaction();
- bool pending_add_del_were_empty = pending_add_del_.empty();
+ if (chunk_deletes->empty())
+ return;
+
+ int list_id = GetListID(chunk_deletes->front().list_name);
for (size_t i = 0; i < chunk_deletes->size(); ++i) {
const SBChunkDelete& chunk = (*chunk_deletes)[i];
std::vector<int> chunk_numbers;
RangesToChunks(chunk.chunk_del, &chunk_numbers);
for (size_t del = 0; del < chunk_numbers.size(); ++del) {
- if (chunk.is_sub_del) {
- SubDel(chunk.list_name, chunk_numbers[del]);
- } else {
- AddDel(chunk.list_name, chunk_numbers[del]);
- }
+ int encoded_chunk = EncodeChunkId(chunk_numbers[del], list_id);
+ if (chunk.is_sub_del)
+ sub_del_cache_.insert(encoded_chunk);
+ else
+ add_del_cache_.insert(encoded_chunk);
}
}
- if (pending_add_del_were_empty && !pending_add_del_.empty()) {
- ProcessPendingWork();
- }
-
delete chunk_deletes;
- EndTransaction();
-}
-
-void SafeBrowsingDatabaseBloom::AddDel(const std::string& list_name,
- int add_chunk_id) {
- int list_id = GetListID(list_name);
- AddDel(list_id, add_chunk_id);
-}
-
-// TODO(paulg): Change this from a database scan to an in-memory operation done
-// while building the bloom filter.
-void SafeBrowsingDatabaseBloom::AddDel(int list_id,
- int add_chunk_id) {
- STATS_COUNTER(L"SB.ChunkDelete", 1);
- // Find all the prefixes that came from the given add_chunk_id.
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "DELETE FROM add_prefix WHERE chunk=?");
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- int encoded = EncodeChunkId(add_chunk_id, list_id);
- statement->bind_int(0, encoded);
- int rv = statement->step();
- if (rv == SQLITE_CORRUPT) {
- HandleCorruptDatabase();
- } else {
- DCHECK(rv == SQLITE_DONE);
- }
- add_chunk_cache_.erase(encoded);
-}
-
-void SafeBrowsingDatabaseBloom::SubDel(const std::string& list_name,
- int sub_chunk_id) {
- int list_id = GetListID(list_name);
- SubDel(list_id, sub_chunk_id);
-}
-
-// TODO(paulg): Change this from a database scan to an in-memory operation done
-// while building the bloom filter.
-void SafeBrowsingDatabaseBloom::SubDel(int list_id,
- int sub_chunk_id) {
- STATS_COUNTER(L"SB.ChunkDelete", 1);
- // Find all the prefixes that came from the given add_chunk_id.
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "DELETE FROM sub_prefix WHERE chunk=?");
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- int encoded = EncodeChunkId(sub_chunk_id, list_id);
- statement->bind_int(0, encoded);
- int rv = statement->step();
- if (rv == SQLITE_CORRUPT) {
- HandleCorruptDatabase();
- } else {
- DCHECK(rv == SQLITE_DONE);
- }
- sub_chunk_cache_.erase(encoded);
-}
-
-void SafeBrowsingDatabaseBloom::ProcessAddDel() {
- if (pending_add_del_.empty())
- return;
-
- while (!pending_add_del_.empty()) {
- AddDelWork& add_del_work = pending_add_del_.front();
- ClearCachedHashesForChunk(add_del_work.list_id, add_del_work.add_chunk_id);
- AddDel(add_del_work.list_id, add_del_work.add_chunk_id);
- pending_add_del_.pop();
- }
}
bool SafeBrowsingDatabaseBloom::ChunkExists(int list_id,
@@ -920,18 +779,18 @@ void SafeBrowsingDatabaseBloom::ReadChunkNumbers() {
}
// Write all the chunk numbers to the add_chunks and sub_chunks tables.
-void SafeBrowsingDatabaseBloom::WriteChunkNumbers() {
+bool SafeBrowsingDatabaseBloom::WriteChunkNumbers() {
// Delete the contents of the add chunk table.
SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_,
"DELETE FROM add_chunks");
if (!del_add_chunk.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
int rv = del_add_chunk->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
- return;
+ return false;
}
DCHECK(rv == SQLITE_DONE);
@@ -939,21 +798,22 @@ void SafeBrowsingDatabaseBloom::WriteChunkNumbers() {
"INSERT INTO add_chunks (chunk) VALUES (?)");
if (!write_adds.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
// Write all the add chunks from the cache to the database.
std::set<int>::const_iterator it = add_chunk_cache_.begin();
- while (it != add_chunk_cache_.end()) {
+ for (; it != add_chunk_cache_.end(); ++it) {
+ if (add_del_cache_.find(*it) != add_del_cache_.end())
+ continue; // This chunk has been deleted.
write_adds->bind_int(0, *it);
rv = write_adds->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
- return;
+ return false;
}
DCHECK(rv == SQLITE_DONE);
write_adds->reset();
- ++it;
}
// Delete the contents of the sub chunk table.
@@ -961,12 +821,12 @@ void SafeBrowsingDatabaseBloom::WriteChunkNumbers() {
"DELETE FROM sub_chunks");
if (!del_sub_chunk.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
rv = del_sub_chunk->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
- return;
+ return false;
}
DCHECK(rv == SQLITE_DONE);
@@ -974,31 +834,28 @@ void SafeBrowsingDatabaseBloom::WriteChunkNumbers() {
"INSERT INTO sub_chunks (chunk) VALUES (?)");
if (!write_subs.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
// Write all the sub chunks from the cache to the database.
it = sub_chunk_cache_.begin();
- while (it != sub_chunk_cache_.end()) {
+ for (; it != sub_chunk_cache_.end(); ++it) {
+ if (sub_del_cache_.find(*it) != sub_del_cache_.end())
+ continue; // This chunk has been deleted.
write_subs->bind_int(0, *it);
rv = write_subs->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
- return;
+ return false;
}
DCHECK(rv == SQLITE_DONE);
write_subs->reset();
- ++it;
}
-}
-// Helper struct and compare function for building the bloom filter.
-typedef struct {
- int chunk_id;
- SBPrefix prefix;
-} SBPair;
+ return true;
+}
-static int pair_compare(const void* arg1, const void* arg2) {
+int SafeBrowsingDatabaseBloom::PairCompare(const void* arg1, const void* arg2) {
const SBPair* p1 = reinterpret_cast<const SBPair*>(arg1);
const SBPair* p2 = reinterpret_cast<const SBPair*>(arg2);
int delta = p1->chunk_id - p2->chunk_id;
@@ -1007,34 +864,16 @@ static int pair_compare(const void* arg1, const void* arg2) {
return delta;
}
-// TODO(erikkay): should we call WaitAfterResume() inside any of the loops here?
-// This is a pretty fast operation and it would be nice to let it finish.
-// TODO(erikkay, paulg): Break this into smaller functions.
-void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
- Time before = Time::Now();
-
- add_count_ = GetAddPrefixCount();
- if (add_count_ == 0) {
- bloom_filter_ = NULL;
- return;
- }
-
- scoped_array<SBPair> adds_array(new SBPair[add_count_]);
- SBPair* adds = adds_array.get();
-
- // Used to track which adds have been subbed out. The vector<bool> is actually
- // a bitvector so the size is as small as we can get.
- std::vector<bool> adds_removed;
- adds_removed.resize(add_count_, false);
-
+bool SafeBrowsingDatabaseBloom::BuildAddList(SBPair* adds) {
// Read add_prefix into memory and sort it.
STATS_COUNTER(L"SB.HostSelectForBloomFilter", 1);
SQLITE_UNIQUE_STATEMENT(add_prefix, *statement_cache_,
"SELECT chunk, prefix FROM add_prefix");
if (!add_prefix.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
+
SBPair* add = adds;
while (true) {
int rv = add_prefix->step();
@@ -1050,21 +889,20 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
break;
}
DCHECK(add_count_ == (add - adds));
- qsort(adds, add_count_, sizeof(SBPair), pair_compare);
+ qsort(adds, add_count_, sizeof(SBPair),
+ &SafeBrowsingDatabaseBloom::PairCompare);
+ return true;
+}
+
+bool SafeBrowsingDatabaseBloom::RemoveSubs(SBPair* adds,
+ std::vector<bool>* adds_removed) {
// Read through sub_prefix and zero out add_prefix entries that match.
SQLITE_UNIQUE_STATEMENT(sub_prefix, *statement_cache_,
"SELECT chunk, add_chunk, prefix FROM sub_prefix");
if (!sub_prefix.is_valid()) {
NOTREACHED();
- return;
- }
-
- scoped_ptr<SQLTransaction> update_transaction;
- update_transaction.reset(new SQLTransaction(db_));
- if (update_transaction->Begin() != SQLITE_OK) {
- NOTREACHED();
- return;
+ return false;
}
// Create a temporary sub prefix table. We add entries to it as we scan the
@@ -1076,7 +914,7 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
"add_chunk INTEGER,"
"prefix INTEGER)",
NULL, NULL, NULL) != SQLITE_OK) {
- return;
+ return false;
}
SQLITE_UNIQUE_STATEMENT(
@@ -1085,48 +923,64 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
"INSERT INTO sub_prefix_tmp (chunk,add_chunk,prefix) VALUES (?,?,?)");
if (!sub_prefix_tmp.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
SBPair sub;
while (true) {
int rv = sub_prefix->step();
if (rv != SQLITE_ROW) {
- if (rv == SQLITE_CORRUPT)
+ if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
+ return false;
+ }
break;
}
+
+ int sub_chunk = sub_prefix->column_int(0);
sub.chunk_id = sub_prefix->column_int(1);
sub.prefix = sub_prefix->column_int(2);
- void* match = bsearch(&sub, adds, add_count_, sizeof(SBPair), pair_compare);
+
+ // See if this sub chunk has been deleted via a SubDel, and skip doing the
+ // search or write if so.
+ if (sub_del_cache_.find(sub_chunk) != sub_del_cache_.end())
+ continue;
+
+ void* match = bsearch(&sub, adds, add_count_, sizeof(SBPair),
+ &SafeBrowsingDatabaseBloom::PairCompare);
if (match) {
SBPair* subbed = reinterpret_cast<SBPair*>(match);
- adds_removed[subbed - adds] = true;
+ (*adds_removed)[subbed - adds] = true;
} else {
// This sub_prefix entry did not match any add, so we keep it around.
- sub_prefix_tmp->bind_int(0, sub_prefix->column_int(0));
+ sub_prefix_tmp->bind_int(0, sub_chunk);
sub_prefix_tmp->bind_int(1, sub.chunk_id);
sub_prefix_tmp->bind_int(2, sub.prefix);
int rv = sub_prefix_tmp->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
- return;
+ return false;
}
DCHECK(rv == SQLITE_DONE);
sub_prefix_tmp->reset();
}
}
+
+ return true;
+}
+bool SafeBrowsingDatabaseBloom::UpdateTables() {
// Delete the old sub_prefix table and rename the temporary table.
SQLITE_UNIQUE_STATEMENT(del_sub, *statement_cache_, "DROP TABLE sub_prefix");
if (!del_sub.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
+
int rv = del_sub->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
- return;
+ return false;
}
DCHECK(rv == SQLITE_DONE);
@@ -1134,12 +988,12 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
"ALTER TABLE sub_prefix_tmp RENAME TO sub_prefix");
if (!rename_sub.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
rv = rename_sub->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
- return;
+ return false;
}
DCHECK(rv == SQLITE_DONE);
@@ -1148,36 +1002,55 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
SQLITE_UNIQUE_STATEMENT(del_add, *statement_cache_, "DELETE FROM add_prefix");
if (!del_add.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
rv = del_add->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
- return;
+ return false;
}
DCHECK(rv == SQLITE_DONE);
+ return true;
+}
+
+bool SafeBrowsingDatabaseBloom::WritePrefixes(
+ SBPair* adds,
+ const std::vector<bool>& adds_removed,
+ int* new_add_count,
+ BloomFilter** filter) {
+ *filter = NULL;
+ *new_add_count = 0;
+
SQLITE_UNIQUE_STATEMENT(insert, *statement_cache_,
"INSERT INTO add_prefix VALUES (?,?)");
if (!insert.is_valid()) {
NOTREACHED();
- return;
+ return false;
}
+
int number_of_keys = std::max(add_count_, kBloomFilterMinSize);
int filter_size = number_of_keys * kBloomFilterSizeRatio;
- BloomFilter* filter = new BloomFilter(filter_size);
- add = adds;
+ BloomFilter* new_filter = new BloomFilter(filter_size);
+ SBPair* add = adds;
int new_count = 0;
+
while (add - adds < add_count_) {
if (!adds_removed[add - adds]) {
- filter->Insert(add->prefix);
+ // Check to see if we have an AddDel for this chunk and skip writing it
+ // if there is.
+ if (add_del_cache_.find(add->chunk_id) != add_del_cache_.end()) {
+ add++;
+ continue;
+ }
+ new_filter->Insert(add->prefix);
insert->bind_int(0, add->chunk_id);
insert->bind_int(1, add->prefix);
- rv = insert->step();
+ int rv = insert->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
- delete filter; // TODO(paulg): scoped.
- return;
+ delete new_filter; // TODO(paulg): scoped.
+ return false;
}
DCHECK(rv == SQLITE_DONE);
insert->reset();
@@ -1186,41 +1059,76 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
add++;
}
- update_transaction->Commit();
+ *new_add_count = new_count;
+ *filter = new_filter;
+
+ return true;
+}
+
+// TODO(erikkay): should we call WaitAfterResume() inside any of the loops here?
+// This is a pretty fast operation and it would be nice to let it finish.
+void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
+ Time before = Time::Now();
+
+ add_count_ = GetAddPrefixCount();
+ if (add_count_ == 0) {
+ bloom_filter_ = NULL;
+ return;
+ }
+
+ scoped_array<SBPair> adds_array(new SBPair[add_count_]);
+ SBPair* adds = adds_array.get();
+
+ if (!BuildAddList(adds))
+ return;
+
+ // Protect the remaining operations on the database with a transaction.
+ scoped_ptr<SQLTransaction> update_transaction;
+ update_transaction.reset(new SQLTransaction(db_));
+ if (update_transaction->Begin() != SQLITE_OK) {
+ NOTREACHED();
+ return;
+ }
+
+ // Used to track which adds have been subbed out. The vector<bool> is actually
+ // a bitvector so the size is as small as we can get.
+ std::vector<bool> adds_removed;
+ adds_removed.resize(add_count_, false);
+
+ // Flag any add as removed if there is a matching sub.
+ if (!RemoveSubs(adds, &adds_removed))
+ return;
+
+ // Prepare the database for writing out our remaining add and sub prefixes.
+ if (!UpdateTables())
+ return;
+
+ // Write out the remaining adds to the filter and database.
+ int new_count;
+ BloomFilter* filter;
+ if (!WritePrefixes(adds, adds_removed, &new_count, &filter))
+ return;
// If there were any matching subs, the size will be smaller.
add_count_ = new_count;
bloom_filter_ = filter;
TimeDelta bloom_gen = Time::Now() - before;
- SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in " <<
- bloom_gen.InMilliseconds() << " ms total. prefix count: " <<
- add_count_;
+ SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in "
+ << bloom_gen.InMilliseconds()
+ << " ms total. prefix count: "<< add_count_;
UMA_HISTOGRAM_LONG_TIMES(L"SB.BuildBloom", bloom_gen);
- WriteBloomFilter();
- WriteChunkNumbers();
-}
+ // Save the chunk numbers we've received to the database for reporting in
+ // future update requests.
+ if (!WriteChunkNumbers())
+ return;
-void SafeBrowsingDatabaseBloom::BeginTransaction() {
- transaction_count_++;
- if (transaction_.get() == NULL) {
- transaction_.reset(new SQLTransaction(db_));
- if (transaction_->Begin() != SQLITE_OK) {
- DCHECK(false) << "Safe browsing database couldn't start transaction";
- transaction_.reset();
- }
- }
-}
+ // We've made it this far without errors, so commit the changes.
+ update_transaction->Commit();
-void SafeBrowsingDatabaseBloom::EndTransaction() {
- if (--transaction_count_ == 0) {
- if (transaction_.get() != NULL) {
- STATS_COUNTER(L"SB.TransactionCommit", 1);
- transaction_->Commit();
- transaction_.reset();
- }
- }
+ // Persist the bloom filter to disk.
+ WriteBloomFilter();
}
void SafeBrowsingDatabaseBloom::GetCachedFullHashes(
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
index 8f5fbe1..e470a64 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
+++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
@@ -120,29 +120,19 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
// Generate a bloom filter.
virtual void BuildBloomFilter();
- // Used when generating the bloom filter. Reads a small number of hostkeys
- // starting at the given row id.
- void OnReadHostKeys(int start_id);
-
- // Synchronous methods to process the currently queued up chunks or add-dels
- void ProcessPendingWork();
- void ProcessChunks();
- void ProcessAddDel();
- void ProcessAddChunks(std::deque<SBChunk>* chunks);
- void ProcessSubChunks(std::deque<SBChunk>* chunks);
-
- void BeginTransaction();
- void EndTransaction();
-
- // Processes an add-del command, which deletes all the prefixes that came
- // from that add chunk id.
- void AddDel(const std::string& list_name, int add_chunk_id);
- void AddDel(int list_id, int add_chunk_id);
-
- // Processes a sub-del command, which just removes the sub chunk id from
- // our list.
- void SubDel(const std::string& list_name, int sub_chunk_id);
- void SubDel(int list_id, int sub_chunk_id);
+ // Helpers for building the bloom filter.
+ typedef struct {
+ int chunk_id;
+ SBPrefix prefix;
+ } SBPair;
+
+ static int PairCompare(const void* arg1, const void* arg2);
+
+ bool BuildAddList(SBPair* adds);
+ bool RemoveSubs(SBPair* adds, std::vector<bool>* adds_removed);
+ bool UpdateTables();
+ bool WritePrefixes(SBPair* adds, const std::vector<bool>& adds_removed,
+ int* new_add_count, BloomFilter** filter);
// Looks up any cached full hashes we may have.
void GetCachedFullHashes(const std::vector<SBPrefix>* prefix_hits,
@@ -170,7 +160,6 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
void AddPrefix(SBPrefix prefix, int encoded_chunk);
void AddSub(int chunk, SBPrefix host, SBEntry* entry);
void AddSubPrefix(SBPrefix prefix, int encoded_chunk, int encoded_add_chunk);
- void ProcessPendingSubs();
int GetAddPrefixCount();
void AddFullPrefix(SBPrefix prefix,
int encoded_chunk,
@@ -182,7 +171,10 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
// Reads and writes chunk numbers to and from persistent store.
void ReadChunkNumbers();
- void WriteChunkNumbers();
+ bool WriteChunkNumbers();
+
+ // Flush in memory temporary caches.
+ void ClearUpdateCaches();
// Encode the list id in the lower bit of the chunk.
static inline int EncodeChunkId(int chunk, int list_id) {
@@ -205,25 +197,11 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
// Cache of compiled statements for our database.
scoped_ptr<SqliteStatementCache> statement_cache_;
- int transaction_count_;
- scoped_ptr<SQLTransaction> transaction_;
-
// True iff the database has been opened successfully.
bool init_;
std::wstring filename_;
- // Used to store throttled work for commands that write to the database.
- std::queue<std::deque<SBChunk>*> pending_chunks_;
-
- struct AddDelWork {
- int list_id;
- int add_chunk_id;
- std::vector<std::string> hostkeys;
- };
-
- std::queue<AddDelWork> pending_add_del_;
-
// Called after an add/sub chunk is processed.
Callback0::Type* chunk_inserted_callback_;
@@ -252,6 +230,10 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
std::set<int> add_chunk_cache_;
std::set<int> sub_chunk_cache_;
+ // Caches for the AddDel and SubDel commands.
+ base::hash_set<int> add_del_cache_;
+ base::hash_set<int> sub_del_cache_;
+
// The number of entries in the add_prefix table. Used to pick the correct
// size for the bloom filter.
int add_count_;