diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-01 21:09:54 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-01 21:09:54 +0000 |
commit | 961354ef6fe1260d978389cdcc107ad468908ba6 (patch) | |
tree | 1804c273a765e62b3ff225969f58d11b8b4235bb /chrome/browser/safe_browsing/safe_browsing_database_bloom.cc | |
parent | 235f0e34406ee87a0daaeeff2ff420e39ec03753 (diff) | |
download | chromium_src-961354ef6fe1260d978389cdcc107ad468908ba6.zip chromium_src-961354ef6fe1260d978389cdcc107ad468908ba6.tar.gz chromium_src-961354ef6fe1260d978389cdcc107ad468908ba6.tar.bz2 |
Clean up Safe Browsing code a little:
* Update copyrights.
* Remove unnecessary headers.
* Remove useless functions.
* Make declaration and definition order match (in at least the _database_bloom.* files).
* Eliminate database_perftest.cc, which seems to have been entirely disabled since before the public launch and is perhaps irrelevant to the current code design.
BUG=none
TEST=none
Review URL: http://codereview.chromium.org/457019
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@33479 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing/safe_browsing_database_bloom.cc')
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database_bloom.cc | 1425 |
1 files changed, 703 insertions, 722 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc index 51946eb..6a40167 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc @@ -5,17 +5,12 @@ #include "chrome/browser/safe_browsing/safe_browsing_database_bloom.h" #include "base/auto_reset.h" -#include "base/compiler_specific.h" #include "base/file_util.h" -#include "base/logging.h" #include "base/message_loop.h" -#include "base/platform_thread.h" #include "base/process_util.h" #include "base/sha2.h" #include "base/stats_counters.h" -#include "base/string_util.h" #include "chrome/browser/safe_browsing/bloom_filter.h" -#include "chrome/browser/safe_browsing/chunk_range.h" #include "chrome/common/sqlite_compiled_statement.h" #include "chrome/common/sqlite_utils.h" #include "googleurl/src/gurl.h" @@ -64,130 +59,6 @@ void SafeBrowsingDatabaseBloom::Init(const FilePath& filename, chunk_inserted_callback_.reset(chunk_inserted_callback); } -bool SafeBrowsingDatabaseBloom::Open() { - if (db_) - return true; - - if (OpenSqliteDb(filename_, &db_) != SQLITE_OK) { - sqlite3_close(db_); - db_ = NULL; - return false; - } - - // Run the database in exclusive mode. Nobody else should be accessing the - // database while we're running, and this will give somewhat improved perf. - sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL); - - statement_cache_.reset(new SqliteStatementCache(db_)); - - if (!DoesSqliteTableExist(db_, "add_prefix")) { - if (!CreateTables()) { - // Database could be corrupt, try starting from scratch. - if (!ResetDatabase()) - return false; - } - } else if (!CheckCompatibleVersion()) { - if (!ResetDatabase()) - return false; - } - - return true; -} - -bool SafeBrowsingDatabaseBloom::Close() { - if (!db_) - return true; - - insert_transaction_.reset(); - statement_cache_.reset(); // Must free statements before closing DB. 
- bool result = sqlite3_close(db_) == SQLITE_OK; - db_ = NULL; - - return result; -} - -bool SafeBrowsingDatabaseBloom::CreateTables() { - SQLTransaction transaction(db_); - transaction.Begin(); - - // Store 32 bit add prefixes here. - if (sqlite3_exec(db_, "CREATE TABLE add_prefix (" - "chunk INTEGER," - "prefix INTEGER)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store 32 bit sub prefixes here. - if (sqlite3_exec(db_, "CREATE TABLE sub_prefix (" - "chunk INTEGER," - "add_chunk INTEGER," - "prefix INTEGER)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store 256 bit add full hashes (and GetHash results) here. - if (sqlite3_exec(db_, "CREATE TABLE add_full_hash (" - "chunk INTEGER," - "prefix INTEGER," - "receive_time INTEGER," - "full_hash BLOB)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store 256 bit sub full hashes here. - if (sqlite3_exec(db_, "CREATE TABLE sub_full_hash (" - "chunk INTEGER," - "add_chunk INTEGER," - "prefix INTEGER," - "full_hash BLOB)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store all the add and sub chunk numbers we receive. We cannot just rely on - // the prefix tables to generate these lists, since some chunks will have zero - // entries (and thus no prefixes), or potentially an add chunk can have all of - // its entries sub'd without receiving an AddDel, or a sub chunk might have - // been entirely consumed by adds. In these cases, we still have to report the - // chunk number but it will not have any prefixes in the prefix tables. - // - // TODO(paulg): Investigate storing the chunks as a string of ChunkRanges, one - // string for each of phish-add, phish-sub, malware-add, malware-sub. This - // might be better performance when the number of chunks is large, and is the - // natural format for the update request. 
- if (sqlite3_exec(db_, "CREATE TABLE add_chunks (" - "chunk INTEGER PRIMARY KEY)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - if (sqlite3_exec(db_, "CREATE TABLE sub_chunks (" - "chunk INTEGER PRIMARY KEY)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - std::string version = "PRAGMA user_version="; - version += StringPrintf("%d", kDatabaseVersion); - - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - if (statement->step() != SQLITE_DONE) - return false; - - transaction.Commit(); - add_count_ = 0; - - return true; -} - bool SafeBrowsingDatabaseBloom::ResetDatabase() { // Open() can call us when trying to handle potential database corruption. // Because we call Open() at the bottom of the function, we need to guard @@ -218,30 +89,6 @@ bool SafeBrowsingDatabaseBloom::ResetDatabase() { return Open(); } -bool SafeBrowsingDatabaseBloom::CheckCompatibleVersion() { - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "PRAGMA user_version"); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - int result = statement->step(); - if (result != SQLITE_ROW) - return false; - - return statement->column_int(0) == kDatabaseVersion; -} - -void SafeBrowsingDatabaseBloom::ClearUpdateCaches() { - AutoLock lock(lookup_lock_); - add_del_cache_.clear(); - sub_del_cache_.clear(); - add_chunk_cache_.clear(); - sub_chunk_cache_.clear(); - prefix_miss_cache_.clear(); -} - bool SafeBrowsingDatabaseBloom::ContainsUrl( const GURL& url, std::string* matching_list, @@ -308,15 +155,6 @@ bool SafeBrowsingDatabaseBloom::ContainsUrl( return false; } -bool SafeBrowsingDatabaseBloom::NeedToCheckUrl(const GURL& url) { - // Since everything is in the bloom filter, doing anything here would wind - // up just duplicating work that would happen in ContainsURL. 
- // It's possible that we may want to add a hostkey-based first-level cache - // on the front of this to minimize hash generation, but we'll need to do - // some measurements to verify that. - return true; -} - void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name, std::deque<SBChunk>* chunks) { if (chunks->empty()) @@ -374,6 +212,79 @@ void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name, chunk_inserted_callback_->Run(); } +void SafeBrowsingDatabaseBloom::DeleteChunks( + std::vector<SBChunkDelete>* chunk_deletes) { + if (chunk_deletes->empty()) + return; + + int list_id = safe_browsing_util::GetListId(chunk_deletes->front().list_name); + + for (size_t i = 0; i < chunk_deletes->size(); ++i) { + const SBChunkDelete& chunk = (*chunk_deletes)[i]; + std::vector<int> chunk_numbers; + RangesToChunks(chunk.chunk_del, &chunk_numbers); + for (size_t del = 0; del < chunk_numbers.size(); ++del) { + int encoded_chunk = EncodeChunkId(chunk_numbers[del], list_id); + if (chunk.is_sub_del) + sub_del_cache_.insert(encoded_chunk); + else + add_del_cache_.insert(encoded_chunk); + } + } + + delete chunk_deletes; +} + +void SafeBrowsingDatabaseBloom::GetListsInfo( + std::vector<SBListChunkRanges>* lists) { + DCHECK(lists); + lists->clear(); + + ReadChunkNumbers(); + + lists->push_back(SBListChunkRanges(safe_browsing_util::kMalwareList)); + GetChunkIds(safe_browsing_util::MALWARE, ADD_CHUNK, &lists->back().adds); + GetChunkIds(safe_browsing_util::MALWARE, SUB_CHUNK, &lists->back().subs); + + lists->push_back(SBListChunkRanges(safe_browsing_util::kPhishingList)); + GetChunkIds(safe_browsing_util::PHISH, ADD_CHUNK, &lists->back().adds); + GetChunkIds(safe_browsing_util::PHISH, SUB_CHUNK, &lists->back().subs); + + return; +} + +void SafeBrowsingDatabaseBloom::CacheHashResults( + const std::vector<SBPrefix>& prefixes, + const std::vector<SBFullHashResult>& full_hits) { + AutoLock lock(lookup_lock_); + + if (full_hits.empty()) { + // These prefixes 
returned no results, so we store them in order to prevent + // asking for them again. We flush this cache at the next update. + for (std::vector<SBPrefix>::const_iterator it = prefixes.begin(); + it != prefixes.end(); ++it) { + prefix_miss_cache_.insert(*it); + } + return; + } + + const Time now = Time::Now(); + for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin(); + it != full_hits.end(); ++it) { + SBPrefix prefix = it->hash.prefix; + HashList& entries = (*hash_cache_)[prefix]; + HashCacheEntry entry; + entry.received = now; + entry.list_id = safe_browsing_util::GetListId(it->list_name); + entry.add_chunk_id = EncodeChunkId(it->add_chunk_id, entry.list_id); + entry.full_hash = it->hash; + entries.push_back(entry); + + // Also push a copy to the pending write queue. + pending_full_hashes_.push_back(entry); + } +} + bool SafeBrowsingDatabaseBloom::UpdateStarted() { DCHECK(insert_transaction_.get() == NULL); @@ -401,218 +312,143 @@ void SafeBrowsingDatabaseBloom::UpdateFinished(bool update_succeeded) { ClearUpdateCaches(); } -void SafeBrowsingDatabaseBloom::InsertAdd(SBPrefix host, SBEntry* entry) { - STATS_COUNTER("SB.HostInsert", 1); - int encoded = EncodeChunkId(entry->chunk_id(), entry->list_id()); +bool SafeBrowsingDatabaseBloom::Open() { + if (db_) + return true; - if (entry->type() == SBEntry::ADD_FULL_HASH) { - base::Time receive_time = base::Time::Now(); - for (int i = 0; i < entry->prefix_count(); ++i) { - SBFullHash full_hash = entry->FullHashAt(i); - SBPrefix prefix = full_hash.prefix; - InsertAddPrefix(prefix, encoded); - InsertAddFullHash(prefix, encoded, receive_time, full_hash); - } - return; + if (OpenSqliteDb(filename_, &db_) != SQLITE_OK) { + sqlite3_close(db_); + db_ = NULL; + return false; } - // This entry contains only regular (32 bit) prefixes. 
- int count = entry->prefix_count(); - if (count == 0) { - InsertAddPrefix(host, encoded); - } else { - for (int i = 0; i < count; i++) { - SBPrefix prefix = entry->PrefixAt(i); - InsertAddPrefix(prefix, encoded); + // Run the database in exclusive mode. Nobody else should be accessing the + // database while we're running, and this will give somewhat improved perf. + sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL); + + statement_cache_.reset(new SqliteStatementCache(db_)); + + if (!DoesSqliteTableExist(db_, "add_prefix")) { + if (!CreateTables()) { + // Database could be corrupt, try starting from scratch. + if (!ResetDatabase()) + return false; } + } else if (!CheckCompatibleVersion()) { + if (!ResetDatabase()) + return false; } -} -void SafeBrowsingDatabaseBloom::InsertAddPrefix(SBPrefix prefix, - int encoded_chunk) { - STATS_COUNTER("SB.PrefixAdd", 1); - std::string sql = "INSERT INTO add_prefix (chunk, prefix) VALUES (?, ?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, prefix); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } - add_count_++; + return true; } -void SafeBrowsingDatabaseBloom::InsertAddFullHash(SBPrefix prefix, - int encoded_chunk, - base::Time receive_time, - SBFullHash full_prefix) { - STATS_COUNTER("SB.PrefixAddFull", 1); - std::string sql = "INSERT INTO add_full_hash " - "(chunk, prefix, receive_time, full_hash) " - "VALUES (?,?,?,?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } +bool SafeBrowsingDatabaseBloom::Close() { + if (!db_) + return true; - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, prefix); - statement->bind_int64(2, receive_time.ToTimeT()); - 
statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash)); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } + insert_transaction_.reset(); + statement_cache_.reset(); // Must free statements before closing DB. + bool result = sqlite3_close(db_) == SQLITE_OK; + db_ = NULL; + + return result; } -void SafeBrowsingDatabaseBloom::InsertSub( - int chunk_id, SBPrefix host, SBEntry* entry) { - STATS_COUNTER("SB.HostDelete", 1); - int encoded = EncodeChunkId(chunk_id, entry->list_id()); - int encoded_add; +bool SafeBrowsingDatabaseBloom::CreateTables() { + SQLTransaction transaction(db_); + transaction.Begin(); - if (entry->type() == SBEntry::SUB_FULL_HASH) { - for (int i = 0; i < entry->prefix_count(); ++i) { - SBFullHash full_hash = entry->FullHashAt(i); - SBPrefix prefix = full_hash.prefix; - encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), entry->list_id()); - InsertSubPrefix(prefix, encoded, encoded_add); - InsertSubFullHash(prefix, encoded, encoded_add, full_hash, false); - } - } else { - // We have prefixes. - int count = entry->prefix_count(); - if (count == 0) { - encoded_add = EncodeChunkId(entry->chunk_id(), entry->list_id()); - InsertSubPrefix(host, encoded, encoded_add); - } else { - for (int i = 0; i < count; i++) { - SBPrefix prefix = entry->PrefixAt(i); - encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), - entry->list_id()); - InsertSubPrefix(prefix, encoded, encoded_add); - } - } + // Store 32 bit add prefixes here. 
+ if (sqlite3_exec(db_, "CREATE TABLE add_prefix (" + "chunk INTEGER," + "prefix INTEGER)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } -} -void SafeBrowsingDatabaseBloom::InsertSubPrefix(SBPrefix prefix, - int encoded_chunk, - int encoded_add_chunk) { - STATS_COUNTER("SB.PrefixSub", 1); - std::string sql = - "INSERT INTO sub_prefix (chunk, add_chunk, prefix) VALUES (?,?,?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; + // Store 32 bit sub prefixes here. + if (sqlite3_exec(db_, "CREATE TABLE sub_prefix (" + "chunk INTEGER," + "add_chunk INTEGER," + "prefix INTEGER)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, encoded_add_chunk); - statement->bind_int(2, prefix); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); + + // Store 256 bit add full hashes (and GetHash results) here. + if (sqlite3_exec(db_, "CREATE TABLE add_full_hash (" + "chunk INTEGER," + "prefix INTEGER," + "receive_time INTEGER," + "full_hash BLOB)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } -} -void SafeBrowsingDatabaseBloom::InsertSubFullHash(SBPrefix prefix, - int encoded_chunk, - int encoded_add_chunk, - SBFullHash full_prefix, - bool use_temp_table) { - STATS_COUNTER("SB.PrefixSubFull", 1); - std::string sql = "INSERT INTO "; - if (use_temp_table) { - sql += "sub_full_tmp"; - } else { - sql += "sub_full_hash"; + // Store 256 bit sub full hashes here. 
+ if (sqlite3_exec(db_, "CREATE TABLE sub_full_hash (" + "chunk INTEGER," + "add_chunk INTEGER," + "prefix INTEGER," + "full_hash BLOB)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } - sql += " (chunk, add_chunk, prefix, full_hash) VALUES (?,?,?,?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; + // Store all the add and sub chunk numbers we receive. We cannot just rely on + // the prefix tables to generate these lists, since some chunks will have zero + // entries (and thus no prefixes), or potentially an add chunk can have all of + // its entries sub'd without receiving an AddDel, or a sub chunk might have + // been entirely consumed by adds. In these cases, we still have to report the + // chunk number but it will not have any prefixes in the prefix tables. + // + // TODO(paulg): Investigate storing the chunks as a string of ChunkRanges, one + // string for each of phish-add, phish-sub, malware-add, malware-sub. This + // might be better performance when the number of chunks is large, and is the + // natural format for the update request. 
+ if (sqlite3_exec(db_, "CREATE TABLE add_chunks (" + "chunk INTEGER PRIMARY KEY)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, encoded_add_chunk); - statement->bind_int(2, prefix); - statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash)); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); + + if (sqlite3_exec(db_, "CREATE TABLE sub_chunks (" + "chunk INTEGER PRIMARY KEY)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } -} -void SafeBrowsingDatabaseBloom::ReadFullHash(SqliteCompiledStatement& statement, - int column, - SBFullHash* full_hash) { - DCHECK(full_hash); - std::vector<unsigned char> blob; - statement->column_blob_as_vector(column, &blob); - DCHECK(blob.size() == sizeof(SBFullHash)); - memcpy(full_hash->full_hash, &blob[0], sizeof(SBFullHash)); -} + std::string version = "PRAGMA user_version="; + version += StringPrintf("%d", kDatabaseVersion); -// TODO(paulg): Look for a less expensive way to maintain add_count_? If we move -// to a native file format, we can just cache the count in the file and not have -// to scan at all. 
-int SafeBrowsingDatabaseBloom::GetAddPrefixCount() { - SQLITE_UNIQUE_STATEMENT(count, *statement_cache_, - "SELECT count(*) FROM add_prefix"); - if (!count.is_valid()) { + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str()); + if (!statement.is_valid()) { NOTREACHED(); - return 0; + return false; } - int rv = count->step(); - int add_count = 0; - if (rv == SQLITE_ROW) - add_count = count->column_int(0); - else if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - return add_count; -} + if (statement->step() != SQLITE_DONE) + return false; -void SafeBrowsingDatabaseBloom::DeleteChunks( - std::vector<SBChunkDelete>* chunk_deletes) { - if (chunk_deletes->empty()) - return; + transaction.Commit(); + add_count_ = 0; - int list_id = safe_browsing_util::GetListId(chunk_deletes->front().list_name); + return true; +} - for (size_t i = 0; i < chunk_deletes->size(); ++i) { - const SBChunkDelete& chunk = (*chunk_deletes)[i]; - std::vector<int> chunk_numbers; - RangesToChunks(chunk.chunk_del, &chunk_numbers); - for (size_t del = 0; del < chunk_numbers.size(); ++del) { - int encoded_chunk = EncodeChunkId(chunk_numbers[del], list_id); - if (chunk.is_sub_del) - sub_del_cache_.insert(encoded_chunk); - else - add_del_cache_.insert(encoded_chunk); - } +bool SafeBrowsingDatabaseBloom::CheckCompatibleVersion() { + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "PRAGMA user_version"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; } - delete chunk_deletes; + int result = statement->step(); + if (result != SQLITE_ROW) + return false; + + return statement->column_int(0) == kDatabaseVersion; } bool SafeBrowsingDatabaseBloom::ChunkExists(int list_id, @@ -655,140 +491,126 @@ void SafeBrowsingDatabaseBloom::GetChunkIds( RangesToString(ranges, list); } -void SafeBrowsingDatabaseBloom::GetListsInfo( - std::vector<SBListChunkRanges>* lists) { - DCHECK(lists); - lists->clear(); +void SafeBrowsingDatabaseBloom::BuildBloomFilter() { +#if defined(OS_WIN) 
+ // For measuring the amount of IO during the bloom filter build. + IoCounters io_before, io_after; + base::ProcessHandle handle = base::Process::Current().handle(); + scoped_ptr<base::ProcessMetrics> metric; + metric.reset(base::ProcessMetrics::CreateProcessMetrics(handle)); + metric->GetIOCounters(&io_before); +#endif - ReadChunkNumbers(); + Time before = Time::Now(); - lists->push_back(SBListChunkRanges(safe_browsing_util::kMalwareList)); - GetChunkIds(safe_browsing_util::MALWARE, ADD_CHUNK, &lists->back().adds); - GetChunkIds(safe_browsing_util::MALWARE, SUB_CHUNK, &lists->back().subs); + // Get all the pending GetHash results and write them to disk. + HashList pending_hashes; + { + AutoLock lock(lookup_lock_); + pending_hashes.swap(pending_full_hashes_); + } + WriteFullHashList(pending_hashes, true); - lists->push_back(SBListChunkRanges(safe_browsing_util::kPhishingList)); - GetChunkIds(safe_browsing_util::PHISH, ADD_CHUNK, &lists->back().adds); - GetChunkIds(safe_browsing_util::PHISH, SUB_CHUNK, &lists->back().subs); + add_count_ = GetAddPrefixCount(); + if (add_count_ == 0) { + AutoLock lock(lookup_lock_); + bloom_filter_ = NULL; + return; + } - return; -} + scoped_array<SBPair> adds_array(new SBPair[add_count_]); + SBPair* adds = adds_array.get(); -void SafeBrowsingDatabaseBloom::ReadChunkNumbers() { - add_chunk_cache_.clear(); - sub_chunk_cache_.clear(); + if (!BuildAddPrefixList(adds)) + return; - // Read in the add chunk numbers. - SQLITE_UNIQUE_STATEMENT(read_adds, *statement_cache_, - "SELECT chunk FROM add_chunks"); - if (!read_adds.is_valid()) { - NOTREACHED(); + // Build the full add cache, which includes full hash updates and GetHash + // results. Subs may remove some of these entries. 
+ scoped_ptr<HashCache> add_cache(new HashCache); + if (!BuildAddFullHashCache(add_cache.get())) return; - } - while (true) { - int rv = read_adds->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - break; - } - add_chunk_cache_.insert(read_adds->column_int(0)); - } + scoped_ptr<HashCache> sub_cache(new HashCache); + if (!BuildSubFullHashCache(sub_cache.get())) + return; - // Read in the sub chunk numbers. - SQLITE_UNIQUE_STATEMENT(read_subs, *statement_cache_, - "SELECT chunk FROM sub_chunks"); - if (!read_subs.is_valid()) { - NOTREACHED(); + // Used to track which adds have been subbed out. The vector<bool> is actually + // a bitvector so the size is as small as we can get. + std::vector<bool> adds_removed; + adds_removed.resize(add_count_, false); + + // Flag any add as removed if there is a matching sub. + int subs = 0; + if (!RemoveSubs(adds, &adds_removed, add_cache.get(), sub_cache.get(), &subs)) return; - } - while (true) { - int rv = read_subs->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - break; - } - sub_chunk_cache_.insert(read_subs->column_int(0)); - } -} + // Prepare the database for writing out our remaining add and sub prefixes. + if (!UpdateTables()) + return; -// Write all the chunk numbers to the add_chunks and sub_chunks tables. -bool SafeBrowsingDatabaseBloom::WriteChunkNumbers() { - // Delete the contents of the add chunk table. - SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_, - "DELETE FROM add_chunks"); - if (!del_add_chunk.is_valid()) { - NOTREACHED(); - return false; - } - int rv = del_add_chunk->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); + // Write out the remaining add prefixes to the filter and database. 
+ int new_count; + scoped_refptr<BloomFilter> filter; + if (!WritePrefixes(adds, adds_removed, &new_count, &filter)) + return; - SQLITE_UNIQUE_STATEMENT(write_adds, *statement_cache_, - "INSERT INTO add_chunks (chunk) VALUES (?)"); - if (!write_adds.is_valid()) { - NOTREACHED(); - return false; - } + // Write out the remaining full hash adds and subs to the database. + WriteFullHashes(add_cache.get(), true); + WriteFullHashes(sub_cache.get(), false); - // Write all the add chunks from the cache to the database. - std::set<int>::const_iterator it = add_chunk_cache_.begin(); - for (; it != add_chunk_cache_.end(); ++it) { - if (add_del_cache_.find(*it) != add_del_cache_.end()) - continue; // This chunk has been deleted. - write_adds->bind_int(0, *it); - rv = write_adds->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - write_adds->reset(); - } + // Save the chunk numbers we've received to the database for reporting in + // future update requests. + if (!WriteChunkNumbers()) + return; - // Delete the contents of the sub chunk table. - SQLITE_UNIQUE_STATEMENT(del_sub_chunk, *statement_cache_, - "DELETE FROM sub_chunks"); - if (!del_sub_chunk.is_valid()) { - NOTREACHED(); - return false; - } - rv = del_sub_chunk->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; + // Commit all the changes to the database. + int rv = insert_transaction_->Commit(); + if (rv != SQLITE_OK) { + NOTREACHED() << "SafeBrowsing update transaction failed to commit."; + UMA_HISTOGRAM_COUNTS("SB2.FailedUpdate", 1); + return; } - DCHECK(rv == SQLITE_DONE); - SQLITE_UNIQUE_STATEMENT(write_subs, *statement_cache_, - "INSERT INTO sub_chunks (chunk) VALUES (?)"); - if (!write_subs.is_valid()) { - NOTREACHED(); - return false; + // Swap in the newly built filter and cache. If there were any matching subs, + // the size (add_count_) will be smaller. 
+ { + AutoLock lock(lookup_lock_); + add_count_ = new_count; + bloom_filter_.swap(filter); + hash_cache_.swap(add_cache); } - // Write all the sub chunks from the cache to the database. - it = sub_chunk_cache_.begin(); - for (; it != sub_chunk_cache_.end(); ++it) { - if (sub_del_cache_.find(*it) != sub_del_cache_.end()) - continue; // This chunk has been deleted. - write_subs->bind_int(0, *it); - rv = write_subs->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - write_subs->reset(); - } + TimeDelta bloom_gen = Time::Now() - before; - return true; + // Persist the bloom filter to disk. + WriteBloomFilter(); + + // Gather statistics. +#if defined(OS_WIN) + metric->GetIOCounters(&io_after); + UMA_HISTOGRAM_COUNTS("SB2.BuildReadBytes", + static_cast<int>(io_after.ReadTransferCount - + io_before.ReadTransferCount)); + UMA_HISTOGRAM_COUNTS("SB2.BuildWriteBytes", + static_cast<int>(io_after.WriteTransferCount - + io_before.WriteTransferCount)); + UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", + static_cast<int>(io_after.ReadOperationCount - + io_before.ReadOperationCount)); + UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", + static_cast<int>(io_after.WriteOperationCount - + io_before.WriteOperationCount)); +#endif + SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in " + << bloom_gen.InMilliseconds() + << " ms total. 
prefix count: "<< add_count_; + UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); + UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_count_); + UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", subs); + UMA_HISTOGRAM_COUNTS("SB2.FilterSize", bloom_filter_->size()); + int64 size_64; + if (file_util::GetFileSize(filename_, &size_64)) + UMA_HISTOGRAM_COUNTS("SB2.DatabaseBytes", static_cast<int>(size_64)); } int SafeBrowsingDatabaseBloom::PairCompare(const void* arg1, const void* arg2) { @@ -831,6 +653,114 @@ bool SafeBrowsingDatabaseBloom::BuildAddPrefixList(SBPair* adds) { return true; } +bool SafeBrowsingDatabaseBloom::BuildAddFullHashCache(HashCache* add_cache) { + add_cache->clear(); + + // Read all full add entries to the cache. + SQLITE_UNIQUE_STATEMENT( + full_add_entry, + *statement_cache_, + "SELECT chunk, prefix, receive_time, full_hash FROM add_full_hash"); + if (!full_add_entry.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while (true) { + rv = full_add_entry->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + break; + } + HashCacheEntry entry; + entry.add_chunk_id = full_add_entry->column_int(0); + if (add_del_cache_.find(entry.add_chunk_id) != add_del_cache_.end()) + continue; // This entry's chunk was deleted so we skip it. + SBPrefix prefix = full_add_entry->column_int(1); + entry.received = base::Time::FromTimeT(full_add_entry->column_int64(2)); + int chunk, list_id; + DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); + entry.list_id = list_id; + ReadFullHash(&full_add_entry, 3, &entry.full_hash); + HashList& entries = (*add_cache)[prefix]; + entries.push_back(entry); + } + + // Clear the full add table. 
+ SQLITE_UNIQUE_STATEMENT(full_add_drop, *statement_cache_, + "DELETE FROM add_full_hash"); + if (!full_add_drop.is_valid()) { + NOTREACHED(); + return false; + } + rv = full_add_drop->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + + return true; +} + +bool SafeBrowsingDatabaseBloom::BuildSubFullHashCache(HashCache* sub_cache) { + sub_cache->clear(); + + // Read all full sub entries to the cache. + SQLITE_UNIQUE_STATEMENT( + full_sub_entry, + *statement_cache_, + "SELECT chunk, add_chunk, prefix, full_hash FROM sub_full_hash"); + if (!full_sub_entry.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while (true) { + rv = full_sub_entry->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + break; + } + HashCacheEntry entry; + entry.sub_chunk_id = full_sub_entry->column_int(0); + if (sub_del_cache_.find(entry.sub_chunk_id) != sub_del_cache_.end()) + continue; // This entry's chunk was deleted so we skip it. + entry.add_chunk_id = full_sub_entry->column_int(1); + SBPrefix prefix = full_sub_entry->column_int(2); + int chunk, list_id; + DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); + entry.list_id = list_id; + ReadFullHash(&full_sub_entry, 3, &entry.full_hash); + HashList& entries = (*sub_cache)[prefix]; + entries.push_back(entry); + } + + // Clear the full sub table. 
+ SQLITE_UNIQUE_STATEMENT(full_sub_drop, *statement_cache_, + "DELETE FROM sub_full_hash"); + if (!full_sub_drop.is_valid()) { + NOTREACHED(); + return false; + } + rv = full_sub_drop->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + + return true; +} + bool SafeBrowsingDatabaseBloom::RemoveSubs( SBPair* adds, std::vector<bool>* adds_removed, HashCache* add_cache, HashCache* sub_cache, int* subs) { @@ -1094,236 +1024,6 @@ void SafeBrowsingDatabaseBloom::WriteFullHashList(const HashList& hash_list, } } -bool SafeBrowsingDatabaseBloom::BuildAddFullHashCache(HashCache* add_cache) { - add_cache->clear(); - - // Read all full add entries to the cache. - SQLITE_UNIQUE_STATEMENT( - full_add_entry, - *statement_cache_, - "SELECT chunk, prefix, receive_time, full_hash FROM add_full_hash"); - if (!full_add_entry.is_valid()) { - NOTREACHED(); - return false; - } - - int rv; - while (true) { - rv = full_add_entry->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - break; - } - HashCacheEntry entry; - entry.add_chunk_id = full_add_entry->column_int(0); - if (add_del_cache_.find(entry.add_chunk_id) != add_del_cache_.end()) - continue; // This entry's chunk was deleted so we skip it. - SBPrefix prefix = full_add_entry->column_int(1); - entry.received = base::Time::FromTimeT(full_add_entry->column_int64(2)); - int chunk, list_id; - DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); - entry.list_id = list_id; - ReadFullHash(full_add_entry, 3, &entry.full_hash); - HashList& entries = (*add_cache)[prefix]; - entries.push_back(entry); - } - - // Clear the full add table. 
- SQLITE_UNIQUE_STATEMENT(full_add_drop, *statement_cache_, - "DELETE FROM add_full_hash"); - if (!full_add_drop.is_valid()) { - NOTREACHED(); - return false; - } - rv = full_add_drop->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - return true; -} - -bool SafeBrowsingDatabaseBloom::BuildSubFullHashCache(HashCache* sub_cache) { - sub_cache->clear(); - - // Read all full sub entries to the cache. - SQLITE_UNIQUE_STATEMENT( - full_sub_entry, - *statement_cache_, - "SELECT chunk, add_chunk, prefix, full_hash FROM sub_full_hash"); - if (!full_sub_entry.is_valid()) { - NOTREACHED(); - return false; - } - - int rv; - while (true) { - rv = full_sub_entry->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - break; - } - HashCacheEntry entry; - entry.sub_chunk_id = full_sub_entry->column_int(0); - if (sub_del_cache_.find(entry.sub_chunk_id) != sub_del_cache_.end()) - continue; // This entry's chunk was deleted so we skip it. - entry.add_chunk_id = full_sub_entry->column_int(1); - SBPrefix prefix = full_sub_entry->column_int(2); - int chunk, list_id; - DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); - entry.list_id = list_id; - ReadFullHash(full_sub_entry, 3, &entry.full_hash); - HashList& entries = (*sub_cache)[prefix]; - entries.push_back(entry); - } - - // Clear the full sub table. - SQLITE_UNIQUE_STATEMENT(full_sub_drop, *statement_cache_, - "DELETE FROM sub_full_hash"); - if (!full_sub_drop.is_valid()) { - NOTREACHED(); - return false; - } - rv = full_sub_drop->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - return true; -} - -void SafeBrowsingDatabaseBloom::BuildBloomFilter() { -#if defined(OS_WIN) - // For measuring the amount of IO during the bloom filter build. 
- IoCounters io_before, io_after; - base::ProcessHandle handle = base::Process::Current().handle(); - scoped_ptr<base::ProcessMetrics> metric; - metric.reset(base::ProcessMetrics::CreateProcessMetrics(handle)); - metric->GetIOCounters(&io_before); -#endif - - Time before = Time::Now(); - - // Get all the pending GetHash results and write them to disk. - HashList pending_hashes; - { - AutoLock lock(lookup_lock_); - pending_hashes.swap(pending_full_hashes_); - } - WriteFullHashList(pending_hashes, true); - - add_count_ = GetAddPrefixCount(); - if (add_count_ == 0) { - AutoLock lock(lookup_lock_); - bloom_filter_ = NULL; - return; - } - - scoped_array<SBPair> adds_array(new SBPair[add_count_]); - SBPair* adds = adds_array.get(); - - if (!BuildAddPrefixList(adds)) - return; - - // Build the full add cache, which includes full hash updates and GetHash - // results. Subs may remove some of these entries. - scoped_ptr<HashCache> add_cache(new HashCache); - if (!BuildAddFullHashCache(add_cache.get())) - return; - - scoped_ptr<HashCache> sub_cache(new HashCache); - if (!BuildSubFullHashCache(sub_cache.get())) - return; - - // Used to track which adds have been subbed out. The vector<bool> is actually - // a bitvector so the size is as small as we can get. - std::vector<bool> adds_removed; - adds_removed.resize(add_count_, false); - - // Flag any add as removed if there is a matching sub. - int subs = 0; - if (!RemoveSubs(adds, &adds_removed, add_cache.get(), sub_cache.get(), &subs)) - return; - - // Prepare the database for writing out our remaining add and sub prefixes. - if (!UpdateTables()) - return; - - // Write out the remaining add prefixes to the filter and database. - int new_count; - scoped_refptr<BloomFilter> filter; - if (!WritePrefixes(adds, adds_removed, &new_count, &filter)) - return; - - // Write out the remaining full hash adds and subs to the database. 
- WriteFullHashes(add_cache.get(), true); - WriteFullHashes(sub_cache.get(), false); - - // Save the chunk numbers we've received to the database for reporting in - // future update requests. - if (!WriteChunkNumbers()) - return; - - // Commit all the changes to the database. - int rv = insert_transaction_->Commit(); - if (rv != SQLITE_OK) { - NOTREACHED() << "SafeBrowsing update transaction failed to commit."; - UMA_HISTOGRAM_COUNTS("SB2.FailedUpdate", 1); - return; - } - - // Swap in the newly built filter and cache. If there were any matching subs, - // the size (add_count_) will be smaller. - { - AutoLock lock(lookup_lock_); - add_count_ = new_count; - bloom_filter_.swap(filter); - hash_cache_.swap(add_cache); - } - - TimeDelta bloom_gen = Time::Now() - before; - - // Persist the bloom filter to disk. - WriteBloomFilter(); - - // Gather statistics. -#if defined(OS_WIN) - metric->GetIOCounters(&io_after); - UMA_HISTOGRAM_COUNTS("SB2.BuildReadBytes", - static_cast<int>(io_after.ReadTransferCount - - io_before.ReadTransferCount)); - UMA_HISTOGRAM_COUNTS("SB2.BuildWriteBytes", - static_cast<int>(io_after.WriteTransferCount - - io_before.WriteTransferCount)); - UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", - static_cast<int>(io_after.ReadOperationCount - - io_before.ReadOperationCount)); - UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", - static_cast<int>(io_after.WriteOperationCount - - io_before.WriteOperationCount)); -#endif - SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in " - << bloom_gen.InMilliseconds() - << " ms total. 
prefix count: "<< add_count_; - UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); - UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_count_); - UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", subs); - UMA_HISTOGRAM_COUNTS("SB2.FilterSize", bloom_filter_->size()); - int64 size_64; - if (file_util::GetFileSize(filename_, &size_64)) - UMA_HISTOGRAM_COUNTS("SB2.DatabaseBytes", static_cast<int>(size_64)); -} - void SafeBrowsingDatabaseBloom::GetCachedFullHashes( const std::vector<SBPrefix>* prefix_hits, std::vector<SBFullHashResult>* full_hits, @@ -1362,38 +1062,6 @@ void SafeBrowsingDatabaseBloom::GetCachedFullHashes( } } -void SafeBrowsingDatabaseBloom::CacheHashResults( - const std::vector<SBPrefix>& prefixes, - const std::vector<SBFullHashResult>& full_hits) { - AutoLock lock(lookup_lock_); - - if (full_hits.empty()) { - // These prefixes returned no results, so we store them in order to prevent - // asking for them again. We flush this cache at the next update. - for (std::vector<SBPrefix>::const_iterator it = prefixes.begin(); - it != prefixes.end(); ++it) { - prefix_miss_cache_.insert(*it); - } - return; - } - - const Time now = Time::Now(); - for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin(); - it != full_hits.end(); ++it) { - SBPrefix prefix = it->hash.prefix; - HashList& entries = (*hash_cache_)[prefix]; - HashCacheEntry entry; - entry.received = now; - entry.list_id = safe_browsing_util::GetListId(it->list_name); - entry.add_chunk_id = EncodeChunkId(it->add_chunk_id, entry.list_id); - entry.full_hash = it->hash; - entries.push_back(entry); - - // Also push a copy to the pending write queue. 
- pending_full_hashes_.push_back(entry); - } -} - bool SafeBrowsingDatabaseBloom::ClearCachedEntry(SBPrefix prefix, int add_chunk, HashCache* hash_cache) { @@ -1431,7 +1099,320 @@ void SafeBrowsingDatabaseBloom::OnHandleCorruptDatabase() { DCHECK(false) << "SafeBrowsing database was corrupt and reset"; } -// This database is always synchronous since we don't need to worry about -// blocking any incoming reads. -void SafeBrowsingDatabaseBloom::SetSynchronous() { +void SafeBrowsingDatabaseBloom::InsertAdd(SBPrefix host, SBEntry* entry) { + STATS_COUNTER("SB.HostInsert", 1); + int encoded = EncodeChunkId(entry->chunk_id(), entry->list_id()); + + if (entry->type() == SBEntry::ADD_FULL_HASH) { + base::Time receive_time = base::Time::Now(); + for (int i = 0; i < entry->prefix_count(); ++i) { + SBFullHash full_hash = entry->FullHashAt(i); + SBPrefix prefix = full_hash.prefix; + InsertAddPrefix(prefix, encoded); + InsertAddFullHash(prefix, encoded, receive_time, full_hash); + } + return; + } + + // This entry contains only regular (32 bit) prefixes. 
+ int count = entry->prefix_count(); + if (count == 0) { + InsertAddPrefix(host, encoded); + } else { + for (int i = 0; i < count; i++) { + SBPrefix prefix = entry->PrefixAt(i); + InsertAddPrefix(prefix, encoded); + } + } +} + +void SafeBrowsingDatabaseBloom::InsertAddPrefix(SBPrefix prefix, + int encoded_chunk) { + STATS_COUNTER("SB.PrefixAdd", 1); + std::string sql = "INSERT INTO add_prefix (chunk, prefix) VALUES (?, ?)"; + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, prefix); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } + add_count_++; +} + +void SafeBrowsingDatabaseBloom::InsertAddFullHash(SBPrefix prefix, + int encoded_chunk, + base::Time receive_time, + SBFullHash full_prefix) { + STATS_COUNTER("SB.PrefixAddFull", 1); + std::string sql = "INSERT INTO add_full_hash " + "(chunk, prefix, receive_time, full_hash) " + "VALUES (?,?,?,?)"; + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, prefix); + statement->bind_int64(2, receive_time.ToTimeT()); + statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash)); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + +void SafeBrowsingDatabaseBloom::InsertSub( + int chunk_id, SBPrefix host, SBEntry* entry) { + STATS_COUNTER("SB.HostDelete", 1); + int encoded = EncodeChunkId(chunk_id, entry->list_id()); + int encoded_add; + + if (entry->type() == SBEntry::SUB_FULL_HASH) { + for (int i = 0; i < entry->prefix_count(); ++i) { + SBFullHash full_hash = entry->FullHashAt(i); + SBPrefix prefix = full_hash.prefix; + 
encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), entry->list_id()); + InsertSubPrefix(prefix, encoded, encoded_add); + InsertSubFullHash(prefix, encoded, encoded_add, full_hash, false); + } + } else { + // We have prefixes. + int count = entry->prefix_count(); + if (count == 0) { + encoded_add = EncodeChunkId(entry->chunk_id(), entry->list_id()); + InsertSubPrefix(host, encoded, encoded_add); + } else { + for (int i = 0; i < count; i++) { + SBPrefix prefix = entry->PrefixAt(i); + encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), + entry->list_id()); + InsertSubPrefix(prefix, encoded, encoded_add); + } + } + } +} + +void SafeBrowsingDatabaseBloom::InsertSubPrefix(SBPrefix prefix, + int encoded_chunk, + int encoded_add_chunk) { + STATS_COUNTER("SB.PrefixSub", 1); + std::string sql = + "INSERT INTO sub_prefix (chunk, add_chunk, prefix) VALUES (?,?,?)"; + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, encoded_add_chunk); + statement->bind_int(2, prefix); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + +void SafeBrowsingDatabaseBloom::InsertSubFullHash(SBPrefix prefix, + int encoded_chunk, + int encoded_add_chunk, + SBFullHash full_prefix, + bool use_temp_table) { + STATS_COUNTER("SB.PrefixSubFull", 1); + std::string sql = "INSERT INTO "; + if (use_temp_table) { + sql += "sub_full_tmp"; + } else { + sql += "sub_full_hash"; + } + sql += " (chunk, add_chunk, prefix, full_hash) VALUES (?,?,?,?)"; + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, encoded_add_chunk); + statement->bind_int(2, prefix); + statement->bind_blob(3, full_prefix.full_hash, 
sizeof(SBFullHash)); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + +void SafeBrowsingDatabaseBloom::ReadFullHash(SqliteCompiledStatement* statement, + int column, + SBFullHash* full_hash) { + DCHECK(full_hash); + std::vector<unsigned char> blob; + (*statement)->column_blob_as_vector(column, &blob); + DCHECK(blob.size() == sizeof(SBFullHash)); + memcpy(full_hash->full_hash, &blob[0], sizeof(SBFullHash)); +} + +// TODO(paulg): Look for a less expensive way to maintain add_count_? If we move +// to a native file format, we can just cache the count in the file and not have +// to scan at all. +int SafeBrowsingDatabaseBloom::GetAddPrefixCount() { + SQLITE_UNIQUE_STATEMENT(count, *statement_cache_, + "SELECT count(*) FROM add_prefix"); + if (!count.is_valid()) { + NOTREACHED(); + return 0; + } + int rv = count->step(); + int add_count = 0; + if (rv == SQLITE_ROW) + add_count = count->column_int(0); + else if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + + return add_count; +} + +void SafeBrowsingDatabaseBloom::ReadChunkNumbers() { + add_chunk_cache_.clear(); + sub_chunk_cache_.clear(); + + // Read in the add chunk numbers. + SQLITE_UNIQUE_STATEMENT(read_adds, *statement_cache_, + "SELECT chunk FROM add_chunks"); + if (!read_adds.is_valid()) { + NOTREACHED(); + return; + } + + while (true) { + int rv = read_adds->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + break; + } + add_chunk_cache_.insert(read_adds->column_int(0)); + } + + // Read in the sub chunk numbers. 
+ SQLITE_UNIQUE_STATEMENT(read_subs, *statement_cache_, + "SELECT chunk FROM sub_chunks"); + if (!read_subs.is_valid()) { + NOTREACHED(); + return; + } + + while (true) { + int rv = read_subs->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + break; + } + sub_chunk_cache_.insert(read_subs->column_int(0)); + } +} + +// Write all the chunk numbers to the add_chunks and sub_chunks tables. +bool SafeBrowsingDatabaseBloom::WriteChunkNumbers() { + // Delete the contents of the add chunk table. + SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_, + "DELETE FROM add_chunks"); + if (!del_add_chunk.is_valid()) { + NOTREACHED(); + return false; + } + int rv = del_add_chunk->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + + SQLITE_UNIQUE_STATEMENT(write_adds, *statement_cache_, + "INSERT INTO add_chunks (chunk) VALUES (?)"); + if (!write_adds.is_valid()) { + NOTREACHED(); + return false; + } + + // Write all the add chunks from the cache to the database. + std::set<int>::const_iterator it = add_chunk_cache_.begin(); + for (; it != add_chunk_cache_.end(); ++it) { + if (add_del_cache_.find(*it) != add_del_cache_.end()) + continue; // This chunk has been deleted. + write_adds->bind_int(0, *it); + rv = write_adds->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + write_adds->reset(); + } + + // Delete the contents of the sub chunk table. 
+ SQLITE_UNIQUE_STATEMENT(del_sub_chunk, *statement_cache_, + "DELETE FROM sub_chunks"); + if (!del_sub_chunk.is_valid()) { + NOTREACHED(); + return false; + } + rv = del_sub_chunk->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + + SQLITE_UNIQUE_STATEMENT(write_subs, *statement_cache_, + "INSERT INTO sub_chunks (chunk) VALUES (?)"); + if (!write_subs.is_valid()) { + NOTREACHED(); + return false; + } + + // Write all the sub chunks from the cache to the database. + it = sub_chunk_cache_.begin(); + for (; it != sub_chunk_cache_.end(); ++it) { + if (sub_del_cache_.find(*it) != sub_del_cache_.end()) + continue; // This chunk has been deleted. + write_subs->bind_int(0, *it); + rv = write_subs->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + write_subs->reset(); + } + + return true; +} + +void SafeBrowsingDatabaseBloom::ClearUpdateCaches() { + AutoLock lock(lookup_lock_); + add_del_cache_.clear(); + sub_del_cache_.clear(); + add_chunk_cache_.clear(); + sub_chunk_cache_.clear(); + prefix_miss_cache_.clear(); } |