diff options
author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-10-17 16:02:19 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-10-17 16:02:19 +0000 |
commit | 13147160ae28933269024ba973f0a1ae3d11a57d (patch) | |
tree | 3817578c54a3304d82b0df7efe8b7cd958e20206 | |
parent | 897fa12b0446cdf6beb1d3166d098fe13daadbf6 (diff) | |
download | chromium_src-13147160ae28933269024ba973f0a1ae3d11a57d.zip chromium_src-13147160ae28933269024ba973f0a1ae3d11a57d.tar.gz chromium_src-13147160ae28933269024ba973f0a1ae3d11a57d.tar.bz2 |
Delete SafeBrowsingDatabaseBloom implementation.
This is the old safe-browsing database based on direct SQLite access.
It has been replaced by SafeBrowsingDatabaseNew.
BUG=58551
TEST=none
Review URL: http://codereview.chromium.org/3845003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@62896 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database.cc | 40 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database_bloom.cc | 1459 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database_bloom.h | 248 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database_bloom_unittest.cc | 1321 | ||||
-rw-r--r-- | chrome/chrome_browser.gypi | 2 | ||||
-rw-r--r-- | chrome/chrome_tests.gypi | 1 | ||||
-rw-r--r-- | chrome/common/chrome_switches.cc | 6 | ||||
-rw-r--r-- | chrome/common/chrome_switches.h | 1 |
8 files changed, 6 insertions, 3072 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index 3301666..823362f 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -4,7 +4,6 @@ #include "chrome/browser/safe_browsing/safe_browsing_database.h" -#include "base/command_line.h" #include "base/file_util.h" #include "base/metrics/histogram.h" #include "base/metrics/stats_counters.h" @@ -13,10 +12,8 @@ #include "base/process_util.h" #include "base/sha2.h" #include "chrome/browser/safe_browsing/bloom_filter.h" -#include "chrome/browser/safe_browsing/safe_browsing_database_bloom.h" #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" #include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h" -#include "chrome/common/chrome_switches.h" #include "googleurl/src/gurl.h" namespace { @@ -146,38 +143,13 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { } // namespace // Factory method. -// TODO(shess): Proposed staging of the rolling: -// - Ship "old" to dev channel to provide a safe fallback. -// - If that proves stable, change to "newsqlite". This changes the -// code which manipulates the data, without changing the data -// format. At this point all changes could be reverted without -// having to resync everyone's database from scratch. -// - If SafeBrowsingDatabaseNew proves stable, change the default to -// "newfile", which will change the file format. Changing back -// would require resync from scratch. -// - Once enough users are converted to "newfile", remove all of the -// redundent indirection classes and functions, perhaps leaving -// SafeBrowsingStoreSqlite for on-the-fly conversions. -// - Once there are few remaining SQLite-format users, remove -// SafeBrowsingStoreSqlite. Remaining users will resync their -// safe-browsing database from scratch. If users haven't sync'ed -// their database in months, this probably won't be more expensive -// than an incremental sync. +// TODO(shess): Milestone-7 is converting from SQLite-based +// SafeBrowsingDatabaseBloom to the new file format with +// SafeBrowsingDatabaseNew. Once that conversion is too far along to +// consider reversing, circle back and lift SafeBrowsingDatabaseNew up +// to SafeBrowsingDatabase and get rid of the abstract class. SafeBrowsingDatabase* SafeBrowsingDatabase::Create() { - const CommandLine& command_line = *CommandLine::ForCurrentProcess(); - std::string value = - command_line.GetSwitchValueASCII(switches::kSafeBrowsingDatabaseStore); - if (!value.compare("newfile")) { - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile); - } else if (!value.compare("newsqlite")) { - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreSqlite); - } else if (!value.compare("old")) { - return new SafeBrowsingDatabaseBloom; - } else { - // Default. - DCHECK(value.empty()); - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile); - } + return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile); } SafeBrowsingDatabase::~SafeBrowsingDatabase() { diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc deleted file mode 100644 index 36a03c0..0000000 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc +++ /dev/null @@ -1,1459 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/safe_browsing/safe_browsing_database_bloom.h" - -#include "base/auto_reset.h" -#include "base/file_util.h" -#include "base/metrics/histogram.h" -#include "base/message_loop.h" -#include "base/metrics/stats_counters.h" -#include "base/process_util.h" -#include "base/sha2.h" -#include "base/string_util.h" -#include "chrome/browser/safe_browsing/bloom_filter.h" -#include "chrome/common/sqlite_compiled_statement.h" -#include "chrome/common/sqlite_utils.h" -#include "googleurl/src/gurl.h" - -// Database version. If this is different than what's stored on disk, the -// database is reset. -static const int kDatabaseVersion = 6; - -// The maximum staleness for a cached entry. -static const int kMaxStalenessMinutes = 45; - -// Implementation -------------------------------------------------------------- - -SafeBrowsingDatabaseBloom::SafeBrowsingDatabaseBloom() - : db_(NULL), - ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)), - add_count_(0), - performing_reset_(false) { -} - -SafeBrowsingDatabaseBloom::~SafeBrowsingDatabaseBloom() { - Close(); -} - -void SafeBrowsingDatabaseBloom::Init(const FilePath& filename) { - DCHECK(filename_.empty()); // Ensure we haven't been run before. - - filename_ = FilePath(filename.value()); - bloom_filter_filename_ = BloomFilterForFilename(filename_); - - // NOTE: There is no need to grab the lock in this function, since until it - // returns, there are no pointers to this class on other threads. - hash_cache_.reset(new HashCache); - - LoadBloomFilter(); -} - -bool SafeBrowsingDatabaseBloom::ResetDatabase() { - // Open() can call us when trying to handle potential database corruption. - // Because we call Open() at the bottom of the function, we need to guard - // against recursion here. - if (performing_reset_) - return false; // Don't recurse. - - AutoReset<bool> auto_reset_performing_reset(&performing_reset_, true); - - // Delete files on disk. - bool rv = Close(); - DCHECK(rv); - if (!file_util::Delete(filename_, false)) { - NOTREACHED(); - return false; - } - DeleteBloomFilter(); - - // Reset objects in memory. - { - AutoLock lock(lookup_lock_); - hash_cache_->clear(); - ClearUpdateCaches(); - bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * - BloomFilter::kBloomFilterSizeRatio); - } - - return Open(); -} - -bool SafeBrowsingDatabaseBloom::ContainsUrl( - const GURL& url, - std::string* matching_list, - std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - base::Time last_update) { - - // Clear the results first. - matching_list->clear(); - prefix_hits->clear(); - full_hits->clear(); - - std::vector<std::string> hosts; - if (url.HostIsIPAddress()) { - hosts.push_back(url.host()); - } else { - safe_browsing_util::GenerateHostsToCheck(url, &hosts); - if (hosts.size() == 0) - return false; // Things like about:blank - } - std::vector<std::string> paths; - safe_browsing_util::GeneratePathsToCheck(url, &paths); - - // Lock the bloom filter and cache so that they aren't deleted on us if an - // update is just about to finish. - AutoLock lock(lookup_lock_); - - if (!bloom_filter_.get()) - return false; - - // TODO(erikkay): This may wind up being too many hashes on a complex page. - // TODO(erikkay): Not filling in matching_list - is that OK? - for (size_t i = 0; i < hosts.size(); ++i) { - for (size_t j = 0; j < paths.size(); ++j) { - SBFullHash full_hash; - // TODO(erikkay): Maybe we should only do the first 32 bits initially, - // and then fall back to the full hash if there's a hit. - base::SHA256HashString(hosts[i] + paths[j], &full_hash, - sizeof(SBFullHash)); - SBPrefix prefix = full_hash.prefix; - if (bloom_filter_->Exists(prefix)) - prefix_hits->push_back(prefix); - } - } - - if (!prefix_hits->empty()) { - // If all the prefixes are cached as 'misses', don't issue a GetHash. - bool all_misses = true; - for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin(); - it != prefix_hits->end(); ++it) { - if (prefix_miss_cache_.find(*it) == prefix_miss_cache_.end()) { - all_misses = false; - break; - } - } - if (all_misses) - return false; - - // See if we have the results of recent GetHashes for the prefix matches. - GetCachedFullHashes(prefix_hits, full_hits, last_update); - return true; - } - - return false; -} - -void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name, - const SBChunkList& chunks) { - if (chunks.empty()) - return; - - const base::TimeTicks insert_start = base::TimeTicks::Now(); - - int list_id = safe_browsing_util::GetListId(list_name); - ChunkType chunk_type = chunks.front().is_add ? ADD_CHUNK : SUB_CHUNK; - - for (SBChunkList::const_iterator iter = chunks.begin(); - iter != chunks.end(); ++iter) { - const SBChunk& chunk = *iter; - int chunk_id = chunk.chunk_number; - - // The server can give us a chunk that we already have because it's part of - // a range. Don't add it again. - if (!ChunkExists(list_id, chunk_type, chunk_id)) { - for (std::deque<SBChunkHost>::const_iterator hiter = chunk.hosts.begin(); - hiter != chunk.hosts.end(); ++hiter) { - // Read the existing record for this host, if it exists. - const SBPrefix host = hiter->host; - const SBEntry* entry = hiter->entry; - if (chunk_type == ADD_CHUNK) { - InsertAdd(chunk_id, host, entry, list_id); - } else { - InsertSub(chunk_id, host, entry, list_id); - } - } - - int encoded = EncodeChunkId(chunk_id, list_id); - if (chunk_type == ADD_CHUNK) - add_chunk_cache_.insert(encoded); - else - sub_chunk_cache_.insert(encoded); - } - } - - UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - insert_start); -} - -void SafeBrowsingDatabaseBloom::DeleteChunks( - const std::vector<SBChunkDelete>& chunk_deletes) { - if (chunk_deletes.empty()) - return; - - int list_id = safe_browsing_util::GetListId(chunk_deletes.front().list_name); - - for (size_t i = 0; i < chunk_deletes.size(); ++i) { - const SBChunkDelete& chunk = chunk_deletes[i]; - std::vector<int> chunk_numbers; - RangesToChunks(chunk.chunk_del, &chunk_numbers); - for (size_t del = 0; del < chunk_numbers.size(); ++del) { - int encoded_chunk = EncodeChunkId(chunk_numbers[del], list_id); - if (chunk.is_sub_del) - sub_del_cache_.insert(encoded_chunk); - else - add_del_cache_.insert(encoded_chunk); - } - } -} - -void SafeBrowsingDatabaseBloom::GetListsInfo( - std::vector<SBListChunkRanges>* lists) { - DCHECK(lists); - lists->clear(); - - ReadChunkNumbers(); - - lists->push_back(SBListChunkRanges(safe_browsing_util::kMalwareList)); - GetChunkIds(safe_browsing_util::MALWARE, ADD_CHUNK, &lists->back().adds); - GetChunkIds(safe_browsing_util::MALWARE, SUB_CHUNK, &lists->back().subs); - - lists->push_back(SBListChunkRanges(safe_browsing_util::kPhishingList)); - GetChunkIds(safe_browsing_util::PHISH, ADD_CHUNK, &lists->back().adds); - GetChunkIds(safe_browsing_util::PHISH, SUB_CHUNK, &lists->back().subs); - - return; -} - -void SafeBrowsingDatabaseBloom::CacheHashResults( - const std::vector<SBPrefix>& prefixes, - const std::vector<SBFullHashResult>& full_hits) { - AutoLock lock(lookup_lock_); - - if (full_hits.empty()) { - // These prefixes returned no results, so we store them in order to prevent - // asking for them again. We flush this cache at the next update. - for (std::vector<SBPrefix>::const_iterator it = prefixes.begin(); - it != prefixes.end(); ++it) { - prefix_miss_cache_.insert(*it); - } - return; - } - - const base::Time now = base::Time::Now(); - for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin(); - it != full_hits.end(); ++it) { - SBPrefix prefix = it->hash.prefix; - HashList& entries = (*hash_cache_)[prefix]; - HashCacheEntry entry; - entry.received = now; - entry.list_id = safe_browsing_util::GetListId(it->list_name); - entry.add_chunk_id = EncodeChunkId(it->add_chunk_id, entry.list_id); - entry.full_hash = it->hash; - entries.push_back(entry); - - // Also push a copy to the pending write queue. - pending_full_hashes_.push_back(entry); - } -} - -bool SafeBrowsingDatabaseBloom::UpdateStarted() { - DCHECK(insert_transaction_.get() == NULL); - - if (!Open()) - return false; - - insert_transaction_.reset(new SQLTransaction(db_)); - if (insert_transaction_->Begin() != SQLITE_OK) { - DCHECK(false) << "Safe browsing database couldn't start transaction"; - Close(); - return false; - } - return true; -} - -bool SafeBrowsingDatabaseBloom::UpdateStarted( - std::vector<SBListChunkRanges>* lists) { - bool ret = UpdateStarted(); - if (ret) - GetListsInfo(lists); - return ret; -} - -void SafeBrowsingDatabaseBloom::UpdateFinished(bool update_succeeded) { - if (update_succeeded) - BuildBloomFilter(); - - Close(); - - // We won't need the chunk caches until the next update (which will read them - // from the database), so free their memory as they may contain thousands of - // entries. - { - AutoLock lock(lookup_lock_); - ClearUpdateCaches(); - } -} - -bool SafeBrowsingDatabaseBloom::Open() { - if (db_) - return true; - - if (sqlite_utils::OpenSqliteDb(filename_, &db_) != SQLITE_OK) { - sqlite3_close(db_); - db_ = NULL; - return false; - } - - // Run the database in exclusive mode. Nobody else should be accessing the - // database while we're running, and this will give somewhat improved perf. - sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL); - - statement_cache_.reset(new SqliteStatementCache(db_)); - - if (!sqlite_utils::DoesSqliteTableExist(db_, "add_prefix")) { - if (!CreateTables()) { - // Database could be corrupt, try starting from scratch. - if (!ResetDatabase()) - return false; - } - } else if (!CheckCompatibleVersion()) { - if (!ResetDatabase()) - return false; - } - - return true; -} - -bool SafeBrowsingDatabaseBloom::Close() { - if (!db_) - return true; - - insert_transaction_.reset(); - statement_cache_.reset(); // Must free statements before closing DB. - bool result = sqlite3_close(db_) == SQLITE_OK; - db_ = NULL; - - return result; -} - -bool SafeBrowsingDatabaseBloom::CreateTables() { - SQLTransaction transaction(db_); - transaction.Begin(); - - // Store 32 bit add prefixes here. - if (sqlite3_exec(db_, "CREATE TABLE add_prefix (" - "chunk INTEGER," - "prefix INTEGER)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store 32 bit sub prefixes here. - if (sqlite3_exec(db_, "CREATE TABLE sub_prefix (" - "chunk INTEGER," - "add_chunk INTEGER," - "prefix INTEGER)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store 256 bit add full hashes (and GetHash results) here. - if (sqlite3_exec(db_, "CREATE TABLE add_full_hash (" - "chunk INTEGER," - "prefix INTEGER," - "receive_time INTEGER," - "full_hash BLOB)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store 256 bit sub full hashes here. - if (sqlite3_exec(db_, "CREATE TABLE sub_full_hash (" - "chunk INTEGER," - "add_chunk INTEGER," - "prefix INTEGER," - "full_hash BLOB)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store all the add and sub chunk numbers we receive. We cannot just rely on - // the prefix tables to generate these lists, since some chunks will have zero - // entries (and thus no prefixes), or potentially an add chunk can have all of - // its entries sub'd without receiving an AddDel, or a sub chunk might have - // been entirely consumed by adds. In these cases, we still have to report the - // chunk number but it will not have any prefixes in the prefix tables. - // - // TODO(paulg): Investigate storing the chunks as a string of ChunkRanges, one - // string for each of phish-add, phish-sub, malware-add, malware-sub. This - // might be better performance when the number of chunks is large, and is the - // natural format for the update request. - if (sqlite3_exec(db_, "CREATE TABLE add_chunks (" - "chunk INTEGER PRIMARY KEY)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - if (sqlite3_exec(db_, "CREATE TABLE sub_chunks (" - "chunk INTEGER PRIMARY KEY)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - std::string version = "PRAGMA user_version="; - version += StringPrintf("%d", kDatabaseVersion); - - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - if (statement->step() != SQLITE_DONE) - return false; - - transaction.Commit(); - add_count_ = 0; - - return true; -} - -bool SafeBrowsingDatabaseBloom::CheckCompatibleVersion() { - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "PRAGMA user_version"); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - int result = statement->step(); - if (result != SQLITE_ROW) - return false; - - return statement->column_int(0) == kDatabaseVersion; -} - -bool SafeBrowsingDatabaseBloom::ChunkExists(int list_id, - ChunkType type, - int chunk_id) { - STATS_COUNTER("SB.ChunkSelect", 1); - int encoded = EncodeChunkId(chunk_id, list_id); - bool ret; - if (type == ADD_CHUNK) - ret = add_chunk_cache_.count(encoded) > 0; - else - ret = sub_chunk_cache_.count(encoded) > 0; - - return ret; -} - -// Return a comma separated list of chunk ids that are in the database for -// the given list and chunk type. -void SafeBrowsingDatabaseBloom::GetChunkIds( - int list_id, ChunkType type, std::string* list) { - - std::set<int>::iterator i, end; - if (type == ADD_CHUNK) { - i = add_chunk_cache_.begin(); - end = add_chunk_cache_.end(); - } else { - i = sub_chunk_cache_.begin(); - end = sub_chunk_cache_.end(); - } - std::vector<int> chunks; - for (; i != end; ++i) { - int chunk = *i; - int list_id2; - DecodeChunkId(chunk, &chunk, &list_id2); - if (list_id2 == list_id) - chunks.push_back(chunk); - } - std::vector<ChunkRange> ranges; - ChunksToRanges(chunks, &ranges); - RangesToString(ranges, list); -} - -void SafeBrowsingDatabaseBloom::BuildBloomFilter() { -#if defined(OS_WIN) - // For measuring the amount of IO during the bloom filter build. - base::IoCounters io_before, io_after; - base::ProcessHandle handle = base::Process::Current().handle(); - scoped_ptr<base::ProcessMetrics> metric; - metric.reset(base::ProcessMetrics::CreateProcessMetrics(handle)); - metric->GetIOCounters(&io_before); -#endif - - base::Time before = base::Time::Now(); - - // Get all the pending GetHash results and write them to disk. - HashList pending_hashes; - { - AutoLock lock(lookup_lock_); - pending_hashes.swap(pending_full_hashes_); - } - WriteFullHashList(pending_hashes, true); - - add_count_ = GetAddPrefixCount(); - if (add_count_ == 0) { - AutoLock lock(lookup_lock_); - bloom_filter_ = NULL; - return; - } - - scoped_array<SBPair> adds_array(new SBPair[add_count_]); - SBPair* adds = adds_array.get(); - - if (!BuildAddPrefixList(adds)) - return; - - // Build the full add cache, which includes full hash updates and GetHash - // results. Subs may remove some of these entries. - scoped_ptr<HashCache> add_cache(new HashCache); - if (!BuildAddFullHashCache(add_cache.get())) - return; - - scoped_ptr<HashCache> sub_cache(new HashCache); - if (!BuildSubFullHashCache(sub_cache.get())) - return; - - // Used to track which adds have been subbed out. The vector<bool> is actually - // a bitvector so the size is as small as we can get. - std::vector<bool> adds_removed; - adds_removed.resize(add_count_, false); - - // Flag any add as removed if there is a matching sub. - int subs = 0; - if (!RemoveSubs(adds, &adds_removed, add_cache.get(), sub_cache.get(), &subs)) - return; - - // Prepare the database for writing out our remaining add and sub prefixes. - if (!UpdateTables()) - return; - - // Write out the remaining add prefixes to the filter and database. - int new_count; - scoped_refptr<BloomFilter> filter; - if (!WritePrefixes(adds, adds_removed, &new_count, &filter)) - return; - - // Write out the remaining full hash adds and subs to the database. - WriteFullHashes(add_cache.get(), true); - WriteFullHashes(sub_cache.get(), false); - - // Save the chunk numbers we've received to the database for reporting in - // future update requests. - if (!WriteChunkNumbers()) - return; - - // Commit all the changes to the database. - int rv = insert_transaction_->Commit(); - if (rv != SQLITE_OK) { - NOTREACHED() << "SafeBrowsing update transaction failed to commit."; - UMA_HISTOGRAM_COUNTS("SB2.FailedUpdate", 1); - return; - } - - // Swap in the newly built filter and cache. If there were any matching subs, - // the size (add_count_) will be smaller. - { - AutoLock lock(lookup_lock_); - add_count_ = new_count; - bloom_filter_.swap(filter); - hash_cache_.swap(add_cache); - } - - base::TimeDelta bloom_gen = base::Time::Now() - before; - - // Persist the bloom filter to disk. - WriteBloomFilter(); - - // Gather statistics. -#if defined(OS_WIN) - metric->GetIOCounters(&io_after); - UMA_HISTOGRAM_COUNTS("SB2.BuildReadBytes", - static_cast<int>(io_after.ReadTransferCount - - io_before.ReadTransferCount)); - UMA_HISTOGRAM_COUNTS("SB2.BuildWriteBytes", - static_cast<int>(io_after.WriteTransferCount - - io_before.WriteTransferCount)); - UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", - static_cast<int>(io_after.ReadOperationCount - - io_before.ReadOperationCount)); - UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", - static_cast<int>(io_after.WriteOperationCount - - io_before.WriteOperationCount)); -#endif - SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in " - << bloom_gen.InMilliseconds() - << " ms total. prefix count: "<< add_count_; - UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); - UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_count_); - UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", subs); - UMA_HISTOGRAM_COUNTS("SB2.FilterSize", bloom_filter_->size()); - int64 size_64; - if (file_util::GetFileSize(filename_, &size_64)) - UMA_HISTOGRAM_COUNTS("SB2.DatabaseBytes", static_cast<int>(size_64)); -} - -int SafeBrowsingDatabaseBloom::PairCompare(const void* arg1, const void* arg2) { - const SBPair* p1 = reinterpret_cast<const SBPair*>(arg1); - const SBPair* p2 = reinterpret_cast<const SBPair*>(arg2); - - if (p1->chunk_id < p2->chunk_id) - return -1; - - if (p1->chunk_id > p2->chunk_id) - return 1; - - if (p1->prefix < p2->prefix) - return -1; - - if (p1->prefix > p2->prefix) - return 1; - - return 0; -} - -bool SafeBrowsingDatabaseBloom::BuildAddPrefixList(SBPair* adds) { - // Read add_prefix into memory and sort it. - STATS_COUNTER("SB.HostSelectForBloomFilter", 1); - SQLITE_UNIQUE_STATEMENT(add_prefix, *statement_cache_, - "SELECT chunk, prefix FROM add_prefix"); - if (!add_prefix.is_valid()) { - NOTREACHED(); - return false; - } - - SBPair* add = adds; - while (true) { - int rv = add_prefix->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - break; - } - add->chunk_id = add_prefix->column_int(0); - add->prefix = add_prefix->column_int(1); - add++; - if (add_count_ < (add - adds)) - break; - } - DCHECK(add_count_ == (add - adds)); - qsort(adds, add_count_, sizeof(SBPair), - &SafeBrowsingDatabaseBloom::PairCompare); - - return true; -} - -bool SafeBrowsingDatabaseBloom::BuildAddFullHashCache(HashCache* add_cache) { - add_cache->clear(); - - // Read all full add entries to the cache. - SQLITE_UNIQUE_STATEMENT( - full_add_entry, - *statement_cache_, - "SELECT chunk, prefix, receive_time, full_hash FROM add_full_hash"); - if (!full_add_entry.is_valid()) { - NOTREACHED(); - return false; - } - - int rv; - while (true) { - rv = full_add_entry->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - break; - } - HashCacheEntry entry; - entry.add_chunk_id = full_add_entry->column_int(0); - if (add_del_cache_.find(entry.add_chunk_id) != add_del_cache_.end()) - continue; // This entry's chunk was deleted so we skip it. - SBPrefix prefix = full_add_entry->column_int(1); - entry.received = base::Time::FromTimeT(full_add_entry->column_int64(2)); - int chunk, list_id; - DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); - entry.list_id = list_id; - ReadFullHash(&full_add_entry, 3, &entry.full_hash); - HashList& entries = (*add_cache)[prefix]; - entries.push_back(entry); - } - - // Clear the full add table. - SQLITE_UNIQUE_STATEMENT(full_add_drop, *statement_cache_, - "DELETE FROM add_full_hash"); - if (!full_add_drop.is_valid()) { - NOTREACHED(); - return false; - } - rv = full_add_drop->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - return true; -} - -bool SafeBrowsingDatabaseBloom::BuildSubFullHashCache(HashCache* sub_cache) { - sub_cache->clear(); - - // Read all full sub entries to the cache. - SQLITE_UNIQUE_STATEMENT( - full_sub_entry, - *statement_cache_, - "SELECT chunk, add_chunk, prefix, full_hash FROM sub_full_hash"); - if (!full_sub_entry.is_valid()) { - NOTREACHED(); - return false; - } - - int rv; - while (true) { - rv = full_sub_entry->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - break; - } - HashCacheEntry entry; - entry.sub_chunk_id = full_sub_entry->column_int(0); - if (sub_del_cache_.find(entry.sub_chunk_id) != sub_del_cache_.end()) - continue; // This entry's chunk was deleted so we skip it. - entry.add_chunk_id = full_sub_entry->column_int(1); - SBPrefix prefix = full_sub_entry->column_int(2); - int chunk, list_id; - DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); - entry.list_id = list_id; - ReadFullHash(&full_sub_entry, 3, &entry.full_hash); - HashList& entries = (*sub_cache)[prefix]; - entries.push_back(entry); - } - - // Clear the full sub table. - SQLITE_UNIQUE_STATEMENT(full_sub_drop, *statement_cache_, - "DELETE FROM sub_full_hash"); - if (!full_sub_drop.is_valid()) { - NOTREACHED(); - return false; - } - rv = full_sub_drop->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - return true; -} - -bool SafeBrowsingDatabaseBloom::RemoveSubs( - SBPair* adds, std::vector<bool>* adds_removed, - HashCache* add_cache, HashCache* sub_cache, int* subs) { - DCHECK(add_cache && sub_cache && subs); - - // Read through sub_prefix and zero out add_prefix entries that match. - SQLITE_UNIQUE_STATEMENT(sub_prefix, *statement_cache_, - "SELECT chunk, add_chunk, prefix FROM sub_prefix"); - if (!sub_prefix.is_valid()) { - NOTREACHED(); - return false; - } - - // Create a temporary sub prefix table. We add entries to it as we scan the - // sub_prefix table looking for adds to remove. Only entries that don't - // remove an add written to this table. When we're done filtering, we replace - // sub_prefix with this table. - if (sqlite3_exec(db_, "CREATE TABLE sub_prefix_tmp (" - "chunk INTEGER," - "add_chunk INTEGER," - "prefix INTEGER)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Create a temporary sub full hash table, similar to the above prefix table. - if (sqlite3_exec(db_, "CREATE TABLE sub_full_tmp (" - "chunk INTEGER," - "add_chunk INTEGER," - "prefix INTEGER," - "full_hash BLOB)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - SQLITE_UNIQUE_STATEMENT( - sub_prefix_tmp, - *statement_cache_, - "INSERT INTO sub_prefix_tmp (chunk, add_chunk, prefix) VALUES (?,?,?)"); - if (!sub_prefix_tmp.is_valid()) { - NOTREACHED(); - return false; - } - - SBPair sub; - int sub_count = 0; - while (true) { - int rv = sub_prefix->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - break; - } - - int sub_chunk = sub_prefix->column_int(0); - sub.chunk_id = sub_prefix->column_int(1); - sub.prefix = sub_prefix->column_int(2); - - // See if this sub chunk has been deleted via a SubDel, and skip doing the - // search or write if so. - if (sub_del_cache_.find(sub_chunk) != sub_del_cache_.end()) - continue; - - void* match = bsearch(&sub, adds, add_count_, sizeof(SBPair), - &SafeBrowsingDatabaseBloom::PairCompare); - if (match) { - SBPair* subbed = reinterpret_cast<SBPair*>(match); - (*adds_removed)[subbed - adds] = true; - // Remove any GetHash results (full hashes) that match this sub, as well - // as removing any full subs we may have received. - ClearCachedEntry(sub.prefix, sub.chunk_id, add_cache); - ClearCachedEntry(sub.prefix, sub.chunk_id, sub_cache); - } else { - // This sub_prefix entry did not match any add, so we keep it around. - sub_prefix_tmp->bind_int(0, sub_chunk); - sub_prefix_tmp->bind_int(1, sub.chunk_id); - sub_prefix_tmp->bind_int(2, sub.prefix); - rv = sub_prefix_tmp->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - sub_prefix_tmp->reset(); - ++sub_count; - } - } - - *subs = sub_count; - return true; -} - -bool SafeBrowsingDatabaseBloom::UpdateTables() { - // Delete the old sub_prefix table and rename the temporary table. - SQLITE_UNIQUE_STATEMENT(del_sub, *statement_cache_, "DROP TABLE sub_prefix"); - if (!del_sub.is_valid()) { - NOTREACHED(); - return false; - } - - int rv = del_sub->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - SQLITE_UNIQUE_STATEMENT(rename_sub, *statement_cache_, - "ALTER TABLE sub_prefix_tmp RENAME TO sub_prefix"); - if (!rename_sub.is_valid()) { - NOTREACHED(); - return false; - } - rv = rename_sub->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - // Now blow away add_prefix. We will write the new values out later. - SQLITE_UNIQUE_STATEMENT(del_add, *statement_cache_, "DELETE FROM add_prefix"); - if (!del_add.is_valid()) { - NOTREACHED(); - return false; - } - rv = del_add->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - // Delete the old sub_full_hash table and rename the temp full hash table. - SQLITE_UNIQUE_STATEMENT(del_full_sub, *statement_cache_, - "DROP TABLE sub_full_hash"); - if (!del_full_sub.is_valid()) { - NOTREACHED(); - return false; - } - - rv = del_full_sub->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - SQLITE_UNIQUE_STATEMENT(rename_full_sub, *statement_cache_, - "ALTER TABLE sub_full_tmp RENAME TO sub_full_hash"); - if (!rename_full_sub.is_valid()) { - NOTREACHED(); - return false; - } - rv = rename_full_sub->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - // Blow away all the full adds. We will write the new values out later. - SQLITE_UNIQUE_STATEMENT(del_full_add, *statement_cache_, - "DELETE FROM add_full_hash"); - if (!del_full_add.is_valid()) { - NOTREACHED(); - return false; - } - rv = del_full_add->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - return true; -} - -bool SafeBrowsingDatabaseBloom::WritePrefixes( - SBPair* adds, - const std::vector<bool>& adds_removed, - int* new_add_count, - scoped_refptr<BloomFilter>* filter) { - *new_add_count = 0; - - SQLITE_UNIQUE_STATEMENT(insert, *statement_cache_, - "INSERT INTO add_prefix VALUES (?,?)"); - if (!insert.is_valid()) { - NOTREACHED(); - return false; - } - - // Determine the size of the new bloom filter. We will cap the maximum size at - // 2 MB to prevent an error from consuming large amounts of memory. - const int default_min = BloomFilter::kBloomFilterMinSize; - int number_of_keys = std::max(add_count_, default_min); - int filter_size = - std::min(number_of_keys * BloomFilter::kBloomFilterSizeRatio, - BloomFilter::kBloomFilterMaxSize * 8); - *filter = new BloomFilter(filter_size); - SBPair* add = adds; - int new_count = 0; - - while (add - adds < add_count_) { - if (!adds_removed[add - adds]) { - // Check to see if we have an AddDel for this chunk and skip writing it - // if there is. - if (add_del_cache_.find(add->chunk_id) != add_del_cache_.end()) { - add++; - continue; - } - (*filter)->Insert(add->prefix); - insert->bind_int(0, add->chunk_id); - insert->bind_int(1, add->prefix); - int rv = insert->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - insert->reset(); - new_count++; - } - add++; - } - - *new_add_count = new_count; - - return true; -} - -void SafeBrowsingDatabaseBloom::WriteFullHashes(HashCache* hash_cache, - bool is_add) { - DCHECK(hash_cache); - HashCache::iterator it = hash_cache->begin(); - for (; it != hash_cache->end(); ++it) { - const HashList& entries = it->second; - WriteFullHashList(entries, is_add); - } -} - -void SafeBrowsingDatabaseBloom::WriteFullHashList(const HashList& hash_list, - bool is_add) { - HashList::const_iterator lit = hash_list.begin(); - for (; lit != hash_list.end(); ++lit) { - const HashCacheEntry& entry = *lit; - SBPrefix prefix = entry.full_hash.prefix; - if (is_add) { - if (add_del_cache_.find(entry.add_chunk_id) == add_del_cache_.end()) { - InsertAddFullHash(prefix, entry.add_chunk_id, - entry.received, entry.full_hash); - } - } else { - if (sub_del_cache_.find(entry.sub_chunk_id) == sub_del_cache_.end()) { - InsertSubFullHash(prefix, entry.sub_chunk_id, - entry.add_chunk_id, entry.full_hash, true); - } - } - } -} - -void SafeBrowsingDatabaseBloom::GetCachedFullHashes( - const std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - base::Time last_update) { - DCHECK(prefix_hits && full_hits); - lookup_lock_.AssertAcquired(); - - const base::Time max_age = - base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes); - - for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin(); - it != prefix_hits->end(); ++it) { - HashCache::iterator hit = hash_cache_->find(*it); - if (hit != hash_cache_->end()) { - HashList& entries = hit->second; - HashList::iterator eit = entries.begin(); - while (eit != entries.end()) { - // An entry is valid if we've received an update in the past 45 minutes, - // or if this particular GetHash was received in the past 45 minutes. - // If an entry is does not meet the time criteria above, we are not - // allowed to use it since it might have become stale. We keep it - // around, though, and may be able to use it in the future once we - // receive the next update (that doesn't sub it). - if (max_age < last_update || eit->received > max_age) { - SBFullHashResult full_hash; - full_hash.hash = eit->full_hash; - full_hash.list_name = safe_browsing_util::GetListName(eit->list_id); - full_hash.add_chunk_id = eit->add_chunk_id; - full_hits->push_back(full_hash); - } - ++eit; - } - - if (entries.empty()) - hash_cache_->erase(hit); - } - } -} - -bool SafeBrowsingDatabaseBloom::ClearCachedEntry(SBPrefix prefix, - int add_chunk, - HashCache* hash_cache) { - bool match = false; - HashCache::iterator it = hash_cache->find(prefix); - if (it == hash_cache->end()) - return match; - - HashList& entries = it->second; - HashList::iterator lit = entries.begin(); - while (lit != entries.end()) { - HashCacheEntry& entry = *lit; - if (entry.add_chunk_id == add_chunk) { - lit = entries.erase(lit); - match = true; - continue; - } - ++lit; - } - - if (entries.empty()) - hash_cache->erase(it); - - return match; -} - -void SafeBrowsingDatabaseBloom::HandleCorruptDatabase() { - MessageLoop::current()->PostTask(FROM_HERE, - reset_factory_.NewRunnableMethod( - &SafeBrowsingDatabaseBloom::OnHandleCorruptDatabase)); -} - -void SafeBrowsingDatabaseBloom::OnHandleCorruptDatabase() { - ResetDatabase(); - DCHECK(false) << "SafeBrowsing database was corrupt and reset"; -} - -void SafeBrowsingDatabaseBloom::InsertAdd( - int chunk_id, SBPrefix host, const SBEntry* entry, int list_id) { - STATS_COUNTER("SB.HostInsert", 1); - int encoded = EncodeChunkId(chunk_id, list_id); - - DCHECK(entry->IsAdd()); - if (!entry->IsPrefix()) { - base::Time receive_time = base::Time::Now(); - for (int i = 0; i < entry->prefix_count(); ++i) { - SBFullHash full_hash = entry->FullHashAt(i); - SBPrefix prefix = full_hash.prefix; - InsertAddPrefix(prefix, encoded); - InsertAddFullHash(prefix, encoded, receive_time, full_hash); - } - return; - } - - // This entry contains only regular (32 bit) prefixes. - int count = entry->prefix_count(); - if (count == 0) { - InsertAddPrefix(host, encoded); - } else { - for (int i = 0; i < count; i++) { - SBPrefix prefix = entry->PrefixAt(i); - InsertAddPrefix(prefix, encoded); - } - } -} - -void SafeBrowsingDatabaseBloom::InsertAddPrefix(SBPrefix prefix, - int encoded_chunk) { - STATS_COUNTER("SB.PrefixAdd", 1); - std::string sql = "INSERT INTO add_prefix (chunk, prefix) VALUES (?, ?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, prefix); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } - add_count_++; -} - -void SafeBrowsingDatabaseBloom::InsertAddFullHash(SBPrefix prefix, - int encoded_chunk, - base::Time receive_time, - SBFullHash full_prefix) { - STATS_COUNTER("SB.PrefixAddFull", 1); - std::string sql = "INSERT INTO add_full_hash " - "(chunk, prefix, receive_time, full_hash) " - "VALUES (?,?,?,?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, prefix); - statement->bind_int64(2, receive_time.ToTimeT()); - statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash)); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } -} - -void SafeBrowsingDatabaseBloom::InsertSub( - int chunk_id, SBPrefix host, const SBEntry* entry, int list_id) { - STATS_COUNTER("SB.HostDelete", 1); - int encoded = EncodeChunkId(chunk_id, list_id); - int encoded_add; - - DCHECK(entry->IsSub()); - if (!entry->IsPrefix()) { - for (int i = 0; i < entry->prefix_count(); ++i) { - SBFullHash full_hash = entry->FullHashAt(i); - SBPrefix prefix = full_hash.prefix; - encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); - InsertSubPrefix(prefix, encoded, encoded_add); - InsertSubFullHash(prefix, encoded, encoded_add, full_hash, false); - } - } else { - // We have prefixes. - int count = entry->prefix_count(); - if (count == 0) { - encoded_add = EncodeChunkId(entry->chunk_id(), list_id); - InsertSubPrefix(host, encoded, encoded_add); - } else { - for (int i = 0; i < count; i++) { - SBPrefix prefix = entry->PrefixAt(i); - encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); - InsertSubPrefix(prefix, encoded, encoded_add); - } - } - } -} - -void SafeBrowsingDatabaseBloom::InsertSubPrefix(SBPrefix prefix, - int encoded_chunk, - int encoded_add_chunk) { - STATS_COUNTER("SB.PrefixSub", 1); - std::string sql = - "INSERT INTO sub_prefix (chunk, add_chunk, prefix) VALUES (?,?,?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, encoded_add_chunk); - statement->bind_int(2, prefix); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } -} - -void SafeBrowsingDatabaseBloom::InsertSubFullHash(SBPrefix prefix, - int encoded_chunk, - int encoded_add_chunk, - SBFullHash full_prefix, - bool use_temp_table) { - STATS_COUNTER("SB.PrefixSubFull", 1); - std::string sql = "INSERT INTO "; - if (use_temp_table) { - sql += "sub_full_tmp"; - } else { - sql += "sub_full_hash"; - } - sql += " (chunk, add_chunk, prefix, full_hash) VALUES (?,?,?,?)"; - - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, encoded_add_chunk); - statement->bind_int(2, prefix); - statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash)); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } -} - -void SafeBrowsingDatabaseBloom::ReadFullHash(SqliteCompiledStatement* statement, - int column, - SBFullHash* full_hash) { - DCHECK(full_hash); - std::vector<unsigned char> blob; - (*statement)->column_blob_as_vector(column, &blob); - DCHECK(blob.size() == sizeof(SBFullHash)); - memcpy(full_hash->full_hash, &blob[0], sizeof(SBFullHash)); -} - -// TODO(paulg): Look for a less expensive way to maintain add_count_? If we move -// to a native file format, we can just cache the count in the file and not have -// to scan at all. -int SafeBrowsingDatabaseBloom::GetAddPrefixCount() { - SQLITE_UNIQUE_STATEMENT(count, *statement_cache_, - "SELECT count(*) FROM add_prefix"); - if (!count.is_valid()) { - NOTREACHED(); - return 0; - } - int rv = count->step(); - int add_count = 0; - if (rv == SQLITE_ROW) - add_count = count->column_int(0); - else if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - - return add_count; -} - -void SafeBrowsingDatabaseBloom::ReadChunkNumbers() { - add_chunk_cache_.clear(); - sub_chunk_cache_.clear(); - - // Read in the add chunk numbers. - SQLITE_UNIQUE_STATEMENT(read_adds, *statement_cache_, - "SELECT chunk FROM add_chunks"); - if (!read_adds.is_valid()) { - NOTREACHED(); - return; - } - - while (true) { - int rv = read_adds->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - break; - } - add_chunk_cache_.insert(read_adds->column_int(0)); - } - - // Read in the sub chunk numbers. - SQLITE_UNIQUE_STATEMENT(read_subs, *statement_cache_, - "SELECT chunk FROM sub_chunks"); - if (!read_subs.is_valid()) { - NOTREACHED(); - return; - } - - while (true) { - int rv = read_subs->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - break; - } - sub_chunk_cache_.insert(read_subs->column_int(0)); - } -} - -// Write all the chunk numbers to the add_chunks and sub_chunks tables. -bool SafeBrowsingDatabaseBloom::WriteChunkNumbers() { - // Delete the contents of the add chunk table. - SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_, - "DELETE FROM add_chunks"); - if (!del_add_chunk.is_valid()) { - NOTREACHED(); - return false; - } - int rv = del_add_chunk->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - SQLITE_UNIQUE_STATEMENT(write_adds, *statement_cache_, - "INSERT INTO add_chunks (chunk) VALUES (?)"); - if (!write_adds.is_valid()) { - NOTREACHED(); - return false; - } - - // Write all the add chunks from the cache to the database. - std::set<int>::const_iterator it = add_chunk_cache_.begin(); - for (; it != add_chunk_cache_.end(); ++it) { - if (add_del_cache_.find(*it) != add_del_cache_.end()) - continue; // This chunk has been deleted. - write_adds->bind_int(0, *it); - rv = write_adds->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - write_adds->reset(); - } - - // Delete the contents of the sub chunk table. - SQLITE_UNIQUE_STATEMENT(del_sub_chunk, *statement_cache_, - "DELETE FROM sub_chunks"); - if (!del_sub_chunk.is_valid()) { - NOTREACHED(); - return false; - } - rv = del_sub_chunk->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - SQLITE_UNIQUE_STATEMENT(write_subs, *statement_cache_, - "INSERT INTO sub_chunks (chunk) VALUES (?)"); - if (!write_subs.is_valid()) { - NOTREACHED(); - return false; - } - - // Write all the sub chunks from the cache to the database. - it = sub_chunk_cache_.begin(); - for (; it != sub_chunk_cache_.end(); ++it) { - if (sub_del_cache_.find(*it) != sub_del_cache_.end()) - continue; // This chunk has been deleted. - write_subs->bind_int(0, *it); - rv = write_subs->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - write_subs->reset(); - } - - return true; -} - -void SafeBrowsingDatabaseBloom::ClearUpdateCaches() { - lookup_lock_.AssertAcquired(); - add_del_cache_.clear(); - sub_del_cache_.clear(); - add_chunk_cache_.clear(); - sub_chunk_cache_.clear(); - prefix_miss_cache_.clear(); -} - -void SafeBrowsingDatabaseBloom::LoadBloomFilter() { - DCHECK(!bloom_filter_filename_.empty()); - - // If we're missing either of the database or filter files, we wait until the - // next update to generate a new filter. - // TODO(paulg): Investigate how often the filter file is missing and how - // expensive it would be to regenerate it. - int64 size_64; - if (!file_util::GetFileSize(filename_, &size_64) || size_64 == 0) - return; - - if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) || - size_64 == 0) { - UMA_HISTOGRAM_COUNTS("SB2.FilterMissing", 1); - return; - } - - // We have a bloom filter file, so use that as our filter. - const base::TimeTicks before = base::TimeTicks::Now(); - bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); - SB_DLOG(INFO) << "SafeBrowsingDatabase read bloom filter in " - << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; - - if (!bloom_filter_.get()) - UMA_HISTOGRAM_COUNTS("SB2.FilterReadFail", 1); -} - -void SafeBrowsingDatabaseBloom::DeleteBloomFilter() { - file_util::Delete(bloom_filter_filename_, false); -} - -void SafeBrowsingDatabaseBloom::WriteBloomFilter() { - if (!bloom_filter_.get()) - return; - - const base::TimeTicks before = base::TimeTicks::Now(); - bool write_ok = bloom_filter_->WriteFile(bloom_filter_filename_); - SB_DLOG(INFO) << "SafeBrowsingDatabase wrote bloom filter in " << - (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; - - if (!write_ok) - UMA_HISTOGRAM_COUNTS("SB2.FilterWriteFail", 1); -} diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h deleted file mode 100644 index df3e1a6..0000000 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h +++ /dev/null @@ -1,248 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_BLOOM_H_ -#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_BLOOM_H_ -#pragma once - -#include <list> -#include <set> - -#include "base/hash_tables.h" -#include "base/lock.h" -#include "base/ref_counted.h" -#include "base/scoped_ptr.h" -#include "base/task.h" -#include "base/time.h" -#include "chrome/browser/safe_browsing/safe_browsing_database.h" -#include "testing/gtest/include/gtest/gtest_prod.h" - -class BloomFilter; -struct sqlite3; -class SqliteCompiledStatement; -class SqliteStatementCache; -class SQLTransaction; - -// The reference implementation database using SQLite. -class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { - public: - SafeBrowsingDatabaseBloom(); - virtual ~SafeBrowsingDatabaseBloom(); - - // Implement SafeBrowsingDatabase interface. - virtual void Init(const FilePath& filename); - virtual bool ResetDatabase(); - virtual bool ContainsUrl(const GURL& url, - std::string* matching_list, - std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - base::Time last_update); - virtual void InsertChunks(const std::string& list_name, - const SBChunkList& chunks); - virtual void DeleteChunks(const std::vector<SBChunkDelete>& chunk_deletes); - virtual void CacheHashResults( - const std::vector<SBPrefix>& prefixes, - const std::vector<SBFullHashResult>& full_hits); - virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists); - virtual void UpdateFinished(bool update_succeeded); - - private: - friend class SafeBrowsingDatabaseBloomTest; - FRIEND_TEST(SafeBrowsingDatabaseBloomTest, HashCaching); - - struct HashCacheEntry { - SBFullHash full_hash; - int list_id; - int add_chunk_id; - int sub_chunk_id; - base::Time received; - }; - - typedef std::list<HashCacheEntry> HashList; - typedef base::hash_map<SBPrefix, HashList> HashCache; - - // Load the bloom filter off disk, or generates one if it doesn't exist. - virtual void LoadBloomFilter(); - - // Deletes the on-disk bloom filter, i.e. because it's stale. - virtual void DeleteBloomFilter(); - - // Writes the current bloom filter to disk. - virtual void WriteBloomFilter(); - - struct SBPair { - int chunk_id; - SBPrefix prefix; - }; - - enum ChunkType { - ADD_CHUNK = 0, - SUB_CHUNK = 1, - }; - - // Opens the database. - bool Open(); - - // Closes the database. - bool Close(); - - // Creates the SQL tables. - bool CreateTables(); - - // Checks the database version and if it's incompatible with the current one, - // resets the database. - bool CheckCompatibleVersion(); - - // Returns true if any of the given prefixes exist for the given host. - // Also returns the matching list or any prefix matches. - void CheckUrl(const std::string& host, - SBPrefix host_key, - const std::vector<std::string>& paths, - std::vector<SBPrefix>* prefix_hits); - - // Checks if a chunk is in the database. - bool ChunkExists(int list_id, ChunkType type, int chunk_id); - - // Return a comma separated list of chunk ids that are in the database for - // the given list and chunk type. - void GetChunkIds(int list_id, ChunkType type, std::string* list); - - // Old implementation methods which have been consolidated into new - // |UpdateStarted()| interface. Retained to minimize changes to - // this code. - void GetListsInfo(std::vector<SBListChunkRanges>* lists); - bool UpdateStarted(); - - // Generate a bloom filter. - virtual void BuildBloomFilter(); - - // Helpers for building the bloom filter. - static int PairCompare(const void* arg1, const void* arg2); - - bool BuildAddPrefixList(SBPair* adds); - bool BuildAddFullHashCache(HashCache* add_cache); - bool BuildSubFullHashCache(HashCache* sub_cache); - bool RemoveSubs(SBPair* adds, - std::vector<bool>* adds_removed, - HashCache* add_cache, - HashCache* sub_cache, - int* subs); - - bool UpdateTables(); - bool WritePrefixes(SBPair* adds, const std::vector<bool>& adds_removed, - int* new_add_count, scoped_refptr<BloomFilter>* filter); - void WriteFullHashes(HashCache* hash_cache, bool is_add); - void WriteFullHashList(const HashList& hash_list, bool is_add); - - // Looks up any cached full hashes we may have. - void GetCachedFullHashes(const std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - base::Time last_update); - - // Remove cached entries that have prefixes contained in the entry. - bool ClearCachedEntry(SBPrefix, int add_chunk_id, HashCache* hash_cache); - - void HandleCorruptDatabase(); - void OnHandleCorruptDatabase(); - - // Adding add entries to the database. - void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id); - void InsertAddPrefix(SBPrefix prefix, int encoded_chunk); - void InsertAddFullHash(SBPrefix prefix, - int encoded_chunk, - base::Time received_time, - SBFullHash full_prefix); - - // Adding sub entries to the database. - void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id); - void InsertSubPrefix(SBPrefix prefix, - int encoded_chunk, - int encoded_add_chunk); - void InsertSubFullHash(SBPrefix prefix, - int encoded_chunk, - int encoded_add_chunk, - SBFullHash full_prefix, - bool use_temp_table); - - // Used for reading full hashes from the database. - void ReadFullHash(SqliteCompiledStatement* statement, - int column, - SBFullHash* full_hash); - - // Returns the number of chunk + prefix pairs in the add prefix table. - int GetAddPrefixCount(); - - // Reads and writes chunk numbers to and from persistent store. - void ReadChunkNumbers(); - bool WriteChunkNumbers(); - - // Flush in-memory temporary caches. |lookup_lock_| must be locked - // by caller. - void ClearUpdateCaches(); - - // Encode the list id in the lower bit of the chunk. - static inline int EncodeChunkId(int chunk, int list_id) { - DCHECK(list_id == 0 || list_id == 1); - chunk = chunk << 1; - chunk |= list_id; - return chunk; - } - - // Split an encoded chunk id and return the original chunk id and list id. - static inline void DecodeChunkId(int encoded, int* chunk, int* list_id) { - *list_id = encoded & 0x1; - *chunk = encoded >> 1; - } - - // The database connection. - sqlite3* db_; - - // Cache of compiled statements for our database. - scoped_ptr<SqliteStatementCache> statement_cache_; - - // Used to schedule resetting the database because of corruption. - ScopedRunnableMethodFactory<SafeBrowsingDatabaseBloom> reset_factory_; - - // Caches for all of the existing add and sub chunks. - std::set<int> add_chunk_cache_; - std::set<int> sub_chunk_cache_; - - // Caches for the AddDel and SubDel commands. - base::hash_set<int> add_del_cache_; - base::hash_set<int> sub_del_cache_; - - // The number of entries in the add_prefix table. Used to pick the correct - // size for the bloom filter and stats gathering. - int add_count_; - - // Transaction for protecting database integrity during updates. - scoped_ptr<SQLTransaction> insert_transaction_; - - // Lock for protecting access to variables that may be used on the IO thread. - // This includes |bloom_filter_|, |hash_cache_| and |prefix_miss_cache_|. - Lock lookup_lock_; - - // True if we're in the middle of a reset. This is used to prevent possible - // infinite recursion. - bool performing_reset_; - - // A store for GetHash results that have not yet been written to the database. - HashList pending_full_hashes_; - - scoped_ptr<HashCache> hash_cache_; - HashCache* hash_cache() { return hash_cache_.get(); } - - // Cache of prefixes that returned empty results (no full hash match). - typedef std::set<SBPrefix> PrefixCache; - PrefixCache prefix_miss_cache_; - PrefixCache* prefix_miss_cache() { return &prefix_miss_cache_; } - - FilePath filename_; - FilePath bloom_filter_filename_; - scoped_refptr<BloomFilter> bloom_filter_; - - DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseBloom); -}; - -#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_BLOOM_H_ diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom_unittest.cc deleted file mode 100644 index 2032efd..0000000 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom_unittest.cc +++ /dev/null @@ -1,1321 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// Unit tests for the SafeBrowsing storage system. - -#include "app/sql/connection.h" -#include "app/sql/statement.h" -#include "base/debug_util.h" -#include "base/file_util.h" -#include "base/format_macros.h" -#include "base/logging.h" -#include "base/message_loop.h" -#include "base/metrics/stats_counters.h" -#include "base/path_service.h" -#include "base/process_util.h" -#include "base/scoped_temp_dir.h" -#include "base/sha2.h" -#include "base/string_util.h" -#include "base/time.h" -#include "chrome/browser/safe_browsing/protocol_parser.h" -#include "chrome/browser/safe_browsing/safe_browsing_database_bloom.h" -#include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h" -#include "googleurl/src/gurl.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "testing/platform_test.h" -#include "third_party/sqlite/sqlite3.h" - -using base::Time; - -namespace { - -SBPrefix Sha256Prefix(const std::string& str) { - SBPrefix prefix; - base::SHA256HashString(str, &prefix, sizeof(prefix)); - return prefix; -} - -SBFullHash Sha256Hash(const std::string& str) { - SBFullHash hash; - base::SHA256HashString(str, &hash, sizeof(hash)); - return hash; -} - -// Prevent DCHECK from killing tests. -// TODO(shess): Pawel disputes the use of this, so the test which uses -// it is DISABLED. http://crbug.com/56448 -class ScopedLogMessageIgnorer { - public: - ScopedLogMessageIgnorer() { - logging::SetLogMessageHandler(&LogMessageIgnorer); - } - ~ScopedLogMessageIgnorer() { - // TODO(shess): Would be better to verify whether anyone else - // changed it, and then restore it to the previous value. - logging::SetLogMessageHandler(NULL); - } - - private: - static bool LogMessageIgnorer(int severity, const std::string& str) { - // Intercept FATAL, strip the stack backtrace, and log it without - // the crash part. - if (severity == logging::LOG_FATAL) { - size_t newline = str.find('\n'); - if (newline != std::string::npos) { - const std::string msg = str.substr(0, newline + 1); - fprintf(stderr, "%s", msg.c_str()); - fflush(stderr); - } - return true; - } - - return false; - } -}; - -// Helper function which corrupts the root page of the indicated -// table. After this the table can be opened successfully, and -// queries to other tables work, and possibly queries to this table -// which only hit an index may work, but queries which hit the table -// itself should not. Returns |true| on success. -bool CorruptSqliteTable(const FilePath& filename, - const std::string& table_name) { - size_t root_page; // Root page of the table. - size_t page_size; // Page size of the database. - - sql::Connection db; - if (!db.Open(filename)) - return false; - - sql::Statement stmt(db.GetUniqueStatement("PRAGMA page_size")); - if (!stmt.Step()) - return false; - page_size = stmt.ColumnInt(0); - - stmt.Assign(db.GetUniqueStatement( - "SELECT rootpage FROM sqlite_master WHERE name = ?")); - stmt.BindString(0, "sub_prefix"); - if (!stmt.Step()) - return false; - root_page = stmt.ColumnInt(0); - - // The page numbers are 1-based. - const size_t root_page_offset = (root_page - 1) * page_size; - - // Corrupt the file by overwriting the table's root page. - FILE* fp = file_util::OpenFile(filename, "r+"); - if (!fp) - return false; - - file_util::ScopedFILE file_closer(fp); - if (fseek(fp, root_page_offset, SEEK_SET) == -1) - return false; - - for (size_t i = 0; i < page_size; ++i) { - fputc('!', fp); // Character experimentally verified. - } - - if (!file_util::CloseFile(fp)) - return false; - - file_closer.reset(); - return true; -} - -// Run a select against the named table to test for corruption. -bool TestCorruptSqliteTable(const FilePath& filename, - const std::string& table_name) { - sql::Connection db; - if (!db.Open(filename)) - return false; - - const std::string sql = - StringPrintf("SELECT COUNT(*) FROM \"%s\"", table_name.c_str()); - sql::Statement stmt(db.GetUniqueStatement(sql.c_str())); - if (stmt.Step()) - return false; - - return db.GetErrorCode() == SQLITE_CORRUPT; -} - -} // namespace - -class SafeBrowsingDatabaseBloomTest : public PlatformTest { - public: - virtual void SetUp() { - PlatformTest::SetUp(); - - // Setup a database in a temporary directory. - ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); - database_.reset(new SafeBrowsingDatabaseBloom); - database_filename_ = - temp_dir_.path().AppendASCII("SafeBrowsingTestDatabase"); - database_->Init(database_filename_); - } - - virtual void TearDown() { - database_.reset(); - - PlatformTest::TearDown(); - } - - void GetListsInfo(std::vector<SBListChunkRanges>* lists) { - lists->clear(); - EXPECT_TRUE(database_->UpdateStarted(lists)); - database_->UpdateFinished(true); - } - - // Helper function to do an AddDel or SubDel command. - void DelChunk(const std::string& list, - int chunk_id, - bool is_sub_del) { - std::vector<SBChunkDelete> deletes; - SBChunkDelete chunk_delete; - chunk_delete.list_name = list; - chunk_delete.is_sub_del = is_sub_del; - chunk_delete.chunk_del.push_back(ChunkRange(chunk_id)); - deletes.push_back(chunk_delete); - database_->DeleteChunks(deletes); - } - - void AddDelChunk(const std::string& list, int chunk_id) { - DelChunk(list, chunk_id, false); - } - - void SubDelChunk(const std::string& list, int chunk_id) { - DelChunk(list, chunk_id, true); - } - - // Utility function for setting up the database for the caching test. - void PopulateDatabaseForCacheTest(); - - scoped_ptr<SafeBrowsingDatabaseBloom> database_; - FilePath database_filename_; - ScopedTempDir temp_dir_; -}; - -// Tests retrieving list name information. -TEST_F(SafeBrowsingDatabaseBloomTest, ListName) { - SBChunkList chunks; - - // Insert some malware add chunks. - SBChunkHost host; - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); - host.entry->set_chunk_id(1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/malware.html")); - SBChunk chunk; - chunk.chunk_number = 1; - chunk.is_add = true; - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - std::vector<SBListChunkRanges> lists; - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - - host.host = Sha256Prefix("www.foo.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); - host.entry->set_chunk_id(2); - host.entry->SetPrefixAt(0, Sha256Prefix("www.foo.com/malware.html")); - chunk.chunk_number = 2; - chunk.is_add = true; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - - host.host = Sha256Prefix("www.whatever.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); - host.entry->set_chunk_id(3); - host.entry->SetPrefixAt(0, Sha256Prefix("www.whatever.com/malware.html")); - chunk.chunk_number = 3; - chunk.is_add = true; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - GetListsInfo(&lists); - EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); - EXPECT_EQ(lists[0].adds, "1-3"); - EXPECT_TRUE(lists[0].subs.empty()); - - // Insert a malware sub chunk. - host.host = Sha256Prefix("www.subbed.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); - host.entry->set_chunk_id(7); - host.entry->SetChunkIdAtPrefix(0, 19); - host.entry->SetPrefixAt(0, Sha256Prefix("www.subbed.com/notevil1.html")); - chunk.chunk_number = 7; - chunk.is_add = false; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - GetListsInfo(&lists); - EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); - EXPECT_EQ(lists[0].adds, "1-3"); - EXPECT_EQ(lists[0].subs, "7"); - if (lists.size() == 2) { - // Old style database won't have the second entry since it creates the lists - // when it receives an update containing that list. The new bloom filter - // based database has these values hard coded. - EXPECT_TRUE(lists[1].name == safe_browsing_util::kPhishingList); - EXPECT_TRUE(lists[1].adds.empty()); - EXPECT_TRUE(lists[1].subs.empty()); - } - - // Add a phishing add chunk. - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); - host.entry->set_chunk_id(47); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); - chunk.chunk_number = 47; - chunk.is_add = true; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kPhishingList, chunks); - - // Insert some phishing sub chunks. - host.host = Sha256Prefix("www.phishy.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); - host.entry->set_chunk_id(200); - host.entry->SetChunkIdAtPrefix(0, 1999); - host.entry->SetPrefixAt(0, Sha256Prefix("www.phishy.com/notevil1.html")); - chunk.chunk_number = 200; - chunk.is_add = false; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - database_->InsertChunks(safe_browsing_util::kPhishingList, chunks); - - host.host = Sha256Prefix("www.phishy2.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); - host.entry->set_chunk_id(201); - host.entry->SetChunkIdAtPrefix(0, 1999); - host.entry->SetPrefixAt(0, Sha256Prefix("www.phishy2.com/notevil1.html")); - chunk.chunk_number = 201; - chunk.is_add = false; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - database_->InsertChunks(safe_browsing_util::kPhishingList, chunks); - database_->UpdateFinished(true); - - GetListsInfo(&lists); - EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); - EXPECT_EQ(lists[0].adds, "1-3"); - EXPECT_EQ(lists[0].subs, "7"); - EXPECT_TRUE(lists[1].name == safe_browsing_util::kPhishingList); - EXPECT_EQ(lists[1].adds, "47"); - EXPECT_EQ(lists[1].subs, "200-201"); -} - -// Checks database reading and writing. -TEST_F(SafeBrowsingDatabaseBloomTest, Database) { - SBChunkList chunks; - - // Add a simple chunk with one hostkey. - SBChunkHost host; - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->set_chunk_id(1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); - host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/malware.html")); - - SBChunk chunk; - chunk.chunk_number = 1; - chunk.is_add = true; - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - std::vector<SBListChunkRanges> lists; - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - - // Add another chunk with two different hostkeys. - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->set_chunk_id(2); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html")); - host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/notevil2.html")); - - chunk.chunk_number = 2; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - - host.host = Sha256Prefix("www.good.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->set_chunk_id(2); - host.entry->SetPrefixAt(0, Sha256Prefix("www.good.com/good1.html")); - host.entry->SetPrefixAt(1, Sha256Prefix("www.good.com/good2.html")); - - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - - // and a chunk with an IP-based host - host.host = Sha256Prefix("192.168.0.1/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); - host.entry->set_chunk_id(3); - host.entry->SetPrefixAt(0, Sha256Prefix("192.168.0.1/malware.html")); - - chunk.chunk_number = 3; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - // Make sure they were added correctly. - GetListsInfo(&lists); - EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); - EXPECT_EQ(lists[0].adds, "1-3"); - EXPECT_TRUE(lists[0].subs.empty()); - - const Time now = Time::Now(); - std::vector<SBFullHashResult> full_hashes; - std::vector<SBPrefix> prefix_hits; - std::string matching_list; - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html")); - EXPECT_EQ(prefix_hits.size(), 1U); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/notevil1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.good.com/good1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.good.com/good2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://192.168.0.1/malware.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/"), - &matching_list, &prefix_hits, - &full_hashes, now)); - EXPECT_TRUE(prefix_hits.empty()); - - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/robots.txt"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - - - // Attempt to re-add the first chunk (should be a no-op). - // see bug: http://code.google.com/p/chromium/issues/detail?id=4522 - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->set_chunk_id(1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); - host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/malware.html")); - - chunk.chunk_number = 1; - chunk.is_add = true; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - GetListsInfo(&lists); - EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); - EXPECT_EQ(lists[0].adds, "1-3"); - EXPECT_TRUE(lists[0].subs.empty()); - - - // Test removing a single prefix from the add chunk. - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); - host.entry->set_chunk_id(2); - host.entry->SetChunkIdAtPrefix(0, 2); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html")); - - chunk.is_add = false; - chunk.chunk_number = 4; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html")); - EXPECT_EQ(prefix_hits.size(), 1U); - - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/notevil1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - EXPECT_TRUE(prefix_hits.empty()); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.good.com/good1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.good.com/good2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - GetListsInfo(&lists); - EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); - EXPECT_EQ(lists[0].subs, "4"); - - // Test the same sub chunk again. This should be a no-op. - // see bug: http://code.google.com/p/chromium/issues/detail?id=4522 - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); - host.entry->set_chunk_id(2); - host.entry->SetChunkIdAtPrefix(0, 2); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html")); - - chunk.is_add = false; - chunk.chunk_number = 4; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - GetListsInfo(&lists); - EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); - EXPECT_EQ(lists[0].subs, "4"); - - // Test removing all the prefixes from an add chunk. - EXPECT_TRUE(database_->UpdateStarted(&lists)); - AddDelChunk(safe_browsing_util::kMalwareList, 2); - database_->UpdateFinished(true); - - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.good.com/good1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.good.com/good2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - GetListsInfo(&lists); - EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); - EXPECT_EQ(lists[0].adds, "1,3"); - EXPECT_EQ(lists[0].subs, "4"); - - // The adddel command exposed a bug in the transaction code where any - // transaction after it would fail. Add a dummy entry and remove it to - // make sure the transcation works fine. - host.host = Sha256Prefix("www.redherring.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); - host.entry->set_chunk_id(1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.redherring.com/index.html")); - - chunk.is_add = true; - chunk.chunk_number = 44; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - - // Now remove the dummy entry. If there are any problems with the - // transactions, asserts will fire. - AddDelChunk(safe_browsing_util::kMalwareList, 44); - - // Test the subdel command. - SubDelChunk(safe_browsing_util::kMalwareList, 4); - database_->UpdateFinished(true); - - GetListsInfo(&lists); - EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); - EXPECT_EQ(lists[0].adds, "1,3"); - EXPECT_EQ(lists[0].subs, ""); - - // Test a sub command coming in before the add. - host.host = Sha256Prefix("www.notevilanymore.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 2); - host.entry->set_chunk_id(10); - host.entry->SetPrefixAt(0, Sha256Prefix("www.notevilanymore.com/index.html")); - host.entry->SetChunkIdAtPrefix(0, 10); - host.entry->SetPrefixAt(1, Sha256Prefix("www.notevilanymore.com/good.html")); - host.entry->SetChunkIdAtPrefix(1, 10); - - chunk.is_add = false; - chunk.chunk_number = 5; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - EXPECT_FALSE(database_->ContainsUrl( - GURL("http://www.notevilanymore.com/index.html"), - &matching_list, &prefix_hits, &full_hashes, now)); - - // Now insert the tardy add chunk. - host.host = Sha256Prefix("www.notevilanymore.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->SetPrefixAt(0, Sha256Prefix("www.notevilanymore.com/index.html")); - host.entry->SetPrefixAt(1, Sha256Prefix("www.notevilanymore.com/good.html")); - - chunk.is_add = true; - chunk.chunk_number = 10; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - EXPECT_FALSE(database_->ContainsUrl( - GURL("http://www.notevilanymore.com/index.html"), - &matching_list, &prefix_hits, &full_hashes, now)); - - EXPECT_FALSE(database_->ContainsUrl( - GURL("http://www.notevilanymore.com/good.html"), - &matching_list, &prefix_hits, &full_hashes, now)); -} - - -// Test adding zero length chunks to the database. -TEST_F(SafeBrowsingDatabaseBloomTest, ZeroSizeChunk) { - SBChunkList chunks; - - // Populate with a couple of normal chunks. - SBChunkHost host; - host.host = Sha256Prefix("www.test.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->SetPrefixAt(0, Sha256Prefix("www.test.com/test1.html")); - host.entry->SetPrefixAt(1, Sha256Prefix("www.test.com/test2.html")); - host.entry->set_chunk_id(1); - - SBChunk chunk; - chunk.is_add = true; - chunk.chunk_number = 1; - chunk.hosts.push_back(host); - - chunks.clear(); - chunks.push_back(chunk); - - host.host = Sha256Prefix("www.random.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->SetPrefixAt(0, Sha256Prefix("www.random.com/random1.html")); - host.entry->SetPrefixAt(1, Sha256Prefix("www.random.com/random2.html")); - host.entry->set_chunk_id(10); - chunk.chunk_number = 10; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.push_back(chunk); - - std::vector<SBListChunkRanges> lists; - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - // Add an empty ADD and SUB chunk. - GetListsInfo(&lists); - EXPECT_EQ(lists[0].adds, "1,10"); - - SBChunk empty_chunk; - empty_chunk.chunk_number = 19; - empty_chunk.is_add = true; - chunks.clear(); - chunks.push_back(empty_chunk); - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - chunks.clear(); - empty_chunk.chunk_number = 7; - empty_chunk.is_add = false; - chunks.push_back(empty_chunk); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - GetListsInfo(&lists); - EXPECT_EQ(lists[0].adds, "1,10,19"); - EXPECT_EQ(lists[0].subs, "7"); - - // Add an empty chunk along with a couple that contain data. This should - // result in the chunk range being reduced in size. - host.host = Sha256Prefix("www.notempty.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.notempty.com/full1.html")); - host.entry->set_chunk_id(20); - empty_chunk.chunk_number = 20; - empty_chunk.is_add = true; - empty_chunk.hosts.clear(); - empty_chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(empty_chunk); - - empty_chunk.chunk_number = 21; - empty_chunk.is_add = true; - empty_chunk.hosts.clear(); - chunks.push_back(empty_chunk); - - host.host = Sha256Prefix("www.notempty.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.notempty.com/full2.html")); - host.entry->set_chunk_id(22); - empty_chunk.hosts.clear(); - empty_chunk.hosts.push_back(host); - empty_chunk.chunk_number = 22; - empty_chunk.is_add = true; - chunks.push_back(empty_chunk); - - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - const Time now = Time::Now(); - std::vector<SBFullHashResult> full_hashes; - std::vector<SBPrefix> prefix_hits; - std::string matching_list; - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.notempty.com/full1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.notempty.com/full2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - GetListsInfo(&lists); - EXPECT_EQ(lists[0].adds, "1,10,19-22"); - EXPECT_EQ(lists[0].subs, "7"); - - // Handle AddDel and SubDel commands for empty chunks. - EXPECT_TRUE(database_->UpdateStarted(&lists)); - AddDelChunk(safe_browsing_util::kMalwareList, 21); - database_->UpdateFinished(true); - - GetListsInfo(&lists); - EXPECT_EQ(lists[0].adds, "1,10,19-20,22"); - EXPECT_EQ(lists[0].subs, "7"); - - EXPECT_TRUE(database_->UpdateStarted(&lists)); - SubDelChunk(safe_browsing_util::kMalwareList, 7); - database_->UpdateFinished(true); - - GetListsInfo(&lists); - EXPECT_EQ(lists[0].adds, "1,10,19-20,22"); - EXPECT_EQ(lists[0].subs, ""); -} - -// Utility function for setting up the database for the caching test. -void SafeBrowsingDatabaseBloomTest::PopulateDatabaseForCacheTest() { - // Add a simple chunk with one hostkey and cache it. - SBChunkHost host; - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->set_chunk_id(1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); - host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/malware.html")); - - SBChunk chunk; - chunk.chunk_number = 1; - chunk.is_add = true; - chunk.hosts.push_back(host); - - SBChunkList chunks; - std::vector<SBListChunkRanges> lists; - chunks.push_back(chunk); - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - // Add the GetHash results to the cache. - SBFullHashResult full_hash; - full_hash.hash = Sha256Hash("www.evil.com/phishing.html"); - full_hash.list_name = safe_browsing_util::kMalwareList; - full_hash.add_chunk_id = 1; - - std::vector<SBFullHashResult> results; - results.push_back(full_hash); - - full_hash.hash = Sha256Hash("www.evil.com/malware.html"); - results.push_back(full_hash); - - std::vector<SBPrefix> prefixes; - database_->CacheHashResults(prefixes, results); -} - -TEST_F(SafeBrowsingDatabaseBloomTest, HashCaching) { - PopulateDatabaseForCacheTest(); - - // We should have both full hashes in the cache. - SafeBrowsingDatabaseBloom::HashCache* hash_cache = database_->hash_cache(); - EXPECT_EQ(hash_cache->size(), 2U); - - // Test the cache lookup for the first prefix. - std::string listname; - std::vector<SBPrefix> prefixes; - std::vector<SBFullHashResult> full_hashes; - database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &listname, &prefixes, &full_hashes, Time::Now()); - EXPECT_EQ(full_hashes.size(), 1U); - EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, - Sha256Hash("www.evil.com/phishing.html"))); - - prefixes.clear(); - full_hashes.clear(); - - // Test the cache lookup for the second prefix. - database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &listname, &prefixes, &full_hashes, Time::Now()); - EXPECT_EQ(full_hashes.size(), 1U); - EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, - Sha256Hash("www.evil.com/malware.html"))); - - prefixes.clear(); - full_hashes.clear(); - - // Test removing a prefix via a sub chunk. - SBChunkHost host; - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); - host.entry->set_chunk_id(1); - host.entry->SetChunkIdAtPrefix(0, 1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); - - SBChunk chunk; - chunk.chunk_number = 2; - chunk.is_add = false; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - SBChunkList chunks; - chunks.push_back(chunk); - - std::vector<SBListChunkRanges> lists; - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - // This prefix should still be there. - database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &listname, &prefixes, &full_hashes, Time::Now()); - EXPECT_EQ(full_hashes.size(), 1U); - EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, - Sha256Hash("www.evil.com/malware.html"))); - - prefixes.clear(); - full_hashes.clear(); - - // This prefix should be gone. - database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &listname, &prefixes, &full_hashes, Time::Now()); - EXPECT_TRUE(full_hashes.empty()); - - prefixes.clear(); - full_hashes.clear(); - - // Test that an AddDel for the original chunk removes the last cached entry. - EXPECT_TRUE(database_->UpdateStarted(&lists)); - AddDelChunk(safe_browsing_util::kMalwareList, 1); - database_->UpdateFinished(true); - database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &listname, &prefixes, &full_hashes, Time::Now()); - EXPECT_TRUE(full_hashes.empty()); - EXPECT_TRUE(database_->hash_cache()->empty()); - - prefixes.clear(); - full_hashes.clear(); - - // Test that the cache won't return expired values. First we have to adjust - // the cached entries' received time to make them older, since the database - // cache insert uses Time::Now(). First, store some entries. - PopulateDatabaseForCacheTest(); - hash_cache = database_->hash_cache(); - EXPECT_EQ(hash_cache->size(), 2U); - - // Now adjust one of the entries times to be in the past. - base::Time expired = base::Time::Now() - base::TimeDelta::FromMinutes(60); - const SBPrefix key = Sha256Prefix("www.evil.com/malware.html"); - SafeBrowsingDatabaseBloom::HashList& entries = (*hash_cache)[key]; - SafeBrowsingDatabaseBloom::HashCacheEntry entry = entries.front(); - entries.pop_front(); - entry.received = expired; - entries.push_back(entry); - - database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &listname, &prefixes, &full_hashes, expired); - EXPECT_TRUE(full_hashes.empty()); - - // This entry should still exist. - database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &listname, &prefixes, &full_hashes, expired); - EXPECT_EQ(full_hashes.size(), 1U); - - - // Testing prefix miss caching. First, we clear out the existing database, - // Since PopulateDatabaseForCacheTest() doesn't handle adding duplicate - // chunks. - EXPECT_TRUE(database_->UpdateStarted(&lists)); - AddDelChunk(safe_browsing_util::kMalwareList, 1); - database_->UpdateFinished(true); - - std::vector<SBPrefix> prefix_misses; - std::vector<SBFullHashResult> empty_full_hash; - prefix_misses.push_back(Sha256Prefix("http://www.bad.com/malware.html")); - prefix_misses.push_back(Sha256Prefix("http://www.bad.com/phishing.html")); - database_->CacheHashResults(prefix_misses, empty_full_hash); - - // Prefixes with no full results are misses. - EXPECT_EQ(database_->prefix_miss_cache()->size(), 2U); - - // Update the database. - PopulateDatabaseForCacheTest(); - - // Prefix miss cache should be cleared. - EXPECT_TRUE(database_->prefix_miss_cache()->empty()); - - // Cache a GetHash miss for a particular prefix, and even though the prefix is - // in the database, it is flagged as a miss so looking up the associated URL - // will not succeed. - prefixes.clear(); - full_hashes.clear(); - prefix_misses.clear(); - empty_full_hash.clear(); - prefix_misses.push_back(Sha256Prefix("www.evil.com/phishing.html")); - database_->CacheHashResults(prefix_misses, empty_full_hash); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &listname, &prefixes, - &full_hashes, Time::Now())); - - prefixes.clear(); - full_hashes.clear(); - - // Test receiving a full add chunk. - host.host = Sha256Prefix("www.fullevil.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_FULL_HASH, 2); - host.entry->set_chunk_id(20); - host.entry->SetFullHashAt(0, Sha256Hash("www.fullevil.com/bad1.html")); - host.entry->SetFullHashAt(1, Sha256Hash("www.fullevil.com/bad2.html")); - - chunk.chunk_number = 20; - chunk.is_add = true; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad1.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); - EXPECT_EQ(full_hashes.size(), 1U); - EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, - Sha256Hash("www.fullevil.com/bad1.html"))); - prefixes.clear(); - full_hashes.clear(); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad2.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); - EXPECT_EQ(full_hashes.size(), 1U); - EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, - Sha256Hash("www.fullevil.com/bad2.html"))); - prefixes.clear(); - full_hashes.clear(); - - // Test receiving a full sub chunk, which will remove one of the full adds. - host.host = Sha256Prefix("www.fullevil.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_FULL_HASH, 1); - host.entry->set_chunk_id(200); - host.entry->SetChunkIdAtPrefix(0, 20); - host.entry->SetFullHashAt(0, Sha256Hash("www.fullevil.com/bad1.html")); - - chunk.chunk_number = 200; - chunk.is_add = false; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad1.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); - EXPECT_TRUE(full_hashes.empty()); - - // There should be one remaining full add. - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad2.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); - EXPECT_EQ(full_hashes.size(), 1U); - EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, - Sha256Hash("www.fullevil.com/bad2.html"))); - prefixes.clear(); - full_hashes.clear(); - - // Now test an AddDel for the remaining full add. - EXPECT_TRUE(database_->UpdateStarted(&lists)); - AddDelChunk(safe_browsing_util::kMalwareList, 20); - database_->UpdateFinished(true); - - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad1.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad2.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); -} - -// Test that corrupt databases are appropriately handled, even if the -// corruption is detected in the midst of the update. -// TODO(shess): Disabled until ScopedLogMessageIgnorer resolved. -// http://crbug.com/56448 -TEST_F(SafeBrowsingDatabaseBloomTest, DISABLED_SqliteCorruptionHandling) { - // Re-create the database in a captive message loop so that we can - // influence task-posting. - database_.reset(); - MessageLoop loop(MessageLoop::TYPE_DEFAULT); - database_.reset(new SafeBrowsingDatabaseBloom); - database_->Init(database_filename_); - - // This will cause an empty database to be created. - std::vector<SBListChunkRanges> lists; - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->UpdateFinished(true); - - // Create a sub chunk to insert. - SBChunkList chunks; - SBChunk chunk; - SBChunkHost host; - host.host = Sha256Prefix("www.subbed.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); - host.entry->set_chunk_id(7); - host.entry->SetChunkIdAtPrefix(0, 19); - host.entry->SetPrefixAt(0, Sha256Prefix("www.subbed.com/notevil1.html")); - chunk.chunk_number = 7; - chunk.is_add = false; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - chunks.clear(); - chunks.push_back(chunk); - - // Corrupt the |sub_prefix| table. - ASSERT_TRUE(CorruptSqliteTable(database_filename_, "sub_prefix")); - - { - // Verify the corruption. This will DCHECK, so suppress the crash. - ScopedLogMessageIgnorer ignorer; - LOG(INFO) << "Expect failed check on: database disk image is malformed"; - ASSERT_TRUE(TestCorruptSqliteTable(database_filename_, "sub_prefix")); - - // Start an update. The insert will fail due to corruption. - EXPECT_TRUE(database_->UpdateStarted(&lists)); - LOG(INFO) << "Expect failed check on: sqlite error 11"; - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - - // TODO(shess): Would prefer to test that the database is still - // corrupt at this point, but the database is locked. - - // Flush through the corruption-handler task. - LOG(INFO) << "Expect failed check on: SafeBrowsing database reset"; - MessageLoop::current()->RunAllPending(); - database_->UpdateFinished(true); - } - - // Database was re-created, and is now not corrupt. - ASSERT_FALSE(TestCorruptSqliteTable(database_filename_, "sub_prefix")); - - // This update succeeds. - EXPECT_TRUE(database_->UpdateStarted(&lists)); - database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); - database_->UpdateFinished(true); - - database_.reset(); -} - -namespace { - -void PrintStat(const char* name) { - int value = base::StatsTable::current()->GetCounterValue(name); - SB_DLOG(INFO) << StringPrintf("%s %d", name, value); -} - -FilePath GetFullSBDataPath(const FilePath& path) { - FilePath full_path; - if (!PathService::Get(base::DIR_SOURCE_ROOT, &full_path)) { - ADD_FAILURE() << "Unable to find test DIR_SOURCE_ROOT for test data."; - return FilePath(); - } - full_path = full_path.AppendASCII("chrome"); - full_path = full_path.AppendASCII("test"); - full_path = full_path.AppendASCII("data"); - full_path = full_path.AppendASCII("safe_browsing"); - full_path = full_path.Append(path); - return full_path; -} - -// TODO(shess): The clients of this structure manually manage -// |chunks|. Improve this code to apply the RAII idiom to manage -// |chunks|. -struct ChunksInfo { - SBChunkList* chunks; // weak - std::string listname; -}; - -// TODO(shess): Move this into SafeBrowsingDatabaseTest. -void PerformUpdate(SafeBrowsingDatabaseBloom* database, - const FilePath& database_filename, - const FilePath& initial_db, - const std::vector<ChunksInfo>& chunks, - const std::vector<SBChunkDelete>& deletes) { - base::IoCounters before, after; - - if (!initial_db.empty()) { - FilePath full_initial_db = GetFullSBDataPath(initial_db); - ASSERT_FALSE(full_initial_db.empty()); - ASSERT_TRUE(file_util::PathExists(full_initial_db)); - ASSERT_TRUE(file_util::CopyFile(full_initial_db, database_filename)); - } - - Time before_time = Time::Now(); - base::ProcessHandle handle = base::Process::Current().handle(); - scoped_ptr<base::ProcessMetrics> metric( -#if !defined(OS_MACOSX) - base::ProcessMetrics::CreateProcessMetrics(handle)); -#else - // Getting stats only for the current process is enough, so NULL is fine. - base::ProcessMetrics::CreateProcessMetrics(handle, NULL)); -#endif - // Get IO stats. These are currently not supported on Mac, and may not be - // available for Linux, so we check the result and only show IO stats if - // they are available. - bool gotIOCounters = metric->GetIOCounters(&before); - - std::vector<SBListChunkRanges> lists; - EXPECT_TRUE(database->UpdateStarted(&lists)); - database->DeleteChunks(deletes); - for (size_t i = 0; i < chunks.size(); ++i) - database->InsertChunks(chunks[i].listname, *chunks[i].chunks); - - database->UpdateFinished(true); - gotIOCounters = gotIOCounters && metric->GetIOCounters(&after); - - if (gotIOCounters) { - SB_DLOG(INFO) << StringPrintf("I/O Read Bytes: %" PRIu64, - after.ReadTransferCount - before.ReadTransferCount); - SB_DLOG(INFO) << StringPrintf("I/O Write Bytes: %" PRIu64, - after.WriteTransferCount - before.WriteTransferCount); - SB_DLOG(INFO) << StringPrintf("I/O Reads: %" PRIu64, - after.ReadOperationCount - before.ReadOperationCount); - SB_DLOG(INFO) << StringPrintf("I/O Writes: %" PRIu64, - after.WriteOperationCount - before.WriteOperationCount); - } - SB_DLOG(INFO) << StringPrintf("Finished in %" PRId64 " ms", - (Time::Now() - before_time).InMilliseconds()); - - PrintStat("c:SB.HostSelect"); - PrintStat("c:SB.HostSelectForBloomFilter"); - PrintStat("c:SB.HostReplace"); - PrintStat("c:SB.HostInsert"); - PrintStat("c:SB.HostDelete"); - PrintStat("c:SB.ChunkSelect"); - PrintStat("c:SB.ChunkInsert"); - PrintStat("c:SB.ChunkDelete"); - PrintStat("c:SB.TransactionCommit"); -} - -void UpdateDatabase(SafeBrowsingDatabaseBloom* database, - const FilePath& database_filename, - const FilePath& initial_db, - const FilePath& response_path, - const FilePath& updates_path) { - // First we read the chunks from disk, so that this isn't counted in IO bytes. - std::vector<ChunksInfo> chunks; - - SafeBrowsingProtocolParser parser; - if (!updates_path.empty()) { - FilePath data_dir = GetFullSBDataPath(updates_path); - ASSERT_FALSE(data_dir.empty()); - ASSERT_TRUE(file_util::PathExists(data_dir)); - file_util::FileEnumerator file_enum(data_dir, false, - file_util::FileEnumerator::FILES); - while (true) { - FilePath file = file_enum.Next(); - if (file.empty()) - break; - - int64 size64; - bool result = file_util::GetFileSize(file, &size64); - ASSERT_TRUE(result); - - int size = static_cast<int>(size64); - scoped_array<char> data(new char[size]); - file_util::ReadFile(file, data.get(), size); - - ChunksInfo info; - info.chunks = new SBChunkList; - - bool re_key; - result = parser.ParseChunk(data.get(), size, "", "", - &re_key, info.chunks); - ASSERT_TRUE(result); - - info.listname = WideToASCII(file.BaseName().ToWStringHack()); - size_t index = info.listname.find('_'); // Get rid fo the _s or _a. - info.listname.resize(index); - info.listname.erase(0, 3); // Get rid of the 000 etc. - - chunks.push_back(info); - } - } - - std::vector<SBChunkDelete> deletes; - if (!response_path.empty()) { - std::string update; - FilePath full_response_path = GetFullSBDataPath(response_path); - ASSERT_FALSE(full_response_path.empty()); - ASSERT_TRUE(file_util::PathExists(full_response_path)); - if (file_util::ReadFileToString(full_response_path, &update)) { - int next_update; - bool result, rekey, reset; - std::vector<ChunkUrl> urls; - result = parser.ParseUpdate(update.c_str(), - static_cast<int>(update.length()), - "", - &next_update, - &rekey, - &reset, - &deletes, - &urls); - ASSERT_TRUE(result); - if (!updates_path.empty()) - ASSERT_EQ(urls.size(), chunks.size()); - } - } - - PerformUpdate(database, database_filename, initial_db, chunks, deletes); - - // TODO(shess): Make ChunksInfo handle this via scoping. - for (std::vector<ChunksInfo>::iterator iter = chunks.begin(); - iter != chunks.end(); ++iter) { - delete iter->chunks; - iter->chunks = NULL; - } -} - -// Construct the shared base path used by the GetOld* functions. -FilePath BasePath() { - return FilePath(FILE_PATH_LITERAL("old")); -} - -FilePath GetOldSafeBrowsingPath() { - return BasePath().AppendASCII("SafeBrowsing"); -} - -FilePath GetOldResponsePath() { - return BasePath().AppendASCII("response"); -} - -FilePath GetOldUpdatesPath() { - return BasePath().AppendASCII("updates"); -} - -} // namespace - -// Counts the IO needed for the initial update of a database. -// test\data\safe_browsing\download_update.py was used to fetch the add/sub -// chunks that are read, in order to get repeatable runs. -TEST_F(SafeBrowsingDatabaseBloomTest, DatabaseInitialIO) { - UpdateDatabase(database_.get(), database_filename_, - FilePath(), FilePath(), FilePath().AppendASCII("initial")); -} - -// Counts the IO needed to update a month old database. -// The data files were generated by running "..\download_update.py postdata" -// in the "safe_browsing\old" directory. -TEST_F(SafeBrowsingDatabaseBloomTest, DatabaseOldIO) { - UpdateDatabase(database_.get(), database_filename_, GetOldSafeBrowsingPath(), - GetOldResponsePath(), GetOldUpdatesPath()); -} - -// Like DatabaseOldIO but only the deletes. -TEST_F(SafeBrowsingDatabaseBloomTest, DatabaseOldDeletesIO) { - UpdateDatabase(database_.get(), database_filename_, - GetOldSafeBrowsingPath(), GetOldResponsePath(), FilePath()); -} - -// Like DatabaseOldIO but only the updates. -TEST_F(SafeBrowsingDatabaseBloomTest, DatabaseOldUpdatesIO) { - UpdateDatabase(database_.get(), database_filename_, - GetOldSafeBrowsingPath(), FilePath(), GetOldUpdatesPath()); -} - -// Does a a lot of addel's on very large chunks. -TEST_F(SafeBrowsingDatabaseBloomTest, DatabaseOldLotsofDeletesIO) { - std::vector<ChunksInfo> chunks; - std::vector<SBChunkDelete> deletes; - SBChunkDelete del; - del.is_sub_del = false; - del.list_name = safe_browsing_util::kMalwareList; - del.chunk_del.push_back(ChunkRange(3539, 3579)); - deletes.push_back(del); - PerformUpdate(database_.get(), database_filename_, - GetOldSafeBrowsingPath(), chunks, deletes); -} diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index f5d3f7d..e7e772a 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -2568,8 +2568,6 @@ 'browser/safe_browsing/safe_browsing_blocking_page.h', 'browser/safe_browsing/safe_browsing_database.cc', 'browser/safe_browsing/safe_browsing_database.h', - 'browser/safe_browsing/safe_browsing_database_bloom.cc', - 'browser/safe_browsing/safe_browsing_database_bloom.h', 'browser/safe_browsing/safe_browsing_service.cc', 'browser/safe_browsing/safe_browsing_service.h', 'browser/safe_browsing/safe_browsing_store.cc', diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi index f1e951e..2fa3101 100644 --- a/chrome/chrome_tests.gypi +++ b/chrome/chrome_tests.gypi @@ -1406,7 +1406,6 @@ 'browser/safe_browsing/protocol_manager_unittest.cc', 'browser/safe_browsing/protocol_parser_unittest.cc', 'browser/safe_browsing/safe_browsing_blocking_page_unittest.cc', - 'browser/safe_browsing/safe_browsing_database_bloom_unittest.cc', 'browser/safe_browsing/safe_browsing_database_unittest.cc', 'browser/safe_browsing/safe_browsing_store_file_unittest.cc', 'browser/safe_browsing/safe_browsing_store_sqlite_unittest.cc', diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc index a60f712..462ff5d 100644 --- a/chrome/common/chrome_switches.cc +++ b/chrome/common/chrome_switches.cc @@ -948,12 +948,6 @@ const char kRestoreLastSession[] = "restore-last-session"; // Runs the plugin processes inside the sandbox. const char kSafePlugins[] = "safe-plugins"; -// Select the safe-browsing database storage. "old" for the original -// SQLite-based SafeBrowsingDatabaseBloom, "newsqlite" for the new -// SQLite-based SafeBrowsingDatabase+SafeBrowsingStoreSQLite, and -// "newfile" for SafeBrowsingDatabase+SafeBrowsingStoreFile. -const char kSafeBrowsingDatabaseStore[] = "safe-browsing-database-store"; - // URL prefix used by safebrowsing to fetch hash, download data and // report malware. const char kSbInfoURLPrefix[] = "safebrowsing-info-url-prefix"; diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h index d4d1500..a95b9f5 100644 --- a/chrome/common/chrome_switches.h +++ b/chrome/common/chrome_switches.h @@ -268,7 +268,6 @@ extern const char kRendererProcess[]; extern const char kRendererStartupDialog[]; extern const char kRestoreLastSession[]; extern const char kSafePlugins[]; -extern const char kSafeBrowsingDatabaseStore[]; extern const char kSbInfoURLPrefix[]; extern const char kSbMacKeyURLPrefix[]; extern const char kSbDisableAutoUpdate[]; |