diff options
author | paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-09-20 02:03:08 +0000 |
---|---|---|
committer | paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-09-20 02:03:08 +0000 |
commit | 54d80bb02aa71eb957a863109392cd97ea1e2496 (patch) | |
tree | 31e7c397ba403a9e81c15eb1755a16b8e1083ad2 | |
parent | 8c8824bdb4820a4107ec94e5c3d12981bdc4ae7f (diff) | |
download | chromium_src-54d80bb02aa71eb957a863109392cd97ea1e2496.zip chromium_src-54d80bb02aa71eb957a863109392cd97ea1e2496.tar.gz chromium_src-54d80bb02aa71eb957a863109392cd97ea1e2496.tar.bz2 |
Create a SafeBrowsing database interface to more easily
facilitate alternate implementations.
The current SafeBrowsingDatabase code is moved to *_impl files.
Adding a new implementation can be done via command line flags
in the SafeBrowsingDatabase::Create factory method.
Review URL: http://codereview.chromium.org/3162
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@2434 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/browser.vcproj | 8 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/bloom_filter.h | 4 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/protocol_manager.cc | 1 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database.cc | 1231 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database.h | 266 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database_impl.cc | 1216 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database_impl.h | 283 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database_impl_unittest.cc | 228 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database_unittest.cc | 367 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_service.cc | 2 | ||||
-rw-r--r-- | chrome/test/unit/unittests.vcproj | 4 |
11 files changed, 1897 insertions, 1713 deletions
diff --git a/chrome/browser/browser.vcproj b/chrome/browser/browser.vcproj index 004701f..fc3c31b 100644 --- a/chrome/browser/browser.vcproj +++ b/chrome/browser/browser.vcproj @@ -1926,6 +1926,14 @@ > </File> <File + RelativePath=".\safe_browsing\safe_browsing_database_impl.cc" + > + </File> + <File + RelativePath=".\safe_browsing\safe_browsing_database_impl.h" + > + </File> + <File RelativePath=".\safe_browsing\safe_browsing_service.cc" > </File> diff --git a/chrome/browser/safe_browsing/bloom_filter.h b/chrome/browser/safe_browsing/bloom_filter.h index f470740..bb25d75 100644 --- a/chrome/browser/safe_browsing/bloom_filter.h +++ b/chrome/browser/safe_browsing/bloom_filter.h @@ -5,6 +5,9 @@ // A simple bloom filter. It's currently limited to four hashing functions, // which are calculated from the item's hash. +#ifndef CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H__ +#define CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H__ + #include "base/scoped_ptr.h" #include "base/basictypes.h" @@ -32,3 +35,4 @@ class BloomFilter { scoped_array<char> data_; }; +#endif // CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H__
\ No newline at end of file diff --git a/chrome/browser/safe_browsing/protocol_manager.cc b/chrome/browser/safe_browsing/protocol_manager.cc index 0f9aa5f..84b9882 100644 --- a/chrome/browser/safe_browsing/protocol_manager.cc +++ b/chrome/browser/safe_browsing/protocol_manager.cc @@ -12,7 +12,6 @@ #include "base/timer.h" #include "chrome/browser/profile.h" #include "chrome/browser/safe_browsing/protocol_parser.h" -#include "chrome/browser/safe_browsing/safe_browsing_database.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "chrome/common/env_util.h" #include "chrome/common/env_vars.h" diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index 8a57559..f978200 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -6,956 +6,48 @@ #include "base/file_util.h" #include "base/logging.h" -#include "base/message_loop.h" #include "base/sha2.h" -#include "base/string_util.h" -#include "chrome/browser/safe_browsing/bloom_filter.h" -#include "chrome/browser/safe_browsing/chunk_range.h" -#include "chrome/common/sqlite_compiled_statement.h" -#include "chrome/common/sqlite_utils.h" +#include "chrome/browser/safe_browsing/safe_browsing_database_impl.h" #include "googleurl/src/gurl.h" -// Database version. If this is different than what's stored on disk, the -// database is reset. -static const int kDatabaseVersion = 4; - // Filename suffix for the bloom filter. static const wchar_t kBloomFilterFile[] = L" Filter"; -// Don't want to create too small of a bloom filter initially while we're -// downloading the data and then keep having to rebuild it. -static const int kBloomFilterMinSize = 250000; - -// How many bits to use per item. See the design doc for more information. 
-static const int kBloomFilterSizeRatio = 13; - -// The minimum number of reads/misses before we will consider rebuilding the -// bloom filter. This is needed because we don't want a few misses after -// starting the browser to skew the percentage. -// TODO(jabdelmalek): report to UMA how often we rebuild. -static const int kBloomFilterMinReadsToCheckFP = 200; - -// The percentage of hit rate in the bloom filter when we regenerate it. -static const double kBloomFilterMaxFPRate = 5.0; - -// When we awake from a low power state, we try to avoid doing expensive disk -// operations for a few minutes to let the system page itself in and settle -// down. -static const int kOnResumeHoldupMs = 5 * 60 * 1000; // 5 minutes. - -// When doing any database operations that can take a long time, we do it in -// small chunks up to this amount. Once this much time passes, we sleep for -// the same amount and continue. This avoids blocking the thread so that if -// we get a bloom filter hit, we don't block the network request. -static const int kMaxThreadHoldupMs = 100; - -// How long to wait after updating the database to write the bloom filter. -static const int kBloomFilterWriteDelayMs = (60 * 1000); - -// The maximum staleness for a cached entry. 
-static const int kMaxStalenessMinutes = 45; - -SafeBrowsingDatabase::SafeBrowsingDatabase() - : db_(NULL), - init_(false), - transaction_count_(0), - asynchronous_(true), - chunk_inserted_callback_(NULL), -#pragma warning(suppress: 4355) // can use this - bloom_read_factory_(this), -#pragma warning(suppress: 4355) // can use this - bloom_write_factory_(this), -#pragma warning(suppress: 4355) // can use this - process_factory_(this), -#pragma warning(suppress: 4355) // can use this - reset_factory_(this), -#pragma warning(suppress: 4355) // can use this - resume_factory_(this), - disk_delay_(kMaxThreadHoldupMs) { -} - -SafeBrowsingDatabase::~SafeBrowsingDatabase() { - Close(); -} - -bool SafeBrowsingDatabase::Init(const std::wstring& filename, - Callback0::Type* chunk_inserted_callback) { - DCHECK(!init_ && filename_.empty()); - - filename_ = filename; - if (!Open()) - return false; - - bool load_filter = false; - if (!DoesSqliteTableExist(db_, "hosts")) { - if (!CreateTables()) { - // Database could be corrupt, try starting from scratch. - if (!ResetDatabase()) - return false; - } - } else if (!CheckCompatibleVersion()) { - if (!ResetDatabase()) - return false; - } else { - load_filter = true; - } - - bloom_filter_filename_ = BloomFilterFilename(filename_); - - if (load_filter) { - LoadBloomFilter(); - } else { - bloom_filter_.reset( - new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); - } - - init_ = true; - chunk_inserted_callback_ = chunk_inserted_callback; - return true; -} - -bool SafeBrowsingDatabase::Open() { - if (sqlite3_open(WideToUTF8(filename_).c_str(), &db_) != SQLITE_OK) - return false; - - // Run the database in exclusive mode. Nobody else should be accessing the - // database while we're running, and this will give somewhat improved perf. 
- sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL); - - statement_cache_.reset(new SqliteStatementCache(db_)); - bloom_filter_read_count_= 0; - bloom_filter_fp_count_ = 0; - bloom_filter_building_ = false; - - process_factory_.RevokeAll(); - bloom_read_factory_.RevokeAll(); - bloom_write_factory_.RevokeAll(); - - return true; +// Factory method. +SafeBrowsingDatabase* SafeBrowsingDatabase::Create() { + return new SafeBrowsingDatabaseImpl; } -bool SafeBrowsingDatabase::Close() { - if (!db_) +bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) { + if (!bloom_filter_.get()) return true; - process_factory_.RevokeAll(); - bloom_read_factory_.RevokeAll(); - bloom_write_factory_.RevokeAll(); - - if (!pending_add_del_.empty()) { - while (!pending_add_del_.empty()) - pending_add_del_.pop(); - - EndTransaction(); - } - - while (!pending_chunks_.empty()) { - std::deque<SBChunk>* chunks = pending_chunks_.front(); - safe_browsing_util::FreeChunks(chunks); - delete chunks; - pending_chunks_.pop(); - EndTransaction(); - } - - statement_cache_.reset(); // Must free statements before closing DB. - transaction_.reset(); - bool result = sqlite3_close(db_) == SQLITE_OK; - db_ = NULL; - return result; -} - -bool SafeBrowsingDatabase::CreateTables() { - SQLTransaction transaction(db_); - transaction.Begin(); - - // We use an autoincrement integer as the primary key to allow full table - // scans to be quick. Otherwise if we used host, then we'd have to jump - // all over the table when doing a full table scan to generate the bloom - // filter and that's an order of magnitude slower. By marking host as - // unique, an index is created automatically. 
- if (sqlite3_exec(db_, "CREATE TABLE hosts (" - "id INTEGER PRIMARY KEY AUTOINCREMENT," - "host INTEGER UNIQUE," - "entries BLOB)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - if (sqlite3_exec(db_, "CREATE TABLE chunks (" - "list_id INTEGER," - "chunk_type INTEGER," - "chunk_id INTEGER," - "hostkeys TEXT)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - if (sqlite3_exec(db_, "CREATE TABLE list_names (" - "id INTEGER PRIMARY KEY AUTOINCREMENT," - "name TEXT)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } + IncrementBloomFilterReadCount(); - sqlite3_exec(db_, "CREATE INDEX chunks_chunk_id ON chunks(chunk_id)", - NULL, NULL, NULL); - - std::string version = "PRAGMA user_version="; - version += StringPrintf("%d", kDatabaseVersion); - - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - if (statement->step() != SQLITE_DONE) - return false; - - transaction.Commit(); - return true; -} - -// The SafeBrowsing service assumes this operation is synchronous. 
-bool SafeBrowsingDatabase::ResetDatabase() { - hash_cache_.clear(); - prefix_miss_cache_.clear(); - - bool rv = Close(); - DCHECK(rv); - - if (!file_util::Delete(filename_, false)) { - NOTREACHED(); - return false; - } - - bloom_filter_.reset( - new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); - file_util::Delete(bloom_filter_filename_, false); - - if (!Open()) - return false; - - return CreateTables(); -} - -bool SafeBrowsingDatabase::CheckCompatibleVersion() { - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "PRAGMA user_version"); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - int result = statement->step(); - if (result != SQLITE_ROW) - return false; - - return statement->column_int(0) == kDatabaseVersion; -} - -bool SafeBrowsingDatabase::ContainsUrl( - const GURL& url, - std::string* matching_list, - std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - Time last_update) { - matching_list->clear(); - prefix_hits->clear(); - if (!init_) { - DCHECK(false); - return false; - } - - if (!url.is_valid()) - return false; - - std::vector<std::string> hosts, paths; + std::vector<std::string> hosts; safe_browsing_util::GenerateHostsToCheck(url, &hosts); - safe_browsing_util::GeneratePathsToCheck(url, &paths); if (hosts.size() == 0) - return false; + return false; // Could be about:blank. - // Per the spec, if there is at least 3 components, check both the most - // significant three components and the most significant two components. - // If only two components, check the most significant two components. - // If it's an IP address, use the entire IP address as the host. 
- SBPrefix host_key_2, host_key_3, host_key_ip; + SBPrefix host_key; if (url.HostIsIPAddress()) { - base::SHA256HashString(url.host() + "/", &host_key_ip, sizeof(SBPrefix)); - CheckUrl(url.host(), host_key_ip, paths, matching_list, prefix_hits); + base::SHA256HashString(url.host() + "/", &host_key, sizeof(SBPrefix)); + if (bloom_filter_->Exists(host_key)) + return true; } else { - base::SHA256HashString(hosts[0] + "/", &host_key_2, sizeof(SBPrefix)); - if (hosts.size() > 1) - base::SHA256HashString(hosts[1] + "/", &host_key_3, sizeof(SBPrefix)); - - for (size_t i = 0; i < hosts.size(); ++i) { - SBPrefix host_key = i == 0 ? host_key_2 : host_key_3; - CheckUrl(hosts[i], host_key, paths, matching_list, prefix_hits); - } - } - - if (!matching_list->empty() || !prefix_hits->empty()) { - // If all the prefixes are cached as 'misses', don't issue a GetHash. - bool all_misses = true; - for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin(); - it != prefix_hits->end(); ++it) { - if (prefix_miss_cache_.find(*it) == prefix_miss_cache_.end()) { - all_misses = false; - break; - } - } - if (all_misses) - return false; - GetCachedFullHashes(prefix_hits, full_hits, last_update); - return true; - } + base::SHA256HashString(hosts[0] + "/", &host_key, sizeof(SBPrefix)); + if (bloom_filter_->Exists(host_key)) + return true; - // Check if we're getting too many FPs in the bloom filter, in which case - // it's time to rebuild it. 
- bloom_filter_fp_count_++; - if (!bloom_filter_building_ && - bloom_filter_read_count_ > kBloomFilterMinReadsToCheckFP) { - double fp_rate = bloom_filter_fp_count_ * 100 / bloom_filter_read_count_; - if (fp_rate > kBloomFilterMaxFPRate) { - DeleteBloomFilter(); - MessageLoop::current()->PostTask(FROM_HERE, - bloom_read_factory_.NewRunnableMethod( - &SafeBrowsingDatabase::BuildBloomFilter)); + if (hosts.size() > 1) { + base::SHA256HashString(hosts[1] + "/", &host_key, sizeof(SBPrefix)); + if (bloom_filter_->Exists(host_key)) + return true; } } - return false; } -void SafeBrowsingDatabase::CheckUrl(const std::string& host, - SBPrefix host_key, - const std::vector<std::string>& paths, - std::string* matching_list, - std::vector<SBPrefix>* prefix_hits) { - // First see if there are any entries in the db for this host. - SBHostInfo info; - if (!ReadInfo(host_key, &info, NULL)) - return; // No hostkey found. This is definitely safe. - - std::vector<SBFullHash> prefixes; - prefixes.resize(paths.size()); - for (size_t i = 0; i < paths.size(); ++i) - base::SHA256HashString(host + paths[i], &prefixes[i], sizeof(SBFullHash)); - - std::vector<SBPrefix> hits; - int list_id = -1; - if (!info.Contains(prefixes, &list_id, &hits)) - return; - - if (list_id != -1) { - *matching_list = GetListName(list_id); - } else if (hits.empty()) { - prefix_hits->push_back(host_key); - } else { - for (size_t i = 0; i < hits.size(); ++i) - prefix_hits->push_back(hits[i]); - } -} - -bool SafeBrowsingDatabase::ReadInfo(int host_key, SBHostInfo* info, int* id) { - STATS_COUNTER(L"SB.HostSelect", 1); - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "SELECT id, entries FROM hosts WHERE host=?"); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - statement->bind_int(0, host_key); - int result = statement->step(); - if (result == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - - if (result == SQLITE_DONE) - return false; - - if (result != SQLITE_ROW) { - 
DLOG(ERROR) << "SafeBrowsingDatabase got " - "statement->step() != SQLITE_ROW for " - << host_key; - return false; - } - - if (id) - *id = statement->column_int(0); - - return info->Initialize(statement->column_blob(1), - statement->column_bytes(1)); -} - -void SafeBrowsingDatabase::WriteInfo(int host_key, - const SBHostInfo& info, - int id) { - SQLITE_UNIQUE_STATEMENT(statement1, *statement_cache_, - "INSERT OR REPLACE INTO hosts" - "(host,entries)" - "VALUES (?,?)"); - - SQLITE_UNIQUE_STATEMENT(statement2, *statement_cache_, - "INSERT OR REPLACE INTO hosts" - "(id,host,entries)" - "VALUES (?,?,?)"); - - SqliteCompiledStatement& statement = id == 0 ? statement1 : statement2; - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - - int start_index = 0; - if (id != 0) { - statement->bind_int(start_index++, id); - STATS_COUNTER(L"SB.HostReplace", 1); - } else { - STATS_COUNTER(L"SB.HostInsert", 1); - } - - statement->bind_int(start_index++, host_key); - statement->bind_blob(start_index++, info.data(), info.size()); - int rv = statement->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } - AddHostToBloomFilter(host_key); -} - -void SafeBrowsingDatabase::DeleteInfo(int host_key) { - STATS_COUNTER(L"SB.HostDelete", 1); - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "DELETE FROM hosts WHERE host=?"); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - - statement->bind_int(0, host_key); - int rv = statement->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } -} - -void SafeBrowsingDatabase::StartThrottledWork() { - if (process_factory_.empty()) - RunThrottledWork(); -} - -void SafeBrowsingDatabase::RunThrottledWork() { - prefix_miss_cache_.clear(); - while (true) { - bool done = ProcessChunks(); - - if (done) - done = ProcessAddDel(); - - if (done) - break; - - if (asynchronous_) { - // For production code, we want to throttle 
by calling InvokeLater to - // continue the work after a delay. However for unit tests we depend on - // updates to happen synchronously. - MessageLoop::current()->PostDelayedTask(FROM_HERE, - process_factory_.NewRunnableMethod( - &SafeBrowsingDatabase::RunThrottledWork), disk_delay_); - break; - } else { - Sleep(kMaxThreadHoldupMs); - } - } -} - -void SafeBrowsingDatabase::InsertChunks(const std::string& list_name, - std::deque<SBChunk>* chunks) { - // We've going to be updating the bloom filter, so delete the on-disk - // serialization so that if the process crashes we'll generate a new one on - // startup, instead of reading a stale filter. - DeleteBloomFilter(); - - int list_id = GetListID(list_name); - std::deque<SBChunk>::iterator i = chunks->begin(); - for (; i != chunks->end(); ++i) { - SBChunk& chunk = (*i); - std::deque<SBChunkHost>::iterator j = chunk.hosts.begin(); - for (; j != chunk.hosts.end(); ++j) { - j->entry->set_list_id(list_id); - if (j->entry->IsAdd()) - j->entry->set_chunk_id(chunk.chunk_number); - } - } - - pending_chunks_.push(chunks); - - BeginTransaction(); - StartThrottledWork(); -} - -bool SafeBrowsingDatabase::ProcessChunks() { - if (pending_chunks_.empty()) - return true; - - while (!pending_chunks_.empty()) { - std::deque<SBChunk>* chunks = pending_chunks_.front(); - bool done = false; - // The entries in one chunk are all either adds or subs. - if (chunks->front().hosts.front().entry->IsAdd()) { - done = ProcessAddChunks(chunks); - } else { - done = ProcessSubChunks(chunks); - } - - if (!done) - return false; - - delete chunks; - pending_chunks_.pop(); - EndTransaction(); - } - - if (!bloom_filter_building_) { - if (asynchronous_) { - // When we're updating, there will usually be a bunch of pending_chunks_ - // to process, and we don't want to keep writing the bloom filter to disk - // 10 or 20 times unnecessarily. So schedule to write it in a minute, and - // if any new updates happen in the meantime, push that forward. 
- if (!bloom_write_factory_.empty()) - bloom_write_factory_.RevokeAll(); - - MessageLoop::current()->PostDelayedTask(FROM_HERE, - bloom_write_factory_.NewRunnableMethod( - &SafeBrowsingDatabase::WriteBloomFilter), - kBloomFilterWriteDelayMs); - } else { - WriteBloomFilter(); - } - } - - if (chunk_inserted_callback_) - chunk_inserted_callback_->Run(); - - return true; -} - -bool SafeBrowsingDatabase::ProcessAddChunks(std::deque<SBChunk>* chunks) { - Time before = Time::Now(); - while (!chunks->empty()) { - SBChunk& chunk = chunks->front(); - int list_id = chunk.hosts.front().entry->list_id(); - int chunk_id = chunk.chunk_number; - - // The server can give us a chunk that we already have because it's part of - // a range. Don't add it again. - if (!ChunkExists(list_id, ADD_CHUNK, chunk_id)) { - while (!chunk.hosts.empty()) { - // Read the existing record for this host, if it exists. - SBPrefix host = chunk.hosts.front().host; - SBEntry* entry = chunk.hosts.front().entry; - - UpdateInfo(host, entry, false); - - if (!add_chunk_modified_hosts_.empty()) - add_chunk_modified_hosts_.append(","); - - add_chunk_modified_hosts_.append(StringPrintf("%d", host)); - - entry->Destroy(); - chunk.hosts.pop_front(); - if (!chunk.hosts.empty() && - (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) { - return false; - } - } - - AddChunkInformation(list_id, ADD_CHUNK, chunk_id, - add_chunk_modified_hosts_); - add_chunk_modified_hosts_.clear(); - } - - chunks->pop_front(); - } - - return true; -} - -bool SafeBrowsingDatabase::ProcessSubChunks(std::deque<SBChunk>* chunks) { - Time before = Time::Now(); - while (!chunks->empty()) { - SBChunk& chunk = chunks->front(); - int list_id = chunk.hosts.front().entry->list_id(); - int chunk_id = chunk.chunk_number; - - if (!ChunkExists(list_id, SUB_CHUNK, chunk_id)) { - while (!chunk.hosts.empty()) { - SBPrefix host = chunk.hosts.front().host; - SBEntry* entry = chunk.hosts.front().entry; - UpdateInfo(host, entry, true); - - 
entry->Destroy(); - chunk.hosts.pop_front(); - if (!chunk.hosts.empty() && - (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) { - return false; - } - } - - AddChunkInformation(list_id, SUB_CHUNK, chunk_id, ""); - } - - chunks->pop_front(); - } - - return true; -} - -void SafeBrowsingDatabase::UpdateInfo(SBPrefix host_key, - SBEntry* entry, - bool persist) { - // If an existing record exists, and the new record is smaller, then reuse - // its entry to reduce database fragmentation. - int old_id = 0; - SBHostInfo info; - // If the bloom filter isn't there, then assume that the entry exists, - // otherwise test the bloom filter. - bool exists = !bloom_filter_.get() || bloom_filter_->Exists(host_key); - if (exists) - exists = ReadInfo(host_key, &info, &old_id); - int old_size = info.size(); - - if (entry->IsAdd()) { - info.AddPrefixes(entry); - } else { - ClearCachedHashes(entry); - info.RemovePrefixes(entry, persist); - } - - if (old_size == info.size()) { - // The entry didn't change, so no point writing it. - return; - } - - if (!info.size()) { - // Just delete the existing information instead of writing an empty one. - if (exists) - DeleteInfo(host_key); - return; - } - - if (info.size() > old_size) { - // New record is larger, so just add a new entry. 
- old_id = 0; - } - - WriteInfo(host_key, info, old_id); -} - -void SafeBrowsingDatabase::DeleteChunks( - std::vector<SBChunkDelete>* chunk_deletes) { - BeginTransaction(); - bool pending_add_del_were_empty = pending_add_del_.empty(); - - for (size_t i = 0; i < chunk_deletes->size(); ++i) { - const SBChunkDelete& chunk = (*chunk_deletes)[i]; - std::vector<int> chunk_numbers; - RangesToChunks(chunk.chunk_del, &chunk_numbers); - for (size_t del = 0; del < chunk_numbers.size(); ++del) { - if (chunk.is_sub_del) { - SubDel(chunk.list_name, chunk_numbers[del]); - } else { - AddDel(chunk.list_name, chunk_numbers[del]); - } - } - } - - if (pending_add_del_were_empty && !pending_add_del_.empty()) { - // Only start a transaction for pending AddDel work if we haven't started - // one already. - BeginTransaction(); - StartThrottledWork(); - } - - delete chunk_deletes; - EndTransaction(); -} - -void SafeBrowsingDatabase::AddDel(const std::string& list_name, - int add_chunk_id) { - STATS_COUNTER(L"SB.ChunkSelect", 1); - int list_id = GetListID(list_name); - // Find all the prefixes that came from the given add_chunk_id. - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "SELECT hostkeys FROM chunks WHERE " - "list_id=? AND chunk_type=? 
AND chunk_id=?"); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - - std::string hostkeys_str; - statement->bind_int(0, list_id); - statement->bind_int(1, ADD_CHUNK); - statement->bind_int(2, add_chunk_id); - int rv = statement->step(); - if (rv != SQLITE_ROW || !statement->column_string(0, &hostkeys_str)) { - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - NOTREACHED(); - } - - return; - } - - AddDelWork work; - work.list_id = list_id; - work.add_chunk_id = add_chunk_id; - pending_add_del_.push(work); - SplitString(hostkeys_str, ',', &pending_add_del_.back().hostkeys); -} - -bool SafeBrowsingDatabase::ProcessAddDel() { - if (pending_add_del_.empty()) - return true; - - Time before = Time::Now(); - while (!pending_add_del_.empty()) { - AddDelWork& add_del_work = pending_add_del_.front(); - ClearCachedHashesForChunk(add_del_work.list_id, add_del_work.add_chunk_id); - std::vector<std::string>& hostkeys = add_del_work.hostkeys; - for (size_t i = 0; i < hostkeys.size(); ++i) { - SBPrefix host = atoi(hostkeys[i].c_str()); - // Doesn't matter if we use SUB_PREFIX or SUB_FULL_HASH since if there - // are no prefixes it's not used. 
- SBEntry* entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0); - entry->set_list_id(add_del_work.list_id); - entry->set_chunk_id(add_del_work.add_chunk_id); - UpdateInfo(host, entry, false); - entry->Destroy(); - if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) { - hostkeys.erase(hostkeys.begin(), hostkeys.begin() + i); - return false; - } - } - - RemoveChunkId(add_del_work.list_id, ADD_CHUNK, add_del_work.add_chunk_id); - pending_add_del_.pop(); - } - - EndTransaction(); - - return true; -} - -void SafeBrowsingDatabase::SubDel(const std::string& list_name, - int sub_chunk_id) { - RemoveChunkId(GetListID(list_name), SUB_CHUNK, sub_chunk_id); -} - -void SafeBrowsingDatabase::AddChunkInformation(int list_id, - ChunkType type, - int chunk_id, - const std::string& hostkeys) { - STATS_COUNTER(L"SB.ChunkInsert", 1); - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "INSERT INTO chunks" - "(list_id,chunk_type,chunk_id,hostkeys)" - "VALUES (?,?,?,?)"); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - - statement->bind_int(0, list_id); - statement->bind_int(1, type); - statement->bind_int(2, chunk_id); - statement->bind_string(3, hostkeys); - int rv = statement->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } -} - -void SafeBrowsingDatabase::GetListsInfo(std::vector<SBListChunkRanges>* lists) { - lists->clear(); - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "SELECT name,id FROM list_names"); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - - while (true) { - int rv = statement->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - - break; - } - int list_id = statement->column_int(1); - lists->push_back(SBListChunkRanges(statement->column_string(0))); - GetChunkIds(list_id, ADD_CHUNK, &lists->back().adds); - GetChunkIds(list_id, SUB_CHUNK, &lists->back().subs); - } -} - -void SafeBrowsingDatabase::GetChunkIds(int 
list_id, - ChunkType type, - std::string* list) { - list->clear(); - STATS_COUNTER(L"SB.ChunkSelect", 1); - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "SELECT chunk_id FROM chunks WHERE list_id=? AND chunk_type=? " - "ORDER BY chunk_id"); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - - statement->bind_int(0, list_id); - statement->bind_int(1, type); - - std::vector<int> chunk_ids; - while (true) { - int rv = statement->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - - break; - } - chunk_ids.push_back(statement->column_int(0)); - } - - std::vector<ChunkRange> ranges; - ChunksToRanges(chunk_ids, &ranges); - RangesToString(ranges, list); -} - -bool SafeBrowsingDatabase::ChunkExists(int list_id, - ChunkType type, - int chunk_id) { - STATS_COUNTER(L"SB.ChunkSelect", 1); - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "SELECT chunk_id FROM chunks WHERE" - " list_id=? AND chunk_type=? AND chunk_id=?"); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - statement->bind_int(0, list_id); - statement->bind_int(1, type); - statement->bind_int(2, chunk_id); - - int rv = statement->step(); - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - - return rv == SQLITE_ROW; -} - -void SafeBrowsingDatabase::RemoveChunkId(int list_id, - ChunkType type, - int chunk_id) { - // Also remove the add chunk id from add_chunks - STATS_COUNTER(L"SB.ChunkDelete", 1); - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "DELETE FROM chunks WHERE list_id=? AND chunk_type=? 
AND chunk_id=?"); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - - statement->bind_int(0, list_id); - statement->bind_int(1, type); - statement->bind_int(2, chunk_id); - int rv = statement->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } -} - -int SafeBrowsingDatabase::AddList(const std::string& name) { - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "INSERT INTO list_names" - "(id,name)" - "VALUES (NULL,?)"); - if (!statement.is_valid()) { - NOTREACHED(); - return 0; - } - - statement->bind_string(0, name); - int rv = statement->step(); - if (rv != SQLITE_DONE) { - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - NOTREACHED(); - } - - return 0; - } - - return static_cast<int>(sqlite3_last_insert_rowid(db_)); -} - -int SafeBrowsingDatabase::GetListID(const std::string& name) { - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "SELECT id FROM list_names WHERE name=?"); - if (!statement.is_valid()) { - NOTREACHED(); - return 0; - } - - statement->bind_string(0, name); - int result = statement->step(); - if (result == SQLITE_ROW) - return statement->column_int(0); - - if (result == SQLITE_CORRUPT) - HandleCorruptDatabase(); - - // There isn't an existing entry so add one. 
- return AddList(name); -} - -std::string SafeBrowsingDatabase::GetListName(int id) { - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "SELECT name FROM list_names WHERE id=?"); - if (!statement.is_valid()) { - NOTREACHED(); - return 0; - } - - statement->bind_int(0, id); - int result = statement->step(); - if (result != SQLITE_ROW) { - if (result == SQLITE_CORRUPT) - HandleCorruptDatabase(); - - return std::string(); - } - - return statement->column_string(0); -} - std::wstring SafeBrowsingDatabase::BloomFilterFilename( const std::wstring& db_filename) { return db_filename + kBloomFilterFile; @@ -998,286 +90,3 @@ void SafeBrowsingDatabase::WriteBloomFilter() { SB_DLOG(INFO) << "SafeBrowsingDatabase wrote bloom filter in " << (Time::Now() - before).InMilliseconds() << " ms"; } - -void SafeBrowsingDatabase::AddHostToBloomFilter(int host_key) { - if (bloom_filter_building_) - bloom_filter_temp_hostkeys_.push_back(host_key); - // Even if we're rebuilding the bloom filter, we still need to update the - // current one since we also use it to decide whether to do certain database - // operations during update. - if (bloom_filter_.get()) - bloom_filter_->Insert(host_key); -} - -void SafeBrowsingDatabase::BuildBloomFilter() { - // A bloom filter needs the size at creation, however doing a select count(*) - // is too slow since sqlite would have to enumerate each entry to get the - // count. So instead we load all the hostkeys into memory, and then when - // we've read all of them and have the total count, we can create the bloom - // filter. - bloom_filter_temp_hostkeys_.reserve(kBloomFilterMinSize); - - bloom_filter_building_ = true; - bloom_filter_rebuild_time_ = Time::Now(); - - BeginTransaction(); - - OnReadHostKeys(0); -} - -void SafeBrowsingDatabase::OnReadHostKeys(int start_id) { - // Since reading all the keys in one go could take > 20 seconds, instead we - // read them in small chunks. 
- STATS_COUNTER(L"SB.HostSelectForBloomFilter", 1); - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "SELECT host,id FROM hosts WHERE id > ? ORDER BY id"); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - - statement->bind_int(0, start_id); - Time before = Time::Now(); - int count = 0; - - int next_id = start_id + 1; - while (true) { - int rv = statement->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - - break; - } - - count++; - bloom_filter_temp_hostkeys_.push_back(statement->column_int(0)); - next_id = statement->column_int(1) + 1; - if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) { - if (asynchronous_) { - break; - } else { - Sleep(kMaxThreadHoldupMs); - } - } - } - - TimeDelta chunk_time = Time::Now() - before; - int time_ms = static_cast<int>(chunk_time.InMilliseconds()); - SB_DLOG(INFO) << "SafeBrowsingDatabase read " << count << " hostkeys in " << - time_ms << " ms"; - - if (!count || !asynchronous_) { - OnDoneReadingHostKeys(); - return; - } - - // To avoid hammering the disk and disrupting other parts of Chrome that use - // the disk, we throttle the rebuilding. 
- MessageLoop::current()->PostDelayedTask(FROM_HERE, - bloom_read_factory_.NewRunnableMethod( - &SafeBrowsingDatabase::OnReadHostKeys, next_id), - disk_delay_); -} - -void SafeBrowsingDatabase::OnDoneReadingHostKeys() { - EndTransaction(); - Time before = Time::Now(); - int number_of_keys = std::max(kBloomFilterMinSize, - static_cast<int>(bloom_filter_temp_hostkeys_.size())); - int filter_size = number_of_keys * kBloomFilterSizeRatio; - BloomFilter* filter = new BloomFilter(filter_size); - for (size_t i = 0; i < bloom_filter_temp_hostkeys_.size(); ++i) - filter->Insert(bloom_filter_temp_hostkeys_[i]); - - bloom_filter_.reset(filter); - - TimeDelta bloom_gen = Time::Now() - before; - TimeDelta delta = Time::Now() - bloom_filter_rebuild_time_; - SB_DLOG(INFO) << "SafeBrowsingDatabase built bloom filter in " << - delta.InMilliseconds() << " ms total (" << bloom_gen.InMilliseconds() - << " ms to generate bloom filter). hostkey count: " << - bloom_filter_temp_hostkeys_.size(); - - WriteBloomFilter(); - bloom_filter_building_ = false; - bloom_filter_temp_hostkeys_.clear(); - bloom_filter_read_count_ = 0; - bloom_filter_fp_count_ = 0; -} - -bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) { - if (!bloom_filter_.get()) - return true; - - bloom_filter_read_count_++; - - std::vector<std::string> hosts; - safe_browsing_util::GenerateHostsToCheck(url, &hosts); - if (hosts.size() == 0) - return false; // Could be about:blank. 
- - SBPrefix host_key; - if (url.HostIsIPAddress()) { - base::SHA256HashString(url.host() + "/", &host_key, sizeof(SBPrefix)); - if (bloom_filter_->Exists(host_key)) - return true; - } else { - base::SHA256HashString(hosts[0] + "/", &host_key, sizeof(SBPrefix)); - if (bloom_filter_->Exists(host_key)) - return true; - - if (hosts.size() > 1) { - base::SHA256HashString(hosts[1] + "/", &host_key, sizeof(SBPrefix)); - if (bloom_filter_->Exists(host_key)) - return true; - } - } - return false; -} - -void SafeBrowsingDatabase::BeginTransaction() { - transaction_count_++; - if (transaction_.get() == NULL) { - transaction_.reset(new SQLTransaction(db_)); - if (transaction_->Begin() != SQLITE_OK) { - DCHECK(false) << "Safe browsing database couldn't start transaction"; - transaction_.reset(); - } - } -} - -void SafeBrowsingDatabase::EndTransaction() { - if (--transaction_count_ == 0) { - if (transaction_.get() != NULL) { - STATS_COUNTER(L"SB.TransactionCommit", 1); - transaction_->Commit(); - transaction_.reset(); - } - } -} - -void SafeBrowsingDatabase::GetCachedFullHashes( - const std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - Time last_update) { - DCHECK(prefix_hits && full_hits); - - Time max_age = Time::Now() - TimeDelta::FromMinutes(kMaxStalenessMinutes); - - for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin(); - it != prefix_hits->end(); ++it) { - HashCache::iterator hit = hash_cache_.find(*it); - if (hit != hash_cache_.end()) { - HashList& entries = hit->second; - HashList::iterator eit = entries.begin(); - while (eit != entries.end()) { - // An entry is valid if we've received an update in the past 45 minutes, - // or if this particular GetHash was received in the past 45 minutes. 
- if (max_age < last_update || eit->received > max_age) { - SBFullHashResult full_hash; - memcpy(&full_hash.hash.full_hash, - &eit->full_hash.full_hash, - sizeof(SBFullHash)); - full_hash.list_name = GetListName(eit->list_id); - full_hash.add_chunk_id = eit->add_chunk_id; - full_hits->push_back(full_hash); - ++eit; - } else { - // Evict the expired entry. - eit = entries.erase(eit); - } - } - - if (entries.empty()) - hash_cache_.erase(hit); - } - } -} - -void SafeBrowsingDatabase::CacheHashResults( - const std::vector<SBPrefix>& prefixes, - const std::vector<SBFullHashResult>& full_hits) { - if (full_hits.empty()) { - // These prefixes returned no results, so we store them in order to prevent - // asking for them again. We flush this cache at the next update. - for (std::vector<SBPrefix>::const_iterator it = prefixes.begin(); - it != prefixes.end(); ++it) { - prefix_miss_cache_.insert(*it); - } - return; - } - - const Time now = Time::Now(); - for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin(); - it != full_hits.end(); ++it) { - SBPrefix prefix; - memcpy(&prefix, &it->hash.full_hash, sizeof(prefix)); - HashList& entries = hash_cache_[prefix]; - HashCacheEntry entry; - entry.received = now; - entry.list_id = GetListID(it->list_name); - entry.add_chunk_id = it->add_chunk_id; - memcpy(&entry.full_hash, &it->hash.full_hash, sizeof(SBFullHash)); - entries.push_back(entry); - } -} - -void SafeBrowsingDatabase::ClearCachedHashes(const SBEntry* entry) { - for (int i = 0; i < entry->prefix_count(); ++i) { - SBPrefix prefix; - if (entry->type() == SBEntry::SUB_FULL_HASH) - memcpy(&prefix, &entry->FullHashAt(i), sizeof(SBPrefix)); - else - prefix = entry->PrefixAt(i); - - HashCache::iterator it = hash_cache_.find(prefix); - if (it != hash_cache_.end()) - hash_cache_.erase(it); - } -} - -// This clearing algorithm is a little inefficient, but we don't expect there to -// be too many entries for this to matter. 
Also, this runs as a background task -// during an update, so no user action is blocking on it. -void SafeBrowsingDatabase::ClearCachedHashesForChunk(int list_id, - int add_chunk_id) { - HashCache::iterator it = hash_cache_.begin(); - while (it != hash_cache_.end()) { - HashList& entries = it->second; - HashList::iterator eit = entries.begin(); - while (eit != entries.end()) { - if (eit->list_id == list_id && eit->add_chunk_id == add_chunk_id) - eit = entries.erase(eit); - else - ++eit; - } - if (entries.empty()) - it = hash_cache_.erase(it); - else - ++it; - } -} - -void SafeBrowsingDatabase::HandleCorruptDatabase() { - MessageLoop::current()->PostTask(FROM_HERE, - reset_factory_.NewRunnableMethod( - &SafeBrowsingDatabase::OnHandleCorruptDatabase)); -} - -void SafeBrowsingDatabase::OnHandleCorruptDatabase() { - ResetDatabase(); - DCHECK(false) << "SafeBrowsing database was corrupt and reset"; -} - -void SafeBrowsingDatabase::HandleResume() { - disk_delay_ = kOnResumeHoldupMs; - MessageLoop::current()->PostDelayedTask( - FROM_HERE, - resume_factory_.NewRunnableMethod(&SafeBrowsingDatabase::OnResumeDone), - kOnResumeHoldupMs); -} - -void SafeBrowsingDatabase::OnResumeDone() { - disk_delay_ = kMaxThreadHoldupMs; -} diff --git a/chrome/browser/safe_browsing/safe_browsing_database.h b/chrome/browser/safe_browsing/safe_browsing_database.h index 872f76e..794020e 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.h +++ b/chrome/browser/safe_browsing/safe_browsing_database.h @@ -5,19 +5,16 @@ #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__ #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__ -#include <list> -#include <queue> +#include <deque> +#include <string> #include <vector> -#include "base/hash_tables.h" #include "base/scoped_ptr.h" #include "base/task.h" #include "base/time.h" +#include "chrome/browser/safe_browsing/bloom_filter.h" #include "chrome/browser/safe_browsing/safe_browsing_util.h" -#include 
"chrome/common/sqlite_compiled_statement.h" -#include "chrome/common/sqlite_utils.h" -class BloomFilter; class GURL; // Encapsulates the database that stores information about phishing and malware @@ -27,277 +24,82 @@ class GURL; // exception of NeedToCheckUrl. class SafeBrowsingDatabase { public: - SafeBrowsingDatabase(); - ~SafeBrowsingDatabase(); + // Factory method for obtaining a SafeBrowsingDatabase implementation. + static SafeBrowsingDatabase* Create(); + + virtual ~SafeBrowsingDatabase() {} // Initializes the database with the given filename. The callback is // executed after finishing a chunk. - bool Init(const std::wstring& filename, - Callback0::Type* chunk_inserted_callback); + virtual bool Init(const std::wstring& filename, + Callback0::Type* chunk_inserted_callback) = 0; // Deletes the current database and creates a new one. - bool ResetDatabase(); + virtual bool ResetDatabase() = 0; // This function can be called on any thread to check if the given url may be // in the database. If this function returns false, it is definitely not in // the database and ContainsUrl doesn't need to be called. If it returns // true, then the url might be in the database and ContainsUrl needs to be // called. This function can only be called after Init succeeded. - bool NeedToCheckUrl(const GURL& url); + virtual bool NeedToCheckUrl(const GURL& url); // Returns false if the given url is not in the database. If it returns // true, then either "list" is the name of the matching list, or prefix_hits // contains the matching hash prefixes. - bool ContainsUrl(const GURL& url, - std::string* matching_list, - std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - Time last_update); + virtual bool ContainsUrl(const GURL& url, + std::string* matching_list, + std::vector<SBPrefix>* prefix_hits, + std::vector<SBFullHashResult>* full_hits, + Time last_update) = 0; // Processes add/sub commands. Database will free the chunks when it's done. 
- void InsertChunks(const std::string& list_name, std::deque<SBChunk>* chunks); + virtual void InsertChunks(const std::string& list_name, + std::deque<SBChunk>* chunks) = 0; // Processs adddel/subdel commands. Database will free chunk_deletes when // it's done. - void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes); + virtual void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes) = 0; // Returns the lists and their add/sub chunks. - void GetListsInfo(std::vector<SBListChunkRanges>* lists); + virtual void GetListsInfo(std::vector<SBListChunkRanges>* lists) = 0; // Call this to make all database operations synchronous. While useful for // testing, this should never be called in chrome.exe because it can lead // to blocking user requests. - void set_synchronous() { asynchronous_ = false; } + virtual void SetSynchronous() = 0; // Store the results of a GetHash response. In the case of empty results, we // cache the prefixes until the next update so that we don't have to issue // further GetHash requests we know will be empty. - void CacheHashResults(const std::vector<SBPrefix>& prefixes, - const std::vector<SBFullHashResult>& full_hits); + virtual void CacheHashResults( + const std::vector<SBPrefix>& prefixes, + const std::vector<SBFullHashResult>& full_hits) = 0; // Called when the user's machine has resumed from a lower power state. - void HandleResume(); - - private: - friend class SafeBrowsing_HashCaching_Test; - - // Opens the database. - bool Open(); - - // Closes the database. - bool Close(); - - // Creates the SQL tables. - bool CreateTables(); - - // Checks the database version and if it's incompatible with the current one, - // resets the database. - bool CheckCompatibleVersion(); - - // Updates, or adds if new, a hostkey's record with the given add/sub entry. - // If this is a sub, removes the given prefixes, or all if prefixes is empty, - // from host_key's record. 
If persist is true, then if the add_chunk_id isn't - // found the entry will store this sub information for future reference. - // Otherwise the entry will not be modified if there are no matches. - void UpdateInfo(SBPrefix host, SBEntry* entry, bool persist); - - // Returns true if any of the given prefixes exist for the given host. - // Also returns the matching list or any prefix matches. - void CheckUrl(const std::string& host, - SBPrefix host_key, - const std::vector<std::string>& paths, - std::string* matching_list, - std::vector<SBPrefix>* prefix_hits); - - enum ChunkType { - ADD_CHUNK = 0, - SUB_CHUNK = 1, - }; - - // Adds information about the given chunk to the chunks table. - void AddChunkInformation(int list_id, - ChunkType type, - int chunk_id, - const std::string& hostkeys); // only used for add - - // Return a comma separated list of chunk ids that are in the database for - // the given list and chunk type. - void GetChunkIds(int list_id, ChunkType type, std::string* list); - - // Checks if a chunk is in the database. - bool ChunkExists(int list_id, ChunkType type, int chunk_id); - - // Removes the given id from our list of chunk ids. - void RemoveChunkId(int list_id, ChunkType type, int chunk_id); - - // Reads the host's information from the database. Returns true if it was - // found, or false otherwise. - bool ReadInfo(int host_key, SBHostInfo* info, int* id); - - // Writes the host's information to the database, overwriting any existing - // information for that host_key if it existed. - void WriteInfo(int host_key, const SBHostInfo& info, int id); - - // Deletes existing information for the given hostkey. - void DeleteInfo(int host_key); - - // Adds the given list to the database. Returns its row id. - int AddList(const std::string& name); - - // Given a list name, returns its internal id. If we haven't seen it before, - // an id is created and stored in the database. On error, returns 0. 
- int GetListID(const std::string& name); - - // Given a list id, returns its name. - std::string GetListName(int id); + virtual void HandleResume() = 0; + protected: static std::wstring BloomFilterFilename(const std::wstring& db_filename); // Load the bloom filter off disk. Generates one if it can't find it. - void LoadBloomFilter(); + virtual void LoadBloomFilter(); // Deletes the on-disk bloom filter, i.e. because it's stale. - void DeleteBloomFilter(); + virtual void DeleteBloomFilter(); // Writes the current bloom filter to disk. - void WriteBloomFilter(); - - // Adds the host to the bloom filter. - void AddHostToBloomFilter(int host_key); - - // Generate a bloom filter. - void BuildBloomFilter(); - - // Used when generating the bloom filter. Reads a small number of hostkeys - // starting at the given row id. - void OnReadHostKeys(int start_id); - - // Called when we finished reading all the hostkeys from the database during - // bloom filter generation. - void OnDoneReadingHostKeys(); - - void StartThrottledWork(); - void RunThrottledWork(); - - // Used when processing an add-del, add chunk and sub chunk commands in small - // batches so that the db thread is never blocked. They return true if - // complete, or false if there's still more work to do. - bool ProcessChunks(); - bool ProcessAddDel(); - - bool ProcessAddChunks(std::deque<SBChunk>* chunks); - bool ProcessSubChunks(std::deque<SBChunk>* chunks); - - void BeginTransaction(); - void EndTransaction(); + virtual void WriteBloomFilter(); - // Processes an add-del command, which deletes all the prefixes that came - // from that add chunk id. - void AddDel(const std::string& list_name, int add_chunk_id); + // Implementation specific bloom filter building. + virtual void BuildBloomFilter() = 0; + virtual void AddHostToBloomFilter(int host_key) = 0; - // Processes a sub-del command, which just removes the sub chunk id from - // our list. 
- void SubDel(const std::string& list_name, int sub_chunk_id); + // Measuring false positive rate. Call this each time we look in the filter. + virtual void IncrementBloomFilterReadCount() = 0; - // Looks up any cached full hashes we may have. - void GetCachedFullHashes(const std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - Time last_update); - - // Remove cached entries that have prefixes contained in the entry. - void ClearCachedHashes(const SBEntry* entry); - - // Remove all GetHash entries that match the list and chunk id from an AddDel. - void ClearCachedHashesForChunk(int list_id, int add_chunk_id); - - void HandleCorruptDatabase(); - void OnHandleCorruptDatabase(); - - // Runs a small amount of time after the machine has resumed operation from - // a low power state. - void OnResumeDone(); - - // The database connection. - sqlite3* db_; - - // Cache of compiled statements for our database. - scoped_ptr<SqliteStatementCache> statement_cache_; - - int transaction_count_; - scoped_ptr<SQLTransaction> transaction_; - - // True iff the database has been opened successfully. - bool init_; - - std::wstring filename_; - - // Used by the bloom filter. std::wstring bloom_filter_filename_; scoped_ptr<BloomFilter> bloom_filter_; - int bloom_filter_read_count_; - int bloom_filter_fp_count_; - - // These are temp variables used when rebuilding the bloom filter. - bool bloom_filter_building_; - std::vector<int> bloom_filter_temp_hostkeys_; - int bloom_filter_last_hostkey_; - Time bloom_filter_rebuild_time_; - - // Used to store throttled work for commands that write to the database. - std::queue<std::deque<SBChunk>*> pending_chunks_; - - // Used during processing of an add chunk. 
- std::string add_chunk_modified_hosts_; - - struct AddDelWork { - int list_id; - int add_chunk_id; - std::vector<std::string> hostkeys; - }; - - std::queue<AddDelWork> pending_add_del_; - - // Controls whether database writes are done synchronously in one go or - // asynchronously in small chunks. - bool asynchronous_; - - // Called after an add/sub chunk is processed. - Callback0::Type* chunk_inserted_callback_; - - // Used to schedule small bits of work when writing to the database. - ScopedRunnableMethodFactory<SafeBrowsingDatabase> process_factory_; - - // Used to schedule reading the database to rebuild the bloom filter. - ScopedRunnableMethodFactory<SafeBrowsingDatabase> bloom_read_factory_; - - // Used to schedule writing the bloom filter after an update. - ScopedRunnableMethodFactory<SafeBrowsingDatabase> bloom_write_factory_; - - // Used to schedule resetting the database because of corruption. - ScopedRunnableMethodFactory<SafeBrowsingDatabase> reset_factory_; - - // Used to schedule resuming from a lower power state. - ScopedRunnableMethodFactory<SafeBrowsingDatabase> resume_factory_; - - // Used for caching GetHash results. - typedef struct HashCacheEntry { - SBFullHash full_hash; - int list_id; - int add_chunk_id; - Time received; - } HashCacheEntry; - - typedef std::list<HashCacheEntry> HashList; - typedef base::hash_map<SBPrefix, HashList> HashCache; - HashCache hash_cache_; - - // Cache of prefixes that returned empty results (no full hash match). - std::set<SBPrefix> prefix_miss_cache_; - - // The amount of time, in milliseconds, to wait before the next disk write. 
- int disk_delay_; - - DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingDatabase); }; #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H__ diff --git a/chrome/browser/safe_browsing/safe_browsing_database_impl.cc b/chrome/browser/safe_browsing/safe_browsing_database_impl.cc new file mode 100644 index 0000000..556f780 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_database_impl.cc @@ -0,0 +1,1216 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/safe_browsing_database_impl.h" + +#include "base/file_util.h" +#include "base/logging.h" +#include "base/message_loop.h" +#include "base/sha2.h" +#include "base/string_util.h" +#include "chrome/browser/safe_browsing/bloom_filter.h" +#include "chrome/browser/safe_browsing/chunk_range.h" +#include "chrome/common/sqlite_compiled_statement.h" +#include "chrome/common/sqlite_utils.h" +#include "googleurl/src/gurl.h" + +// Database version. If this is different than what's stored on disk, the +// database is reset. +static const int kDatabaseVersion = 4; + +// Don't want to create too small of a bloom filter initially while we're +// downloading the data and then keep having to rebuild it. +static const int kBloomFilterMinSize = 250000; + +// How many bits to use per item. See the design doc for more information. +static const int kBloomFilterSizeRatio = 13; + +// The minimum number of reads/misses before we will consider rebuilding the +// bloom filter. This is needed because we don't want a few misses after +// starting the browser to skew the percentage. +// TODO(jabdelmalek): report to UMA how often we rebuild. +static const int kBloomFilterMinReadsToCheckFP = 200; + +// The percentage of hit rate in the bloom filter when we regenerate it. 
+static const double kBloomFilterMaxFPRate = 5.0; + +// When we awake from a low power state, we try to avoid doing expensive disk +// operations for a few minutes to let the system page itself in and settle +// down. +static const int kOnResumeHoldupMs = 5 * 60 * 1000; // 5 minutes. + +// When doing any database operations that can take a long time, we do it in +// small chunks up to this amount. Once this much time passes, we sleep for +// the same amount and continue. This avoids blocking the thread so that if +// we get a bloom filter hit, we don't block the network request. +static const int kMaxThreadHoldupMs = 100; + +// How long to wait after updating the database to write the bloom filter. +static const int kBloomFilterWriteDelayMs = (60 * 1000); + +// The maximum staleness for a cached entry. +static const int kMaxStalenessMinutes = 45; + +// Implementation -------------------------------------------------------------- + +SafeBrowsingDatabaseImpl::SafeBrowsingDatabaseImpl() + : db_(NULL), + init_(false), + transaction_count_(0), + asynchronous_(true), + chunk_inserted_callback_(NULL), +#pragma warning(suppress: 4355) // can use this + bloom_read_factory_(this), +#pragma warning(suppress: 4355) // can use this + bloom_write_factory_(this), +#pragma warning(suppress: 4355) // can use this + process_factory_(this), +#pragma warning(suppress: 4355) // can use this + reset_factory_(this), +#pragma warning(suppress: 4355) // can use this + resume_factory_(this), + disk_delay_(kMaxThreadHoldupMs) { +} + +SafeBrowsingDatabaseImpl::~SafeBrowsingDatabaseImpl() { + Close(); +} + +bool SafeBrowsingDatabaseImpl::Init(const std::wstring& filename, + Callback0::Type* chunk_inserted_callback) { + DCHECK(!init_ && filename_.empty()); + + filename_ = filename; + if (!Open()) + return false; + + bool load_filter = false; + if (!DoesSqliteTableExist(db_, "hosts")) { + if (!CreateTables()) { + // Database could be corrupt, try starting from scratch. 
+ if (!ResetDatabase()) + return false; + } + } else if (!CheckCompatibleVersion()) { + if (!ResetDatabase()) + return false; + } else { + load_filter = true; + } + + bloom_filter_filename_ = BloomFilterFilename(filename_); + + if (load_filter) { + LoadBloomFilter(); + } else { + bloom_filter_.reset( + new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); + } + + init_ = true; + chunk_inserted_callback_ = chunk_inserted_callback; + return true; +} + +bool SafeBrowsingDatabaseImpl::Open() { + if (sqlite3_open(WideToUTF8(filename_).c_str(), &db_) != SQLITE_OK) + return false; + + // Run the database in exclusive mode. Nobody else should be accessing the + // database while we're running, and this will give somewhat improved perf. + sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL); + + statement_cache_.reset(new SqliteStatementCache(db_)); + bloom_filter_read_count_= 0; + bloom_filter_fp_count_ = 0; + bloom_filter_building_ = false; + + process_factory_.RevokeAll(); + bloom_read_factory_.RevokeAll(); + bloom_write_factory_.RevokeAll(); + + return true; +} + +bool SafeBrowsingDatabaseImpl::Close() { + if (!db_) + return true; + + process_factory_.RevokeAll(); + bloom_read_factory_.RevokeAll(); + bloom_write_factory_.RevokeAll(); + + if (!pending_add_del_.empty()) { + while (!pending_add_del_.empty()) + pending_add_del_.pop(); + + EndTransaction(); + } + + while (!pending_chunks_.empty()) { + std::deque<SBChunk>* chunks = pending_chunks_.front(); + safe_browsing_util::FreeChunks(chunks); + delete chunks; + pending_chunks_.pop(); + EndTransaction(); + } + + statement_cache_.reset(); // Must free statements before closing DB. + transaction_.reset(); + bool result = sqlite3_close(db_) == SQLITE_OK; + db_ = NULL; + return result; +} + +bool SafeBrowsingDatabaseImpl::CreateTables() { + SQLTransaction transaction(db_); + transaction.Begin(); + + // We use an autoincrement integer as the primary key to allow full table + // scans to be quick. 
Otherwise if we used host, then we'd have to jump + // all over the table when doing a full table scan to generate the bloom + // filter and that's an order of magnitude slower. By marking host as + // unique, an index is created automatically. + if (sqlite3_exec(db_, "CREATE TABLE hosts (" + "id INTEGER PRIMARY KEY AUTOINCREMENT," + "host INTEGER UNIQUE," + "entries BLOB)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; + } + + if (sqlite3_exec(db_, "CREATE TABLE chunks (" + "list_id INTEGER," + "chunk_type INTEGER," + "chunk_id INTEGER," + "hostkeys TEXT)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; + } + + if (sqlite3_exec(db_, "CREATE TABLE list_names (" + "id INTEGER PRIMARY KEY AUTOINCREMENT," + "name TEXT)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; + } + + sqlite3_exec(db_, "CREATE INDEX chunks_chunk_id ON chunks(chunk_id)", + NULL, NULL, NULL); + + std::string version = "PRAGMA user_version="; + version += StringPrintf("%d", kDatabaseVersion); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + if (statement->step() != SQLITE_DONE) + return false; + + transaction.Commit(); + return true; +} + +// The SafeBrowsing service assumes this operation is synchronous. 
+bool SafeBrowsingDatabaseImpl::ResetDatabase() { + hash_cache_.clear(); + prefix_miss_cache_.clear(); + + bool rv = Close(); + DCHECK(rv); + + if (!file_util::Delete(filename_, false)) { + NOTREACHED(); + return false; + } + + bloom_filter_.reset( + new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); + file_util::Delete(bloom_filter_filename_, false); + + if (!Open()) + return false; + + return CreateTables(); +} + +bool SafeBrowsingDatabaseImpl::CheckCompatibleVersion() { + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "PRAGMA user_version"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + int result = statement->step(); + if (result != SQLITE_ROW) + return false; + + return statement->column_int(0) == kDatabaseVersion; +} + +bool SafeBrowsingDatabaseImpl::ContainsUrl( + const GURL& url, + std::string* matching_list, + std::vector<SBPrefix>* prefix_hits, + std::vector<SBFullHashResult>* full_hits, + Time last_update) { + matching_list->clear(); + prefix_hits->clear(); + if (!init_) { + DCHECK(false); + return false; + } + + if (!url.is_valid()) + return false; + + std::vector<std::string> hosts, paths; + safe_browsing_util::GenerateHostsToCheck(url, &hosts); + safe_browsing_util::GeneratePathsToCheck(url, &paths); + if (hosts.size() == 0) + return false; + + // Per the spec, if there is at least 3 components, check both the most + // significant three components and the most significant two components. + // If only two components, check the most significant two components. + // If it's an IP address, use the entire IP address as the host. 
+ SBPrefix host_key_2, host_key_3, host_key_ip; + if (url.HostIsIPAddress()) { + base::SHA256HashString(url.host() + "/", &host_key_ip, sizeof(SBPrefix)); + CheckUrl(url.host(), host_key_ip, paths, matching_list, prefix_hits); + } else { + base::SHA256HashString(hosts[0] + "/", &host_key_2, sizeof(SBPrefix)); + if (hosts.size() > 1) + base::SHA256HashString(hosts[1] + "/", &host_key_3, sizeof(SBPrefix)); + + for (size_t i = 0; i < hosts.size(); ++i) { + SBPrefix host_key = i == 0 ? host_key_2 : host_key_3; + CheckUrl(hosts[i], host_key, paths, matching_list, prefix_hits); + } + } + + if (!matching_list->empty() || !prefix_hits->empty()) { + // If all the prefixes are cached as 'misses', don't issue a GetHash. + bool all_misses = true; + for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin(); + it != prefix_hits->end(); ++it) { + if (prefix_miss_cache_.find(*it) == prefix_miss_cache_.end()) { + all_misses = false; + break; + } + } + if (all_misses) + return false; + GetCachedFullHashes(prefix_hits, full_hits, last_update); + return true; + } + + // Check if we're getting too many FPs in the bloom filter, in which case + // it's time to rebuild it. + bloom_filter_fp_count_++; + if (!bloom_filter_building_ && + bloom_filter_read_count_ > kBloomFilterMinReadsToCheckFP) { + double fp_rate = bloom_filter_fp_count_ * 100 / bloom_filter_read_count_; + if (fp_rate > kBloomFilterMaxFPRate) { + DeleteBloomFilter(); + MessageLoop::current()->PostTask(FROM_HERE, + bloom_read_factory_.NewRunnableMethod( + &SafeBrowsingDatabaseImpl::BuildBloomFilter)); + } + } + + return false; +} + +void SafeBrowsingDatabaseImpl::CheckUrl(const std::string& host, + SBPrefix host_key, + const std::vector<std::string>& paths, + std::string* matching_list, + std::vector<SBPrefix>* prefix_hits) { + // First see if there are any entries in the db for this host. + SBHostInfo info; + if (!ReadInfo(host_key, &info, NULL)) + return; // No hostkey found. This is definitely safe. 
+
+  std::vector<SBFullHash> prefixes;
+  prefixes.resize(paths.size());
+  for (size_t i = 0; i < paths.size(); ++i)
+    base::SHA256HashString(host + paths[i], &prefixes[i], sizeof(SBFullHash));
+
+  std::vector<SBPrefix> hits;
+  int list_id = -1;
+  if (!info.Contains(prefixes, &list_id, &hits))
+    return;
+
+  if (list_id != -1) {
+    *matching_list = GetListName(list_id);
+  } else if (hits.empty()) {
+    prefix_hits->push_back(host_key);
+  } else {
+    for (size_t i = 0; i < hits.size(); ++i)
+      prefix_hits->push_back(hits[i]);
+  }
+}
+
+// Loads the hosts row for |host_key| into |info|.  When |id| is non-NULL it
+// receives the row id.  Returns false if there is no such row, if the
+// statement fails, or if |info| cannot be initialized from the stored blob.
+bool SafeBrowsingDatabaseImpl::ReadInfo(int host_key,
+                                        SBHostInfo* info,
+                                        int* id) {
+  STATS_COUNTER(L"SB.HostSelect", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT id, entries FROM hosts WHERE host=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  statement->bind_int(0, host_key);
+  int result = statement->step();
+  if (result == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+    return false;
+  }
+
+  // SQLITE_DONE means the query ran but matched nothing.
+  if (result == SQLITE_DONE)
+    return false;
+
+  if (result != SQLITE_ROW) {
+    DLOG(ERROR) << "SafeBrowsingDatabaseImpl got "
+        "statement->step() != SQLITE_ROW for "
+        << host_key;
+    return false;
+  }
+
+  if (id)
+    *id = statement->column_int(0);
+
+  return info->Initialize(statement->column_blob(1),
+                          statement->column_bytes(1));
+}
+
+// Stores |info| for |host_key|.  An |id| of 0 inserts a row without naming an
+// id; any other value replaces the row with that id.  The host is also added
+// to the bloom filter.
+void SafeBrowsingDatabaseImpl::WriteInfo(int host_key,
+                                         const SBHostInfo& info,
+                                         int id) {
+  SQLITE_UNIQUE_STATEMENT(statement1, *statement_cache_,
+      "INSERT OR REPLACE INTO hosts"
+      "(host,entries)"
+      "VALUES (?,?)");
+
+  SQLITE_UNIQUE_STATEMENT(statement2, *statement_cache_,
+      "INSERT OR REPLACE INTO hosts"
+      "(id,host,entries)"
+      "VALUES (?,?,?)");
+
+  SqliteCompiledStatement& statement = id == 0 ? statement1 : statement2;
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  // The id, when present, is the first bound parameter; the remaining
+  // parameters shift by one accordingly.
+  int start_index = 0;
+  if (id != 0) {
+    statement->bind_int(start_index++, id);
+    STATS_COUNTER(L"SB.HostReplace", 1);
+  } else {
+    STATS_COUNTER(L"SB.HostInsert", 1);
+  }
+
+  statement->bind_int(start_index++, host_key);
+  statement->bind_blob(start_index++, info.data(), info.size());
+  int rv = statement->step();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+  } else {
+    DCHECK(rv == SQLITE_DONE);
+  }
+  AddHostToBloomFilter(host_key);
+}
+
+// Removes the hosts row for |host_key|.
+void SafeBrowsingDatabaseImpl::DeleteInfo(int host_key) {
+  STATS_COUNTER(L"SB.HostDelete", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "DELETE FROM hosts WHERE host=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, host_key);
+  int rv = statement->step();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+  } else {
+    DCHECK(rv == SQLITE_DONE);
+  }
+}
+
+// Starts RunThrottledWork unless process_factory_ already has a task
+// outstanding.
+void SafeBrowsingDatabaseImpl::StartThrottledWork() {
+  if (process_factory_.empty())
+    RunThrottledWork();
+}
+
+// Works through the pending chunks and then the pending add-dels.  Whenever a
+// step reports that more work remains, either reschedules itself
+// (asynchronous mode) or sleeps and loops (synchronous mode).
+void SafeBrowsingDatabaseImpl::RunThrottledWork() {
+  prefix_miss_cache_.clear();
+  while (true) {
+    bool done = ProcessChunks();
+
+    if (done)
+      done = ProcessAddDel();
+
+    if (done)
+      break;
+
+    if (asynchronous_) {
+      // For production code, we want to throttle by calling InvokeLater to
+      // continue the work after a delay.  However for unit tests we depend on
+      // updates to happen synchronously.
+      MessageLoop::current()->PostDelayedTask(FROM_HERE,
+          process_factory_.NewRunnableMethod(
+              &SafeBrowsingDatabaseImpl::RunThrottledWork), disk_delay_);
+      break;
+    } else {
+      Sleep(kMaxThreadHoldupMs);
+    }
+  }
+}
+
+// Tags every entry in |chunks| with the id of |list_name| (add entries also
+// get their chunk number), queues |chunks| for throttled processing and opens
+// the transaction that ProcessChunks closes once the batch is done.
+void SafeBrowsingDatabaseImpl::InsertChunks(const std::string& list_name,
+                                            std::deque<SBChunk>* chunks) {
+  // We're going to be updating the bloom filter, so delete the on-disk
+  // serialization so that if the process crashes we'll generate a new one on
+  // startup, instead of reading a stale filter.
+  DeleteBloomFilter();
+
+  int list_id = GetListID(list_name);
+  std::deque<SBChunk>::iterator i = chunks->begin();
+  for (; i != chunks->end(); ++i) {
+    SBChunk& chunk = (*i);
+    std::deque<SBChunkHost>::iterator j = chunk.hosts.begin();
+    for (; j != chunk.hosts.end(); ++j) {
+      j->entry->set_list_id(list_id);
+      if (j->entry->IsAdd())
+        j->entry->set_chunk_id(chunk.chunk_number);
+    }
+  }
+
+  pending_chunks_.push(chunks);
+
+  BeginTransaction();
+  StartThrottledWork();
+}
+
+// Processes queued chunk batches.  Returns false as soon as a batch runs out
+// of its time slice, and true once the queue is empty.  A finished batch is
+// deleted and its transaction (opened in InsertChunks) is ended.
+// NOTE(review): assumes each queued deque and the first chunk's host list are
+// non-empty -- confirm the protocol parser guarantees that.
+bool SafeBrowsingDatabaseImpl::ProcessChunks() {
+  if (pending_chunks_.empty())
+    return true;
+
+  while (!pending_chunks_.empty()) {
+    std::deque<SBChunk>* chunks = pending_chunks_.front();
+    bool done = false;
+    // The entries in one chunk are all either adds or subs.
+    if (chunks->front().hosts.front().entry->IsAdd()) {
+      done = ProcessAddChunks(chunks);
+    } else {
+      done = ProcessSubChunks(chunks);
+    }
+
+    if (!done)
+      return false;
+
+    delete chunks;
+    pending_chunks_.pop();
+    EndTransaction();
+  }
+
+  if (!bloom_filter_building_) {
+    if (asynchronous_) {
+      // When we're updating, there will usually be a bunch of pending_chunks_
+      // to process, and we don't want to keep writing the bloom filter to disk
+      // 10 or 20 times unnecessarily.  So schedule to write it in a minute, and
+      // if any new updates happen in the meantime, push that forward.
+      if (!bloom_write_factory_.empty())
+        bloom_write_factory_.RevokeAll();
+
+      MessageLoop::current()->PostDelayedTask(FROM_HERE,
+          bloom_write_factory_.NewRunnableMethod(
+              &SafeBrowsingDatabaseImpl::WriteBloomFilter),
+          kBloomFilterWriteDelayMs);
+    } else {
+      WriteBloomFilter();
+    }
+  }
+
+  if (chunk_inserted_callback_)
+    chunk_inserted_callback_->Run();
+
+  return true;
+}
+
+// Destroys every SBEntry still attached to |chunk| and empties its host list.
+// Used for chunks that are skipped because the database already has them;
+// without this their entries would never be released.
+static void DestroyChunkEntries(SBChunk* chunk) {
+  while (!chunk->hosts.empty()) {
+    chunk->hosts.front().entry->Destroy();
+    chunk->hosts.pop_front();
+  }
+}
+
+// Applies the add chunks in |chunks|, consuming them from the front.  Returns
+// false when the time slice (kMaxThreadHoldupMs) is used up with hosts still
+// left in the current chunk, and true when |chunks| has been emptied.
+bool SafeBrowsingDatabaseImpl::ProcessAddChunks(std::deque<SBChunk>* chunks) {
+  Time before = Time::Now();
+  while (!chunks->empty()) {
+    SBChunk& chunk = chunks->front();
+    int list_id = chunk.hosts.front().entry->list_id();
+    int chunk_id = chunk.chunk_number;
+
+    // The server can give us a chunk that we already have because it's part of
+    // a range.  Don't add it again.
+    if (!ChunkExists(list_id, ADD_CHUNK, chunk_id)) {
+      while (!chunk.hosts.empty()) {
+        // Read the existing record for this host, if it exists.
+        SBPrefix host = chunk.hosts.front().host;
+        SBEntry* entry = chunk.hosts.front().entry;
+
+        UpdateInfo(host, entry, false);
+
+        // Remember which hosts this chunk touched so that a later add-del can
+        // find them again (see AddDel).
+        if (!add_chunk_modified_hosts_.empty())
+          add_chunk_modified_hosts_.append(",");
+
+        add_chunk_modified_hosts_.append(StringPrintf("%d", host));
+
+        entry->Destroy();
+        chunk.hosts.pop_front();
+        if (!chunk.hosts.empty() &&
+            (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
+          return false;
+        }
+      }
+
+      AddChunkInformation(list_id, ADD_CHUNK, chunk_id,
+                          add_chunk_modified_hosts_);
+      add_chunk_modified_hosts_.clear();
+    } else {
+      // Already stored: nothing to apply, but the entries are still ours to
+      // release.
+      DestroyChunkEntries(&chunk);
+    }
+
+    chunks->pop_front();
+  }
+
+  return true;
+}
+
+// Applies the sub chunks in |chunks|, consuming them from the front.  Same
+// return convention as ProcessAddChunks.
+bool SafeBrowsingDatabaseImpl::ProcessSubChunks(std::deque<SBChunk>* chunks) {
+  Time before = Time::Now();
+  while (!chunks->empty()) {
+    SBChunk& chunk = chunks->front();
+    int list_id = chunk.hosts.front().entry->list_id();
+    int chunk_id = chunk.chunk_number;
+
+    if (!ChunkExists(list_id, SUB_CHUNK, chunk_id)) {
+      while (!chunk.hosts.empty()) {
+        SBPrefix host = chunk.hosts.front().host;
+        SBEntry* entry = chunk.hosts.front().entry;
+        UpdateInfo(host, entry, true);
+
+        entry->Destroy();
+        chunk.hosts.pop_front();
+        if (!chunk.hosts.empty() &&
+            (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
+          return false;
+        }
+      }
+
+      AddChunkInformation(list_id, SUB_CHUNK, chunk_id, "");
+    } else {
+      // Already stored: nothing to apply, but the entries are still ours to
+      // release.
+      DestroyChunkEntries(&chunk);
+    }
+
+    chunks->pop_front();
+  }
+
+  return true;
+}
+
+// Merges |entry| into the stored record for |host_key|: adds its prefixes, or
+// for a sub removes them (|persist| is forwarded to RemovePrefixes).  The
+// record is rewritten only if its size changed, and deleted if it became
+// empty.
+void SafeBrowsingDatabaseImpl::UpdateInfo(SBPrefix host_key,
+                                          SBEntry* entry,
+                                          bool persist) {
+  // If an existing record exists, and the new record is smaller, then reuse
+  // its entry to reduce database fragmentation.
+  int old_id = 0;
+  SBHostInfo info;
+  // If the bloom filter isn't there, then assume that the entry exists,
+  // otherwise test the bloom filter.
+  bool exists = !bloom_filter_.get() || bloom_filter_->Exists(host_key);
+  if (exists)
+    exists = ReadInfo(host_key, &info, &old_id);
+  int old_size = info.size();
+
+  if (entry->IsAdd()) {
+    info.AddPrefixes(entry);
+  } else {
+    ClearCachedHashes(entry);
+    info.RemovePrefixes(entry, persist);
+  }
+
+  if (old_size == info.size()) {
+    // The entry didn't change, so no point writing it.
+    return;
+  }
+
+  if (!info.size()) {
+    // Just delete the existing information instead of writing an empty one.
+    if (exists)
+      DeleteInfo(host_key);
+    return;
+  }
+
+  if (info.size() > old_size) {
+    // New record is larger, so just add a new entry.
+    old_id = 0;
+  }
+
+  WriteInfo(host_key, info, old_id);
+}
+
+// Handles adddel/subdel commands and deletes |chunk_deletes| when done.
+// Sub-dels are applied immediately; add-dels are queued on pending_add_del_
+// and processed by the throttled worker.
+void SafeBrowsingDatabaseImpl::DeleteChunks(
+    std::vector<SBChunkDelete>* chunk_deletes) {
+  BeginTransaction();
+  bool pending_add_del_were_empty = pending_add_del_.empty();
+
+  for (size_t i = 0; i < chunk_deletes->size(); ++i) {
+    const SBChunkDelete& chunk = (*chunk_deletes)[i];
+    std::vector<int> chunk_numbers;
+    RangesToChunks(chunk.chunk_del, &chunk_numbers);
+    for (size_t del = 0; del < chunk_numbers.size(); ++del) {
+      if (chunk.is_sub_del) {
+        SubDel(chunk.list_name, chunk_numbers[del]);
+      } else {
+        AddDel(chunk.list_name, chunk_numbers[del]);
+      }
+    }
+  }
+
+  if (pending_add_del_were_empty && !pending_add_del_.empty()) {
+    // Only start a transaction for pending AddDel work if we haven't started
+    // one already.  It is ended by ProcessAddDel once the queue drains.
+    BeginTransaction();
+    StartThrottledWork();
+  }
+
+  delete chunk_deletes;
+  EndTransaction();
+}
+
+// Queues an add-del for |add_chunk_id| of |list_name|: looks up the comma
+// separated host keys recorded for that add chunk and pushes them onto
+// pending_add_del_.  Does nothing if the chunk row cannot be read.
+void SafeBrowsingDatabaseImpl::AddDel(const std::string& list_name,
+                                      int add_chunk_id) {
+  STATS_COUNTER(L"SB.ChunkSelect", 1);
+  int list_id = GetListID(list_name);
+  // Find all the prefixes that came from the given add_chunk_id.
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT hostkeys FROM chunks WHERE "
+      "list_id=? AND chunk_type=? AND chunk_id=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  std::string hostkeys_str;
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, ADD_CHUNK);
+  statement->bind_int(2, add_chunk_id);
+  int rv = statement->step();
+  if (rv != SQLITE_ROW || !statement->column_string(0, &hostkeys_str)) {
+    if (rv == SQLITE_CORRUPT) {
+      HandleCorruptDatabase();
+    } else {
+      NOTREACHED();
+    }
+
+    return;
+  }
+
+  AddDelWork work;
+  work.list_id = list_id;
+  work.add_chunk_id = add_chunk_id;
+  pending_add_del_.push(work);
+  // Split straight into the queued copy to avoid copying the vector.
+  SplitString(hostkeys_str, ',', &pending_add_del_.back().hostkeys);
+}
+
+// Works through pending_add_del_.  Returns false when the time slice
+// (kMaxThreadHoldupMs) is used up, and true once the queue is empty, at which
+// point the transaction opened in DeleteChunks is ended.
+bool SafeBrowsingDatabaseImpl::ProcessAddDel() {
+  if (pending_add_del_.empty())
+    return true;
+
+  Time before = Time::Now();
+  while (!pending_add_del_.empty()) {
+    AddDelWork& add_del_work = pending_add_del_.front();
+    ClearCachedHashesForChunk(add_del_work.list_id, add_del_work.add_chunk_id);
+    std::vector<std::string>& hostkeys = add_del_work.hostkeys;
+    for (size_t i = 0; i < hostkeys.size(); ++i) {
+      SBPrefix host = atoi(hostkeys[i].c_str());
+      // Doesn't matter if we use SUB_PREFIX or SUB_FULL_HASH since if there
+      // are no prefixes it's not used.
+      SBEntry* entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0);
+      entry->set_list_id(add_del_work.list_id);
+      entry->set_chunk_id(add_del_work.add_chunk_id);
+      UpdateInfo(host, entry, false);
+      entry->Destroy();
+      if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
+        // Host |i| has just been handled, so drop it together with everything
+        // before it.  Leaving it in the list would redo it on resume and
+        // would make no progress at all when i == 0.
+        hostkeys.erase(hostkeys.begin(), hostkeys.begin() + i + 1);
+        return false;
+      }
+    }
+
+    RemoveChunkId(add_del_work.list_id, ADD_CHUNK, add_del_work.add_chunk_id);
+    pending_add_del_.pop();
+  }
+
+  EndTransaction();
+
+  return true;
+}
+
+// Handles a sub-del: forgets that |sub_chunk_id| of |list_name| was seen.
+void SafeBrowsingDatabaseImpl::SubDel(const std::string& list_name,
+                                      int sub_chunk_id) {
+  RemoveChunkId(GetListID(list_name), SUB_CHUNK, sub_chunk_id);
+}
+
+// Records in the chunks table that chunk |chunk_id| of |type| has been
+// processed for |list_id|.  |hostkeys| is the comma separated list of hosts it
+// touched (callers in this file pass "" for sub chunks).
+void SafeBrowsingDatabaseImpl::AddChunkInformation(
+    int list_id, ChunkType type, int chunk_id, const std::string& hostkeys) {
+  STATS_COUNTER(L"SB.ChunkInsert", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "INSERT INTO chunks"
+      "(list_id,chunk_type,chunk_id,hostkeys)"
+      "VALUES (?,?,?,?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, type);
+  statement->bind_int(2, chunk_id);
+  statement->bind_string(3, hostkeys);
+  int rv = statement->step();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+  } else {
+    DCHECK(rv == SQLITE_DONE);
+  }
+}
+
+// Fills |lists| with one element per row of list_names, each carrying the add
+// and sub chunk ranges stored for that list.
+void SafeBrowsingDatabaseImpl::GetListsInfo(
+    std::vector<SBListChunkRanges>* lists) {
+  lists->clear();
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT name,id FROM list_names");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  while (true) {
+    int rv = statement->step();
+    if (rv != SQLITE_ROW) {
+      if (rv == SQLITE_CORRUPT)
+        HandleCorruptDatabase();
+
+      break;
+    }
+    int list_id = statement->column_int(1);
+    lists->push_back(SBListChunkRanges(statement->column_string(0)));
+    GetChunkIds(list_id, ADD_CHUNK, &lists->back().adds);
+    GetChunkIds(list_id, SUB_CHUNK, &lists->back().subs);
+  }
+}
+
+// Writes to |list| the chunk ids of |type| stored for |list_id|, collapsed
+// into ranges by ChunksToRanges/RangesToString.
+void SafeBrowsingDatabaseImpl::GetChunkIds(int list_id,
+                                           ChunkType type,
+                                           std::string* list) {
+  list->clear();
+  STATS_COUNTER(L"SB.ChunkSelect", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT chunk_id FROM chunks WHERE list_id=? AND chunk_type=? "
+      "ORDER BY chunk_id");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, type);
+
+  std::vector<int> chunk_ids;
+  while (true) {
+    int rv = statement->step();
+    if (rv != SQLITE_ROW) {
+      if (rv == SQLITE_CORRUPT)
+        HandleCorruptDatabase();
+
+      break;
+    }
+    chunk_ids.push_back(statement->column_int(0));
+  }
+
+  std::vector<ChunkRange> ranges;
+  ChunksToRanges(chunk_ids, &ranges);
+  RangesToString(ranges, list);
+}
+
+// Returns true if the chunks table has a row for the given list, chunk type
+// and chunk id.
+bool SafeBrowsingDatabaseImpl::ChunkExists(int list_id,
+                                           ChunkType type,
+                                           int chunk_id) {
+  STATS_COUNTER(L"SB.ChunkSelect", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT chunk_id FROM chunks WHERE"
+      " list_id=? AND chunk_type=? AND chunk_id=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, type);
+  statement->bind_int(2, chunk_id);
+
+  int rv = statement->step();
+  if (rv == SQLITE_CORRUPT)
+    HandleCorruptDatabase();
+
+  return rv == SQLITE_ROW;
+}
+
+// Deletes the chunks row for the given list, chunk type and chunk id.
+void SafeBrowsingDatabaseImpl::RemoveChunkId(int list_id,
+                                             ChunkType type,
+                                             int chunk_id) {
+  STATS_COUNTER(L"SB.ChunkDelete", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "DELETE FROM chunks WHERE list_id=? AND chunk_type=? AND chunk_id=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, list_id);
+  statement->bind_int(1, type);
+  statement->bind_int(2, chunk_id);
+  int rv = statement->step();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+  } else {
+    DCHECK(rv == SQLITE_DONE);
+  }
+}
+
+// Inserts |name| into list_names and returns the new row id, or 0 on failure.
+int SafeBrowsingDatabaseImpl::AddList(const std::string& name) {
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "INSERT INTO list_names"
+      "(id,name)"
+      "VALUES (NULL,?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return 0;
+  }
+
+  statement->bind_string(0, name);
+  int rv = statement->step();
+  if (rv != SQLITE_DONE) {
+    if (rv == SQLITE_CORRUPT) {
+      HandleCorruptDatabase();
+    } else {
+      NOTREACHED();
+    }
+
+    return 0;
+  }
+
+  return static_cast<int>(sqlite3_last_insert_rowid(db_));
+}
+
+// Returns the id stored for list |name|, adding the list if it is not there
+// yet.  Returns 0 on error.
+int SafeBrowsingDatabaseImpl::GetListID(const std::string& name) {
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT id FROM list_names WHERE name=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return 0;
+  }
+
+  statement->bind_string(0, name);
+  int result = statement->step();
+  if (result == SQLITE_ROW)
+    return statement->column_int(0);
+
+  if (result == SQLITE_CORRUPT) {
+    // A reset has been scheduled; don't try to insert into the corrupt
+    // database as well.
+    HandleCorruptDatabase();
+    return 0;
+  }
+
+  // There isn't an existing entry so add one.
+  return AddList(name);
+}
+
+// Returns the name stored for list |id|, or an empty string if it cannot be
+// read.
+std::string SafeBrowsingDatabaseImpl::GetListName(int id) {
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT name FROM list_names WHERE id=?");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    // Must not be "return 0": that would build a std::string from a null
+    // pointer.
+    return std::string();
+  }
+
+  statement->bind_int(0, id);
+  int result = statement->step();
+  if (result != SQLITE_ROW) {
+    if (result == SQLITE_CORRUPT)
+      HandleCorruptDatabase();
+
+    return std::string();
+  }
+
+  return statement->column_string(0);
+}
+
+// Adds |host_key| to the live bloom filter and, while a rebuild is running,
+// to the list of keys the rebuilt filter will be created from.
+void SafeBrowsingDatabaseImpl::AddHostToBloomFilter(int host_key) {
+  if (bloom_filter_building_)
+    bloom_filter_temp_hostkeys_.push_back(host_key);
+  // Even if we're rebuilding the bloom filter, we still need to update the
+  // current one since we also use it to decide whether to do certain database
+  // operations during update.
+  if (bloom_filter_.get())
+    bloom_filter_->Insert(host_key);
+}
+
+// Starts rebuilding the bloom filter from the hosts table.  The transaction
+// opened here is ended in OnDoneReadingHostKeys.
+void SafeBrowsingDatabaseImpl::BuildBloomFilter() {
+  // A bloom filter needs the size at creation, however doing a select count(*)
+  // is too slow since sqlite would have to enumerate each entry to get the
+  // count.  So instead we load all the hostkeys into memory, and then when
+  // we've read all of them and have the total count, we can create the bloom
+  // filter.
+  bloom_filter_temp_hostkeys_.reserve(kBloomFilterMinSize);
+
+  bloom_filter_building_ = true;
+  bloom_filter_rebuild_time_ = Time::Now();
+
+  BeginTransaction();
+
+  OnReadHostKeys(0);
+}
+
+// Reads host keys whose row id is greater than |start_id| into
+// bloom_filter_temp_hostkeys_.  In asynchronous mode it stops after
+// kMaxThreadHoldupMs and posts itself to continue after the last id read; it
+// calls OnDoneReadingHostKeys when a pass reads no rows, or after the single
+// pass made in synchronous mode.
+void SafeBrowsingDatabaseImpl::OnReadHostKeys(int start_id) {
+  // Since reading all the keys in one go could take > 20 seconds, instead we
+  // read them in small chunks.
+  STATS_COUNTER(L"SB.HostSelectForBloomFilter", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+      "SELECT host,id FROM hosts WHERE id > ? ORDER BY id");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return;
+  }
+
+  statement->bind_int(0, start_id);
+  Time before = Time::Now();
+  int count = 0;
+
+  int next_id = start_id + 1;
+  while (true) {
+    int rv = statement->step();
+    if (rv != SQLITE_ROW) {
+      if (rv == SQLITE_CORRUPT)
+        HandleCorruptDatabase();
+
+      break;
+    }
+
+    count++;
+    bloom_filter_temp_hostkeys_.push_back(statement->column_int(0));
+    next_id = statement->column_int(1) + 1;
+    if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
+      if (asynchronous_) {
+        break;
+      } else {
+        Sleep(kMaxThreadHoldupMs);
+      }
+    }
+  }
+
+  TimeDelta chunk_time = Time::Now() - before;
+  int time_ms = static_cast<int>(chunk_time.InMilliseconds());
+  SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl read " << count
+                << " hostkeys in " << time_ms << " ms";
+
+  if (!count || !asynchronous_) {
+    OnDoneReadingHostKeys();
+    return;
+  }
+
+  // To avoid hammering the disk and disrupting other parts of Chrome that use
+  // the disk, we throttle the rebuilding.
+  MessageLoop::current()->PostDelayedTask(FROM_HERE,
+      bloom_read_factory_.NewRunnableMethod(
+          &SafeBrowsingDatabaseImpl::OnReadHostKeys, next_id),
+      disk_delay_);
+}
+
+// Creates the new bloom filter from the collected host keys, installs it,
+// writes it out and resets the rebuild state and hit-rate counters.
+void SafeBrowsingDatabaseImpl::OnDoneReadingHostKeys() {
+  EndTransaction();
+  Time before = Time::Now();
+  int number_of_keys = std::max(kBloomFilterMinSize,
+      static_cast<int>(bloom_filter_temp_hostkeys_.size()));
+  int filter_size = number_of_keys * kBloomFilterSizeRatio;
+  BloomFilter* filter = new BloomFilter(filter_size);
+  for (size_t i = 0; i < bloom_filter_temp_hostkeys_.size(); ++i)
+    filter->Insert(bloom_filter_temp_hostkeys_[i]);
+
+  bloom_filter_.reset(filter);
+
+  TimeDelta bloom_gen = Time::Now() - before;
+  TimeDelta delta = Time::Now() - bloom_filter_rebuild_time_;
+  SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in " <<
+      delta.InMilliseconds() << " ms total (" << bloom_gen.InMilliseconds()
+      << " ms to generate bloom filter). hostkey count: " <<
+      bloom_filter_temp_hostkeys_.size();
+
+  WriteBloomFilter();
+  bloom_filter_building_ = false;
+  bloom_filter_temp_hostkeys_.clear();
+  bloom_filter_read_count_ = 0;
+  bloom_filter_fp_count_ = 0;
+}
+
+// Increments the nesting count and, if no transaction object exists yet,
+// creates and begins one.
+void SafeBrowsingDatabaseImpl::BeginTransaction() {
+  transaction_count_++;
+  if (transaction_.get() == NULL) {
+    transaction_.reset(new SQLTransaction(db_));
+    if (transaction_->Begin() != SQLITE_OK) {
+      DCHECK(false) << "Safe browsing database couldn't start transaction";
+      transaction_.reset();
+    }
+  }
+}
+
+// Decrements the nesting count; when it reaches zero the open transaction, if
+// any, is committed and released.
+void SafeBrowsingDatabaseImpl::EndTransaction() {
+  if (--transaction_count_ == 0) {
+    if (transaction_.get() != NULL) {
+      STATS_COUNTER(L"SB.TransactionCommit", 1);
+      transaction_->Commit();
+      transaction_.reset();
+    }
+  }
+}
+
+// Appends to |full_hits| the cached full hashes for every prefix in
+// |prefix_hits|.  Cache entries found to be stale are evicted on the way.
+void SafeBrowsingDatabaseImpl::GetCachedFullHashes(
+    const std::vector<SBPrefix>* prefix_hits,
+    std::vector<SBFullHashResult>* full_hits,
+    Time last_update) {
+  DCHECK(prefix_hits && full_hits);
+
+  Time max_age = Time::Now() - TimeDelta::FromMinutes(kMaxStalenessMinutes);
+
+  for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin();
+       it != prefix_hits->end(); ++it) {
+    HashCache::iterator hit = hash_cache_.find(*it);
+    if (hit != hash_cache_.end()) {
+      HashList& entries = hit->second;
+      HashList::iterator eit = entries.begin();
+      while (eit != entries.end()) {
+        // An entry is valid if we've received an update within the last
+        // kMaxStalenessMinutes minutes, or if this particular GetHash was
+        // received within that window.
+        if (max_age < last_update || eit->received > max_age) {
+          SBFullHashResult full_hash;
+          memcpy(&full_hash.hash.full_hash,
+                 &eit->full_hash.full_hash,
+                 sizeof(SBFullHash));
+          full_hash.list_name = GetListName(eit->list_id);
+          full_hash.add_chunk_id = eit->add_chunk_id;
+          full_hits->push_back(full_hash);
+          ++eit;
+        } else {
+          // Evict the expired entry.
+          eit = entries.erase(eit);
+        }
+      }
+
+      if (entries.empty())
+        hash_cache_.erase(hit);
+    }
+  }
+}
+
+// Caches a GetHash response.  With no full hashes, |prefixes| are remembered
+// as misses; otherwise each full hash is stored under its own prefix.
+void SafeBrowsingDatabaseImpl::CacheHashResults(
+    const std::vector<SBPrefix>& prefixes,
+    const std::vector<SBFullHashResult>& full_hits) {
+  if (full_hits.empty()) {
+    // These prefixes returned no results, so we store them in order to prevent
+    // asking for them again. We flush this cache at the next update.
+    for (std::vector<SBPrefix>::const_iterator it = prefixes.begin();
+         it != prefixes.end(); ++it) {
+      prefix_miss_cache_.insert(*it);
+    }
+    return;
+  }
+
+  const Time now = Time::Now();
+  for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin();
+       it != full_hits.end(); ++it) {
+    // The cache key is the leading sizeof(SBPrefix) bytes of the full hash.
+    SBPrefix prefix;
+    memcpy(&prefix, &it->hash.full_hash, sizeof(prefix));
+    HashList& entries = hash_cache_[prefix];
+    HashCacheEntry entry;
+    entry.received = now;
+    entry.list_id = GetListID(it->list_name);
+    entry.add_chunk_id = it->add_chunk_id;
+    memcpy(&entry.full_hash, &it->hash.full_hash, sizeof(SBFullHash));
+    entries.push_back(entry);
+  }
+}
+
+// Drops the cached full hashes for every prefix (or full hash) in |entry|.
+void SafeBrowsingDatabaseImpl::ClearCachedHashes(const SBEntry* entry) {
+  for (int i = 0; i < entry->prefix_count(); ++i) {
+    SBPrefix prefix;
+    if (entry->type() == SBEntry::SUB_FULL_HASH)
+      memcpy(&prefix, &entry->FullHashAt(i), sizeof(SBPrefix));
+    else
+      prefix = entry->PrefixAt(i);
+
+    HashCache::iterator it = hash_cache_.find(prefix);
+    if (it != hash_cache_.end())
+      hash_cache_.erase(it);
+  }
+}
+
+// This clearing algorithm is a little inefficient, but we don't expect there to
+// be too many entries for this to matter. Also, this runs as a background task
+// during an update, so no user action is blocking on it.
+void SafeBrowsingDatabaseImpl::ClearCachedHashesForChunk(int list_id,
+                                                         int add_chunk_id) {
+  HashCache::iterator it = hash_cache_.begin();
+  while (it != hash_cache_.end()) {
+    HashList& entries = it->second;
+    HashList::iterator eit = entries.begin();
+    while (eit != entries.end()) {
+      // Both erase() calls below hand back the next iterator, so the loops
+      // advance only when nothing was removed.
+      if (eit->list_id == list_id && eit->add_chunk_id == add_chunk_id)
+        eit = entries.erase(eit);
+      else
+        ++eit;
+    }
+    // Don't leave empty per-prefix lists behind in the cache.
+    if (entries.empty())
+      it = hash_cache_.erase(it);
+    else
+      ++it;
+  }
+}
+
+// Schedules OnHandleCorruptDatabase on the current message loop instead of
+// resetting the database from inside the caller.
+void SafeBrowsingDatabaseImpl::HandleCorruptDatabase() {
+  MessageLoop::current()->PostTask(FROM_HERE,
+      reset_factory_.NewRunnableMethod(
+          &SafeBrowsingDatabaseImpl::OnHandleCorruptDatabase));
+}
+
+// Resets the database, then trips a DCHECK so that corruption is noticed in
+// debug builds.
+void SafeBrowsingDatabaseImpl::OnHandleCorruptDatabase() {
+  ResetDatabase();
+  DCHECK(false) << "SafeBrowsing database was corrupt and reset";
+}
+
+// Switches disk_delay_ to kOnResumeHoldupMs and schedules OnResumeDone to run
+// after that same interval.
+void SafeBrowsingDatabaseImpl::HandleResume() {
+  disk_delay_ = kOnResumeHoldupMs;
+  MessageLoop::current()->PostDelayedTask(
+      FROM_HERE,
+      resume_factory_.NewRunnableMethod(
+          &SafeBrowsingDatabaseImpl::OnResumeDone),
+      kOnResumeHoldupMs);
+}
+
+// Puts disk_delay_ back to kMaxThreadHoldupMs.
+void SafeBrowsingDatabaseImpl::OnResumeDone() {
+  disk_delay_ = kMaxThreadHoldupMs;
+}
+
+// Makes throttled work run to completion in the calling thread (sleeping
+// between slices) instead of being rescheduled on the message loop.
+void SafeBrowsingDatabaseImpl::SetSynchronous() {
+  asynchronous_ = false;
+}
+
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_impl.h b/chrome/browser/safe_browsing/safe_browsing_database_impl.h
new file mode 100644
index 0000000..8d5e0fe
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_database_impl.h
@@ -0,0 +1,283 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_IMPL_H__
+#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_IMPL_H__
+
+#include <deque>
+#include <list>
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/hash_tables.h"
+#include "base/scoped_ptr.h"
+#include "base/task.h"
+#include "base/time.h"
+#include "chrome/browser/safe_browsing/safe_browsing_database.h"
+#include "chrome/browser/safe_browsing/safe_browsing_util.h"
+#include "chrome/common/sqlite_compiled_statement.h"
+#include "chrome/common/sqlite_utils.h"
+
+// The reference implementation database using SQLite.
+class SafeBrowsingDatabaseImpl : public SafeBrowsingDatabase {
+ public:
+  SafeBrowsingDatabaseImpl();
+  virtual ~SafeBrowsingDatabaseImpl();
+
+  // SafeBrowsingDatabase interface:
+
+  // Initializes the database with the given filename.  The callback is
+  // executed after finishing a chunk.
+  virtual bool Init(const std::wstring& filename,
+                    Callback0::Type* chunk_inserted_callback);
+
+  // Deletes the current database and creates a new one.
+  virtual bool ResetDatabase();
+
+  // Returns false if the given url is not in the database.  If it returns
+  // true, then either |matching_list| is the name of the matching list, or
+  // |prefix_hits| contains the matching hash prefixes.
+  virtual bool ContainsUrl(const GURL& url,
+                           std::string* matching_list,
+                           std::vector<SBPrefix>* prefix_hits,
+                           std::vector<SBFullHashResult>* full_hits,
+                           Time last_update);
+
+  // Processes add/sub commands.  Database will free the chunks when it's done.
+  virtual void InsertChunks(const std::string& list_name,
+                            std::deque<SBChunk>* chunks);
+
+  // Processes adddel/subdel commands.  Database will free chunk_deletes when
+  // it's done.
+  virtual void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes);
+
+  // Returns the lists and their add/sub chunks.
+  virtual void GetListsInfo(std::vector<SBListChunkRanges>* lists);
+
+  // Makes throttled work run inline rather than being posted back to the
+  // message loop (used by unit tests).
+  virtual void SetSynchronous();
+
+  // Store the results of a GetHash response. In the case of empty results, we
+  // cache the prefixes until the next update so that we don't have to issue
+  // further GetHash requests we know will be empty.
+  virtual void CacheHashResults(const std::vector<SBPrefix>& prefixes,
+                                const std::vector<SBFullHashResult>& full_hits);
+
+  // Called when the user's machine has resumed from a lower power state.
+  virtual void HandleResume();
+
+ private:
+  // Lets the HashCaching unit test inspect hash_cache_.
+  friend class SafeBrowsingDatabaseImpl_HashCaching_Test;
+
+  // Opens the database.
+  bool Open();
+
+  // Closes the database.
+  bool Close();
+
+  // Creates the SQL tables.
+  bool CreateTables();
+
+  // Checks the database version and if it's incompatible with the current one,
+  // resets the database.
+  bool CheckCompatibleVersion();
+
+  // Updates, or adds if new, a hostkey's record with the given add/sub entry.
+  // If this is a sub, removes the given prefixes, or all if prefixes is empty,
+  // from host_key's record.  If persist is true, then if the add_chunk_id isn't
+  // found the entry will store this sub information for future reference.
+  // Otherwise the entry will not be modified if there are no matches.
+  void UpdateInfo(SBPrefix host, SBEntry* entry, bool persist);
+
+  // Checks whether any of the given paths match for the given host.  Nothing
+  // is returned directly: a match is reported either by setting
+  // |matching_list| or by appending to |prefix_hits|.
+  void CheckUrl(const std::string& host,
+                SBPrefix host_key,
+                const std::vector<std::string>& paths,
+                std::string* matching_list,
+                std::vector<SBPrefix>* prefix_hits);
+
+  // Value stored in the chunk_type column of the chunks table.
+  enum ChunkType {
+    ADD_CHUNK = 0,
+    SUB_CHUNK = 1,
+  };
+
+  // Adds information about the given chunk to the chunks table.
+  void AddChunkInformation(int list_id,
+                           ChunkType type,
+                           int chunk_id,
+                           const std::string& hostkeys);  // only used for add
+
+  // Return a comma separated list of chunk ids that are in the database for
+  // the given list and chunk type.
+  void GetChunkIds(int list_id, ChunkType type, std::string* list);
+
+  // Checks if a chunk is in the database.
+  bool ChunkExists(int list_id, ChunkType type, int chunk_id);
+
+  // Removes the given id from our list of chunk ids.
+  void RemoveChunkId(int list_id, ChunkType type, int chunk_id);
+
+  // Reads the host's information from the database.  Returns true if it was
+  // found, or false otherwise.
+  bool ReadInfo(int host_key, SBHostInfo* info, int* id);
+
+  // Writes the host's information to the database, overwriting any existing
+  // information for that host_key if it existed.
+  void WriteInfo(int host_key, const SBHostInfo& info, int id);
+
+  // Deletes existing information for the given hostkey.
+  void DeleteInfo(int host_key);
+
+  // Adds the given list to the database.  Returns its row id.
+  int AddList(const std::string& name);
+
+  // Given a list name, returns its internal id.  If we haven't seen it before,
+  // an id is created and stored in the database.  On error, returns 0.
+  int GetListID(const std::string& name);
+
+  // Given a list id, returns its name.
+  std::string GetListName(int id);
+
+  // Adds the host to the bloom filter.
+  void AddHostToBloomFilter(int host_key);
+
+  // Generate a bloom filter.
+  virtual void BuildBloomFilter();
+
+  // Counts one more bloom filter lookup for the hit rate tracking below.
+  virtual void IncrementBloomFilterReadCount() { ++bloom_filter_read_count_; }
+
+  // Used when generating the bloom filter.  Reads a small number of hostkeys
+  // starting at the given row id.
+  void OnReadHostKeys(int start_id);
+
+  // Called when we finished reading all the hostkeys from the database during
+  // bloom filter generation.
+  void OnDoneReadingHostKeys();
+
+  // StartThrottledWork kicks off RunThrottledWork unless a run is already
+  // scheduled; RunThrottledWork drains the pending chunk and add-del queues.
+  void StartThrottledWork();
+  void RunThrottledWork();
+
+  // Used when processing an add-del, add chunk and sub chunk commands in small
+  // batches so that the db thread is never blocked.  They return true if
+  // complete, or false if there's still more work to do.
+  bool ProcessChunks();
+  bool ProcessAddDel();
+
+  bool ProcessAddChunks(std::deque<SBChunk>* chunks);
+  bool ProcessSubChunks(std::deque<SBChunk>* chunks);
+
+  // Nestable transaction helpers: calls are counted in transaction_count_ and
+  // the commit happens when the count drops back to zero.
+  void BeginTransaction();
+  void EndTransaction();
+
+  // Processes an add-del command, which deletes all the prefixes that came
+  // from that add chunk id.
+  void AddDel(const std::string& list_name, int add_chunk_id);
+
+  // Processes a sub-del command, which just removes the sub chunk id from
+  // our list.
+  void SubDel(const std::string& list_name, int sub_chunk_id);
+
+  // Looks up any cached full hashes we may have.
+  void GetCachedFullHashes(const std::vector<SBPrefix>* prefix_hits,
+                           std::vector<SBFullHashResult>* full_hits,
+                           Time last_update);
+
+  // Remove cached entries that have prefixes contained in the entry.
+  void ClearCachedHashes(const SBEntry* entry);
+
+  // Remove all GetHash entries that match the list and chunk id from an AddDel.
+  void ClearCachedHashesForChunk(int list_id, int add_chunk_id);
+
+  // HandleCorruptDatabase posts a task that runs OnHandleCorruptDatabase,
+  // which resets the database.
+  void HandleCorruptDatabase();
+  void OnHandleCorruptDatabase();
+
+  // Runs a small amount of time after the machine has resumed operation from
+  // a low power state.
+  void OnResumeDone();
+
+  // The database connection.
+  sqlite3* db_;
+
+  // Cache of compiled statements for our database.
+  scoped_ptr<SqliteStatementCache> statement_cache_;
+
+  // Number of BeginTransaction calls not yet matched by EndTransaction, and
+  // the transaction object they share.
+  int transaction_count_;
+  scoped_ptr<SQLTransaction> transaction_;
+
+  // True iff the database has been opened successfully.
+  bool init_;
+
+  // Filename passed to Init.
+  // NOTE(review): presumably the path of the SQLite file -- confirm in the
+  // .cc.
+  std::wstring filename_;
+
+  // Controls whether database writes are done synchronously in one go or
+  // asynchronously in small chunks.
+  bool asynchronous_;
+
+  // False positive hit rate tracking.
+  int bloom_filter_fp_count_;
+  int bloom_filter_read_count_;
+
+  // These are temp variables used when rebuilding the bloom filter.
+  bool bloom_filter_building_;
+  std::vector<int> bloom_filter_temp_hostkeys_;
+  Time bloom_filter_rebuild_time_;
+
+  // Used to store throttled work for commands that write to the database.
+  std::queue<std::deque<SBChunk>*> pending_chunks_;
+
+  // Used during processing of an add chunk.
+  std::string add_chunk_modified_hosts_;
+
+  // One queued add-del: the add chunk to remove and the host keys that chunk
+  // touched (kept as the strings read from the chunks table).
+  struct AddDelWork {
+    int list_id;
+    int add_chunk_id;
+    std::vector<std::string> hostkeys;
+  };
+
+  std::queue<AddDelWork> pending_add_del_;
+
+  // Called after an add/sub chunk is processed.
+  Callback0::Type* chunk_inserted_callback_;
+
+  // Used to schedule small bits of work when writing to the database.
+  ScopedRunnableMethodFactory<SafeBrowsingDatabaseImpl> process_factory_;
+
+  // Used to schedule reading the database to rebuild the bloom filter.
+  ScopedRunnableMethodFactory<SafeBrowsingDatabaseImpl> bloom_read_factory_;
+
+  // Used to schedule writing the bloom filter after an update.
+  ScopedRunnableMethodFactory<SafeBrowsingDatabaseImpl> bloom_write_factory_;
+
+  // Used to schedule resetting the database because of corruption.
+  ScopedRunnableMethodFactory<SafeBrowsingDatabaseImpl> reset_factory_;
+
+  // Used to schedule resuming from a lower power state.
+  ScopedRunnableMethodFactory<SafeBrowsingDatabaseImpl> resume_factory_;
+
+  // Used for caching GetHash results.
+  typedef struct HashCacheEntry {
+    SBFullHash full_hash;
+    int list_id;
+    int add_chunk_id;
+    Time received;  // When this GetHash result was cached.
+  } HashCacheEntry;
+
+  // All cached full hashes that share one prefix, keyed by that prefix.
+  typedef std::list<HashCacheEntry> HashList;
+  typedef base::hash_map<SBPrefix, HashList> HashCache;
+  HashCache hash_cache_;
+
+  // Cache of prefixes that returned empty results (no full hash match).
+  std::set<SBPrefix> prefix_miss_cache_;
+
+  // The amount of time, in milliseconds, to wait before the next disk write.
+  int disk_delay_;
+
+  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseImpl);
+};
+
+#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_IMPL_H__
\ No newline at end of file
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_impl_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_impl_unittest.cc
new file mode 100644
index 0000000..1077525
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_database_impl_unittest.cc
@@ -0,0 +1,228 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// Unit tests for the SafeBrowsing storage system (specific to the
+// SafeBrowsingDatabaseImpl implementation).
+
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/path_service.h"
+#include "base/process_util.h"
+#include "base/sha2.h"
+#include "base/stats_counters.h"
+#include "base/string_util.h"
+#include "base/time.h"
+#include "chrome/browser/safe_browsing/protocol_parser.h"
+#include "chrome/browser/safe_browsing/safe_browsing_database_impl.h"
+#include "googleurl/src/gurl.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+// Hashes |str| with SHA-256 into an SBPrefix-sized buffer and returns it.
+// NOTE(review): presumably this keeps the leading bytes of the digest --
+// confirm against base::SHA256HashString.
+SBPrefix Sha256Prefix(const std::string& str) {
+  SBPrefix hash;
+  base::SHA256HashString(str, &hash, sizeof(hash));
+  return hash;
+}
+
+// Helper function to do an AddDel or SubDel command.  The delete list is
+// allocated with new because DeleteChunks frees it when it is done.
+void DelChunk(SafeBrowsingDatabase* db,
+              const std::string& list,
+              int chunk_id,
+              bool is_sub_del) {
+  std::vector<SBChunkDelete>* deletes = new std::vector<SBChunkDelete>;
+  SBChunkDelete chunk_delete;
+  chunk_delete.list_name = list;
+  chunk_delete.is_sub_del = is_sub_del;
+  chunk_delete.chunk_del.push_back(ChunkRange(chunk_id));
+  deletes->push_back(chunk_delete);
+  db->DeleteChunks(deletes);
+}
+
+// Issues an AddDel for a single add chunk of |list|.
+void AddDelChunk(SafeBrowsingDatabase* db,
+                 const std::string& list,
+                 int chunk_id) {
+  DelChunk(db, list, chunk_id, false);
+}
+
+}  // namespace
+
+// Utility function for setting up the database for the caching test.
+void PopulateDatabaseForCacheTest(SafeBrowsingDatabase* database) { + // Add a simple chunk with one hostkey and cache it. + SBChunkHost host; + host.host = Sha256Prefix("www.evil.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->set_chunk_id(1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); + host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/malware.html")); + + SBChunk chunk; + chunk.chunk_number = 1; + chunk.hosts.push_back(host); + + std::deque<SBChunk>* chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + database->InsertChunks("goog-malware-shavar", chunks); + + // Add the GetHash results to the cache. + SBFullHashResult full_hash; + base::SHA256HashString("www.evil.com/phishing.html", + &full_hash.hash, sizeof(SBFullHash)); + full_hash.list_name = "goog-malware-shavar"; + full_hash.add_chunk_id = 1; + + std::vector<SBFullHashResult> results; + results.push_back(full_hash); + + base::SHA256HashString("www.evil.com/malware.html", + &full_hash.hash, sizeof(SBFullHash)); + results.push_back(full_hash); + + std::vector<SBPrefix> prefixes; + database->CacheHashResults(prefixes, results); +} + +TEST(SafeBrowsingDatabaseImpl, HashCaching) { + std::wstring filename; + PathService::Get(base::DIR_TEMP, &filename); + filename.push_back(file_util::kPathSeparator); + filename.append(L"SafeBrowsingTestDatabase"); + DeleteFile(filename.c_str()); // In case it existed from a previous run. + + SafeBrowsingDatabaseImpl database; + database.SetSynchronous(); + EXPECT_TRUE(database.Init(filename, NULL)); + + PopulateDatabaseForCacheTest(&database); + + // We should have both full hashes in the cache. + EXPECT_EQ(database.hash_cache_.size(), 2); + + // Test the cache lookup for the first prefix. 
+ std::string list; + std::vector<SBPrefix> prefixes; + std::vector<SBFullHashResult> full_hashes; + database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 1); + + SBFullHashResult full_hash; + base::SHA256HashString("www.evil.com/phishing.html", + &full_hash.hash, sizeof(SBFullHash)); + EXPECT_EQ(memcmp(&full_hashes[0].hash, + &full_hash.hash, sizeof(SBFullHash)), 0); + + prefixes.clear(); + full_hashes.clear(); + + // Test the cache lookup for the second prefix. + database.ContainsUrl(GURL("http://www.evil.com/malware.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 1); + base::SHA256HashString("www.evil.com/malware.html", + &full_hash.hash, sizeof(SBFullHash)); + EXPECT_EQ(memcmp(&full_hashes[0].hash, + &full_hash.hash, sizeof(SBFullHash)), 0); + + prefixes.clear(); + full_hashes.clear(); + + // Test removing a prefix via a sub chunk. + SBChunkHost host; + host.host = Sha256Prefix("www.evil.com/"); + host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 2); + host.entry->set_chunk_id(1); + host.entry->SetChunkIdAtPrefix(0, 1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); + + SBChunk chunk; + chunk.chunk_number = 2; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + std::deque<SBChunk>* chunks = new std::deque<SBChunk>; + chunks->push_back(chunk); + database.InsertChunks("goog-malware-shavar", chunks); + + // This prefix should still be there. + database.ContainsUrl(GURL("http://www.evil.com/malware.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 1); + base::SHA256HashString("www.evil.com/malware.html", + &full_hash.hash, sizeof(SBFullHash)); + EXPECT_EQ(memcmp(&full_hashes[0].hash, + &full_hash.hash, sizeof(SBFullHash)), 0); + + prefixes.clear(); + full_hashes.clear(); + + // This prefix should be gone. 
+ database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 0); + + prefixes.clear(); + full_hashes.clear(); + + // Test that an AddDel for the original chunk removes the last cached entry. + AddDelChunk(&database, "goog-malware-shavar", 1); + database.ContainsUrl(GURL("http://www.evil.com/malware.html"), + &list, &prefixes, &full_hashes, Time::Now()); + EXPECT_EQ(full_hashes.size(), 0); + EXPECT_EQ(database.hash_cache_.size(), 0); + + prefixes.clear(); + full_hashes.clear(); + + // Test that the cache won't return expired values. First we have to adjust + // the cached entries' received time to make them older, since the database + // cache insert uses Time::Now(). First, store some entries. + PopulateDatabaseForCacheTest(&database); + EXPECT_EQ(database.hash_cache_.size(), 2); + + // Now adjust one of the entries times to be in the past. + Time expired = Time::Now() - TimeDelta::FromMinutes(60); + SBPrefix key; + memcpy(&key, &full_hash.hash, sizeof(SBPrefix)); + SafeBrowsingDatabaseImpl::HashList& entries = database.hash_cache_[key]; + SafeBrowsingDatabaseImpl::HashCacheEntry entry = entries.front(); + entries.pop_front(); + entry.received = expired; + entries.push_back(entry); + + database.ContainsUrl(GURL("http://www.evil.com/malware.html"), + &list, &prefixes, &full_hashes, expired); + EXPECT_EQ(full_hashes.size(), 0); + + // Expired entry was dumped. + EXPECT_EQ(database.hash_cache_.size(), 1); + + // This entry should still exist. + database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &list, &prefixes, &full_hashes, expired); + EXPECT_EQ(full_hashes.size(), 1); + + + // Testing prefix miss caching. First, we clear out the existing database, + // Since PopulateDatabaseForCacheTest() doesn't handle adding duplicate + // chunks. 
+ AddDelChunk(&database, "goog-malware-shavar", 1); + + std::vector<SBPrefix> prefix_misses; + std::vector<SBFullHashResult> empty_full_hash; + prefix_misses.push_back(Sha256Prefix("http://www.bad.com/malware.html")); + prefix_misses.push_back(Sha256Prefix("http://www.bad.com/phishing.html")); + database.CacheHashResults(prefix_misses, empty_full_hash); + + // Prefixes with no full results are misses. + EXPECT_EQ(database.prefix_miss_cache_.size(), 2); + + // Update the database. + PopulateDatabaseForCacheTest(&database); + + // Prefix miss cache should be cleared. + EXPECT_EQ(database.prefix_miss_cache_.size(), 0); +} + + + diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc index fe6f15c..48c4fb1 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // -// Unit tests for the SafeBrowsing storage system (SafeBrowsingDatabase). +// Unit tests for the SafeBrowsing storage system. #include "base/file_util.h" #include "base/logging.h" @@ -39,25 +39,29 @@ void DelChunk(SafeBrowsingDatabase* db, db->DeleteChunks(deletes); } -void AddDelChunk(SafeBrowsingDatabase* db, const std::string& list, int chunk_id) { +void AddDelChunk(SafeBrowsingDatabase* db, + const std::string& list, + int chunk_id) { DelChunk(db, list, chunk_id, false); } -void SubDelChunk(SafeBrowsingDatabase* db, const std::string& list, int chunk_id) { +void SubDelChunk(SafeBrowsingDatabase* db, + const std::string& list, + int chunk_id) { DelChunk(db, list, chunk_id, true); } // Checks database reading and writing. 
-TEST(SafeBrowsing, Database) { +TEST(SafeBrowsingDatabase, Database) { std::wstring filename; PathService::Get(base::DIR_TEMP, &filename); filename.push_back(file_util::kPathSeparator); filename.append(L"SafeBrowsingTestDatabase"); DeleteFile(filename.c_str()); // In case it existed from a previous run. - SafeBrowsingDatabase database; - database.set_synchronous(); - EXPECT_TRUE(database.Init(filename, NULL)); + SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create(); + database->SetSynchronous(); + EXPECT_TRUE(database->Init(filename, NULL)); // Add a simple chunk with one hostkey. SBChunkHost host; @@ -73,7 +77,7 @@ TEST(SafeBrowsing, Database) { std::deque<SBChunk>* chunks = new std::deque<SBChunk>; chunks->push_back(chunk); - database.InsertChunks("goog-malware", chunks); + database->InsertChunks("goog-malware", chunks); // Add another chunk with two different hostkeys. host.host = Sha256Prefix("www.evil.com/"); @@ -96,7 +100,7 @@ TEST(SafeBrowsing, Database) { chunks = new std::deque<SBChunk>; chunks->push_back(chunk); - database.InsertChunks("goog-malware", chunks); + database->InsertChunks("goog-malware", chunks); // and a chunk with an IP-based host host.host = Sha256Prefix("192.168.0.1/"); @@ -109,12 +113,12 @@ TEST(SafeBrowsing, Database) { chunks = new std::deque<SBChunk>; chunks->push_back(chunk); - database.InsertChunks("goog-malware", chunks); + database->InsertChunks("goog-malware", chunks); // Make sure they were added correctly. 
std::vector<SBListChunkRanges> lists; - database.GetListsInfo(&lists); + database->GetListsInfo(&lists); EXPECT_EQ(lists.size(), 1); EXPECT_EQ(lists[0].name, "goog-malware"); EXPECT_EQ(lists[0].adds, "1-3"); @@ -124,45 +128,45 @@ TEST(SafeBrowsing, Database) { std::vector<SBFullHashResult> full_hashes; std::vector<SBPrefix> prefix_hits; std::string matching_list; - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html")); EXPECT_EQ(prefix_hits.size(), 1); - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/malware.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/notevil1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/notevil2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.evil.com/malware.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.evil.com/notevil1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_TRUE(database.ContainsUrl(GURL("http://192.168.0.1/malware.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + 
EXPECT_TRUE(database->ContainsUrl(GURL("http://www.good.com/good1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/"), + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.good.com/good2.html"), &matching_list, &prefix_hits, &full_hashes, now)); - EXPECT_EQ(prefix_hits.size(), 0); - EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/robots.txt"), + EXPECT_TRUE(database->ContainsUrl(GURL("http://192.168.0.1/malware.html"), &matching_list, &prefix_hits, &full_hashes, now)); + EXPECT_FALSE(database->ContainsUrl(GURL("http://www.evil.com/"), + &matching_list, &prefix_hits, + &full_hashes, now)); + EXPECT_EQ(prefix_hits.size(), 0); + + EXPECT_FALSE(database->ContainsUrl(GURL("http://www.evil.com/robots.txt"), + &matching_list, &prefix_hits, + &full_hashes, now)); + // Test removing a single prefix from the add chunk. host.host = Sha256Prefix("www.evil.com/"); host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 2); @@ -177,51 +181,51 @@ TEST(SafeBrowsing, Database) { chunks = new std::deque<SBChunk>; chunks->push_back(chunk); - database.InsertChunks("goog-malware", chunks); + database->InsertChunks("goog-malware", chunks); - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.evil.com/phishing.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html")); EXPECT_EQ(prefix_hits.size(), 1); - EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/notevil1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database->ContainsUrl(GURL("http://www.evil.com/notevil1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); EXPECT_EQ(prefix_hits.size(), 0); - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.evil.com/notevil2.html"), - &matching_list, 
&prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.good.com/good1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_TRUE(database.ContainsUrl(GURL("http://www.good.com/good2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database->ContainsUrl(GURL("http://www.good.com/good2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - database.GetListsInfo(&lists); + database->GetListsInfo(&lists); EXPECT_EQ(lists.size(), 1); EXPECT_EQ(lists[0].name, "goog-malware"); EXPECT_EQ(lists[0].subs, "4"); // Test removing all the prefixes from an add chunk. - AddDelChunk(&database, "goog-malware", 2); - EXPECT_FALSE(database.ContainsUrl(GURL("http://www.evil.com/notevil2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + AddDelChunk(database, "goog-malware", 2); + EXPECT_FALSE(database->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_FALSE(database.ContainsUrl(GURL("http://www.good.com/good1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database->ContainsUrl(GURL("http://www.good.com/good1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_FALSE(database.ContainsUrl(GURL("http://www.good.com/good2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database->ContainsUrl(GURL("http://www.good.com/good2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - database.GetListsInfo(&lists); + database->GetListsInfo(&lists); EXPECT_EQ(lists.size(), 1); EXPECT_EQ(lists[0].name, "goog-malware"); EXPECT_EQ(lists[0].subs, "4"); @@ -240,15 +244,15 @@ 
TEST(SafeBrowsing, Database) { chunks = new std::deque<SBChunk>; chunks->push_back(chunk); - database.InsertChunks("goog-malware", chunks); + database->InsertChunks("goog-malware", chunks); // Now remove the dummy entry. If there are any problems with the // transactions, asserts will fire. - AddDelChunk(&database, "goog-malware", 44); + AddDelChunk(database, "goog-malware", 44); // Test the subdel command. - SubDelChunk(&database, "goog-malware", 4); - database.GetListsInfo(&lists); + SubDelChunk(database, "goog-malware", 4); + database->GetListsInfo(&lists); EXPECT_EQ(lists.size(), 1); EXPECT_EQ(lists[0].name, "goog-malware"); EXPECT_EQ(lists[0].subs, ""); @@ -264,11 +268,11 @@ TEST(SafeBrowsing, Database) { chunks = new std::deque<SBChunk>; chunks->push_back(chunk); - database.InsertChunks("goog-malware", chunks); + database->InsertChunks("goog-malware", chunks); - EXPECT_FALSE(database.ContainsUrl(GURL("http://www.notevilanymore.com/index.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database->ContainsUrl( + GURL("http://www.notevilanymore.com/index.html"), + &matching_list, &prefix_hits, &full_hashes, now)); // Now insert the tardy add chunk. 
host.host = Sha256Prefix("www.notevilanymore.com/"); @@ -282,194 +286,19 @@ TEST(SafeBrowsing, Database) { chunks = new std::deque<SBChunk>; chunks->push_back(chunk); - database.InsertChunks("goog-malware", chunks); + database->InsertChunks("goog-malware", chunks); - EXPECT_FALSE(database.ContainsUrl(GURL("http://www.notevilanymore.com/index.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database->ContainsUrl( + GURL("http://www.notevilanymore.com/index.html"), + &matching_list, &prefix_hits, &full_hashes, now)); - EXPECT_FALSE(database.ContainsUrl(GURL("http://www.notevilanymore.com/good.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database->ContainsUrl( + GURL("http://www.notevilanymore.com/good.html"), + &matching_list, &prefix_hits, &full_hashes, now)); DeleteFile(filename.c_str()); // Clean up. -} - -// Utility function for setting up the database for the caching test. -void PopulateDatabaseForCacheTest(SafeBrowsingDatabase* database) { - // Add a simple chunk with one hostkey and cache it. - SBChunkHost host; - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->set_chunk_id(1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); - host.entry->SetPrefixAt(1, Sha256Prefix("www.evil.com/malware.html")); - SBChunk chunk; - chunk.chunk_number = 1; - chunk.hosts.push_back(host); - - std::deque<SBChunk>* chunks = new std::deque<SBChunk>; - chunks->push_back(chunk); - database->InsertChunks("goog-malware-shavar", chunks); - - // Add the GetHash results to the cache. 
- SBFullHashResult full_hash; - base::SHA256HashString("www.evil.com/phishing.html", - &full_hash.hash, sizeof(SBFullHash)); - full_hash.list_name = "goog-malware-shavar"; - full_hash.add_chunk_id = 1; - - std::vector<SBFullHashResult> results; - results.push_back(full_hash); - - base::SHA256HashString("www.evil.com/malware.html", - &full_hash.hash, sizeof(SBFullHash)); - results.push_back(full_hash); - - std::vector<SBPrefix> prefixes; - database->CacheHashResults(prefixes, results); -} - -TEST(SafeBrowsing, HashCaching) { - std::wstring filename; - PathService::Get(base::DIR_TEMP, &filename); - filename.push_back(file_util::kPathSeparator); - filename.append(L"SafeBrowsingTestDatabase"); - DeleteFile(filename.c_str()); // In case it existed from a previous run. - - SafeBrowsingDatabase database; - database.set_synchronous(); - EXPECT_TRUE(database.Init(filename, NULL)); - - PopulateDatabaseForCacheTest(&database); - - // We should have both full hashes in the cache. - EXPECT_EQ(database.hash_cache_.size(), 2); - - // Test the cache lookup for the first prefix. - std::string list; - std::vector<SBPrefix> prefixes; - std::vector<SBFullHashResult> full_hashes; - database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &list, &prefixes, &full_hashes, Time::Now()); - EXPECT_EQ(full_hashes.size(), 1); - - SBFullHashResult full_hash; - base::SHA256HashString("www.evil.com/phishing.html", - &full_hash.hash, sizeof(SBFullHash)); - EXPECT_EQ(memcmp(&full_hashes[0].hash, - &full_hash.hash, sizeof(SBFullHash)), 0); - - prefixes.clear(); - full_hashes.clear(); - - // Test the cache lookup for the second prefix. 
- database.ContainsUrl(GURL("http://www.evil.com/malware.html"), - &list, &prefixes, &full_hashes, Time::Now()); - EXPECT_EQ(full_hashes.size(), 1); - base::SHA256HashString("www.evil.com/malware.html", - &full_hash.hash, sizeof(SBFullHash)); - EXPECT_EQ(memcmp(&full_hashes[0].hash, - &full_hash.hash, sizeof(SBFullHash)), 0); - - prefixes.clear(); - full_hashes.clear(); - - // Test removing a prefix via a sub chunk. - SBChunkHost host; - host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 2); - host.entry->set_chunk_id(1); - host.entry->SetChunkIdAtPrefix(0, 1); - host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/phishing.html")); - - SBChunk chunk; - chunk.chunk_number = 2; - chunk.hosts.clear(); - chunk.hosts.push_back(host); - std::deque<SBChunk>* chunks = new std::deque<SBChunk>; - chunks->push_back(chunk); - database.InsertChunks("goog-malware-shavar", chunks); - - // This prefix should still be there. - database.ContainsUrl(GURL("http://www.evil.com/malware.html"), - &list, &prefixes, &full_hashes, Time::Now()); - EXPECT_EQ(full_hashes.size(), 1); - base::SHA256HashString("www.evil.com/malware.html", - &full_hash.hash, sizeof(SBFullHash)); - EXPECT_EQ(memcmp(&full_hashes[0].hash, - &full_hash.hash, sizeof(SBFullHash)), 0); - - prefixes.clear(); - full_hashes.clear(); - - // This prefix should be gone. - database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &list, &prefixes, &full_hashes, Time::Now()); - EXPECT_EQ(full_hashes.size(), 0); - - prefixes.clear(); - full_hashes.clear(); - - // Test that an AddDel for the original chunk removes the last cached entry. - AddDelChunk(&database, "goog-malware-shavar", 1); - database.ContainsUrl(GURL("http://www.evil.com/malware.html"), - &list, &prefixes, &full_hashes, Time::Now()); - EXPECT_EQ(full_hashes.size(), 0); - EXPECT_EQ(database.hash_cache_.size(), 0); - - prefixes.clear(); - full_hashes.clear(); - - // Test that the cache won't return expired values. 
First we have to adjust - // the cached entries' received time to make them older, since the database - // cache insert uses Time::Now(). First, store some entries. - PopulateDatabaseForCacheTest(&database); - EXPECT_EQ(database.hash_cache_.size(), 2); - - // Now adjust one of the entries times to be in the past. - Time expired = Time::Now() - TimeDelta::FromMinutes(60); - SBPrefix key; - memcpy(&key, &full_hash.hash, sizeof(SBPrefix)); - SafeBrowsingDatabase::HashList& entries = database.hash_cache_[key]; - SafeBrowsingDatabase::HashCacheEntry entry = entries.front(); - entries.pop_front(); - entry.received = expired; - entries.push_back(entry); - - database.ContainsUrl(GURL("http://www.evil.com/malware.html"), - &list, &prefixes, &full_hashes, expired); - EXPECT_EQ(full_hashes.size(), 0); - - // Expired entry was dumped. - EXPECT_EQ(database.hash_cache_.size(), 1); - - // This entry should still exist. - database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &list, &prefixes, &full_hashes, expired); - EXPECT_EQ(full_hashes.size(), 1); - - - // Testing prefix miss caching. First, we clear out the existing database, - // Since PopulateDatabaseForCacheTest() doesn't handle adding duplicate - // chunks. - AddDelChunk(&database, "goog-malware-shavar", 1); - - std::vector<SBPrefix> prefix_misses; - std::vector<SBFullHashResult> empty_full_hash; - prefix_misses.push_back(Sha256Prefix("http://www.bad.com/malware.html")); - prefix_misses.push_back(Sha256Prefix("http://www.bad.com/phishing.html")); - database.CacheHashResults(prefix_misses, empty_full_hash); - - // Prefixes with no full results are misses. - EXPECT_EQ(database.prefix_miss_cache_.size(), 2); - - // Update the database. - PopulateDatabaseForCacheTest(&database); - - // Prefix miss cache should be cleared. 
- EXPECT_EQ(database.prefix_miss_cache_.size(), 0); + delete database; } void PrintStat(const wchar_t* name) { @@ -508,9 +337,9 @@ void PeformUpdate(const std::wstring& initial_db, ASSERT_TRUE(file_util::CopyFile(full_initial_db, filename)); } - SafeBrowsingDatabase database; - database.set_synchronous(); - EXPECT_TRUE(database.Init(filename, NULL)); + SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create(); + database->SetSynchronous(); + EXPECT_TRUE(database->Init(filename, NULL)); Time before_time = Time::Now(); ProcessHandle handle = Process::Current().handle(); @@ -518,10 +347,10 @@ void PeformUpdate(const std::wstring& initial_db, process_util::ProcessMetrics::CreateProcessMetrics(handle)); CHECK(metric->GetIOCounters(&before)); - database.DeleteChunks(deletes); + database->DeleteChunks(deletes); for (size_t i = 0; i < chunks.size(); ++i) - database.InsertChunks(chunks[i].listname, chunks[i].chunks); + database->InsertChunks(chunks[i].listname, chunks[i].chunks); CHECK(metric->GetIOCounters(&after)); @@ -545,12 +374,13 @@ void PeformUpdate(const std::wstring& initial_db, PrintStat(L"c:SB.ChunkInsert"); PrintStat(L"c:SB.ChunkDelete"); PrintStat(L"c:SB.TransactionCommit"); + + delete database; } void UpdateDatabase(const std::wstring& initial_db, const std::wstring& response_path, const std::wstring& updates_path) { - // First we read the chunks from disk, so that this isn't counted in IO bytes. std::vector<ChunksInfo> chunks; @@ -576,7 +406,8 @@ void UpdateDatabase(const std::wstring& initial_db, info.chunks = new std::deque<SBChunk>; bool re_key; - result = parser.ParseChunk(data.get(), size, "", "", &re_key, info.chunks); + result = parser.ParseChunk(data.get(), size, "", "", + &re_key, info.chunks); CHECK(result); info.listname = WideToASCII(file_util::GetFilenameFromPath(file)); @@ -616,29 +447,29 @@ void UpdateDatabase(const std::wstring& initial_db, // Counts the IO needed for the initial update of a database. 
// test\data\safe_browsing\download_update.py was used to fetch the add/sub // chunks that are read, in order to get repeatable runs. -TEST(SafeBrowsing, DISABLED_DatabaseInitialIO) { +TEST(SafeBrowsingDatabase, DISABLED_DatabaseInitialIO) { UpdateDatabase(L"", L"", L"initial"); } // Counts the IO needed to update a month old database. // The data files were generated by running "..\download_update.py postdata" // in the "safe_browsing\old" directory. -TEST(SafeBrowsing, DISABLED_DatabaseOldIO) { +TEST(SafeBrowsingDatabase, DISABLED_DatabaseOldIO) { UpdateDatabase(L"old\\SafeBrowsing", L"old\\response", L"old\\updates"); } // Like DatabaseOldIO but only the deletes. -TEST(SafeBrowsing, DISABLED_DatabaseOldDeletesIO) { +TEST(SafeBrowsingDatabase, DISABLED_DatabaseOldDeletesIO) { UpdateDatabase(L"old\\SafeBrowsing", L"old\\response", L""); } // Like DatabaseOldIO but only the updates. -TEST(SafeBrowsing, DISABLED_DatabaseOldUpdatesIO) { +TEST(SafeBrowsingDatabase, DISABLED_DatabaseOldUpdatesIO) { UpdateDatabase(L"old\\SafeBrowsing", L"", L"old\\updates"); } // Does a a lot of addel's on very large chunks. 
-TEST(SafeBrowsing, DISABLED_DatabaseOldLotsofDeletesIO) { +TEST(SafeBrowsingDatabase, DISABLED_DatabaseOldLotsofDeletesIO) { std::vector<ChunksInfo> chunks; std::vector<SBChunkDelete>* deletes = new std::vector<SBChunkDelete>; SBChunkDelete del; diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc index 4dbb70f..1cd4ff7 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.cc +++ b/chrome/browser/safe_browsing/safe_browsing_service.cc @@ -290,7 +290,7 @@ SafeBrowsingDatabase* SafeBrowsingService::GetDatabase() { path.append(chrome::kSafeBrowsingFilename); Time before = Time::Now(); - SafeBrowsingDatabase* database = new SafeBrowsingDatabase(); + SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create(); Callback0::Type* callback = NewCallback(this, &SafeBrowsingService::ChunkInserted); result = database->Init(path, callback); diff --git a/chrome/test/unit/unittests.vcproj b/chrome/test/unit/unittests.vcproj index 247fc1c..68950ad 100644 --- a/chrome/test/unit/unittests.vcproj +++ b/chrome/test/unit/unittests.vcproj @@ -758,6 +758,10 @@ > </File> <File + RelativePath="..\..\browser\safe_browsing\safe_browsing_database_impl_unittest.cc" + > + </File> + <File RelativePath="..\..\browser\safe_browsing\safe_browsing_util_unittest.cc" > </File> |