path: root/chrome/browser/safe_browsing/safe_browsing_database.cc
author    paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>  2008-09-20 02:03:08 +0000
committer paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>  2008-09-20 02:03:08 +0000
commit    54d80bb02aa71eb957a863109392cd97ea1e2496 (patch)
tree      31e7c397ba403a9e81c15eb1755a16b8e1083ad2  /chrome/browser/safe_browsing/safe_browsing_database.cc
parent    8c8824bdb4820a4107ec94e5c3d12981bdc4ae7f (diff)
Create a SafeBrowsing database interface to more easily facilitate alternate
implementations. The current SafeBrowsingDatabase code is moved to *_impl
files. Adding a new implementation can be done via command line flags in the
SafeBrowsingDatabase::Create factory method.

Review URL: http://codereview.chromium.org/3162

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@2434 0039d316-1c4b-4281-b951-d872f2087c98
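The refactor described above is a straightforward factory-method split: SafeBrowsingDatabase becomes a thin interface whose static Create() picks the concrete database, while the previous implementation moves wholesale into the new safe_browsing_database_impl files. The sketch below shows that shape in a self-contained form; it is an illustration only, assuming a hypothetical SafeBrowsingDatabaseNew class, a made-up --new-safe-browsing-database switch, a hand-rolled argv scan instead of Chromium's command-line handling, and a simplified NeedToCheckUrl signature. The Create() this patch actually adds (visible in the diff below) returns SafeBrowsingDatabaseImpl unconditionally.

#include <cstring>
#include <memory>
#include <string>

// Abstract interface. The real class exposes more methods (ContainsUrl,
// InsertChunks, ...) and takes a GURL; this is a reduced stand-in.
class SafeBrowsingDatabase {
 public:
  virtual ~SafeBrowsingDatabase() = default;
  // Factory method: the one place that decides which implementation the
  // rest of the browser sees.
  static SafeBrowsingDatabase* Create(int argc, const char* const* argv);
  virtual bool NeedToCheckUrl(const std::string& url) = 0;
};

// Stand-in for the code this commit moves into the *_impl files.
class SafeBrowsingDatabaseImpl : public SafeBrowsingDatabase {
 public:
  bool NeedToCheckUrl(const std::string& /*url*/) override { return true; }
};

// Hypothetical alternate implementation that a later change could add.
class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase {
 public:
  bool NeedToCheckUrl(const std::string& /*url*/) override { return true; }
};

SafeBrowsingDatabase* SafeBrowsingDatabase::Create(int argc,
                                                   const char* const* argv) {
  // Hypothetical switch; the patch itself has no flag handling yet and
  // always returns the Impl class.
  for (int i = 1; i < argc; ++i) {
    if (std::strcmp(argv[i], "--new-safe-browsing-database") == 0)
      return new SafeBrowsingDatabaseNew;
  }
  return new SafeBrowsingDatabaseImpl;
}

int main(int argc, char** argv) {
  std::unique_ptr<SafeBrowsingDatabase> db(
      SafeBrowsingDatabase::Create(argc, argv));
  return db->NeedToCheckUrl("http://example.com/") ? 0 : 1;
}

Because the flag check lives entirely inside Create(), callers never name a concrete class, so an alternate implementation can be introduced later without touching any call sites.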
Diffstat (limited to 'chrome/browser/safe_browsing/safe_browsing_database.cc')
-rw-r--r--  chrome/browser/safe_browsing/safe_browsing_database.cc  1231
1 file changed, 20 insertions, 1211 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
index 8a57559..f978200 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -6,956 +6,48 @@
#include "base/file_util.h"
#include "base/logging.h"
-#include "base/message_loop.h"
#include "base/sha2.h"
-#include "base/string_util.h"
-#include "chrome/browser/safe_browsing/bloom_filter.h"
-#include "chrome/browser/safe_browsing/chunk_range.h"
-#include "chrome/common/sqlite_compiled_statement.h"
-#include "chrome/common/sqlite_utils.h"
+#include "chrome/browser/safe_browsing/safe_browsing_database_impl.h"
#include "googleurl/src/gurl.h"
-// Database version. If this is different than what's stored on disk, the
-// database is reset.
-static const int kDatabaseVersion = 4;
-
// Filename suffix for the bloom filter.
static const wchar_t kBloomFilterFile[] = L" Filter";
-// Don't want to create too small of a bloom filter initially while we're
-// downloading the data and then keep having to rebuild it.
-static const int kBloomFilterMinSize = 250000;
-
-// How many bits to use per item. See the design doc for more information.
-static const int kBloomFilterSizeRatio = 13;
-
-// The minimum number of reads/misses before we will consider rebuilding the
-// bloom filter. This is needed because we don't want a few misses after
-// starting the browser to skew the percentage.
-// TODO(jabdelmalek): report to UMA how often we rebuild.
-static const int kBloomFilterMinReadsToCheckFP = 200;
-
-// The percentage of hit rate in the bloom filter when we regenerate it.
-static const double kBloomFilterMaxFPRate = 5.0;
-
-// When we awake from a low power state, we try to avoid doing expensive disk
-// operations for a few minutes to let the system page itself in and settle
-// down.
-static const int kOnResumeHoldupMs = 5 * 60 * 1000; // 5 minutes.
-
-// When doing any database operations that can take a long time, we do it in
-// small chunks up to this amount. Once this much time passes, we sleep for
-// the same amount and continue. This avoids blocking the thread so that if
-// we get a bloom filter hit, we don't block the network request.
-static const int kMaxThreadHoldupMs = 100;
-
-// How long to wait after updating the database to write the bloom filter.
-static const int kBloomFilterWriteDelayMs = (60 * 1000);
-
-// The maximum staleness for a cached entry.
-static const int kMaxStalenessMinutes = 45;
-
-SafeBrowsingDatabase::SafeBrowsingDatabase()
- : db_(NULL),
- init_(false),
- transaction_count_(0),
- asynchronous_(true),
- chunk_inserted_callback_(NULL),
-#pragma warning(suppress: 4355) // can use this
- bloom_read_factory_(this),
-#pragma warning(suppress: 4355) // can use this
- bloom_write_factory_(this),
-#pragma warning(suppress: 4355) // can use this
- process_factory_(this),
-#pragma warning(suppress: 4355) // can use this
- reset_factory_(this),
-#pragma warning(suppress: 4355) // can use this
- resume_factory_(this),
- disk_delay_(kMaxThreadHoldupMs) {
-}
-
-SafeBrowsingDatabase::~SafeBrowsingDatabase() {
- Close();
-}
-
-bool SafeBrowsingDatabase::Init(const std::wstring& filename,
- Callback0::Type* chunk_inserted_callback) {
- DCHECK(!init_ && filename_.empty());
-
- filename_ = filename;
- if (!Open())
- return false;
-
- bool load_filter = false;
- if (!DoesSqliteTableExist(db_, "hosts")) {
- if (!CreateTables()) {
- // Database could be corrupt, try starting from scratch.
- if (!ResetDatabase())
- return false;
- }
- } else if (!CheckCompatibleVersion()) {
- if (!ResetDatabase())
- return false;
- } else {
- load_filter = true;
- }
-
- bloom_filter_filename_ = BloomFilterFilename(filename_);
-
- if (load_filter) {
- LoadBloomFilter();
- } else {
- bloom_filter_.reset(
- new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio));
- }
-
- init_ = true;
- chunk_inserted_callback_ = chunk_inserted_callback;
- return true;
-}
-
-bool SafeBrowsingDatabase::Open() {
- if (sqlite3_open(WideToUTF8(filename_).c_str(), &db_) != SQLITE_OK)
- return false;
-
- // Run the database in exclusive mode. Nobody else should be accessing the
- // database while we're running, and this will give somewhat improved perf.
- sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL);
-
- statement_cache_.reset(new SqliteStatementCache(db_));
- bloom_filter_read_count_= 0;
- bloom_filter_fp_count_ = 0;
- bloom_filter_building_ = false;
-
- process_factory_.RevokeAll();
- bloom_read_factory_.RevokeAll();
- bloom_write_factory_.RevokeAll();
-
- return true;
+// Factory method.
+SafeBrowsingDatabase* SafeBrowsingDatabase::Create() {
+ return new SafeBrowsingDatabaseImpl;
}
-bool SafeBrowsingDatabase::Close() {
- if (!db_)
+bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) {
+ if (!bloom_filter_.get())
return true;
- process_factory_.RevokeAll();
- bloom_read_factory_.RevokeAll();
- bloom_write_factory_.RevokeAll();
-
- if (!pending_add_del_.empty()) {
- while (!pending_add_del_.empty())
- pending_add_del_.pop();
-
- EndTransaction();
- }
-
- while (!pending_chunks_.empty()) {
- std::deque<SBChunk>* chunks = pending_chunks_.front();
- safe_browsing_util::FreeChunks(chunks);
- delete chunks;
- pending_chunks_.pop();
- EndTransaction();
- }
-
- statement_cache_.reset(); // Must free statements before closing DB.
- transaction_.reset();
- bool result = sqlite3_close(db_) == SQLITE_OK;
- db_ = NULL;
- return result;
-}
-
-bool SafeBrowsingDatabase::CreateTables() {
- SQLTransaction transaction(db_);
- transaction.Begin();
-
- // We use an autoincrement integer as the primary key to allow full table
- // scans to be quick. Otherwise if we used host, then we'd have to jump
- // all over the table when doing a full table scan to generate the bloom
- // filter and that's an order of magnitude slower. By marking host as
- // unique, an index is created automatically.
- if (sqlite3_exec(db_, "CREATE TABLE hosts ("
- "id INTEGER PRIMARY KEY AUTOINCREMENT,"
- "host INTEGER UNIQUE,"
- "entries BLOB)",
- NULL, NULL, NULL) != SQLITE_OK) {
- return false;
- }
-
- if (sqlite3_exec(db_, "CREATE TABLE chunks ("
- "list_id INTEGER,"
- "chunk_type INTEGER,"
- "chunk_id INTEGER,"
- "hostkeys TEXT)",
- NULL, NULL, NULL) != SQLITE_OK) {
- return false;
- }
-
- if (sqlite3_exec(db_, "CREATE TABLE list_names ("
- "id INTEGER PRIMARY KEY AUTOINCREMENT,"
- "name TEXT)",
- NULL, NULL, NULL) != SQLITE_OK) {
- return false;
- }
+ IncrementBloomFilterReadCount();
- sqlite3_exec(db_, "CREATE INDEX chunks_chunk_id ON chunks(chunk_id)",
- NULL, NULL, NULL);
-
- std::string version = "PRAGMA user_version=";
- version += StringPrintf("%d", kDatabaseVersion);
-
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str());
- if (!statement.is_valid()) {
- NOTREACHED();
- return false;
- }
-
- if (statement->step() != SQLITE_DONE)
- return false;
-
- transaction.Commit();
- return true;
-}
-
-// The SafeBrowsing service assumes this operation is synchronous.
-bool SafeBrowsingDatabase::ResetDatabase() {
- hash_cache_.clear();
- prefix_miss_cache_.clear();
-
- bool rv = Close();
- DCHECK(rv);
-
- if (!file_util::Delete(filename_, false)) {
- NOTREACHED();
- return false;
- }
-
- bloom_filter_.reset(
- new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio));
- file_util::Delete(bloom_filter_filename_, false);
-
- if (!Open())
- return false;
-
- return CreateTables();
-}
-
-bool SafeBrowsingDatabase::CheckCompatibleVersion() {
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "PRAGMA user_version");
- if (!statement.is_valid()) {
- NOTREACHED();
- return false;
- }
-
- int result = statement->step();
- if (result != SQLITE_ROW)
- return false;
-
- return statement->column_int(0) == kDatabaseVersion;
-}
-
-bool SafeBrowsingDatabase::ContainsUrl(
- const GURL& url,
- std::string* matching_list,
- std::vector<SBPrefix>* prefix_hits,
- std::vector<SBFullHashResult>* full_hits,
- Time last_update) {
- matching_list->clear();
- prefix_hits->clear();
- if (!init_) {
- DCHECK(false);
- return false;
- }
-
- if (!url.is_valid())
- return false;
-
- std::vector<std::string> hosts, paths;
+ std::vector<std::string> hosts;
safe_browsing_util::GenerateHostsToCheck(url, &hosts);
- safe_browsing_util::GeneratePathsToCheck(url, &paths);
if (hosts.size() == 0)
- return false;
+ return false; // Could be about:blank.
- // Per the spec, if there are at least 3 components, check both the most
- // significant three components and the most significant two components.
- // If only two components, check the most significant two components.
- // If it's an IP address, use the entire IP address as the host.
- SBPrefix host_key_2, host_key_3, host_key_ip;
+ SBPrefix host_key;
if (url.HostIsIPAddress()) {
- base::SHA256HashString(url.host() + "/", &host_key_ip, sizeof(SBPrefix));
- CheckUrl(url.host(), host_key_ip, paths, matching_list, prefix_hits);
+ base::SHA256HashString(url.host() + "/", &host_key, sizeof(SBPrefix));
+ if (bloom_filter_->Exists(host_key))
+ return true;
} else {
- base::SHA256HashString(hosts[0] + "/", &host_key_2, sizeof(SBPrefix));
- if (hosts.size() > 1)
- base::SHA256HashString(hosts[1] + "/", &host_key_3, sizeof(SBPrefix));
-
- for (size_t i = 0; i < hosts.size(); ++i) {
- SBPrefix host_key = i == 0 ? host_key_2 : host_key_3;
- CheckUrl(hosts[i], host_key, paths, matching_list, prefix_hits);
- }
- }
-
- if (!matching_list->empty() || !prefix_hits->empty()) {
- // If all the prefixes are cached as 'misses', don't issue a GetHash.
- bool all_misses = true;
- for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin();
- it != prefix_hits->end(); ++it) {
- if (prefix_miss_cache_.find(*it) == prefix_miss_cache_.end()) {
- all_misses = false;
- break;
- }
- }
- if (all_misses)
- return false;
- GetCachedFullHashes(prefix_hits, full_hits, last_update);
- return true;
- }
+ base::SHA256HashString(hosts[0] + "/", &host_key, sizeof(SBPrefix));
+ if (bloom_filter_->Exists(host_key))
+ return true;
- // Check if we're getting too many FPs in the bloom filter, in which case
- // it's time to rebuild it.
- bloom_filter_fp_count_++;
- if (!bloom_filter_building_ &&
- bloom_filter_read_count_ > kBloomFilterMinReadsToCheckFP) {
- double fp_rate = bloom_filter_fp_count_ * 100 / bloom_filter_read_count_;
- if (fp_rate > kBloomFilterMaxFPRate) {
- DeleteBloomFilter();
- MessageLoop::current()->PostTask(FROM_HERE,
- bloom_read_factory_.NewRunnableMethod(
- &SafeBrowsingDatabase::BuildBloomFilter));
+ if (hosts.size() > 1) {
+ base::SHA256HashString(hosts[1] + "/", &host_key, sizeof(SBPrefix));
+ if (bloom_filter_->Exists(host_key))
+ return true;
}
}
-
return false;
}
-void SafeBrowsingDatabase::CheckUrl(const std::string& host,
- SBPrefix host_key,
- const std::vector<std::string>& paths,
- std::string* matching_list,
- std::vector<SBPrefix>* prefix_hits) {
- // First see if there are any entries in the db for this host.
- SBHostInfo info;
- if (!ReadInfo(host_key, &info, NULL))
- return; // No hostkey found. This is definitely safe.
-
- std::vector<SBFullHash> prefixes;
- prefixes.resize(paths.size());
- for (size_t i = 0; i < paths.size(); ++i)
- base::SHA256HashString(host + paths[i], &prefixes[i], sizeof(SBFullHash));
-
- std::vector<SBPrefix> hits;
- int list_id = -1;
- if (!info.Contains(prefixes, &list_id, &hits))
- return;
-
- if (list_id != -1) {
- *matching_list = GetListName(list_id);
- } else if (hits.empty()) {
- prefix_hits->push_back(host_key);
- } else {
- for (size_t i = 0; i < hits.size(); ++i)
- prefix_hits->push_back(hits[i]);
- }
-}
-
-bool SafeBrowsingDatabase::ReadInfo(int host_key, SBHostInfo* info, int* id) {
- STATS_COUNTER(L"SB.HostSelect", 1);
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "SELECT id, entries FROM hosts WHERE host=?");
- if (!statement.is_valid()) {
- NOTREACHED();
- return false;
- }
-
- statement->bind_int(0, host_key);
- int result = statement->step();
- if (result == SQLITE_CORRUPT) {
- HandleCorruptDatabase();
- return false;
- }
-
- if (result == SQLITE_DONE)
- return false;
-
- if (result != SQLITE_ROW) {
- DLOG(ERROR) << "SafeBrowsingDatabase got "
- "statement->step() != SQLITE_ROW for "
- << host_key;
- return false;
- }
-
- if (id)
- *id = statement->column_int(0);
-
- return info->Initialize(statement->column_blob(1),
- statement->column_bytes(1));
-}
-
-void SafeBrowsingDatabase::WriteInfo(int host_key,
- const SBHostInfo& info,
- int id) {
- SQLITE_UNIQUE_STATEMENT(statement1, *statement_cache_,
- "INSERT OR REPLACE INTO hosts"
- "(host,entries)"
- "VALUES (?,?)");
-
- SQLITE_UNIQUE_STATEMENT(statement2, *statement_cache_,
- "INSERT OR REPLACE INTO hosts"
- "(id,host,entries)"
- "VALUES (?,?,?)");
-
- SqliteCompiledStatement& statement = id == 0 ? statement1 : statement2;
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- int start_index = 0;
- if (id != 0) {
- statement->bind_int(start_index++, id);
- STATS_COUNTER(L"SB.HostReplace", 1);
- } else {
- STATS_COUNTER(L"SB.HostInsert", 1);
- }
-
- statement->bind_int(start_index++, host_key);
- statement->bind_blob(start_index++, info.data(), info.size());
- int rv = statement->step();
- if (rv == SQLITE_CORRUPT) {
- HandleCorruptDatabase();
- } else {
- DCHECK(rv == SQLITE_DONE);
- }
- AddHostToBloomFilter(host_key);
-}
-
-void SafeBrowsingDatabase::DeleteInfo(int host_key) {
- STATS_COUNTER(L"SB.HostDelete", 1);
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "DELETE FROM hosts WHERE host=?");
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- statement->bind_int(0, host_key);
- int rv = statement->step();
- if (rv == SQLITE_CORRUPT) {
- HandleCorruptDatabase();
- } else {
- DCHECK(rv == SQLITE_DONE);
- }
-}
-
-void SafeBrowsingDatabase::StartThrottledWork() {
- if (process_factory_.empty())
- RunThrottledWork();
-}
-
-void SafeBrowsingDatabase::RunThrottledWork() {
- prefix_miss_cache_.clear();
- while (true) {
- bool done = ProcessChunks();
-
- if (done)
- done = ProcessAddDel();
-
- if (done)
- break;
-
- if (asynchronous_) {
- // For production code, we want to throttle by calling InvokeLater to
- // continue the work after a delay. However for unit tests we depend on
- // updates to happen synchronously.
- MessageLoop::current()->PostDelayedTask(FROM_HERE,
- process_factory_.NewRunnableMethod(
- &SafeBrowsingDatabase::RunThrottledWork), disk_delay_);
- break;
- } else {
- Sleep(kMaxThreadHoldupMs);
- }
- }
-}
-
-void SafeBrowsingDatabase::InsertChunks(const std::string& list_name,
- std::deque<SBChunk>* chunks) {
- // We're going to be updating the bloom filter, so delete the on-disk
- // serialization so that if the process crashes we'll generate a new one on
- // startup, instead of reading a stale filter.
- DeleteBloomFilter();
-
- int list_id = GetListID(list_name);
- std::deque<SBChunk>::iterator i = chunks->begin();
- for (; i != chunks->end(); ++i) {
- SBChunk& chunk = (*i);
- std::deque<SBChunkHost>::iterator j = chunk.hosts.begin();
- for (; j != chunk.hosts.end(); ++j) {
- j->entry->set_list_id(list_id);
- if (j->entry->IsAdd())
- j->entry->set_chunk_id(chunk.chunk_number);
- }
- }
-
- pending_chunks_.push(chunks);
-
- BeginTransaction();
- StartThrottledWork();
-}
-
-bool SafeBrowsingDatabase::ProcessChunks() {
- if (pending_chunks_.empty())
- return true;
-
- while (!pending_chunks_.empty()) {
- std::deque<SBChunk>* chunks = pending_chunks_.front();
- bool done = false;
- // The entries in one chunk are all either adds or subs.
- if (chunks->front().hosts.front().entry->IsAdd()) {
- done = ProcessAddChunks(chunks);
- } else {
- done = ProcessSubChunks(chunks);
- }
-
- if (!done)
- return false;
-
- delete chunks;
- pending_chunks_.pop();
- EndTransaction();
- }
-
- if (!bloom_filter_building_) {
- if (asynchronous_) {
- // When we're updating, there will usually be a bunch of pending_chunks_
- // to process, and we don't want to keep writing the bloom filter to disk
- // 10 or 20 times unnecessarily. So schedule to write it in a minute, and
- // if any new updates happen in the meantime, push that forward.
- if (!bloom_write_factory_.empty())
- bloom_write_factory_.RevokeAll();
-
- MessageLoop::current()->PostDelayedTask(FROM_HERE,
- bloom_write_factory_.NewRunnableMethod(
- &SafeBrowsingDatabase::WriteBloomFilter),
- kBloomFilterWriteDelayMs);
- } else {
- WriteBloomFilter();
- }
- }
-
- if (chunk_inserted_callback_)
- chunk_inserted_callback_->Run();
-
- return true;
-}
-
-bool SafeBrowsingDatabase::ProcessAddChunks(std::deque<SBChunk>* chunks) {
- Time before = Time::Now();
- while (!chunks->empty()) {
- SBChunk& chunk = chunks->front();
- int list_id = chunk.hosts.front().entry->list_id();
- int chunk_id = chunk.chunk_number;
-
- // The server can give us a chunk that we already have because it's part of
- // a range. Don't add it again.
- if (!ChunkExists(list_id, ADD_CHUNK, chunk_id)) {
- while (!chunk.hosts.empty()) {
- // Read the existing record for this host, if it exists.
- SBPrefix host = chunk.hosts.front().host;
- SBEntry* entry = chunk.hosts.front().entry;
-
- UpdateInfo(host, entry, false);
-
- if (!add_chunk_modified_hosts_.empty())
- add_chunk_modified_hosts_.append(",");
-
- add_chunk_modified_hosts_.append(StringPrintf("%d", host));
-
- entry->Destroy();
- chunk.hosts.pop_front();
- if (!chunk.hosts.empty() &&
- (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
- return false;
- }
- }
-
- AddChunkInformation(list_id, ADD_CHUNK, chunk_id,
- add_chunk_modified_hosts_);
- add_chunk_modified_hosts_.clear();
- }
-
- chunks->pop_front();
- }
-
- return true;
-}
-
-bool SafeBrowsingDatabase::ProcessSubChunks(std::deque<SBChunk>* chunks) {
- Time before = Time::Now();
- while (!chunks->empty()) {
- SBChunk& chunk = chunks->front();
- int list_id = chunk.hosts.front().entry->list_id();
- int chunk_id = chunk.chunk_number;
-
- if (!ChunkExists(list_id, SUB_CHUNK, chunk_id)) {
- while (!chunk.hosts.empty()) {
- SBPrefix host = chunk.hosts.front().host;
- SBEntry* entry = chunk.hosts.front().entry;
- UpdateInfo(host, entry, true);
-
- entry->Destroy();
- chunk.hosts.pop_front();
- if (!chunk.hosts.empty() &&
- (Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
- return false;
- }
- }
-
- AddChunkInformation(list_id, SUB_CHUNK, chunk_id, "");
- }
-
- chunks->pop_front();
- }
-
- return true;
-}
-
-void SafeBrowsingDatabase::UpdateInfo(SBPrefix host_key,
- SBEntry* entry,
- bool persist) {
- // If an existing record exists, and the new record is smaller, then reuse
- // its entry to reduce database fragmentation.
- int old_id = 0;
- SBHostInfo info;
- // If the bloom filter isn't there, then assume that the entry exists,
- // otherwise test the bloom filter.
- bool exists = !bloom_filter_.get() || bloom_filter_->Exists(host_key);
- if (exists)
- exists = ReadInfo(host_key, &info, &old_id);
- int old_size = info.size();
-
- if (entry->IsAdd()) {
- info.AddPrefixes(entry);
- } else {
- ClearCachedHashes(entry);
- info.RemovePrefixes(entry, persist);
- }
-
- if (old_size == info.size()) {
- // The entry didn't change, so no point writing it.
- return;
- }
-
- if (!info.size()) {
- // Just delete the existing information instead of writing an empty one.
- if (exists)
- DeleteInfo(host_key);
- return;
- }
-
- if (info.size() > old_size) {
- // New record is larger, so just add a new entry.
- old_id = 0;
- }
-
- WriteInfo(host_key, info, old_id);
-}
-
-void SafeBrowsingDatabase::DeleteChunks(
- std::vector<SBChunkDelete>* chunk_deletes) {
- BeginTransaction();
- bool pending_add_del_were_empty = pending_add_del_.empty();
-
- for (size_t i = 0; i < chunk_deletes->size(); ++i) {
- const SBChunkDelete& chunk = (*chunk_deletes)[i];
- std::vector<int> chunk_numbers;
- RangesToChunks(chunk.chunk_del, &chunk_numbers);
- for (size_t del = 0; del < chunk_numbers.size(); ++del) {
- if (chunk.is_sub_del) {
- SubDel(chunk.list_name, chunk_numbers[del]);
- } else {
- AddDel(chunk.list_name, chunk_numbers[del]);
- }
- }
- }
-
- if (pending_add_del_were_empty && !pending_add_del_.empty()) {
- // Only start a transaction for pending AddDel work if we haven't started
- // one already.
- BeginTransaction();
- StartThrottledWork();
- }
-
- delete chunk_deletes;
- EndTransaction();
-}
-
-void SafeBrowsingDatabase::AddDel(const std::string& list_name,
- int add_chunk_id) {
- STATS_COUNTER(L"SB.ChunkSelect", 1);
- int list_id = GetListID(list_name);
- // Find all the prefixes that came from the given add_chunk_id.
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "SELECT hostkeys FROM chunks WHERE "
- "list_id=? AND chunk_type=? AND chunk_id=?");
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- std::string hostkeys_str;
- statement->bind_int(0, list_id);
- statement->bind_int(1, ADD_CHUNK);
- statement->bind_int(2, add_chunk_id);
- int rv = statement->step();
- if (rv != SQLITE_ROW || !statement->column_string(0, &hostkeys_str)) {
- if (rv == SQLITE_CORRUPT) {
- HandleCorruptDatabase();
- } else {
- NOTREACHED();
- }
-
- return;
- }
-
- AddDelWork work;
- work.list_id = list_id;
- work.add_chunk_id = add_chunk_id;
- pending_add_del_.push(work);
- SplitString(hostkeys_str, ',', &pending_add_del_.back().hostkeys);
-}
-
-bool SafeBrowsingDatabase::ProcessAddDel() {
- if (pending_add_del_.empty())
- return true;
-
- Time before = Time::Now();
- while (!pending_add_del_.empty()) {
- AddDelWork& add_del_work = pending_add_del_.front();
- ClearCachedHashesForChunk(add_del_work.list_id, add_del_work.add_chunk_id);
- std::vector<std::string>& hostkeys = add_del_work.hostkeys;
- for (size_t i = 0; i < hostkeys.size(); ++i) {
- SBPrefix host = atoi(hostkeys[i].c_str());
- // Doesn't matter if we use SUB_PREFIX or SUB_FULL_HASH since if there
- // are no prefixes it's not used.
- SBEntry* entry = SBEntry::Create(SBEntry::SUB_PREFIX, 0);
- entry->set_list_id(add_del_work.list_id);
- entry->set_chunk_id(add_del_work.add_chunk_id);
- UpdateInfo(host, entry, false);
- entry->Destroy();
- if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
- hostkeys.erase(hostkeys.begin(), hostkeys.begin() + i);
- return false;
- }
- }
-
- RemoveChunkId(add_del_work.list_id, ADD_CHUNK, add_del_work.add_chunk_id);
- pending_add_del_.pop();
- }
-
- EndTransaction();
-
- return true;
-}
-
-void SafeBrowsingDatabase::SubDel(const std::string& list_name,
- int sub_chunk_id) {
- RemoveChunkId(GetListID(list_name), SUB_CHUNK, sub_chunk_id);
-}
-
-void SafeBrowsingDatabase::AddChunkInformation(int list_id,
- ChunkType type,
- int chunk_id,
- const std::string& hostkeys) {
- STATS_COUNTER(L"SB.ChunkInsert", 1);
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "INSERT INTO chunks"
- "(list_id,chunk_type,chunk_id,hostkeys)"
- "VALUES (?,?,?,?)");
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- statement->bind_int(0, list_id);
- statement->bind_int(1, type);
- statement->bind_int(2, chunk_id);
- statement->bind_string(3, hostkeys);
- int rv = statement->step();
- if (rv == SQLITE_CORRUPT) {
- HandleCorruptDatabase();
- } else {
- DCHECK(rv == SQLITE_DONE);
- }
-}
-
-void SafeBrowsingDatabase::GetListsInfo(std::vector<SBListChunkRanges>* lists) {
- lists->clear();
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "SELECT name,id FROM list_names");
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- while (true) {
- int rv = statement->step();
- if (rv != SQLITE_ROW) {
- if (rv == SQLITE_CORRUPT)
- HandleCorruptDatabase();
-
- break;
- }
- int list_id = statement->column_int(1);
- lists->push_back(SBListChunkRanges(statement->column_string(0)));
- GetChunkIds(list_id, ADD_CHUNK, &lists->back().adds);
- GetChunkIds(list_id, SUB_CHUNK, &lists->back().subs);
- }
-}
-
-void SafeBrowsingDatabase::GetChunkIds(int list_id,
- ChunkType type,
- std::string* list) {
- list->clear();
- STATS_COUNTER(L"SB.ChunkSelect", 1);
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "SELECT chunk_id FROM chunks WHERE list_id=? AND chunk_type=? "
- "ORDER BY chunk_id");
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- statement->bind_int(0, list_id);
- statement->bind_int(1, type);
-
- std::vector<int> chunk_ids;
- while (true) {
- int rv = statement->step();
- if (rv != SQLITE_ROW) {
- if (rv == SQLITE_CORRUPT)
- HandleCorruptDatabase();
-
- break;
- }
- chunk_ids.push_back(statement->column_int(0));
- }
-
- std::vector<ChunkRange> ranges;
- ChunksToRanges(chunk_ids, &ranges);
- RangesToString(ranges, list);
-}
-
-bool SafeBrowsingDatabase::ChunkExists(int list_id,
- ChunkType type,
- int chunk_id) {
- STATS_COUNTER(L"SB.ChunkSelect", 1);
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "SELECT chunk_id FROM chunks WHERE"
- " list_id=? AND chunk_type=? AND chunk_id=?");
- if (!statement.is_valid()) {
- NOTREACHED();
- return false;
- }
-
- statement->bind_int(0, list_id);
- statement->bind_int(1, type);
- statement->bind_int(2, chunk_id);
-
- int rv = statement->step();
- if (rv == SQLITE_CORRUPT)
- HandleCorruptDatabase();
-
- return rv == SQLITE_ROW;
-}
-
-void SafeBrowsingDatabase::RemoveChunkId(int list_id,
- ChunkType type,
- int chunk_id) {
- // Also remove the add chunk id from add_chunks
- STATS_COUNTER(L"SB.ChunkDelete", 1);
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "DELETE FROM chunks WHERE list_id=? AND chunk_type=? AND chunk_id=?");
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- statement->bind_int(0, list_id);
- statement->bind_int(1, type);
- statement->bind_int(2, chunk_id);
- int rv = statement->step();
- if (rv == SQLITE_CORRUPT) {
- HandleCorruptDatabase();
- } else {
- DCHECK(rv == SQLITE_DONE);
- }
-}
-
-int SafeBrowsingDatabase::AddList(const std::string& name) {
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "INSERT INTO list_names"
- "(id,name)"
- "VALUES (NULL,?)");
- if (!statement.is_valid()) {
- NOTREACHED();
- return 0;
- }
-
- statement->bind_string(0, name);
- int rv = statement->step();
- if (rv != SQLITE_DONE) {
- if (rv == SQLITE_CORRUPT) {
- HandleCorruptDatabase();
- } else {
- NOTREACHED();
- }
-
- return 0;
- }
-
- return static_cast<int>(sqlite3_last_insert_rowid(db_));
-}
-
-int SafeBrowsingDatabase::GetListID(const std::string& name) {
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "SELECT id FROM list_names WHERE name=?");
- if (!statement.is_valid()) {
- NOTREACHED();
- return 0;
- }
-
- statement->bind_string(0, name);
- int result = statement->step();
- if (result == SQLITE_ROW)
- return statement->column_int(0);
-
- if (result == SQLITE_CORRUPT)
- HandleCorruptDatabase();
-
- // There isn't an existing entry so add one.
- return AddList(name);
-}
-
-std::string SafeBrowsingDatabase::GetListName(int id) {
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "SELECT name FROM list_names WHERE id=?");
- if (!statement.is_valid()) {
- NOTREACHED();
- return 0;
- }
-
- statement->bind_int(0, id);
- int result = statement->step();
- if (result != SQLITE_ROW) {
- if (result == SQLITE_CORRUPT)
- HandleCorruptDatabase();
-
- return std::string();
- }
-
- return statement->column_string(0);
-}
-
std::wstring SafeBrowsingDatabase::BloomFilterFilename(
const std::wstring& db_filename) {
return db_filename + kBloomFilterFile;
@@ -998,286 +90,3 @@ void SafeBrowsingDatabase::WriteBloomFilter() {
SB_DLOG(INFO) << "SafeBrowsingDatabase wrote bloom filter in " <<
(Time::Now() - before).InMilliseconds() << " ms";
}
-
-void SafeBrowsingDatabase::AddHostToBloomFilter(int host_key) {
- if (bloom_filter_building_)
- bloom_filter_temp_hostkeys_.push_back(host_key);
- // Even if we're rebuilding the bloom filter, we still need to update the
- // current one since we also use it to decide whether to do certain database
- // operations during update.
- if (bloom_filter_.get())
- bloom_filter_->Insert(host_key);
-}
-
-void SafeBrowsingDatabase::BuildBloomFilter() {
- // A bloom filter needs the size at creation, however doing a select count(*)
- // is too slow since sqlite would have to enumerate each entry to get the
- // count. So instead we load all the hostkeys into memory, and then when
- // we've read all of them and have the total count, we can create the bloom
- // filter.
- bloom_filter_temp_hostkeys_.reserve(kBloomFilterMinSize);
-
- bloom_filter_building_ = true;
- bloom_filter_rebuild_time_ = Time::Now();
-
- BeginTransaction();
-
- OnReadHostKeys(0);
-}
-
-void SafeBrowsingDatabase::OnReadHostKeys(int start_id) {
- // Since reading all the keys in one go could take > 20 seconds, instead we
- // read them in small chunks.
- STATS_COUNTER(L"SB.HostSelectForBloomFilter", 1);
- SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
- "SELECT host,id FROM hosts WHERE id > ? ORDER BY id");
- if (!statement.is_valid()) {
- NOTREACHED();
- return;
- }
-
- statement->bind_int(0, start_id);
- Time before = Time::Now();
- int count = 0;
-
- int next_id = start_id + 1;
- while (true) {
- int rv = statement->step();
- if (rv != SQLITE_ROW) {
- if (rv == SQLITE_CORRUPT)
- HandleCorruptDatabase();
-
- break;
- }
-
- count++;
- bloom_filter_temp_hostkeys_.push_back(statement->column_int(0));
- next_id = statement->column_int(1) + 1;
- if ((Time::Now() - before).InMilliseconds() > kMaxThreadHoldupMs) {
- if (asynchronous_) {
- break;
- } else {
- Sleep(kMaxThreadHoldupMs);
- }
- }
- }
-
- TimeDelta chunk_time = Time::Now() - before;
- int time_ms = static_cast<int>(chunk_time.InMilliseconds());
- SB_DLOG(INFO) << "SafeBrowsingDatabase read " << count << " hostkeys in " <<
- time_ms << " ms";
-
- if (!count || !asynchronous_) {
- OnDoneReadingHostKeys();
- return;
- }
-
- // To avoid hammering the disk and disrupting other parts of Chrome that use
- // the disk, we throttle the rebuilding.
- MessageLoop::current()->PostDelayedTask(FROM_HERE,
- bloom_read_factory_.NewRunnableMethod(
- &SafeBrowsingDatabase::OnReadHostKeys, next_id),
- disk_delay_);
-}
-
-void SafeBrowsingDatabase::OnDoneReadingHostKeys() {
- EndTransaction();
- Time before = Time::Now();
- int number_of_keys = std::max(kBloomFilterMinSize,
- static_cast<int>(bloom_filter_temp_hostkeys_.size()));
- int filter_size = number_of_keys * kBloomFilterSizeRatio;
- BloomFilter* filter = new BloomFilter(filter_size);
- for (size_t i = 0; i < bloom_filter_temp_hostkeys_.size(); ++i)
- filter->Insert(bloom_filter_temp_hostkeys_[i]);
-
- bloom_filter_.reset(filter);
-
- TimeDelta bloom_gen = Time::Now() - before;
- TimeDelta delta = Time::Now() - bloom_filter_rebuild_time_;
- SB_DLOG(INFO) << "SafeBrowsingDatabase built bloom filter in " <<
- delta.InMilliseconds() << " ms total (" << bloom_gen.InMilliseconds()
- << " ms to generate bloom filter). hostkey count: " <<
- bloom_filter_temp_hostkeys_.size();
-
- WriteBloomFilter();
- bloom_filter_building_ = false;
- bloom_filter_temp_hostkeys_.clear();
- bloom_filter_read_count_ = 0;
- bloom_filter_fp_count_ = 0;
-}
-
-bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) {
- if (!bloom_filter_.get())
- return true;
-
- bloom_filter_read_count_++;
-
- std::vector<std::string> hosts;
- safe_browsing_util::GenerateHostsToCheck(url, &hosts);
- if (hosts.size() == 0)
- return false; // Could be about:blank.
-
- SBPrefix host_key;
- if (url.HostIsIPAddress()) {
- base::SHA256HashString(url.host() + "/", &host_key, sizeof(SBPrefix));
- if (bloom_filter_->Exists(host_key))
- return true;
- } else {
- base::SHA256HashString(hosts[0] + "/", &host_key, sizeof(SBPrefix));
- if (bloom_filter_->Exists(host_key))
- return true;
-
- if (hosts.size() > 1) {
- base::SHA256HashString(hosts[1] + "/", &host_key, sizeof(SBPrefix));
- if (bloom_filter_->Exists(host_key))
- return true;
- }
- }
- return false;
-}
-
-void SafeBrowsingDatabase::BeginTransaction() {
- transaction_count_++;
- if (transaction_.get() == NULL) {
- transaction_.reset(new SQLTransaction(db_));
- if (transaction_->Begin() != SQLITE_OK) {
- DCHECK(false) << "Safe browsing database couldn't start transaction";
- transaction_.reset();
- }
- }
-}
-
-void SafeBrowsingDatabase::EndTransaction() {
- if (--transaction_count_ == 0) {
- if (transaction_.get() != NULL) {
- STATS_COUNTER(L"SB.TransactionCommit", 1);
- transaction_->Commit();
- transaction_.reset();
- }
- }
-}
-
-void SafeBrowsingDatabase::GetCachedFullHashes(
- const std::vector<SBPrefix>* prefix_hits,
- std::vector<SBFullHashResult>* full_hits,
- Time last_update) {
- DCHECK(prefix_hits && full_hits);
-
- Time max_age = Time::Now() - TimeDelta::FromMinutes(kMaxStalenessMinutes);
-
- for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin();
- it != prefix_hits->end(); ++it) {
- HashCache::iterator hit = hash_cache_.find(*it);
- if (hit != hash_cache_.end()) {
- HashList& entries = hit->second;
- HashList::iterator eit = entries.begin();
- while (eit != entries.end()) {
- // An entry is valid if we've received an update in the past 45 minutes,
- // or if this particular GetHash was received in the past 45 minutes.
- if (max_age < last_update || eit->received > max_age) {
- SBFullHashResult full_hash;
- memcpy(&full_hash.hash.full_hash,
- &eit->full_hash.full_hash,
- sizeof(SBFullHash));
- full_hash.list_name = GetListName(eit->list_id);
- full_hash.add_chunk_id = eit->add_chunk_id;
- full_hits->push_back(full_hash);
- ++eit;
- } else {
- // Evict the expired entry.
- eit = entries.erase(eit);
- }
- }
-
- if (entries.empty())
- hash_cache_.erase(hit);
- }
- }
-}
-
-void SafeBrowsingDatabase::CacheHashResults(
- const std::vector<SBPrefix>& prefixes,
- const std::vector<SBFullHashResult>& full_hits) {
- if (full_hits.empty()) {
- // These prefixes returned no results, so we store them in order to prevent
- // asking for them again. We flush this cache at the next update.
- for (std::vector<SBPrefix>::const_iterator it = prefixes.begin();
- it != prefixes.end(); ++it) {
- prefix_miss_cache_.insert(*it);
- }
- return;
- }
-
- const Time now = Time::Now();
- for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin();
- it != full_hits.end(); ++it) {
- SBPrefix prefix;
- memcpy(&prefix, &it->hash.full_hash, sizeof(prefix));
- HashList& entries = hash_cache_[prefix];
- HashCacheEntry entry;
- entry.received = now;
- entry.list_id = GetListID(it->list_name);
- entry.add_chunk_id = it->add_chunk_id;
- memcpy(&entry.full_hash, &it->hash.full_hash, sizeof(SBFullHash));
- entries.push_back(entry);
- }
-}
-
-void SafeBrowsingDatabase::ClearCachedHashes(const SBEntry* entry) {
- for (int i = 0; i < entry->prefix_count(); ++i) {
- SBPrefix prefix;
- if (entry->type() == SBEntry::SUB_FULL_HASH)
- memcpy(&prefix, &entry->FullHashAt(i), sizeof(SBPrefix));
- else
- prefix = entry->PrefixAt(i);
-
- HashCache::iterator it = hash_cache_.find(prefix);
- if (it != hash_cache_.end())
- hash_cache_.erase(it);
- }
-}
-
-// This clearing algorithm is a little inefficient, but we don't expect there to
-// be too many entries for this to matter. Also, this runs as a background task
-// during an update, so no user action is blocking on it.
-void SafeBrowsingDatabase::ClearCachedHashesForChunk(int list_id,
- int add_chunk_id) {
- HashCache::iterator it = hash_cache_.begin();
- while (it != hash_cache_.end()) {
- HashList& entries = it->second;
- HashList::iterator eit = entries.begin();
- while (eit != entries.end()) {
- if (eit->list_id == list_id && eit->add_chunk_id == add_chunk_id)
- eit = entries.erase(eit);
- else
- ++eit;
- }
- if (entries.empty())
- it = hash_cache_.erase(it);
- else
- ++it;
- }
-}
-
-void SafeBrowsingDatabase::HandleCorruptDatabase() {
- MessageLoop::current()->PostTask(FROM_HERE,
- reset_factory_.NewRunnableMethod(
- &SafeBrowsingDatabase::OnHandleCorruptDatabase));
-}
-
-void SafeBrowsingDatabase::OnHandleCorruptDatabase() {
- ResetDatabase();
- DCHECK(false) << "SafeBrowsing database was corrupt and reset";
-}
-
-void SafeBrowsingDatabase::HandleResume() {
- disk_delay_ = kOnResumeHoldupMs;
- MessageLoop::current()->PostDelayedTask(
- FROM_HERE,
- resume_factory_.NewRunnableMethod(&SafeBrowsingDatabase::OnResumeDone),
- kOnResumeHoldupMs);
-}
-
-void SafeBrowsingDatabase::OnResumeDone() {
- disk_delay_ = kMaxThreadHoldupMs;
-}