diff options
10 files changed, 749 insertions, 1384 deletions
diff --git a/chrome/browser/safe_browsing/bloom_filter.cc b/chrome/browser/safe_browsing/bloom_filter.cc index 55cb4da..3d9772d 100644 --- a/chrome/browser/safe_browsing/bloom_filter.cc +++ b/chrome/browser/safe_browsing/bloom_filter.cc @@ -4,9 +4,6 @@ #include "chrome/browser/safe_browsing/bloom_filter.h" -#include <string.h> - -#include "base/logging.h" #include "base/rand_util.h" #include "net/base/file_stream.h" #include "net/base/net_errors.h" diff --git a/chrome/browser/safe_browsing/bloom_filter.h b/chrome/browser/safe_browsing/bloom_filter.h index 5d9bbf6..2abe402 100644 --- a/chrome/browser/safe_browsing/bloom_filter.h +++ b/chrome/browser/safe_browsing/bloom_filter.h @@ -19,13 +19,12 @@ #include <vector> -#include "base/file_path.h" #include "base/ref_counted.h" -#include "base/scoped_ptr.h" -#include "base/basictypes.h" #include "chrome/browser/safe_browsing/safe_browsing_util.h" #include "testing/gtest/include/gtest/gtest_prod.h" +class FilePath; + class BloomFilter : public base::RefCountedThreadSafe<BloomFilter> { public: typedef uint64 HashKey; diff --git a/chrome/browser/safe_browsing/database_perftest.cc b/chrome/browser/safe_browsing/database_perftest.cc deleted file mode 100644 index 29a1cdb..0000000 --- a/chrome/browser/safe_browsing/database_perftest.cc +++ /dev/null @@ -1,542 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <stdio.h> -#include <stdlib.h> - -#include <limits> -#include <set> - -#include "base/file_path.h" -#include "base/file_util.h" -#include "base/logging.h" -#include "base/path_service.h" -#include "base/perftimer.h" -#include "base/rand_util.h" -#include "base/scoped_ptr.h" -#include "base/string_util.h" -#include "base/test/test_file_util.h" -#include "chrome/browser/safe_browsing/safe_browsing_database.h" -#include "chrome/common/chrome_paths.h" -#include "chrome/common/sqlite_compiled_statement.h" -#include "chrome/common/sqlite_utils.h" -#include "googleurl/src/gurl.h" -#include "testing/gtest/include/gtest/gtest.h" - -namespace { - -// Base class for a safebrowsing database. Derived classes can implement -// different types of tables to compare performance characteristics. -class Database { - public: - Database() : db_(NULL) { - } - - ~Database() { - if (db_) { - sqlite3_close(db_); - db_ = NULL; - } - } - - void Init(const FilePath& name, bool create) { - // get an empty file for the test DB - FilePath filename; - PathService::Get(base::DIR_TEMP, &filename); - filename = filename.Append(name); - - if (create) { - file_util::Delete(filename, false); - } else { - DLOG(INFO) << "evicting " << name.value() << " ..."; - file_util::EvictFileFromSystemCache(filename); - DLOG(INFO) << "... evicted"; - } - - const std::string sqlite_path = WideToUTF8(filename.ToWStringHack()); - ASSERT_EQ(sqlite3_open(sqlite_path.c_str(), &db_), SQLITE_OK); - - statement_cache_.set_db(db_); - - if (!create) - return; - - ASSERT_TRUE(CreateTable()); - } - - virtual bool CreateTable() = 0; - virtual bool Add(int host_key, int* prefixes, int count) = 0; - virtual bool Read(int host_key, int* prefixes, int size, int* count) = 0; - virtual int Count() = 0; - virtual std::string GetDBSuffix() = 0; - - sqlite3* db() { return db_; } - - protected: - // The database connection. - sqlite3* db_; - - // Cache of compiled statements for our database. - SqliteStatementCache statement_cache_; -}; - -class SimpleDatabase : public Database { - public: - virtual bool CreateTable() { - if (DoesSqliteTableExist(db_, "hosts")) - return false; - - return sqlite3_exec(db_, "CREATE TABLE hosts (" - "host INTEGER," - "prefixes BLOB)", - NULL, NULL, NULL) == SQLITE_OK; - } - - virtual bool Add(int host_key, int* prefixes, int count) { - SQLITE_UNIQUE_STATEMENT(statement, statement_cache_, - "INSERT OR REPLACE INTO hosts" - "(host,prefixes)" - "VALUES (?,?)"); - if (!statement.is_valid()) - return false; - - statement->bind_int(0, host_key); - statement->bind_blob(1, prefixes, count*sizeof(int)); - return statement->step() == SQLITE_DONE; - } - - virtual bool Read(int host_key, int* prefixes, int size, int* count) { - SQLITE_UNIQUE_STATEMENT(statement, statement_cache_, - "SELECT host, prefixes FROM hosts WHERE host=?"); - if (!statement.is_valid()) - return false; - - statement->bind_int(0, host_key); - - int rv = statement->step(); - if (rv == SQLITE_DONE) { - // no hostkey found, not an error - *count = -1; - return true; - } - - if (rv != SQLITE_ROW) - return false; - - *count = statement->column_bytes(1); - if (*count > size) - return false; - - memcpy(prefixes, statement->column_blob(0), *count); - return true; - } - - int Count() { - SQLITE_UNIQUE_STATEMENT(statement, statement_cache_, - "SELECT COUNT(*) FROM hosts"); - if (!statement.is_valid()) { - EXPECT_TRUE(false); - return -1; - } - - if (statement->step() != SQLITE_ROW) { - EXPECT_TRUE(false); - return -1; - } - - return statement->column_int(0); - } - - std::string GetDBSuffix() { - return "Simple"; - } -}; - -class IndexedDatabase : public SimpleDatabase { - public: - virtual bool CreateTable() { - return sqlite3_exec(db_, "CREATE TABLE hosts (" - "host INTEGER PRIMARY KEY," - "prefixes BLOB)", - NULL, NULL, NULL) == SQLITE_OK; - } - - std::string GetDBSuffix() { - return "Indexed"; - } -}; - -class IndexedWithIDDatabase : public SimpleDatabase { - public: - virtual bool CreateTable() { - return sqlite3_exec(db_, "CREATE TABLE hosts (" - "id INTEGER PRIMARY KEY AUTOINCREMENT," - "host INTEGER UNIQUE," - "prefixes BLOB)", - NULL, NULL, NULL) == SQLITE_OK; - } - - virtual bool Add(int host_key, int* prefixes, int count) { - SQLITE_UNIQUE_STATEMENT(statement, statement_cache_, - "INSERT OR REPLACE INTO hosts" - "(id,host,prefixes)" - "VALUES (NULL,?,?)"); - if (!statement.is_valid()) - return false; - - statement->bind_int(0, host_key); - statement->bind_blob(1, prefixes, count * sizeof(int)); - return statement->step() == SQLITE_DONE; - } - - std::string GetDBSuffix() { - return "IndexedWithID"; - } -}; - -} // namespace - -class SafeBrowsing: public testing::Test { - protected: - // Get the test parameters from the test case's name. - virtual void SetUp() { - logging::InitLogging( - NULL, logging::LOG_ONLY_TO_SYSTEM_DEBUG_LOG, - logging::LOCK_LOG_FILE, - logging::DELETE_OLD_LOG_FILE); - - const testing::TestInfo* const test_info = - testing::UnitTest::GetInstance()->current_test_info(); - std::string test_name = test_info->name(); - - TestType type; - if (test_name.find("Write") != std::string::npos) { - type = WRITE; - } else if (test_name.find("Read") != std::string::npos) { - type = READ; - } else { - type = COUNT; - } - - if (test_name.find("IndexedWithID") != std::string::npos) { - db_ = new IndexedWithIDDatabase(); - } else if (test_name.find("Indexed") != std::string::npos) { - db_ = new IndexedDatabase(); - } else { - db_ = new SimpleDatabase(); - } - - - char multiplier_letter = test_name[test_name.size() - 1]; - int multiplier = 0; - if (multiplier_letter == 'K') { - multiplier = 1000; - } else if (multiplier_letter == 'M') { - multiplier = 1000000; - } else { - NOTREACHED(); - } - - size_t index = test_name.size() - 1; - while (index != 0 && test_name[index] != '_') - index--; - - DCHECK(index); - const char* count_start = test_name.c_str() + ++index; - int count = atoi(count_start); - int size = count * multiplier; - - db_name_ = StringPrintf("TestSafeBrowsing"); - db_name_.append(count_start); - db_name_.append(db_->GetDBSuffix()); - - FilePath path = FilePath::FromWStringHack(ASCIIToWide(db_name_)); - db_->Init(path, type == WRITE); - - if (type == WRITE) { - WriteEntries(size); - } else if (type == READ) { - ReadEntries(100); - } else { - CountEntries(); - } - } - - virtual void TearDown() { - delete db_; - } - - // This writes the given number of entries to the database. - void WriteEntries(int count) { - int prefixes[4]; - - SQLTransaction transaction(db_->db()); - transaction.Begin(); - - for (int i = 0; i < count; i++) { - int hostkey = base::RandInt(std::numeric_limits<int>::min(), - std::numeric_limits<int>::max()); - ASSERT_TRUE(db_->Add(hostkey, prefixes, 1)); - } - - transaction.Commit(); - } - - // Read the given number of entries from the database. - void ReadEntries(int count) { - int prefixes[4]; - - int64 total_ms = 0; - - for (int i = 0; i < count; ++i) { - int key = base::RandInt(std::numeric_limits<int>::min(), - std::numeric_limits<int>::max()); - - PerfTimer timer; - - int read; - ASSERT_TRUE(db_->Read(key, prefixes, sizeof(prefixes), &read)); - - int64 time_ms = timer.Elapsed().InMilliseconds(); - total_ms += time_ms; - DLOG(INFO) << "Read in " << time_ms << " ms."; - } - - DLOG(INFO) << db_name_ << " read " << count << " entries in average of " << - total_ms/count << " ms."; - } - - // Counts how many entries are in the database, which effectively does a full - // table scan. - void CountEntries() { - PerfTimer timer; - - int count = db_->Count(); - - DLOG(INFO) << db_name_ << " counted " << count << " entries in " << - timer.Elapsed().InMilliseconds() << " ms"; - } - - enum TestType { - WRITE, - READ, - COUNT, - }; - - private: - - Database* db_; - std::string db_name_; -}; - -TEST_F(SafeBrowsing, DISABLED_Write_100K) { -} - -TEST_F(SafeBrowsing, DISABLED_Read_100K) { -} - -TEST_F(SafeBrowsing, DISABLED_WriteIndexed_100K) { -} - -TEST_F(SafeBrowsing, DISABLED_ReadIndexed_100K) { -} - -TEST_F(SafeBrowsing, DISABLED_WriteIndexed_250K) { -} - -TEST_F(SafeBrowsing, DISABLED_ReadIndexed_250K) { -} - -TEST_F(SafeBrowsing, DISABLED_WriteIndexed_500K) { -} - -TEST_F(SafeBrowsing, DISABLED_ReadIndexed_500K) { -} - -TEST_F(SafeBrowsing, DISABLED_WriteIndexedWithID_250K) { -} - -TEST_F(SafeBrowsing, DISABLED_ReadIndexedWithID_250K) { -} - -TEST_F(SafeBrowsing, DISABLED_WriteIndexedWithID_500K) { -} - -TEST_F(SafeBrowsing, DISABLED_ReadIndexedWithID_500K) { -} - -TEST_F(SafeBrowsing, DISABLED_CountIndexed_250K) { -} - -TEST_F(SafeBrowsing, DISABLED_CountIndexed_500K) { -} - -TEST_F(SafeBrowsing, DISABLED_CountIndexedWithID_250K) { -} - -TEST_F(SafeBrowsing, DISABLED_CountIndexedWithID_500K) { -} - - -class SafeBrowsingDatabaseTest { - public: - explicit SafeBrowsingDatabaseTest(const FilePath& filename) { - logging::InitLogging( - NULL, logging::LOG_ONLY_TO_SYSTEM_DEBUG_LOG, - logging::LOCK_LOG_FILE, - logging::DELETE_OLD_LOG_FILE); - - FilePath tmp_path; - PathService::Get(base::DIR_TEMP, &tmp_path); - path_ = tmp_path.Append(filename); - } - - void Create(int size) { - file_util::Delete(path_, false); - - scoped_ptr<SafeBrowsingDatabase> database(SafeBrowsingDatabase::Create()); - database->SetSynchronous(); - database->Init(path_, NULL); - - int chunk_id = 0; - int total_host_keys = size; - int host_keys_per_chunk = 100; - - std::deque<SBChunk>* chunks = new std::deque<SBChunk>; - - for (int i = 0; i < total_host_keys / host_keys_per_chunk; ++i) { - chunks->push_back(SBChunk()); - chunks->back().chunk_number = ++chunk_id; - - for (int j = 0; j < host_keys_per_chunk; ++j) { - SBChunkHost host; - host.host = base::RandInt(std::numeric_limits<int>::min(), - std::numeric_limits<int>::max()); - host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); - host.entry->SetPrefixAt(0, 0x2425525); - host.entry->SetPrefixAt(1, 0x1536366); - - chunks->back().hosts.push_back(host); - } - } - - database->InsertChunks("goog-malware", chunks); - } - - void Read(bool use_bloom_filter) { - int keys_to_read = 500; - file_util::EvictFileFromSystemCache(path_); - - scoped_ptr<SafeBrowsingDatabase> database(SafeBrowsingDatabase::Create()); - database->SetSynchronous(); - database->Init(path_, NULL); - - PerfTimer total_timer; - int64 db_ms = 0; - int keys_from_db = 0; - for (int i = 0; i < keys_to_read; ++i) { - int key = base::RandInt(std::numeric_limits<int>::min(), - std::numeric_limits<int>::max()); - - std::string url = StringPrintf("http://www.%d.com/blah.html", key); - - std::string matching_list; - std::vector<SBPrefix> prefix_hits; - std::vector<SBFullHashResult> full_hits; - GURL gurl(url); - if (!use_bloom_filter || database->NeedToCheckUrl(gurl)) { - PerfTimer timer; - database->ContainsUrl(gurl, - &matching_list, - &prefix_hits, - &full_hits, - base::Time::Now()); - - int64 time_ms = timer.Elapsed().InMilliseconds(); - - DLOG(INFO) << "Read from db in " << time_ms << " ms."; - - db_ms += time_ms; - keys_from_db++; - } - } - - int64 total_ms = total_timer.Elapsed().InMilliseconds(); - - DLOG(INFO) << path_.BaseName().value() << " read " << keys_to_read << - " entries in " << total_ms << " ms. " << keys_from_db << - " keys were read from the db, with average read taking " << - db_ms / keys_from_db << " ms"; - } - - void BuildBloomFilter() { - file_util::EvictFileFromSystemCache(path_); - file_util::Delete(SafeBrowsingDatabase::BloomFilterFilename(path_), false); - - PerfTimer total_timer; - - scoped_ptr<SafeBrowsingDatabase> database(SafeBrowsingDatabase::Create()); - database->SetSynchronous(); - database->Init(path_, NULL); - - int64 total_ms = total_timer.Elapsed().InMilliseconds(); - - DLOG(INFO) << path_.BaseName().value() << - " built bloom filter in " << total_ms << " ms."; - } - - private: - FilePath path_; -}; - -// Adds 100K host records. -TEST(SafeBrowsingDatabase, DISABLED_FillUp100K) { - SafeBrowsingDatabaseTest db(FilePath(FILE_PATH_LITERAL("SafeBrowsing100K"))); - db.Create(100000); -} - -// Adds 250K host records. -TEST(SafeBrowsingDatabase, DISABLED_FillUp250K) { - SafeBrowsingDatabaseTest db(FilePath(FILE_PATH_LITERAL("SafeBrowsing250K"))); - db.Create(250000); -} - -// Adds 500K host records. -TEST(SafeBrowsingDatabase, DISABLED_FillUp500K) { - SafeBrowsingDatabaseTest db(FilePath(FILE_PATH_LITERAL("SafeBrowsing500K"))); - db.Create(500000); -} - -// Reads 500 entries and prints the timing. -TEST(SafeBrowsingDatabase, DISABLED_ReadFrom250K) { - SafeBrowsingDatabaseTest db(FilePath(FILE_PATH_LITERAL("SafeBrowsing250K"))); - db.Read(false); -} - -TEST(SafeBrowsingDatabase, DISABLED_ReadFrom500K) { - SafeBrowsingDatabaseTest db(FilePath(FILE_PATH_LITERAL("SafeBrowsing500K"))); - db.Read(false); -} - -// Read 500 entries with a bloom filter and print the timing. -TEST(SafeBrowsingDatabase, DISABLED_BloomReadFrom250K) { - SafeBrowsingDatabaseTest db(FilePath(FILE_PATH_LITERAL("SafeBrowsing250K"))); - db.Read(true); -} - -TEST(SafeBrowsingDatabase, DISABLED_BloomReadFrom500K) { - SafeBrowsingDatabaseTest db(FilePath(FILE_PATH_LITERAL("SafeBrowsing500K"))); - db.Read(true); -} - -// Test how long bloom filter creation takes. -TEST(SafeBrowsingDatabase, DISABLED_BuildBloomFilter250K) { - SafeBrowsingDatabaseTest db(FilePath(FILE_PATH_LITERAL("SafeBrowsing250K"))); - db.BuildBloomFilter(); -} - -TEST(SafeBrowsingDatabase, DISABLED_BuildBloomFilter500K) { - SafeBrowsingDatabaseTest db(FilePath(FILE_PATH_LITERAL("SafeBrowsing500K"))); - db.BuildBloomFilter(); -} diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index c97f696..aa6a97a 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -6,10 +6,8 @@ #include "base/file_util.h" #include "base/histogram.h" -#include "base/logging.h" -#include "base/sha2.h" +#include "chrome/browser/safe_browsing/bloom_filter.h" #include "chrome/browser/safe_browsing/safe_browsing_database_bloom.h" -#include "googleurl/src/gurl.h" using base::Time; @@ -22,37 +20,7 @@ SafeBrowsingDatabase* SafeBrowsingDatabase::Create() { return new SafeBrowsingDatabaseBloom; } -bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) { - // Keep a reference to the current bloom filter in case the database rebuilds - // it while we're accessing it. - scoped_refptr<BloomFilter> filter = bloom_filter_; - if (!filter.get()) - return true; - - IncrementBloomFilterReadCount(); - - std::vector<std::string> hosts; - safe_browsing_util::GenerateHostsToCheck(url, &hosts); - if (hosts.size() == 0) - return false; // Could be about:blank. - - SBPrefix host_key; - if (url.HostIsIPAddress()) { - base::SHA256HashString(url.host() + "/", &host_key, sizeof(SBPrefix)); - if (filter->Exists(host_key)) - return true; - } else { - base::SHA256HashString(hosts[0] + "/", &host_key, sizeof(SBPrefix)); - if (filter->Exists(host_key)) - return true; - - if (hosts.size() > 1) { - base::SHA256HashString(hosts[1] + "/", &host_key, sizeof(SBPrefix)); - if (filter->Exists(host_key)) - return true; - } - } - return false; +SafeBrowsingDatabase::~SafeBrowsingDatabase() { } // static diff --git a/chrome/browser/safe_browsing/safe_browsing_database.h b/chrome/browser/safe_browsing/safe_browsing_database.h index 828be6a..5adc573 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.h +++ b/chrome/browser/safe_browsing/safe_browsing_database.h @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -11,28 +11,23 @@ #include <vector> #include "base/file_path.h" -#include "base/hash_tables.h" -#include "base/ref_counted.h" #include "base/scoped_ptr.h" #include "base/task.h" -#include "base/time.h" -#include "chrome/browser/safe_browsing/bloom_filter.h" #include "chrome/browser/safe_browsing/safe_browsing_util.h" #include "testing/gtest/include/gtest/gtest_prod.h" +class BloomFilter; class GURL; // Encapsulates the database that stores information about phishing and malware // sites. There is one on-disk database for all profiles, as it doesn't // contain user-specific data. This object is not thread-safe, i.e. all its -// methods should be used on the same thread that it was created on, with the -// exception of NeedToCheckUrl. +// methods should be used on the same thread that it was created on. class SafeBrowsingDatabase { public: // Factory method for obtaining a SafeBrowsingDatabase implementation. static SafeBrowsingDatabase* Create(); - - virtual ~SafeBrowsingDatabase() {} + virtual ~SafeBrowsingDatabase(); // Initializes the database with the given filename. The callback is // executed after finishing a chunk. @@ -42,13 +37,6 @@ class SafeBrowsingDatabase { // Deletes the current database and creates a new one. virtual bool ResetDatabase() = 0; - // This function can be called on any thread to check if the given url may be - // in the database. If this function returns false, it is definitely not in - // the database and ContainsUrl doesn't need to be called. If it returns - // true, then the url might be in the database and ContainsUrl needs to be - // called. This function can only be called after Init succeeded. - virtual bool NeedToCheckUrl(const GURL& url); - // Returns false if the given url is not in the database. If it returns // true, then either "list" is the name of the matching list, or prefix_hits // contains the matching hash prefixes. @@ -69,11 +57,6 @@ class SafeBrowsingDatabase { // Returns the lists and their add/sub chunks. virtual void GetListsInfo(std::vector<SBListChunkRanges>* lists) = 0; - // Call this to make all database operations synchronous. While useful for - // testing, this should never be called in chrome.exe because it can lead - // to blocking user requests. - virtual void SetSynchronous() = 0; - // Store the results of a GetHash response. In the case of empty results, we // cache the prefixes until the next update so that we don't have to issue // further GetHash requests we know will be empty. @@ -88,6 +71,17 @@ class SafeBrowsingDatabase { virtual FilePath filename() const { return filename_; } protected: + struct HashCacheEntry { + SBFullHash full_hash; + int list_id; + int add_chunk_id; + int sub_chunk_id; + base::Time received; + }; + + typedef std::list<HashCacheEntry> HashList; + typedef base::hash_map<SBPrefix, HashList> HashCache; + friend class SafeBrowsingDatabaseTest; FRIEND_TEST(SafeBrowsingDatabase, HashCaching); @@ -105,20 +99,6 @@ class SafeBrowsingDatabase { // Implementation specific bloom filter building. virtual void BuildBloomFilter() = 0; - // Measuring false positive rate. Call this each time we look in the filter. - virtual void IncrementBloomFilterReadCount() {} - - typedef struct HashCacheEntry { - SBFullHash full_hash; - int list_id; - int add_chunk_id; - int sub_chunk_id; - base::Time received; - } HashCacheEntry; - - typedef std::list<HashCacheEntry> HashList; - typedef base::hash_map<SBPrefix, HashList> HashCache; - scoped_ptr<HashCache> hash_cache_; HashCache* hash_cache() { return hash_cache_.get(); } diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc index 51946eb..6a40167 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc @@ -5,17 +5,12 @@ #include "chrome/browser/safe_browsing/safe_browsing_database_bloom.h" #include "base/auto_reset.h" -#include "base/compiler_specific.h" #include "base/file_util.h" -#include "base/logging.h" #include "base/message_loop.h" -#include "base/platform_thread.h" #include "base/process_util.h" #include "base/sha2.h" #include "base/stats_counters.h" -#include "base/string_util.h" #include "chrome/browser/safe_browsing/bloom_filter.h" -#include "chrome/browser/safe_browsing/chunk_range.h" #include "chrome/common/sqlite_compiled_statement.h" #include "chrome/common/sqlite_utils.h" #include "googleurl/src/gurl.h" @@ -64,130 +59,6 @@ void SafeBrowsingDatabaseBloom::Init(const FilePath& filename, chunk_inserted_callback_.reset(chunk_inserted_callback); } -bool SafeBrowsingDatabaseBloom::Open() { - if (db_) - return true; - - if (OpenSqliteDb(filename_, &db_) != SQLITE_OK) { - sqlite3_close(db_); - db_ = NULL; - return false; - } - - // Run the database in exclusive mode. Nobody else should be accessing the - // database while we're running, and this will give somewhat improved perf. - sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL); - - statement_cache_.reset(new SqliteStatementCache(db_)); - - if (!DoesSqliteTableExist(db_, "add_prefix")) { - if (!CreateTables()) { - // Database could be corrupt, try starting from scratch. - if (!ResetDatabase()) - return false; - } - } else if (!CheckCompatibleVersion()) { - if (!ResetDatabase()) - return false; - } - - return true; -} - -bool SafeBrowsingDatabaseBloom::Close() { - if (!db_) - return true; - - insert_transaction_.reset(); - statement_cache_.reset(); // Must free statements before closing DB. - bool result = sqlite3_close(db_) == SQLITE_OK; - db_ = NULL; - - return result; -} - -bool SafeBrowsingDatabaseBloom::CreateTables() { - SQLTransaction transaction(db_); - transaction.Begin(); - - // Store 32 bit add prefixes here. - if (sqlite3_exec(db_, "CREATE TABLE add_prefix (" - "chunk INTEGER," - "prefix INTEGER)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store 32 bit sub prefixes here. - if (sqlite3_exec(db_, "CREATE TABLE sub_prefix (" - "chunk INTEGER," - "add_chunk INTEGER," - "prefix INTEGER)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store 256 bit add full hashes (and GetHash results) here. - if (sqlite3_exec(db_, "CREATE TABLE add_full_hash (" - "chunk INTEGER," - "prefix INTEGER," - "receive_time INTEGER," - "full_hash BLOB)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store 256 bit sub full hashes here. - if (sqlite3_exec(db_, "CREATE TABLE sub_full_hash (" - "chunk INTEGER," - "add_chunk INTEGER," - "prefix INTEGER," - "full_hash BLOB)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - // Store all the add and sub chunk numbers we receive. We cannot just rely on - // the prefix tables to generate these lists, since some chunks will have zero - // entries (and thus no prefixes), or potentially an add chunk can have all of - // its entries sub'd without receiving an AddDel, or a sub chunk might have - // been entirely consumed by adds. In these cases, we still have to report the - // chunk number but it will not have any prefixes in the prefix tables. - // - // TODO(paulg): Investigate storing the chunks as a string of ChunkRanges, one - // string for each of phish-add, phish-sub, malware-add, malware-sub. This - // might be better performance when the number of chunks is large, and is the - // natural format for the update request. - if (sqlite3_exec(db_, "CREATE TABLE add_chunks (" - "chunk INTEGER PRIMARY KEY)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - if (sqlite3_exec(db_, "CREATE TABLE sub_chunks (" - "chunk INTEGER PRIMARY KEY)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - - std::string version = "PRAGMA user_version="; - version += StringPrintf("%d", kDatabaseVersion); - - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - if (statement->step() != SQLITE_DONE) - return false; - - transaction.Commit(); - add_count_ = 0; - - return true; -} - bool SafeBrowsingDatabaseBloom::ResetDatabase() { // Open() can call us when trying to handle potential database corruption. // Because we call Open() at the bottom of the function, we need to guard @@ -218,30 +89,6 @@ bool SafeBrowsingDatabaseBloom::ResetDatabase() { return Open(); } -bool SafeBrowsingDatabaseBloom::CheckCompatibleVersion() { - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, - "PRAGMA user_version"); - if (!statement.is_valid()) { - NOTREACHED(); - return false; - } - - int result = statement->step(); - if (result != SQLITE_ROW) - return false; - - return statement->column_int(0) == kDatabaseVersion; -} - -void SafeBrowsingDatabaseBloom::ClearUpdateCaches() { - AutoLock lock(lookup_lock_); - add_del_cache_.clear(); - sub_del_cache_.clear(); - add_chunk_cache_.clear(); - sub_chunk_cache_.clear(); - prefix_miss_cache_.clear(); -} - bool SafeBrowsingDatabaseBloom::ContainsUrl( const GURL& url, std::string* matching_list, @@ -308,15 +155,6 @@ bool SafeBrowsingDatabaseBloom::ContainsUrl( return false; } -bool SafeBrowsingDatabaseBloom::NeedToCheckUrl(const GURL& url) { - // Since everything is in the bloom filter, doing anything here would wind - // up just duplicating work that would happen in ContainsURL. - // It's possible that we may want to add a hostkey-based first-level cache - // on the front of this to minimize hash generation, but we'll need to do - // some measurements to verify that. - return true; -} - void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name, std::deque<SBChunk>* chunks) { if (chunks->empty()) @@ -374,6 +212,79 @@ void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name, chunk_inserted_callback_->Run(); } +void SafeBrowsingDatabaseBloom::DeleteChunks( + std::vector<SBChunkDelete>* chunk_deletes) { + if (chunk_deletes->empty()) + return; + + int list_id = safe_browsing_util::GetListId(chunk_deletes->front().list_name); + + for (size_t i = 0; i < chunk_deletes->size(); ++i) { + const SBChunkDelete& chunk = (*chunk_deletes)[i]; + std::vector<int> chunk_numbers; + RangesToChunks(chunk.chunk_del, &chunk_numbers); + for (size_t del = 0; del < chunk_numbers.size(); ++del) { + int encoded_chunk = EncodeChunkId(chunk_numbers[del], list_id); + if (chunk.is_sub_del) + sub_del_cache_.insert(encoded_chunk); + else + add_del_cache_.insert(encoded_chunk); + } + } + + delete chunk_deletes; +} + +void SafeBrowsingDatabaseBloom::GetListsInfo( + std::vector<SBListChunkRanges>* lists) { + DCHECK(lists); + lists->clear(); + + ReadChunkNumbers(); + + lists->push_back(SBListChunkRanges(safe_browsing_util::kMalwareList)); + GetChunkIds(safe_browsing_util::MALWARE, ADD_CHUNK, &lists->back().adds); + GetChunkIds(safe_browsing_util::MALWARE, SUB_CHUNK, &lists->back().subs); + + lists->push_back(SBListChunkRanges(safe_browsing_util::kPhishingList)); + GetChunkIds(safe_browsing_util::PHISH, ADD_CHUNK, &lists->back().adds); + GetChunkIds(safe_browsing_util::PHISH, SUB_CHUNK, &lists->back().subs); + + return; +} + +void SafeBrowsingDatabaseBloom::CacheHashResults( + const std::vector<SBPrefix>& prefixes, + const std::vector<SBFullHashResult>& full_hits) { + AutoLock lock(lookup_lock_); + + if (full_hits.empty()) { + // These prefixes returned no results, so we store them in order to prevent + // asking for them again. We flush this cache at the next update. + for (std::vector<SBPrefix>::const_iterator it = prefixes.begin(); + it != prefixes.end(); ++it) { + prefix_miss_cache_.insert(*it); + } + return; + } + + const Time now = Time::Now(); + for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin(); + it != full_hits.end(); ++it) { + SBPrefix prefix = it->hash.prefix; + HashList& entries = (*hash_cache_)[prefix]; + HashCacheEntry entry; + entry.received = now; + entry.list_id = safe_browsing_util::GetListId(it->list_name); + entry.add_chunk_id = EncodeChunkId(it->add_chunk_id, entry.list_id); + entry.full_hash = it->hash; + entries.push_back(entry); + + // Also push a copy to the pending write queue. + pending_full_hashes_.push_back(entry); + } +} + bool SafeBrowsingDatabaseBloom::UpdateStarted() { DCHECK(insert_transaction_.get() == NULL); @@ -401,218 +312,143 @@ void SafeBrowsingDatabaseBloom::UpdateFinished(bool update_succeeded) { ClearUpdateCaches(); } -void SafeBrowsingDatabaseBloom::InsertAdd(SBPrefix host, SBEntry* entry) { - STATS_COUNTER("SB.HostInsert", 1); - int encoded = EncodeChunkId(entry->chunk_id(), entry->list_id()); +bool SafeBrowsingDatabaseBloom::Open() { + if (db_) + return true; - if (entry->type() == SBEntry::ADD_FULL_HASH) { - base::Time receive_time = base::Time::Now(); - for (int i = 0; i < entry->prefix_count(); ++i) { - SBFullHash full_hash = entry->FullHashAt(i); - SBPrefix prefix = full_hash.prefix; - InsertAddPrefix(prefix, encoded); - InsertAddFullHash(prefix, encoded, receive_time, full_hash); - } - return; + if (OpenSqliteDb(filename_, &db_) != SQLITE_OK) { + sqlite3_close(db_); + db_ = NULL; + return false; } - // This entry contains only regular (32 bit) prefixes. - int count = entry->prefix_count(); - if (count == 0) { - InsertAddPrefix(host, encoded); - } else { - for (int i = 0; i < count; i++) { - SBPrefix prefix = entry->PrefixAt(i); - InsertAddPrefix(prefix, encoded); + // Run the database in exclusive mode. Nobody else should be accessing the + // database while we're running, and this will give somewhat improved perf. + sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL); + + statement_cache_.reset(new SqliteStatementCache(db_)); + + if (!DoesSqliteTableExist(db_, "add_prefix")) { + if (!CreateTables()) { + // Database could be corrupt, try starting from scratch. + if (!ResetDatabase()) + return false; } + } else if (!CheckCompatibleVersion()) { + if (!ResetDatabase()) + return false; } -} -void SafeBrowsingDatabaseBloom::InsertAddPrefix(SBPrefix prefix, - int encoded_chunk) { - STATS_COUNTER("SB.PrefixAdd", 1); - std::string sql = "INSERT INTO add_prefix (chunk, prefix) VALUES (?, ?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, prefix); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } - add_count_++; + return true; } -void SafeBrowsingDatabaseBloom::InsertAddFullHash(SBPrefix prefix, - int encoded_chunk, - base::Time receive_time, - SBFullHash full_prefix) { - STATS_COUNTER("SB.PrefixAddFull", 1); - std::string sql = "INSERT INTO add_full_hash " - "(chunk, prefix, receive_time, full_hash) " - "VALUES (?,?,?,?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; - } +bool SafeBrowsingDatabaseBloom::Close() { + if (!db_) + return true; - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, prefix); - statement->bind_int64(2, receive_time.ToTimeT()); - statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash)); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); - } + insert_transaction_.reset(); + statement_cache_.reset(); // Must free statements before closing DB. + bool result = sqlite3_close(db_) == SQLITE_OK; + db_ = NULL; + + return result; } -void SafeBrowsingDatabaseBloom::InsertSub( - int chunk_id, SBPrefix host, SBEntry* entry) { - STATS_COUNTER("SB.HostDelete", 1); - int encoded = EncodeChunkId(chunk_id, entry->list_id()); - int encoded_add; +bool SafeBrowsingDatabaseBloom::CreateTables() { + SQLTransaction transaction(db_); + transaction.Begin(); - if (entry->type() == SBEntry::SUB_FULL_HASH) { - for (int i = 0; i < entry->prefix_count(); ++i) { - SBFullHash full_hash = entry->FullHashAt(i); - SBPrefix prefix = full_hash.prefix; - encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), entry->list_id()); - InsertSubPrefix(prefix, encoded, encoded_add); - InsertSubFullHash(prefix, encoded, encoded_add, full_hash, false); - } - } else { - // We have prefixes. - int count = entry->prefix_count(); - if (count == 0) { - encoded_add = EncodeChunkId(entry->chunk_id(), entry->list_id()); - InsertSubPrefix(host, encoded, encoded_add); - } else { - for (int i = 0; i < count; i++) { - SBPrefix prefix = entry->PrefixAt(i); - encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), - entry->list_id()); - InsertSubPrefix(prefix, encoded, encoded_add); - } - } + // Store 32 bit add prefixes here. + if (sqlite3_exec(db_, "CREATE TABLE add_prefix (" + "chunk INTEGER," + "prefix INTEGER)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } -} -void SafeBrowsingDatabaseBloom::InsertSubPrefix(SBPrefix prefix, - int encoded_chunk, - int encoded_add_chunk) { - STATS_COUNTER("SB.PrefixSub", 1); - std::string sql = - "INSERT INTO sub_prefix (chunk, add_chunk, prefix) VALUES (?,?,?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; + // Store 32 bit sub prefixes here. + if (sqlite3_exec(db_, "CREATE TABLE sub_prefix (" + "chunk INTEGER," + "add_chunk INTEGER," + "prefix INTEGER)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, encoded_add_chunk); - statement->bind_int(2, prefix); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); + + // Store 256 bit add full hashes (and GetHash results) here. + if (sqlite3_exec(db_, "CREATE TABLE add_full_hash (" + "chunk INTEGER," + "prefix INTEGER," + "receive_time INTEGER," + "full_hash BLOB)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } -} -void SafeBrowsingDatabaseBloom::InsertSubFullHash(SBPrefix prefix, - int encoded_chunk, - int encoded_add_chunk, - SBFullHash full_prefix, - bool use_temp_table) { - STATS_COUNTER("SB.PrefixSubFull", 1); - std::string sql = "INSERT INTO "; - if (use_temp_table) { - sql += "sub_full_tmp"; - } else { - sql += "sub_full_hash"; + // Store 256 bit sub full hashes here. + if (sqlite3_exec(db_, "CREATE TABLE sub_full_hash (" + "chunk INTEGER," + "add_chunk INTEGER," + "prefix INTEGER," + "full_hash BLOB)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } - sql += " (chunk, add_chunk, prefix, full_hash) VALUES (?,?,?,?)"; - SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); - if (!statement.is_valid()) { - NOTREACHED(); - return; + // Store all the add and sub chunk numbers we receive. We cannot just rely on + // the prefix tables to generate these lists, since some chunks will have zero + // entries (and thus no prefixes), or potentially an add chunk can have all of + // its entries sub'd without receiving an AddDel, or a sub chunk might have + // been entirely consumed by adds. In these cases, we still have to report the + // chunk number but it will not have any prefixes in the prefix tables. + // + // TODO(paulg): Investigate storing the chunks as a string of ChunkRanges, one + // string for each of phish-add, phish-sub, malware-add, malware-sub. This + // might be better performance when the number of chunks is large, and is the + // natural format for the update request. + if (sqlite3_exec(db_, "CREATE TABLE add_chunks (" + "chunk INTEGER PRIMARY KEY)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } - statement->bind_int(0, encoded_chunk); - statement->bind_int(1, encoded_add_chunk); - statement->bind_int(2, prefix); - statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash)); - int rv = statement->step(); - statement->reset(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - } else { - DCHECK(rv == SQLITE_DONE); + + if (sqlite3_exec(db_, "CREATE TABLE sub_chunks (" + "chunk INTEGER PRIMARY KEY)", + NULL, NULL, NULL) != SQLITE_OK) { + return false; } -} -void SafeBrowsingDatabaseBloom::ReadFullHash(SqliteCompiledStatement& statement, - int column, - SBFullHash* full_hash) { - DCHECK(full_hash); - std::vector<unsigned char> blob; - statement->column_blob_as_vector(column, &blob); - DCHECK(blob.size() == sizeof(SBFullHash)); - memcpy(full_hash->full_hash, &blob[0], sizeof(SBFullHash)); -} + std::string version = "PRAGMA user_version="; + version += StringPrintf("%d", kDatabaseVersion); -// TODO(paulg): Look for a less expensive way to maintain add_count_? If we move -// to a native file format, we can just cache the count in the file and not have -// to scan at all. -int SafeBrowsingDatabaseBloom::GetAddPrefixCount() { - SQLITE_UNIQUE_STATEMENT(count, *statement_cache_, - "SELECT count(*) FROM add_prefix"); - if (!count.is_valid()) { + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str()); + if (!statement.is_valid()) { NOTREACHED(); - return 0; + return false; } - int rv = count->step(); - int add_count = 0; - if (rv == SQLITE_ROW) - add_count = count->column_int(0); - else if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - return add_count; -} + if (statement->step() != SQLITE_DONE) + return false; -void SafeBrowsingDatabaseBloom::DeleteChunks( - std::vector<SBChunkDelete>* chunk_deletes) { - if (chunk_deletes->empty()) - return; + transaction.Commit(); + add_count_ = 0; - int list_id = safe_browsing_util::GetListId(chunk_deletes->front().list_name); + return true; +} - for (size_t i = 0; i < chunk_deletes->size(); ++i) { - const SBChunkDelete& chunk = (*chunk_deletes)[i]; - std::vector<int> chunk_numbers; - RangesToChunks(chunk.chunk_del, &chunk_numbers); - for (size_t del = 0; del < chunk_numbers.size(); ++del) { - int encoded_chunk = EncodeChunkId(chunk_numbers[del], list_id); - if (chunk.is_sub_del) - sub_del_cache_.insert(encoded_chunk); - else - add_del_cache_.insert(encoded_chunk); - } +bool SafeBrowsingDatabaseBloom::CheckCompatibleVersion() { + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "PRAGMA user_version"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; } - delete chunk_deletes; + int result = statement->step(); + if (result != SQLITE_ROW) + return false; + + return statement->column_int(0) == kDatabaseVersion; } bool SafeBrowsingDatabaseBloom::ChunkExists(int list_id, @@ -655,140 +491,126 @@ void SafeBrowsingDatabaseBloom::GetChunkIds( RangesToString(ranges, list); } -void SafeBrowsingDatabaseBloom::GetListsInfo( - std::vector<SBListChunkRanges>* lists) { - DCHECK(lists); - lists->clear(); +void SafeBrowsingDatabaseBloom::BuildBloomFilter() { +#if defined(OS_WIN) + // For measuring the amount of IO during the bloom filter build. + IoCounters io_before, io_after; + base::ProcessHandle handle = base::Process::Current().handle(); + scoped_ptr<base::ProcessMetrics> metric; + metric.reset(base::ProcessMetrics::CreateProcessMetrics(handle)); + metric->GetIOCounters(&io_before); +#endif - ReadChunkNumbers(); + Time before = Time::Now(); - lists->push_back(SBListChunkRanges(safe_browsing_util::kMalwareList)); - GetChunkIds(safe_browsing_util::MALWARE, ADD_CHUNK, &lists->back().adds); - GetChunkIds(safe_browsing_util::MALWARE, SUB_CHUNK, &lists->back().subs); + // Get all the pending GetHash results and write them to disk. + HashList pending_hashes; + { + AutoLock lock(lookup_lock_); + pending_hashes.swap(pending_full_hashes_); + } + WriteFullHashList(pending_hashes, true); - lists->push_back(SBListChunkRanges(safe_browsing_util::kPhishingList)); - GetChunkIds(safe_browsing_util::PHISH, ADD_CHUNK, &lists->back().adds); - GetChunkIds(safe_browsing_util::PHISH, SUB_CHUNK, &lists->back().subs); + add_count_ = GetAddPrefixCount(); + if (add_count_ == 0) { + AutoLock lock(lookup_lock_); + bloom_filter_ = NULL; + return; + } - return; -} + scoped_array<SBPair> adds_array(new SBPair[add_count_]); + SBPair* adds = adds_array.get(); -void SafeBrowsingDatabaseBloom::ReadChunkNumbers() { - add_chunk_cache_.clear(); - sub_chunk_cache_.clear(); + if (!BuildAddPrefixList(adds)) + return; - // Read in the add chunk numbers. - SQLITE_UNIQUE_STATEMENT(read_adds, *statement_cache_, - "SELECT chunk FROM add_chunks"); - if (!read_adds.is_valid()) { - NOTREACHED(); + // Build the full add cache, which includes full hash updates and GetHash + // results. Subs may remove some of these entries. + scoped_ptr<HashCache> add_cache(new HashCache); + if (!BuildAddFullHashCache(add_cache.get())) return; - } - while (true) { - int rv = read_adds->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - break; - } - add_chunk_cache_.insert(read_adds->column_int(0)); - } + scoped_ptr<HashCache> sub_cache(new HashCache); + if (!BuildSubFullHashCache(sub_cache.get())) + return; - // Read in the sub chunk numbers. - SQLITE_UNIQUE_STATEMENT(read_subs, *statement_cache_, - "SELECT chunk FROM sub_chunks"); - if (!read_subs.is_valid()) { - NOTREACHED(); + // Used to track which adds have been subbed out. The vector<bool> is actually + // a bitvector so the size is as small as we can get. + std::vector<bool> adds_removed; + adds_removed.resize(add_count_, false); + + // Flag any add as removed if there is a matching sub. + int subs = 0; + if (!RemoveSubs(adds, &adds_removed, add_cache.get(), sub_cache.get(), &subs)) return; - } - while (true) { - int rv = read_subs->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) - HandleCorruptDatabase(); - break; - } - sub_chunk_cache_.insert(read_subs->column_int(0)); - } -} + // Prepare the database for writing out our remaining add and sub prefixes. + if (!UpdateTables()) + return; -// Write all the chunk numbers to the add_chunks and sub_chunks tables. -bool SafeBrowsingDatabaseBloom::WriteChunkNumbers() { - // Delete the contents of the add chunk table. - SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_, - "DELETE FROM add_chunks"); - if (!del_add_chunk.is_valid()) { - NOTREACHED(); - return false; - } - int rv = del_add_chunk->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); + // Write out the remaining add prefixes to the filter and database. + int new_count; + scoped_refptr<BloomFilter> filter; + if (!WritePrefixes(adds, adds_removed, &new_count, &filter)) + return; - SQLITE_UNIQUE_STATEMENT(write_adds, *statement_cache_, - "INSERT INTO add_chunks (chunk) VALUES (?)"); - if (!write_adds.is_valid()) { - NOTREACHED(); - return false; - } + // Write out the remaining full hash adds and subs to the database. + WriteFullHashes(add_cache.get(), true); + WriteFullHashes(sub_cache.get(), false); - // Write all the add chunks from the cache to the database. - std::set<int>::const_iterator it = add_chunk_cache_.begin(); - for (; it != add_chunk_cache_.end(); ++it) { - if (add_del_cache_.find(*it) != add_del_cache_.end()) - continue; // This chunk has been deleted. - write_adds->bind_int(0, *it); - rv = write_adds->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - write_adds->reset(); - } + // Save the chunk numbers we've received to the database for reporting in + // future update requests. + if (!WriteChunkNumbers()) + return; - // Delete the contents of the sub chunk table. - SQLITE_UNIQUE_STATEMENT(del_sub_chunk, *statement_cache_, - "DELETE FROM sub_chunks"); - if (!del_sub_chunk.is_valid()) { - NOTREACHED(); - return false; - } - rv = del_sub_chunk->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; + // Commit all the changes to the database. + int rv = insert_transaction_->Commit(); + if (rv != SQLITE_OK) { + NOTREACHED() << "SafeBrowsing update transaction failed to commit."; + UMA_HISTOGRAM_COUNTS("SB2.FailedUpdate", 1); + return; } - DCHECK(rv == SQLITE_DONE); - SQLITE_UNIQUE_STATEMENT(write_subs, *statement_cache_, - "INSERT INTO sub_chunks (chunk) VALUES (?)"); - if (!write_subs.is_valid()) { - NOTREACHED(); - return false; + // Swap in the newly built filter and cache. If there were any matching subs, + // the size (add_count_) will be smaller. + { + AutoLock lock(lookup_lock_); + add_count_ = new_count; + bloom_filter_.swap(filter); + hash_cache_.swap(add_cache); } - // Write all the sub chunks from the cache to the database. - it = sub_chunk_cache_.begin(); - for (; it != sub_chunk_cache_.end(); ++it) { - if (sub_del_cache_.find(*it) != sub_del_cache_.end()) - continue; // This chunk has been deleted. - write_subs->bind_int(0, *it); - rv = write_subs->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - write_subs->reset(); - } + TimeDelta bloom_gen = Time::Now() - before; - return true; + // Persist the bloom filter to disk. + WriteBloomFilter(); + + // Gather statistics. +#if defined(OS_WIN) + metric->GetIOCounters(&io_after); + UMA_HISTOGRAM_COUNTS("SB2.BuildReadBytes", + static_cast<int>(io_after.ReadTransferCount - + io_before.ReadTransferCount)); + UMA_HISTOGRAM_COUNTS("SB2.BuildWriteBytes", + static_cast<int>(io_after.WriteTransferCount - + io_before.WriteTransferCount)); + UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", + static_cast<int>(io_after.ReadOperationCount - + io_before.ReadOperationCount)); + UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", + static_cast<int>(io_after.WriteOperationCount - + io_before.WriteOperationCount)); +#endif + SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in " + << bloom_gen.InMilliseconds() + << " ms total. prefix count: "<< add_count_; + UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); + UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_count_); + UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", subs); + UMA_HISTOGRAM_COUNTS("SB2.FilterSize", bloom_filter_->size()); + int64 size_64; + if (file_util::GetFileSize(filename_, &size_64)) + UMA_HISTOGRAM_COUNTS("SB2.DatabaseBytes", static_cast<int>(size_64)); } int SafeBrowsingDatabaseBloom::PairCompare(const void* arg1, const void* arg2) { @@ -831,6 +653,114 @@ bool SafeBrowsingDatabaseBloom::BuildAddPrefixList(SBPair* adds) { return true; } +bool SafeBrowsingDatabaseBloom::BuildAddFullHashCache(HashCache* add_cache) { + add_cache->clear(); + + // Read all full add entries to the cache. + SQLITE_UNIQUE_STATEMENT( + full_add_entry, + *statement_cache_, + "SELECT chunk, prefix, receive_time, full_hash FROM add_full_hash"); + if (!full_add_entry.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while (true) { + rv = full_add_entry->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + break; + } + HashCacheEntry entry; + entry.add_chunk_id = full_add_entry->column_int(0); + if (add_del_cache_.find(entry.add_chunk_id) != add_del_cache_.end()) + continue; // This entry's chunk was deleted so we skip it. + SBPrefix prefix = full_add_entry->column_int(1); + entry.received = base::Time::FromTimeT(full_add_entry->column_int64(2)); + int chunk, list_id; + DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); + entry.list_id = list_id; + ReadFullHash(&full_add_entry, 3, &entry.full_hash); + HashList& entries = (*add_cache)[prefix]; + entries.push_back(entry); + } + + // Clear the full add table. + SQLITE_UNIQUE_STATEMENT(full_add_drop, *statement_cache_, + "DELETE FROM add_full_hash"); + if (!full_add_drop.is_valid()) { + NOTREACHED(); + return false; + } + rv = full_add_drop->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + + return true; +} + +bool SafeBrowsingDatabaseBloom::BuildSubFullHashCache(HashCache* sub_cache) { + sub_cache->clear(); + + // Read all full sub entries to the cache. + SQLITE_UNIQUE_STATEMENT( + full_sub_entry, + *statement_cache_, + "SELECT chunk, add_chunk, prefix, full_hash FROM sub_full_hash"); + if (!full_sub_entry.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while (true) { + rv = full_sub_entry->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + break; + } + HashCacheEntry entry; + entry.sub_chunk_id = full_sub_entry->column_int(0); + if (sub_del_cache_.find(entry.sub_chunk_id) != sub_del_cache_.end()) + continue; // This entry's chunk was deleted so we skip it. + entry.add_chunk_id = full_sub_entry->column_int(1); + SBPrefix prefix = full_sub_entry->column_int(2); + int chunk, list_id; + DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); + entry.list_id = list_id; + ReadFullHash(&full_sub_entry, 3, &entry.full_hash); + HashList& entries = (*sub_cache)[prefix]; + entries.push_back(entry); + } + + // Clear the full sub table. + SQLITE_UNIQUE_STATEMENT(full_sub_drop, *statement_cache_, + "DELETE FROM sub_full_hash"); + if (!full_sub_drop.is_valid()) { + NOTREACHED(); + return false; + } + rv = full_sub_drop->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + + return true; +} + bool SafeBrowsingDatabaseBloom::RemoveSubs( SBPair* adds, std::vector<bool>* adds_removed, HashCache* add_cache, HashCache* sub_cache, int* subs) { @@ -1094,236 +1024,6 @@ void SafeBrowsingDatabaseBloom::WriteFullHashList(const HashList& hash_list, } } -bool SafeBrowsingDatabaseBloom::BuildAddFullHashCache(HashCache* add_cache) { - add_cache->clear(); - - // Read all full add entries to the cache. - SQLITE_UNIQUE_STATEMENT( - full_add_entry, - *statement_cache_, - "SELECT chunk, prefix, receive_time, full_hash FROM add_full_hash"); - if (!full_add_entry.is_valid()) { - NOTREACHED(); - return false; - } - - int rv; - while (true) { - rv = full_add_entry->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - break; - } - HashCacheEntry entry; - entry.add_chunk_id = full_add_entry->column_int(0); - if (add_del_cache_.find(entry.add_chunk_id) != add_del_cache_.end()) - continue; // This entry's chunk was deleted so we skip it. - SBPrefix prefix = full_add_entry->column_int(1); - entry.received = base::Time::FromTimeT(full_add_entry->column_int64(2)); - int chunk, list_id; - DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); - entry.list_id = list_id; - ReadFullHash(full_add_entry, 3, &entry.full_hash); - HashList& entries = (*add_cache)[prefix]; - entries.push_back(entry); - } - - // Clear the full add table. - SQLITE_UNIQUE_STATEMENT(full_add_drop, *statement_cache_, - "DELETE FROM add_full_hash"); - if (!full_add_drop.is_valid()) { - NOTREACHED(); - return false; - } - rv = full_add_drop->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - return true; -} - -bool SafeBrowsingDatabaseBloom::BuildSubFullHashCache(HashCache* sub_cache) { - sub_cache->clear(); - - // Read all full sub entries to the cache. - SQLITE_UNIQUE_STATEMENT( - full_sub_entry, - *statement_cache_, - "SELECT chunk, add_chunk, prefix, full_hash FROM sub_full_hash"); - if (!full_sub_entry.is_valid()) { - NOTREACHED(); - return false; - } - - int rv; - while (true) { - rv = full_sub_entry->step(); - if (rv != SQLITE_ROW) { - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - break; - } - HashCacheEntry entry; - entry.sub_chunk_id = full_sub_entry->column_int(0); - if (sub_del_cache_.find(entry.sub_chunk_id) != sub_del_cache_.end()) - continue; // This entry's chunk was deleted so we skip it. - entry.add_chunk_id = full_sub_entry->column_int(1); - SBPrefix prefix = full_sub_entry->column_int(2); - int chunk, list_id; - DecodeChunkId(entry.add_chunk_id, &chunk, &list_id); - entry.list_id = list_id; - ReadFullHash(full_sub_entry, 3, &entry.full_hash); - HashList& entries = (*sub_cache)[prefix]; - entries.push_back(entry); - } - - // Clear the full sub table. - SQLITE_UNIQUE_STATEMENT(full_sub_drop, *statement_cache_, - "DELETE FROM sub_full_hash"); - if (!full_sub_drop.is_valid()) { - NOTREACHED(); - return false; - } - rv = full_sub_drop->step(); - if (rv == SQLITE_CORRUPT) { - HandleCorruptDatabase(); - return false; - } - DCHECK(rv == SQLITE_DONE); - - return true; -} - -void SafeBrowsingDatabaseBloom::BuildBloomFilter() { -#if defined(OS_WIN) - // For measuring the amount of IO during the bloom filter build. - IoCounters io_before, io_after; - base::ProcessHandle handle = base::Process::Current().handle(); - scoped_ptr<base::ProcessMetrics> metric; - metric.reset(base::ProcessMetrics::CreateProcessMetrics(handle)); - metric->GetIOCounters(&io_before); -#endif - - Time before = Time::Now(); - - // Get all the pending GetHash results and write them to disk. - HashList pending_hashes; - { - AutoLock lock(lookup_lock_); - pending_hashes.swap(pending_full_hashes_); - } - WriteFullHashList(pending_hashes, true); - - add_count_ = GetAddPrefixCount(); - if (add_count_ == 0) { - AutoLock lock(lookup_lock_); - bloom_filter_ = NULL; - return; - } - - scoped_array<SBPair> adds_array(new SBPair[add_count_]); - SBPair* adds = adds_array.get(); - - if (!BuildAddPrefixList(adds)) - return; - - // Build the full add cache, which includes full hash updates and GetHash - // results. Subs may remove some of these entries. - scoped_ptr<HashCache> add_cache(new HashCache); - if (!BuildAddFullHashCache(add_cache.get())) - return; - - scoped_ptr<HashCache> sub_cache(new HashCache); - if (!BuildSubFullHashCache(sub_cache.get())) - return; - - // Used to track which adds have been subbed out. The vector<bool> is actually - // a bitvector so the size is as small as we can get. - std::vector<bool> adds_removed; - adds_removed.resize(add_count_, false); - - // Flag any add as removed if there is a matching sub. - int subs = 0; - if (!RemoveSubs(adds, &adds_removed, add_cache.get(), sub_cache.get(), &subs)) - return; - - // Prepare the database for writing out our remaining add and sub prefixes. - if (!UpdateTables()) - return; - - // Write out the remaining add prefixes to the filter and database. - int new_count; - scoped_refptr<BloomFilter> filter; - if (!WritePrefixes(adds, adds_removed, &new_count, &filter)) - return; - - // Write out the remaining full hash adds and subs to the database. - WriteFullHashes(add_cache.get(), true); - WriteFullHashes(sub_cache.get(), false); - - // Save the chunk numbers we've received to the database for reporting in - // future update requests. - if (!WriteChunkNumbers()) - return; - - // Commit all the changes to the database. - int rv = insert_transaction_->Commit(); - if (rv != SQLITE_OK) { - NOTREACHED() << "SafeBrowsing update transaction failed to commit."; - UMA_HISTOGRAM_COUNTS("SB2.FailedUpdate", 1); - return; - } - - // Swap in the newly built filter and cache. If there were any matching subs, - // the size (add_count_) will be smaller. - { - AutoLock lock(lookup_lock_); - add_count_ = new_count; - bloom_filter_.swap(filter); - hash_cache_.swap(add_cache); - } - - TimeDelta bloom_gen = Time::Now() - before; - - // Persist the bloom filter to disk. - WriteBloomFilter(); - - // Gather statistics. -#if defined(OS_WIN) - metric->GetIOCounters(&io_after); - UMA_HISTOGRAM_COUNTS("SB2.BuildReadBytes", - static_cast<int>(io_after.ReadTransferCount - - io_before.ReadTransferCount)); - UMA_HISTOGRAM_COUNTS("SB2.BuildWriteBytes", - static_cast<int>(io_after.WriteTransferCount - - io_before.WriteTransferCount)); - UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", - static_cast<int>(io_after.ReadOperationCount - - io_before.ReadOperationCount)); - UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", - static_cast<int>(io_after.WriteOperationCount - - io_before.WriteOperationCount)); -#endif - SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in " - << bloom_gen.InMilliseconds() - << " ms total. prefix count: "<< add_count_; - UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); - UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_count_); - UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", subs); - UMA_HISTOGRAM_COUNTS("SB2.FilterSize", bloom_filter_->size()); - int64 size_64; - if (file_util::GetFileSize(filename_, &size_64)) - UMA_HISTOGRAM_COUNTS("SB2.DatabaseBytes", static_cast<int>(size_64)); -} - void SafeBrowsingDatabaseBloom::GetCachedFullHashes( const std::vector<SBPrefix>* prefix_hits, std::vector<SBFullHashResult>* full_hits, @@ -1362,38 +1062,6 @@ void SafeBrowsingDatabaseBloom::GetCachedFullHashes( } } -void SafeBrowsingDatabaseBloom::CacheHashResults( - const std::vector<SBPrefix>& prefixes, - const std::vector<SBFullHashResult>& full_hits) { - AutoLock lock(lookup_lock_); - - if (full_hits.empty()) { - // These prefixes returned no results, so we store them in order to prevent - // asking for them again. We flush this cache at the next update. - for (std::vector<SBPrefix>::const_iterator it = prefixes.begin(); - it != prefixes.end(); ++it) { - prefix_miss_cache_.insert(*it); - } - return; - } - - const Time now = Time::Now(); - for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin(); - it != full_hits.end(); ++it) { - SBPrefix prefix = it->hash.prefix; - HashList& entries = (*hash_cache_)[prefix]; - HashCacheEntry entry; - entry.received = now; - entry.list_id = safe_browsing_util::GetListId(it->list_name); - entry.add_chunk_id = EncodeChunkId(it->add_chunk_id, entry.list_id); - entry.full_hash = it->hash; - entries.push_back(entry); - - // Also push a copy to the pending write queue. - pending_full_hashes_.push_back(entry); - } -} - bool SafeBrowsingDatabaseBloom::ClearCachedEntry(SBPrefix prefix, int add_chunk, HashCache* hash_cache) { @@ -1431,7 +1099,320 @@ void SafeBrowsingDatabaseBloom::OnHandleCorruptDatabase() { DCHECK(false) << "SafeBrowsing database was corrupt and reset"; } -// This database is always synchronous since we don't need to worry about -// blocking any incoming reads. -void SafeBrowsingDatabaseBloom::SetSynchronous() { +void SafeBrowsingDatabaseBloom::InsertAdd(SBPrefix host, SBEntry* entry) { + STATS_COUNTER("SB.HostInsert", 1); + int encoded = EncodeChunkId(entry->chunk_id(), entry->list_id()); + + if (entry->type() == SBEntry::ADD_FULL_HASH) { + base::Time receive_time = base::Time::Now(); + for (int i = 0; i < entry->prefix_count(); ++i) { + SBFullHash full_hash = entry->FullHashAt(i); + SBPrefix prefix = full_hash.prefix; + InsertAddPrefix(prefix, encoded); + InsertAddFullHash(prefix, encoded, receive_time, full_hash); + } + return; + } + + // This entry contains only regular (32 bit) prefixes. + int count = entry->prefix_count(); + if (count == 0) { + InsertAddPrefix(host, encoded); + } else { + for (int i = 0; i < count; i++) { + SBPrefix prefix = entry->PrefixAt(i); + InsertAddPrefix(prefix, encoded); + } + } +} + +void SafeBrowsingDatabaseBloom::InsertAddPrefix(SBPrefix prefix, + int encoded_chunk) { + STATS_COUNTER("SB.PrefixAdd", 1); + std::string sql = "INSERT INTO add_prefix (chunk, prefix) VALUES (?, ?)"; + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, prefix); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } + add_count_++; +} + +void SafeBrowsingDatabaseBloom::InsertAddFullHash(SBPrefix prefix, + int encoded_chunk, + base::Time receive_time, + SBFullHash full_prefix) { + STATS_COUNTER("SB.PrefixAddFull", 1); + std::string sql = "INSERT INTO add_full_hash " + "(chunk, prefix, receive_time, full_hash) " + "VALUES (?,?,?,?)"; + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, prefix); + statement->bind_int64(2, receive_time.ToTimeT()); + statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash)); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + +void SafeBrowsingDatabaseBloom::InsertSub( + int chunk_id, SBPrefix host, SBEntry* entry) { + STATS_COUNTER("SB.HostDelete", 1); + int encoded = EncodeChunkId(chunk_id, entry->list_id()); + int encoded_add; + + if (entry->type() == SBEntry::SUB_FULL_HASH) { + for (int i = 0; i < entry->prefix_count(); ++i) { + SBFullHash full_hash = entry->FullHashAt(i); + SBPrefix prefix = full_hash.prefix; + encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), entry->list_id()); + InsertSubPrefix(prefix, encoded, encoded_add); + InsertSubFullHash(prefix, encoded, encoded_add, full_hash, false); + } + } else { + // We have prefixes. + int count = entry->prefix_count(); + if (count == 0) { + encoded_add = EncodeChunkId(entry->chunk_id(), entry->list_id()); + InsertSubPrefix(host, encoded, encoded_add); + } else { + for (int i = 0; i < count; i++) { + SBPrefix prefix = entry->PrefixAt(i); + encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), + entry->list_id()); + InsertSubPrefix(prefix, encoded, encoded_add); + } + } + } +} + +void SafeBrowsingDatabaseBloom::InsertSubPrefix(SBPrefix prefix, + int encoded_chunk, + int encoded_add_chunk) { + STATS_COUNTER("SB.PrefixSub", 1); + std::string sql = + "INSERT INTO sub_prefix (chunk, add_chunk, prefix) VALUES (?,?,?)"; + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, encoded_add_chunk); + statement->bind_int(2, prefix); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + +void SafeBrowsingDatabaseBloom::InsertSubFullHash(SBPrefix prefix, + int encoded_chunk, + int encoded_add_chunk, + SBFullHash full_prefix, + bool use_temp_table) { + STATS_COUNTER("SB.PrefixSubFull", 1); + std::string sql = "INSERT INTO "; + if (use_temp_table) { + sql += "sub_full_tmp"; + } else { + sql += "sub_full_hash"; + } + sql += " (chunk, add_chunk, prefix, full_hash) VALUES (?,?,?,?)"; + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, encoded_add_chunk); + statement->bind_int(2, prefix); + statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash)); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + +void SafeBrowsingDatabaseBloom::ReadFullHash(SqliteCompiledStatement* statement, + int column, + SBFullHash* full_hash) { + DCHECK(full_hash); + std::vector<unsigned char> blob; + (*statement)->column_blob_as_vector(column, &blob); + DCHECK(blob.size() == sizeof(SBFullHash)); + memcpy(full_hash->full_hash, &blob[0], sizeof(SBFullHash)); +} + +// TODO(paulg): Look for a less expensive way to maintain add_count_? If we move +// to a native file format, we can just cache the count in the file and not have +// to scan at all. +int SafeBrowsingDatabaseBloom::GetAddPrefixCount() { + SQLITE_UNIQUE_STATEMENT(count, *statement_cache_, + "SELECT count(*) FROM add_prefix"); + if (!count.is_valid()) { + NOTREACHED(); + return 0; + } + int rv = count->step(); + int add_count = 0; + if (rv == SQLITE_ROW) + add_count = count->column_int(0); + else if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + + return add_count; +} + +void SafeBrowsingDatabaseBloom::ReadChunkNumbers() { + add_chunk_cache_.clear(); + sub_chunk_cache_.clear(); + + // Read in the add chunk numbers. + SQLITE_UNIQUE_STATEMENT(read_adds, *statement_cache_, + "SELECT chunk FROM add_chunks"); + if (!read_adds.is_valid()) { + NOTREACHED(); + return; + } + + while (true) { + int rv = read_adds->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + break; + } + add_chunk_cache_.insert(read_adds->column_int(0)); + } + + // Read in the sub chunk numbers. + SQLITE_UNIQUE_STATEMENT(read_subs, *statement_cache_, + "SELECT chunk FROM sub_chunks"); + if (!read_subs.is_valid()) { + NOTREACHED(); + return; + } + + while (true) { + int rv = read_subs->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + break; + } + sub_chunk_cache_.insert(read_subs->column_int(0)); + } +} + +// Write all the chunk numbers to the add_chunks and sub_chunks tables. +bool SafeBrowsingDatabaseBloom::WriteChunkNumbers() { + // Delete the contents of the add chunk table. + SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_, + "DELETE FROM add_chunks"); + if (!del_add_chunk.is_valid()) { + NOTREACHED(); + return false; + } + int rv = del_add_chunk->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + + SQLITE_UNIQUE_STATEMENT(write_adds, *statement_cache_, + "INSERT INTO add_chunks (chunk) VALUES (?)"); + if (!write_adds.is_valid()) { + NOTREACHED(); + return false; + } + + // Write all the add chunks from the cache to the database. + std::set<int>::const_iterator it = add_chunk_cache_.begin(); + for (; it != add_chunk_cache_.end(); ++it) { + if (add_del_cache_.find(*it) != add_del_cache_.end()) + continue; // This chunk has been deleted. + write_adds->bind_int(0, *it); + rv = write_adds->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + write_adds->reset(); + } + + // Delete the contents of the sub chunk table. + SQLITE_UNIQUE_STATEMENT(del_sub_chunk, *statement_cache_, + "DELETE FROM sub_chunks"); + if (!del_sub_chunk.is_valid()) { + NOTREACHED(); + return false; + } + rv = del_sub_chunk->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + + SQLITE_UNIQUE_STATEMENT(write_subs, *statement_cache_, + "INSERT INTO sub_chunks (chunk) VALUES (?)"); + if (!write_subs.is_valid()) { + NOTREACHED(); + return false; + } + + // Write all the sub chunks from the cache to the database. + it = sub_chunk_cache_.begin(); + for (; it != sub_chunk_cache_.end(); ++it) { + if (sub_del_cache_.find(*it) != sub_del_cache_.end()) + continue; // This chunk has been deleted. + write_subs->bind_int(0, *it); + rv = write_subs->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return false; + } + DCHECK(rv == SQLITE_DONE); + write_subs->reset(); + } + + return true; +} + +void SafeBrowsingDatabaseBloom::ClearUpdateCaches() { + AutoLock lock(lookup_lock_); + add_del_cache_.clear(); + sub_del_cache_.clear(); + add_chunk_cache_.clear(); + sub_chunk_cache_.clear(); + prefix_miss_cache_.clear(); } diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h index 296e4e649..449e76c 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h +++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -6,25 +6,22 @@ #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_BLOOM_H_ #include <deque> -#include <list> -#include <queue> #include <set> #include <string> #include <vector> -#include "base/hash_tables.h" #include "base/lock.h" -#include "base/scoped_ptr.h" -#include "base/task.h" #include "chrome/browser/safe_browsing/safe_browsing_database.h" -#include "chrome/browser/safe_browsing/safe_browsing_util.h" -#include "chrome/common/sqlite_compiled_statement.h" -#include "chrome/common/sqlite_utils.h" namespace base { class Time; } +struct sqlite3; +class SqliteCompiledStatement; +class SqliteStatementCache; +class SQLTransaction; + // The reference implementation database using SQLite. class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { public: @@ -44,17 +41,23 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { std::deque<SBChunk>* chunks); virtual void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes); virtual void GetListsInfo(std::vector<SBListChunkRanges>* lists); - virtual void SetSynchronous(); virtual void CacheHashResults( const std::vector<SBPrefix>& prefixes, const std::vector<SBFullHashResult>& full_hits); virtual bool UpdateStarted(); virtual void UpdateFinished(bool update_succeeded); + private: + struct SBPair { + int chunk_id; + SBPrefix prefix; + }; - virtual bool NeedToCheckUrl(const GURL& url); + enum ChunkType { + ADD_CHUNK = 0, + SUB_CHUNK = 1, + }; - private: // Opens the database. bool Open(); @@ -75,11 +78,6 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { const std::vector<std::string>& paths, std::vector<SBPrefix>* prefix_hits); - enum ChunkType { - ADD_CHUNK = 0, - SUB_CHUNK = 1, - }; - // Checks if a chunk is in the database. bool ChunkExists(int list_id, ChunkType type, int chunk_id); @@ -91,11 +89,6 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { virtual void BuildBloomFilter(); // Helpers for building the bloom filter. - typedef struct { - int chunk_id; - SBPrefix prefix; - } SBPair; - static int PairCompare(const void* arg1, const void* arg2); bool BuildAddPrefixList(SBPair* adds); @@ -144,7 +137,7 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { bool use_temp_table); // Used for reading full hashes from the database. - void ReadFullHash(SqliteCompiledStatement& statement, + void ReadFullHash(SqliteCompiledStatement* statement, int column, SBFullHash* full_hash); diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc index c8efc3d..3d244c0 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc @@ -82,7 +82,6 @@ namespace { file_util::Delete(filename, false); SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create(); - database->SetSynchronous(); database->Init(filename, NULL); return database; @@ -1074,7 +1073,6 @@ void PeformUpdate(const std::wstring& initial_db, } SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create(); - database->SetSynchronous(); database->Init(path, NULL); Time before_time = Time::Now(); diff --git a/chrome/browser/safe_browsing/safe_browsing_util.cc b/chrome/browser/safe_browsing/safe_browsing_util.cc index 4ee5529..3553beb 100644 --- a/chrome/browser/safe_browsing/safe_browsing_util.cc +++ b/chrome/browser/safe_browsing/safe_browsing_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -6,7 +6,6 @@ #include "base/base64.h" #include "base/hmac.h" -#include "base/logging.h" #include "base/sha2.h" #include "base/string_util.h" #include "chrome/browser/google_util.h" @@ -200,10 +199,8 @@ GURL GeneratePhishingReportUrl(const std::string& report_page, const std::string continue_esc = EscapeQueryParamValue(StringPrintf(kContinueUrlFormat, lang)); const std::string current_esc = EscapeQueryParamValue(url_to_report); - const std::string format = report_page + kReportParams; - GURL report_url(StringPrintf(format.c_str(), - continue_esc.c_str(), - current_esc.c_str())); + GURL report_url(report_page + + StringPrintf(kReportParams, continue_esc.c_str(), current_esc.c_str())); return google_util::AppendGoogleLocaleParam(report_url); } diff --git a/chrome/browser/safe_browsing/safe_browsing_util.h b/chrome/browser/safe_browsing/safe_browsing_util.h index 44c94c4..8ef145f 100644 --- a/chrome/browser/safe_browsing/safe_browsing_util.h +++ b/chrome/browser/safe_browsing/safe_browsing_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -7,8 +7,6 @@ #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ -#include <string.h> - #include <deque> #include <string> #include <vector> @@ -18,33 +16,29 @@ class GURL; -// #define SB_LOGGING_ENABLED #ifdef SB_LOGGING_ENABLED #define SB_DLOG(severity) DLOG_IF(INFO, 1) #else #define SB_DLOG(severity) DLOG_IF(INFO, 0) #endif -// forward declaration class SBEntry; -// Widely used typedefs ------------------------------------------------------- +// A truncated hash's type. +typedef int SBPrefix; // Container for holding a chunk URL and the MAC of the contents of the URL. -typedef struct { +struct ChunkUrl { std::string url; std::string mac; std::string list_name; -} ChunkUrl; - -// A truncated hash's type. -typedef int SBPrefix; +}; // A full hash. -typedef union { +union SBFullHash { char full_hash[32]; SBPrefix prefix; -} SBFullHash; +}; inline bool operator==(const SBFullHash& rhash, const SBFullHash& lhash) { return memcmp(rhash.full_hash, lhash.full_hash, sizeof(SBFullHash)) == 0; |