From 961354ef6fe1260d978389cdcc107ad468908ba6 Mon Sep 17 00:00:00 2001
From: "pkasting@chromium.org"
 <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>
Date: Tue, 1 Dec 2009 21:09:54 +0000
Subject: Clean up Safe Browsing code a little: * Update copyrights. * Remove
 unnecessary headers. * Remove useless functions. * Make declaration and
 definition order match (in at least the _database_bloom.* files). * Eliminate
 database_perftest.cc, since it seems to have been disabled entirely since
 before the public launch, and looks like it's perhaps irrelevant to the
 current code design.

BUG=none
TEST=none
Review URL: http://codereview.chromium.org/457019

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@33479 0039d316-1c4b-4281-b951-d872f2087c98
---
 .../safe_browsing/safe_browsing_database_bloom.cc  | 1527 ++++++++++----------
 1 file changed, 754 insertions(+), 773 deletions(-)

(limited to 'chrome/browser/safe_browsing/safe_browsing_database_bloom.cc')

diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
index 51946eb..6a40167 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
@@ -5,17 +5,12 @@
 #include "chrome/browser/safe_browsing/safe_browsing_database_bloom.h"
 
 #include "base/auto_reset.h"
-#include "base/compiler_specific.h"
 #include "base/file_util.h"
-#include "base/logging.h"
 #include "base/message_loop.h"
-#include "base/platform_thread.h"
 #include "base/process_util.h"
 #include "base/sha2.h"
 #include "base/stats_counters.h"
-#include "base/string_util.h"
 #include "chrome/browser/safe_browsing/bloom_filter.h"
-#include "chrome/browser/safe_browsing/chunk_range.h"
 #include "chrome/common/sqlite_compiled_statement.h"
 #include "chrome/common/sqlite_utils.h"
 #include "googleurl/src/gurl.h"
@@ -64,130 +59,6 @@ void SafeBrowsingDatabaseBloom::Init(const FilePath& filename,
   chunk_inserted_callback_.reset(chunk_inserted_callback);
 }
 
-bool SafeBrowsingDatabaseBloom::Open() {
-  if (db_)
-    return true;
-
-  if (OpenSqliteDb(filename_, &db_) != SQLITE_OK) {
-    sqlite3_close(db_);
-    db_ = NULL;
-    return false;
-  }
-
-  // Run the database in exclusive mode. Nobody else should be accessing the
-  // database while we're running, and this will give somewhat improved perf.
-  sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL);
-
-  statement_cache_.reset(new SqliteStatementCache(db_));
-
-  if (!DoesSqliteTableExist(db_, "add_prefix")) {
-    if (!CreateTables()) {
-      // Database could be corrupt, try starting from scratch.
-      if (!ResetDatabase())
-        return false;
-    }
-  } else if (!CheckCompatibleVersion()) {
-    if (!ResetDatabase())
-      return false;
-  }
-
-  return true;
-}
-
-bool SafeBrowsingDatabaseBloom::Close() {
-  if (!db_)
-    return true;
-
-  insert_transaction_.reset();
-  statement_cache_.reset();  // Must free statements before closing DB.
-  bool result = sqlite3_close(db_) == SQLITE_OK;
-  db_ = NULL;
-
-  return result;
-}
-
-bool SafeBrowsingDatabaseBloom::CreateTables() {
-  SQLTransaction transaction(db_);
-  transaction.Begin();
-
-  // Store 32 bit add prefixes here.
-  if (sqlite3_exec(db_, "CREATE TABLE add_prefix ("
-      "chunk INTEGER,"
-      "prefix INTEGER)",
-      NULL, NULL, NULL) != SQLITE_OK) {
-    return false;
-  }
-
-  // Store 32 bit sub prefixes here.
-  if (sqlite3_exec(db_, "CREATE TABLE sub_prefix ("
-                   "chunk INTEGER,"
-                   "add_chunk INTEGER,"
-                   "prefix INTEGER)",
-                   NULL, NULL, NULL) != SQLITE_OK) {
-    return false;
-  }
-
-  // Store 256 bit add full hashes (and GetHash results) here.
-  if (sqlite3_exec(db_, "CREATE TABLE add_full_hash ("
-                   "chunk INTEGER,"
-                   "prefix INTEGER,"
-                   "receive_time INTEGER,"
-                   "full_hash BLOB)",
-                   NULL, NULL, NULL) != SQLITE_OK) {
-    return false;
-  }
-
-  // Store 256 bit sub full hashes here.
-  if (sqlite3_exec(db_, "CREATE TABLE sub_full_hash ("
-                   "chunk INTEGER,"
-                   "add_chunk INTEGER,"
-                   "prefix INTEGER,"
-                   "full_hash BLOB)",
-                   NULL, NULL, NULL) != SQLITE_OK) {
-    return false;
-  }
-
-  // Store all the add and sub chunk numbers we receive. We cannot just rely on
-  // the prefix tables to generate these lists, since some chunks will have zero
-  // entries (and thus no prefixes), or potentially an add chunk can have all of
-  // its entries sub'd without receiving an AddDel, or a sub chunk might have
-  // been entirely consumed by adds. In these cases, we still have to report the
-  // chunk number but it will not have any prefixes in the prefix tables.
-  //
-  // TODO(paulg): Investigate storing the chunks as a string of ChunkRanges, one
-  // string for each of phish-add, phish-sub, malware-add, malware-sub. This
-  // might be better performance when the number of chunks is large, and is the
-  // natural format for the update request.
-  if (sqlite3_exec(db_, "CREATE TABLE add_chunks ("
-                   "chunk INTEGER PRIMARY KEY)",
-                   NULL, NULL, NULL) != SQLITE_OK) {
-    return false;
-  }
-
-  if (sqlite3_exec(db_, "CREATE TABLE sub_chunks ("
-                   "chunk INTEGER PRIMARY KEY)",
-                   NULL, NULL, NULL) != SQLITE_OK) {
-    return false;
-  }
-
-  std::string version = "PRAGMA user_version=";
-  version += StringPrintf("%d", kDatabaseVersion);
-
-  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str());
-  if (!statement.is_valid()) {
-    NOTREACHED();
-    return false;
-  }
-
-  if (statement->step() != SQLITE_DONE)
-    return false;
-
-  transaction.Commit();
-  add_count_ = 0;
-
-  return true;
-}
-
 bool SafeBrowsingDatabaseBloom::ResetDatabase() {
   // Open() can call us when trying to handle potential database corruption.
   // Because we call Open() at the bottom of the function, we need to guard
@@ -218,30 +89,6 @@ bool SafeBrowsingDatabaseBloom::ResetDatabase() {
   return Open();
 }
 
-bool SafeBrowsingDatabaseBloom::CheckCompatibleVersion() {
-  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
-                          "PRAGMA user_version");
-  if (!statement.is_valid()) {
-    NOTREACHED();
-    return false;
-  }
-
-  int result = statement->step();
-  if (result != SQLITE_ROW)
-    return false;
-
-  return statement->column_int(0) == kDatabaseVersion;
-}
-
-void SafeBrowsingDatabaseBloom::ClearUpdateCaches() {
-  AutoLock lock(lookup_lock_);
-  add_del_cache_.clear();
-  sub_del_cache_.clear();
-  add_chunk_cache_.clear();
-  sub_chunk_cache_.clear();
-  prefix_miss_cache_.clear();
-}
-
 bool SafeBrowsingDatabaseBloom::ContainsUrl(
     const GURL& url,
     std::string* matching_list,
@@ -308,15 +155,6 @@ bool SafeBrowsingDatabaseBloom::ContainsUrl(
   return false;
 }
 
-bool SafeBrowsingDatabaseBloom::NeedToCheckUrl(const GURL& url) {
-  // Since everything is in the bloom filter, doing anything here would wind
-  // up just duplicating work that would happen in ContainsURL.
-  // It's possible that we may want to add a hostkey-based first-level cache
-  // on the front of this to minimize hash generation, but we'll need to do
-  // some measurements to verify that.
-  return true;
-}
-
 void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name,
                                              std::deque<SBChunk>* chunks) {
   if (chunks->empty())
@@ -374,6 +212,79 @@ void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name,
     chunk_inserted_callback_->Run();
 }
 
+// Adds every listed chunk id to the add/sub deletion caches; frees the vector.
+void SafeBrowsingDatabaseBloom::DeleteChunks(
+    std::vector<SBChunkDelete>* chunk_deletes) {
+  if (chunk_deletes->empty()) {
+    delete chunk_deletes;  // Owned here, so free it on the early-out path too.
+    return;
+  }
+  int list_id = safe_browsing_util::GetListId(chunk_deletes->front().list_name);
+  for (size_t i = 0; i < chunk_deletes->size(); ++i) {
+    const SBChunkDelete& chunk = (*chunk_deletes)[i];
+    std::vector<int> chunk_numbers;
+    RangesToChunks(chunk.chunk_del, &chunk_numbers);
+    for (size_t del = 0; del < chunk_numbers.size(); ++del) {
+      int encoded_chunk = EncodeChunkId(chunk_numbers[del], list_id);
+      if (chunk.is_sub_del)
+        sub_del_cache_.insert(encoded_chunk);
+      else
+        add_del_cache_.insert(encoded_chunk);
+    }
+  }
+  delete chunk_deletes;
+}
+
+// Rebuilds |lists| as two entries, malware then phishing, each filled with the
+// add and sub chunk ranges that GetChunkIds() reports for that list.
+void SafeBrowsingDatabaseBloom::GetListsInfo(
+    std::vector<SBListChunkRanges>* lists) {
+  DCHECK(lists);
+  lists->clear();
+  ReadChunkNumbers();
+  lists->push_back(SBListChunkRanges(safe_browsing_util::kMalwareList));
+  SBListChunkRanges& malware = lists->back();
+  GetChunkIds(safe_browsing_util::MALWARE, ADD_CHUNK, &malware.adds);
+  GetChunkIds(safe_browsing_util::MALWARE, SUB_CHUNK, &malware.subs);
+
+  lists->push_back(SBListChunkRanges(safe_browsing_util::kPhishingList));
+  SBListChunkRanges& phishing = lists->back();
+  GetChunkIds(safe_browsing_util::PHISH, ADD_CHUNK, &phishing.adds);
+  GetChunkIds(safe_browsing_util::PHISH, SUB_CHUNK, &phishing.subs);
+}
+
+// Caches the outcome of a GetHash request. With no full-hash hits, every
+// requested prefix is recorded as a miss; otherwise each hit is stored in the
+// in-memory hash cache and queued on the pending write list.
+void SafeBrowsingDatabaseBloom::CacheHashResults(
+    const std::vector<SBPrefix>& prefixes,
+    const std::vector<SBFullHashResult>& full_hits) {
+  AutoLock lock(lookup_lock_);
+
+  if (full_hits.empty()) {
+    // Nothing matched: remember the prefixes so they are not requested again.
+    // This miss cache is flushed at the next update.
+    for (size_t i = 0; i < prefixes.size(); ++i)
+      prefix_miss_cache_.insert(prefixes[i]);
+    return;
+  }
+
+  // Stamp every entry from this response with the same receive time.
+  const Time received = Time::Now();
+  for (size_t i = 0; i < full_hits.size(); ++i) {
+    const SBFullHashResult& hit = full_hits[i];
+    HashCacheEntry entry;
+    entry.received = received;
+    entry.list_id = safe_browsing_util::GetListId(hit.list_name);
+    entry.add_chunk_id = EncodeChunkId(hit.add_chunk_id, entry.list_id);
+    entry.full_hash = hit.hash;
+    (*hash_cache_)[hit.hash.prefix].push_back(entry);
+
+    // Also push a copy to the pending write queue.
+    pending_full_hashes_.push_back(entry);
+  }
+}
+
 bool SafeBrowsingDatabaseBloom::UpdateStarted() {
   DCHECK(insert_transaction_.get() == NULL);
 
@@ -401,219 +312,144 @@ void SafeBrowsingDatabaseBloom::UpdateFinished(bool update_succeeded) {
   ClearUpdateCaches();
 }
 
-void SafeBrowsingDatabaseBloom::InsertAdd(SBPrefix host, SBEntry* entry) {
-  STATS_COUNTER("SB.HostInsert", 1);
-  int encoded = EncodeChunkId(entry->chunk_id(), entry->list_id());
-
-  if (entry->type() == SBEntry::ADD_FULL_HASH) {
-    base::Time receive_time = base::Time::Now();
-    for (int i = 0; i < entry->prefix_count(); ++i) {
-      SBFullHash full_hash = entry->FullHashAt(i);
-      SBPrefix prefix = full_hash.prefix;
-      InsertAddPrefix(prefix, encoded);
-      InsertAddFullHash(prefix, encoded, receive_time, full_hash);
-    }
-    return;
-  }
+bool SafeBrowsingDatabaseBloom::Open() {
+  if (db_)
+    return true;
 
-  // This entry contains only regular (32 bit) prefixes.
-  int count = entry->prefix_count();
-  if (count == 0) {
-    InsertAddPrefix(host, encoded);
-  } else {
-    for (int i = 0; i < count; i++) {
-      SBPrefix prefix = entry->PrefixAt(i);
-      InsertAddPrefix(prefix, encoded);
-    }
+  if (OpenSqliteDb(filename_, &db_) != SQLITE_OK) {
+    sqlite3_close(db_);
+    db_ = NULL;
+    return false;
   }
-}
 
-void SafeBrowsingDatabaseBloom::InsertAddPrefix(SBPrefix prefix,
-                                                int encoded_chunk) {
-  STATS_COUNTER("SB.PrefixAdd", 1);
-  std::string sql = "INSERT INTO add_prefix (chunk, prefix) VALUES (?, ?)";
-  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
-  if (!statement.is_valid()) {
-    NOTREACHED();
-    return;
-  }
-  statement->bind_int(0, encoded_chunk);
-  statement->bind_int(1, prefix);
-  int rv = statement->step();
-  statement->reset();
-  if (rv == SQLITE_CORRUPT) {
-    HandleCorruptDatabase();
-  } else {
-    DCHECK(rv == SQLITE_DONE);
+  // Run the database in exclusive mode. Nobody else should be accessing the
+  // database while we're running, and this will give somewhat improved perf.
+  sqlite3_exec(db_, "PRAGMA locking_mode=EXCLUSIVE", NULL, NULL, NULL);
+
+  statement_cache_.reset(new SqliteStatementCache(db_));
+
+  if (!DoesSqliteTableExist(db_, "add_prefix")) {
+    if (!CreateTables()) {
+      // Database could be corrupt, try starting from scratch.
+      if (!ResetDatabase())
+        return false;
+    }
+  } else if (!CheckCompatibleVersion()) {
+    if (!ResetDatabase())
+      return false;
   }
-  add_count_++;
+
+  return true;
 }
 
-void SafeBrowsingDatabaseBloom::InsertAddFullHash(SBPrefix prefix,
-                                                  int encoded_chunk,
-                                                  base::Time receive_time,
-                                                  SBFullHash full_prefix) {
-  STATS_COUNTER("SB.PrefixAddFull", 1);
-  std::string sql = "INSERT INTO add_full_hash "
-                    "(chunk, prefix, receive_time, full_hash) "
-                    "VALUES (?,?,?,?)";
-  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
-  if (!statement.is_valid()) {
-    NOTREACHED();
-    return;
-  }
+bool SafeBrowsingDatabaseBloom::Close() {
+  if (!db_)
+    return true;
 
-  statement->bind_int(0, encoded_chunk);
-  statement->bind_int(1, prefix);
-  statement->bind_int64(2, receive_time.ToTimeT());
-  statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash));
-  int rv = statement->step();
-  statement->reset();
-  if (rv == SQLITE_CORRUPT) {
-    HandleCorruptDatabase();
-  } else {
-    DCHECK(rv == SQLITE_DONE);
-  }
+  insert_transaction_.reset();
+  statement_cache_.reset();  // Must free statements before closing DB.
+  bool result = sqlite3_close(db_) == SQLITE_OK;
+  db_ = NULL;
+
+  return result;
 }
 
-void SafeBrowsingDatabaseBloom::InsertSub(
-    int chunk_id, SBPrefix host, SBEntry* entry) {
-  STATS_COUNTER("SB.HostDelete", 1);
-  int encoded = EncodeChunkId(chunk_id, entry->list_id());
-  int encoded_add;
+bool SafeBrowsingDatabaseBloom::CreateTables() {
+  SQLTransaction transaction(db_);
+  transaction.Begin();
 
-  if (entry->type() == SBEntry::SUB_FULL_HASH) {
-    for (int i = 0; i < entry->prefix_count(); ++i) {
-      SBFullHash full_hash = entry->FullHashAt(i);
-      SBPrefix prefix = full_hash.prefix;
-      encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), entry->list_id());
-      InsertSubPrefix(prefix, encoded, encoded_add);
-      InsertSubFullHash(prefix, encoded, encoded_add, full_hash, false);
-    }
-  } else {
-    // We have prefixes.
-    int count = entry->prefix_count();
-    if (count == 0) {
-      encoded_add = EncodeChunkId(entry->chunk_id(), entry->list_id());
-      InsertSubPrefix(host, encoded, encoded_add);
-    } else {
-      for (int i = 0; i < count; i++) {
-        SBPrefix prefix = entry->PrefixAt(i);
-        encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i),
-                                    entry->list_id());
-        InsertSubPrefix(prefix, encoded, encoded_add);
-      }
-    }
+  // Store 32 bit add prefixes here.
+  if (sqlite3_exec(db_, "CREATE TABLE add_prefix ("
+      "chunk INTEGER,"
+      "prefix INTEGER)",
+      NULL, NULL, NULL) != SQLITE_OK) {
+    return false;
   }
-}
 
-void SafeBrowsingDatabaseBloom::InsertSubPrefix(SBPrefix prefix,
-                                                int encoded_chunk,
-                                                int encoded_add_chunk) {
-  STATS_COUNTER("SB.PrefixSub", 1);
-  std::string sql =
-    "INSERT INTO sub_prefix (chunk, add_chunk, prefix) VALUES (?,?,?)";
-  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
-  if (!statement.is_valid()) {
-    NOTREACHED();
-    return;
+  // Store 32 bit sub prefixes here.
+  if (sqlite3_exec(db_, "CREATE TABLE sub_prefix ("
+                   "chunk INTEGER,"
+                   "add_chunk INTEGER,"
+                   "prefix INTEGER)",
+                   NULL, NULL, NULL) != SQLITE_OK) {
+    return false;
   }
-  statement->bind_int(0, encoded_chunk);
-  statement->bind_int(1, encoded_add_chunk);
-  statement->bind_int(2, prefix);
-  int rv = statement->step();
-  statement->reset();
-  if (rv == SQLITE_CORRUPT) {
-    HandleCorruptDatabase();
-  } else {
-    DCHECK(rv == SQLITE_DONE);
+
+  // Store 256 bit add full hashes (and GetHash results) here.
+  if (sqlite3_exec(db_, "CREATE TABLE add_full_hash ("
+                   "chunk INTEGER,"
+                   "prefix INTEGER,"
+                   "receive_time INTEGER,"
+                   "full_hash BLOB)",
+                   NULL, NULL, NULL) != SQLITE_OK) {
+    return false;
   }
-}
 
-void SafeBrowsingDatabaseBloom::InsertSubFullHash(SBPrefix prefix,
-                                                  int encoded_chunk,
-                                                  int encoded_add_chunk,
-                                                  SBFullHash full_prefix,
-                                                  bool use_temp_table) {
-  STATS_COUNTER("SB.PrefixSubFull", 1);
-  std::string sql = "INSERT INTO ";
-  if (use_temp_table) {
-    sql += "sub_full_tmp";
-  } else {
-    sql += "sub_full_hash";
+  // Store 256 bit sub full hashes here.
+  if (sqlite3_exec(db_, "CREATE TABLE sub_full_hash ("
+                   "chunk INTEGER,"
+                   "add_chunk INTEGER,"
+                   "prefix INTEGER,"
+                   "full_hash BLOB)",
+                   NULL, NULL, NULL) != SQLITE_OK) {
+    return false;
   }
-  sql += " (chunk, add_chunk, prefix, full_hash) VALUES (?,?,?,?)";
 
-  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
-  if (!statement.is_valid()) {
-    NOTREACHED();
-    return;
+  // Store all the add and sub chunk numbers we receive. We cannot just rely on
+  // the prefix tables to generate these lists, since some chunks will have zero
+  // entries (and thus no prefixes), or potentially an add chunk can have all of
+  // its entries sub'd without receiving an AddDel, or a sub chunk might have
+  // been entirely consumed by adds. In these cases, we still have to report the
+  // chunk number but it will not have any prefixes in the prefix tables.
+  //
+  // TODO(paulg): Investigate storing the chunks as a string of ChunkRanges, one
+  // string for each of phish-add, phish-sub, malware-add, malware-sub. This
+  // might be better performance when the number of chunks is large, and is the
+  // natural format for the update request.
+  if (sqlite3_exec(db_, "CREATE TABLE add_chunks ("
+                   "chunk INTEGER PRIMARY KEY)",
+                   NULL, NULL, NULL) != SQLITE_OK) {
+    return false;
   }
-  statement->bind_int(0, encoded_chunk);
-  statement->bind_int(1, encoded_add_chunk);
-  statement->bind_int(2, prefix);
-  statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash));
-  int rv = statement->step();
-  statement->reset();
-  if (rv == SQLITE_CORRUPT) {
-    HandleCorruptDatabase();
-  } else {
-    DCHECK(rv == SQLITE_DONE);
+
+  if (sqlite3_exec(db_, "CREATE TABLE sub_chunks ("
+                   "chunk INTEGER PRIMARY KEY)",
+                   NULL, NULL, NULL) != SQLITE_OK) {
+    return false;
   }
-}
 
-void SafeBrowsingDatabaseBloom::ReadFullHash(SqliteCompiledStatement& statement,
-                                             int column,
-                                             SBFullHash* full_hash) {
-  DCHECK(full_hash);
-  std::vector<unsigned char> blob;
-  statement->column_blob_as_vector(column, &blob);
-  DCHECK(blob.size() == sizeof(SBFullHash));
-  memcpy(full_hash->full_hash, &blob[0], sizeof(SBFullHash));
-}
+  std::string version = "PRAGMA user_version=";
+  version += StringPrintf("%d", kDatabaseVersion);
 
-// TODO(paulg): Look for a less expensive way to maintain add_count_? If we move
-// to a native file format, we can just cache the count in the file and not have
-// to scan at all.
-int SafeBrowsingDatabaseBloom::GetAddPrefixCount() {
-  SQLITE_UNIQUE_STATEMENT(count, *statement_cache_,
-                          "SELECT count(*) FROM add_prefix");
-  if (!count.is_valid()) {
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, version.c_str());
+  if (!statement.is_valid()) {
     NOTREACHED();
-    return 0;
+    return false;
   }
-  int rv = count->step();
-  int add_count = 0;
-  if (rv == SQLITE_ROW)
-    add_count = count->column_int(0);
-  else if (rv == SQLITE_CORRUPT)
-    HandleCorruptDatabase();
 
-  return add_count;
-}
+  if (statement->step() != SQLITE_DONE)
+    return false;
 
-void SafeBrowsingDatabaseBloom::DeleteChunks(
-    std::vector<SBChunkDelete>* chunk_deletes) {
-  if (chunk_deletes->empty())
-    return;
+  transaction.Commit();
+  add_count_ = 0;
 
-  int list_id = safe_browsing_util::GetListId(chunk_deletes->front().list_name);
+  return true;
+}
 
-  for (size_t i = 0; i < chunk_deletes->size(); ++i) {
-    const SBChunkDelete& chunk = (*chunk_deletes)[i];
-    std::vector<int> chunk_numbers;
-    RangesToChunks(chunk.chunk_del, &chunk_numbers);
-    for (size_t del = 0; del < chunk_numbers.size(); ++del) {
-      int encoded_chunk = EncodeChunkId(chunk_numbers[del], list_id);
-      if (chunk.is_sub_del)
-        sub_del_cache_.insert(encoded_chunk);
-      else
-        add_del_cache_.insert(encoded_chunk);
-    }
+bool SafeBrowsingDatabaseBloom::CheckCompatibleVersion() {
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "PRAGMA user_version");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
   }
 
-  delete chunk_deletes;
-}
+  int result = statement->step();
+  if (result != SQLITE_ROW)
+    return false;
+
+  return statement->column_int(0) == kDatabaseVersion;
+}
 
 bool SafeBrowsingDatabaseBloom::ChunkExists(int list_id,
                                             ChunkType type,
@@ -655,140 +491,126 @@ void SafeBrowsingDatabaseBloom::GetChunkIds(
   RangesToString(ranges, list);
 }
 
-void SafeBrowsingDatabaseBloom::GetListsInfo(
-    std::vector<SBListChunkRanges>* lists) {
-  DCHECK(lists);
-  lists->clear();
+void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
+#if defined(OS_WIN)
+  // For measuring the amount of IO during the bloom filter build.
+  IoCounters io_before, io_after;
+  base::ProcessHandle handle = base::Process::Current().handle();
+  scoped_ptr<base::ProcessMetrics> metric;
+  metric.reset(base::ProcessMetrics::CreateProcessMetrics(handle));
+  metric->GetIOCounters(&io_before);
+#endif
 
-  ReadChunkNumbers();
+  Time before = Time::Now();
 
-  lists->push_back(SBListChunkRanges(safe_browsing_util::kMalwareList));
-  GetChunkIds(safe_browsing_util::MALWARE, ADD_CHUNK, &lists->back().adds);
-  GetChunkIds(safe_browsing_util::MALWARE, SUB_CHUNK, &lists->back().subs);
+  // Get all the pending GetHash results and write them to disk.
+  HashList pending_hashes;
+  {
+    AutoLock lock(lookup_lock_);
+    pending_hashes.swap(pending_full_hashes_);
+  }
+  WriteFullHashList(pending_hashes, true);
 
-  lists->push_back(SBListChunkRanges(safe_browsing_util::kPhishingList));
-  GetChunkIds(safe_browsing_util::PHISH, ADD_CHUNK, &lists->back().adds);
-  GetChunkIds(safe_browsing_util::PHISH, SUB_CHUNK, &lists->back().subs);
+  add_count_ = GetAddPrefixCount();
+  if (add_count_ == 0) {
+    AutoLock lock(lookup_lock_);
+    bloom_filter_ = NULL;
+    return;
+  }
 
-  return;
-}
+  scoped_array<SBPair> adds_array(new SBPair[add_count_]);
+  SBPair* adds = adds_array.get();
 
-void SafeBrowsingDatabaseBloom::ReadChunkNumbers() {
-  add_chunk_cache_.clear();
-  sub_chunk_cache_.clear();
+  if (!BuildAddPrefixList(adds))
+    return;
 
-  // Read in the add chunk numbers.
-  SQLITE_UNIQUE_STATEMENT(read_adds, *statement_cache_,
-                          "SELECT chunk FROM add_chunks");
-  if (!read_adds.is_valid()) {
-    NOTREACHED();
+  // Build the full add cache, which includes full hash updates and GetHash
+  // results. Subs may remove some of these entries.
+  scoped_ptr<HashCache> add_cache(new HashCache);
+  if (!BuildAddFullHashCache(add_cache.get()))
     return;
-  }
 
-  while (true) {
-    int rv = read_adds->step();
-    if (rv != SQLITE_ROW) {
-      if (rv == SQLITE_CORRUPT)
-        HandleCorruptDatabase();
-      break;
-    }
-    add_chunk_cache_.insert(read_adds->column_int(0));
-  }
+  scoped_ptr<HashCache> sub_cache(new HashCache);
+  if (!BuildSubFullHashCache(sub_cache.get()))
+    return;
 
-  // Read in the sub chunk numbers.
-  SQLITE_UNIQUE_STATEMENT(read_subs, *statement_cache_,
-                          "SELECT chunk FROM sub_chunks");
-  if (!read_subs.is_valid()) {
-    NOTREACHED();
+  // Used to track which adds have been subbed out. The vector<bool> is actually
+  // a bitvector so the size is as small as we can get.
+  std::vector<bool> adds_removed;
+  adds_removed.resize(add_count_, false);
+
+  // Flag any add as removed if there is a matching sub.
+  int subs = 0;
+  if (!RemoveSubs(adds, &adds_removed, add_cache.get(), sub_cache.get(), &subs))
     return;
-  }
 
-  while (true) {
-    int rv = read_subs->step();
-    if (rv != SQLITE_ROW) {
-      if (rv == SQLITE_CORRUPT)
-        HandleCorruptDatabase();
-      break;
-    }
-    sub_chunk_cache_.insert(read_subs->column_int(0));
-  }
-}
+  // Prepare the database for writing out our remaining add and sub prefixes.
+  if (!UpdateTables())
+    return;
 
-// Write all the chunk numbers to the add_chunks and sub_chunks tables.
-bool SafeBrowsingDatabaseBloom::WriteChunkNumbers() {
-  // Delete the contents of the add chunk table.
-  SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_,
-                          "DELETE FROM add_chunks");
-  if (!del_add_chunk.is_valid()) {
-    NOTREACHED();
-    return false;
-  }
-  int rv = del_add_chunk->step();
-  if (rv == SQLITE_CORRUPT) {
-    HandleCorruptDatabase();
-    return false;
-  }
-  DCHECK(rv == SQLITE_DONE);
+  // Write out the remaining add prefixes to the filter and database.
+  int new_count;
+  scoped_refptr<BloomFilter> filter;
+  if (!WritePrefixes(adds, adds_removed, &new_count, &filter))
+    return;
 
-  SQLITE_UNIQUE_STATEMENT(write_adds, *statement_cache_,
-                          "INSERT INTO add_chunks (chunk) VALUES (?)");
-  if (!write_adds.is_valid()) {
-    NOTREACHED();
-    return false;
-  }
+  // Write out the remaining full hash adds and subs to the database.
+  WriteFullHashes(add_cache.get(), true);
+  WriteFullHashes(sub_cache.get(), false);
 
-  // Write all the add chunks from the cache to the database.
-  std::set<int>::const_iterator it = add_chunk_cache_.begin();
-  for (; it != add_chunk_cache_.end(); ++it) {
-    if (add_del_cache_.find(*it) != add_del_cache_.end())
-      continue;  // This chunk has been deleted.
-    write_adds->bind_int(0, *it);
-    rv = write_adds->step();
-    if (rv == SQLITE_CORRUPT) {
-      HandleCorruptDatabase();
-      return false;
-    }
-    DCHECK(rv == SQLITE_DONE);
-    write_adds->reset();
-  }
+  // Save the chunk numbers we've received to the database for reporting in
+  // future update requests.
+  if (!WriteChunkNumbers())
+    return;
 
-  // Delete the contents of the sub chunk table.
-  SQLITE_UNIQUE_STATEMENT(del_sub_chunk, *statement_cache_,
-                          "DELETE FROM sub_chunks");
-  if (!del_sub_chunk.is_valid()) {
-    NOTREACHED();
-    return false;
-  }
-  rv = del_sub_chunk->step();
-  if (rv == SQLITE_CORRUPT) {
-    HandleCorruptDatabase();
-    return false;
+  // Commit all the changes to the database.
+  int rv = insert_transaction_->Commit();
+  if (rv != SQLITE_OK) {
+    NOTREACHED() << "SafeBrowsing update transaction failed to commit.";
+    UMA_HISTOGRAM_COUNTS("SB2.FailedUpdate", 1);
+    return;
   }
-  DCHECK(rv == SQLITE_DONE);
 
-  SQLITE_UNIQUE_STATEMENT(write_subs, *statement_cache_,
-                          "INSERT INTO sub_chunks (chunk) VALUES (?)");
-  if (!write_subs.is_valid()) {
-    NOTREACHED();
-    return false;
+  // Swap in the newly built filter and cache. If there were any matching subs,
+  // the size (add_count_) will be smaller.
+  {
+    AutoLock lock(lookup_lock_);
+    add_count_ = new_count;
+    bloom_filter_.swap(filter);
+    hash_cache_.swap(add_cache);
   }
 
-  // Write all the sub chunks from the cache to the database.
-  it = sub_chunk_cache_.begin();
-  for (; it != sub_chunk_cache_.end(); ++it) {
-    if (sub_del_cache_.find(*it) != sub_del_cache_.end())
-      continue;  // This chunk has been deleted.
-    write_subs->bind_int(0, *it);
-    rv = write_subs->step();
-    if (rv == SQLITE_CORRUPT) {
-      HandleCorruptDatabase();
-      return false;
-    }
-    DCHECK(rv == SQLITE_DONE);
-    write_subs->reset();
-  }
+  TimeDelta bloom_gen = Time::Now() - before;
 
-  return true;
+  // Persist the bloom filter to disk.
+  WriteBloomFilter();
+
+  // Gather statistics.
+#if defined(OS_WIN)
+  metric->GetIOCounters(&io_after);
+  UMA_HISTOGRAM_COUNTS("SB2.BuildReadBytes",
+                       static_cast<int>(io_after.ReadTransferCount -
+                                        io_before.ReadTransferCount));
+  UMA_HISTOGRAM_COUNTS("SB2.BuildWriteBytes",
+                       static_cast<int>(io_after.WriteTransferCount -
+                                        io_before.WriteTransferCount));
+  UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
+                       static_cast<int>(io_after.ReadOperationCount -
+                                        io_before.ReadOperationCount));
+  UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
+                       static_cast<int>(io_after.WriteOperationCount -
+                                        io_before.WriteOperationCount));
+#endif
+  SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in "
+                << bloom_gen.InMilliseconds()
+                << " ms total.  prefix count: "<< add_count_;
+  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
+  UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_count_);
+  UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", subs);
+  UMA_HISTOGRAM_COUNTS("SB2.FilterSize", bloom_filter_->size());
+  int64 size_64;
+  if (file_util::GetFileSize(filename_, &size_64))
+    UMA_HISTOGRAM_COUNTS("SB2.DatabaseBytes", static_cast<int>(size_64));
 }
 
 int SafeBrowsingDatabaseBloom::PairCompare(const void* arg1, const void* arg2) {
@@ -831,42 +653,150 @@ bool SafeBrowsingDatabaseBloom::BuildAddPrefixList(SBPair* adds) {
   return true;
 }
 
-bool SafeBrowsingDatabaseBloom::RemoveSubs(
-    SBPair* adds, std::vector<bool>* adds_removed,
-    HashCache* add_cache, HashCache* sub_cache, int* subs) {
-  DCHECK(add_cache && sub_cache && subs);
+bool SafeBrowsingDatabaseBloom::BuildAddFullHashCache(HashCache* add_cache) {
+  add_cache->clear();
 
-  // Read through sub_prefix and zero out add_prefix entries that match.
-  SQLITE_UNIQUE_STATEMENT(sub_prefix, *statement_cache_,
-                          "SELECT chunk, add_chunk, prefix FROM sub_prefix");
-  if (!sub_prefix.is_valid()) {
+  // Read all full add entries to the cache.
+  SQLITE_UNIQUE_STATEMENT(
+      full_add_entry,
+      *statement_cache_,
+      "SELECT chunk, prefix, receive_time, full_hash FROM add_full_hash");
+  if (!full_add_entry.is_valid()) {
     NOTREACHED();
     return false;
   }
 
-  // Create a temporary sub prefix table. We add entries to it as we scan the
-  // sub_prefix table looking for adds to remove. Only entries that don't
-  // remove an add written to this table. When we're done filtering, we replace
-  // sub_prefix with this table.
-  if (sqlite3_exec(db_, "CREATE TABLE sub_prefix_tmp ("
-                   "chunk INTEGER,"
-                   "add_chunk INTEGER,"
-                   "prefix INTEGER)",
-                   NULL, NULL, NULL) != SQLITE_OK) {
-    return false;
-  }
-
-  // Create a temporary sub full hash table, similar to the above prefix table.
-  if (sqlite3_exec(db_, "CREATE TABLE sub_full_tmp ("
-                   "chunk INTEGER,"
-                   "add_chunk INTEGER,"
-                   "prefix INTEGER,"
-                   "full_hash BLOB)",
-                   NULL, NULL, NULL) != SQLITE_OK) {
-    return false;
-  }
-
-  SQLITE_UNIQUE_STATEMENT(
+  int rv;
+  while (true) {
+    rv = full_add_entry->step();
+    if (rv != SQLITE_ROW) {
+      if (rv == SQLITE_CORRUPT) {
+        HandleCorruptDatabase();
+        return false;
+      }
+      break;
+    }
+    HashCacheEntry entry;
+    entry.add_chunk_id = full_add_entry->column_int(0);
+    if (add_del_cache_.find(entry.add_chunk_id) != add_del_cache_.end())
+      continue;  // This entry's chunk was deleted so we skip it.
+    SBPrefix prefix = full_add_entry->column_int(1);
+    entry.received = base::Time::FromTimeT(full_add_entry->column_int64(2));
+    int chunk, list_id;
+    DecodeChunkId(entry.add_chunk_id, &chunk, &list_id);
+    entry.list_id = list_id;
+    ReadFullHash(&full_add_entry, 3, &entry.full_hash);
+    HashList& entries = (*add_cache)[prefix];
+    entries.push_back(entry);
+  }
+
+  // Clear the full add table.
+  SQLITE_UNIQUE_STATEMENT(full_add_drop, *statement_cache_,
+                          "DELETE FROM add_full_hash");
+  if (!full_add_drop.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+  rv = full_add_drop->step();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+    return false;
+  }
+  DCHECK(rv == SQLITE_DONE);
+
+  return true;
+}
+
+bool SafeBrowsingDatabaseBloom::BuildSubFullHashCache(HashCache* sub_cache) {
+  sub_cache->clear();
+
+  // Read all full sub entries to the cache.
+  SQLITE_UNIQUE_STATEMENT(
+      full_sub_entry,
+      *statement_cache_,
+      "SELECT chunk, add_chunk, prefix, full_hash FROM sub_full_hash");
+  if (!full_sub_entry.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  int rv;
+  while (true) {
+    rv = full_sub_entry->step();
+    if (rv != SQLITE_ROW) {
+      if (rv == SQLITE_CORRUPT) {
+        HandleCorruptDatabase();
+        return false;
+      }
+      break;
+    }
+    HashCacheEntry entry;
+    entry.sub_chunk_id = full_sub_entry->column_int(0);
+    if (sub_del_cache_.find(entry.sub_chunk_id) != sub_del_cache_.end())
+      continue;  // This entry's chunk was deleted so we skip it.
+    entry.add_chunk_id = full_sub_entry->column_int(1);
+    SBPrefix prefix = full_sub_entry->column_int(2);
+    int chunk, list_id;
+    DecodeChunkId(entry.add_chunk_id, &chunk, &list_id);
+    entry.list_id = list_id;
+    ReadFullHash(&full_sub_entry, 3, &entry.full_hash);
+    HashList& entries = (*sub_cache)[prefix];
+    entries.push_back(entry);
+  }
+
+  // Clear the full sub table.
+  SQLITE_UNIQUE_STATEMENT(full_sub_drop, *statement_cache_,
+                          "DELETE FROM sub_full_hash");
+  if (!full_sub_drop.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+  rv = full_sub_drop->step();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+    return false;
+  }
+  DCHECK(rv == SQLITE_DONE);
+
+  return true;
+}
+
+bool SafeBrowsingDatabaseBloom::RemoveSubs(
+    SBPair* adds, std::vector<bool>* adds_removed,
+    HashCache* add_cache, HashCache* sub_cache, int* subs) {
+  DCHECK(add_cache && sub_cache && subs);
+
+  // Read through sub_prefix and zero out add_prefix entries that match.
+  SQLITE_UNIQUE_STATEMENT(sub_prefix, *statement_cache_,
+                          "SELECT chunk, add_chunk, prefix FROM sub_prefix");
+  if (!sub_prefix.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  // Create a temporary sub prefix table. We add entries to it as we scan the
+  // sub_prefix table looking for adds to remove. Only entries that don't
+  // remove an add are written to this table. When we're done filtering, we
+  // replace sub_prefix with this table.
+  if (sqlite3_exec(db_, "CREATE TABLE sub_prefix_tmp ("
+                   "chunk INTEGER,"
+                   "add_chunk INTEGER,"
+                   "prefix INTEGER)",
+                   NULL, NULL, NULL) != SQLITE_OK) {
+    return false;
+  }
+
+  // Create a temporary sub full hash table, similar to the above prefix table.
+  if (sqlite3_exec(db_, "CREATE TABLE sub_full_tmp ("
+                   "chunk INTEGER,"
+                   "add_chunk INTEGER,"
+                   "prefix INTEGER,"
+                   "full_hash BLOB)",
+                   NULL, NULL, NULL) != SQLITE_OK) {
+    return false;
+  }
+
+  SQLITE_UNIQUE_STATEMENT(
       sub_prefix_tmp,
       *statement_cache_,
       "INSERT INTO sub_prefix_tmp (chunk, add_chunk, prefix) VALUES (?,?,?)");
@@ -1094,344 +1024,395 @@ void SafeBrowsingDatabaseBloom::WriteFullHashList(const HashList& hash_list,
   }
 }
 
-bool SafeBrowsingDatabaseBloom::BuildAddFullHashCache(HashCache* add_cache) {
-  add_cache->clear();
+void SafeBrowsingDatabaseBloom::GetCachedFullHashes(
+    const std::vector<SBPrefix>* prefix_hits,
+    std::vector<SBFullHashResult>* full_hits,
+    Time last_update) {
+  DCHECK(prefix_hits && full_hits);
+  lookup_lock_.AssertAcquired();
 
-  // Read all full add entries to the cache.
-  SQLITE_UNIQUE_STATEMENT(
-      full_add_entry,
-      *statement_cache_,
-      "SELECT chunk, prefix, receive_time, full_hash FROM add_full_hash");
-  if (!full_add_entry.is_valid()) {
-    NOTREACHED();
-    return false;
-  }
+  Time max_age = Time::Now() - TimeDelta::FromMinutes(kMaxStalenessMinutes);
 
-  int rv;
-  while (true) {
-    rv = full_add_entry->step();
-    if (rv != SQLITE_ROW) {
-      if (rv == SQLITE_CORRUPT) {
-        HandleCorruptDatabase();
-        return false;
+  for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin();
+       it != prefix_hits->end(); ++it) {
+    HashCache::iterator hit = hash_cache_->find(*it);
+    if (hit != hash_cache_->end()) {
+      HashList& entries = hit->second;
+      HashList::iterator eit = entries.begin();
+      while (eit != entries.end()) {
+        // An entry is valid if we've received an update in the past 45 minutes,
+        // or if this particular GetHash was received in the past 45 minutes.
+        // If an entry does not meet the time criteria above, we are not
+        // allowed to use it since it might have become stale. We keep it
+        // around, though, and may be able to use it in the future once we
+        // receive the next update (that doesn't sub it).
+        if (max_age < last_update || eit->received > max_age) {
+          SBFullHashResult full_hash;
+          full_hash.hash = eit->full_hash;
+          full_hash.list_name = safe_browsing_util::GetListName(eit->list_id);
+          full_hash.add_chunk_id = eit->add_chunk_id;
+          full_hits->push_back(full_hash);
+        }
+        ++eit;
       }
-      break;
+
+      if (entries.empty())
+        hash_cache_->erase(hit);
     }
-    HashCacheEntry entry;
-    entry.add_chunk_id = full_add_entry->column_int(0);
-    if (add_del_cache_.find(entry.add_chunk_id) != add_del_cache_.end())
-      continue;  // This entry's chunk was deleted so we skip it.
-    SBPrefix prefix = full_add_entry->column_int(1);
-    entry.received = base::Time::FromTimeT(full_add_entry->column_int64(2));
-    int chunk, list_id;
-    DecodeChunkId(entry.add_chunk_id, &chunk, &list_id);
-    entry.list_id = list_id;
-    ReadFullHash(full_add_entry, 3, &entry.full_hash);
-    HashList& entries = (*add_cache)[prefix];
-    entries.push_back(entry);
   }
+}
 
-  // Clear the full add table.
-  SQLITE_UNIQUE_STATEMENT(full_add_drop, *statement_cache_,
-                          "DELETE FROM add_full_hash");
-  if (!full_add_drop.is_valid()) {
-    NOTREACHED();
-    return false;
-  }
-  rv = full_add_drop->step();
-  if (rv == SQLITE_CORRUPT) {
-    HandleCorruptDatabase();
-    return false;
+bool SafeBrowsingDatabaseBloom::ClearCachedEntry(SBPrefix prefix,
+                                                 int add_chunk,
+                                                 HashCache* hash_cache) {
+  bool match = false;
+  HashCache::iterator it = hash_cache->find(prefix);
+  if (it == hash_cache->end())
+    return match;
+
+  HashList& entries = it->second;
+  HashList::iterator lit = entries.begin();
+  while (lit != entries.end()) {
+    HashCacheEntry& entry = *lit;
+    if (entry.add_chunk_id == add_chunk) {
+      lit = entries.erase(lit);
+      match = true;
+      continue;
+    }
+    ++lit;
   }
-  DCHECK(rv == SQLITE_DONE);
 
-  return true;
+  if (entries.empty())
+    hash_cache->erase(it);
+
+  return match;
 }
 
-bool SafeBrowsingDatabaseBloom::BuildSubFullHashCache(HashCache* sub_cache) {
-  sub_cache->clear();
+void SafeBrowsingDatabaseBloom::HandleCorruptDatabase() {
+  MessageLoop::current()->PostTask(FROM_HERE,
+      reset_factory_.NewRunnableMethod(
+          &SafeBrowsingDatabaseBloom::OnHandleCorruptDatabase));
+}
 
-  // Read all full sub entries to the cache.
-  SQLITE_UNIQUE_STATEMENT(
-      full_sub_entry,
-      *statement_cache_,
-      "SELECT chunk, add_chunk, prefix, full_hash FROM sub_full_hash");
-  if (!full_sub_entry.is_valid()) {
-    NOTREACHED();
-    return false;
-  }
+void SafeBrowsingDatabaseBloom::OnHandleCorruptDatabase() {
+  ResetDatabase();
+  DCHECK(false) << "SafeBrowsing database was corrupt and reset";
+}
 
-  int rv;
-  while (true) {
-    rv = full_sub_entry->step();
-    if (rv != SQLITE_ROW) {
-      if (rv == SQLITE_CORRUPT) {
-        HandleCorruptDatabase();
-        return false;
-      }
-      break;
-    }
-    HashCacheEntry entry;
-    entry.sub_chunk_id = full_sub_entry->column_int(0);
-    if (sub_del_cache_.find(entry.sub_chunk_id) != sub_del_cache_.end())
-      continue;  // This entry's chunk was deleted so we skip it.
-    entry.add_chunk_id = full_sub_entry->column_int(1);
-    SBPrefix prefix = full_sub_entry->column_int(2);
-    int chunk, list_id;
-    DecodeChunkId(entry.add_chunk_id, &chunk, &list_id);
-    entry.list_id = list_id;
-    ReadFullHash(full_sub_entry, 3, &entry.full_hash);
-    HashList& entries = (*sub_cache)[prefix];
-    entries.push_back(entry);
+void SafeBrowsingDatabaseBloom::InsertAdd(SBPrefix host, SBEntry* entry) {
+  STATS_COUNTER("SB.HostInsert", 1);
+  int encoded = EncodeChunkId(entry->chunk_id(), entry->list_id());
+
+  if (entry->type() == SBEntry::ADD_FULL_HASH) {
+    base::Time receive_time = base::Time::Now();
+    for (int i = 0; i < entry->prefix_count(); ++i) {
+      SBFullHash full_hash = entry->FullHashAt(i);
+      SBPrefix prefix = full_hash.prefix;
+      InsertAddPrefix(prefix, encoded);
+      InsertAddFullHash(prefix, encoded, receive_time, full_hash);
+    }
+    return;
   }
 
-  // Clear the full sub table.
-  SQLITE_UNIQUE_STATEMENT(full_sub_drop, *statement_cache_,
-                          "DELETE FROM sub_full_hash");
-  if (!full_sub_drop.is_valid()) {
+  // This entry contains only regular (32 bit) prefixes.
+  int count = entry->prefix_count();
+  if (count == 0) {
+    InsertAddPrefix(host, encoded);
+  } else {
+    for (int i = 0; i < count; i++) {
+      SBPrefix prefix = entry->PrefixAt(i);
+      InsertAddPrefix(prefix, encoded);
+    }
+  }
+}
+
+void SafeBrowsingDatabaseBloom::InsertAddPrefix(SBPrefix prefix,
+                                                int encoded_chunk) {
+  STATS_COUNTER("SB.PrefixAdd", 1);
+  std::string sql = "INSERT INTO add_prefix (chunk, prefix) VALUES (?, ?)";
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
+  if (!statement.is_valid()) {
     NOTREACHED();
-    return false;
+    return;
   }
-  rv = full_sub_drop->step();
+  statement->bind_int(0, encoded_chunk);
+  statement->bind_int(1, prefix);
+  int rv = statement->step();
+  statement->reset();
   if (rv == SQLITE_CORRUPT) {
     HandleCorruptDatabase();
-    return false;
+  } else {
+    DCHECK(rv == SQLITE_DONE);
   }
-  DCHECK(rv == SQLITE_DONE);
-
-  return true;
+  add_count_++;
 }
 
-void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
-#if defined(OS_WIN)
-  // For measuring the amount of IO during the bloom filter build.
-  IoCounters io_before, io_after;
-  base::ProcessHandle handle = base::Process::Current().handle();
-  scoped_ptr<base::ProcessMetrics> metric;
-  metric.reset(base::ProcessMetrics::CreateProcessMetrics(handle));
-  metric->GetIOCounters(&io_before);
-#endif
-
-  Time before = Time::Now();
-
-  // Get all the pending GetHash results and write them to disk.
-  HashList pending_hashes;
-  {
-    AutoLock lock(lookup_lock_);
-    pending_hashes.swap(pending_full_hashes_);
-  }
-  WriteFullHashList(pending_hashes, true);
-
-  add_count_ = GetAddPrefixCount();
-  if (add_count_ == 0) {
-    AutoLock lock(lookup_lock_);
-    bloom_filter_ = NULL;
+void SafeBrowsingDatabaseBloom::InsertAddFullHash(SBPrefix prefix,
+                                                  int encoded_chunk,
+                                                  base::Time receive_time,
+                                                  SBFullHash full_prefix) {
+  STATS_COUNTER("SB.PrefixAddFull", 1);
+  std::string sql = "INSERT INTO add_full_hash "
+                    "(chunk, prefix, receive_time, full_hash) "
+                    "VALUES (?,?,?,?)";
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
+  if (!statement.is_valid()) {
+    NOTREACHED();
     return;
   }
 
-  scoped_array<SBPair> adds_array(new SBPair[add_count_]);
-  SBPair* adds = adds_array.get();
-
-  if (!BuildAddPrefixList(adds))
-    return;
-
-  // Build the full add cache, which includes full hash updates and GetHash
-  // results. Subs may remove some of these entries.
-  scoped_ptr<HashCache> add_cache(new HashCache);
-  if (!BuildAddFullHashCache(add_cache.get()))
-    return;
-
-  scoped_ptr<HashCache> sub_cache(new HashCache);
-  if (!BuildSubFullHashCache(sub_cache.get()))
-    return;
-
-  // Used to track which adds have been subbed out. The vector<bool> is actually
-  // a bitvector so the size is as small as we can get.
-  std::vector<bool> adds_removed;
-  adds_removed.resize(add_count_, false);
+  statement->bind_int(0, encoded_chunk);
+  statement->bind_int(1, prefix);
+  statement->bind_int64(2, receive_time.ToTimeT());
+  statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash));
+  int rv = statement->step();
+  statement->reset();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+  } else {
+    DCHECK(rv == SQLITE_DONE);
+  }
+}
 
-  // Flag any add as removed if there is a matching sub.
-  int subs = 0;
-  if (!RemoveSubs(adds, &adds_removed, add_cache.get(), sub_cache.get(), &subs))
-    return;
+void SafeBrowsingDatabaseBloom::InsertSub(
+    int chunk_id, SBPrefix host, SBEntry* entry) {
+  STATS_COUNTER("SB.HostDelete", 1);
+  int encoded = EncodeChunkId(chunk_id, entry->list_id());
+  int encoded_add;
 
-  // Prepare the database for writing out our remaining add and sub prefixes.
-  if (!UpdateTables())
-    return;
+  if (entry->type() == SBEntry::SUB_FULL_HASH) {
+    for (int i = 0; i < entry->prefix_count(); ++i) {
+      SBFullHash full_hash = entry->FullHashAt(i);
+      SBPrefix prefix = full_hash.prefix;
+      encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i), entry->list_id());
+      InsertSubPrefix(prefix, encoded, encoded_add);
+      InsertSubFullHash(prefix, encoded, encoded_add, full_hash, false);
+    }
+  } else {
+    // We have prefixes.
+    int count = entry->prefix_count();
+    if (count == 0) {
+      encoded_add = EncodeChunkId(entry->chunk_id(), entry->list_id());
+      InsertSubPrefix(host, encoded, encoded_add);
+    } else {
+      for (int i = 0; i < count; i++) {
+        SBPrefix prefix = entry->PrefixAt(i);
+        encoded_add = EncodeChunkId(entry->ChunkIdAtPrefix(i),
+                                    entry->list_id());
+        InsertSubPrefix(prefix, encoded, encoded_add);
+      }
+    }
+  }
+}
 
-  // Write out the remaining add prefixes to the filter and database.
-  int new_count;
-  scoped_refptr<BloomFilter> filter;
-  if (!WritePrefixes(adds, adds_removed, &new_count, &filter))
+void SafeBrowsingDatabaseBloom::InsertSubPrefix(SBPrefix prefix,
+                                                int encoded_chunk,
+                                                int encoded_add_chunk) {
+  STATS_COUNTER("SB.PrefixSub", 1);
+  std::string sql =
+    "INSERT INTO sub_prefix (chunk, add_chunk, prefix) VALUES (?,?,?)";
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
+  if (!statement.is_valid()) {
+    NOTREACHED();
     return;
+  }
+  statement->bind_int(0, encoded_chunk);
+  statement->bind_int(1, encoded_add_chunk);
+  statement->bind_int(2, prefix);
+  int rv = statement->step();
+  statement->reset();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+  } else {
+    DCHECK(rv == SQLITE_DONE);
+  }
+}
 
-  // Write out the remaining full hash adds and subs to the database.
-  WriteFullHashes(add_cache.get(), true);
-  WriteFullHashes(sub_cache.get(), false);
-
-  // Save the chunk numbers we've received to the database for reporting in
-  // future update requests.
-  if (!WriteChunkNumbers())
-    return;
+void SafeBrowsingDatabaseBloom::InsertSubFullHash(SBPrefix prefix,
+                                                  int encoded_chunk,
+                                                  int encoded_add_chunk,
+                                                  SBFullHash full_prefix,
+                                                  bool use_temp_table) {
+  STATS_COUNTER("SB.PrefixSubFull", 1);
+  std::string sql = "INSERT INTO ";
+  if (use_temp_table) {
+    sql += "sub_full_tmp";
+  } else {
+    sql += "sub_full_hash";
+  }
+  sql += " (chunk, add_chunk, prefix, full_hash) VALUES (?,?,?,?)";
 
-  // Commit all the changes to the database.
-  int rv = insert_transaction_->Commit();
-  if (rv != SQLITE_OK) {
-    NOTREACHED() << "SafeBrowsing update transaction failed to commit.";
-    UMA_HISTOGRAM_COUNTS("SB2.FailedUpdate", 1);
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
+  if (!statement.is_valid()) {
+    NOTREACHED();
     return;
   }
-
-  // Swap in the newly built filter and cache. If there were any matching subs,
-  // the size (add_count_) will be smaller.
-  {
-    AutoLock lock(lookup_lock_);
-    add_count_ = new_count;
-    bloom_filter_.swap(filter);
-    hash_cache_.swap(add_cache);
+  statement->bind_int(0, encoded_chunk);
+  statement->bind_int(1, encoded_add_chunk);
+  statement->bind_int(2, prefix);
+  statement->bind_blob(3, full_prefix.full_hash, sizeof(SBFullHash));
+  int rv = statement->step();
+  statement->reset();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+  } else {
+    DCHECK(rv == SQLITE_DONE);
   }
+}
 
-  TimeDelta bloom_gen = Time::Now() - before;
+void SafeBrowsingDatabaseBloom::ReadFullHash(SqliteCompiledStatement* statement,
+                                             int column,
+                                             SBFullHash* full_hash) {
+  DCHECK(full_hash);
+  std::vector<unsigned char> blob;
+  (*statement)->column_blob_as_vector(column, &blob);
+  DCHECK(blob.size() == sizeof(SBFullHash));
+  memcpy(full_hash->full_hash, &blob[0], sizeof(SBFullHash));
+}
 
-  // Persist the bloom filter to disk.
-  WriteBloomFilter();
+// TODO(paulg): Look for a less expensive way to maintain add_count_? If we move
+// to a native file format, we can just cache the count in the file and not have
+// to scan at all.
+int SafeBrowsingDatabaseBloom::GetAddPrefixCount() {
+  SQLITE_UNIQUE_STATEMENT(count, *statement_cache_,
+                          "SELECT count(*) FROM add_prefix");
+  if (!count.is_valid()) {
+    NOTREACHED();
+    return 0;
+  }
+  int rv = count->step();
+  int add_count = 0;
+  if (rv == SQLITE_ROW)
+    add_count = count->column_int(0);
+  else if (rv == SQLITE_CORRUPT)
+    HandleCorruptDatabase();
 
-  // Gather statistics.
-#if defined(OS_WIN)
-  metric->GetIOCounters(&io_after);
-  UMA_HISTOGRAM_COUNTS("SB2.BuildReadBytes",
-                       static_cast<int>(io_after.ReadTransferCount -
-                                        io_before.ReadTransferCount));
-  UMA_HISTOGRAM_COUNTS("SB2.BuildWriteBytes",
-                       static_cast<int>(io_after.WriteTransferCount -
-                                        io_before.WriteTransferCount));
-  UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
-                       static_cast<int>(io_after.ReadOperationCount -
-                                        io_before.ReadOperationCount));
-  UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
-                       static_cast<int>(io_after.WriteOperationCount -
-                                        io_before.WriteOperationCount));
-#endif
-  SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in "
-                << bloom_gen.InMilliseconds()
-                << " ms total.  prefix count: "<< add_count_;
-  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
-  UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_count_);
-  UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", subs);
-  UMA_HISTOGRAM_COUNTS("SB2.FilterSize", bloom_filter_->size());
-  int64 size_64;
-  if (file_util::GetFileSize(filename_, &size_64))
-    UMA_HISTOGRAM_COUNTS("SB2.DatabaseBytes", static_cast<int>(size_64));
+  return add_count;
 }
 
-void SafeBrowsingDatabaseBloom::GetCachedFullHashes(
-    const std::vector<SBPrefix>* prefix_hits,
-    std::vector<SBFullHashResult>* full_hits,
-    Time last_update) {
-  DCHECK(prefix_hits && full_hits);
-  lookup_lock_.AssertAcquired();
-
-  Time max_age = Time::Now() - TimeDelta::FromMinutes(kMaxStalenessMinutes);
+void SafeBrowsingDatabaseBloom::ReadChunkNumbers() {
+  add_chunk_cache_.clear();
+  sub_chunk_cache_.clear();
 
-  for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin();
-       it != prefix_hits->end(); ++it) {
-    HashCache::iterator hit = hash_cache_->find(*it);
-    if (hit != hash_cache_->end()) {
-      HashList& entries = hit->second;
-      HashList::iterator eit = entries.begin();
-      while (eit != entries.end()) {
-        // An entry is valid if we've received an update in the past 45 minutes,
-        // or if this particular GetHash was received in the past 45 minutes.
-        // If an entry is does not meet the time criteria above, we are not
-        // allowed to use it since it might have become stale. We keep it
-        // around, though, and may be able to use it in the future once we
-        // receive the next update (that doesn't sub it).
-        if (max_age < last_update || eit->received > max_age) {
-          SBFullHashResult full_hash;
-          full_hash.hash = eit->full_hash;
-          full_hash.list_name = safe_browsing_util::GetListName(eit->list_id);
-          full_hash.add_chunk_id = eit->add_chunk_id;
-          full_hits->push_back(full_hash);
-        }
-        ++eit;
-      }
+  // Read in the add chunk numbers.
+  SQLITE_UNIQUE_STATEMENT(read_adds, *statement_cache_,
+                          "SELECT chunk FROM add_chunks");
+  if (!read_adds.is_valid()) {
+    NOTREACHED();
+    return;
+  }
 
-      if (entries.empty())
-        hash_cache_->erase(hit);
+  while (true) {
+    int rv = read_adds->step();
+    if (rv != SQLITE_ROW) {
+      if (rv == SQLITE_CORRUPT)
+        HandleCorruptDatabase();
+      break;
     }
+    add_chunk_cache_.insert(read_adds->column_int(0));
   }
-}
 
-void SafeBrowsingDatabaseBloom::CacheHashResults(
-    const std::vector<SBPrefix>& prefixes,
-    const std::vector<SBFullHashResult>& full_hits) {
-  AutoLock lock(lookup_lock_);
-
-  if (full_hits.empty()) {
-    // These prefixes returned no results, so we store them in order to prevent
-    // asking for them again. We flush this cache at the next update.
-    for (std::vector<SBPrefix>::const_iterator it = prefixes.begin();
-         it != prefixes.end(); ++it) {
-      prefix_miss_cache_.insert(*it);
-    }
+  // Read in the sub chunk numbers.
+  SQLITE_UNIQUE_STATEMENT(read_subs, *statement_cache_,
+                          "SELECT chunk FROM sub_chunks");
+  if (!read_subs.is_valid()) {
+    NOTREACHED();
     return;
   }
 
-  const Time now = Time::Now();
-  for (std::vector<SBFullHashResult>::const_iterator it = full_hits.begin();
-       it != full_hits.end(); ++it) {
-    SBPrefix prefix = it->hash.prefix;
-    HashList& entries = (*hash_cache_)[prefix];
-    HashCacheEntry entry;
-    entry.received = now;
-    entry.list_id = safe_browsing_util::GetListId(it->list_name);
-    entry.add_chunk_id = EncodeChunkId(it->add_chunk_id, entry.list_id);
-    entry.full_hash = it->hash;
-    entries.push_back(entry);
-
-    // Also push a copy to the pending write queue.
-    pending_full_hashes_.push_back(entry);
+  while (true) {
+    int rv = read_subs->step();
+    if (rv != SQLITE_ROW) {
+      if (rv == SQLITE_CORRUPT)
+        HandleCorruptDatabase();
+      break;
+    }
+    sub_chunk_cache_.insert(read_subs->column_int(0));
   }
 }
 
-bool SafeBrowsingDatabaseBloom::ClearCachedEntry(SBPrefix prefix,
-                                                 int add_chunk,
-                                                 HashCache* hash_cache) {
-  bool match = false;
-  HashCache::iterator it = hash_cache->find(prefix);
-  if (it == hash_cache->end())
-    return match;
+// Write all the chunk numbers to the add_chunks and sub_chunks tables.
+bool SafeBrowsingDatabaseBloom::WriteChunkNumbers() {
+  // Delete the contents of the add chunk table.
+  SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_,
+                          "DELETE FROM add_chunks");
+  if (!del_add_chunk.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+  int rv = del_add_chunk->step();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+    return false;
+  }
+  DCHECK(rv == SQLITE_DONE);
 
-  HashList& entries = it->second;
-  HashList::iterator lit = entries.begin();
-  while (lit != entries.end()) {
-    HashCacheEntry& entry = *lit;
-    if (entry.add_chunk_id == add_chunk) {
-      lit = entries.erase(lit);
-      match = true;
-      continue;
+  SQLITE_UNIQUE_STATEMENT(write_adds, *statement_cache_,
+                          "INSERT INTO add_chunks (chunk) VALUES (?)");
+  if (!write_adds.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  // Write all the add chunks from the cache to the database.
+  std::set<int>::const_iterator it = add_chunk_cache_.begin();
+  for (; it != add_chunk_cache_.end(); ++it) {
+    if (add_del_cache_.find(*it) != add_del_cache_.end())
+      continue;  // This chunk has been deleted.
+    write_adds->bind_int(0, *it);
+    rv = write_adds->step();
+    if (rv == SQLITE_CORRUPT) {
+      HandleCorruptDatabase();
+      return false;
     }
-    ++lit;
+    DCHECK(rv == SQLITE_DONE);
+    write_adds->reset();
   }
 
-  if (entries.empty())
-    hash_cache->erase(it);
+  // Delete the contents of the sub chunk table.
+  SQLITE_UNIQUE_STATEMENT(del_sub_chunk, *statement_cache_,
+                          "DELETE FROM sub_chunks");
+  if (!del_sub_chunk.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+  rv = del_sub_chunk->step();
+  if (rv == SQLITE_CORRUPT) {
+    HandleCorruptDatabase();
+    return false;
+  }
+  DCHECK(rv == SQLITE_DONE);
 
-  return match;
-}
+  SQLITE_UNIQUE_STATEMENT(write_subs, *statement_cache_,
+                          "INSERT INTO sub_chunks (chunk) VALUES (?)");
+  if (!write_subs.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
 
-void SafeBrowsingDatabaseBloom::HandleCorruptDatabase() {
-  MessageLoop::current()->PostTask(FROM_HERE,
-      reset_factory_.NewRunnableMethod(
-          &SafeBrowsingDatabaseBloom::OnHandleCorruptDatabase));
-}
+  // Write all the sub chunks from the cache to the database.
+  it = sub_chunk_cache_.begin();
+  for (; it != sub_chunk_cache_.end(); ++it) {
+    if (sub_del_cache_.find(*it) != sub_del_cache_.end())
+      continue;  // This chunk has been deleted.
+    write_subs->bind_int(0, *it);
+    rv = write_subs->step();
+    if (rv == SQLITE_CORRUPT) {
+      HandleCorruptDatabase();
+      return false;
+    }
+    DCHECK(rv == SQLITE_DONE);
+    write_subs->reset();
+  }
 
-void SafeBrowsingDatabaseBloom::OnHandleCorruptDatabase() {
-  ResetDatabase();
-  DCHECK(false) << "SafeBrowsing database was corrupt and reset";
+  return true;
 }
 
-// This database is always synchronous since we don't need to worry about
-// blocking any incoming reads.
-void SafeBrowsingDatabaseBloom::SetSynchronous() {
+void SafeBrowsingDatabaseBloom::ClearUpdateCaches() {
+  AutoLock lock(lookup_lock_);
+  add_del_cache_.clear();
+  sub_del_cache_.clear();
+  add_chunk_cache_.clear();
+  sub_chunk_cache_.clear();
+  prefix_miss_cache_.clear();
 }
-- 
cgit v1.1