summaryrefslogtreecommitdiffstats
path: root/chrome
diff options
context:
space:
mode:
authorpaulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-24 23:02:00 +0000
committerpaulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-24 23:02:00 +0000
commit613a03b2415686990248a3058575a4504e2f0ec3 (patch)
tree01dd1013f78c022b44920a24a94a2326fcef0dc8 /chrome
parentedfa71e63db4db57ee6fafb237e4331cb631824b (diff)
downloadchromium_src-613a03b2415686990248a3058575a4504e2f0ec3.zip
chromium_src-613a03b2415686990248a3058575a4504e2f0ec3.tar.gz
chromium_src-613a03b2415686990248a3058575a4504e2f0ec3.tar.bz2
Changes to allow running the new SafeBrowsing storage system,contained in SafeBrowsingDatabaseBloom, via a command line flag(--new-safe-browsing).
Review URL: http://codereview.chromium.org/6807 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@3959 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r--chrome/browser/safe_browsing/bloom_filter.h3
-rw-r--r--chrome/browser/safe_browsing/bloom_filter_unittest.cc16
-rw-r--r--chrome/browser/safe_browsing/protocol_manager.cc3
-rw-r--r--chrome/browser/safe_browsing/protocol_manager.h2
-rw-r--r--chrome/browser/safe_browsing/protocol_parser.h2
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database.cc13
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database.h5
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_bloom.cc465
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_bloom.h26
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_impl.cc10
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database_unittest.cc31
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_service.cc155
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_service.h37
13 files changed, 606 insertions, 162 deletions
diff --git a/chrome/browser/safe_browsing/bloom_filter.h b/chrome/browser/safe_browsing/bloom_filter.h
index 6b94745..1ec0624 100644
--- a/chrome/browser/safe_browsing/bloom_filter.h
+++ b/chrome/browser/safe_browsing/bloom_filter.h
@@ -8,10 +8,11 @@
#ifndef CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_
#define CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_
+#include "base/ref_counted.h"
#include "base/scoped_ptr.h"
#include "base/basictypes.h"
-class BloomFilter {
+class BloomFilter : public base::RefCountedThreadSafe<BloomFilter> {
public:
// Constructs an empty filter with the given size.
BloomFilter(int bit_size);
diff --git a/chrome/browser/safe_browsing/bloom_filter_unittest.cc b/chrome/browser/safe_browsing/bloom_filter_unittest.cc
index edfefa9..b28269a 100644
--- a/chrome/browser/safe_browsing/bloom_filter_unittest.cc
+++ b/chrome/browser/safe_browsing/bloom_filter_unittest.cc
@@ -11,6 +11,7 @@
#include "base/logging.h"
#include "base/rand_util.h"
+#include "base/ref_counted.h"
#include "base/string_util.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -27,24 +28,25 @@ TEST(SafeBrowsing, BloomFilter) {
uint32 count = 1000;
// Build up the bloom filter.
- BloomFilter filter(count * 10);
+ scoped_refptr<BloomFilter> filter = new BloomFilter(count * 10);
typedef std::set<int> Values;
Values values;
for (uint32 i = 0; i < count; ++i) {
uint32 value = GenHash();
values.insert(value);
- filter.Insert(value);
+ filter->Insert(value);
}
// Check serialization works.
- char* data_copy = new char[filter.size()];
- memcpy(data_copy, filter.data(), filter.size());
- BloomFilter filter_copy(data_copy, filter.size());
+ char* data_copy = new char[filter->size()];
+ memcpy(data_copy, filter->data(), filter->size());
+ scoped_refptr<BloomFilter> filter_copy =
+ new BloomFilter(data_copy, filter->size());
// Check no false negatives by ensuring that every time we inserted exists.
for (Values::iterator i = values.begin(); i != values.end(); ++i) {
- EXPECT_TRUE(filter_copy.Exists(*i));
+ EXPECT_TRUE(filter_copy->Exists(*i));
}
// Check false positive error rate by checking the same number of items that
@@ -57,7 +59,7 @@ TEST(SafeBrowsing, BloomFilter) {
if (values.find(value) != values.end())
continue;
- if (filter_copy.Exists(value))
+ if (filter_copy->Exists(value))
found_count++;
checked ++;
diff --git a/chrome/browser/safe_browsing/protocol_manager.cc b/chrome/browser/safe_browsing/protocol_manager.cc
index eeb1889..f531538 100644
--- a/chrome/browser/safe_browsing/protocol_manager.cc
+++ b/chrome/browser/safe_browsing/protocol_manager.cc
@@ -265,6 +265,7 @@ bool SafeBrowsingProtocolManager::HandleServiceResponse(const GURL& url,
&next_update_sec, &re_key,
&reset, chunk_deletes, &chunk_urls)) {
delete chunk_deletes;
+ sb_service_->UpdateFinished(false);
return false;
}
@@ -512,7 +513,7 @@ void SafeBrowsingProtocolManager::OnChunkInserted() {
if (chunk_request_urls_.empty()) {
UMA_HISTOGRAM_LONG_TIMES(L"SB.Update", Time::Now() - last_update_);
- sb_service_->UpdateFinished();
+ sb_service_->UpdateFinished(true);
} else {
IssueChunkRequest();
}
diff --git a/chrome/browser/safe_browsing/protocol_manager.h b/chrome/browser/safe_browsing/protocol_manager.h
index 723a1ed..4cb75c0 100644
--- a/chrome/browser/safe_browsing/protocol_manager.h
+++ b/chrome/browser/safe_browsing/protocol_manager.h
@@ -204,7 +204,7 @@ class SafeBrowsingProtocolManager : public URLFetcher::Delegate {
// Current product version sent in each request.
std::string version_;
- DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingProtocolManager);
+ DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolManager);
};
#endif // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
diff --git a/chrome/browser/safe_browsing/protocol_parser.h b/chrome/browser/safe_browsing/protocol_parser.h
index fe87549..4d77e50 100644
--- a/chrome/browser/safe_browsing/protocol_parser.h
+++ b/chrome/browser/safe_browsing/protocol_parser.h
@@ -121,7 +121,7 @@ class SafeBrowsingProtocolParser {
// The name of the current list
std::string list_name_;
- DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingProtocolParser);
+ DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolParser);
};
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
index d09a85f..2f43f6b 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -24,7 +24,10 @@ SafeBrowsingDatabase* SafeBrowsingDatabase::Create() {
}
bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) {
- if (!bloom_filter_.get())
+ // Keep a reference to the current bloom filter in case the database rebuilds
+ // it while we're accessing it.
+ scoped_refptr<BloomFilter> filter = bloom_filter_;
+ if (!filter.get())
return true;
IncrementBloomFilterReadCount();
@@ -37,16 +40,16 @@ bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) {
SBPrefix host_key;
if (url.HostIsIPAddress()) {
base::SHA256HashString(url.host() + "/", &host_key, sizeof(SBPrefix));
- if (bloom_filter_->Exists(host_key))
+ if (filter->Exists(host_key))
return true;
} else {
base::SHA256HashString(hosts[0] + "/", &host_key, sizeof(SBPrefix));
- if (bloom_filter_->Exists(host_key))
+ if (filter->Exists(host_key))
return true;
if (hosts.size() > 1) {
base::SHA256HashString(hosts[1] + "/", &host_key, sizeof(SBPrefix));
- if (bloom_filter_->Exists(host_key))
+ if (filter->Exists(host_key))
return true;
}
}
@@ -77,7 +80,7 @@ void SafeBrowsingDatabase::LoadBloomFilter() {
SB_DLOG(INFO) << "SafeBrowsingDatabase read bloom filter in " <<
(Time::Now() - before).InMilliseconds() << " ms";
- bloom_filter_.reset(new BloomFilter(data, size));
+ bloom_filter_ = new BloomFilter(data, size);
}
void SafeBrowsingDatabase::DeleteBloomFilter() {
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.h b/chrome/browser/safe_browsing/safe_browsing_database.h
index fe6d586..94b9b76 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.h
+++ b/chrome/browser/safe_browsing/safe_browsing_database.h
@@ -9,6 +9,7 @@
#include <string>
#include <vector>
+#include "base/ref_counted.h"
#include "base/scoped_ptr.h"
#include "base/task.h"
#include "base/time.h"
@@ -79,7 +80,7 @@ class SafeBrowsingDatabase {
// Called when the user's machine has resumed from a lower power state.
virtual void HandleResume() = 0;
- virtual void UpdateFinished() { }
+ virtual void UpdateFinished(bool update_succeeded) { }
protected:
static std::wstring BloomFilterFilename(const std::wstring& db_filename);
@@ -100,7 +101,7 @@ class SafeBrowsingDatabase {
virtual void IncrementBloomFilterReadCount() {};
std::wstring bloom_filter_filename_;
- scoped_ptr<BloomFilter> bloom_filter_;
+ scoped_refptr<BloomFilter> bloom_filter_;
};
#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
index 01d0b99..9b15082 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc
@@ -53,7 +53,7 @@ SafeBrowsingDatabaseBloom::~SafeBrowsingDatabaseBloom() {
}
bool SafeBrowsingDatabaseBloom::Init(const std::wstring& filename,
- Callback0::Type* chunk_inserted_callback) {
+ Callback0::Type* chunk_inserted_callback) {
DCHECK(!init_ && filename_.empty());
filename_ = filename;
@@ -74,19 +74,19 @@ bool SafeBrowsingDatabaseBloom::Init(const std::wstring& filename,
load_filter = true;
}
- CreateChunkCaches();
-
+ add_count_ = GetAddPrefixCount();
bloom_filter_filename_ = BloomFilterFilename(filename_);
if (load_filter) {
LoadBloomFilter();
} else {
- bloom_filter_.reset(
- new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio));
+ bloom_filter_ =
+ new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio);
}
init_ = true;
chunk_inserted_callback_ = chunk_inserted_callback;
+
return true;
}
@@ -133,6 +133,7 @@ bool SafeBrowsingDatabaseBloom::CreateTables() {
SQLTransaction transaction(db_);
transaction.Begin();
+ // Store 32 bit add prefixes here.
if (sqlite3_exec(db_, "CREATE TABLE add_prefix ("
"chunk INTEGER,"
"prefix INTEGER)",
@@ -140,6 +141,7 @@ bool SafeBrowsingDatabaseBloom::CreateTables() {
return false;
}
+ // Store 32 sub prefixes here.
if (sqlite3_exec(db_, "CREATE TABLE sub_prefix ("
"chunk INTEGER,"
"add_chunk INTEGER,"
@@ -148,17 +150,51 @@ bool SafeBrowsingDatabaseBloom::CreateTables() {
return false;
}
- if (sqlite3_exec(db_, "CREATE TABLE full_prefix ("
- "chunk INTEGER,"
- "prefix INTEGER,"
- "full_prefix BLOB)",
- NULL, NULL, NULL) != SQLITE_OK) {
- return false;
- }
- sqlite3_exec(db_, "CREATE INDEX full_prefix_chunk ON full_prefix(chunk)",
- NULL, NULL, NULL);
- sqlite3_exec(db_, "CREATE INDEX full_prefix_prefix ON full_prefix(prefix)",
- NULL, NULL, NULL);
+ // TODO(paulg): Store 256 bit add full prefixes and GetHash results here.
+ // 'receive_time' is used for testing the staleness of GetHash results.
+ // if (sqlite3_exec(db_, "CREATE TABLE full_add_prefix ("
+ // "chunk INTEGER,"
+ // "prefix INTEGER,"
+ // "receive_time INTEGER,"
+ // "full_prefix BLOB)",
+ // NULL, NULL, NULL) != SQLITE_OK) {
+ // return false;
+ // }
+
+ // TODO(paulg): These tables are going to contain very few entries, so we
+ // need to measure if it's worth keeping an index on them.
+ // sqlite3_exec(db_,
+ // "CREATE INDEX full_add_prefix_chunk ON full_prefix(chunk)",
+ // NULL, NULL, NULL);
+ // sqlite3_exec(db_,
+ // "CREATE INDEX full_add_prefix_prefix ON full_prefix(prefix)",
+ // NULL, NULL, NULL);
+
+ // TODO(paulg): Store 256 bit sub full prefixes here.
+ // if (sqlite3_exec(db_, "CREATE TABLE full_sub_prefix ("
+ // "chunk INTEGER,"
+ // "add_chunk INTEGER,"
+ // "prefix INTEGER,"
+ // "receive_time INTEGER,"
+ // "full_prefix BLOB)",
+ // NULL, NULL, NULL) != SQLITE_OK) {
+ // return false;
+ // }
+
+ // TODO(paulg): These tables are going to contain very few entries, so we
+ // need to measure if it's worth keeping an index on them.
+ // sqlite3_exec(
+ // db_,
+ // "CREATE INDEX full_sub_prefix_chunk ON full_sub_prefix(chunk)",
+ // NULL, NULL, NULL);
+ // sqlite3_exec(
+ // db_,
+ // "CREATE INDEX full_sub_prefix_add_chunk ON full_sub_prefix(add_chunk)",
+ // NULL, NULL, NULL);
+ // sqlite3_exec(
+ // db_,
+ // "CREATE INDEX full_sub_prefix_prefix ON full_sub_prefix(prefix)",
+ // NULL, NULL, NULL);
if (sqlite3_exec(db_, "CREATE TABLE list_names ("
"id INTEGER PRIMARY KEY AUTOINCREMENT,"
@@ -167,13 +203,24 @@ bool SafeBrowsingDatabaseBloom::CreateTables() {
return false;
}
- if (sqlite3_exec(db_, "CREATE TABLE add_chunk ("
+ // Store all the add and sub chunk numbers we receive. We cannot just rely on
+ // the prefix tables to generate these lists, since some chunks will have zero
+ // entries (and thus no prefixes), or potentially an add chunk can have all of
+ // its entries sub'd without receiving an AddDel, or a sub chunk might have
+ // been entirely consumed by adds. In these cases, we still have to report the
+ // chunk number but it will not have any prefixes in the prefix tables.
+ //
+ // TODO(paulg): Investigate storing the chunks as a string of ChunkRanges, one
+ // string for each of phish-add, phish-sub, malware-add, malware-sub. This
+ // might be better performance when the number of chunks is large, and is the
+ // natural format for the update request.
+ if (sqlite3_exec(db_, "CREATE TABLE add_chunks ("
"chunk INTEGER PRIMARY KEY)",
NULL, NULL, NULL) != SQLITE_OK) {
return false;
}
- if (sqlite3_exec(db_, "CREATE TABLE sub_chunk ("
+ if (sqlite3_exec(db_, "CREATE TABLE sub_chunks ("
"chunk INTEGER PRIMARY KEY)",
NULL, NULL, NULL) != SQLITE_OK) {
return false;
@@ -196,7 +243,9 @@ bool SafeBrowsingDatabaseBloom::CreateTables() {
return true;
}
-// The SafeBrowsing service assumes this operation is synchronous.
+// The SafeBrowsing service assumes this operation is run synchronously on the
+// database thread. Any URLs that the service needs to check when this is
+// running are queued up and run once the reset is done.
bool SafeBrowsingDatabaseBloom::ResetDatabase() {
hash_cache_.clear();
add_chunk_cache_.clear();
@@ -211,8 +260,8 @@ bool SafeBrowsingDatabaseBloom::ResetDatabase() {
return false;
}
- bloom_filter_.reset(
- new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio));
+ bloom_filter_ =
+ new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio);
file_util::Delete(bloom_filter_filename_, false);
if (!Open())
@@ -254,28 +303,52 @@ bool SafeBrowsingDatabaseBloom::ContainsUrl(
} else {
safe_browsing_util::GenerateHostsToCheck(url, &hosts);
if (hosts.size() == 0)
- return false; // things like about:blank
+ return false; // Things like about:blank
}
std::vector<std::string> paths;
safe_browsing_util::GeneratePathsToCheck(url, &paths);
- // TODO(erikkay): this may wind up being too many hashes on a complex page
- // TODO(erikkay): not filling in matching_list - is that OK?
- // TODO(erikkay): handle full_hits
+ // Grab a reference to the existing filter so that it isn't deleted on us if
+ // a update is just about to finish.
+ scoped_refptr<BloomFilter> filter = bloom_filter_;
+ if (!filter.get())
+ return false;
+
+ // TODO(erikkay): This may wind up being too many hashes on a complex page.
+ // TODO(erikkay): Not filling in matching_list - is that OK?
for (size_t i = 0; i < hosts.size(); ++i) {
for (size_t j = 0; j < paths.size(); ++j) {
SBFullHash full_hash;
- // TODO(erikkay): maybe we should only do the first 32 bits initially,
+ // TODO(erikkay): Maybe we should only do the first 32 bits initially,
// and then fall back to the full hash if there's a hit.
base::SHA256HashString(hosts[i] + paths[j], &full_hash,
sizeof(SBFullHash));
SBPrefix prefix;
memcpy(&prefix, &full_hash, sizeof(SBPrefix));
- if (bloom_filter_->Exists(prefix))
+ if (filter->Exists(prefix))
prefix_hits->push_back(prefix);
}
}
- return prefix_hits->size() > 0;
+
+ if (!prefix_hits->empty()) {
+ // If all the prefixes are cached as 'misses', don't issue a GetHash.
+ bool all_misses = true;
+ for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin();
+ it != prefix_hits->end(); ++it) {
+ if (prefix_miss_cache_.find(*it) == prefix_miss_cache_.end()) {
+ all_misses = false;
+ break;
+ }
+ }
+ if (all_misses)
+ return false;
+
+ // See if we have the results of recent GetHashes for the prefix matches.
+ GetCachedFullHashes(prefix_hits, full_hits, last_update);
+ return true;
+ }
+
+ return false;
}
bool SafeBrowsingDatabaseBloom::NeedToCheckUrl(const GURL& url) {
@@ -324,8 +397,15 @@ void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name,
ProcessPendingWork();
}
-void SafeBrowsingDatabaseBloom::UpdateFinished() {
- BuildBloomFilter();
+void SafeBrowsingDatabaseBloom::UpdateFinished(bool update_succeeded) {
+ if (update_succeeded)
+ BuildBloomFilter();
+
+ // We won't need the chunk caches until the next update (which will read them
+ // from the database), so free their memory as they may contain thousands of
+ // chunk numbers.
+ add_chunk_cache_.clear();
+ sub_chunk_cache_.clear();
}
void SafeBrowsingDatabaseBloom::ProcessChunks() {
@@ -357,7 +437,6 @@ void SafeBrowsingDatabaseBloom::ProcessAddChunks(std::deque<SBChunk>* chunks) {
// The server can give us a chunk that we already have because it's part of
// a range. Don't add it again.
if (!ChunkExists(list_id, ADD_CHUNK, chunk_id)) {
- InsertChunk(list_id, ADD_CHUNK, chunk_id);
while (!chunk.hosts.empty()) {
WaitAfterResume();
// Read the existing record for this host, if it exists.
@@ -380,7 +459,8 @@ void SafeBrowsingDatabaseBloom::ProcessAddChunks(std::deque<SBChunk>* chunks) {
void SafeBrowsingDatabaseBloom::AddEntry(SBPrefix host, SBEntry* entry) {
STATS_COUNTER(L"SB.HostInsert", 1);
if (entry->type() == SBEntry::ADD_FULL_HASH) {
- // TODO(erikkay)
+ // TODO(erikkay, paulg): Add the full 256 bit prefix to the full_prefix
+ // table AND insert its 32 bit prefix into the regular prefix table.
return;
}
int encoded = EncodeChunkId(entry->chunk_id(), entry->list_id());
@@ -416,6 +496,32 @@ void SafeBrowsingDatabaseBloom::AddPrefix(SBPrefix prefix, int encoded_chunk) {
add_count_++;
}
+void SafeBrowsingDatabaseBloom::AddFullPrefix(SBPrefix prefix,
+ int encoded_chunk,
+ SBFullHash full_prefix) {
+ STATS_COUNTER(L"SB.PrefixAddFull", 1);
+ std::string sql = "INSERT INTO full_add_prefix "
+ "(chunk, prefix, receive_time, full_prefix) "
+ "VALUES (?, ?, ?)";
+ SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
+ if (!statement.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+ statement->bind_int(0, encoded_chunk);
+ statement->bind_int(1, prefix);
+ // TODO(paulg): Add receive_time and full_prefix.
+ // statement->bind_int64(2, receive_time);
+ // statement->bind_blob(3, full_prefix);
+ int rv = statement->step();
+ statement->reset();
+ if (rv == SQLITE_CORRUPT) {
+ HandleCorruptDatabase();
+ } else {
+ DCHECK(rv == SQLITE_DONE);
+ }
+}
+
void SafeBrowsingDatabaseBloom::AddSub(
int chunk_id, SBPrefix host, SBEntry* entry) {
STATS_COUNTER(L"SB.HostDelete", 1);
@@ -460,6 +566,34 @@ void SafeBrowsingDatabaseBloom::AddSubPrefix(SBPrefix prefix,
}
}
+void SafeBrowsingDatabaseBloom::SubFullPrefix(SBPrefix prefix,
+ int encoded_chunk,
+ int encoded_add_chunk,
+ SBFullHash full_prefix) {
+ STATS_COUNTER(L"SB.PrefixSubFull", 1);
+ std::string sql = "INSERT INTO full_sub_prefix "
+ "(chunk, add_chunk, prefix, receive_time, full_prefix) "
+ "VALUES (?,?,?,?)";
+ SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str());
+ if (!statement.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+ statement->bind_int(0, encoded_chunk);
+ statement->bind_int(1, encoded_add_chunk);
+ statement->bind_int(2, prefix);
+ // TODO(paulg): Add receive_time and full_prefix.
+ // statement->bind_int64(3, receive_time);
+ // statement->bind_blob(4, full_prefix);
+ int rv = statement->step();
+ statement->reset();
+ if (rv == SQLITE_CORRUPT) {
+ HandleCorruptDatabase();
+ } else {
+ DCHECK(rv == SQLITE_DONE);
+ }
+}
+
// TODO(paulg): Look for a less expensive way to maintain add_count_.
int SafeBrowsingDatabaseBloom::GetAddPrefixCount() {
SQLITE_UNIQUE_STATEMENT(count, *statement_cache_,
@@ -478,36 +612,6 @@ int SafeBrowsingDatabaseBloom::GetAddPrefixCount() {
return add_count;
}
-// TODO(erikkay) - this is too slow
-void SafeBrowsingDatabaseBloom::CreateChunkCaches() {
- SQLITE_UNIQUE_STATEMENT(adds, *statement_cache_,
- "SELECT distinct chunk FROM add_prefix");
- if (!adds.is_valid()) {
- NOTREACHED();
- return;
- }
- int rv = adds->step();
- while (rv == SQLITE_ROW) {
- int chunk = adds->column_int(0);
- add_chunk_cache_.insert(chunk);
- rv = adds->step();
- }
- SQLITE_UNIQUE_STATEMENT(subs, *statement_cache_,
- "SELECT distinct chunk FROM sub_prefix");
- if (!subs.is_valid()) {
- NOTREACHED();
- return;
- }
- rv = subs->step();
- while (rv == SQLITE_ROW) {
- int chunk = subs->column_int(0);
- sub_chunk_cache_.insert(chunk);
- rv = subs->step();
- }
-
- add_count_ = GetAddPrefixCount();
-}
-
void SafeBrowsingDatabaseBloom::ProcessSubChunks(std::deque<SBChunk>* chunks) {
while (!chunks->empty()) {
SBChunk& chunk = chunks->front();
@@ -515,7 +619,6 @@ void SafeBrowsingDatabaseBloom::ProcessSubChunks(std::deque<SBChunk>* chunks) {
int chunk_id = chunk.chunk_number;
if (!ChunkExists(list_id, SUB_CHUNK, chunk_id)) {
- InsertChunk(list_id, SUB_CHUNK, chunk_id);
while (!chunk.hosts.empty()) {
WaitAfterResume();
@@ -567,6 +670,8 @@ void SafeBrowsingDatabaseBloom::AddDel(const std::string& list_name,
AddDel(list_id, add_chunk_id);
}
+// TODO(paulg): Change this from a database scan to an in-memory operation done
+// while building the bloom filter.
void SafeBrowsingDatabaseBloom::AddDel(int list_id,
int add_chunk_id) {
STATS_COUNTER(L"SB.ChunkDelete", 1);
@@ -595,6 +700,8 @@ void SafeBrowsingDatabaseBloom::SubDel(const std::string& list_name,
SubDel(list_id, sub_chunk_id);
}
+// TODO(paulg): Change this from a database scan to an in-memory operation done
+// while building the bloom filter.
void SafeBrowsingDatabaseBloom::SubDel(int list_id,
int sub_chunk_id) {
STATS_COUNTER(L"SB.ChunkDelete", 1);
@@ -643,12 +750,6 @@ bool SafeBrowsingDatabaseBloom::ChunkExists(int list_id,
return ret;
}
-void SafeBrowsingDatabaseBloom::InsertChunk(int list_id,
- ChunkType type,
- int chunk_id) {
- // TODO(erikkay): insert into the correct chunk table
-}
-
// Return a comma separated list of chunk ids that are in the database for
// the given list and chunk type.
void SafeBrowsingDatabaseBloom::GetChunkIds(
@@ -677,6 +778,10 @@ void SafeBrowsingDatabaseBloom::GetChunkIds(
void SafeBrowsingDatabaseBloom::GetListsInfo(
std::vector<SBListChunkRanges>* lists) {
+ DCHECK(lists);
+
+ ReadChunkNumbers();
+
lists->clear();
SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
"SELECT name,id FROM list_names");
@@ -770,6 +875,121 @@ std::string SafeBrowsingDatabaseBloom::GetListName(int id) {
return statement->column_string(0);
}
+void SafeBrowsingDatabaseBloom::ReadChunkNumbers() {
+ add_chunk_cache_.clear();
+ sub_chunk_cache_.clear();
+
+ // Read in the add chunk numbers.
+ SQLITE_UNIQUE_STATEMENT(read_adds, *statement_cache_,
+ "SELECT chunk FROM add_chunks");
+ if (!read_adds.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+
+ while (true) {
+ int rv = read_adds->step();
+ if (rv != SQLITE_ROW) {
+ if (rv == SQLITE_CORRUPT)
+ HandleCorruptDatabase();
+ break;
+ }
+ add_chunk_cache_.insert(read_adds->column_int(0));
+ }
+
+ // Read in the sub chunk numbers.
+ SQLITE_UNIQUE_STATEMENT(read_subs, *statement_cache_,
+ "SELECT chunk FROM sub_chunks");
+ if (!read_subs.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+
+ while (true) {
+ int rv = read_subs->step();
+ if (rv != SQLITE_ROW) {
+ if (rv == SQLITE_CORRUPT)
+ HandleCorruptDatabase();
+ break;
+ }
+ sub_chunk_cache_.insert(read_subs->column_int(0));
+ }
+}
+
+// Write all the chunk numbers to the add_chunks and sub_chunks tables.
+void SafeBrowsingDatabaseBloom::WriteChunkNumbers() {
+ // Delete the contents of the add chunk table.
+ SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_,
+ "DELETE FROM add_chunks");
+ if (!del_add_chunk.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+ int rv = del_add_chunk->step();
+ if (rv == SQLITE_CORRUPT) {
+ HandleCorruptDatabase();
+ return;
+ }
+ DCHECK(rv == SQLITE_DONE);
+
+ SQLITE_UNIQUE_STATEMENT(write_adds, *statement_cache_,
+ "INSERT INTO add_chunks (chunk) VALUES (?)");
+ if (!write_adds.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+
+ // Write all the add chunks from the cache to the database.
+ std::set<int>::const_iterator it = add_chunk_cache_.begin();
+ while (it != add_chunk_cache_.end()) {
+ write_adds->bind_int(0, *it);
+ rv = write_adds->step();
+ if (rv == SQLITE_CORRUPT) {
+ HandleCorruptDatabase();
+ return;
+ }
+ DCHECK(rv == SQLITE_DONE);
+ write_adds->reset();
+ ++it;
+ }
+
+ // Delete the contents of the sub chunk table.
+ SQLITE_UNIQUE_STATEMENT(del_sub_chunk, *statement_cache_,
+ "DELETE FROM sub_chunks");
+ if (!del_sub_chunk.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+ rv = del_sub_chunk->step();
+ if (rv == SQLITE_CORRUPT) {
+ HandleCorruptDatabase();
+ return;
+ }
+ DCHECK(rv == SQLITE_DONE);
+
+ SQLITE_UNIQUE_STATEMENT(write_subs, *statement_cache_,
+ "INSERT INTO sub_chunks (chunk) VALUES (?)");
+ if (!write_subs.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+
+ // Write all the sub chunks from the cache to the database.
+ it = sub_chunk_cache_.begin();
+ while (it != sub_chunk_cache_.end()) {
+ write_subs->bind_int(0, *it);
+ rv = write_subs->step();
+ if (rv == SQLITE_CORRUPT) {
+ HandleCorruptDatabase();
+ return;
+ }
+ DCHECK(rv == SQLITE_DONE);
+ write_subs->reset();
+ ++it;
+ }
+}
+
+// Helper struct and compare function for building the bloom filter.
typedef struct {
int chunk_id;
SBPrefix prefix;
@@ -786,13 +1006,24 @@ static int pair_compare(const void* arg1, const void* arg2) {
// TODO(erikkay): should we call WaitAfterResume() inside any of the loops here?
// This is a pretty fast operation and it would be nice to let it finish.
+// TODO(erikkay, paulg): Break this into smaller functions.
void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
Time before = Time::Now();
add_count_ = GetAddPrefixCount();
+ if (add_count_ == 0) {
+ bloom_filter_ = NULL;
+ return;
+ }
+
scoped_array<SBPair> adds_array(new SBPair[add_count_]);
SBPair* adds = adds_array.get();
+ // Used to track which adds have been subbed out. The vector<bool> is actually
+ // a bitvector so the size is as small as we can get.
+ std::vector<bool> adds_removed;
+ adds_removed.resize(add_count_, false);
+
// Read add_prefix into memory and sort it.
STATS_COUNTER(L"SB.HostSelectForBloomFilter", 1);
SQLITE_UNIQUE_STATEMENT(add_prefix, *statement_cache_,
@@ -820,11 +1051,40 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
// Read through sub_prefix and zero out add_prefix entries that match.
SQLITE_UNIQUE_STATEMENT(sub_prefix, *statement_cache_,
- "SELECT add_chunk, prefix FROM sub_prefix");
+ "SELECT chunk, add_chunk, prefix FROM sub_prefix");
if (!sub_prefix.is_valid()) {
NOTREACHED();
return;
}
+
+ scoped_ptr<SQLTransaction> update_transaction;
+ update_transaction.reset(new SQLTransaction(db_));
+ if (update_transaction->Begin() != SQLITE_OK) {
+ NOTREACHED();
+ return;
+ }
+
+ // Create a temporary sub prefix table. We add entries to it as we scan the
+ // sub_prefix table looking for adds to remove. Only entries that don't
+ // remove an add written to this table. When we're done filtering, we replace
+ // sub_prefix with this table.
+ if (sqlite3_exec(db_, "CREATE TABLE sub_prefix_tmp ("
+ "chunk INTEGER,"
+ "add_chunk INTEGER,"
+ "prefix INTEGER)",
+ NULL, NULL, NULL) != SQLITE_OK) {
+ return;
+ }
+
+ SQLITE_UNIQUE_STATEMENT(
+ sub_prefix_tmp,
+ *statement_cache_,
+ "INSERT INTO sub_prefix_tmp (chunk,add_chunk,prefix) VALUES (?,?,?)");
+ if (!sub_prefix_tmp.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+
SBPair sub;
while (true) {
int rv = sub_prefix->step();
@@ -833,26 +1093,61 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
HandleCorruptDatabase();
break;
}
- sub.chunk_id = sub_prefix->column_int(0);
- sub.prefix = sub_prefix->column_int(1);
+ sub.chunk_id = sub_prefix->column_int(1);
+ sub.prefix = sub_prefix->column_int(2);
void* match = bsearch(&sub, adds, add_count_, sizeof(SBPair), pair_compare);
if (match) {
SBPair* subbed = reinterpret_cast<SBPair*>(match);
- // A chunk_id of 0 is invalid, so we use that to mark this prefix as
- // having been subbed.
- subbed->chunk_id = 0;
+ adds_removed[subbed - adds] = true;
+ } else {
+ // This sub_prefix entry did not match any add, so we keep it around.
+ sub_prefix_tmp->bind_int(0, sub_prefix->column_int(0));
+ sub_prefix_tmp->bind_int(1, sub.chunk_id);
+ sub_prefix_tmp->bind_int(2, sub.prefix);
+ int rv = sub_prefix_tmp->step();
+ if (rv == SQLITE_CORRUPT) {
+ HandleCorruptDatabase();
+ return;
+ }
+ DCHECK(rv == SQLITE_DONE);
+ sub_prefix_tmp->reset();
}
}
+ // Delete the old sub_prefix table and rename the temporary table.
+ SQLITE_UNIQUE_STATEMENT(del_sub, *statement_cache_, "DROP TABLE sub_prefix");
+ if (!del_sub.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+ int rv = del_sub->step();
+ if (rv == SQLITE_CORRUPT) {
+ HandleCorruptDatabase();
+ return;
+ }
+ DCHECK(rv == SQLITE_DONE);
+
+ SQLITE_UNIQUE_STATEMENT(rename_sub, *statement_cache_,
+ "ALTER TABLE sub_prefix_tmp RENAME TO sub_prefix");
+ if (!rename_sub.is_valid()) {
+ NOTREACHED();
+ return;
+ }
+ rv = rename_sub->step();
+ if (rv == SQLITE_CORRUPT) {
+ HandleCorruptDatabase();
+ return;
+ }
+ DCHECK(rv == SQLITE_DONE);
+
// Now blow away add_prefix and re-write it from our in-memory data,
// building the new bloom filter as we go.
- BeginTransaction();
- SQLITE_UNIQUE_STATEMENT(del, *statement_cache_, "DELETE FROM add_prefix");
- if (!del.is_valid()) {
+ SQLITE_UNIQUE_STATEMENT(del_add, *statement_cache_, "DELETE FROM add_prefix");
+ if (!del_add.is_valid()) {
NOTREACHED();
return;
}
- int rv = del->step();
+ rv = del_add->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
return;
@@ -860,7 +1155,7 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
DCHECK(rv == SQLITE_DONE);
SQLITE_UNIQUE_STATEMENT(insert, *statement_cache_,
- "INSERT INTO add_prefix VALUES(?,?)");
+ "INSERT INTO add_prefix VALUES (?,?)");
if (!insert.is_valid()) {
NOTREACHED();
return;
@@ -871,13 +1166,14 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
add = adds;
int new_count = 0;
while (add - adds < add_count_) {
- if (add->chunk_id != 0) {
+ if (!adds_removed[add - adds]) {
filter->Insert(add->prefix);
insert->bind_int(0, add->chunk_id);
insert->bind_int(1, add->prefix);
rv = insert->step();
if (rv == SQLITE_CORRUPT) {
HandleCorruptDatabase();
+ delete filter; // TODO(paulg): scoped.
return;
}
DCHECK(rv == SQLITE_DONE);
@@ -886,10 +1182,12 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
}
add++;
}
- bloom_filter_.reset(filter);
+
+ update_transaction->Commit();
+
// If there were any matching subs, the size will be smaller.
add_count_ = new_count;
- EndTransaction();
+ bloom_filter_ = filter;
TimeDelta bloom_gen = Time::Now() - before;
SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in " <<
@@ -898,6 +1196,7 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() {
UMA_HISTOGRAM_LONG_TIMES(L"SB.BuildBloom", bloom_gen);
WriteBloomFilter();
+ WriteChunkNumbers();
}
void SafeBrowsingDatabaseBloom::BeginTransaction() {
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
index e991333..f639943 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
+++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h
@@ -64,13 +64,14 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
// Store the results of a GetHash response. In the case of empty results, we
// cache the prefixes until the next update so that we don't have to issue
// further GetHash requests we know will be empty.
- virtual void CacheHashResults(const std::vector<SBPrefix>& prefixes,
- const std::vector<SBFullHashResult>& full_hits);
+ virtual void CacheHashResults(
+ const std::vector<SBPrefix>& prefixes,
+ const std::vector<SBFullHashResult>& full_hits);
// Called when the user's machine has resumed from a lower power state.
virtual void HandleResume();
- virtual void UpdateFinished();
+ virtual void UpdateFinished(bool update_succeeded);
virtual bool NeedToCheckUrl(const GURL& url);
private:
@@ -102,10 +103,6 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
// Checks if a chunk is in the database.
bool ChunkExists(int list_id, ChunkType type, int chunk_id);
- // Note the existence of a chunk in the database. This is used as a faster
- // cache of all of the chunks we have.
- void InsertChunk(int list_id, ChunkType type, int chunk_id);
-
// Return a comma separated list of chunk ids that are in the database for
// the given list and chunk type.
void GetChunkIds(int list_id, ChunkType type, std::string* list);
@@ -164,6 +161,7 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
// Clears the did_resume_ flag. This is called by HandleResume after a delay
// to handle the case where we weren't in the middle of any work.
void OnResumeDone();
+
// If the did_resume_ flag is set, sleep for a period and then clear the
// flag. This method should be called periodically inside of busy disk loops.
void WaitAfterResume();
@@ -173,8 +171,18 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
void AddSub(int chunk, SBPrefix host, SBEntry* entry);
void AddSubPrefix(SBPrefix prefix, int encoded_chunk, int encoded_add_chunk);
void ProcessPendingSubs();
- void CreateChunkCaches();
int GetAddPrefixCount();
+ void AddFullPrefix(SBPrefix prefix,
+ int encoded_chunk,
+ SBFullHash full_prefix);
+ void SubFullPrefix(SBPrefix prefix,
+ int encoded_chunk,
+ int encoded_add_chunk,
+ SBFullHash full_prefix);
+
+ // Reads and writes chunk numbers to and from persistent store.
+ void ReadChunkNumbers();
+ void WriteChunkNumbers();
// Encode the list id in the lower bit of the chunk.
static inline int EncodeChunkId(int chunk, int list_id) {
@@ -240,7 +248,7 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase {
// Cache of prefixes that returned empty results (no full hash match).
std::set<SBPrefix> prefix_miss_cache_;
- // a cache of all of the existing add and sub chunks
+ // Caches for all of the existing add and sub chunks.
std::set<int> add_chunk_cache_;
std::set<int> sub_chunk_cache_;
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_impl.cc b/chrome/browser/safe_browsing/safe_browsing_database_impl.cc
index 02c2d04..5d1c758 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_impl.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database_impl.cc
@@ -101,8 +101,8 @@ bool SafeBrowsingDatabaseImpl::Init(const std::wstring& filename,
if (load_filter) {
LoadBloomFilter();
} else {
- bloom_filter_.reset(
- new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio));
+ bloom_filter_ =
+ new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio);
}
init_ = true;
@@ -225,8 +225,8 @@ bool SafeBrowsingDatabaseImpl::ResetDatabase() {
return false;
}
- bloom_filter_.reset(
- new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio));
+ bloom_filter_ =
+ new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio);
file_util::Delete(bloom_filter_filename_, false);
if (!Open())
@@ -1044,7 +1044,7 @@ void SafeBrowsingDatabaseImpl::OnDoneReadingHostKeys() {
for (size_t i = 0; i < bloom_filter_temp_hostkeys_.size(); ++i)
filter->Insert(bloom_filter_temp_hostkeys_[i]);
- bloom_filter_.reset(filter);
+ bloom_filter_ = filter;
TimeDelta bloom_gen = Time::Now() - before;
TimeDelta delta = Time::Now() - bloom_filter_rebuild_time_;
diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
index 803b0ef..3fd2bb3 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc
@@ -114,6 +114,7 @@ TEST(SafeBrowsingDatabase, Database) {
host.host = Sha256Prefix("www.good.com/");
host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2);
+ host.entry->set_chunk_id(2);
host.entry->SetPrefixAt(0, Sha256Prefix("www.good.com/good1.html"));
host.entry->SetPrefixAt(1, Sha256Prefix("www.good.com/good2.html"));
@@ -127,6 +128,7 @@ TEST(SafeBrowsingDatabase, Database) {
// and a chunk with an IP-based host
host.host = Sha256Prefix("192.168.0.1/");
host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1);
+ host.entry->set_chunk_id(3);
host.entry->SetPrefixAt(0, Sha256Prefix("192.168.0.1/malware.html"));
chunk.chunk_number = 3;
@@ -136,8 +138,7 @@ TEST(SafeBrowsingDatabase, Database) {
chunks = new std::deque<SBChunk>;
chunks->push_back(chunk);
database->InsertChunks("goog-malware", chunks);
- database->UpdateFinished();
-
+ database->UpdateFinished(true);
// Make sure they were added correctly.
std::vector<SBListChunkRanges> lists;
@@ -192,7 +193,7 @@ TEST(SafeBrowsingDatabase, Database) {
// Test removing a single prefix from the add chunk.
host.host = Sha256Prefix("www.evil.com/");
- host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 2);
+ host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1);
host.entry->set_chunk_id(2);
host.entry->SetChunkIdAtPrefix(0, 2);
host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html"));
@@ -206,7 +207,7 @@ TEST(SafeBrowsingDatabase, Database) {
chunks->push_back(chunk);
database->InsertChunks("goog-malware", chunks);
- database->UpdateFinished();
+ database->UpdateFinished(true);
EXPECT_TRUE(database->ContainsUrl(GURL("http://www.evil.com/phishing.html"),
&matching_list, &prefix_hits,
@@ -238,7 +239,7 @@ TEST(SafeBrowsingDatabase, Database) {
// Test removing all the prefixes from an add chunk.
AddDelChunk(database, "goog-malware", 2);
- database->UpdateFinished();
+ database->UpdateFinished(true);
EXPECT_FALSE(database->ContainsUrl(GURL("http://www.evil.com/notevil2.html"),
&matching_list, &prefix_hits,
@@ -281,7 +282,7 @@ TEST(SafeBrowsingDatabase, Database) {
// Test the subdel command.
SubDelChunk(database, "goog-malware", 4);
- database->UpdateFinished();
+ database->UpdateFinished(true);
database->GetListsInfo(&lists);
EXPECT_EQ(lists.size(), 1U);
@@ -306,7 +307,7 @@ TEST(SafeBrowsingDatabase, Database) {
chunks = new std::deque<SBChunk>;
chunks->push_back(chunk);
database->InsertChunks("goog-malware", chunks);
- database->UpdateFinished();
+ database->UpdateFinished(true);
EXPECT_FALSE(database->ContainsUrl(
GURL("http://www.notevilanymore.com/index.html"),
@@ -326,7 +327,7 @@ TEST(SafeBrowsingDatabase, Database) {
chunks = new std::deque<SBChunk>;
chunks->push_back(chunk);
database->InsertChunks("goog-malware", chunks);
- database->UpdateFinished();
+ database->UpdateFinished(true);
EXPECT_FALSE(database->ContainsUrl(
GURL("http://www.notevilanymore.com/index.html"),
@@ -371,7 +372,7 @@ TEST(SafeBrowsingDatabase, ZeroSizeChunk) {
chunks->push_back(chunk);
database->InsertChunks("goog-malware", chunks);
- database->UpdateFinished();
+ database->UpdateFinished(true);
// Add an empty ADD and SUB chunk.
std::vector<SBListChunkRanges> list_chunks_empty;
@@ -389,7 +390,7 @@ TEST(SafeBrowsingDatabase, ZeroSizeChunk) {
empty_chunk.is_add = false;
chunks->push_back(empty_chunk);
database->InsertChunks("goog-malware", chunks);
- database->UpdateFinished();
+ database->UpdateFinished(true);
list_chunks_empty.clear();
database->GetListsInfo(&list_chunks_empty);
@@ -425,7 +426,7 @@ TEST(SafeBrowsingDatabase, ZeroSizeChunk) {
chunks->push_back(empty_chunk);
database->InsertChunks("goog-malware", chunks);
- database->UpdateFinished();
+ database->UpdateFinished(true);
const Time now = Time::Now();
std::vector<SBFullHashResult> full_hashes;
@@ -445,14 +446,14 @@ TEST(SafeBrowsingDatabase, ZeroSizeChunk) {
// Handle AddDel and SubDel commands for empty chunks.
AddDelChunk(database, "goog-malware", 21);
- database->UpdateFinished();
+ database->UpdateFinished(true);
list_chunks_empty.clear();
database->GetListsInfo(&list_chunks_empty);
EXPECT_EQ(list_chunks_empty[0].adds, "1,10,19-20,22");
EXPECT_EQ(list_chunks_empty[0].subs, "7");
SubDelChunk(database, "goog-malware", 7);
- database->UpdateFinished();
+ database->UpdateFinished(true);
list_chunks_empty.clear();
database->GetListsInfo(&list_chunks_empty);
EXPECT_EQ(list_chunks_empty[0].adds, "1,10,19-20,22");
@@ -520,7 +521,7 @@ void PeformUpdate(const std::wstring& initial_db,
for (size_t i = 0; i < chunks.size(); ++i)
database->InsertChunks(chunks[i].listname, chunks[i].chunks);
- database->UpdateFinished();
+ database->UpdateFinished(true);
CHECK(metric->GetIOCounters(&after));
@@ -639,7 +640,7 @@ const wchar_t* GetOldUpdatesPath() {
// Counts the IO needed for the initial update of a database.
// test\data\safe_browsing\download_update.py was used to fetch the add/sub
// chunks that are read, in order to get repeatable runs.
-TEST(SafeBrowsingDatabase, DISABLED_DatabaseInitialIO) {
+TEST(SafeBrowsingDatabase, DatabaseInitialIO) {
UpdateDatabase(L"", L"", L"initial");
}
diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc
index 4f9a157..9f6419c 100644
--- a/chrome/browser/safe_browsing/safe_browsing_service.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_service.cc
@@ -5,6 +5,7 @@
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
+#include "base/command_line.h"
#include "base/histogram.h"
#include "base/logging.h"
#include "base/message_loop.h"
@@ -17,6 +18,7 @@
#include "chrome/browser/safe_browsing/safe_browsing_database.h"
#include "chrome/common/chrome_constants.h"
#include "chrome/common/chrome_paths.h"
+#include "chrome/common/chrome_switches.h"
#include "chrome/common/pref_names.h"
#include "chrome/common/pref_service.h"
#include "net/base/registry_controlled_domain.h"
@@ -26,7 +28,9 @@ SafeBrowsingService::SafeBrowsingService()
database_(NULL),
protocol_manager_(NULL),
enabled_(false),
- resetting_(false) {
+ resetting_(false),
+ database_loaded_(false) {
+ new_safe_browsing_ = CommandLine().HasSwitch(switches::kUseNewSafeBrowsing);
}
SafeBrowsingService::~SafeBrowsingService() {
@@ -86,7 +90,12 @@ void SafeBrowsingService::OnIOInitialize(MessageLoop* notify_loop,
notify_loop,
client_key,
wrapped_key);
- protocol_manager_->Initialize();
+ // We want to initialize the protocol manager only after the database has
+ // loaded, which we'll receive asynchronously (DatabaseLoadComplete). If
+ // database_loaded_ isn't true, we'll wait for that notification to do the
+ // init.
+ if (database_loaded_)
+ protocol_manager_->Initialize();
}
void SafeBrowsingService::OnDBInitialize() {
@@ -113,8 +122,15 @@ void SafeBrowsingService::OnIOShutdown() {
database_ = NULL;
- // Delete checks once the database thread is done, calling back any clients
- // with 'URL_SAFE'.
+ // Delete queued and pending checks once the database thread is done, calling
+ // back any clients with 'URL_SAFE'.
+ while (!queued_checks_.empty()) {
+ QueuedCheck check = queued_checks_.front();
+ if (check.client)
+ check.client->OnUrlCheckResult(check.url, URL_SAFE);
+ queued_checks_.pop_front();
+ }
+
for (CurrentChecks::iterator it = checks_.begin();
it != checks_.end(); ++it) {
if ((*it)->client)
@@ -140,10 +156,12 @@ bool SafeBrowsingService::CanCheckUrl(const GURL& url) const {
bool SafeBrowsingService::CheckUrl(const GURL& url, Client* client) {
DCHECK(MessageLoop::current() == io_loop_);
-
if (!enabled_ || !database_)
return true;
+ if (new_safe_browsing_)
+ return CheckUrlNew(url, client);
+
if (!resetting_) {
Time start_time = Time::Now();
bool need_check = database_->NeedToCheckUrl(url);
@@ -161,9 +179,47 @@ bool SafeBrowsingService::CheckUrl(const GURL& url, Client* client) {
check->start = Time::Now();
checks_.insert(check);
+ // Old school SafeBrowsing does an asynchronous database check.
db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod(
this, &SafeBrowsingService::CheckDatabase,
check, protocol_manager_->last_update()));
+
+ return false;
+}
+
+bool SafeBrowsingService::CheckUrlNew(const GURL& url, Client* client) {
+ if (resetting_ || !database_loaded_) {
+ QueuedCheck check;
+ check.client = client;
+ check.url = url;
+ queued_checks_.push_back(check);
+ return false;
+ }
+
+ std::string list;
+ std::vector<SBPrefix> prefix_hits;
+ std::vector<SBFullHashResult> full_hits;
+ bool prefix_match = database_->ContainsUrl(url, &list, &prefix_hits,
+ &full_hits,
+ protocol_manager_->last_update());
+ if (!prefix_match)
+ return true; // URL is okay.
+
+ // Needs to be asynchronous, since we could be in the constructor of a
+ // ResourceDispatcherHost event handler which can't pause there.
+ SafeBrowsingCheck* check = new SafeBrowsingCheck();
+ check->url = url;
+ check->client = client;
+ check->result = URL_SAFE;
+ check->start = Time::Now();
+ check->need_get_hash = full_hits.empty();
+ check->prefix_hits.swap(prefix_hits);
+ check->full_hits.swap(full_hits);
+ checks_.insert(check);
+
+ io_loop_->PostTask(FROM_HERE, NewRunnableMethod(
+ this, &SafeBrowsingService::OnCheckDone, check));
+
return false;
}
@@ -218,6 +274,16 @@ void SafeBrowsingService::CancelCheck(Client* client) {
if ((*i)->client == client)
(*i)->client = NULL;
}
+
+ // Scan the queued clients store. Clients may be here if they requested a URL
+ // check before the database has finished loading or resetting.
+ if (!database_loaded_ || resetting_) {
+ std::deque<QueuedCheck>::iterator it = queued_checks_.begin();
+ for (; it != queued_checks_.end(); ++it) {
+ if (it->client == client)
+ it->client = NULL;
+ }
+ }
}
void SafeBrowsingService::CheckDatabase(SafeBrowsingCheck* info,
@@ -247,16 +313,16 @@ void SafeBrowsingService::CheckDatabase(SafeBrowsingCheck* info,
this, &SafeBrowsingService::OnCheckDone, info));
}
-void SafeBrowsingService::OnCheckDone(SafeBrowsingCheck* info) {
+void SafeBrowsingService::OnCheckDone(SafeBrowsingCheck* check) {
DCHECK(MessageLoop::current() == io_loop_);
// If we've been shutdown during the database lookup, this check will already
// have been deleted (in OnIOShutdown).
- if (!enabled_ || checks_.find(info) == checks_.end())
+ if (!enabled_ || checks_.find(check) == checks_.end())
return;
- UMA_HISTOGRAM_TIMES(L"SB.Database", Time::Now() - info->start);
- if (info->client && info->need_get_hash) {
+ UMA_HISTOGRAM_TIMES(L"SB.Database", Time::Now() - check->start);
+ if (check->client && check->need_get_hash) {
// We have a partial match so we need to query Google for the full hash.
// Clean up will happen in HandleGetHashResults.
@@ -265,28 +331,28 @@ void SafeBrowsingService::OnCheckDone(SafeBrowsingCheck* info) {
// when the results arrive. We only do this for checks involving one prefix,
// since that is the common case (multiple prefixes will issue the request
// as normal).
- if (info->prefix_hits.size() == 1) {
- SBPrefix prefix = info->prefix_hits[0];
+ if (check->prefix_hits.size() == 1) {
+ SBPrefix prefix = check->prefix_hits[0];
GetHashRequests::iterator it = gethash_requests_.find(prefix);
if (it != gethash_requests_.end()) {
// There's already a request in progress.
- it->second.push_back(info);
+ it->second.push_back(check);
return;
}
// No request in progress, so we're the first for this prefix.
GetHashRequestors requestors;
- requestors.push_back(info);
+ requestors.push_back(check);
gethash_requests_[prefix] = requestors;
}
// Reset the start time so that we can measure the network time without the
// database time.
- info->start = Time::Now();
- protocol_manager_->GetFullHash(info, info->prefix_hits);
+ check->start = Time::Now();
+ protocol_manager_->GetFullHash(check, check->prefix_hits);
} else {
// We may have cached results for previous GetHash queries.
- HandleOneCheck(info, info->full_hits);
+ HandleOneCheck(check, check->full_hits);
}
}
@@ -304,10 +370,14 @@ SafeBrowsingDatabase* SafeBrowsingService::GetDatabase() {
Time before = Time::Now();
SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create();
- Callback0::Type* callback =
+ Callback0::Type* chunk_callback =
NewCallback(this, &SafeBrowsingService::ChunkInserted);
- result = database->Init(path, callback);
- if (!result) {
+ bool init_success = database->Init(path, chunk_callback);
+
+ io_loop_->PostTask(FROM_HERE, NewRunnableMethod(
+ this, &SafeBrowsingService::DatabaseLoadComplete, !init_success));
+
+ if (!init_success) {
NOTREACHED();
return NULL;
}
@@ -337,9 +407,14 @@ void SafeBrowsingService::HandleGetHashResults(
UMA_HISTOGRAM_LONG_TIMES(L"SB.Network", Time::Now() - check->start);
OnHandleGetHashResults(check, full_hashes); // 'check' is deleted here.
- if (can_cache)
- db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod(
- this, &SafeBrowsingService::CacheHashResults, prefixes, full_hashes));
+ if (can_cache) {
+ if (!new_safe_browsing_) {
+ db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod(
+ this, &SafeBrowsingService::CacheHashResults, prefixes, full_hashes));
+ } else if (database_) {
+ database_->CacheHashResults(prefixes, full_hashes);
+ }
+ }
}
void SafeBrowsingService::OnHandleGetHashResults(
@@ -386,17 +461,17 @@ void SafeBrowsingService::GetAllChunks() {
this, &SafeBrowsingService::GetAllChunksFromDatabase));
}
-void SafeBrowsingService::UpdateFinished() {
+void SafeBrowsingService::UpdateFinished(bool update_succeeded) {
DCHECK(MessageLoop::current() == io_loop_);
DCHECK(enabled_);
db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod(
- this, &SafeBrowsingService::DatabaseUpdateFinished));
+ this, &SafeBrowsingService::DatabaseUpdateFinished, update_succeeded));
}
-void SafeBrowsingService::DatabaseUpdateFinished() {
+void SafeBrowsingService::DatabaseUpdateFinished(bool update_succeeded) {
DCHECK(MessageLoop::current() == db_thread_->message_loop());
if (GetDatabase())
- GetDatabase()->UpdateFinished();
+ GetDatabase()->UpdateFinished(update_succeeded);
}
void SafeBrowsingService::OnBlockingPageDone(const BlockingPageParam& param) {
@@ -439,6 +514,20 @@ void SafeBrowsingService::OnChunkInserted() {
protocol_manager_->OnChunkInserted();
}
+void SafeBrowsingService::DatabaseLoadComplete(bool database_error) {
+ DCHECK(MessageLoop::current() == io_loop_);
+
+ database_loaded_ = true;
+
+ // TODO(paulg): More robust database initialization error handling.
+ if (protocol_manager_ && !database_error)
+ protocol_manager_->Initialize();
+
+ // If we have any queued requests, we can now check them.
+ if (!resetting_)
+ RunQueuedClients();
+}
+
// static
void SafeBrowsingService::RegisterUserPrefs(PrefService* prefs) {
prefs->RegisterStringPref(prefs::kSafeBrowsingClientKey, L"");
@@ -462,6 +551,8 @@ void SafeBrowsingService::OnResetDatabase() {
void SafeBrowsingService::OnResetComplete() {
DCHECK(MessageLoop::current() == io_loop_);
resetting_ = false;
+ database_loaded_ = true;
+ RunQueuedClients();
}
void SafeBrowsingService::HandleChunk(const std::string& list,
@@ -566,3 +657,15 @@ void SafeBrowsingService::HandleResume() {
DCHECK(MessageLoop::current() == db_thread_->message_loop());
GetDatabase()->HandleResume();
}
+
+void SafeBrowsingService::RunQueuedClients() {
+ DCHECK(MessageLoop::current() == io_loop_);
+ HISTOGRAM_COUNTS(L"SB.QueueDepth", queued_checks_.size());
+ while (!queued_checks_.empty()) {
+ QueuedCheck check = queued_checks_.front();
+ HISTOGRAM_TIMES(L"SB.QueueDelay", Time::Now() - check.start);
+ CheckUrl(check.url, check.client);
+ queued_checks_.pop_front();
+ }
+}
+
diff --git a/chrome/browser/safe_browsing/safe_browsing_service.h b/chrome/browser/safe_browsing/safe_browsing_service.h
index aec465d..dd2c818 100644
--- a/chrome/browser/safe_browsing/safe_browsing_service.h
+++ b/chrome/browser/safe_browsing/safe_browsing_service.h
@@ -22,6 +22,7 @@
#include "googleurl/src/gurl.h"
#include "webkit/glue/resource_type.h"
+class BloomFilter;
class MessageLoop;
class PrefService;
class SafeBrowsingBlockingPage;
@@ -98,6 +99,9 @@ class SafeBrowsingService
// and "client" is called asynchronously with the result when it is ready.
bool CheckUrl(const GURL& url, Client* client);
+ // For the new SafeBrowsingDatabase, which runs the check synchronously.
+ bool CheckUrlNew(const GURL& url, Client* client);
+
// Cancels a pending check if the result is no longer needed.
void CancelCheck(Client* client);
@@ -111,9 +115,6 @@ class SafeBrowsingService
int render_view_id);
// Bundle of SafeBrowsing state for one URL check.
- // TODO(paulg): Make this struct private to SafeBrowsingService and maintain
- // request mappings using CancelableRequests instead (which can
- // store this state for us).
struct SafeBrowsingCheck {
GURL url;
Client* client;
@@ -134,7 +135,7 @@ class SafeBrowsingService
void HandleChunk(const std::string& list, std::deque<SBChunk>* chunks);
void HandleChunkDelete(std::vector<SBChunkDelete>* chunk_deletes);
void GetAllChunks();
- void UpdateFinished();
+ void UpdateFinished(bool update_succeeded);
// The blocking page on the UI thread has completed.
void OnBlockingPageDone(const BlockingPageParam& param);
@@ -151,6 +152,9 @@ class SafeBrowsingService
// complete.
void ChunkInserted();
+ // Notification from the database when it's done loading its bloom filter.
+ void DatabaseLoadComplete(bool database_error);
+
// Preference handling.
static void RegisterUserPrefs(PrefService* prefs);
@@ -199,7 +203,7 @@ class SafeBrowsingService
void NotifyClientBlockingComplete(Client* client, bool proceed);
- void DatabaseUpdateFinished();
+ void DatabaseUpdateFinished(bool update_succeeded);
void Start();
void Stop();
@@ -229,6 +233,13 @@ class SafeBrowsingService
// Invoked on the UI thread to show the blocking page.
void DoDisplayBlockingPage(const BlockingPageParam& param);
+ // During a reset or the initial load we may have to queue checks until the
+ // database is ready. This method is run once the database has loaded (or if
+ // we shut down SafeBrowsing before the database has finished loading).
+ void RunQueuedClients();
+
+ void OnUpdateComplete(bool update_succeeded);
+
MessageLoop* io_loop_;
typedef std::set<SafeBrowsingCheck*> CurrentChecks;
@@ -266,7 +277,21 @@ class SafeBrowsingService
// Indicates if we are in the process of resetting the database.
bool resetting_;
- DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingService);
+ // Indicates if we are using the new bloom filter based database.
+ bool new_safe_browsing_;
+
+ // Indicates if the database has finished initialization.
+ bool database_loaded_;
+
+ // Clients that we've queued up for checking later once the database is ready.
+ typedef struct {
+ Client* client;
+ GURL url;
+ Time start;
+ } QueuedCheck;
+ std::deque<QueuedCheck> queued_checks_;
+
+ DISALLOW_COPY_AND_ASSIGN(SafeBrowsingService);
};
#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H_