diff options
author | paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-10-24 23:02:00 +0000 |
---|---|---|
committer | paulg@google.com <paulg@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-10-24 23:02:00 +0000 |
commit | 613a03b2415686990248a3058575a4504e2f0ec3 (patch) | |
tree | 01dd1013f78c022b44920a24a94a2326fcef0dc8 /chrome | |
parent | edfa71e63db4db57ee6fafb237e4331cb631824b (diff) | |
download | chromium_src-613a03b2415686990248a3058575a4504e2f0ec3.zip chromium_src-613a03b2415686990248a3058575a4504e2f0ec3.tar.gz chromium_src-613a03b2415686990248a3058575a4504e2f0ec3.tar.bz2 |
Changes to allow running the new SafeBrowsing storage system,contained in SafeBrowsingDatabaseBloom, via a command line flag(--new-safe-browsing).
Review URL: http://codereview.chromium.org/6807
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@3959 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
13 files changed, 606 insertions, 162 deletions
diff --git a/chrome/browser/safe_browsing/bloom_filter.h b/chrome/browser/safe_browsing/bloom_filter.h index 6b94745..1ec0624 100644 --- a/chrome/browser/safe_browsing/bloom_filter.h +++ b/chrome/browser/safe_browsing/bloom_filter.h @@ -8,10 +8,11 @@ #ifndef CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ #define CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ +#include "base/ref_counted.h" #include "base/scoped_ptr.h" #include "base/basictypes.h" -class BloomFilter { +class BloomFilter : public base::RefCountedThreadSafe<BloomFilter> { public: // Constructs an empty filter with the given size. BloomFilter(int bit_size); diff --git a/chrome/browser/safe_browsing/bloom_filter_unittest.cc b/chrome/browser/safe_browsing/bloom_filter_unittest.cc index edfefa9..b28269a 100644 --- a/chrome/browser/safe_browsing/bloom_filter_unittest.cc +++ b/chrome/browser/safe_browsing/bloom_filter_unittest.cc @@ -11,6 +11,7 @@ #include "base/logging.h" #include "base/rand_util.h" +#include "base/ref_counted.h" #include "base/string_util.h" #include "testing/gtest/include/gtest/gtest.h" @@ -27,24 +28,25 @@ TEST(SafeBrowsing, BloomFilter) { uint32 count = 1000; // Build up the bloom filter. - BloomFilter filter(count * 10); + scoped_refptr<BloomFilter> filter = new BloomFilter(count * 10); typedef std::set<int> Values; Values values; for (uint32 i = 0; i < count; ++i) { uint32 value = GenHash(); values.insert(value); - filter.Insert(value); + filter->Insert(value); } // Check serialization works. - char* data_copy = new char[filter.size()]; - memcpy(data_copy, filter.data(), filter.size()); - BloomFilter filter_copy(data_copy, filter.size()); + char* data_copy = new char[filter->size()]; + memcpy(data_copy, filter->data(), filter->size()); + scoped_refptr<BloomFilter> filter_copy = + new BloomFilter(data_copy, filter->size()); // Check no false negatives by ensuring that every time we inserted exists. for (Values::iterator i = values.begin(); i != values.end(); ++i) { - EXPECT_TRUE(filter_copy.Exists(*i)); + EXPECT_TRUE(filter_copy->Exists(*i)); } // Check false positive error rate by checking the same number of items that @@ -57,7 +59,7 @@ TEST(SafeBrowsing, BloomFilter) { if (values.find(value) != values.end()) continue; - if (filter_copy.Exists(value)) + if (filter_copy->Exists(value)) found_count++; checked ++; diff --git a/chrome/browser/safe_browsing/protocol_manager.cc b/chrome/browser/safe_browsing/protocol_manager.cc index eeb1889..f531538 100644 --- a/chrome/browser/safe_browsing/protocol_manager.cc +++ b/chrome/browser/safe_browsing/protocol_manager.cc @@ -265,6 +265,7 @@ bool SafeBrowsingProtocolManager::HandleServiceResponse(const GURL& url, &next_update_sec, &re_key, &reset, chunk_deletes, &chunk_urls)) { delete chunk_deletes; + sb_service_->UpdateFinished(false); return false; } @@ -512,7 +513,7 @@ void SafeBrowsingProtocolManager::OnChunkInserted() { if (chunk_request_urls_.empty()) { UMA_HISTOGRAM_LONG_TIMES(L"SB.Update", Time::Now() - last_update_); - sb_service_->UpdateFinished(); + sb_service_->UpdateFinished(true); } else { IssueChunkRequest(); } diff --git a/chrome/browser/safe_browsing/protocol_manager.h b/chrome/browser/safe_browsing/protocol_manager.h index 723a1ed..4cb75c0 100644 --- a/chrome/browser/safe_browsing/protocol_manager.h +++ b/chrome/browser/safe_browsing/protocol_manager.h @@ -204,7 +204,7 @@ class SafeBrowsingProtocolManager : public URLFetcher::Delegate { // Current product version sent in each request. std::string version_; - DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingProtocolManager); + DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolManager); }; #endif // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_ diff --git a/chrome/browser/safe_browsing/protocol_parser.h b/chrome/browser/safe_browsing/protocol_parser.h index fe87549..4d77e50 100644 --- a/chrome/browser/safe_browsing/protocol_parser.h +++ b/chrome/browser/safe_browsing/protocol_parser.h @@ -121,7 +121,7 @@ class SafeBrowsingProtocolParser { // The name of the current list std::string list_name_; - DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingProtocolParser); + DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolParser); }; diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index d09a85f..2f43f6b 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -24,7 +24,10 @@ SafeBrowsingDatabase* SafeBrowsingDatabase::Create() { } bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) { - if (!bloom_filter_.get()) + // Keep a reference to the current bloom filter in case the database rebuilds + // it while we're accessing it. + scoped_refptr<BloomFilter> filter = bloom_filter_; + if (!filter.get()) return true; IncrementBloomFilterReadCount(); @@ -37,16 +40,16 @@ bool SafeBrowsingDatabase::NeedToCheckUrl(const GURL& url) { SBPrefix host_key; if (url.HostIsIPAddress()) { base::SHA256HashString(url.host() + "/", &host_key, sizeof(SBPrefix)); - if (bloom_filter_->Exists(host_key)) + if (filter->Exists(host_key)) return true; } else { base::SHA256HashString(hosts[0] + "/", &host_key, sizeof(SBPrefix)); - if (bloom_filter_->Exists(host_key)) + if (filter->Exists(host_key)) return true; if (hosts.size() > 1) { base::SHA256HashString(hosts[1] + "/", &host_key, sizeof(SBPrefix)); - if (bloom_filter_->Exists(host_key)) + if (filter->Exists(host_key)) return true; } } @@ -77,7 +80,7 @@ void SafeBrowsingDatabase::LoadBloomFilter() { SB_DLOG(INFO) << "SafeBrowsingDatabase read bloom filter in " << (Time::Now() - before).InMilliseconds() << " ms"; - bloom_filter_.reset(new BloomFilter(data, size)); + bloom_filter_ = new BloomFilter(data, size); } void SafeBrowsingDatabase::DeleteBloomFilter() { diff --git a/chrome/browser/safe_browsing/safe_browsing_database.h b/chrome/browser/safe_browsing/safe_browsing_database.h index fe6d586..94b9b76 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.h +++ b/chrome/browser/safe_browsing/safe_browsing_database.h @@ -9,6 +9,7 @@ #include <string> #include <vector> +#include "base/ref_counted.h" #include "base/scoped_ptr.h" #include "base/task.h" #include "base/time.h" @@ -79,7 +80,7 @@ class SafeBrowsingDatabase { // Called when the user's machine has resumed from a lower power state. virtual void HandleResume() = 0; - virtual void UpdateFinished() { } + virtual void UpdateFinished(bool update_succeeded) { } protected: static std::wstring BloomFilterFilename(const std::wstring& db_filename); @@ -100,7 +101,7 @@ class SafeBrowsingDatabase { virtual void IncrementBloomFilterReadCount() {}; std::wstring bloom_filter_filename_; - scoped_ptr<BloomFilter> bloom_filter_; + scoped_refptr<BloomFilter> bloom_filter_; }; #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc index 01d0b99..9b15082 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.cc @@ -53,7 +53,7 @@ SafeBrowsingDatabaseBloom::~SafeBrowsingDatabaseBloom() { } bool SafeBrowsingDatabaseBloom::Init(const std::wstring& filename, - Callback0::Type* chunk_inserted_callback) { + Callback0::Type* chunk_inserted_callback) { DCHECK(!init_ && filename_.empty()); filename_ = filename; @@ -74,19 +74,19 @@ bool SafeBrowsingDatabaseBloom::Init(const std::wstring& filename, load_filter = true; } - CreateChunkCaches(); - + add_count_ = GetAddPrefixCount(); bloom_filter_filename_ = BloomFilterFilename(filename_); if (load_filter) { LoadBloomFilter(); } else { - bloom_filter_.reset( - new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); + bloom_filter_ = + new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio); } init_ = true; chunk_inserted_callback_ = chunk_inserted_callback; + return true; } @@ -133,6 +133,7 @@ bool SafeBrowsingDatabaseBloom::CreateTables() { SQLTransaction transaction(db_); transaction.Begin(); + // Store 32 bit add prefixes here. if (sqlite3_exec(db_, "CREATE TABLE add_prefix (" "chunk INTEGER," "prefix INTEGER)", @@ -140,6 +141,7 @@ bool SafeBrowsingDatabaseBloom::CreateTables() { return false; } + // Store 32 sub prefixes here. if (sqlite3_exec(db_, "CREATE TABLE sub_prefix (" "chunk INTEGER," "add_chunk INTEGER," @@ -148,17 +150,51 @@ bool SafeBrowsingDatabaseBloom::CreateTables() { return false; } - if (sqlite3_exec(db_, "CREATE TABLE full_prefix (" - "chunk INTEGER," - "prefix INTEGER," - "full_prefix BLOB)", - NULL, NULL, NULL) != SQLITE_OK) { - return false; - } - sqlite3_exec(db_, "CREATE INDEX full_prefix_chunk ON full_prefix(chunk)", - NULL, NULL, NULL); - sqlite3_exec(db_, "CREATE INDEX full_prefix_prefix ON full_prefix(prefix)", - NULL, NULL, NULL); + // TODO(paulg): Store 256 bit add full prefixes and GetHash results here. + // 'receive_time' is used for testing the staleness of GetHash results. + // if (sqlite3_exec(db_, "CREATE TABLE full_add_prefix (" + // "chunk INTEGER," + // "prefix INTEGER," + // "receive_time INTEGER," + // "full_prefix BLOB)", + // NULL, NULL, NULL) != SQLITE_OK) { + // return false; + // } + + // TODO(paulg): These tables are going to contain very few entries, so we + // need to measure if it's worth keeping an index on them. + // sqlite3_exec(db_, + // "CREATE INDEX full_add_prefix_chunk ON full_prefix(chunk)", + // NULL, NULL, NULL); + // sqlite3_exec(db_, + // "CREATE INDEX full_add_prefix_prefix ON full_prefix(prefix)", + // NULL, NULL, NULL); + + // TODO(paulg): Store 256 bit sub full prefixes here. + // if (sqlite3_exec(db_, "CREATE TABLE full_sub_prefix (" + // "chunk INTEGER," + // "add_chunk INTEGER," + // "prefix INTEGER," + // "receive_time INTEGER," + // "full_prefix BLOB)", + // NULL, NULL, NULL) != SQLITE_OK) { + // return false; + // } + + // TODO(paulg): These tables are going to contain very few entries, so we + // need to measure if it's worth keeping an index on them. + // sqlite3_exec( + // db_, + // "CREATE INDEX full_sub_prefix_chunk ON full_sub_prefix(chunk)", + // NULL, NULL, NULL); + // sqlite3_exec( + // db_, + // "CREATE INDEX full_sub_prefix_add_chunk ON full_sub_prefix(add_chunk)", + // NULL, NULL, NULL); + // sqlite3_exec( + // db_, + // "CREATE INDEX full_sub_prefix_prefix ON full_sub_prefix(prefix)", + // NULL, NULL, NULL); if (sqlite3_exec(db_, "CREATE TABLE list_names (" "id INTEGER PRIMARY KEY AUTOINCREMENT," @@ -167,13 +203,24 @@ bool SafeBrowsingDatabaseBloom::CreateTables() { return false; } - if (sqlite3_exec(db_, "CREATE TABLE add_chunk (" + // Store all the add and sub chunk numbers we receive. We cannot just rely on + // the prefix tables to generate these lists, since some chunks will have zero + // entries (and thus no prefixes), or potentially an add chunk can have all of + // its entries sub'd without receiving an AddDel, or a sub chunk might have + // been entirely consumed by adds. In these cases, we still have to report the + // chunk number but it will not have any prefixes in the prefix tables. + // + // TODO(paulg): Investigate storing the chunks as a string of ChunkRanges, one + // string for each of phish-add, phish-sub, malware-add, malware-sub. This + // might be better performance when the number of chunks is large, and is the + // natural format for the update request. + if (sqlite3_exec(db_, "CREATE TABLE add_chunks (" "chunk INTEGER PRIMARY KEY)", NULL, NULL, NULL) != SQLITE_OK) { return false; } - if (sqlite3_exec(db_, "CREATE TABLE sub_chunk (" + if (sqlite3_exec(db_, "CREATE TABLE sub_chunks (" "chunk INTEGER PRIMARY KEY)", NULL, NULL, NULL) != SQLITE_OK) { return false; @@ -196,7 +243,9 @@ bool SafeBrowsingDatabaseBloom::CreateTables() { return true; } -// The SafeBrowsing service assumes this operation is synchronous. +// The SafeBrowsing service assumes this operation is run synchronously on the +// database thread. Any URLs that the service needs to check when this is +// running are queued up and run once the reset is done. bool SafeBrowsingDatabaseBloom::ResetDatabase() { hash_cache_.clear(); add_chunk_cache_.clear(); @@ -211,8 +260,8 @@ bool SafeBrowsingDatabaseBloom::ResetDatabase() { return false; } - bloom_filter_.reset( - new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); + bloom_filter_ = + new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio); file_util::Delete(bloom_filter_filename_, false); if (!Open()) @@ -254,28 +303,52 @@ bool SafeBrowsingDatabaseBloom::ContainsUrl( } else { safe_browsing_util::GenerateHostsToCheck(url, &hosts); if (hosts.size() == 0) - return false; // things like about:blank + return false; // Things like about:blank } std::vector<std::string> paths; safe_browsing_util::GeneratePathsToCheck(url, &paths); - // TODO(erikkay): this may wind up being too many hashes on a complex page - // TODO(erikkay): not filling in matching_list - is that OK? - // TODO(erikkay): handle full_hits + // Grab a reference to the existing filter so that it isn't deleted on us if + // a update is just about to finish. + scoped_refptr<BloomFilter> filter = bloom_filter_; + if (!filter.get()) + return false; + + // TODO(erikkay): This may wind up being too many hashes on a complex page. + // TODO(erikkay): Not filling in matching_list - is that OK? for (size_t i = 0; i < hosts.size(); ++i) { for (size_t j = 0; j < paths.size(); ++j) { SBFullHash full_hash; - // TODO(erikkay): maybe we should only do the first 32 bits initially, + // TODO(erikkay): Maybe we should only do the first 32 bits initially, // and then fall back to the full hash if there's a hit. base::SHA256HashString(hosts[i] + paths[j], &full_hash, sizeof(SBFullHash)); SBPrefix prefix; memcpy(&prefix, &full_hash, sizeof(SBPrefix)); - if (bloom_filter_->Exists(prefix)) + if (filter->Exists(prefix)) prefix_hits->push_back(prefix); } } - return prefix_hits->size() > 0; + + if (!prefix_hits->empty()) { + // If all the prefixes are cached as 'misses', don't issue a GetHash. + bool all_misses = true; + for (std::vector<SBPrefix>::const_iterator it = prefix_hits->begin(); + it != prefix_hits->end(); ++it) { + if (prefix_miss_cache_.find(*it) == prefix_miss_cache_.end()) { + all_misses = false; + break; + } + } + if (all_misses) + return false; + + // See if we have the results of recent GetHashes for the prefix matches. + GetCachedFullHashes(prefix_hits, full_hits, last_update); + return true; + } + + return false; } bool SafeBrowsingDatabaseBloom::NeedToCheckUrl(const GURL& url) { @@ -324,8 +397,15 @@ void SafeBrowsingDatabaseBloom::InsertChunks(const std::string& list_name, ProcessPendingWork(); } -void SafeBrowsingDatabaseBloom::UpdateFinished() { - BuildBloomFilter(); +void SafeBrowsingDatabaseBloom::UpdateFinished(bool update_succeeded) { + if (update_succeeded) + BuildBloomFilter(); + + // We won't need the chunk caches until the next update (which will read them + // from the database), so free their memory as they may contain thousands of + // chunk numbers. + add_chunk_cache_.clear(); + sub_chunk_cache_.clear(); } void SafeBrowsingDatabaseBloom::ProcessChunks() { @@ -357,7 +437,6 @@ void SafeBrowsingDatabaseBloom::ProcessAddChunks(std::deque<SBChunk>* chunks) { // The server can give us a chunk that we already have because it's part of // a range. Don't add it again. if (!ChunkExists(list_id, ADD_CHUNK, chunk_id)) { - InsertChunk(list_id, ADD_CHUNK, chunk_id); while (!chunk.hosts.empty()) { WaitAfterResume(); // Read the existing record for this host, if it exists. @@ -380,7 +459,8 @@ void SafeBrowsingDatabaseBloom::ProcessAddChunks(std::deque<SBChunk>* chunks) { void SafeBrowsingDatabaseBloom::AddEntry(SBPrefix host, SBEntry* entry) { STATS_COUNTER(L"SB.HostInsert", 1); if (entry->type() == SBEntry::ADD_FULL_HASH) { - // TODO(erikkay) + // TODO(erikkay, paulg): Add the full 256 bit prefix to the full_prefix + // table AND insert its 32 bit prefix into the regular prefix table. return; } int encoded = EncodeChunkId(entry->chunk_id(), entry->list_id()); @@ -416,6 +496,32 @@ void SafeBrowsingDatabaseBloom::AddPrefix(SBPrefix prefix, int encoded_chunk) { add_count_++; } +void SafeBrowsingDatabaseBloom::AddFullPrefix(SBPrefix prefix, + int encoded_chunk, + SBFullHash full_prefix) { + STATS_COUNTER(L"SB.PrefixAddFull", 1); + std::string sql = "INSERT INTO full_add_prefix " + "(chunk, prefix, receive_time, full_prefix) " + "VALUES (?, ?, ?)"; + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, prefix); + // TODO(paulg): Add receive_time and full_prefix. + // statement->bind_int64(2, receive_time); + // statement->bind_blob(3, full_prefix); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + void SafeBrowsingDatabaseBloom::AddSub( int chunk_id, SBPrefix host, SBEntry* entry) { STATS_COUNTER(L"SB.HostDelete", 1); @@ -460,6 +566,34 @@ void SafeBrowsingDatabaseBloom::AddSubPrefix(SBPrefix prefix, } } +void SafeBrowsingDatabaseBloom::SubFullPrefix(SBPrefix prefix, + int encoded_chunk, + int encoded_add_chunk, + SBFullHash full_prefix) { + STATS_COUNTER(L"SB.PrefixSubFull", 1); + std::string sql = "INSERT INTO full_sub_prefix " + "(chunk, add_chunk, prefix, receive_time, full_prefix) " + "VALUES (?,?,?,?)"; + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, sql.c_str()); + if (!statement.is_valid()) { + NOTREACHED(); + return; + } + statement->bind_int(0, encoded_chunk); + statement->bind_int(1, encoded_add_chunk); + statement->bind_int(2, prefix); + // TODO(paulg): Add receive_time and full_prefix. + // statement->bind_int64(3, receive_time); + // statement->bind_blob(4, full_prefix); + int rv = statement->step(); + statement->reset(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + } else { + DCHECK(rv == SQLITE_DONE); + } +} + // TODO(paulg): Look for a less expensive way to maintain add_count_. int SafeBrowsingDatabaseBloom::GetAddPrefixCount() { SQLITE_UNIQUE_STATEMENT(count, *statement_cache_, @@ -478,36 +612,6 @@ int SafeBrowsingDatabaseBloom::GetAddPrefixCount() { return add_count; } -// TODO(erikkay) - this is too slow -void SafeBrowsingDatabaseBloom::CreateChunkCaches() { - SQLITE_UNIQUE_STATEMENT(adds, *statement_cache_, - "SELECT distinct chunk FROM add_prefix"); - if (!adds.is_valid()) { - NOTREACHED(); - return; - } - int rv = adds->step(); - while (rv == SQLITE_ROW) { - int chunk = adds->column_int(0); - add_chunk_cache_.insert(chunk); - rv = adds->step(); - } - SQLITE_UNIQUE_STATEMENT(subs, *statement_cache_, - "SELECT distinct chunk FROM sub_prefix"); - if (!subs.is_valid()) { - NOTREACHED(); - return; - } - rv = subs->step(); - while (rv == SQLITE_ROW) { - int chunk = subs->column_int(0); - sub_chunk_cache_.insert(chunk); - rv = subs->step(); - } - - add_count_ = GetAddPrefixCount(); -} - void SafeBrowsingDatabaseBloom::ProcessSubChunks(std::deque<SBChunk>* chunks) { while (!chunks->empty()) { SBChunk& chunk = chunks->front(); @@ -515,7 +619,6 @@ void SafeBrowsingDatabaseBloom::ProcessSubChunks(std::deque<SBChunk>* chunks) { int chunk_id = chunk.chunk_number; if (!ChunkExists(list_id, SUB_CHUNK, chunk_id)) { - InsertChunk(list_id, SUB_CHUNK, chunk_id); while (!chunk.hosts.empty()) { WaitAfterResume(); @@ -567,6 +670,8 @@ void SafeBrowsingDatabaseBloom::AddDel(const std::string& list_name, AddDel(list_id, add_chunk_id); } +// TODO(paulg): Change this from a database scan to an in-memory operation done +// while building the bloom filter. void SafeBrowsingDatabaseBloom::AddDel(int list_id, int add_chunk_id) { STATS_COUNTER(L"SB.ChunkDelete", 1); @@ -595,6 +700,8 @@ void SafeBrowsingDatabaseBloom::SubDel(const std::string& list_name, SubDel(list_id, sub_chunk_id); } +// TODO(paulg): Change this from a database scan to an in-memory operation done +// while building the bloom filter. void SafeBrowsingDatabaseBloom::SubDel(int list_id, int sub_chunk_id) { STATS_COUNTER(L"SB.ChunkDelete", 1); @@ -643,12 +750,6 @@ bool SafeBrowsingDatabaseBloom::ChunkExists(int list_id, return ret; } -void SafeBrowsingDatabaseBloom::InsertChunk(int list_id, - ChunkType type, - int chunk_id) { - // TODO(erikkay): insert into the correct chunk table -} - // Return a comma separated list of chunk ids that are in the database for // the given list and chunk type. void SafeBrowsingDatabaseBloom::GetChunkIds( @@ -677,6 +778,10 @@ void SafeBrowsingDatabaseBloom::GetChunkIds( void SafeBrowsingDatabaseBloom::GetListsInfo( std::vector<SBListChunkRanges>* lists) { + DCHECK(lists); + + ReadChunkNumbers(); + lists->clear(); SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, "SELECT name,id FROM list_names"); @@ -770,6 +875,121 @@ std::string SafeBrowsingDatabaseBloom::GetListName(int id) { return statement->column_string(0); } +void SafeBrowsingDatabaseBloom::ReadChunkNumbers() { + add_chunk_cache_.clear(); + sub_chunk_cache_.clear(); + + // Read in the add chunk numbers. + SQLITE_UNIQUE_STATEMENT(read_adds, *statement_cache_, + "SELECT chunk FROM add_chunks"); + if (!read_adds.is_valid()) { + NOTREACHED(); + return; + } + + while (true) { + int rv = read_adds->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + break; + } + add_chunk_cache_.insert(read_adds->column_int(0)); + } + + // Read in the sub chunk numbers. + SQLITE_UNIQUE_STATEMENT(read_subs, *statement_cache_, + "SELECT chunk FROM sub_chunks"); + if (!read_subs.is_valid()) { + NOTREACHED(); + return; + } + + while (true) { + int rv = read_subs->step(); + if (rv != SQLITE_ROW) { + if (rv == SQLITE_CORRUPT) + HandleCorruptDatabase(); + break; + } + sub_chunk_cache_.insert(read_subs->column_int(0)); + } +} + +// Write all the chunk numbers to the add_chunks and sub_chunks tables. +void SafeBrowsingDatabaseBloom::WriteChunkNumbers() { + // Delete the contents of the add chunk table. + SQLITE_UNIQUE_STATEMENT(del_add_chunk, *statement_cache_, + "DELETE FROM add_chunks"); + if (!del_add_chunk.is_valid()) { + NOTREACHED(); + return; + } + int rv = del_add_chunk->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return; + } + DCHECK(rv == SQLITE_DONE); + + SQLITE_UNIQUE_STATEMENT(write_adds, *statement_cache_, + "INSERT INTO add_chunks (chunk) VALUES (?)"); + if (!write_adds.is_valid()) { + NOTREACHED(); + return; + } + + // Write all the add chunks from the cache to the database. + std::set<int>::const_iterator it = add_chunk_cache_.begin(); + while (it != add_chunk_cache_.end()) { + write_adds->bind_int(0, *it); + rv = write_adds->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return; + } + DCHECK(rv == SQLITE_DONE); + write_adds->reset(); + ++it; + } + + // Delete the contents of the sub chunk table. + SQLITE_UNIQUE_STATEMENT(del_sub_chunk, *statement_cache_, + "DELETE FROM sub_chunks"); + if (!del_sub_chunk.is_valid()) { + NOTREACHED(); + return; + } + rv = del_sub_chunk->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return; + } + DCHECK(rv == SQLITE_DONE); + + SQLITE_UNIQUE_STATEMENT(write_subs, *statement_cache_, + "INSERT INTO sub_chunks (chunk) VALUES (?)"); + if (!write_subs.is_valid()) { + NOTREACHED(); + return; + } + + // Write all the sub chunks from the cache to the database. + it = sub_chunk_cache_.begin(); + while (it != sub_chunk_cache_.end()) { + write_subs->bind_int(0, *it); + rv = write_subs->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return; + } + DCHECK(rv == SQLITE_DONE); + write_subs->reset(); + ++it; + } +} + +// Helper struct and compare function for building the bloom filter. typedef struct { int chunk_id; SBPrefix prefix; @@ -786,13 +1006,24 @@ static int pair_compare(const void* arg1, const void* arg2) { // TODO(erikkay): should we call WaitAfterResume() inside any of the loops here? // This is a pretty fast operation and it would be nice to let it finish. +// TODO(erikkay, paulg): Break this into smaller functions. void SafeBrowsingDatabaseBloom::BuildBloomFilter() { Time before = Time::Now(); add_count_ = GetAddPrefixCount(); + if (add_count_ == 0) { + bloom_filter_ = NULL; + return; + } + scoped_array<SBPair> adds_array(new SBPair[add_count_]); SBPair* adds = adds_array.get(); + // Used to track which adds have been subbed out. The vector<bool> is actually + // a bitvector so the size is as small as we can get. + std::vector<bool> adds_removed; + adds_removed.resize(add_count_, false); + // Read add_prefix into memory and sort it. STATS_COUNTER(L"SB.HostSelectForBloomFilter", 1); SQLITE_UNIQUE_STATEMENT(add_prefix, *statement_cache_, @@ -820,11 +1051,40 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() { // Read through sub_prefix and zero out add_prefix entries that match. SQLITE_UNIQUE_STATEMENT(sub_prefix, *statement_cache_, - "SELECT add_chunk, prefix FROM sub_prefix"); + "SELECT chunk, add_chunk, prefix FROM sub_prefix"); if (!sub_prefix.is_valid()) { NOTREACHED(); return; } + + scoped_ptr<SQLTransaction> update_transaction; + update_transaction.reset(new SQLTransaction(db_)); + if (update_transaction->Begin() != SQLITE_OK) { + NOTREACHED(); + return; + } + + // Create a temporary sub prefix table. We add entries to it as we scan the + // sub_prefix table looking for adds to remove. Only entries that don't + // remove an add written to this table. When we're done filtering, we replace + // sub_prefix with this table. + if (sqlite3_exec(db_, "CREATE TABLE sub_prefix_tmp (" + "chunk INTEGER," + "add_chunk INTEGER," + "prefix INTEGER)", + NULL, NULL, NULL) != SQLITE_OK) { + return; + } + + SQLITE_UNIQUE_STATEMENT( + sub_prefix_tmp, + *statement_cache_, + "INSERT INTO sub_prefix_tmp (chunk,add_chunk,prefix) VALUES (?,?,?)"); + if (!sub_prefix_tmp.is_valid()) { + NOTREACHED(); + return; + } + SBPair sub; while (true) { int rv = sub_prefix->step(); @@ -833,26 +1093,61 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() { HandleCorruptDatabase(); break; } - sub.chunk_id = sub_prefix->column_int(0); - sub.prefix = sub_prefix->column_int(1); + sub.chunk_id = sub_prefix->column_int(1); + sub.prefix = sub_prefix->column_int(2); void* match = bsearch(&sub, adds, add_count_, sizeof(SBPair), pair_compare); if (match) { SBPair* subbed = reinterpret_cast<SBPair*>(match); - // A chunk_id of 0 is invalid, so we use that to mark this prefix as - // having been subbed. - subbed->chunk_id = 0; + adds_removed[subbed - adds] = true; + } else { + // This sub_prefix entry did not match any add, so we keep it around. + sub_prefix_tmp->bind_int(0, sub_prefix->column_int(0)); + sub_prefix_tmp->bind_int(1, sub.chunk_id); + sub_prefix_tmp->bind_int(2, sub.prefix); + int rv = sub_prefix_tmp->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return; + } + DCHECK(rv == SQLITE_DONE); + sub_prefix_tmp->reset(); } } + // Delete the old sub_prefix table and rename the temporary table. + SQLITE_UNIQUE_STATEMENT(del_sub, *statement_cache_, "DROP TABLE sub_prefix"); + if (!del_sub.is_valid()) { + NOTREACHED(); + return; + } + int rv = del_sub->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return; + } + DCHECK(rv == SQLITE_DONE); + + SQLITE_UNIQUE_STATEMENT(rename_sub, *statement_cache_, + "ALTER TABLE sub_prefix_tmp RENAME TO sub_prefix"); + if (!rename_sub.is_valid()) { + NOTREACHED(); + return; + } + rv = rename_sub->step(); + if (rv == SQLITE_CORRUPT) { + HandleCorruptDatabase(); + return; + } + DCHECK(rv == SQLITE_DONE); + // Now blow away add_prefix and re-write it from our in-memory data, // building the new bloom filter as we go. - BeginTransaction(); - SQLITE_UNIQUE_STATEMENT(del, *statement_cache_, "DELETE FROM add_prefix"); - if (!del.is_valid()) { + SQLITE_UNIQUE_STATEMENT(del_add, *statement_cache_, "DELETE FROM add_prefix"); + if (!del_add.is_valid()) { NOTREACHED(); return; } - int rv = del->step(); + rv = del_add->step(); if (rv == SQLITE_CORRUPT) { HandleCorruptDatabase(); return; @@ -860,7 +1155,7 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() { DCHECK(rv == SQLITE_DONE); SQLITE_UNIQUE_STATEMENT(insert, *statement_cache_, - "INSERT INTO add_prefix VALUES(?,?)"); + "INSERT INTO add_prefix VALUES (?,?)"); if (!insert.is_valid()) { NOTREACHED(); return; @@ -871,13 +1166,14 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() { add = adds; int new_count = 0; while (add - adds < add_count_) { - if (add->chunk_id != 0) { + if (!adds_removed[add - adds]) { filter->Insert(add->prefix); insert->bind_int(0, add->chunk_id); insert->bind_int(1, add->prefix); rv = insert->step(); if (rv == SQLITE_CORRUPT) { HandleCorruptDatabase(); + delete filter; // TODO(paulg): scoped. return; } DCHECK(rv == SQLITE_DONE); @@ -886,10 +1182,12 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() { } add++; } - bloom_filter_.reset(filter); + + update_transaction->Commit(); + // If there were any matching subs, the size will be smaller. add_count_ = new_count; - EndTransaction(); + bloom_filter_ = filter; TimeDelta bloom_gen = Time::Now() - before; SB_DLOG(INFO) << "SafeBrowsingDatabaseImpl built bloom filter in " << @@ -898,6 +1196,7 @@ void SafeBrowsingDatabaseBloom::BuildBloomFilter() { UMA_HISTOGRAM_LONG_TIMES(L"SB.BuildBloom", bloom_gen); WriteBloomFilter(); + WriteChunkNumbers(); } void SafeBrowsingDatabaseBloom::BeginTransaction() { diff --git a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h index e991333..f639943 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_bloom.h +++ b/chrome/browser/safe_browsing/safe_browsing_database_bloom.h @@ -64,13 +64,14 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { // Store the results of a GetHash response. In the case of empty results, we // cache the prefixes until the next update so that we don't have to issue // further GetHash requests we know will be empty. - virtual void CacheHashResults(const std::vector<SBPrefix>& prefixes, - const std::vector<SBFullHashResult>& full_hits); + virtual void CacheHashResults( + const std::vector<SBPrefix>& prefixes, + const std::vector<SBFullHashResult>& full_hits); // Called when the user's machine has resumed from a lower power state. virtual void HandleResume(); - virtual void UpdateFinished(); + virtual void UpdateFinished(bool update_succeeded); virtual bool NeedToCheckUrl(const GURL& url); private: @@ -102,10 +103,6 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { // Checks if a chunk is in the database. bool ChunkExists(int list_id, ChunkType type, int chunk_id); - // Note the existence of a chunk in the database. This is used as a faster - // cache of all of the chunks we have. - void InsertChunk(int list_id, ChunkType type, int chunk_id); - // Return a comma separated list of chunk ids that are in the database for // the given list and chunk type. void GetChunkIds(int list_id, ChunkType type, std::string* list); @@ -164,6 +161,7 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { // Clears the did_resume_ flag. This is called by HandleResume after a delay // to handle the case where we weren't in the middle of any work. void OnResumeDone(); + // If the did_resume_ flag is set, sleep for a period and then clear the // flag. This method should be called periodically inside of busy disk loops. void WaitAfterResume(); @@ -173,8 +171,18 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { void AddSub(int chunk, SBPrefix host, SBEntry* entry); void AddSubPrefix(SBPrefix prefix, int encoded_chunk, int encoded_add_chunk); void ProcessPendingSubs(); - void CreateChunkCaches(); int GetAddPrefixCount(); + void AddFullPrefix(SBPrefix prefix, + int encoded_chunk, + SBFullHash full_prefix); + void SubFullPrefix(SBPrefix prefix, + int encoded_chunk, + int encoded_add_chunk, + SBFullHash full_prefix); + + // Reads and writes chunk numbers to and from persistent store. + void ReadChunkNumbers(); + void WriteChunkNumbers(); // Encode the list id in the lower bit of the chunk. static inline int EncodeChunkId(int chunk, int list_id) { @@ -240,7 +248,7 @@ class SafeBrowsingDatabaseBloom : public SafeBrowsingDatabase { // Cache of prefixes that returned empty results (no full hash match). std::set<SBPrefix> prefix_miss_cache_; - // a cache of all of the existing add and sub chunks + // Caches for all of the existing add and sub chunks. std::set<int> add_chunk_cache_; std::set<int> sub_chunk_cache_; diff --git a/chrome/browser/safe_browsing/safe_browsing_database_impl.cc b/chrome/browser/safe_browsing/safe_browsing_database_impl.cc index 02c2d04..5d1c758 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_impl.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_impl.cc @@ -101,8 +101,8 @@ bool SafeBrowsingDatabaseImpl::Init(const std::wstring& filename, if (load_filter) { LoadBloomFilter(); } else { - bloom_filter_.reset( - new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); + bloom_filter_ = + new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio); } init_ = true; @@ -225,8 +225,8 @@ bool SafeBrowsingDatabaseImpl::ResetDatabase() { return false; } - bloom_filter_.reset( - new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio)); + bloom_filter_ = + new BloomFilter(kBloomFilterMinSize * kBloomFilterSizeRatio); file_util::Delete(bloom_filter_filename_, false); if (!Open()) @@ -1044,7 +1044,7 @@ void SafeBrowsingDatabaseImpl::OnDoneReadingHostKeys() { for (size_t i = 0; i < bloom_filter_temp_hostkeys_.size(); ++i) filter->Insert(bloom_filter_temp_hostkeys_[i]); - bloom_filter_.reset(filter); + bloom_filter_ = filter; TimeDelta bloom_gen = Time::Now() - before; TimeDelta delta = Time::Now() - bloom_filter_rebuild_time_; diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc index 803b0ef..3fd2bb3 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc @@ -114,6 +114,7 @@ TEST(SafeBrowsingDatabase, Database) { host.host = Sha256Prefix("www.good.com/"); host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->set_chunk_id(2); host.entry->SetPrefixAt(0, Sha256Prefix("www.good.com/good1.html")); host.entry->SetPrefixAt(1, Sha256Prefix("www.good.com/good2.html")); @@ -127,6 +128,7 @@ TEST(SafeBrowsingDatabase, Database) { // and a chunk with an IP-based host host.host = Sha256Prefix("192.168.0.1/"); host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); + host.entry->set_chunk_id(3); host.entry->SetPrefixAt(0, Sha256Prefix("192.168.0.1/malware.html")); chunk.chunk_number = 3; @@ -136,8 +138,7 @@ TEST(SafeBrowsingDatabase, Database) { chunks = new std::deque<SBChunk>; chunks->push_back(chunk); database->InsertChunks("goog-malware", chunks); - database->UpdateFinished(); - + database->UpdateFinished(true); // Make sure they were added correctly. std::vector<SBListChunkRanges> lists; @@ -192,7 +193,7 @@ TEST(SafeBrowsingDatabase, Database) { // Test removing a single prefix from the add chunk. host.host = Sha256Prefix("www.evil.com/"); - host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 2); + host.entry = SBEntry::Create(SBEntry::SUB_PREFIX, 1); host.entry->set_chunk_id(2); host.entry->SetChunkIdAtPrefix(0, 2); host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/notevil1.html")); @@ -206,7 +207,7 @@ TEST(SafeBrowsingDatabase, Database) { chunks->push_back(chunk); database->InsertChunks("goog-malware", chunks); - database->UpdateFinished(); + database->UpdateFinished(true); EXPECT_TRUE(database->ContainsUrl(GURL("http://www.evil.com/phishing.html"), &matching_list, &prefix_hits, @@ -238,7 +239,7 @@ TEST(SafeBrowsingDatabase, Database) { // Test removing all the prefixes from an add chunk. AddDelChunk(database, "goog-malware", 2); - database->UpdateFinished(); + database->UpdateFinished(true); EXPECT_FALSE(database->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), &matching_list, &prefix_hits, @@ -281,7 +282,7 @@ TEST(SafeBrowsingDatabase, Database) { // Test the subdel command. SubDelChunk(database, "goog-malware", 4); - database->UpdateFinished(); + database->UpdateFinished(true); database->GetListsInfo(&lists); EXPECT_EQ(lists.size(), 1U); @@ -306,7 +307,7 @@ TEST(SafeBrowsingDatabase, Database) { chunks = new std::deque<SBChunk>; chunks->push_back(chunk); database->InsertChunks("goog-malware", chunks); - database->UpdateFinished(); + database->UpdateFinished(true); EXPECT_FALSE(database->ContainsUrl( GURL("http://www.notevilanymore.com/index.html"), @@ -326,7 +327,7 @@ TEST(SafeBrowsingDatabase, Database) { chunks = new std::deque<SBChunk>; chunks->push_back(chunk); database->InsertChunks("goog-malware", chunks); - database->UpdateFinished(); + database->UpdateFinished(true); EXPECT_FALSE(database->ContainsUrl( GURL("http://www.notevilanymore.com/index.html"), @@ -371,7 +372,7 @@ TEST(SafeBrowsingDatabase, ZeroSizeChunk) { chunks->push_back(chunk); database->InsertChunks("goog-malware", chunks); - database->UpdateFinished(); + database->UpdateFinished(true); // Add an empty ADD and SUB chunk. std::vector<SBListChunkRanges> list_chunks_empty; @@ -389,7 +390,7 @@ TEST(SafeBrowsingDatabase, ZeroSizeChunk) { empty_chunk.is_add = false; chunks->push_back(empty_chunk); database->InsertChunks("goog-malware", chunks); - database->UpdateFinished(); + database->UpdateFinished(true); list_chunks_empty.clear(); database->GetListsInfo(&list_chunks_empty); @@ -425,7 +426,7 @@ TEST(SafeBrowsingDatabase, ZeroSizeChunk) { chunks->push_back(empty_chunk); database->InsertChunks("goog-malware", chunks); - database->UpdateFinished(); + database->UpdateFinished(true); const Time now = Time::Now(); std::vector<SBFullHashResult> full_hashes; @@ -445,14 +446,14 @@ TEST(SafeBrowsingDatabase, ZeroSizeChunk) { // Handle AddDel and SubDel commands for empty chunks. AddDelChunk(database, "goog-malware", 21); - database->UpdateFinished(); + database->UpdateFinished(true); list_chunks_empty.clear(); database->GetListsInfo(&list_chunks_empty); EXPECT_EQ(list_chunks_empty[0].adds, "1,10,19-20,22"); EXPECT_EQ(list_chunks_empty[0].subs, "7"); SubDelChunk(database, "goog-malware", 7); - database->UpdateFinished(); + database->UpdateFinished(true); list_chunks_empty.clear(); database->GetListsInfo(&list_chunks_empty); EXPECT_EQ(list_chunks_empty[0].adds, "1,10,19-20,22"); @@ -520,7 +521,7 @@ void PeformUpdate(const std::wstring& initial_db, for (size_t i = 0; i < chunks.size(); ++i) database->InsertChunks(chunks[i].listname, chunks[i].chunks); - database->UpdateFinished(); + database->UpdateFinished(true); CHECK(metric->GetIOCounters(&after)); @@ -639,7 +640,7 @@ const wchar_t* GetOldUpdatesPath() { // Counts the IO needed for the initial update of a database. // test\data\safe_browsing\download_update.py was used to fetch the add/sub // chunks that are read, in order to get repeatable runs. -TEST(SafeBrowsingDatabase, DISABLED_DatabaseInitialIO) { +TEST(SafeBrowsingDatabase, DatabaseInitialIO) { UpdateDatabase(L"", L"", L"initial"); } diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc index 4f9a157..9f6419c 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.cc +++ b/chrome/browser/safe_browsing/safe_browsing_service.cc @@ -5,6 +5,7 @@ #include "chrome/browser/safe_browsing/safe_browsing_service.h" +#include "base/command_line.h" #include "base/histogram.h" #include "base/logging.h" #include "base/message_loop.h" @@ -17,6 +18,7 @@ #include "chrome/browser/safe_browsing/safe_browsing_database.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_paths.h" +#include "chrome/common/chrome_switches.h" #include "chrome/common/pref_names.h" #include "chrome/common/pref_service.h" #include "net/base/registry_controlled_domain.h" @@ -26,7 +28,9 @@ SafeBrowsingService::SafeBrowsingService() database_(NULL), protocol_manager_(NULL), enabled_(false), - resetting_(false) { + resetting_(false), + database_loaded_(false) { + new_safe_browsing_ = CommandLine().HasSwitch(switches::kUseNewSafeBrowsing); } SafeBrowsingService::~SafeBrowsingService() { @@ -86,7 +90,12 @@ void SafeBrowsingService::OnIOInitialize(MessageLoop* notify_loop, notify_loop, client_key, wrapped_key); - protocol_manager_->Initialize(); + // We want to initialize the protocol manager only after the database has + // loaded, which we'll receive asynchronously (DatabaseLoadComplete). If + // database_loaded_ isn't true, we'll wait for that notification to do the + // init. + if (database_loaded_) + protocol_manager_->Initialize(); } void SafeBrowsingService::OnDBInitialize() { @@ -113,8 +122,15 @@ void SafeBrowsingService::OnIOShutdown() { database_ = NULL; - // Delete checks once the database thread is done, calling back any clients - // with 'URL_SAFE'. + // Delete queued and pending checks once the database thread is done, calling + // back any clients with 'URL_SAFE'. + while (!queued_checks_.empty()) { + QueuedCheck check = queued_checks_.front(); + if (check.client) + check.client->OnUrlCheckResult(check.url, URL_SAFE); + queued_checks_.pop_front(); + } + for (CurrentChecks::iterator it = checks_.begin(); it != checks_.end(); ++it) { if ((*it)->client) @@ -140,10 +156,12 @@ bool SafeBrowsingService::CanCheckUrl(const GURL& url) const { bool SafeBrowsingService::CheckUrl(const GURL& url, Client* client) { DCHECK(MessageLoop::current() == io_loop_); - if (!enabled_ || !database_) return true; + if (new_safe_browsing_) + return CheckUrlNew(url, client); + if (!resetting_) { Time start_time = Time::Now(); bool need_check = database_->NeedToCheckUrl(url); @@ -161,9 +179,47 @@ bool SafeBrowsingService::CheckUrl(const GURL& url, Client* client) { check->start = Time::Now(); checks_.insert(check); + // Old school SafeBrowsing does an asynchronous database check. db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( this, &SafeBrowsingService::CheckDatabase, check, protocol_manager_->last_update())); + + return false; +} + +bool SafeBrowsingService::CheckUrlNew(const GURL& url, Client* client) { + if (resetting_ || !database_loaded_) { + QueuedCheck check; + check.client = client; + check.url = url; + queued_checks_.push_back(check); + return false; + } + + std::string list; + std::vector<SBPrefix> prefix_hits; + std::vector<SBFullHashResult> full_hits; + bool prefix_match = database_->ContainsUrl(url, &list, &prefix_hits, + &full_hits, + protocol_manager_->last_update()); + if (!prefix_match) + return true; // URL is okay. + + // Needs to be asynchronous, since we could be in the constructor of a + // ResourceDispatcherHost event handler which can't pause there. + SafeBrowsingCheck* check = new SafeBrowsingCheck(); + check->url = url; + check->client = client; + check->result = URL_SAFE; + check->start = Time::Now(); + check->need_get_hash = full_hits.empty(); + check->prefix_hits.swap(prefix_hits); + check->full_hits.swap(full_hits); + checks_.insert(check); + + io_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::OnCheckDone, check)); + return false; } @@ -218,6 +274,16 @@ void SafeBrowsingService::CancelCheck(Client* client) { if ((*i)->client == client) (*i)->client = NULL; } + + // Scan the queued clients store. Clients may be here if they requested a URL + // check before the database has finished loading or resetting. + if (!database_loaded_ || resetting_) { + std::deque<QueuedCheck>::iterator it = queued_checks_.begin(); + for (; it != queued_checks_.end(); ++it) { + if (it->client == client) + it->client = NULL; + } + } } void SafeBrowsingService::CheckDatabase(SafeBrowsingCheck* info, @@ -247,16 +313,16 @@ void SafeBrowsingService::CheckDatabase(SafeBrowsingCheck* info, this, &SafeBrowsingService::OnCheckDone, info)); } -void SafeBrowsingService::OnCheckDone(SafeBrowsingCheck* info) { +void SafeBrowsingService::OnCheckDone(SafeBrowsingCheck* check) { DCHECK(MessageLoop::current() == io_loop_); // If we've been shutdown during the database lookup, this check will already // have been deleted (in OnIOShutdown). - if (!enabled_ || checks_.find(info) == checks_.end()) + if (!enabled_ || checks_.find(check) == checks_.end()) return; - UMA_HISTOGRAM_TIMES(L"SB.Database", Time::Now() - info->start); - if (info->client && info->need_get_hash) { + UMA_HISTOGRAM_TIMES(L"SB.Database", Time::Now() - check->start); + if (check->client && check->need_get_hash) { // We have a partial match so we need to query Google for the full hash. // Clean up will happen in HandleGetHashResults. @@ -265,28 +331,28 @@ void SafeBrowsingService::OnCheckDone(SafeBrowsingCheck* info) { // when the results arrive. We only do this for checks involving one prefix, // since that is the common case (multiple prefixes will issue the request // as normal). - if (info->prefix_hits.size() == 1) { - SBPrefix prefix = info->prefix_hits[0]; + if (check->prefix_hits.size() == 1) { + SBPrefix prefix = check->prefix_hits[0]; GetHashRequests::iterator it = gethash_requests_.find(prefix); if (it != gethash_requests_.end()) { // There's already a request in progress. - it->second.push_back(info); + it->second.push_back(check); return; } // No request in progress, so we're the first for this prefix. GetHashRequestors requestors; - requestors.push_back(info); + requestors.push_back(check); gethash_requests_[prefix] = requestors; } // Reset the start time so that we can measure the network time without the // database time. - info->start = Time::Now(); - protocol_manager_->GetFullHash(info, info->prefix_hits); + check->start = Time::Now(); + protocol_manager_->GetFullHash(check, check->prefix_hits); } else { // We may have cached results for previous GetHash queries. - HandleOneCheck(info, info->full_hits); + HandleOneCheck(check, check->full_hits); } } @@ -304,10 +370,14 @@ SafeBrowsingDatabase* SafeBrowsingService::GetDatabase() { Time before = Time::Now(); SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create(); - Callback0::Type* callback = + Callback0::Type* chunk_callback = NewCallback(this, &SafeBrowsingService::ChunkInserted); - result = database->Init(path, callback); - if (!result) { + bool init_success = database->Init(path, chunk_callback); + + io_loop_->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::DatabaseLoadComplete, !init_success)); + + if (!init_success) { NOTREACHED(); return NULL; } @@ -337,9 +407,14 @@ void SafeBrowsingService::HandleGetHashResults( UMA_HISTOGRAM_LONG_TIMES(L"SB.Network", Time::Now() - check->start); OnHandleGetHashResults(check, full_hashes); // 'check' is deleted here. - if (can_cache) - db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( - this, &SafeBrowsingService::CacheHashResults, prefixes, full_hashes)); + if (can_cache) { + if (!new_safe_browsing_) { + db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::CacheHashResults, prefixes, full_hashes)); + } else if (database_) { + database_->CacheHashResults(prefixes, full_hashes); + } + } } void SafeBrowsingService::OnHandleGetHashResults( @@ -386,17 +461,17 @@ void SafeBrowsingService::GetAllChunks() { this, &SafeBrowsingService::GetAllChunksFromDatabase)); } -void SafeBrowsingService::UpdateFinished() { +void SafeBrowsingService::UpdateFinished(bool update_succeeded) { DCHECK(MessageLoop::current() == io_loop_); DCHECK(enabled_); db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( - this, &SafeBrowsingService::DatabaseUpdateFinished)); + this, &SafeBrowsingService::DatabaseUpdateFinished, update_succeeded)); } -void SafeBrowsingService::DatabaseUpdateFinished() { +void SafeBrowsingService::DatabaseUpdateFinished(bool update_succeeded) { DCHECK(MessageLoop::current() == db_thread_->message_loop()); if (GetDatabase()) - GetDatabase()->UpdateFinished(); + GetDatabase()->UpdateFinished(update_succeeded); } void SafeBrowsingService::OnBlockingPageDone(const BlockingPageParam& param) { @@ -439,6 +514,20 @@ void SafeBrowsingService::OnChunkInserted() { protocol_manager_->OnChunkInserted(); } +void SafeBrowsingService::DatabaseLoadComplete(bool database_error) { + DCHECK(MessageLoop::current() == io_loop_); + + database_loaded_ = true; + + // TODO(paulg): More robust database initialization error handling. + if (protocol_manager_ && !database_error) + protocol_manager_->Initialize(); + + // If we have any queued requests, we can now check them. + if (!resetting_) + RunQueuedClients(); +} + // static void SafeBrowsingService::RegisterUserPrefs(PrefService* prefs) { prefs->RegisterStringPref(prefs::kSafeBrowsingClientKey, L""); @@ -462,6 +551,8 @@ void SafeBrowsingService::OnResetDatabase() { void SafeBrowsingService::OnResetComplete() { DCHECK(MessageLoop::current() == io_loop_); resetting_ = false; + database_loaded_ = true; + RunQueuedClients(); } void SafeBrowsingService::HandleChunk(const std::string& list, @@ -566,3 +657,15 @@ void SafeBrowsingService::HandleResume() { DCHECK(MessageLoop::current() == db_thread_->message_loop()); GetDatabase()->HandleResume(); } + +void SafeBrowsingService::RunQueuedClients() { + DCHECK(MessageLoop::current() == io_loop_); + HISTOGRAM_COUNTS(L"SB.QueueDepth", queued_checks_.size()); + while (!queued_checks_.empty()) { + QueuedCheck check = queued_checks_.front(); + HISTOGRAM_TIMES(L"SB.QueueDelay", Time::Now() - check.start); + CheckUrl(check.url, check.client); + queued_checks_.pop_front(); + } +} + diff --git a/chrome/browser/safe_browsing/safe_browsing_service.h b/chrome/browser/safe_browsing/safe_browsing_service.h index aec465d..dd2c818 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.h +++ b/chrome/browser/safe_browsing/safe_browsing_service.h @@ -22,6 +22,7 @@ #include "googleurl/src/gurl.h" #include "webkit/glue/resource_type.h" +class BloomFilter; class MessageLoop; class PrefService; class SafeBrowsingBlockingPage; @@ -98,6 +99,9 @@ class SafeBrowsingService // and "client" is called asynchronously with the result when it is ready. bool CheckUrl(const GURL& url, Client* client); + // For the new SafeBrowsingDatabase, which runs the check synchronously. + bool CheckUrlNew(const GURL& url, Client* client); + // Cancels a pending check if the result is no longer needed. void CancelCheck(Client* client); @@ -111,9 +115,6 @@ class SafeBrowsingService int render_view_id); // Bundle of SafeBrowsing state for one URL check. - // TODO(paulg): Make this struct private to SafeBrowsingService and maintain - // request mappings using CancelableRequests instead (which can - // store this state for us). struct SafeBrowsingCheck { GURL url; Client* client; @@ -134,7 +135,7 @@ class SafeBrowsingService void HandleChunk(const std::string& list, std::deque<SBChunk>* chunks); void HandleChunkDelete(std::vector<SBChunkDelete>* chunk_deletes); void GetAllChunks(); - void UpdateFinished(); + void UpdateFinished(bool update_succeeded); // The blocking page on the UI thread has completed. void OnBlockingPageDone(const BlockingPageParam& param); @@ -151,6 +152,9 @@ class SafeBrowsingService // complete. void ChunkInserted(); + // Notification from the database when it's done loading its bloom filter. + void DatabaseLoadComplete(bool database_error); + // Preference handling. static void RegisterUserPrefs(PrefService* prefs); @@ -199,7 +203,7 @@ class SafeBrowsingService void NotifyClientBlockingComplete(Client* client, bool proceed); - void DatabaseUpdateFinished(); + void DatabaseUpdateFinished(bool update_succeeded); void Start(); void Stop(); @@ -229,6 +233,13 @@ class SafeBrowsingService // Invoked on the UI thread to show the blocking page. void DoDisplayBlockingPage(const BlockingPageParam& param); + // During a reset or the initial load we may have to queue checks until the + // database is ready. This method is run once the database has loaded (or if + // we shut down SafeBrowsing before the database has finished loading). + void RunQueuedClients(); + + void OnUpdateComplete(bool update_succeeded); + MessageLoop* io_loop_; typedef std::set<SafeBrowsingCheck*> CurrentChecks; @@ -266,7 +277,21 @@ class SafeBrowsingService // Indicates if we are in the process of resetting the database. bool resetting_; - DISALLOW_EVIL_CONSTRUCTORS(SafeBrowsingService); + // Indicates if we are using the new bloom filter based database. + bool new_safe_browsing_; + + // Indicates if the database has finished initialization. + bool database_loaded_; + + // Clients that we've queued up for checking later once the database is ready. + typedef struct { + Client* client; + GURL url; + Time start; + } QueuedCheck; + std::deque<QueuedCheck> queued_checks_; + + DISALLOW_COPY_AND_ASSIGN(SafeBrowsingService); }; #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H_ |