diff options
author | lzheng@chromium.org <lzheng@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-12-15 17:42:46 +0000 |
---|---|---|
committer | lzheng@chromium.org <lzheng@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-12-15 17:42:46 +0000 |
commit | 6df44fb660221182373b00ad27840040167205d7 (patch) | |
tree | 781dfed8d21dc5b653baa5cc990cdf939a60261e /chrome/browser/safe_browsing | |
parent | e064e3004f682442dedac5697701aadadfa390ea (diff) | |
download | chromium_src-6df44fb660221182373b00ad27840040167205d7.zip chromium_src-6df44fb660221182373b00ad27840040167205d7.tar.gz chromium_src-6df44fb660221182373b00ad27840040167205d7.tar.bz2 |
Handle the download URL list and binary hash list in Safe Browsing. Added
a switch to enable this feature at the SafeBrowsing service level.
TEST=safe_browsing_database_unittest.cc,safe_browsing_test.cc
BUG=60822
Review URL: http://codereview.chromium.org/5209003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@69275 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing')
12 files changed, 927 insertions, 321 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_blocking_page_test.cc b/chrome/browser/safe_browsing/safe_browsing_blocking_page_test.cc index f76dd4a..19eae29 100644 --- a/chrome/browser/safe_browsing/safe_browsing_blocking_page_test.cc +++ b/chrome/browser/safe_browsing/safe_browsing_blocking_page_test.cc @@ -36,7 +36,7 @@ class FakeSafeBrowsingService : public SafeBrowsingService { // Otherwise it returns false, and "client" is called asynchronously with the // result when it is ready. // Overrides SafeBrowsingService::CheckUrl. - virtual bool CheckUrl(const GURL& gurl, Client* client) { + virtual bool CheckBrowseUrl(const GURL& gurl, Client* client) { const std::string& url = gurl.spec(); if (badurls[url] == URL_SAFE) return true; @@ -49,7 +49,7 @@ class FakeSafeBrowsingService : public SafeBrowsingService { } void OnCheckDone(std::string url, Client* client) { - client->OnUrlCheckResult(GURL(url), badurls[url]); + client->OnSafeBrowsingResult(GURL(url), badurls[url]); } void AddURLResult(const GURL& url, UrlCheckResult checkresult) { @@ -113,8 +113,8 @@ class SafeBrowsingBlockingPageTest : public InProcessBrowserTest, } // SafeBrowsingService::Client implementation. 
- virtual void OnUrlCheckResult(const GURL& url, - SafeBrowsingService::UrlCheckResult result) { + virtual void OnSafeBrowsingResult( + const GURL& url, SafeBrowsingService::UrlCheckResult result) { } virtual void OnBlockingPageComplete(bool proceed) { } diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index 590be6b..8e0c6c9 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -14,12 +14,18 @@ #include "chrome/browser/safe_browsing/bloom_filter.h" #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" #include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h" +#include "chrome/browser/safe_browsing/safe_browsing_util.h" #include "googleurl/src/gurl.h" namespace { // Filename suffix for the bloom filter. const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); +// Filename suffix for download store. +const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download"); +// Filename suffix for browse store. +// TODO(lzheng): change to a better name when we change the file format. +const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); // The maximum staleness for a cached entry. const int kMaxStalenessMinutes = 45; @@ -27,15 +33,31 @@ const int kMaxStalenessMinutes = 45; // To save space, the incoming |chunk_id| and |list_id| are combined // into an |encoded_chunk_id| for storage by shifting the |list_id| // into the low-order bits. These functions decode that information. -int DecodeListId(const int encoded_chunk_id) { +// TODO(lzheng): It was reasonable when database is saved in sqlite, but +// there should be better ways to save chunk_id and list_id after we use +// SafeBrowsingStoreFile. 
+int GetListIdBit(const int encoded_chunk_id) { return encoded_chunk_id & 1; } int DecodeChunkId(int encoded_chunk_id) { return encoded_chunk_id >> 1; } -int EncodeChunkId(int chunk, int list_id) { - DCHECK(list_id == 0 || list_id == 1); - return chunk << 1 | list_id; +int EncodeChunkId(const int chunk, const int list_id) { + DCHECK_NE(list_id, safe_browsing_util::INVALID); + return chunk << 1 | list_id % 2; +} + +// Get the prefix for download url. +void GetDownloadUrlPrefix(const GURL& url, SBPrefix* prefix) { + std::string hostname; + std::string path; + std::string query; + safe_browsing_util::CanonicalizeUrl(url, &hostname, &path, &query); + + SBFullHash full_hash; + base::SHA256HashString(hostname + path + query, &full_hash, + sizeof(full_hash)); + *prefix = full_hash.prefix; } // Generate the set of prefixes to check for |url|. @@ -44,7 +66,7 @@ int EncodeChunkId(int chunk, int list_id) { // does an early exit on match. Since match should be the infrequent // case (phishing or malware found), consider combining this function // with that one. -void PrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { +void BrowsePrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { std::vector<std::string> hosts; if (url.HostIsIPAddress()) { hosts.push_back(url.host()); @@ -59,7 +81,7 @@ void PrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { for (size_t j = 0; j < paths.size(); ++j) { SBFullHash full_hash; base::SHA256HashString(hosts[i] + paths[j], &full_hash, - sizeof(SBFullHash)); + sizeof(full_hash)); prefixes->push_back(full_hash.prefix); } } @@ -73,10 +95,10 @@ void PrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { // // For efficiency reasons the code walks |prefix_hits| and // |full_hashes| in parallel, so they must be sorted by prefix. 
-void GetCachedFullHashes(const std::vector<SBPrefix>& prefix_hits, - const std::vector<SBAddFullHash>& full_hashes, - std::vector<SBFullHashResult>* full_hits, - base::Time last_update) { +void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits, + const std::vector<SBAddFullHash>& full_hashes, + std::vector<SBFullHashResult>* full_hits, + base::Time last_update) { const base::Time expire_time = base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes); @@ -92,8 +114,11 @@ void GetCachedFullHashes(const std::vector<SBPrefix>& prefix_hits, if (expire_time < last_update || expire_time.ToTimeT() < hiter->received) { SBFullHashResult result; - const int list_id = DecodeListId(hiter->chunk_id); - result.list_name = safe_browsing_util::GetListName(list_id); + const int list_bit = GetListIdBit(hiter->chunk_id); + DCHECK(list_bit == safe_browsing_util::MALWARE || + list_bit == safe_browsing_util::PHISH); + if (!safe_browsing_util::GetListName(list_bit, &result.list_name)) + continue; result.add_chunk_id = DecodeChunkId(hiter->chunk_id); result.hash = hiter->full_hash; full_hits->push_back(result); @@ -105,26 +130,47 @@ void GetCachedFullHashes(const std::vector<SBPrefix>& prefix_hits, } } -// Helper for |UpdateStarted()|. Separates |chunks| into malware and -// phishing vectors, and converts the results into range strings. 
-void GetChunkIds(const std::vector<int>& chunks, - std::string* malware_list, std::string* phishing_list) { - std::vector<int> malware_chunks; - std::vector<int> phishing_chunks; +void GetChunkRanges(const std::vector<int>& chunks, + std::string* list0, + std::string* list1) { + std::vector<int> chunks0; + std::vector<int> chunks1; for (std::vector<int>::const_iterator iter = chunks.begin(); iter != chunks.end(); ++iter) { - if (safe_browsing_util::MALWARE == DecodeListId(*iter)) { - malware_chunks.push_back(DecodeChunkId(*iter)); - } else if (safe_browsing_util::PHISH == DecodeListId(*iter)) { - phishing_chunks.push_back(DecodeChunkId(*iter)); + int mod_list_id = GetListIdBit(*iter); + if (0 == mod_list_id) { + chunks0.push_back(DecodeChunkId(*iter)); } else { - NOTREACHED(); + DCHECK_EQ(1, mod_list_id); + chunks1.push_back(DecodeChunkId(*iter)); } } - ChunksToRangeString(malware_chunks, malware_list); - ChunksToRangeString(phishing_chunks, phishing_list); + ChunksToRangeString(chunks0, list0); + ChunksToRangeString(chunks1, list1); +} + +// Helper function to create chunk range lists for Browse related +// lists. +void UpdateChunkRanges(const std::vector<int>& add_chunks, + const std::vector<int>& sub_chunks, + const std::string& list_name0, + const std::string& list_name1, + std::vector<SBListChunkRanges>* lists) { + DCHECK_EQ(safe_browsing_util::GetListId(list_name0) % 2, 0); + DCHECK_EQ(safe_browsing_util::GetListId(list_name1) % 2, 1); + DCHECK_NE(safe_browsing_util::GetListId(list_name0), + safe_browsing_util::INVALID); + DCHECK_NE(safe_browsing_util::GetListId(list_name1), + safe_browsing_util::INVALID); + + SBListChunkRanges chunkrange0(list_name0); + SBListChunkRanges chunkrange1(list_name1); + GetChunkRanges(add_chunks, &chunkrange0.adds, &chunkrange1.adds); + GetChunkRanges(sub_chunks, &chunkrange0.subs, &chunkrange1.subs); + lists->push_back(chunkrange0); + lists->push_back(chunkrange1); } // Order |SBAddFullHash| on the prefix part. 
|SBAddPrefixLess()| from @@ -138,8 +184,15 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { // The default SafeBrowsingDatabaseFactory. class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { public: - virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase() { - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile); + virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( + bool enable_download_protection) { + if (enable_download_protection) { + // Create database with browse url store and download store. + return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, + new SafeBrowsingStoreFile); + } + // Create database with only browse url store. + return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, NULL); } SafeBrowsingDatabaseFactoryImpl() { } @@ -158,47 +211,76 @@ SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; // SafeBrowsingDatabaseNew. Once that conversion is too far along to // consider reversing, circle back and lift SafeBrowsingDatabaseNew up // to SafeBrowsingDatabase and get rid of the abstract class. 
-SafeBrowsingDatabase* SafeBrowsingDatabase::Create() { +SafeBrowsingDatabase* SafeBrowsingDatabase::Create( + bool enable_download_protection) { if (!factory_) factory_ = new SafeBrowsingDatabaseFactoryImpl(); - return factory_->CreateSafeBrowsingDatabase(); + return factory_->CreateSafeBrowsingDatabase(enable_download_protection); } SafeBrowsingDatabase::~SafeBrowsingDatabase() { } // static +FilePath SafeBrowsingDatabase::BrowseDBFilename( + const FilePath& db_base_filename) { + return FilePath(db_base_filename.value() + kBrowseDBFile); +} + +// static +FilePath SafeBrowsingDatabase::DownloadDBFilename( + const FilePath& db_base_filename) { + return FilePath(db_base_filename.value() + kDownloadDBFile); +} + +// static FilePath SafeBrowsingDatabase::BloomFilterForFilename( const FilePath& db_filename) { return FilePath(db_filename.value() + kBloomFilterFile); } +SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { + DVLOG(3) << "Get store for list: " << list_id; + if (list_id == safe_browsing_util::PHISH || + list_id == safe_browsing_util::MALWARE) { + return browse_store_.get(); + } else if (list_id == safe_browsing_util::BINURL || + list_id == safe_browsing_util::BINHASH) { + return download_store_.get(); + } + return NULL; +} + // static void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) { UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type, FAILURE_DATABASE_MAX); } -SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(SafeBrowsingStore* store) +SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() : creation_loop_(MessageLoop::current()), - store_(store), - ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)), - corruption_detected_(false) { - DCHECK(store_.get()); + browse_store_(new SafeBrowsingStoreSqlite), + download_store_(NULL), + ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) { + DCHECK(browse_store_.get()); + DCHECK(!download_store_.get()); } -SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() 
+SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( + SafeBrowsingStore* browse_store, SafeBrowsingStore* download_store) : creation_loop_(MessageLoop::current()), - store_(new SafeBrowsingStoreSqlite), - ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) { - DCHECK(store_.get()); + browse_store_(browse_store), + download_store_(download_store), + ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)), + corruption_detected_(false) { + DCHECK(browse_store_.get()); } SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { DCHECK_EQ(creation_loop_, MessageLoop::current()); } -void SafeBrowsingDatabaseNew::Init(const FilePath& filename) { +void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { DCHECK_EQ(creation_loop_, MessageLoop::current()); // NOTE: There is no need to grab the lock in this function, since @@ -207,18 +289,28 @@ void SafeBrowsingDatabaseNew::Init(const FilePath& filename) { // contention on the lock... AutoLock locked(lookup_lock_); - DCHECK(filename_.empty()); // Ensure we haven't been run before. + DCHECK(browse_filename_.empty()); // Ensure we haven't been run before. + DCHECK(download_filename_.empty()); // Ensure we haven't been run before. 
- filename_ = filename; - store_->Init( - filename_, + browse_filename_ = BrowseDBFilename(filename_base); + browse_store_->Init( + browse_filename_, NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); - full_hashes_.clear(); - pending_hashes_.clear(); + full_browse_hashes_.clear(); + pending_browse_hashes_.clear(); - bloom_filter_filename_ = BloomFilterForFilename(filename_); + bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); LoadBloomFilter(); + DVLOG(1) << "Init browse store: " << browse_filename_.value(); + + if (download_store_.get()) { + download_filename_ = DownloadDBFilename(filename_base); + download_store_->Init( + download_filename_, + NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); + DVLOG(1) << "Init download store: " << download_filename_.value(); + } } bool SafeBrowsingDatabaseNew::ResetDatabase() { @@ -233,18 +325,19 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() { // Reset objects in memory. { AutoLock locked(lookup_lock_); - full_hashes_.clear(); - pending_hashes_.clear(); + full_browse_hashes_.clear(); + pending_browse_hashes_.clear(); prefix_miss_cache_.clear(); // TODO(shess): This could probably be |bloom_filter_.reset()|. - bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * - BloomFilter::kBloomFilterSizeRatio); + browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * + BloomFilter::kBloomFilterSizeRatio); } return true; } -bool SafeBrowsingDatabaseNew::ContainsUrl( +// TODO(lzheng): Remove matching_list, it is not used anywhere. 
+bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( const GURL& url, std::string* matching_list, std::vector<SBPrefix>* prefix_hits, @@ -256,7 +349,7 @@ bool SafeBrowsingDatabaseNew::ContainsUrl( full_hits->clear(); std::vector<SBPrefix> prefixes; - PrefixesToCheck(url, &prefixes); + BrowsePrefixesToCheck(url, &prefixes); if (prefixes.empty()) return false; @@ -264,13 +357,12 @@ bool SafeBrowsingDatabaseNew::ContainsUrl( // bloom filter and caches. AutoLock locked(lookup_lock_); - if (!bloom_filter_.get()) + if (!browse_bloom_filter_.get()) return false; - // TODO(erikkay): Not filling in matching_list - is that OK? size_t miss_count = 0; for (size_t i = 0; i < prefixes.size(); ++i) { - if (bloom_filter_->Exists(prefixes[i])) { + if (browse_bloom_filter_->Exists(prefixes[i])) { prefix_hits->push_back(prefixes[i]); if (prefix_miss_cache_.count(prefixes[i]) > 0) ++miss_count; @@ -281,21 +373,51 @@ bool SafeBrowsingDatabaseNew::ContainsUrl( if (miss_count == prefix_hits->size()) return false; - // Find the matching full-hash results. |full_hashes_| are from the - // database, |pending_hashes_| are from GetHash requests between + // Find the matching full-hash results. |full_browse_hashes_| are from the + // database, |pending_browse_hashes_| are from GetHash requests between // updates. std::sort(prefix_hits->begin(), prefix_hits->end()); - GetCachedFullHashes(*prefix_hits, full_hashes_, full_hits, last_update); - GetCachedFullHashes(*prefix_hits, pending_hashes_, full_hits, last_update); + + GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_, + full_hits, last_update); + GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_, + full_hits, last_update); return true; } +bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( + const GURL& url, std::vector<SBPrefix>* prefix_hits) { + DCHECK_EQ(creation_loop_, MessageLoop::current()); + prefix_hits->clear(); + + // Ignore this check when download checking is not enabled. 
+ if (!download_store_.get()) return false; + + SBPrefix prefix; + GetDownloadUrlPrefix(url, &prefix); + + std::vector<SBAddPrefix> add_prefixes; + download_store_->GetAddPrefixes(&add_prefixes); + for (size_t i = 0; i < add_prefixes.size(); ++i) { + if (prefix == add_prefixes[i].prefix && + GetListIdBit(add_prefixes[i].chunk_id) == + safe_browsing_util::BINURL % 2) { + prefix_hits->push_back(prefix); + return true; + } + } + return false; +} + // Helper to insert entries for all of the prefixes or full hashes in // |entry| into the store. void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host, const SBEntry* entry, int list_id) { DCHECK_EQ(creation_loop_, MessageLoop::current()); + SafeBrowsingStore* store = GetStore(list_id); + if (!store) return; + STATS_COUNTER("SB.HostInsert", 1); const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); const int count = entry->prefix_count(); @@ -304,13 +426,13 @@ void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host, if (!count) { // No prefixes, use host instead. STATS_COUNTER("SB.PrefixAdd", 1); - store_->WriteAddPrefix(encoded_chunk_id, host); + store->WriteAddPrefix(encoded_chunk_id, host); } else if (entry->IsPrefix()) { // Prefixes only. for (int i = 0; i < count; i++) { const SBPrefix prefix = entry->PrefixAt(i); STATS_COUNTER("SB.PrefixAdd", 1); - store_->WriteAddPrefix(encoded_chunk_id, prefix); + store->WriteAddPrefix(encoded_chunk_id, prefix); } } else { // Prefixes and hashes. 
@@ -320,19 +442,23 @@ void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host, const SBPrefix prefix = full_hash.prefix; STATS_COUNTER("SB.PrefixAdd", 1); - store_->WriteAddPrefix(encoded_chunk_id, prefix); + store->WriteAddPrefix(encoded_chunk_id, prefix); STATS_COUNTER("SB.PrefixAddFull", 1); - store_->WriteAddHash(encoded_chunk_id, receive_time, full_hash); + store->WriteAddHash(encoded_chunk_id, receive_time, full_hash); } } } // Helper to iterate over all the entries in the hosts in |chunks| and // add them to the store. -void SafeBrowsingDatabaseNew::InsertAddChunks(int list_id, +void SafeBrowsingDatabaseNew::InsertAddChunks(const int list_id, const SBChunkList& chunks) { DCHECK_EQ(creation_loop_, MessageLoop::current()); + + SafeBrowsingStore* store = GetStore(list_id); + if (!store) return; + for (SBChunkList::const_iterator citer = chunks.begin(); citer != chunks.end(); ++citer) { const int chunk_id = citer->chunk_number; @@ -340,10 +466,10 @@ void SafeBrowsingDatabaseNew::InsertAddChunks(int list_id, // The server can give us a chunk that we already have because // it's part of a range. Don't add it again. 
const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); - if (store_->CheckAddChunk(encoded_chunk_id)) + if (store->CheckAddChunk(encoded_chunk_id)) continue; - store_->SetAddChunk(encoded_chunk_id); + store->SetAddChunk(encoded_chunk_id); for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin(); hiter != citer->hosts.end(); ++hiter) { // NOTE: Could pass |encoded_chunk_id|, but then inserting add @@ -359,6 +485,9 @@ void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host, const SBEntry* entry, int list_id) { DCHECK_EQ(creation_loop_, MessageLoop::current()); + SafeBrowsingStore* store = GetStore(list_id); + if (!store) return; + STATS_COUNTER("SB.HostDelete", 1); const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); const int count = entry->prefix_count(); @@ -368,7 +497,7 @@ void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host, // No prefixes, use host instead. STATS_COUNTER("SB.PrefixSub", 1); const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id); - store_->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host); + store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host); } else if (entry->IsPrefix()) { // Prefixes only. for (int i = 0; i < count; i++) { @@ -377,7 +506,7 @@ void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host, EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); STATS_COUNTER("SB.PrefixSub", 1); - store_->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix); + store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix); } } else { // Prefixes and hashes. 
@@ -387,10 +516,10 @@ void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host, EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); STATS_COUNTER("SB.PrefixSub", 1); - store_->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix); + store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix); STATS_COUNTER("SB.PrefixSubFull", 1); - store_->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash); + store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash); } } } @@ -400,6 +529,10 @@ void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host, void SafeBrowsingDatabaseNew::InsertSubChunks(int list_id, const SBChunkList& chunks) { DCHECK_EQ(creation_loop_, MessageLoop::current()); + + SafeBrowsingStore* store = GetStore(list_id); + if (!store) return; + for (SBChunkList::const_iterator citer = chunks.begin(); citer != chunks.end(); ++citer) { const int chunk_id = citer->chunk_number; @@ -407,10 +540,10 @@ void SafeBrowsingDatabaseNew::InsertSubChunks(int list_id, // The server can give us a chunk that we already have because // it's part of a range. Don't add it again. 
const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); - if (store_->CheckSubChunk(encoded_chunk_id)) + if (store->CheckSubChunk(encoded_chunk_id)) continue; - store_->SetSubChunk(encoded_chunk_id); + store->SetSubChunk(encoded_chunk_id); for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin(); hiter != citer->hosts.end(); ++hiter) { InsertSub(chunk_id, hiter->host, hiter->entry, list_id); @@ -428,13 +561,18 @@ void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name, const base::Time insert_start = base::Time::Now(); const int list_id = safe_browsing_util::GetListId(list_name); - store_->BeginChunk(); + DVLOG(2) << list_name << ": " << list_id; + + SafeBrowsingStore* store = GetStore(list_id); + if (!store) return; + + store->BeginChunk(); if (chunks.front().is_add) { InsertAddChunks(list_id, chunks); } else { InsertSubChunks(list_id, chunks); } - store_->FinishChunk(); + store->FinishChunk(); UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start); } @@ -449,15 +587,18 @@ void SafeBrowsingDatabaseNew::DeleteChunks( const std::string& list_name = chunk_deletes.front().list_name; const int list_id = safe_browsing_util::GetListId(list_name); + SafeBrowsingStore* store = GetStore(list_id); + if (!store) return; + for (size_t i = 0; i < chunk_deletes.size(); ++i) { std::vector<int> chunk_numbers; RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers); for (size_t j = 0; j < chunk_numbers.size(); ++j) { const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id); if (chunk_deletes[i].is_sub_del) - store_->DeleteSubChunk(encoded_chunk_id); + store->DeleteSubChunk(encoded_chunk_id); else - store_->DeleteAddChunk(encoded_chunk_id); + store->DeleteAddChunk(encoded_chunk_id); } } } @@ -476,19 +617,24 @@ void SafeBrowsingDatabaseNew::CacheHashResults( // TODO(shess): SBFullHashResult and SBAddFullHash are very similar. // Refactor to make them identical. 
const base::Time now = base::Time::Now(); - const size_t orig_size = pending_hashes_.size(); + const size_t orig_size = pending_browse_hashes_.size(); for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); iter != full_hits.end(); ++iter) { const int list_id = safe_browsing_util::GetListId(iter->list_name); - const int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id); - pending_hashes_.push_back(SBAddFullHash(encoded_chunk_id, now, iter->hash)); + if (list_id == safe_browsing_util::MALWARE || + list_id == safe_browsing_util::PHISH) { + int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id); + SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash); + pending_browse_hashes_.push_back(add_full_hash); + } } // Sort new entries then merge with the previously-sorted entries. std::vector<SBAddFullHash>::iterator - orig_end = pending_hashes_.begin() + orig_size; - std::sort(orig_end, pending_hashes_.end(), SBAddFullHashPrefixLess); - std::inplace_merge(pending_hashes_.begin(), orig_end, pending_hashes_.end(), + orig_end = pending_browse_hashes_.begin() + orig_size; + std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess); + std::inplace_merge(pending_browse_hashes_.begin(), + orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess); } @@ -498,50 +644,95 @@ bool SafeBrowsingDatabaseNew::UpdateStarted( DCHECK(lists); // If |BeginUpdate()| fails, reset the database. 
- if (!store_->BeginUpdate()) { - RecordFailure(FAILURE_DATABASE_UPDATE_BEGIN); + if (!browse_store_->BeginUpdate()) { + RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); HandleCorruptDatabase(); return false; } - SBListChunkRanges malware(safe_browsing_util::kMalwareList); - SBListChunkRanges phishing(safe_browsing_util::kPhishingList); - - std::vector<int> add_chunks; - store_->GetAddChunks(&add_chunks); - GetChunkIds(add_chunks, &malware.adds, &phishing.adds); - - std::vector<int> sub_chunks; - store_->GetSubChunks(&sub_chunks); - GetChunkIds(sub_chunks, &malware.subs, &phishing.subs); + if (download_store_.get() && !download_store_->BeginUpdate()) { + RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN); + HandleCorruptDatabase(); + return false; + } - lists->push_back(malware); - lists->push_back(phishing); + std::vector<int> browse_add_chunks; + browse_store_->GetAddChunks(&browse_add_chunks); + std::vector<int> browse_sub_chunks; + browse_store_->GetSubChunks(&browse_sub_chunks); + UpdateChunkRanges(browse_add_chunks, browse_sub_chunks, + safe_browsing_util::kMalwareList, + safe_browsing_util::kPhishingList, + lists); + + if (download_store_.get()) { + std::vector<int> download_add_chunks; + download_store_->GetAddChunks(&download_add_chunks); + std::vector<int> download_sub_chunks; + download_store_->GetSubChunks(&download_sub_chunks); + UpdateChunkRanges(download_add_chunks, download_sub_chunks, + safe_browsing_util::kBinUrlList, + safe_browsing_util::kBinHashList, + lists); + } corruption_detected_ = false; - return true; } void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { DCHECK_EQ(creation_loop_, MessageLoop::current()); - if (corruption_detected_) return; // Unroll any partially-received transaction. 
if (!update_succeeded) { - store_->CancelUpdate(); + browse_store_->CancelUpdate(); + if (download_store_.get()) + download_store_->CancelUpdate(); return; } + // for download + UpdateDownloadStore(); + // for browsing + UpdateBrowseStore(); +} + +void SafeBrowsingDatabaseNew:: UpdateDownloadStore() { + if (!download_store_.get()) + return; + + // For download, we don't cache and save full hashes. + std::vector<SBAddFullHash> empty_add_hashes; + + // For download, backend lookup happens only if a prefix is in add list. + // No need to pass in miss cache when calling FinishUpdate to calculate + // bloomfilter false positives. + std::set<SBPrefix> empty_miss_cache; + + // These results are not used after this call. Simply ignore the + // returned value after FinishUpdate(...). + std::vector<SBAddPrefix> add_prefixes_result; + std::vector<SBAddFullHash> add_full_hashes_result; + + if (download_store_->FinishUpdate(empty_add_hashes, + empty_miss_cache, + &add_prefixes_result, + &add_full_hashes_result)) + RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); + return; +} + +void SafeBrowsingDatabaseNew::UpdateBrowseStore() { // Copy out the pending add hashes. Copy rather than swapping in - // case |ContainsURL()| is called before the new filter is complete. + // case |ContainsBrowseUrl()| is called before the new filter is complete. std::vector<SBAddFullHash> pending_add_hashes; { AutoLock locked(lookup_lock_); pending_add_hashes.insert(pending_add_hashes.end(), - pending_hashes_.begin(), pending_hashes_.end()); + pending_browse_hashes_.begin(), + pending_browse_hashes_.end()); } // Measure the amount of IO during the bloom filter build.
@@ -565,9 +756,9 @@ void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { std::vector<SBAddPrefix> add_prefixes; std::vector<SBAddFullHash> add_full_hashes; - if (!store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_, - &add_prefixes, &add_full_hashes)) { - RecordFailure(FAILURE_DATABASE_UPDATE_FINISH); + if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_, + &add_prefixes, &add_full_hashes)) { + RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); return; } @@ -588,23 +779,22 @@ void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { // Swap in the newly built filter and cache. { AutoLock locked(lookup_lock_); - full_hashes_.swap(add_full_hashes); + full_browse_hashes_.swap(add_full_hashes); // TODO(shess): If |CacheHashResults()| is posted between the // earlier lock and this clear, those pending hashes will be lost. // It could be fixed by only removing hashes which were collected // at the earlier point. I believe that is fail-safe as-is (the // hash will be fetched again). - pending_hashes_.clear(); - + pending_browse_hashes_.clear(); prefix_miss_cache_.clear(); - bloom_filter_.swap(filter); + browse_bloom_filter_.swap(filter); } const base::TimeDelta bloom_gen = base::Time::Now() - before; // Persist the bloom filter to disk. Since only this thread changes - // |bloom_filter_|, there is no need to lock. + // |browse_bloom_filter_|, there is no need to lock. WriteBloomFilter(); // Gather statistics. @@ -622,16 +812,19 @@ void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { static_cast<int>(io_after.WriteOperationCount - io_before.WriteOperationCount)); } - VLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in " - << bloom_gen.InMilliseconds() << " ms total. prefix count: " - << add_prefixes.size(); + DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in " + << bloom_gen.InMilliseconds() << " ms total. 
prefix count: " + << add_prefixes.size(); UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); - UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes", bloom_filter_->size() / 1024); + UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes", + browse_bloom_filter_->size() / 1024); int64 size_64; - if (file_util::GetFileSize(filename_, &size_64)) { - UMA_HISTOGRAM_COUNTS("SB2.DatabaseKilobytes", + if (file_util::GetFileSize(browse_filename_, &size_64)) + UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", + static_cast<int>(size_64 / 1024)); + if (file_util::GetFileSize(download_filename_, &size_64)) + UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", static_cast<int>(size_64 / 1024)); - } } void SafeBrowsingDatabaseNew::HandleCorruptDatabase() { @@ -664,7 +857,7 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() { // TODO(paulg): Investigate how often the filter file is missing and how // expensive it would be to regenerate it. int64 size_64; - if (!file_util::GetFileSize(filename_, &size_64) || size_64 == 0) + if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0) return; if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) || @@ -675,11 +868,11 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() { } const base::TimeTicks before = base::TimeTicks::Now(); - bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); - VLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " - << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; + browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); + DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " + << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; - if (!bloom_filter_.get()) { + if (!browse_bloom_filter_.get()) { UMA_HISTOGRAM_COUNTS("SB2.FilterReadFail", 1); RecordFailure(FAILURE_DATABASE_FILTER_READ); } @@ -688,25 +881,30 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() { bool SafeBrowsingDatabaseNew::Delete() { DCHECK_EQ(creation_loop_, 
MessageLoop::current()); - const bool r1 = store_->Delete(); + const bool r1 = browse_store_->Delete(); if (!r1) RecordFailure(FAILURE_DATABASE_STORE_DELETE); - const bool r2 = file_util::Delete(bloom_filter_filename_, false); + + const bool r2 = download_store_.get() ? download_store_->Delete() : true; if (!r2) + RecordFailure(FAILURE_DATABASE_STORE_DELETE); + + const bool r3 = file_util::Delete(bloom_filter_filename_, false); + if (!r3) RecordFailure(FAILURE_DATABASE_FILTER_DELETE); - return r1 && r2; + return r1 && r2 && r3; } void SafeBrowsingDatabaseNew::WriteBloomFilter() { DCHECK_EQ(creation_loop_, MessageLoop::current()); - if (!bloom_filter_.get()) + if (!browse_bloom_filter_.get()) return; const base::TimeTicks before = base::TimeTicks::Now(); - const bool write_ok = bloom_filter_->WriteFile(bloom_filter_filename_); - VLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in " - << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; + const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_); + DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in " + << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; if (!write_ok) { UMA_HISTOGRAM_COUNTS("SB2.FilterWriteFail", 1); diff --git a/chrome/browser/safe_browsing/safe_browsing_database.h b/chrome/browser/safe_browsing/safe_browsing_database.h index 6dbd6e7..980a8d3 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.h +++ b/chrome/browser/safe_browsing/safe_browsing_database.h @@ -14,7 +14,6 @@ #include "base/scoped_ptr.h" #include "base/task.h" #include "chrome/browser/safe_browsing/safe_browsing_store.h" -#include "chrome/browser/safe_browsing/safe_browsing_util.h" #include "testing/gtest/include/gtest/gtest_prod.h" namespace base { @@ -32,22 +31,29 @@ class SafeBrowsingDatabaseFactory { public: SafeBrowsingDatabaseFactory() { } virtual ~SafeBrowsingDatabaseFactory() { } - virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase() = 0; + virtual 
SafeBrowsingDatabase* CreateSafeBrowsingDatabase( + bool enable_download_protection) = 0; private: DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory); }; -// Encapsulates the database that stores information about phishing -// and malware sites. There is one on-disk database for all profiles, -// as it doesn't contain user-specific data. This object is not -// thread-safe, i.e. all its methods should be used on the same thread -// that it was created on. + +// Encapsulates on-disk databases that for safebrowsing. There are two +// databases: browse database and download database. The browse database +// contains information about phishing and malware urls. The download +// database contains URLs for bad binaries (e.g: those containing virus) +// and hash of these downloaded contents. These on-disk databases are shared +// among all profiles, as it doesn't contain user-specific data. This object +// is not thread-safe, i.e. all its methods should be used on the same thread +// that it was created on. class SafeBrowsingDatabase { public: // Factory method for obtaining a SafeBrowsingDatabase implementation. // It is not thread safe. - static SafeBrowsingDatabase* Create(); + // |enable_download_protection| is used to control the download database + // feature. + static SafeBrowsingDatabase* Create(bool enable_download_protection); // Makes the passed |factory| the factory used to instantiate // a SafeBrowsingDatabase. This is used for tests. @@ -63,16 +69,22 @@ class SafeBrowsingDatabase { // Deletes the current database and creates a new one. virtual bool ResetDatabase() = 0; - // Returns false if |url| is not in the database. If it returns - // true, then either |matching_list| is the name of the matching + // Returns false if |url| is not in the browse database. If it + // returns true, then either |matching_list| is the name of the matching // list, or |prefix_hits| and |full_hits| contains the matching hash // prefixes. 
This function is safe to call from threads other than // the creation thread. - virtual bool ContainsUrl(const GURL& url, - std::string* matching_list, - std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - base::Time last_update) = 0; + virtual bool ContainsBrowseUrl(const GURL& url, + std::string* matching_list, + std::vector<SBPrefix>* prefix_hits, + std::vector<SBFullHashResult>* full_hits, + base::Time last_update) = 0; + + // Returns false if |url| is not in Download database. If it returns true, + // |prefix_hits| should contain the prefix for |url|. + // This function could ONLY be accessed from creation thread. + virtual bool ContainsDownloadUrl(const GURL& url, + std::vector<SBPrefix>* prefix_hits) = 0; // A database transaction should look like: // @@ -112,19 +124,27 @@ class SafeBrowsingDatabase { // The name of the bloom-filter file for the given database file. static FilePath BloomFilterForFilename(const FilePath& db_filename); + // Filename for malware and phishing URL database. + static FilePath BrowseDBFilename(const FilePath& db_base_filename); + + // Filename for download URL and download binary hash database. + static FilePath DownloadDBFilename(const FilePath& db_base_filename); + // Enumerate failures for histogramming purposes. DO NOT CHANGE THE // ORDERING OF THESE VALUES. enum FailureType { FAILURE_DATABASE_CORRUPT, FAILURE_DATABASE_CORRUPT_HANDLER, - FAILURE_DATABASE_UPDATE_BEGIN, - FAILURE_DATABASE_UPDATE_FINISH, + FAILURE_BROWSE_DATABASE_UPDATE_BEGIN, + FAILURE_BROWSE_DATABASE_UPDATE_FINISH, FAILURE_DATABASE_FILTER_MISSING, FAILURE_DATABASE_FILTER_READ, FAILURE_DATABASE_FILTER_WRITE, FAILURE_DATABASE_FILTER_DELETE, FAILURE_DATABASE_STORE_MISSING, FAILURE_DATABASE_STORE_DELETE, + FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN, + FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH, // Memory space for histograms is determined by the max. ALWAYS // ADD NEW VALUES BEFORE THIS ONE. 
@@ -142,12 +162,15 @@ class SafeBrowsingDatabase { class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { public: - // Create a database on the given store. Takes ownership of - // |store|. This method is temporary for - // SafeBrowsingDatabase::Create(), do not use it otherwise. - explicit SafeBrowsingDatabaseNew(SafeBrowsingStore* store); - - // Create a database with a default store. + // Create a database with a browse store and download store. Takes ownership + // of browse_store and download_store. When |download_store| is NULL, + // the database will ignore any operations related download (url hashes and + // binary hashes). + SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store, + SafeBrowsingStore* download_store); + + // Create a database with a browse store. This is a legacy interface that + // useds Sqlite. SafeBrowsingDatabaseNew(); virtual ~SafeBrowsingDatabaseNew(); @@ -155,11 +178,14 @@ class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { // Implement SafeBrowsingDatabase interface. virtual void Init(const FilePath& filename); virtual bool ResetDatabase(); - virtual bool ContainsUrl(const GURL& url, - std::string* matching_list, - std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - base::Time last_update); + virtual bool ContainsBrowseUrl(const GURL& url, + std::string* matching_list, + std::vector<SBPrefix>* prefix_hits, + std::vector<SBFullHashResult>* full_hits, + base::Time last_update); + virtual bool ContainsDownloadUrl(const GURL& url, + std::vector<SBPrefix>* prefix_hits); + virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists); virtual void InsertChunks(const std::string& list_name, const SBChunkList& chunks); @@ -172,7 +198,10 @@ class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { friend class SafeBrowsingDatabaseTest; FRIEND_TEST(SafeBrowsingDatabaseTest, HashCaching); - // Deletes the files on disk. + // Return the browse_store_ or download_store_ based on list_id. 
+ SafeBrowsingStore* GetStore(int list_id); + + // Deletes the files on disk. bool Delete(); // Load the bloom filter off disk, or generates one if it doesn't exist. @@ -197,29 +226,39 @@ class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id); void InsertSubChunks(int list_id, const SBChunkList& chunks); + void UpdateDownloadStore(); + void UpdateBrowseStore(); + // Used to verify that various calls are made from the thread the // object was created on. MessageLoop* creation_loop_; // Lock for protecting access to variables that may be used on the - // IO thread. This includes |bloom_filter_|, |full_hashes_|, - // |pending_hashes_|, and |prefix_miss_cache_|. + // IO thread. This includes |browse_bloom_filter_|, |full_browse_hashes_|, + // |pending_browse_hashes_|, and |prefix_miss_cache_|. Lock lookup_lock_; // Underlying persistent store for chunk data. - FilePath filename_; - scoped_ptr<SafeBrowsingStore> store_; + // For browsing related (phishing and malware URLs) chunks and prefixes. + FilePath browse_filename_; + scoped_ptr<SafeBrowsingStore> browse_store_; + + // For download related (download URL and binary hash) chunks and prefixes. + FilePath download_filename_; + scoped_ptr<SafeBrowsingStore> download_store_; - // Bloom filter generated from the add-prefixes in |store_|. + // Bloom filter generated from the add-prefixes in |browse_store_|. + // Only browse_store_ requires the BloomFilter for fast query. FilePath bloom_filter_filename_; - scoped_refptr<BloomFilter> bloom_filter_; - - // Cached full-hash items, ordered by prefix for efficient scanning. - // |full_hashes_| are items from |store_|, |pending_hashes_| are - // items from |CacheHashResults()|, which will be pushed to the - // store on the next update. 
- std::vector<SBAddFullHash> full_hashes_; - std::vector<SBAddFullHash> pending_hashes_; + scoped_refptr<BloomFilter> browse_bloom_filter_; + + // Cached browse store related full-hash items, ordered by prefix for + // efficient scanning. + // |full_browse_hashes_| are items from |browse_store_|, + // |pending_browse_hashes_| are items from |CacheHashResults()|, which + // will be pushed to the store on the next update. + std::vector<SBAddFullHash> full_browse_hashes_; + std::vector<SBAddFullHash> pending_browse_hashes_; // Cache of prefixes that returned empty results (no full hash // match) to |CacheHashResults()|. Cached to prevent asking for diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc index 89cc553..32ef6d2 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc @@ -89,7 +89,7 @@ bool CorruptSqliteTable(const FilePath& filename, page_size = stmt.ColumnInt(0); stmt.Assign(db.GetUniqueStatement( - "SELECT rootpage FROM sqlite_master WHERE name = ?")); + "SELECT rootpage FROM sqlite_master WHERE name = ?")); stmt.BindString(0, "sub_prefix"); if (!stmt.Step()) return false; @@ -177,7 +177,7 @@ class SafeBrowsingDatabaseTest : public PlatformTest { }; // Tests retrieving list name information. -TEST_F(SafeBrowsingDatabaseTest, ListName) { +TEST_F(SafeBrowsingDatabaseTest, ListNameForBrowse) { SBChunkList chunks; // Insert some malware add chunks. @@ -307,8 +307,90 @@ TEST_F(SafeBrowsingDatabaseTest, ListName) { EXPECT_EQ(lists[1].subs, "200-201"); } -// Checks database reading and writing. 
-TEST_F(SafeBrowsingDatabaseTest, Database) { +TEST_F(SafeBrowsingDatabaseTest, ListNameForBrowseAndDownload) { + database_.reset(); + MessageLoop loop(MessageLoop::TYPE_DEFAULT); + SafeBrowsingStoreFile* browse_store = new SafeBrowsingStoreFile(); + SafeBrowsingStoreFile* download_store = new SafeBrowsingStoreFile(); + database_.reset(new SafeBrowsingDatabaseNew(browse_store, download_store)); + database_->Init(database_filename_); + + SBChunkList chunks; + + // Insert malware, phish, binurl and bindownload add chunks. + SBChunkHost host; + host.host = Sha256Prefix("www.evil.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); + host.entry->set_chunk_id(1); + host.entry->SetPrefixAt(0, Sha256Prefix("www.evil.com/malware.html")); + SBChunk chunk; + chunk.chunk_number = 1; + chunk.is_add = true; + chunk.hosts.push_back(host); + chunks.clear(); + chunks.push_back(chunk); + std::vector<SBListChunkRanges> lists; + + EXPECT_TRUE(database_->UpdateStarted(&lists)); + database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); + + host.host = Sha256Prefix("www.foo.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); + host.entry->set_chunk_id(2); + host.entry->SetPrefixAt(0, Sha256Prefix("www.foo.com/malware.html")); + chunk.chunk_number = 2; + chunk.is_add = true; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + chunks.clear(); + chunks.push_back(chunk); + database_->InsertChunks(safe_browsing_util::kPhishingList, chunks); + + host.host = Sha256Prefix("www.whatever.com/"); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 1); + host.entry->set_chunk_id(3); + host.entry->SetPrefixAt(0, Sha256Prefix("www.whatever.com/download.html")); + chunk.chunk_number = 3; + chunk.is_add = true; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + chunks.clear(); + chunks.push_back(chunk); + database_->InsertChunks(safe_browsing_util::kBinUrlList, chunks); + + host.host = Sha256Prefix("www.forhash.com/"); + host.entry = 
SBEntry::Create(SBEntry::ADD_PREFIX, 1); + host.entry->set_chunk_id(4); + host.entry->SetPrefixAt(0, Sha256Prefix("www.forhash.com/download.html")); + chunk.chunk_number = 4; + chunk.is_add = true; + chunk.hosts.clear(); + chunk.hosts.push_back(host); + chunks.clear(); + chunks.push_back(chunk); + database_->InsertChunks(safe_browsing_util::kBinHashList, chunks); + + database_->UpdateFinished(true); + + GetListsInfo(&lists); + EXPECT_EQ(4U, lists.size()); + EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); + EXPECT_EQ(lists[0].adds, "1"); + EXPECT_TRUE(lists[0].subs.empty()); + EXPECT_TRUE(lists[1].name == safe_browsing_util::kPhishingList); + EXPECT_EQ(lists[1].adds, "2"); + EXPECT_TRUE(lists[1].subs.empty()); + EXPECT_TRUE(lists[2].name == safe_browsing_util::kBinUrlList); + EXPECT_EQ(lists[2].adds, "3"); + EXPECT_TRUE(lists[2].subs.empty()); + EXPECT_TRUE(lists[3].name == safe_browsing_util::kBinHashList); + EXPECT_EQ(lists[3].adds, "4"); + EXPECT_TRUE(lists[3].subs.empty()); + database_.reset(); +} + +// Checks database reading and writing for browse. +TEST_F(SafeBrowsingDatabaseTest, BrowseDatabase) { SBChunkList chunks; // Add a simple chunk with one hostkey. 
@@ -379,44 +461,53 @@ TEST_F(SafeBrowsingDatabaseTest, Database) { std::vector<SBFullHashResult> full_hashes; std::vector<SBPrefix> prefix_hits; std::string matching_list; - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/phishing.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html")); EXPECT_EQ(prefix_hits.size(), 1U); - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/notevil1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.good.com/good1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.good.com/good2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_TRUE(database_->ContainsUrl(GURL("http://192.168.0.1/malware.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/malware.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/notevil1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/notevil2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database_->ContainsBrowseUrl( + 
GURL("http://www.good.com/good1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.good.com/good2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://192.168.0.1/malware.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/"), + &matching_list, &prefix_hits, + &full_hashes, now)); EXPECT_TRUE(prefix_hits.empty()); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/robots.txt"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/robots.txt"), + &matching_list, &prefix_hits, + &full_hashes, now)); @@ -464,28 +555,33 @@ TEST_F(SafeBrowsingDatabaseTest, Database) { database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); database_->UpdateFinished(true); - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/phishing.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); EXPECT_EQ(prefix_hits[0], Sha256Prefix("www.evil.com/phishing.html")); EXPECT_EQ(prefix_hits.size(), 1U); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/notevil1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/notevil1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); EXPECT_TRUE(prefix_hits.empty()); - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/notevil2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - 
EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.good.com/good1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.good.com/good1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.good.com/good2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.good.com/good2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); GetListsInfo(&lists); EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); @@ -520,17 +616,20 @@ TEST_F(SafeBrowsingDatabaseTest, Database) { AddDelChunk(safe_browsing_util::kMalwareList, 2); database_->UpdateFinished(true); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/notevil2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/notevil2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.good.com/good1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.good.com/good1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.good.com/good2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.good.com/good2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); GetListsInfo(&lists); EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); @@ -588,7 +687,7 @@ TEST_F(SafeBrowsingDatabaseTest, Database) { database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); database_->UpdateFinished(true); - EXPECT_FALSE(database_->ContainsUrl( + EXPECT_FALSE(database_->ContainsBrowseUrl( GURL("http://www.notevilanymore.com/index.html"), 
&matching_list, &prefix_hits, &full_hashes, now)); @@ -609,11 +708,11 @@ TEST_F(SafeBrowsingDatabaseTest, Database) { database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); database_->UpdateFinished(true); - EXPECT_FALSE(database_->ContainsUrl( + EXPECT_FALSE(database_->ContainsBrowseUrl( GURL("http://www.notevilanymore.com/index.html"), &matching_list, &prefix_hits, &full_hashes, now)); - EXPECT_FALSE(database_->ContainsUrl( + EXPECT_FALSE(database_->ContainsBrowseUrl( GURL("http://www.notevilanymore.com/good.html"), &matching_list, &prefix_hits, &full_hashes, now)); } @@ -712,12 +811,14 @@ TEST_F(SafeBrowsingDatabaseTest, ZeroSizeChunk) { std::vector<SBFullHashResult> full_hashes; std::vector<SBPrefix> prefix_hits; std::string matching_list; - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.notempty.com/full1.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.notempty.com/full2.html"), - &matching_list, &prefix_hits, - &full_hashes, now)); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.notempty.com/full1.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.notempty.com/full2.html"), + &matching_list, &prefix_hits, + &full_hashes, now)); GetListsInfo(&lists); EXPECT_EQ(lists[0].adds, "1,10,19-22"); @@ -783,14 +884,15 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { PopulateDatabaseForCacheTest(); // We should have both full hashes in the cache. - EXPECT_EQ(database_->pending_hashes_.size(), 2U); + EXPECT_EQ(database_->pending_browse_hashes_.size(), 2U); // Test the cache lookup for the first prefix. 
std::string listname; std::vector<SBPrefix> prefixes; std::vector<SBFullHashResult> full_hashes; - database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &listname, &prefixes, &full_hashes, Time::Now()); + database_->ContainsBrowseUrl( + GURL("http://www.evil.com/phishing.html"), + &listname, &prefixes, &full_hashes, Time::Now()); EXPECT_EQ(full_hashes.size(), 1U); EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, Sha256Hash("www.evil.com/phishing.html"))); @@ -799,8 +901,9 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { full_hashes.clear(); // Test the cache lookup for the second prefix. - database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &listname, &prefixes, &full_hashes, Time::Now()); + database_->ContainsBrowseUrl( + GURL("http://www.evil.com/malware.html"), + &listname, &prefixes, &full_hashes, Time::Now()); EXPECT_EQ(full_hashes.size(), 1U); EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, Sha256Hash("www.evil.com/malware.html"))); @@ -830,8 +933,9 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { database_->UpdateFinished(true); // This prefix should still be there. - database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &listname, &prefixes, &full_hashes, Time::Now()); + database_->ContainsBrowseUrl( + GURL("http://www.evil.com/malware.html"), + &listname, &prefixes, &full_hashes, Time::Now()); EXPECT_EQ(full_hashes.size(), 1U); EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, Sha256Hash("www.evil.com/malware.html"))); @@ -840,8 +944,9 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { full_hashes.clear(); // This prefix should be gone. 
- database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &listname, &prefixes, &full_hashes, Time::Now()); + database_->ContainsBrowseUrl( + GURL("http://www.evil.com/phishing.html"), + &listname, &prefixes, &full_hashes, Time::Now()); EXPECT_TRUE(full_hashes.empty()); prefixes.clear(); @@ -851,11 +956,12 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { EXPECT_TRUE(database_->UpdateStarted(&lists)); AddDelChunk(safe_browsing_util::kMalwareList, 1); database_->UpdateFinished(true); - database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &listname, &prefixes, &full_hashes, Time::Now()); + database_->ContainsBrowseUrl( + GURL("http://www.evil.com/malware.html"), + &listname, &prefixes, &full_hashes, Time::Now()); EXPECT_TRUE(full_hashes.empty()); - EXPECT_TRUE(database_->full_hashes_.empty()); - EXPECT_TRUE(database_->pending_hashes_.empty()); + EXPECT_TRUE(database_->full_browse_hashes_.empty()); + EXPECT_TRUE(database_->pending_browse_hashes_.empty()); prefixes.clear(); full_hashes.clear(); @@ -865,7 +971,7 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { // cache insert uses Time::Now(). First, store some entries. PopulateDatabaseForCacheTest(); - std::vector<SBAddFullHash>* hash_cache = &database_->pending_hashes_; + std::vector<SBAddFullHash>* hash_cache = &database_->pending_browse_hashes_; EXPECT_EQ(hash_cache->size(), 2U); // Now adjust one of the entries times to be in the past. @@ -880,13 +986,15 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { } EXPECT_TRUE(iter != hash_cache->end()); - database_->ContainsUrl(GURL("http://www.evil.com/malware.html"), - &listname, &prefixes, &full_hashes, expired); + database_->ContainsBrowseUrl( + GURL("http://www.evil.com/malware.html"), + &listname, &prefixes, &full_hashes, expired); EXPECT_TRUE(full_hashes.empty()); // This entry should still exist. 
- database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &listname, &prefixes, &full_hashes, expired); + database_->ContainsBrowseUrl( + GURL("http://www.evil.com/phishing.html"), + &listname, &prefixes, &full_hashes, expired); EXPECT_EQ(full_hashes.size(), 1U); @@ -921,9 +1029,10 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { empty_full_hash.clear(); prefix_misses.push_back(Sha256Prefix("www.evil.com/phishing.html")); database_->CacheHashResults(prefix_misses, empty_full_hash); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.evil.com/phishing.html"), - &listname, &prefixes, - &full_hashes, Time::Now())); + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.evil.com/phishing.html"), + &listname, &prefixes, + &full_hashes, Time::Now())); prefixes.clear(); full_hashes.clear(); @@ -945,18 +1054,20 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); database_->UpdateFinished(true); - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad1.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.fullevil.com/bad1.html"), + &listname, &prefixes, &full_hashes, + Time::Now())); EXPECT_EQ(full_hashes.size(), 1U); EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, Sha256Hash("www.fullevil.com/bad1.html"))); prefixes.clear(); full_hashes.clear(); - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad2.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.fullevil.com/bad2.html"), + &listname, &prefixes, &full_hashes, + Time::Now())); EXPECT_EQ(full_hashes.size(), 1U); EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, Sha256Hash("www.fullevil.com/bad2.html"))); @@ -980,15 +1091,17 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); 
database_->UpdateFinished(true); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad1.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.fullevil.com/bad1.html"), + &listname, &prefixes, &full_hashes, + Time::Now())); EXPECT_TRUE(full_hashes.empty()); // There should be one remaining full add. - EXPECT_TRUE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad2.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); + EXPECT_TRUE(database_->ContainsBrowseUrl( + GURL("http://www.fullevil.com/bad2.html"), + &listname, &prefixes, &full_hashes, + Time::Now())); EXPECT_EQ(full_hashes.size(), 1U); EXPECT_TRUE(SBFullHashEq(full_hashes[0].hash, Sha256Hash("www.fullevil.com/bad2.html"))); @@ -1000,12 +1113,14 @@ TEST_F(SafeBrowsingDatabaseTest, HashCaching) { AddDelChunk(safe_browsing_util::kMalwareList, 20); database_->UpdateFinished(true); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad1.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); - EXPECT_FALSE(database_->ContainsUrl(GURL("http://www.fullevil.com/bad2.html"), - &listname, &prefixes, &full_hashes, - Time::Now())); + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.fullevil.com/bad1.html"), + &listname, &prefixes, &full_hashes, + Time::Now())); + EXPECT_FALSE(database_->ContainsBrowseUrl( + GURL("http://www.fullevil.com/bad2.html"), + &listname, &prefixes, &full_hashes, + Time::Now())); } // Test that corrupt databases are appropriately handled, even if the @@ -1019,7 +1134,7 @@ TEST_F(SafeBrowsingDatabaseTest, DISABLED_SqliteCorruptionHandling) { database_.reset(); MessageLoop loop(MessageLoop::TYPE_DEFAULT); SafeBrowsingStoreSqlite* store = new SafeBrowsingStoreSqlite(); - database_.reset(new SafeBrowsingDatabaseNew(store)); + database_.reset(new SafeBrowsingDatabaseNew(store, NULL)); database_->Init(database_filename_); // This will cause an empty database to 
be created. @@ -1097,7 +1212,7 @@ TEST_F(SafeBrowsingDatabaseTest, DISABLED_FileCorruptionHandling) { database_.reset(); MessageLoop loop(MessageLoop::TYPE_DEFAULT); SafeBrowsingStoreFile* store = new SafeBrowsingStoreFile(); - database_.reset(new SafeBrowsingDatabaseNew(store)); + database_.reset(new SafeBrowsingDatabaseNew(store, NULL)); database_->Init(database_filename_); // This will cause an empty database to be created. @@ -1159,3 +1274,65 @@ TEST_F(SafeBrowsingDatabaseTest, DISABLED_FileCorruptionHandling) { database_.reset(); } + +// Checks database reading and writing. +TEST_F(SafeBrowsingDatabaseTest, ContainsDownloadUrl) { + database_.reset(); + MessageLoop loop(MessageLoop::TYPE_DEFAULT); + SafeBrowsingStoreFile* browse_store = new SafeBrowsingStoreFile(); + SafeBrowsingStoreFile* download_store = new SafeBrowsingStoreFile(); + database_.reset(new SafeBrowsingDatabaseNew(browse_store, download_store)); + database_->Init(database_filename_); + + const char kEvil1Host[] = "www.evil1.com/"; + const char kEvil1Url1[] = "www.evil1.com/download1.html"; + const char kEvil1Url2[] = "www.evil1.com/download2.html"; + + SBChunkList chunks; + // Add a simple chunk with one hostkey for download url list. 
+ SBChunkHost host; + host.host = Sha256Prefix(kEvil1Host); + host.entry = SBEntry::Create(SBEntry::ADD_PREFIX, 2); + host.entry->set_chunk_id(1); + host.entry->SetPrefixAt(0, Sha256Prefix(kEvil1Url1)); + host.entry->SetPrefixAt(1, Sha256Prefix(kEvil1Url2)); + SBChunk chunk; + chunk.chunk_number = 1; + chunk.is_add = true; + chunk.hosts.push_back(host); + chunks.clear(); + chunks.push_back(chunk); + std::vector<SBListChunkRanges> lists; + EXPECT_TRUE(database_->UpdateStarted(&lists)); + database_->InsertChunks(safe_browsing_util::kBinUrlList, chunks); + database_->UpdateFinished(true); + + const Time now = Time::Now(); + std::vector<SBPrefix> prefix_hits; + std::string matching_list; + + EXPECT_TRUE(database_->ContainsDownloadUrl( + GURL(std::string("http://") + kEvil1Url1), &prefix_hits)); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url1)); + EXPECT_EQ(prefix_hits.size(), 1U); + + EXPECT_TRUE(database_->ContainsDownloadUrl( + GURL(std::string("http://") + kEvil1Url2), &prefix_hits)); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url2)); + EXPECT_EQ(prefix_hits.size(), 1U); + + EXPECT_TRUE(database_->ContainsDownloadUrl( + GURL(std::string("https://") + kEvil1Url2), &prefix_hits)); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url2)); + EXPECT_EQ(prefix_hits.size(), 1U); + + EXPECT_TRUE(database_->ContainsDownloadUrl( + GURL(std::string("ftp://") + kEvil1Url2), &prefix_hits)); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url2)); + EXPECT_EQ(prefix_hits.size(), 1U); + + EXPECT_FALSE(database_->ContainsDownloadUrl(GURL("http://www.randomevil.com"), + &prefix_hits)); + EXPECT_EQ(prefix_hits.size(), 0U); + database_.reset(); +} diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc index 94b97fa..657778a 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.cc +++ b/chrome/browser/safe_browsing/safe_browsing_service.cc @@ -110,6 +110,7 @@ SafeBrowsingService::SafeBrowsingService() : 
database_(NULL), protocol_manager_(NULL), enabled_(false), + enable_download_protection_(false), update_in_progress_(false), database_update_in_progress_(false), closing_database_(false) { @@ -134,7 +135,61 @@ bool SafeBrowsingService::CanCheckUrl(const GURL& url) const { url.SchemeIs(chrome::kHttpsScheme); } -bool SafeBrowsingService::CheckUrl(const GURL& url, Client* client) { +void SafeBrowsingService::CheckDownloadUrlDone( + Client* client, const GURL& url, UrlCheckResult result) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + DCHECK(enable_download_protection_); + VLOG(1) << "CheckDownloadUrlDone: " << result; + client->OnSafeBrowsingResult(url, result); +} + +void SafeBrowsingService::CheckDownloadUrlOnSBThread(const GURL& url, + Client* client) { + DCHECK_EQ(MessageLoop::current(), safe_browsing_thread_->message_loop()); + DCHECK(enable_download_protection_); + + std::vector<SBPrefix> prefix_hits; + + if (!database_->ContainsDownloadUrl(url, &prefix_hits)) { + // Good, we don't have hash for this url prefix. + BrowserThread::PostTask( + BrowserThread::IO, FROM_HERE, + NewRunnableMethod(this, + &SafeBrowsingService::CheckDownloadUrlDone, + client, url, URL_SAFE)); + return; + } + + // Now, we need to fetch the url from the safebrowsing backends. + // Needs to be asynchronous, since we could be in the constructor of a + // ResourceDispatcherHost event handler which can't pause there. 
+ SafeBrowsingCheck* check = new SafeBrowsingCheck(); + + check->url = url; + check->client = client; + check->result = URL_SAFE; + check->need_get_hash = true; + check->prefix_hits.swap(prefix_hits); + checks_.insert(check); + + BrowserThread::PostTask( + BrowserThread::IO, FROM_HERE, + NewRunnableMethod(this, &SafeBrowsingService::OnCheckDone, check)); +} + +bool SafeBrowsingService::CheckDownloadUrl(const GURL& url, + Client* client) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!enabled_ || !enable_download_protection_) + return true; + + safe_browsing_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::CheckDownloadUrlOnSBThread, url, client)); + return false; +} + +bool SafeBrowsingService::CheckBrowseUrl(const GURL& url, + Client* client) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); if (!enabled_) return true; @@ -154,9 +209,10 @@ bool SafeBrowsingService::CheckUrl(const GURL& url, Client* client) { std::vector<SBPrefix> prefix_hits; std::vector<SBFullHashResult> full_hits; base::Time check_start = base::Time::Now(); - bool prefix_match = database_->ContainsUrl(url, &list, &prefix_hits, - &full_hits, - protocol_manager_->last_update()); + bool prefix_match = + database_->ContainsBrowseUrl(url, &list, &prefix_hits, + &full_hits, + protocol_manager_->last_update()); UMA_HISTOGRAM_TIMES("SB2.FilterCheck", base::Time::Now() - check_start); @@ -405,6 +461,11 @@ void SafeBrowsingService::OnIOInitialize( URLRequestContextGetter* request_context_getter) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); enabled_ = true; + + CommandLine* cmdline = CommandLine::ForCurrentProcess(); + enable_download_protection_ = + cmdline->HasSwitch(switches::kSbEnableDownloadProtection); + MakeDatabaseAvailable(); // On Windows, get the safe browsing client name from the browser @@ -420,7 +481,6 @@ void SafeBrowsingService::OnIOInitialize( std::string client_name("chromium"); #endif #endif - CommandLine* 
cmdline = CommandLine::ForCurrentProcess(); bool disable_auto_update = cmdline->HasSwitch(switches::kSbDisableAutoUpdate) || cmdline->HasSwitch(switches::kDisableBackgroundNetworking); @@ -463,7 +523,7 @@ void SafeBrowsingService::OnIOShutdown() { while (!queued_checks_.empty()) { QueuedCheck check = queued_checks_.front(); if (check.client) - check.client->OnUrlCheckResult(check.url, URL_SAFE); + check.client->OnSafeBrowsingResult(check.url, URL_SAFE); queued_checks_.pop_front(); } @@ -487,7 +547,7 @@ void SafeBrowsingService::OnIOShutdown() { for (CurrentChecks::iterator it = checks_.begin(); it != checks_.end(); ++it) { if ((*it)->client) - (*it)->client->OnUrlCheckResult((*it)->url, URL_SAFE); + (*it)->client->OnSafeBrowsingResult((*it)->url, URL_SAFE); delete *it; } checks_.clear(); @@ -518,10 +578,13 @@ SafeBrowsingDatabase* SafeBrowsingService::GetDatabase() { FilePath path; bool result = PathService::Get(chrome::DIR_USER_DATA, &path); DCHECK(result); - path = path.Append(chrome::kSafeBrowsingFilename); + path = path.Append(chrome::kSafeBrowsingBaseFilename); Time before = Time::Now(); - SafeBrowsingDatabase* database = SafeBrowsingDatabase::Create(); + + SafeBrowsingDatabase* database = + SafeBrowsingDatabase::Create(enable_download_protection_); + database->Init(path); { // Acquiring the lock here guarantees correct ordering between the writes to @@ -632,8 +695,8 @@ void SafeBrowsingService::DatabaseLoadComplete() { // If CheckUrl() determines the URL is safe immediately, it doesn't call the // client's handler function (because normally it's being directly called by // the client). Since we're not the client, we have to convey this result. 
- if (check.client && CheckUrl(check.url, check.client)) - check.client->OnUrlCheckResult(check.url, URL_SAFE); + if (check.client && CheckBrowseUrl(check.url, check.client)) + check.client->OnSafeBrowsingResult(check.url, URL_SAFE); queued_checks_.pop_front(); } } @@ -670,6 +733,10 @@ SafeBrowsingService::UrlCheckResult SafeBrowsingService::GetResultFromListname( return URL_MALWARE; } + if (safe_browsing_util::IsBadbinurlList(list_name)) { + return BINARY_MALWARE; + } + DVLOG(1) << "Unknown safe browsing list " << list_name; return URL_SAFE; } @@ -781,7 +848,7 @@ void SafeBrowsingService::HandleOneCheck( } // Let the client continue handling the original request. - check->client->OnUrlCheckResult(check->url, result); + check->client->OnSafeBrowsingResult(check->url, result); } checks_.erase(check); diff --git a/chrome/browser/safe_browsing/safe_browsing_service.h b/chrome/browser/safe_browsing/safe_browsing_service.h index b9633f9..abade47 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.h +++ b/chrome/browser/safe_browsing/safe_browsing_service.h @@ -44,18 +44,36 @@ class SafeBrowsingService URL_SAFE, URL_PHISHING, URL_MALWARE, + BINARY_MALWARE, // This binary is a malware. }; class Client { public: virtual ~Client() {} - // Called when the result of checking a URL is known. - virtual void OnUrlCheckResult(const GURL& url, UrlCheckResult result) = 0; + void OnSafeBrowsingResult(const GURL& url, UrlCheckResult result) { + OnBrowseUrlCheckResult(url, result); + OnDownloadUrlCheckResult(url, result); + // TODO(lzheng): This is not implemented yet. + // OnDownloadHashCheckResult(url, result); + } // Called when the user has made a decision about how to handle the // SafeBrowsing interstitial page. - virtual void OnBlockingPageComplete(bool proceed) = 0; + virtual void OnBlockingPageComplete(bool proceed) {} + + protected: + // Called when the result of checking a browse URL is known. 
+ virtual void OnBrowseUrlCheckResult(const GURL& url, + UrlCheckResult result) {} + + // Called when the result of checking a download URL is known. + virtual void OnDownloadUrlCheckResult(const GURL& url, + UrlCheckResult result) {} + + // Called when the result of checking a download binary hash is known. + virtual void OnDownloadHashCheckResult(const GURL& url, + UrlCheckResult result) {} }; // Structure used to pass parameters between the IO and UI thread when @@ -112,7 +130,11 @@ class SafeBrowsingService // can synchronously determine that the url is safe, CheckUrl returns true. // Otherwise it returns false, and "client" is called asynchronously with the // result when it is ready. - virtual bool CheckUrl(const GURL& url, Client* client); + virtual bool CheckBrowseUrl(const GURL& url, Client* client); + + // Check if the prefix for |url| is in safebrowsing download add lists. + // Result will be passed to callback in |client|. + bool CheckDownloadUrl(const GURL& url, Client* client); // Called on the IO thread to cancel a pending check if the result is no // longer needed. @@ -299,6 +321,14 @@ class SafeBrowsingService bool is_subresource, UrlCheckResult threat_type); + // Invoked by CheckDownloadUrl. It checks the download URL on + // safe_browsing_thread_. + void CheckDownloadUrlOnSBThread(const GURL& url, Client* client); + + // Call the Client's callback in IO thread after CheckDownloadUrl finishes. + void CheckDownloadUrlDone(Client* client, const GURL& url, + UrlCheckResult result); + // The factory used to instanciate a SafeBrowsingService object. // Useful for tests, so they can provide their own implementation of // SafeBrowsingService. @@ -325,6 +355,10 @@ class SafeBrowsingService // on the IO thread during normal operations. bool enabled_; + // Indicate if download_protection is enabled by command switch + // so we allow this feature to be exersized. + bool enable_download_protection_; + // The SafeBrowsing thread that runs database operations. 
// // Note: Functions that run on this thread should run synchronously and return diff --git a/chrome/browser/safe_browsing/safe_browsing_service_browsertest.cc b/chrome/browser/safe_browsing/safe_browsing_service_browsertest.cc index cbc0b5f..c495481 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service_browsertest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_service_browsertest.cc @@ -42,11 +42,11 @@ class TestSafeBrowsingDatabase : public SafeBrowsingDatabase { // Called on the IO thread to check if the given URL is safe or not. If we // can synchronously determine that the URL is safe, CheckUrl returns true, // otherwise it returns false. - virtual bool ContainsUrl(const GURL& url, - std::string* matching_list, - std::vector<SBPrefix>* prefix_hits, - std::vector<SBFullHashResult>* full_hits, - base::Time last_update) { + virtual bool ContainsBrowseUrl(const GURL& url, + std::string* matching_list, + std::vector<SBPrefix>* prefix_hits, + std::vector<SBFullHashResult>* full_hits, + base::Time last_update) { base::hash_map<std::string, Hits>::const_iterator badurls_it = badurls_.find(url.spec()); if (badurls_it == badurls_.end()) @@ -56,6 +56,12 @@ class TestSafeBrowsingDatabase : public SafeBrowsingDatabase { return true; } + virtual bool ContainsDownloadUrl(const GURL& url, + std::vector<SBPrefix>* prefix_hits) { + ADD_FAILURE() << "Not implemented."; + return false; + } + virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) { ADD_FAILURE() << "Not implemented."; return false; @@ -97,7 +103,8 @@ class TestSafeBrowsingDatabaseFactory : public SafeBrowsingDatabaseFactory { TestSafeBrowsingDatabaseFactory() : db_(NULL) {} virtual ~TestSafeBrowsingDatabaseFactory() {} - virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase() { + virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( + bool enable_download_protection) { db_ = new TestSafeBrowsingDatabase(); return db_; } diff --git 
a/chrome/browser/safe_browsing/safe_browsing_store_file.cc b/chrome/browser/safe_browsing/safe_browsing_store_file.cc index 1791c32..cc8ae87 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_file.cc +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.cc @@ -44,7 +44,7 @@ bool FileRewind(FILE* fp) { bool FileSkip(size_t bytes, FILE* fp) { // Although fseek takes negative values, for this case, we only want // to skip forward. - DCHECK(static_cast<long>(bytes) > 0); + DCHECK(static_cast<long>(bytes) >= 0); if (static_cast<long>(bytes) < 0) return false; int rv = fseek(fp, static_cast<long>(bytes), SEEK_CUR); diff --git a/chrome/browser/safe_browsing/safe_browsing_test.cc b/chrome/browser/safe_browsing/safe_browsing_test.cc index ba2c3a6..259d3ff 100644 --- a/chrome/browser/safe_browsing/safe_browsing_test.cc +++ b/chrome/browser/safe_browsing/safe_browsing_test.cc @@ -228,7 +228,7 @@ class SafeBrowsingServiceTest : public InProcessBrowserTest { void CheckUrl(SafeBrowsingService::Client* helper, const GURL& url) { ASSERT_TRUE(safe_browsing_service_); AutoLock lock(update_status_mutex_); - if (safe_browsing_service_->CheckUrl(url, helper)) { + if (safe_browsing_service_->CheckBrowseUrl(url, helper)) { is_checked_url_in_db_ = false; is_checked_url_safe_ = true; } else { @@ -345,8 +345,8 @@ class SafeBrowsingServiceTestHelper } // Callbacks for SafeBrowsingService::Client. 
- virtual void OnUrlCheckResult(const GURL& url, - SafeBrowsingService::UrlCheckResult result) { + virtual void OnBrowseUrlCheckResult( + const GURL& url, SafeBrowsingService::UrlCheckResult result) { EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::IO)); EXPECT_TRUE(safe_browsing_test_->is_checked_url_in_db()); safe_browsing_test_->set_is_checked_url_safe( @@ -355,6 +355,11 @@ class SafeBrowsingServiceTestHelper NewRunnableMethod(this, &SafeBrowsingServiceTestHelper::OnCheckUrlDone)); } + virtual void OnDownloadUrlCheckResult( + const GURL& url, SafeBrowsingService::UrlCheckResult result) { + // TODO(lzheng): Add test for DownloadUrl. + } + virtual void OnBlockingPageComplete(bool proceed) { NOTREACHED() << "Not implemented."; } diff --git a/chrome/browser/safe_browsing/safe_browsing_util.cc b/chrome/browser/safe_browsing/safe_browsing_util.cc index 0f99bf2..f43ee51 100644 --- a/chrome/browser/safe_browsing/safe_browsing_util.cc +++ b/chrome/browser/safe_browsing/safe_browsing_util.cc @@ -175,16 +175,43 @@ namespace safe_browsing_util { const char kMalwareList[] = "goog-malware-shavar"; const char kPhishingList[] = "goog-phish-shavar"; +const char kBinUrlList[] = "goog-badbinurl-shavar"; +const char kBinHashList[] = "goog-badbinhash-shavar"; + int GetListId(const std::string& name) { - if (name == kMalwareList) - return MALWARE; - return (name == kPhishingList) ? PHISH : INVALID; + int id; + if (name == safe_browsing_util::kMalwareList) { + id = MALWARE; + } else if (name == safe_browsing_util::kPhishingList) { + id = PHISH; + } else if (name == safe_browsing_util::kBinUrlList) { + id = BINURL; + } else if (name == safe_browsing_util::kBinHashList) { + id = BINHASH; + } else { + id = INVALID; + } + return id; } -std::string GetListName(int list_id) { - if (list_id == MALWARE) - return kMalwareList; - return (list_id == PHISH) ? 
kPhishingList : std::string(); +bool GetListName(int list_id, std::string* list) { + switch (list_id) { + case MALWARE: + *list = safe_browsing_util::kMalwareList; + break; + case PHISH: + *list = safe_browsing_util::kPhishingList; + break; + case BINURL: + *list = safe_browsing_util::kBinUrlList; + break; + case BINHASH: + *list = safe_browsing_util::kBinHashList; + break; + default: + return false; + } + return true; } std::string Unescape(const std::string& url) { @@ -426,6 +453,10 @@ bool IsMalwareList(const std::string& list_name) { return list_name.find("-malware-") != std::string::npos; } +bool IsBadbinurlList(const std::string& list_name) { + return list_name.find("-badbinurl-") != std::string::npos; +} + static void DecodeWebSafe(std::string* decoded) { DCHECK(decoded); for (std::string::iterator i(decoded->begin()); i != decoded->end(); ++i) { diff --git a/chrome/browser/safe_browsing/safe_browsing_util.h b/chrome/browser/safe_browsing/safe_browsing_util.h index a4fd2b6..b0351c1 100644 --- a/chrome/browser/safe_browsing/safe_browsing_util.h +++ b/chrome/browser/safe_browsing/safe_browsing_util.h @@ -261,16 +261,23 @@ namespace safe_browsing_util { // SafeBrowsing list names. extern const char kMalwareList[]; extern const char kPhishingList[]; +// Binary Download list names. +extern const char kBinUrlList[]; +extern const char kBinHashList[]; -// Converts between the SafeBrowsing list names and their enumerated value. -// If the list names change, both of these methods must be updated. enum ListType { INVALID = -1, MALWARE = 0, PHISH = 1, + BINURL = 2, + BINHASH = 3, }; + +// Maps a list name to ListType. int GetListId(const std::string& name); -std::string GetListName(int list_id); +// Maps a ListId to list name. Return false if fails. +bool GetListName(int list_id, std::string* list); + // Canonicalizes url as per Google Safe Browsing Specification. 
// See section 6.1 in @@ -294,6 +301,7 @@ int CompareFullHashes(const GURL& url, bool IsPhishingList(const std::string& list_name); bool IsMalwareList(const std::string& list_name); +bool IsBadbinurlList(const std::string& list_name); // Returns 'true' if 'mac' can be verified using 'key' and 'data'. bool VerifyMAC(const std::string& key, diff --git a/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc index 5204bfe..7bf7efc 100644 --- a/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc @@ -294,3 +294,43 @@ TEST(SafeBrowsingUtilTest, FullHashCompare) { url = GURL("http://www.evil.com/okay_path.html"); EXPECT_EQ(safe_browsing_util::CompareFullHashes(url, full_hashes), -1); } + +TEST(SafeBrowsingUtilTest, ListIdListNameConversion) { + std::string list_name; + EXPECT_FALSE(safe_browsing_util::GetListName(safe_browsing_util::INVALID, + &list_name)); + EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::MALWARE, + &list_name)); + EXPECT_EQ(list_name, std::string(safe_browsing_util::kMalwareList)); + EXPECT_EQ(safe_browsing_util::MALWARE, + safe_browsing_util::GetListId(list_name)); + + EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::PHISH, + &list_name)); + EXPECT_EQ(list_name, std::string(safe_browsing_util::kPhishingList)); + EXPECT_EQ(safe_browsing_util::PHISH, + safe_browsing_util::GetListId(list_name)); + + EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINURL, + &list_name)); + EXPECT_EQ(list_name, std::string(safe_browsing_util::kBinUrlList)); + EXPECT_EQ(safe_browsing_util::BINURL, + safe_browsing_util::GetListId(list_name)); + + + EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINHASH, + &list_name)); + EXPECT_EQ(list_name, std::string(safe_browsing_util::kBinHashList)); + EXPECT_EQ(safe_browsing_util::BINHASH, + safe_browsing_util::GetListId(list_name)); 
+} + +// Since the ids are saved in file, we need to make sure they don't change. +// Since only the last bit of each id is saved in file together with +// chunkids, this checks only last bit. +TEST(SafeBrowsingUtilTest, ListIdVerification) { + EXPECT_EQ(0, safe_browsing_util::MALWARE % 2); + EXPECT_EQ(1, safe_browsing_util::PHISH % 2); + EXPECT_EQ(0, safe_browsing_util::BINURL %2); + EXPECT_EQ(1, safe_browsing_util::BINHASH % 2); +} |