diff options
Diffstat (limited to 'chrome/browser/safe_browsing/safe_browsing_database.cc')
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database.cc | 325 |
1 files changed, 237 insertions, 88 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index 00b16f7..5b9d9a9 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -4,6 +4,8 @@ #include "chrome/browser/safe_browsing/safe_browsing_database.h" +#include <algorithm> + #include "base/file_util.h" #include "base/metrics/histogram.h" #include "base/metrics/stats_counters.h" @@ -22,6 +24,9 @@ namespace { const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); // Filename suffix for download store. const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download"); +// Filename suffix for client-side phishing detection whitelist store. +const FilePath::CharType kCsdWhitelistDBFile[] = + FILE_PATH_LITERAL(" Csd Whitelist"); // Filename suffix for browse store. // TODO(lzheng): change to a better name when we change the file format. const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); @@ -29,6 +34,16 @@ const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); // The maximum staleness for a cached entry. const int kMaxStalenessMinutes = 45; +// Maximum number of entries we allow in the client-side phishing detection +// whitelist. If the whitelist on disk contains more entries then +// ContainsCsdWhitelistedUrl will always return true. +const size_t kMaxCsdWhitelistSize = 5000; + +// If the hash of this exact expression is on the csd whitelist then +// ContainsCsdWhitelistedUrl will always return true. +const char kCsdKillSwitchUrl[] = + "sb-ssl.google.com/safebrowsing/csd/killswitch"; + // To save space, the incoming |chunk_id| and |list_id| are combined // into an |encoded_chunk_id| for storage by shifting the |list_id| // into the low-order bits. These functions decode that information. @@ -59,13 +74,14 @@ void GetDownloadUrlPrefix(const GURL& url, SBPrefix* prefix) { *prefix = full_hash.prefix; } -// Generate the set of prefixes to check for |url|. +// Generate the set of full hashes to check for |url|. // TODO(shess): This function is almost the same as // |CompareFullHashes()| in safe_browsing_util.cc, except that code // does an early exit on match. Since match should be the infrequent // case (phishing or malware found), consider combining this function // with that one. -void BrowsePrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { +void BrowseFullHashesToCheck(const GURL& url, + std::vector<SBFullHash>* full_hashes) { std::vector<std::string> hosts; if (url.HostIsIPAddress()) { hosts.push_back(url.host()); @@ -81,7 +97,7 @@ void BrowsePrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { SBFullHash full_hash; base::SHA256HashString(hosts[i] + paths[j], &full_hash, sizeof(full_hash)); - prefixes->push_back(full_hash.prefix); + full_hashes->push_back(full_hash); } } } @@ -129,47 +145,55 @@ void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits, } } +// This function generates a chunk range string for |chunks|. It +// outputs one chunk range string per list and writes it to the +// |list_ranges| vector. We expect |list_ranges| to already be of the +// right size. E.g., if |chunks| contains chunks with two different +// list ids then |list_ranges| must contain two elements. void GetChunkRanges(const std::vector<int>& chunks, - std::string* list0, - std::string* list1) { - std::vector<int> chunks0; - std::vector<int> chunks1; - + std::vector<std::string>* list_ranges) { + DCHECK_GT(list_ranges->size(), 0U); + DCHECK_LE(list_ranges->size(), 2U); + std::vector<std::vector<int> > decoded_chunks(list_ranges->size()); for (std::vector<int>::const_iterator iter = chunks.begin(); iter != chunks.end(); ++iter) { int mod_list_id = GetListIdBit(*iter); - if (0 == mod_list_id) { - chunks0.push_back(DecodeChunkId(*iter)); - } else { - DCHECK_EQ(1, mod_list_id); - chunks1.push_back(DecodeChunkId(*iter)); - } + DCHECK_GE(mod_list_id, 0); + DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size()); + decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); + } + for (size_t i = 0; i < decoded_chunks.size(); ++i) { + ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); } - - ChunksToRangeString(chunks0, list0); - ChunksToRangeString(chunks1, list1); } // Helper function to create chunk range lists for Browse related // lists. -void UpdateChunkRanges(const std::vector<int>& add_chunks, - const std::vector<int>& sub_chunks, - const std::string& list_name0, - const std::string& list_name1, +void UpdateChunkRanges(SafeBrowsingStore* store, + const std::vector<std::string>& listnames, std::vector<SBListChunkRanges>* lists) { - DCHECK_EQ(safe_browsing_util::GetListId(list_name0) % 2, 0); - DCHECK_EQ(safe_browsing_util::GetListId(list_name1) % 2, 1); - DCHECK_NE(safe_browsing_util::GetListId(list_name0), - safe_browsing_util::INVALID); - DCHECK_NE(safe_browsing_util::GetListId(list_name1), - safe_browsing_util::INVALID); - - SBListChunkRanges chunkrange0(list_name0); - SBListChunkRanges chunkrange1(list_name1); - GetChunkRanges(add_chunks, &chunkrange0.adds, &chunkrange1.adds); - GetChunkRanges(sub_chunks, &chunkrange0.subs, &chunkrange1.subs); - lists->push_back(chunkrange0); - lists->push_back(chunkrange1); + DCHECK_GT(listnames.size(), 0U); + DCHECK_LE(listnames.size(), 2U); + std::vector<int> add_chunks; + std::vector<int> sub_chunks; + store->GetAddChunks(&add_chunks); + store->GetSubChunks(&sub_chunks); + + std::vector<std::string> adds(listnames.size()); + std::vector<std::string> subs(listnames.size()); + GetChunkRanges(add_chunks, &adds); + GetChunkRanges(sub_chunks, &subs); + + for (size_t i = 0; i < listnames.size(); ++i) { + const std::string& listname = listnames[i]; + DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, + static_cast<int>(i % 2)); + DCHECK_NE(safe_browsing_util::GetListId(listname), + safe_browsing_util::INVALID); + lists->push_back(SBListChunkRanges(listname)); + lists->back().adds.swap(adds[i]); + lists->back().subs.swap(subs[i]); + } } // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from @@ -207,14 +231,12 @@ void RecordPrefixSetInfo(PrefixSetEvent event_type) { class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { public: virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( - bool enable_download_protection) { - if (enable_download_protection) { - // Create database with browse url store and download store. - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, - new SafeBrowsingStoreFile); - } - // Create database with only browse url store. - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, NULL); + bool enable_download_protection, + bool enable_client_side_whitelist) { + return new SafeBrowsingDatabaseNew( + new SafeBrowsingStoreFile, + enable_download_protection ? new SafeBrowsingStoreFile : NULL, + enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL); } SafeBrowsingDatabaseFactoryImpl() { } @@ -232,10 +254,12 @@ SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() // callers just construct things directly. SafeBrowsingDatabase* SafeBrowsingDatabase::Create( - bool enable_download_protection) { + bool enable_download_protection, + bool enable_client_side_whitelist) { if (!factory_) factory_ = new SafeBrowsingDatabaseFactoryImpl(); - return factory_->CreateSafeBrowsingDatabase(enable_download_protection); + return factory_->CreateSafeBrowsingDatabase(enable_download_protection, + enable_client_side_whitelist); } SafeBrowsingDatabase::~SafeBrowsingDatabase() { @@ -259,6 +283,12 @@ FilePath SafeBrowsingDatabase::BloomFilterForFilename( return FilePath(db_filename.value() + kBloomFilterFile); } +// static +FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( + const FilePath& db_filename) { + return FilePath(db_filename.value() + kCsdWhitelistDBFile); +} + SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { DVLOG(3) << "Get store for list: " << list_id; if (list_id == safe_browsing_util::PHISH || @@ -267,6 +297,8 @@ SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { } else if (list_id == safe_browsing_util::BINURL || list_id == safe_browsing_util::BINHASH) { return download_store_.get(); + } else if (list_id == safe_browsing_util::CSDWHITELIST) { + return csd_whitelist_store_.get(); } return NULL; } @@ -281,16 +313,21 @@ SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() : creation_loop_(MessageLoop::current()), browse_store_(new SafeBrowsingStoreFile), download_store_(NULL), + csd_whitelist_store_(NULL), ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) { DCHECK(browse_store_.get()); DCHECK(!download_store_.get()); + DCHECK(!csd_whitelist_store_.get()); } SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( - SafeBrowsingStore* browse_store, SafeBrowsingStore* download_store) + SafeBrowsingStore* browse_store, + SafeBrowsingStore* download_store, + SafeBrowsingStore* csd_whitelist_store) : creation_loop_(MessageLoop::current()), browse_store_(browse_store), download_store_(download_store), + csd_whitelist_store_(csd_whitelist_store), ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)), corruption_detected_(false) { DCHECK(browse_store_.get()); @@ -302,28 +339,30 @@ SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { DCHECK_EQ(creation_loop_, MessageLoop::current()); - - // NOTE: There is no need to grab the lock in this function, since - // until it returns, there are no pointers to this class on other - // threads. Then again, that means there is no possibility of - // contention on the lock... - base::AutoLock locked(lookup_lock_); - - DCHECK(browse_filename_.empty()); // Ensure we haven't been run before. - DCHECK(download_filename_.empty()); // Ensure we haven't been run before. + // Ensure we haven't been run before. + DCHECK(browse_filename_.empty()); + DCHECK(download_filename_.empty()); + DCHECK(csd_whitelist_filename_.empty()); browse_filename_ = BrowseDBFilename(filename_base); + bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); + browse_store_->Init( browse_filename_, NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); - - full_browse_hashes_.clear(); - pending_browse_hashes_.clear(); - - bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); - LoadBloomFilter(); DVLOG(1) << "Init browse store: " << browse_filename_.value(); + { + // NOTE: There is no need to grab the lock in this function, since + // until it returns, there are no pointers to this class on other + // threads. Then again, that means there is no possibility of + // contention on the lock... + base::AutoLock locked(lookup_lock_); + full_browse_hashes_.clear(); + pending_browse_hashes_.clear(); + LoadBloomFilter(); + } + if (download_store_.get()) { download_filename_ = DownloadDBFilename(filename_base); download_store_->Init( @@ -331,6 +370,22 @@ void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); DVLOG(1) << "Init download store: " << download_filename_.value(); } + + if (csd_whitelist_store_.get()) { + csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base); + csd_whitelist_store_->Init( + csd_whitelist_filename_, + NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); + DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value(); + std::vector<SBAddFullHash> full_hashes; + if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { + LoadCsdWhitelist(full_hashes); + } else { + CsdWhitelistAllUrls(); + } + } else { + CsdWhitelistAllUrls(); // Just to be safe. + } } bool SafeBrowsingDatabaseNew::ResetDatabase() { @@ -354,6 +409,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() { // TODO(shess): It is simpler for the code to assume that presence // of a bloom filter always implies presence of a prefix set. prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>())); + CsdWhitelistAllUrls(); } return true; @@ -371,9 +427,9 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( prefix_hits->clear(); full_hits->clear(); - std::vector<SBPrefix> prefixes; - BrowsePrefixesToCheck(url, &prefixes); - if (prefixes.empty()) + std::vector<SBFullHash> full_hashes; + BrowseFullHashesToCheck(url, &full_hashes); + if (full_hashes.empty()) return false; // This function is called on the I/O thread, prevent changes to @@ -388,15 +444,15 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( std::vector<SBPrefix> restored; size_t miss_count = 0; - for (size_t i = 0; i < prefixes.size(); ++i) { - bool found = prefix_set_->Exists(prefixes[i]); + for (size_t i = 0; i < full_hashes.size(); ++i) { + bool found = prefix_set_->Exists(full_hashes[i].prefix); - if (browse_bloom_filter_->Exists(prefixes[i])) { + if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT); if (found) RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT); - prefix_hits->push_back(prefixes[i]); - if (prefix_miss_cache_.count(prefixes[i]) > 0) + prefix_hits->push_back(full_hashes[i].prefix); + if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) ++miss_count; } else { // Bloom filter misses should never be in prefix set. Re-create @@ -413,7 +469,8 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( // If the item is not in the re-created list, then there is an // error in |PrefixSet::Exists()|. If the item is in the // re-created list, then the bloom filter was wrong. - if (std::binary_search(restored.begin(), restored.end(), prefixes[i])) { + if (std::binary_search(restored.begin(), restored.end(), + full_hashes[i].prefix)) { RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); } else { RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID); @@ -481,6 +538,24 @@ bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix( &prefix_hit); } +bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { + // This method is theoretically thread-safe but we expect all calls to + // originate from the creation thread. + DCHECK_EQ(creation_loop_, MessageLoop::current()); + base::AutoLock l(lookup_lock_); + if (csd_whitelist_all_urls_) + return true; + + std::vector<SBFullHash> full_hashes; + BrowseFullHashesToCheck(url, &full_hashes); + for (std::vector<SBFullHash>::const_iterator it = full_hashes.begin(); + it != full_hashes.end(); ++it) { + if (std::binary_search(csd_whitelist_.begin(), csd_whitelist_.end(), *it)) + return true; + } + return false; +} + // Helper to insert entries for all of the prefixes or full hashes in // |entry| into the store. void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host, @@ -732,24 +807,29 @@ bool SafeBrowsingDatabaseNew::UpdateStarted( return false; } - std::vector<int> browse_add_chunks; - browse_store_->GetAddChunks(&browse_add_chunks); - std::vector<int> browse_sub_chunks; - browse_store_->GetSubChunks(&browse_sub_chunks); - UpdateChunkRanges(browse_add_chunks, browse_sub_chunks, - safe_browsing_util::kMalwareList, - safe_browsing_util::kPhishingList, - lists); + if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { + RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN); + HandleCorruptDatabase(); + return false; + } + + std::vector<std::string> browse_listnames; + browse_listnames.push_back(safe_browsing_util::kMalwareList); + browse_listnames.push_back(safe_browsing_util::kPhishingList); + UpdateChunkRanges(browse_store_.get(), browse_listnames, lists); if (download_store_.get()) { - std::vector<int> download_add_chunks; - download_store_->GetAddChunks(&download_add_chunks); - std::vector<int> download_sub_chunks; - download_store_->GetSubChunks(&download_sub_chunks); - UpdateChunkRanges(download_add_chunks, download_sub_chunks, - safe_browsing_util::kBinUrlList, - safe_browsing_util::kBinHashList, - lists); + std::vector<std::string> download_listnames; + download_listnames.push_back(safe_browsing_util::kBinUrlList); + download_listnames.push_back(safe_browsing_util::kBinHashList); + UpdateChunkRanges(download_store_.get(), download_listnames, lists); + } + + if (csd_whitelist_store_.get()) { + std::vector<std::string> csd_whitelist_listnames; + csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList); + UpdateChunkRanges(csd_whitelist_store_.get(), + csd_whitelist_listnames, lists); } corruption_detected_ = false; @@ -772,6 +852,8 @@ void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { browse_store_->CancelUpdate(); if (download_store_.get()) download_store_->CancelUpdate(); + if (csd_whitelist_store_.get()) + csd_whitelist_store_->CancelUpdate(); return; } @@ -779,9 +861,37 @@ void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { UpdateDownloadStore(); // for browsing UpdateBrowseStore(); + // for csd whitelist + UpdateCsdWhitelistStore(); +} + +void SafeBrowsingDatabaseNew::UpdateCsdWhitelistStore() { + if (!csd_whitelist_store_.get()) + return; + + // For the csd whitelist, we don't cache and save full hashes since all + // hashes are already full. + std::vector<SBAddFullHash> empty_add_hashes; + + // Not needed for the csd whitelist. + std::set<SBPrefix> empty_miss_cache; + + // Note: prefixes will not be empty. The current data store implementation + // stores all full-length hashes as both full and prefix hashes. + std::vector<SBAddPrefix> prefixes; + std::vector<SBAddFullHash> full_hashes; + if (!csd_whitelist_store_->FinishUpdate(empty_add_hashes, + empty_miss_cache, + &prefixes, + &full_hashes)) { + RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH); + CsdWhitelistAllUrls(); + return; + } + LoadCsdWhitelist(full_hashes); } -void SafeBrowsingDatabaseNew:: UpdateDownloadStore() { +void SafeBrowsingDatabaseNew::UpdateDownloadStore() { if (!download_store_.get()) return; @@ -1010,10 +1120,15 @@ bool SafeBrowsingDatabaseNew::Delete() { if (!r2) RecordFailure(FAILURE_DATABASE_STORE_DELETE); - const bool r3 = file_util::Delete(bloom_filter_filename_, false); + const bool r3 = csd_whitelist_store_.get() ? + csd_whitelist_store_->Delete() : true; if (!r3) + RecordFailure(FAILURE_DATABASE_STORE_DELETE); + + const bool r4 = file_util::Delete(bloom_filter_filename_, false); + if (!r4) RecordFailure(FAILURE_DATABASE_FILTER_DELETE); - return r1 && r2 && r3; + return r1 && r2 && r3 && r4; } void SafeBrowsingDatabaseNew::WriteBloomFilter() { @@ -1030,3 +1145,37 @@ void SafeBrowsingDatabaseNew::WriteBloomFilter() { if (!write_ok) RecordFailure(FAILURE_DATABASE_FILTER_WRITE); } + +void SafeBrowsingDatabaseNew::CsdWhitelistAllUrls() { + base::AutoLock locked(lookup_lock_); + csd_whitelist_all_urls_ = true; + csd_whitelist_.clear(); +} + +void SafeBrowsingDatabaseNew::LoadCsdWhitelist( + const std::vector<SBAddFullHash>& full_hashes) { + DCHECK_EQ(creation_loop_, MessageLoop::current()); + if (full_hashes.size() > kMaxCsdWhitelistSize) { + CsdWhitelistAllUrls(); + return; + } + + std::vector<SBFullHash> new_csd_whitelist; + for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); + it != full_hashes.end(); ++it) { + new_csd_whitelist.push_back(it->full_hash); + } + std::sort(new_csd_whitelist.begin(), new_csd_whitelist.end()); + + SBFullHash kill_switch; + base::SHA256HashString(kCsdKillSwitchUrl, &kill_switch, sizeof(kill_switch)); + if (std::binary_search(new_csd_whitelist.begin(), new_csd_whitelist.end(), + kill_switch)) { + // The kill switch is whitelisted hence we whitelist all URLs. + CsdWhitelistAllUrls(); + } else { + base::AutoLock locked(lookup_lock_); + csd_whitelist_all_urls_ = false; + csd_whitelist_.swap(new_csd_whitelist); + } +} |