diff options
Diffstat (limited to 'chrome/browser/safe_browsing/safe_browsing_database.cc')
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database.cc | 595 |
1 files changed, 440 insertions, 155 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index 00b16f7..16c4b38 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -1,19 +1,23 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/safe_browsing/safe_browsing_database.h" +#include <algorithm> +#include <iterator> + #include "base/file_util.h" #include "base/metrics/histogram.h" #include "base/metrics/stats_counters.h" #include "base/time.h" #include "base/message_loop.h" #include "base/process_util.h" -#include "base/sha2.h" +#include "crypto/sha2.h" #include "chrome/browser/safe_browsing/bloom_filter.h" #include "chrome/browser/safe_browsing/prefix_set.h" #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" +#include "content/browser/browser_thread.h" #include "googleurl/src/gurl.h" namespace { @@ -22,6 +26,9 @@ namespace { const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); // Filename suffix for download store. const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download"); +// Filename suffix for client-side phishing detection whitelist store. +const FilePath::CharType kCsdWhitelistDBFile[] = + FILE_PATH_LITERAL(" Csd Whitelist"); // Filename suffix for browse store. // TODO(lzheng): change to a better name when we change the file format. const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); @@ -29,6 +36,16 @@ const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); // The maximum staleness for a cached entry. const int kMaxStalenessMinutes = 45; +// Maximum number of entries we allow in the client-side phishing detection +// whitelist. If the whitelist on disk contains more entries then +// ContainsCsdWhitelistedUrl will always return true. +const size_t kMaxCsdWhitelistSize = 5000; + +// If the hash of this exact expression is on the csd whitelist then +// ContainsCsdWhitelistedUrl will always return true. +const char kCsdKillSwitchUrl[] = + "sb-ssl.google.com/safebrowsing/csd/killswitch"; + // To save space, the incoming |chunk_id| and |list_id| are combined // into an |encoded_chunk_id| for storage by shifting the |list_id| // into the low-order bits. These functions decode that information. @@ -46,26 +63,19 @@ int EncodeChunkId(const int chunk, const int list_id) { return chunk << 1 | list_id % 2; } -// Get the prefix for download url. -void GetDownloadUrlPrefix(const GURL& url, SBPrefix* prefix) { - std::string hostname; - std::string path; - std::string query; - safe_browsing_util::CanonicalizeUrl(url, &hostname, &path, &query); - - SBFullHash full_hash; - base::SHA256HashString(hostname + path + query, &full_hash, - sizeof(full_hash)); - *prefix = full_hash.prefix; -} - -// Generate the set of prefixes to check for |url|. +// Generate the set of full hashes to check for |url|. If +// |include_whitelist_hashes| is true we will generate additional path-prefixes +// to match against the csd whitelist. E.g., if the path-prefix /foo is on the +// whitelist it should also match /foo/bar which is not the case for all the +// other lists. // TODO(shess): This function is almost the same as // |CompareFullHashes()| in safe_browsing_util.cc, except that code // does an early exit on match. Since match should be the infrequent // case (phishing or malware found), consider combining this function // with that one. -void BrowsePrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { +void BrowseFullHashesToCheck(const GURL& url, + bool include_whitelist_hashes, + std::vector<SBFullHash>* full_hashes) { std::vector<std::string> hosts; if (url.HostIsIPAddress()) { hosts.push_back(url.host()); @@ -78,14 +88,37 @@ void BrowsePrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { for (size_t i = 0; i < hosts.size(); ++i) { for (size_t j = 0; j < paths.size(); ++j) { + const std::string& path = paths[j]; SBFullHash full_hash; - base::SHA256HashString(hosts[i] + paths[j], &full_hash, - sizeof(full_hash)); - prefixes->push_back(full_hash.prefix); + crypto::SHA256HashString(hosts[i] + path, &full_hash, + sizeof(full_hash)); + full_hashes->push_back(full_hash); + + // We may have /foo as path-prefix in the whitelist which should + // also match with /foo/bar and /foo?bar. Hence, for every path + // that ends in '/' we also add the path without the slash. + if (include_whitelist_hashes && + path.size() > 1 && + path[path.size() - 1] == '/') { + crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1), + &full_hash, sizeof(full_hash)); + full_hashes->push_back(full_hash); + } } } } +// Get the prefixes matching the download |urls|. +void GetDownloadUrlPrefixes(const std::vector<GURL>& urls, + std::vector<SBPrefix>* prefixes) { + std::vector<SBFullHash> full_hashes; + for (size_t i = 0; i < urls.size(); ++i) + BrowseFullHashesToCheck(urls[i], false, &full_hashes); + + for (size_t i = 0; i < full_hashes.size(); ++i) + prefixes->push_back(full_hashes[i].prefix); +} + // Find the entries in |full_hashes| with prefix in |prefix_hits|, and // add them to |full_hits| if not expired. "Not expired" is when // either |last_update| was recent enough, or the item has been @@ -129,47 +162,55 @@ void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits, } } +// This function generates a chunk range string for |chunks|. It +// outputs one chunk range string per list and writes it to the +// |list_ranges| vector. We expect |list_ranges| to already be of the +// right size. E.g., if |chunks| contains chunks with two different +// list ids then |list_ranges| must contain two elements. void GetChunkRanges(const std::vector<int>& chunks, - std::string* list0, - std::string* list1) { - std::vector<int> chunks0; - std::vector<int> chunks1; - + std::vector<std::string>* list_ranges) { + DCHECK_GT(list_ranges->size(), 0U); + DCHECK_LE(list_ranges->size(), 2U); + std::vector<std::vector<int> > decoded_chunks(list_ranges->size()); for (std::vector<int>::const_iterator iter = chunks.begin(); iter != chunks.end(); ++iter) { int mod_list_id = GetListIdBit(*iter); - if (0 == mod_list_id) { - chunks0.push_back(DecodeChunkId(*iter)); - } else { - DCHECK_EQ(1, mod_list_id); - chunks1.push_back(DecodeChunkId(*iter)); - } + DCHECK_GE(mod_list_id, 0); + DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size()); + decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); + } + for (size_t i = 0; i < decoded_chunks.size(); ++i) { + ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); } - - ChunksToRangeString(chunks0, list0); - ChunksToRangeString(chunks1, list1); } // Helper function to create chunk range lists for Browse related // lists. -void UpdateChunkRanges(const std::vector<int>& add_chunks, - const std::vector<int>& sub_chunks, - const std::string& list_name0, - const std::string& list_name1, +void UpdateChunkRanges(SafeBrowsingStore* store, + const std::vector<std::string>& listnames, std::vector<SBListChunkRanges>* lists) { - DCHECK_EQ(safe_browsing_util::GetListId(list_name0) % 2, 0); - DCHECK_EQ(safe_browsing_util::GetListId(list_name1) % 2, 1); - DCHECK_NE(safe_browsing_util::GetListId(list_name0), - safe_browsing_util::INVALID); - DCHECK_NE(safe_browsing_util::GetListId(list_name1), - safe_browsing_util::INVALID); - - SBListChunkRanges chunkrange0(list_name0); - SBListChunkRanges chunkrange1(list_name1); - GetChunkRanges(add_chunks, &chunkrange0.adds, &chunkrange1.adds); - GetChunkRanges(sub_chunks, &chunkrange0.subs, &chunkrange1.subs); - lists->push_back(chunkrange0); - lists->push_back(chunkrange1); + DCHECK_GT(listnames.size(), 0U); + DCHECK_LE(listnames.size(), 2U); + std::vector<int> add_chunks; + std::vector<int> sub_chunks; + store->GetAddChunks(&add_chunks); + store->GetSubChunks(&sub_chunks); + + std::vector<std::string> adds(listnames.size()); + std::vector<std::string> subs(listnames.size()); + GetChunkRanges(add_chunks, &adds); + GetChunkRanges(sub_chunks, &subs); + + for (size_t i = 0; i < listnames.size(); ++i) { + const std::string& listname = listnames[i]; + DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, + static_cast<int>(i % 2)); + DCHECK_NE(safe_browsing_util::GetListId(listname), + safe_browsing_util::INVALID); + lists->push_back(SBListChunkRanges(listname)); + lists->back().adds.swap(adds[i]); + lists->back().subs.swap(subs[i]); + } } // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from @@ -190,6 +231,14 @@ enum PrefixSetEvent { PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT, PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID, PREFIX_SET_GETPREFIXES_BROKEN, + PREFIX_SET_GETPREFIXES_BROKEN_SIZE, + PREFIX_SET_GETPREFIXES_FIRST_BROKEN, + PREFIX_SET_SBPREFIX_WAS_BROKEN, + PREFIX_SET_GETPREFIXES_BROKEN_SORTING, + PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION, + PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA, + PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX, + PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH, // Memory space for histograms is determined by the max. ALWAYS ADD // NEW VALUES BEFORE THIS ONE. @@ -201,20 +250,152 @@ void RecordPrefixSetInfo(PrefixSetEvent event_type) { PREFIX_SET_EVENT_MAX); } +// Generate a |PrefixSet| instance from the contents of +// |add_prefixes|. Additionally performs various checks to make sure +// that the resulting prefix set is valid, so that the +// PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in +// ContainsBrowseUrl() can be trustworthy. +safe_browsing::PrefixSet* PrefixSetFromAddPrefixes( + const std::vector<SBAddPrefix>& add_prefixes) { + // TODO(shess): If |add_prefixes| were sorted by the prefix, it + // could be passed directly to |PrefixSet()|, removing the need for + // |prefixes|. For now, |prefixes| is useful while debugging + // things. + std::vector<SBPrefix> prefixes; + for (size_t i = 0; i < add_prefixes.size(); ++i) { + prefixes.push_back(add_prefixes[i].prefix); + } + + std::sort(prefixes.begin(), prefixes.end()); + prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), + prefixes.end()); + + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(new safe_browsing::PrefixSet(prefixes)); + + std::vector<SBPrefix> restored; + prefix_set->GetPrefixes(&restored); + + // Expect them to be equal. + if (restored.size() == prefixes.size() && + std::equal(prefixes.begin(), prefixes.end(), restored.begin())) + return prefix_set.release(); + + // Log BROKEN for continuity with previous release, and SIZE to + // distinguish which test failed. + NOTREACHED(); + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); + if (restored.size() != prefixes.size()) + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE); + + // Try to distinguish between updates from one broken user and a + // distributed problem. + static bool logged_broken = false; + if (!logged_broken) { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN); + logged_broken = true; + } + + // This seems so very very unlikely. But if it ever were true, then + // it could explain why GetPrefixes() seemed broken. + if (sizeof(int) != sizeof(int32)) + RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN); + + // Check if memory was corrupted during construction. + if (!prefix_set->CheckChecksum()) + RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH); + + // Check whether |restored| is unsorted, or has duplication. + if (restored.size()) { + size_t unsorted_count = 0; + bool duplicates = false; + SBPrefix prev = restored[0]; + for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) { + if (prev > restored[i]) { + unsorted_count++; + UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference", + prev - restored[i]); + + // When unsorted, how big is the set, and how far are we into + // it. If the set is very small or large, that might inform + // pursuit of a degenerate case. If the percentage is close + // to 0%, 100%, or 50%, then there might be an interesting + // degenerate case to explore. + UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size()); + UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent", + i * 100 / restored.size()); + + if (prefix_set->IsDeltaAt(i)) { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA); + + // Histograms require memory on the order of the number of + // buckets, making high-precision logging expensive. For + // now aim for a sense of the range of the problem. + UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta", + prefix_set->DeltaAt(i), 1, 0xFFFF, 50); + } else { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX); + } + } + if (prev == restored[i]) + duplicates = true; + } + + // Record findings. + if (unsorted_count) { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING); + UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count); + } + if (duplicates) + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION); + + // Fix the problems noted. If |restored| was unsorted, then + // |duplicates| may give a false negative. + if (unsorted_count) + std::sort(restored.begin(), restored.end()); + if (unsorted_count || duplicates) + restored.erase(std::unique(restored.begin(), restored.end()), + restored.end()); + } + + // NOTE(shess): The following could be done using a single + // uber-loop, but it's complicated by needing multiple parallel + // iterators. Didn't seem worthwhile for something that will only + // live for a short period and only fires for one in a million + // updates. + + // Find elements in |restored| which are not in |prefixes|. + std::vector<SBPrefix> difference; + std::set_difference(restored.begin(), restored.end(), + prefixes.begin(), prefixes.end(), + std::back_inserter(difference)); + if (difference.size()) + UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size()); + + // Find elements in |prefixes| which are not in |restored|. + difference.clear(); + std::set_difference(prefixes.begin(), prefixes.end(), + restored.begin(), restored.end(), + std::back_inserter(difference)); + if (difference.size()) + UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall", + difference.size()); + + return prefix_set.release(); +} + } // namespace // The default SafeBrowsingDatabaseFactory. class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { public: virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( - bool enable_download_protection) { - if (enable_download_protection) { - // Create database with browse url store and download store. - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, - new SafeBrowsingStoreFile); - } - // Create database with only browse url store. - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, NULL); + bool enable_download_protection, + bool enable_client_side_whitelist) { + return new SafeBrowsingDatabaseNew( + new SafeBrowsingStoreFile, + enable_download_protection ? new SafeBrowsingStoreFile : NULL, + enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL); } SafeBrowsingDatabaseFactoryImpl() { } @@ -232,10 +413,12 @@ SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() // callers just construct things directly. SafeBrowsingDatabase* SafeBrowsingDatabase::Create( - bool enable_download_protection) { + bool enable_download_protection, + bool enable_client_side_whitelist) { if (!factory_) factory_ = new SafeBrowsingDatabaseFactoryImpl(); - return factory_->CreateSafeBrowsingDatabase(enable_download_protection); + return factory_->CreateSafeBrowsingDatabase(enable_download_protection, + enable_client_side_whitelist); } SafeBrowsingDatabase::~SafeBrowsingDatabase() { @@ -259,6 +442,12 @@ FilePath SafeBrowsingDatabase::BloomFilterForFilename( return FilePath(db_filename.value() + kBloomFilterFile); } +// static +FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( + const FilePath& db_filename) { + return FilePath(db_filename.value() + kCsdWhitelistDBFile); +} + SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { DVLOG(3) << "Get store for list: " << list_id; if (list_id == safe_browsing_util::PHISH || @@ -267,6 +456,8 @@ SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { } else if (list_id == safe_browsing_util::BINURL || list_id == safe_browsing_util::BINHASH) { return download_store_.get(); + } else if (list_id == safe_browsing_util::CSDWHITELIST) { + return csd_whitelist_store_.get(); } return NULL; } @@ -281,16 +472,21 @@ SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() : creation_loop_(MessageLoop::current()), browse_store_(new SafeBrowsingStoreFile), download_store_(NULL), + csd_whitelist_store_(NULL), ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) { DCHECK(browse_store_.get()); DCHECK(!download_store_.get()); + DCHECK(!csd_whitelist_store_.get()); } SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( - SafeBrowsingStore* browse_store, SafeBrowsingStore* download_store) + SafeBrowsingStore* browse_store, + SafeBrowsingStore* download_store, + SafeBrowsingStore* csd_whitelist_store) : creation_loop_(MessageLoop::current()), browse_store_(browse_store), download_store_(download_store), + csd_whitelist_store_(csd_whitelist_store), ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)), corruption_detected_(false) { DCHECK(browse_store_.get()); @@ -302,28 +498,30 @@ SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { DCHECK_EQ(creation_loop_, MessageLoop::current()); - - // NOTE: There is no need to grab the lock in this function, since - // until it returns, there are no pointers to this class on other - // threads. Then again, that means there is no possibility of - // contention on the lock... - base::AutoLock locked(lookup_lock_); - - DCHECK(browse_filename_.empty()); // Ensure we haven't been run before. - DCHECK(download_filename_.empty()); // Ensure we haven't been run before. + // Ensure we haven't been run before. + DCHECK(browse_filename_.empty()); + DCHECK(download_filename_.empty()); + DCHECK(csd_whitelist_filename_.empty()); browse_filename_ = BrowseDBFilename(filename_base); + bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); + browse_store_->Init( browse_filename_, NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); - - full_browse_hashes_.clear(); - pending_browse_hashes_.clear(); - - bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); - LoadBloomFilter(); DVLOG(1) << "Init browse store: " << browse_filename_.value(); + { + // NOTE: There is no need to grab the lock in this function, since + // until it returns, there are no pointers to this class on other + // threads. Then again, that means there is no possibility of + // contention on the lock... + base::AutoLock locked(lookup_lock_); + full_browse_hashes_.clear(); + pending_browse_hashes_.clear(); + LoadBloomFilter(); + } + if (download_store_.get()) { download_filename_ = DownloadDBFilename(filename_base); download_store_->Init( @@ -331,6 +529,22 @@ void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); DVLOG(1) << "Init download store: " << download_filename_.value(); } + + if (csd_whitelist_store_.get()) { + csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base); + csd_whitelist_store_->Init( + csd_whitelist_filename_, + NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); + DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value(); + std::vector<SBAddFullHash> full_hashes; + if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { + LoadCsdWhitelist(full_hashes); + } else { + CsdWhitelistAllUrls(); + } + } else { + CsdWhitelistAllUrls(); // Just to be safe. + } } bool SafeBrowsingDatabaseNew::ResetDatabase() { @@ -355,6 +569,8 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() { // of a bloom filter always implies presence of a prefix set. prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>())); } + // Wants to acquire the lock itself. + CsdWhitelistAllUrls(); return true; } @@ -371,9 +587,9 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( prefix_hits->clear(); full_hits->clear(); - std::vector<SBPrefix> prefixes; - BrowsePrefixesToCheck(url, &prefixes); - if (prefixes.empty()) + std::vector<SBFullHash> full_hashes; + BrowseFullHashesToCheck(url, false, &full_hashes); + if (full_hashes.empty()) return false; // This function is called on the I/O thread, prevent changes to @@ -388,15 +604,15 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( std::vector<SBPrefix> restored; size_t miss_count = 0; - for (size_t i = 0; i < prefixes.size(); ++i) { - bool found = prefix_set_->Exists(prefixes[i]); + for (size_t i = 0; i < full_hashes.size(); ++i) { + bool found = prefix_set_->Exists(full_hashes[i].prefix); - if (browse_bloom_filter_->Exists(prefixes[i])) { + if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT); if (found) RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT); - prefix_hits->push_back(prefixes[i]); - if (prefix_miss_cache_.count(prefixes[i]) > 0) + prefix_hits->push_back(full_hashes[i].prefix); + if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) ++miss_count; } else { // Bloom filter misses should never be in prefix set. Re-create @@ -413,7 +629,8 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( // If the item is not in the re-created list, then there is an // error in |PrefixSet::Exists()|. If the item is in the // re-created list, then the bloom filter was wrong. - if (std::binary_search(restored.begin(), restored.end(), prefixes[i])) { + if (std::binary_search(restored.begin(), restored.end(), + full_hashes[i].prefix)) { RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); } else { RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID); @@ -439,32 +656,39 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( } bool SafeBrowsingDatabaseNew::MatchDownloadAddPrefixes( - int list_bit, const SBPrefix& prefix, SBPrefix* prefix_hit) { + int list_bit, + const std::vector<SBPrefix>& prefixes, + std::vector<SBPrefix>* prefix_hits) { + prefix_hits->clear(); + std::vector<SBAddPrefix> add_prefixes; download_store_->GetAddPrefixes(&add_prefixes); for (size_t i = 0; i < add_prefixes.size(); ++i) { - if (prefix == add_prefixes[i].prefix && - GetListIdBit(add_prefixes[i].chunk_id) == list_bit) { - *prefix_hit = prefix; - return true; + for (size_t j = 0; j < prefixes.size(); ++j) { + const SBPrefix& prefix = prefixes[j]; + if (prefix == add_prefixes[i].prefix && + GetListIdBit(add_prefixes[i].chunk_id) == list_bit) { + prefix_hits->push_back(prefix); + } } } - return false; + return !prefix_hits->empty(); } -bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(const GURL& url, - SBPrefix* prefix_hit) { +bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( + const std::vector<GURL>& urls, + std::vector<SBPrefix>* prefix_hits) { DCHECK_EQ(creation_loop_, MessageLoop::current()); // Ignore this check when download checking is not enabled. if (!download_store_.get()) return false; - SBPrefix prefix; - GetDownloadUrlPrefix(url, &prefix); + std::vector<SBPrefix> prefixes; + GetDownloadUrlPrefixes(urls, &prefixes); return MatchDownloadAddPrefixes(safe_browsing_util::BINURL % 2, - prefix, - prefix_hit); + prefixes, + prefix_hits); } bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix( @@ -475,10 +699,29 @@ bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix( if (!download_store_.get()) return false; - SBPrefix prefix_hit; + std::vector<SBPrefix> prefixes(1, prefix); + std::vector<SBPrefix> prefix_hits; return MatchDownloadAddPrefixes(safe_browsing_util::BINHASH % 2, - prefix, - &prefix_hit); + prefixes, + &prefix_hits); +} + +bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { + // This method is theoretically thread-safe but we expect all calls to + // originate from the IO thread. + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + base::AutoLock l(lookup_lock_); + if (csd_whitelist_all_urls_) + return true; + + std::vector<SBFullHash> full_hashes; + BrowseFullHashesToCheck(url, true, &full_hashes); + for (std::vector<SBFullHash>::const_iterator it = full_hashes.begin(); + it != full_hashes.end(); ++it) { + if (std::binary_search(csd_whitelist_.begin(), csd_whitelist_.end(), *it)) + return true; + } + return false; } // Helper to insert entries for all of the prefixes or full hashes in @@ -732,24 +975,29 @@ bool SafeBrowsingDatabaseNew::UpdateStarted( return false; } - std::vector<int> browse_add_chunks; - browse_store_->GetAddChunks(&browse_add_chunks); - std::vector<int> browse_sub_chunks; - browse_store_->GetSubChunks(&browse_sub_chunks); - UpdateChunkRanges(browse_add_chunks, browse_sub_chunks, - safe_browsing_util::kMalwareList, - safe_browsing_util::kPhishingList, - lists); + if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { + RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN); + HandleCorruptDatabase(); + return false; + } + + std::vector<std::string> browse_listnames; + browse_listnames.push_back(safe_browsing_util::kMalwareList); + browse_listnames.push_back(safe_browsing_util::kPhishingList); + UpdateChunkRanges(browse_store_.get(), browse_listnames, lists); if (download_store_.get()) { - std::vector<int> download_add_chunks; - download_store_->GetAddChunks(&download_add_chunks); - std::vector<int> download_sub_chunks; - download_store_->GetSubChunks(&download_sub_chunks); - UpdateChunkRanges(download_add_chunks, download_sub_chunks, - safe_browsing_util::kBinUrlList, - safe_browsing_util::kBinHashList, - lists); + std::vector<std::string> download_listnames; + download_listnames.push_back(safe_browsing_util::kBinUrlList); + download_listnames.push_back(safe_browsing_util::kBinHashList); + UpdateChunkRanges(download_store_.get(), download_listnames, lists); + } + + if (csd_whitelist_store_.get()) { + std::vector<std::string> csd_whitelist_listnames; + csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList); + UpdateChunkRanges(csd_whitelist_store_.get(), + csd_whitelist_listnames, lists); } corruption_detected_ = false; @@ -772,6 +1020,8 @@ void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { browse_store_->CancelUpdate(); if (download_store_.get()) download_store_->CancelUpdate(); + if (csd_whitelist_store_.get()) + csd_whitelist_store_->CancelUpdate(); return; } @@ -779,9 +1029,37 @@ void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { UpdateDownloadStore(); // for browsing UpdateBrowseStore(); + // for csd whitelist + UpdateCsdWhitelistStore(); +} + +void SafeBrowsingDatabaseNew::UpdateCsdWhitelistStore() { + if (!csd_whitelist_store_.get()) + return; + + // For the csd whitelist, we don't cache and save full hashes since all + // hashes are already full. + std::vector<SBAddFullHash> empty_add_hashes; + + // Not needed for the csd whitelist. + std::set<SBPrefix> empty_miss_cache; + + // Note: prefixes will not be empty. The current data store implementation + // stores all full-length hashes as both full and prefix hashes. + std::vector<SBAddPrefix> prefixes; + std::vector<SBAddFullHash> full_hashes; + if (!csd_whitelist_store_->FinishUpdate(empty_add_hashes, + empty_miss_cache, + &prefixes, + &full_hashes)) { + RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH); + CsdWhitelistAllUrls(); + return; + } + LoadCsdWhitelist(full_hashes); } -void SafeBrowsingDatabaseNew:: UpdateDownloadStore() { +void SafeBrowsingDatabaseNew::UpdateDownloadStore() { if (!download_store_.get()) return; @@ -854,24 +1132,8 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { filter->Insert(add_prefixes[i].prefix); } - std::vector<SBPrefix> prefixes; - for (size_t i = 0; i < add_prefixes.size(); ++i) { - prefixes.push_back(add_prefixes[i].prefix); - } - std::sort(prefixes.begin(), prefixes.end()); scoped_ptr<safe_browsing::PrefixSet> - prefix_set(new safe_browsing::PrefixSet(prefixes)); - - // Verify that |GetPrefixes()| returns the same set of prefixes as - // was passed to the constructor. - std::vector<SBPrefix> restored; - prefix_set->GetPrefixes(&restored); - prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); - if (restored.size() != prefixes.size() || - !std::equal(prefixes.begin(), prefixes.end(), restored.begin())) { - NOTREACHED(); - RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); - } + prefix_set(PrefixSetFromAddPrefixes(add_prefixes)); // This needs to be in sorted order by prefix for efficient access. std::sort(add_full_hashes.begin(), add_full_hashes.end(), @@ -979,24 +1241,7 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() { // TODO(shess): Write/read for prefix set. std::vector<SBAddPrefix> add_prefixes; browse_store_->GetAddPrefixes(&add_prefixes); - std::vector<SBPrefix> prefixes; - for (size_t i = 0; i < add_prefixes.size(); ++i) { - prefixes.push_back(add_prefixes[i].prefix); - } - std::sort(prefixes.begin(), prefixes.end()); - prefix_set_.reset(new safe_browsing::PrefixSet(prefixes)); - - // Double-check the prefixes so that the - // PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in - // ContainsBrowseUrl() can be trustworthy. - std::vector<SBPrefix> restored; - prefix_set_->GetPrefixes(&restored); - std::set<SBPrefix> unique(prefixes.begin(), prefixes.end()); - if (restored.size() != unique.size() || - !std::equal(unique.begin(), unique.end(), restored.begin())) { - NOTREACHED(); - RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); - } + prefix_set_.reset(PrefixSetFromAddPrefixes(add_prefixes)); } bool SafeBrowsingDatabaseNew::Delete() { @@ -1010,10 +1255,15 @@ bool SafeBrowsingDatabaseNew::Delete() { if (!r2) RecordFailure(FAILURE_DATABASE_STORE_DELETE); - const bool r3 = file_util::Delete(bloom_filter_filename_, false); + const bool r3 = csd_whitelist_store_.get() ? + csd_whitelist_store_->Delete() : true; if (!r3) + RecordFailure(FAILURE_DATABASE_STORE_DELETE); + + const bool r4 = file_util::Delete(bloom_filter_filename_, false); + if (!r4) RecordFailure(FAILURE_DATABASE_FILTER_DELETE); - return r1 && r2 && r3; + return r1 && r2 && r3 && r4; } void SafeBrowsingDatabaseNew::WriteBloomFilter() { @@ -1030,3 +1280,38 @@ void SafeBrowsingDatabaseNew::WriteBloomFilter() { if (!write_ok) RecordFailure(FAILURE_DATABASE_FILTER_WRITE); } + +void SafeBrowsingDatabaseNew::CsdWhitelistAllUrls() { + base::AutoLock locked(lookup_lock_); + csd_whitelist_all_urls_ = true; + csd_whitelist_.clear(); +} + +void SafeBrowsingDatabaseNew::LoadCsdWhitelist( + const std::vector<SBAddFullHash>& full_hashes) { + DCHECK_EQ(creation_loop_, MessageLoop::current()); + if (full_hashes.size() > kMaxCsdWhitelistSize) { + CsdWhitelistAllUrls(); + return; + } + + std::vector<SBFullHash> new_csd_whitelist; + for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); + it != full_hashes.end(); ++it) { + new_csd_whitelist.push_back(it->full_hash); + } + std::sort(new_csd_whitelist.begin(), new_csd_whitelist.end()); + + SBFullHash kill_switch; + crypto::SHA256HashString(kCsdKillSwitchUrl, &kill_switch, + sizeof(kill_switch)); + if (std::binary_search(new_csd_whitelist.begin(), new_csd_whitelist.end(), + kill_switch)) { + // The kill switch is whitelisted hence we whitelist all URLs. + CsdWhitelistAllUrls(); + } else { + base::AutoLock locked(lookup_lock_); + csd_whitelist_all_urls_ = false; + csd_whitelist_.swap(new_csd_whitelist); + } +} |