// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/safe_browsing/safe_browsing_database.h" #include #include #include "base/bind.h" #include "base/files/file_util.h" #include "base/message_loop/message_loop.h" #include "base/metrics/histogram.h" #include "base/metrics/stats_counters.h" #include "base/process/process_handle.h" #include "base/process/process_metrics.h" #include "base/sha1.h" #include "base/strings/string_number_conversions.h" #include "base/strings/stringprintf.h" #include "base/time/time.h" #include "chrome/browser/safe_browsing/prefix_set.h" #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" #include "content/public/browser/browser_thread.h" #include "crypto/sha2.h" #include "net/base/net_util.h" #include "url/gurl.h" #if defined(OS_MACOSX) #include "base/mac/mac_util.h" #endif using content::BrowserThread; namespace { // Filename suffix for the bloom filter. const base::FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); // Filename suffix for the prefix set. const base::FilePath::CharType kPrefixSetFile[] = FILE_PATH_LITERAL(" Prefix Set"); // Filename suffix for download store. const base::FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download"); // Filename suffix for client-side phishing detection whitelist store. const base::FilePath::CharType kCsdWhitelistDBFile[] = FILE_PATH_LITERAL(" Csd Whitelist"); // Filename suffix for the download whitelist store. const base::FilePath::CharType kDownloadWhitelistDBFile[] = FILE_PATH_LITERAL(" Download Whitelist"); // Filename suffix for the extension blacklist store. const base::FilePath::CharType kExtensionBlacklistDBFile[] = FILE_PATH_LITERAL(" Extension Blacklist"); // Filename suffix for the side-effect free whitelist store. const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] = FILE_PATH_LITERAL(" Side-Effect Free Whitelist"); // Filename suffix for the csd malware IP blacklist store. const base::FilePath::CharType kIPBlacklistDBFile[] = FILE_PATH_LITERAL(" IP Blacklist"); // Filename suffix for the unwanted software blacklist store. const base::FilePath::CharType kUnwantedSoftwareDBFile[] = FILE_PATH_LITERAL(" UwS List"); // Filename suffix for browse store. // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win. // Unfortunately, to change the name implies lots of transition code // for little benefit. If/when file formats change (say to put all // the data in one file), that would be a convenient point to rectify // this. // TODO(shess): This shouldn't be OS-driven #if defined(OS_ANDROID) // NOTE(shess): This difference is also reflected in the list name in // safe_browsing_util.cc. // TODO(shess): Spin up an alternate list id which can be persisted in the // store. Then if a mistake is made, it won't cause confusion between // incompatible lists. const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Mobile"); #else const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); #endif // Maximum number of entries we allow in any of the whitelists. // If a whitelist on disk contains more entries then all lookups to // the whitelist will be considered a match. const size_t kMaxWhitelistSize = 5000; // If the hash of this exact expression is on a whitelist then all // lookups to this whitelist will be considered a match. const char kWhitelistKillSwitchUrl[] = "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this! // If the hash of this exact expression is on a whitelist then the // malware IP blacklisting feature will be disabled in csd. // Don't change this! const char kMalwareIPKillSwitchUrl[] = "sb-ssl.google.com/safebrowsing/csd/killswitch_malware"; const size_t kMaxIpPrefixSize = 128; const size_t kMinIpPrefixSize = 1; // To save space, the incoming |chunk_id| and |list_id| are combined // into an |encoded_chunk_id| for storage by shifting the |list_id| // into the low-order bits. These functions decode that information. // TODO(lzheng): It was reasonable when database is saved in sqlite, but // there should be better ways to save chunk_id and list_id after we use // SafeBrowsingStoreFile. int GetListIdBit(const int encoded_chunk_id) { return encoded_chunk_id & 1; } int DecodeChunkId(int encoded_chunk_id) { return encoded_chunk_id >> 1; } int EncodeChunkId(const int chunk, const int list_id) { DCHECK_NE(list_id, safe_browsing_util::INVALID); return chunk << 1 | list_id % 2; } // Generate the set of full hashes to check for |url|. If // |include_whitelist_hashes| is true we will generate additional path-prefixes // to match against the csd whitelist. E.g., if the path-prefix /foo is on the // whitelist it should also match /foo/bar which is not the case for all the // other lists. We'll also always add a pattern for the empty path. // TODO(shess): This function is almost the same as // |CompareFullHashes()| in safe_browsing_util.cc, except that code // does an early exit on match. Since match should be the infrequent // case (phishing or malware found), consider combining this function // with that one. void UrlToFullHashes(const GURL& url, bool include_whitelist_hashes, std::vector* full_hashes) { std::vector hosts; if (url.HostIsIPAddress()) { hosts.push_back(url.host()); } else { safe_browsing_util::GenerateHostsToCheck(url, &hosts); } std::vector paths; safe_browsing_util::GeneratePathsToCheck(url, &paths); for (size_t i = 0; i < hosts.size(); ++i) { for (size_t j = 0; j < paths.size(); ++j) { const std::string& path = paths[j]; full_hashes->push_back(SBFullHashForString(hosts[i] + path)); // We may have /foo as path-prefix in the whitelist which should // also match with /foo/bar and /foo?bar. Hence, for every path // that ends in '/' we also add the path without the slash. if (include_whitelist_hashes && path.size() > 1 && path[path.size() - 1] == '/') { full_hashes->push_back( SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1))); } } } } // Get the prefixes matching the download |urls|. void GetDownloadUrlPrefixes(const std::vector& urls, std::vector* prefixes) { std::vector full_hashes; for (size_t i = 0; i < urls.size(); ++i) UrlToFullHashes(urls[i], false, &full_hashes); for (size_t i = 0; i < full_hashes.size(); ++i) prefixes->push_back(full_hashes[i].prefix); } // Helper function to compare addprefixes in |store| with |prefixes|. // The |list_bit| indicates which list (url or hash) to compare. // // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain // the actual matching prefixes. bool MatchAddPrefixes(SafeBrowsingStore* store, int list_bit, const std::vector& prefixes, std::vector* prefix_hits) { prefix_hits->clear(); bool found_match = false; SBAddPrefixes add_prefixes; store->GetAddPrefixes(&add_prefixes); for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); iter != add_prefixes.end(); ++iter) { for (size_t j = 0; j < prefixes.size(); ++j) { const SBPrefix& prefix = prefixes[j]; if (prefix == iter->prefix && GetListIdBit(iter->chunk_id) == list_bit) { prefix_hits->push_back(prefix); found_match = true; } } } return found_match; } // This function generates a chunk range string for |chunks|. It // outputs one chunk range string per list and writes it to the // |list_ranges| vector. We expect |list_ranges| to already be of the // right size. E.g., if |chunks| contains chunks with two different // list ids then |list_ranges| must contain two elements. void GetChunkRanges(const std::vector& chunks, std::vector* list_ranges) { // Since there are 2 possible list ids, there must be exactly two // list ranges. Even if the chunk data should only contain one // line, this code has to somehow handle corruption. DCHECK_EQ(2U, list_ranges->size()); std::vector > decoded_chunks(list_ranges->size()); for (std::vector::const_iterator iter = chunks.begin(); iter != chunks.end(); ++iter) { int mod_list_id = GetListIdBit(*iter); DCHECK_GE(mod_list_id, 0); DCHECK_LT(static_cast(mod_list_id), decoded_chunks.size()); decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); } for (size_t i = 0; i < decoded_chunks.size(); ++i) { ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); } } // Helper function to create chunk range lists for Browse related // lists. void UpdateChunkRanges(SafeBrowsingStore* store, const std::vector& listnames, std::vector* lists) { if (!store) return; DCHECK_GT(listnames.size(), 0U); DCHECK_LE(listnames.size(), 2U); std::vector add_chunks; std::vector sub_chunks; store->GetAddChunks(&add_chunks); store->GetSubChunks(&sub_chunks); // Always decode 2 ranges, even if only the first one is expected. // The loop below will only load as many into |lists| as |listnames| // indicates. std::vector adds(2); std::vector subs(2); GetChunkRanges(add_chunks, &adds); GetChunkRanges(sub_chunks, &subs); for (size_t i = 0; i < listnames.size(); ++i) { const std::string& listname = listnames[i]; DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, static_cast(i % 2)); DCHECK_NE(safe_browsing_util::GetListId(listname), safe_browsing_util::INVALID); lists->push_back(SBListChunkRanges(listname)); lists->back().adds.swap(adds[i]); lists->back().subs.swap(subs[i]); } } void UpdateChunkRangesForLists(SafeBrowsingStore* store, const std::string& listname0, const std::string& listname1, std::vector* lists) { std::vector listnames; listnames.push_back(listname0); listnames.push_back(listname1); UpdateChunkRanges(store, listnames, lists); } void UpdateChunkRangesForList(SafeBrowsingStore* store, const std::string& listname, std::vector* lists) { UpdateChunkRanges(store, std::vector(1, listname), lists); } // This code always checks for non-zero file size. This helper makes // that less verbose. int64 GetFileSizeOrZero(const base::FilePath& file_path) { int64 size_64; if (!base::GetFileSize(file_path, &size_64)) return 0; return size_64; } // Helper for PrefixSetContainsUrlHashes(). Returns true if an un-expired match // for |full_hash| is found in |cache|, with any matches appended to |results| // (true can be returned with zero matches). |expire_base| is used to check the // cache lifetime of matches, expired matches will be discarded from |cache|. bool GetCachedFullHash(std::map* cache, const SBFullHash& full_hash, const base::Time& expire_base, std::vector* results) { // First check if there is a valid cached result for this prefix. std::map::iterator citer = cache->find(full_hash.prefix); if (citer == cache->end()) return false; // Remove expired entries. SBCachedFullHashResult& cached_result = citer->second; if (cached_result.expire_after <= expire_base) { cache->erase(citer); return false; } // Find full-hash matches. std::vector& cached_hashes = cached_result.full_hashes; for (size_t i = 0; i < cached_hashes.size(); ++i) { if (SBFullHashEqual(full_hash, cached_hashes[i].hash)) results->push_back(cached_hashes[i]); } return true; } } // namespace // The default SafeBrowsingDatabaseFactory. class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { public: SafeBrowsingDatabase* CreateSafeBrowsingDatabase( bool enable_download_protection, bool enable_client_side_whitelist, bool enable_download_whitelist, bool enable_extension_blacklist, bool enable_side_effect_free_whitelist, bool enable_ip_blacklist, bool enable_unwanted_software_list) override { return new SafeBrowsingDatabaseNew( new SafeBrowsingStoreFile, enable_download_protection ? new SafeBrowsingStoreFile : NULL, enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL, enable_download_whitelist ? new SafeBrowsingStoreFile : NULL, enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL, enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL, enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL, enable_unwanted_software_list ? new SafeBrowsingStoreFile : NULL); } SafeBrowsingDatabaseFactoryImpl() { } private: DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl); }; // static SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; // Factory method, non-thread safe. Caller has to make sure this is called // on SafeBrowsing Thread. // TODO(shess): There's no need for a factory any longer. Convert // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() // callers just construct things directly. SafeBrowsingDatabase* SafeBrowsingDatabase::Create( bool enable_download_protection, bool enable_client_side_whitelist, bool enable_download_whitelist, bool enable_extension_blacklist, bool enable_side_effect_free_whitelist, bool enable_ip_blacklist, bool enable_unwanted_software_list) { if (!factory_) factory_ = new SafeBrowsingDatabaseFactoryImpl(); return factory_->CreateSafeBrowsingDatabase(enable_download_protection, enable_client_side_whitelist, enable_download_whitelist, enable_extension_blacklist, enable_side_effect_free_whitelist, enable_ip_blacklist, enable_unwanted_software_list); } SafeBrowsingDatabase::~SafeBrowsingDatabase() { } // static base::FilePath SafeBrowsingDatabase::BrowseDBFilename( const base::FilePath& db_base_filename) { return base::FilePath(db_base_filename.value() + kBrowseDBFile); } // static base::FilePath SafeBrowsingDatabase::DownloadDBFilename( const base::FilePath& db_base_filename) { return base::FilePath(db_base_filename.value() + kDownloadDBFile); } // static base::FilePath SafeBrowsingDatabase::BloomFilterForFilename( const base::FilePath& db_filename) { return base::FilePath(db_filename.value() + kBloomFilterFile); } // static base::FilePath SafeBrowsingDatabase::PrefixSetForFilename( const base::FilePath& db_filename) { return base::FilePath(db_filename.value() + kPrefixSetFile); } // static base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( const base::FilePath& db_filename) { return base::FilePath(db_filename.value() + kCsdWhitelistDBFile); } // static base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename( const base::FilePath& db_filename) { return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile); } // static base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename( const base::FilePath& db_filename) { return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile); } // static base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename( const base::FilePath& db_filename) { return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile); } // static base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename( const base::FilePath& db_filename) { return base::FilePath(db_filename.value() + kIPBlacklistDBFile); } // static base::FilePath SafeBrowsingDatabase::UnwantedSoftwareDBFilename( const base::FilePath& db_filename) { return base::FilePath(db_filename.value() + kUnwantedSoftwareDBFile); } SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { // Stores are not thread safe. DCHECK(thread_checker_.CalledOnValidThread()); if (list_id == safe_browsing_util::PHISH || list_id == safe_browsing_util::MALWARE) { return browse_store_.get(); } else if (list_id == safe_browsing_util::BINURL) { return download_store_.get(); } else if (list_id == safe_browsing_util::CSDWHITELIST) { return csd_whitelist_store_.get(); } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) { return download_whitelist_store_.get(); } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) { return extension_blacklist_store_.get(); } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) { return side_effect_free_whitelist_store_.get(); } else if (list_id == safe_browsing_util::IPBLACKLIST) { return ip_blacklist_store_.get(); } else if (list_id == safe_browsing_util::UNWANTEDURL) { return unwanted_software_store_.get(); } return NULL; } // static void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) { UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type, FAILURE_DATABASE_MAX); } SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() : SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, NULL, NULL, NULL, NULL, NULL, NULL, NULL) { DCHECK(browse_store_.get()); DCHECK(!download_store_.get()); DCHECK(!csd_whitelist_store_.get()); DCHECK(!download_whitelist_store_.get()); DCHECK(!extension_blacklist_store_.get()); DCHECK(!side_effect_free_whitelist_store_.get()); DCHECK(!ip_blacklist_store_.get()); DCHECK(!unwanted_software_store_.get()); } SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( SafeBrowsingStore* browse_store, SafeBrowsingStore* download_store, SafeBrowsingStore* csd_whitelist_store, SafeBrowsingStore* download_whitelist_store, SafeBrowsingStore* extension_blacklist_store, SafeBrowsingStore* side_effect_free_whitelist_store, SafeBrowsingStore* ip_blacklist_store, SafeBrowsingStore* unwanted_software_store) : browse_store_(browse_store), download_store_(download_store), csd_whitelist_store_(csd_whitelist_store), download_whitelist_store_(download_whitelist_store), extension_blacklist_store_(extension_blacklist_store), side_effect_free_whitelist_store_(side_effect_free_whitelist_store), ip_blacklist_store_(ip_blacklist_store), unwanted_software_store_(unwanted_software_store), corruption_detected_(false), change_detected_(false), reset_factory_(this) { DCHECK(browse_store_.get()); } SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { // The DCHECK is disabled due to crbug.com/338486 . // DCHECK(thread_checker_.CalledOnValidThread()); } void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) { DCHECK(thread_checker_.CalledOnValidThread()); // This should not be run multiple times. DCHECK(filename_base_.empty()); filename_base_ = filename_base; // TODO(shess): The various stores are really only necessary while doing // updates (see |UpdateFinished()|) or when querying a store directly (see // |ContainsDownloadUrl()|). // The store variables are also tested to see if a list is enabled. Perhaps // the stores could be refactored into an update object so that they are only // live in memory while being actively used. The sense of enabled probably // belongs in protocol_manager or database_manager. browse_store_->Init( BrowseDBFilename(filename_base_), base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, base::Unretained(this))); if (unwanted_software_store_.get()) { unwanted_software_store_->Init( UnwantedSoftwareDBFilename(filename_base_), base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, base::Unretained(this))); } { // NOTE: There is no need to grab the lock in this function, since // until it returns, there are no pointers to this class on other // threads. Then again, that means there is no possibility of // contention on the lock... base::AutoLock locked(lookup_lock_); prefix_gethash_cache_.clear(); LoadPrefixSet(BrowseDBFilename(filename_base_), &browse_prefix_set_, FAILURE_BROWSE_PREFIX_SET_READ); if (unwanted_software_store_.get()) { LoadPrefixSet(UnwantedSoftwareDBFilename(filename_base_), &unwanted_software_prefix_set_, FAILURE_UNWANTED_SOFTWARE_PREFIX_SET_READ); } } if (download_store_.get()) { download_store_->Init( DownloadDBFilename(filename_base_), base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, base::Unretained(this))); } if (csd_whitelist_store_.get()) { csd_whitelist_store_->Init( CsdWhitelistDBFilename(filename_base_), base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, base::Unretained(this))); std::vector full_hashes; if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { LoadWhitelist(full_hashes, &csd_whitelist_); } else { WhitelistEverything(&csd_whitelist_); } } else { WhitelistEverything(&csd_whitelist_); // Just to be safe. } if (download_whitelist_store_.get()) { download_whitelist_store_->Init( DownloadWhitelistDBFilename(filename_base_), base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, base::Unretained(this))); std::vector full_hashes; if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) { LoadWhitelist(full_hashes, &download_whitelist_); } else { WhitelistEverything(&download_whitelist_); } } else { WhitelistEverything(&download_whitelist_); // Just to be safe. } if (extension_blacklist_store_.get()) { extension_blacklist_store_->Init( ExtensionBlacklistDBFilename(filename_base_), base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, base::Unretained(this))); } if (side_effect_free_whitelist_store_.get()) { const base::FilePath side_effect_free_whitelist_filename = SideEffectFreeWhitelistDBFilename(filename_base_); const base::FilePath side_effect_free_whitelist_prefix_set_filename = PrefixSetForFilename(side_effect_free_whitelist_filename); side_effect_free_whitelist_store_->Init( side_effect_free_whitelist_filename, base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, base::Unretained(this))); // Only use the prefix set if database is present and non-empty. if (GetFileSizeOrZero(side_effect_free_whitelist_filename)) { const base::TimeTicks before = base::TimeTicks::Now(); side_effect_free_whitelist_prefix_set_ = safe_browsing::PrefixSet::LoadFile( side_effect_free_whitelist_prefix_set_filename); UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad", base::TimeTicks::Now() - before); if (!side_effect_free_whitelist_prefix_set_.get()) RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ); } } else { // Delete any files of the side-effect free sidelist that may be around // from when it was previously enabled. SafeBrowsingStoreFile::DeleteStore( SideEffectFreeWhitelistDBFilename(filename_base_)); base::DeleteFile( PrefixSetForFilename(SideEffectFreeWhitelistDBFilename(filename_base_)), false); } if (ip_blacklist_store_.get()) { ip_blacklist_store_->Init( IpBlacklistDBFilename(filename_base_), base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, base::Unretained(this))); std::vector full_hashes; if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) { LoadIpBlacklist(full_hashes); } else { LoadIpBlacklist(std::vector()); // Clear the list. } } } bool SafeBrowsingDatabaseNew::ResetDatabase() { DCHECK(thread_checker_.CalledOnValidThread()); // Delete files on disk. // TODO(shess): Hard to see where one might want to delete without a // reset. Perhaps inline |Delete()|? if (!Delete()) return false; // Reset objects in memory. { base::AutoLock locked(lookup_lock_); prefix_gethash_cache_.clear(); browse_prefix_set_.reset(); side_effect_free_whitelist_prefix_set_.reset(); ip_blacklist_.clear(); unwanted_software_prefix_set_.reset(); } // Wants to acquire the lock itself. WhitelistEverything(&csd_whitelist_); WhitelistEverything(&download_whitelist_); return true; } bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( const GURL& url, std::vector* prefix_hits, std::vector* cache_hits) { return PrefixSetContainsUrl( url, &browse_prefix_set_, prefix_hits, cache_hits); } bool SafeBrowsingDatabaseNew::ContainsUnwantedSoftwareUrl( const GURL& url, std::vector* prefix_hits, std::vector* cache_hits) { return PrefixSetContainsUrl( url, &unwanted_software_prefix_set_, prefix_hits, cache_hits); } bool SafeBrowsingDatabaseNew::PrefixSetContainsUrl( const GURL& url, scoped_ptr* prefix_set_getter, std::vector* prefix_hits, std::vector* cache_hits) { // Clear the results first. prefix_hits->clear(); cache_hits->clear(); std::vector full_hashes; UrlToFullHashes(url, false, &full_hashes); if (full_hashes.empty()) return false; return PrefixSetContainsUrlHashes( full_hashes, prefix_set_getter, prefix_hits, cache_hits); } bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashesForTesting( const std::vector& full_hashes, std::vector* prefix_hits, std::vector* cache_hits) { return PrefixSetContainsUrlHashes( full_hashes, &browse_prefix_set_, prefix_hits, cache_hits); } bool SafeBrowsingDatabaseNew::PrefixSetContainsUrlHashes( const std::vector& full_hashes, scoped_ptr* prefix_set_getter, std::vector* prefix_hits, std::vector* cache_hits) { // Used to determine cache expiration. const base::Time now = base::Time::Now(); // This function can be called on any thread, so lock against any changes base::AutoLock locked(lookup_lock_); // |prefix_set| is empty until it is either read from disk, or the first // update populates it. Bail out without a hit if not yet available. // |prefix_set_getter| can only be accessed while holding |lookup_lock_| hence // why it is passed as a parameter rather than passing the |prefix_set| // directly. const safe_browsing::PrefixSet* prefix_set = prefix_set_getter->get(); if (!prefix_set) return false; for (size_t i = 0; i < full_hashes.size(); ++i) { if (!GetCachedFullHash( &prefix_gethash_cache_, full_hashes[i], now, cache_hits)) { // No valid cached result, check the database. if (prefix_set->Exists(full_hashes[i])) prefix_hits->push_back(full_hashes[i].prefix); } } // Multiple full hashes could share prefix, remove duplicates. std::sort(prefix_hits->begin(), prefix_hits->end()); prefix_hits->erase(std::unique(prefix_hits->begin(), prefix_hits->end()), prefix_hits->end()); return !prefix_hits->empty() || !cache_hits->empty(); } bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( const std::vector& urls, std::vector* prefix_hits) { DCHECK(thread_checker_.CalledOnValidThread()); // Ignore this check when download checking is not enabled. if (!download_store_.get()) return false; std::vector prefixes; GetDownloadUrlPrefixes(urls, &prefixes); return MatchAddPrefixes(download_store_.get(), safe_browsing_util::BINURL % 2, prefixes, prefix_hits); } bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { std::vector full_hashes; UrlToFullHashes(url, true, &full_hashes); return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); } bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) { std::vector full_hashes; UrlToFullHashes(url, true, &full_hashes); return ContainsWhitelistedHashes(download_whitelist_, full_hashes); } bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes( const std::vector& prefixes, std::vector* prefix_hits) { DCHECK(thread_checker_.CalledOnValidThread()); if (!extension_blacklist_store_) return false; return MatchAddPrefixes(extension_blacklist_store_.get(), safe_browsing_util::EXTENSIONBLACKLIST % 2, prefixes, prefix_hits); } bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl( const GURL& url) { std::string host; std::string path; std::string query; safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query); std::string url_to_check = host + path; if (!query.empty()) url_to_check += "?" + query; SBFullHash full_hash = SBFullHashForString(url_to_check); // This function can be called on any thread, so lock against any changes base::AutoLock locked(lookup_lock_); // |side_effect_free_whitelist_prefix_set_| is empty until it is either read // from disk, or the first update populates it. Bail out without a hit if // not yet available. if (!side_effect_free_whitelist_prefix_set_.get()) return false; return side_effect_free_whitelist_prefix_set_->Exists(full_hash); } bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) { net::IPAddressNumber ip_number; if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) return false; if (ip_number.size() == net::kIPv4AddressSize) ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number); if (ip_number.size() != net::kIPv6AddressSize) return false; // better safe than sorry. // This function can be called on any thread, so lock against any changes base::AutoLock locked(lookup_lock_); for (IPBlacklist::const_iterator it = ip_blacklist_.begin(); it != ip_blacklist_.end(); ++it) { const std::string& mask = it->first; DCHECK_EQ(mask.size(), ip_number.size()); std::string subnet(net::kIPv6AddressSize, '\0'); for (size_t i = 0; i < net::kIPv6AddressSize; ++i) { subnet[i] = ip_number[i] & mask[i]; } const std::string hash = base::SHA1HashString(subnet); DVLOG(2) << "Lookup Malware IP: " << " ip:" << ip_address << " mask:" << base::HexEncode(mask.data(), mask.size()) << " subnet:" << base::HexEncode(subnet.data(), subnet.size()) << " hash:" << base::HexEncode(hash.data(), hash.size()); if (it->second.count(hash) > 0) { return true; } } return false; } bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString( const std::string& str) { std::vector hashes; hashes.push_back(SBFullHashForString(str)); return ContainsWhitelistedHashes(download_whitelist_, hashes); } bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes( const SBWhitelist& whitelist, const std::vector& hashes) { base::AutoLock l(lookup_lock_); if (whitelist.second) return true; for (std::vector::const_iterator it = hashes.begin(); it != hashes.end(); ++it) { if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), *it, SBFullHashLess)) { return true; } } return false; } // Helper to insert add-chunk entries. void SafeBrowsingDatabaseNew::InsertAddChunk( SafeBrowsingStore* store, const safe_browsing_util::ListType list_id, const SBChunkData& chunk_data) { DCHECK(thread_checker_.CalledOnValidThread()); DCHECK(store); // The server can give us a chunk that we already have because // it's part of a range. Don't add it again. const int chunk_id = chunk_data.ChunkNumber(); const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); if (store->CheckAddChunk(encoded_chunk_id)) return; store->SetAddChunk(encoded_chunk_id); if (chunk_data.IsPrefix()) { const size_t c = chunk_data.PrefixCount(); for (size_t i = 0; i < c; ++i) { STATS_COUNTER("SB.PrefixAdd", 1); store->WriteAddPrefix(encoded_chunk_id, chunk_data.PrefixAt(i)); } } else { const size_t c = chunk_data.FullHashCount(); for (size_t i = 0; i < c; ++i) { STATS_COUNTER("SB.PrefixAddFull", 1); store->WriteAddHash(encoded_chunk_id, chunk_data.FullHashAt(i)); } } } // Helper to insert sub-chunk entries. void SafeBrowsingDatabaseNew::InsertSubChunk( SafeBrowsingStore* store, const safe_browsing_util::ListType list_id, const SBChunkData& chunk_data) { DCHECK(thread_checker_.CalledOnValidThread()); DCHECK(store); // The server can give us a chunk that we already have because // it's part of a range. Don't add it again. const int chunk_id = chunk_data.ChunkNumber(); const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); if (store->CheckSubChunk(encoded_chunk_id)) return; store->SetSubChunk(encoded_chunk_id); if (chunk_data.IsPrefix()) { const size_t c = chunk_data.PrefixCount(); for (size_t i = 0; i < c; ++i) { STATS_COUNTER("SB.PrefixSub", 1); const int add_chunk_id = chunk_data.AddChunkNumberAt(i); const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id); store->WriteSubPrefix(encoded_chunk_id, encoded_add_chunk_id, chunk_data.PrefixAt(i)); } } else { const size_t c = chunk_data.FullHashCount(); for (size_t i = 0; i < c; ++i) { STATS_COUNTER("SB.PrefixSubFull", 1); const int add_chunk_id = chunk_data.AddChunkNumberAt(i); const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id); store->WriteSubHash(encoded_chunk_id, encoded_add_chunk_id, chunk_data.FullHashAt(i)); } } } void SafeBrowsingDatabaseNew::InsertChunks( const std::string& list_name, const std::vector& chunks) { DCHECK(thread_checker_.CalledOnValidThread()); if (corruption_detected_ || chunks.empty()) return; const base::TimeTicks before = base::TimeTicks::Now(); // TODO(shess): The caller should just pass list_id. const safe_browsing_util::ListType list_id = safe_browsing_util::GetListId(list_name); SafeBrowsingStore* store = GetStore(list_id); if (!store) return; change_detected_ = true; // TODO(shess): I believe that the list is always add or sub. Can this use // that productively? store->BeginChunk(); for (size_t i = 0; i < chunks.size(); ++i) { if (chunks[i]->IsAdd()) { InsertAddChunk(store, list_id, *chunks[i]); } else if (chunks[i]->IsSub()) { InsertSubChunk(store, list_id, *chunks[i]); } else { NOTREACHED(); } } store->FinishChunk(); UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before); } void SafeBrowsingDatabaseNew::DeleteChunks( const std::vector& chunk_deletes) { DCHECK(thread_checker_.CalledOnValidThread()); if (corruption_detected_ || chunk_deletes.empty()) return; const std::string& list_name = chunk_deletes.front().list_name; const safe_browsing_util::ListType list_id = safe_browsing_util::GetListId(list_name); SafeBrowsingStore* store = GetStore(list_id); if (!store) return; change_detected_ = true; for (size_t i = 0; i < chunk_deletes.size(); ++i) { std::vector chunk_numbers; RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers); for (size_t j = 0; j < chunk_numbers.size(); ++j) { const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id); if (chunk_deletes[i].is_sub_del) store->DeleteSubChunk(encoded_chunk_id); else store->DeleteAddChunk(encoded_chunk_id); } } } void SafeBrowsingDatabaseNew::CacheHashResults( const std::vector& prefixes, const std::vector& full_hits, const base::TimeDelta& cache_lifetime) { const base::Time expire_after = base::Time::Now() + cache_lifetime; // This function can be called on any thread, so lock against any changes base::AutoLock locked(lookup_lock_); // Create or reset all cached results for these prefixes. for (size_t i = 0; i < prefixes.size(); ++i) { prefix_gethash_cache_[prefixes[i]] = SBCachedFullHashResult(expire_after); } // Insert any fullhash hits. Note that there may be one, multiple, or no // fullhashes for any given entry in |prefixes|. for (size_t i = 0; i < full_hits.size(); ++i) { const SBPrefix prefix = full_hits[i].hash.prefix; prefix_gethash_cache_[prefix].full_hashes.push_back(full_hits[i]); } } bool SafeBrowsingDatabaseNew::UpdateStarted( std::vector* lists) { DCHECK(thread_checker_.CalledOnValidThread()); DCHECK(lists); // If |BeginUpdate()| fails, reset the database. if (!browse_store_->BeginUpdate()) { RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); HandleCorruptDatabase(); return false; } if (download_store_.get() && !download_store_->BeginUpdate()) { RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN); HandleCorruptDatabase(); return false; } if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); HandleCorruptDatabase(); return false; } if (download_whitelist_store_.get() && !download_whitelist_store_->BeginUpdate()) { RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); HandleCorruptDatabase(); return false; } if (extension_blacklist_store_ && !extension_blacklist_store_->BeginUpdate()) { RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN); HandleCorruptDatabase(); return false; } if (side_effect_free_whitelist_store_ && !side_effect_free_whitelist_store_->BeginUpdate()) { RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN); HandleCorruptDatabase(); return false; } if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) { RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN); HandleCorruptDatabase(); return false; } if (unwanted_software_store_ && !unwanted_software_store_->BeginUpdate()) { RecordFailure(FAILURE_UNWANTED_SOFTWARE_DATABASE_UPDATE_BEGIN); HandleCorruptDatabase(); return false; } { base::AutoLock locked(lookup_lock_); // Cached fullhash results must be cleared on every database update (whether // successful or not.) prefix_gethash_cache_.clear(); } UpdateChunkRangesForLists(browse_store_.get(), safe_browsing_util::kMalwareList, safe_browsing_util::kPhishingList, lists); // NOTE(shess): |download_store_| used to contain kBinHashList, which has been // deprecated. Code to delete the list from the store shows ~15k hits/day as // of Feb 2014, so it has been removed. Everything _should_ be resilient to // extra data of that sort. UpdateChunkRangesForList(download_store_.get(), safe_browsing_util::kBinUrlList, lists); UpdateChunkRangesForList(csd_whitelist_store_.get(), safe_browsing_util::kCsdWhiteList, lists); UpdateChunkRangesForList(download_whitelist_store_.get(), safe_browsing_util::kDownloadWhiteList, lists); UpdateChunkRangesForList(extension_blacklist_store_.get(), safe_browsing_util::kExtensionBlacklist, lists); UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(), safe_browsing_util::kSideEffectFreeWhitelist, lists); UpdateChunkRangesForList(ip_blacklist_store_.get(), safe_browsing_util::kIPBlacklist, lists); UpdateChunkRangesForList(unwanted_software_store_.get(), safe_browsing_util::kUnwantedUrlList, lists); corruption_detected_ = false; change_detected_ = false; return true; } void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { DCHECK(thread_checker_.CalledOnValidThread()); // The update may have failed due to corrupt storage (for instance, // an excessive number of invalid add_chunks and sub_chunks). // Double-check that the databases are valid. // TODO(shess): Providing a checksum for the add_chunk and sub_chunk // sections would allow throwing a corruption error in // UpdateStarted(). if (!update_succeeded) { if (!browse_store_->CheckValidity()) DLOG(ERROR) << "Safe-browsing browse database corrupt."; if (download_store_.get() && !download_store_->CheckValidity()) DLOG(ERROR) << "Safe-browsing download database corrupt."; if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity()) DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt."; if (download_whitelist_store_.get() && !download_whitelist_store_->CheckValidity()) { DLOG(ERROR) << "Safe-browsing download whitelist database corrupt."; } if (extension_blacklist_store_ && !extension_blacklist_store_->CheckValidity()) { DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt."; } if (side_effect_free_whitelist_store_ && !side_effect_free_whitelist_store_->CheckValidity()) { DLOG(ERROR) << "Safe-browsing side-effect free whitelist database " << "corrupt."; } if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) { DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt."; } if (unwanted_software_store_ && !unwanted_software_store_->CheckValidity()) { DLOG(ERROR) << "Unwanted software url list database corrupt."; } } if (corruption_detected_) return; // Unroll the transaction if there was a protocol error or if the // transaction was empty. This will leave the prefix set, the // pending hashes, and the prefix miss cache in place. if (!update_succeeded || !change_detected_) { // Track empty updates to answer questions at http://crbug.com/72216 . if (update_succeeded && !change_detected_) UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0); browse_store_->CancelUpdate(); if (download_store_.get()) download_store_->CancelUpdate(); if (csd_whitelist_store_.get()) csd_whitelist_store_->CancelUpdate(); if (download_whitelist_store_.get()) download_whitelist_store_->CancelUpdate(); if (extension_blacklist_store_) extension_blacklist_store_->CancelUpdate(); if (side_effect_free_whitelist_store_) side_effect_free_whitelist_store_->CancelUpdate(); if (ip_blacklist_store_) ip_blacklist_store_->CancelUpdate(); if (unwanted_software_store_) unwanted_software_store_->CancelUpdate(); return; } if (download_store_) { int64 size_bytes = UpdateHashPrefixStore( DownloadDBFilename(filename_base_), download_store_.get(), FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", static_cast(size_bytes / 1024)); } UpdatePrefixSetUrlStore(BrowseDBFilename(filename_base_), browse_store_.get(), &browse_prefix_set_, FAILURE_BROWSE_DATABASE_UPDATE_FINISH, FAILURE_BROWSE_PREFIX_SET_WRITE); UpdateWhitelistStore(CsdWhitelistDBFilename(filename_base_), csd_whitelist_store_.get(), &csd_whitelist_); UpdateWhitelistStore(DownloadWhitelistDBFilename(filename_base_), download_whitelist_store_.get(), &download_whitelist_); if (extension_blacklist_store_) { int64 size_bytes = UpdateHashPrefixStore( ExtensionBlacklistDBFilename(filename_base_), extension_blacklist_store_.get(), FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH); UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes", static_cast(size_bytes / 1024)); } if (side_effect_free_whitelist_store_) UpdateSideEffectFreeWhitelistStore(); if (ip_blacklist_store_) UpdateIpBlacklistStore(); if (unwanted_software_store_) { UpdatePrefixSetUrlStore(UnwantedSoftwareDBFilename(filename_base_), unwanted_software_store_.get(), &unwanted_software_prefix_set_, FAILURE_UNWANTED_SOFTWARE_DATABASE_UPDATE_FINISH, FAILURE_UNWANTED_SOFTWARE_PREFIX_SET_WRITE); } } void SafeBrowsingDatabaseNew::UpdateWhitelistStore( const base::FilePath& store_filename, SafeBrowsingStore* store, SBWhitelist* whitelist) { DCHECK(thread_checker_.CalledOnValidThread()); if (!store) return; // Note: |builder| will not be empty. The current data store implementation // stores all full-length hashes as both full and prefix hashes. safe_browsing::PrefixSetBuilder builder; std::vector full_hashes; if (!store->FinishUpdate(&builder, &full_hashes)) { RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); WhitelistEverything(whitelist); return; } #if defined(OS_MACOSX) base::mac::SetFileBackupExclusion(store_filename); #endif LoadWhitelist(full_hashes, whitelist); } int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( const base::FilePath& store_filename, SafeBrowsingStore* store, FailureType failure_type) { DCHECK(thread_checker_.CalledOnValidThread()); // These results are not used after this call. Simply ignore the // returned value after FinishUpdate(...). safe_browsing::PrefixSetBuilder builder; std::vector add_full_hashes_result; if (!store->FinishUpdate(&builder, &add_full_hashes_result)) RecordFailure(failure_type); #if defined(OS_MACOSX) base::mac::SetFileBackupExclusion(store_filename); #endif return GetFileSizeOrZero(store_filename); } void SafeBrowsingDatabaseNew::UpdatePrefixSetUrlStore( const base::FilePath& db_filename, SafeBrowsingStore* url_store, scoped_ptr* prefix_set, FailureType finish_failure_type, FailureType write_failure_type) { DCHECK(thread_checker_.CalledOnValidThread()); // Measure the amount of IO during the filter build. base::IoCounters io_before, io_after; base::ProcessHandle handle = base::GetCurrentProcessHandle(); scoped_ptr metric( #if !defined(OS_MACOSX) base::ProcessMetrics::CreateProcessMetrics(handle) #else // Getting stats only for the current process is enough, so NULL is fine. base::ProcessMetrics::CreateProcessMetrics(handle, NULL) #endif ); // IoCounters are currently not supported on Mac, and may not be // available for Linux, so we check the result and only show IO // stats if they are available. const bool got_counters = metric->GetIOCounters(&io_before); const base::TimeTicks before = base::TimeTicks::Now(); // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the // fly? Other clients use the SBAddFullHash vector, but AFAICT they only use // the SBFullHash portion. It would need an accessor on PrefixSet. safe_browsing::PrefixSetBuilder builder; std::vector add_full_hashes; if (!url_store->FinishUpdate(&builder, &add_full_hashes)) { RecordFailure(finish_failure_type); return; } std::vector full_hash_results; for (size_t i = 0; i < add_full_hashes.size(); ++i) { full_hash_results.push_back(add_full_hashes[i].full_hash); } scoped_ptr new_prefix_set( builder.GetPrefixSet(full_hash_results)); // Swap in the newly built filter. { base::AutoLock locked(lookup_lock_); prefix_set->swap(new_prefix_set); } UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); // Persist the prefix set to disk. Note: there is no need to lock since the // only write to |*prefix_set| is on this thread (in the swap() above). WritePrefixSet(db_filename, prefix_set->get(), write_failure_type); // Gather statistics. if (got_counters && metric->GetIOCounters(&io_after)) { UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes", static_cast(io_after.ReadTransferCount - io_before.ReadTransferCount) / 1024); UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes", static_cast(io_after.WriteTransferCount - io_before.WriteTransferCount) / 1024); UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", static_cast(io_after.ReadOperationCount - io_before.ReadOperationCount)); UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", static_cast(io_after.WriteOperationCount - io_before.WriteOperationCount)); } const int64 file_size = GetFileSizeOrZero(db_filename); UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", static_cast(file_size / 1024)); #if defined(OS_MACOSX) base::mac::SetFileBackupExclusion(db_filename); #endif } void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { DCHECK(thread_checker_.CalledOnValidThread()); safe_browsing::PrefixSetBuilder builder; std::vector add_full_hashes_result; if (!side_effect_free_whitelist_store_->FinishUpdate( &builder, &add_full_hashes_result)) { RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); return; } scoped_ptr new_prefix_set( builder.GetPrefixSetNoHashes()); // Swap in the newly built prefix set. { base::AutoLock locked(lookup_lock_); side_effect_free_whitelist_prefix_set_.swap(new_prefix_set); } const base::FilePath side_effect_free_whitelist_filename = SideEffectFreeWhitelistDBFilename(filename_base_); const base::FilePath side_effect_free_whitelist_prefix_set_filename = PrefixSetForFilename(side_effect_free_whitelist_filename); const base::TimeTicks before = base::TimeTicks::Now(); const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile( side_effect_free_whitelist_prefix_set_filename); UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite", base::TimeTicks::Now() - before); if (!write_ok) RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE); // Gather statistics. int64 file_size = GetFileSizeOrZero( side_effect_free_whitelist_prefix_set_filename); UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes", static_cast(file_size / 1024)); file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename); UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes", static_cast(file_size / 1024)); #if defined(OS_MACOSX) base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename); base::mac::SetFileBackupExclusion( side_effect_free_whitelist_prefix_set_filename); #endif } void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { DCHECK(thread_checker_.CalledOnValidThread()); // Note: prefixes will not be empty. The current data store implementation // stores all full-length hashes as both full and prefix hashes. safe_browsing::PrefixSetBuilder builder; std::vector full_hashes; if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) { RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); LoadIpBlacklist(std::vector()); // Clear the list. return; } #if defined(OS_MACOSX) base::mac::SetFileBackupExclusion(IpBlacklistDBFilename(filename_base_)); #endif LoadIpBlacklist(full_hashes); } void SafeBrowsingDatabaseNew::HandleCorruptDatabase() { DCHECK(thread_checker_.CalledOnValidThread()); // Reset the database after the current task has unwound (but only // reset once within the scope of a given task). if (!reset_factory_.HasWeakPtrs()) { RecordFailure(FAILURE_DATABASE_CORRUPT); base::MessageLoop::current()->PostTask(FROM_HERE, base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase, reset_factory_.GetWeakPtr())); } } void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() { DCHECK(thread_checker_.CalledOnValidThread()); RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER); corruption_detected_ = true; // Stop updating the database. ResetDatabase(); // NOTE(shess): ResetDatabase() should remove the corruption, so this should // only happen once. If you are here because you are hitting this after a // restart, then I would be very interested in working with you to figure out // what is happening, since it may affect real users. DLOG(FATAL) << "SafeBrowsing database was corrupt and reset"; } // TODO(shess): I'm not clear why this code doesn't have any // real error-handling. void SafeBrowsingDatabaseNew::LoadPrefixSet( const base::FilePath& db_filename, scoped_ptr* prefix_set, FailureType read_failure_type) { DCHECK(thread_checker_.CalledOnValidThread()); if (!prefix_set) return; DCHECK(!filename_base_.empty()); const base::FilePath prefix_set_filename = PrefixSetForFilename(db_filename); // Only use the prefix set if database is present and non-empty. if (!GetFileSizeOrZero(db_filename)) return; // Cleanup any stale bloom filter (no longer used). // TODO(shess): Track existence to drive removal of this code? const base::FilePath bloom_filter_filename = BloomFilterForFilename(db_filename); base::DeleteFile(bloom_filter_filename, false); const base::TimeTicks before = base::TimeTicks::Now(); *prefix_set = safe_browsing::PrefixSet::LoadFile(prefix_set_filename); UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before); if (!prefix_set->get()) RecordFailure(read_failure_type); } bool SafeBrowsingDatabaseNew::Delete() { DCHECK(thread_checker_.CalledOnValidThread()); DCHECK(!filename_base_.empty()); // TODO(shess): This is a mess. SafeBrowsingFileStore::Delete() closes the // store before calling DeleteStore(). DeleteStore() deletes transient files // in addition to the main file. Probably all of these should be converted to // a helper which calls Delete() if the store exists, else DeleteStore() on // the generated filename. // TODO(shess): Determine if the histograms are useful in any way. I cannot // recall any action taken as a result of their values, in which case it might // make more sense to histogram an overall thumbs-up/-down and just dig deeper // if something looks wrong. const bool r1 = browse_store_->Delete(); if (!r1) RecordFailure(FAILURE_DATABASE_STORE_DELETE); const bool r2 = download_store_.get() ? download_store_->Delete() : true; if (!r2) RecordFailure(FAILURE_DATABASE_STORE_DELETE); const bool r3 = csd_whitelist_store_.get() ? csd_whitelist_store_->Delete() : true; if (!r3) RecordFailure(FAILURE_DATABASE_STORE_DELETE); const bool r4 = download_whitelist_store_.get() ? download_whitelist_store_->Delete() : true; if (!r4) RecordFailure(FAILURE_DATABASE_STORE_DELETE); const base::FilePath browse_filename = BrowseDBFilename(filename_base_); const base::FilePath bloom_filter_filename = BloomFilterForFilename(browse_filename); const bool r5 = base::DeleteFile(bloom_filter_filename, false); if (!r5) RecordFailure(FAILURE_DATABASE_FILTER_DELETE); const base::FilePath browse_prefix_set_filename = PrefixSetForFilename(browse_filename); const bool r6 = base::DeleteFile(browse_prefix_set_filename, false); if (!r6) RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE); const base::FilePath extension_blacklist_filename = ExtensionBlacklistDBFilename(filename_base_); const bool r7 = base::DeleteFile(extension_blacklist_filename, false); if (!r7) RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE); const base::FilePath side_effect_free_whitelist_filename = SideEffectFreeWhitelistDBFilename(filename_base_); const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename, false); if (!r8) RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE); const base::FilePath side_effect_free_whitelist_prefix_set_filename = PrefixSetForFilename(side_effect_free_whitelist_filename); const bool r9 = base::DeleteFile( side_effect_free_whitelist_prefix_set_filename, false); if (!r9) RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE); const bool r10 = base::DeleteFile(IpBlacklistDBFilename(filename_base_), false); if (!r10) RecordFailure(FAILURE_IP_BLACKLIST_DELETE); const bool r11 = base::DeleteFile(UnwantedSoftwareDBFilename(filename_base_), false); if (!r11) RecordFailure(FAILURE_UNWANTED_SOFTWARE_PREFIX_SET_DELETE); return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10 && r11; } void SafeBrowsingDatabaseNew::WritePrefixSet( const base::FilePath& db_filename, const safe_browsing::PrefixSet* prefix_set, FailureType write_failure_type) { DCHECK(thread_checker_.CalledOnValidThread()); if (!prefix_set) return; const base::FilePath prefix_set_filename = PrefixSetForFilename(db_filename); const base::TimeTicks before = base::TimeTicks::Now(); const bool write_ok = prefix_set->WriteFile(prefix_set_filename); UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before); const int64 file_size = GetFileSizeOrZero(prefix_set_filename); UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes", static_cast(file_size / 1024)); if (!write_ok) RecordFailure(write_failure_type); #if defined(OS_MACOSX) base::mac::SetFileBackupExclusion(prefix_set_filename); #endif } void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) { DCHECK(thread_checker_.CalledOnValidThread()); base::AutoLock locked(lookup_lock_); whitelist->second = true; whitelist->first.clear(); } void SafeBrowsingDatabaseNew::LoadWhitelist( const std::vector& full_hashes, SBWhitelist* whitelist) { DCHECK(thread_checker_.CalledOnValidThread()); if (full_hashes.size() > kMaxWhitelistSize) { WhitelistEverything(whitelist); return; } std::vector new_whitelist; new_whitelist.reserve(full_hashes.size()); for (std::vector::const_iterator it = full_hashes.begin(); it != full_hashes.end(); ++it) { new_whitelist.push_back(it->full_hash); } std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess); SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl); if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), kill_switch, SBFullHashLess)) { // The kill switch is whitelisted hence we whitelist all URLs. WhitelistEverything(whitelist); } else { base::AutoLock locked(lookup_lock_); whitelist->second = false; whitelist->first.swap(new_whitelist); } } void SafeBrowsingDatabaseNew::LoadIpBlacklist( const std::vector& full_hashes) { DCHECK(thread_checker_.CalledOnValidThread()); IPBlacklist new_blacklist; for (std::vector::const_iterator it = full_hashes.begin(); it != full_hashes.end(); ++it) { const char* full_hash = it->full_hash.full_hash; DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash)); // The format of the IP blacklist is: // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes. std::string hashed_ip_prefix(full_hash, base::kSHA1Length); size_t prefix_size = static_cast(full_hash[base::kSHA1Length]); if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) { RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID); new_blacklist.clear(); // Load empty blacklist. break; } // We precompute the mask for the given subnet size to speed up lookups. // Basically we need to create a 16B long string which has the highest // |size| bits sets to one. std::string mask(net::kIPv6AddressSize, '\0'); mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF'); if ((prefix_size % 8) != 0) { mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8)); } DVLOG(2) << "Inserting malicious IP: " << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length) << " mask:" << base::HexEncode(mask.data(), mask.size()) << " prefix_size:" << prefix_size << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(), hashed_ip_prefix.size()); new_blacklist[mask].insert(hashed_ip_prefix); } base::AutoLock locked(lookup_lock_); ip_blacklist_.swap(new_blacklist); } bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl); std::vector full_hashes; full_hashes.push_back(malware_kill_switch); return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); } bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() { base::AutoLock locked(lookup_lock_); return csd_whitelist_.second; }