From 200abc3b412aa5129bca6c90cfe298b05ad547b2 Mon Sep 17 00:00:00 2001 From: "paulg@google.com" Date: Fri, 5 Sep 2008 01:44:33 +0000 Subject: Cache empty responses from the SafeBrowsing servers for GetHash requests so that we don't keep asking for full hashes that don't exist. We flush this cache with each update, which is a little aggressive, but on the safe side. BUG=1358225 Review URL: http://codereview.chromium.org/454 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@1748 0039d316-1c4b-4281-b951-d872f2087c98 --- chrome/browser/safe_browsing/protocol_manager.cc | 6 ++++-- .../safe_browsing/safe_browsing_database.cc | 24 ++++++++++++++++++++++ .../browser/safe_browsing/safe_browsing_database.h | 10 +++++++-- .../safe_browsing_database_unittest.cc | 20 +++++++++++++++++- .../browser/safe_browsing/safe_browsing_service.cc | 12 +++++++---- .../browser/safe_browsing/safe_browsing_service.h | 6 ++++-- 6 files changed, 67 insertions(+), 11 deletions(-) (limited to 'chrome/browser/safe_browsing') diff --git a/chrome/browser/safe_browsing/protocol_manager.cc b/chrome/browser/safe_browsing/protocol_manager.cc index e66afc5..0f9aa5f 100644 --- a/chrome/browser/safe_browsing/protocol_manager.cc +++ b/chrome/browser/safe_browsing/protocol_manager.cc @@ -96,7 +96,7 @@ void SafeBrowsingProtocolManager::GetFullHash( // required to return empty results (i.e. treat the page as safe). if (gethash_error_count_ && Time::Now() <= next_gethash_time_) { std::vector full_hashes; - sb_service_->HandleGetHashResults(check, full_hashes); + sb_service_->HandleGetHashResults(check, full_hashes, false); return; } @@ -164,7 +164,9 @@ void SafeBrowsingProtocolManager::OnURLFetchComplete( fetcher.reset(it->first); SafeBrowsingService::SafeBrowsingCheck* check = it->second; std::vector full_hashes; + bool can_cache = false; if (response_code == 200 || response_code == 204) { + can_cache = true; gethash_error_count_ = 0; gethash_back_off_mult_ = 1; bool re_key = false; @@ -192,7 +194,7 @@ void SafeBrowsingProtocolManager::OnURLFetchComplete( // Call back the SafeBrowsingService with full_hashes, even if there was a // parse error or an error response code (in which case full_hashes will be // empty). We can't block the user regardless of the error status. - sb_service_->HandleGetHashResults(check, full_hashes); + sb_service_->HandleGetHashResults(check, full_hashes, can_cache); hash_requests_.erase(it); } else { diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index 0dea6e3..8a57559 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -219,6 +219,7 @@ bool SafeBrowsingDatabase::CreateTables() { // The SafeBrowsing service assumes this operation is synchronous. bool SafeBrowsingDatabase::ResetDatabase() { hash_cache_.clear(); + prefix_miss_cache_.clear(); bool rv = Close(); DCHECK(rv); @@ -295,6 +296,17 @@ bool SafeBrowsingDatabase::ContainsUrl( } if (!matching_list->empty() || !prefix_hits->empty()) { + // If all the prefixes are cached as 'misses', don't issue a GetHash. + bool all_misses = true; + for (std::vector::const_iterator it = prefix_hits->begin(); + it != prefix_hits->end(); ++it) { + if (prefix_miss_cache_.find(*it) == prefix_miss_cache_.end()) { + all_misses = false; + break; + } + } + if (all_misses) + return false; GetCachedFullHashes(prefix_hits, full_hits, last_update); return true; } @@ -441,6 +453,7 @@ void SafeBrowsingDatabase::StartThrottledWork() { } void SafeBrowsingDatabase::RunThrottledWork() { + prefix_miss_cache_.clear(); while (true) { bool done = ProcessChunks(); @@ -1183,7 +1196,18 @@ void SafeBrowsingDatabase::GetCachedFullHashes( } void SafeBrowsingDatabase::CacheHashResults( + const std::vector& prefixes, const std::vector& full_hits) { + if (full_hits.empty()) { + // These prefixes returned no results, so we store them in order to prevent + // asking for them again. We flush this cache at the next update. + for (std::vector::const_iterator it = prefixes.begin(); + it != prefixes.end(); ++it) { + prefix_miss_cache_.insert(*it); + } + return; + } + const Time now = Time::Now(); for (std::vector::const_iterator it = full_hits.begin(); it != full_hits.end(); ++it) { diff --git a/chrome/browser/safe_browsing/safe_browsing_database.h b/chrome/browser/safe_browsing/safe_browsing_database.h index b676665..4b4fe47 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.h +++ b/chrome/browser/safe_browsing/safe_browsing_database.h @@ -69,8 +69,11 @@ class SafeBrowsingDatabase { // to blocking user requests. void set_synchronous() { asynchronous_ = false; } - // Store the results of a GetHash response. - void CacheHashResults(const std::vector& full_hits); + // Store the results of a GetHash response. In the case of empty results, we + // cache the prefixes until the next update so that we don't have to issue + // further GetHash requests we know will be empty. + void CacheHashResults(const std::vector& prefixes, + const std::vector& full_hits); // Called when the user's machine has resumed from a lower power state. void HandleResume(); @@ -288,6 +291,9 @@ class SafeBrowsingDatabase { typedef stdext::hash_map HashCache; HashCache hash_cache_; + // Cache of prefixes that returned empty results (no full hash match). + std::set prefix_miss_cache_; + // The amount of time, in milliseconds, to wait before the next disk write. int disk_delay_; diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc index d9d4a19..0b409ed 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc @@ -327,7 +327,8 @@ void PopulateDatabaseForCacheTest(SafeBrowsingDatabase* database) { &full_hash.hash, sizeof(SBFullHash)); results.push_back(full_hash); - database->CacheHashResults(results); + std::vector prefixes; + database->CacheHashResults(prefixes, results); } TEST(SafeBrowsing, HashCaching) { @@ -448,6 +449,23 @@ TEST(SafeBrowsing, HashCaching) { database.ContainsUrl(GURL("http://www.evil.com/phishing.html"), &list, &prefixes, &full_hashes, expired); EXPECT_EQ(full_hashes.size(), 1); + + + // Testing prefix miss caching. + std::vector prefix_misses; + std::vector empty_full_hash; + prefix_misses.push_back(Sha256Prefix("http://www.bad.com/malware.html")); + prefix_misses.push_back(Sha256Prefix("http://www.bad.com/phishing.html")); + database.CacheHashResults(prefix_misses, empty_full_hash); + + // Prefixes with no full results are misses. + EXPECT_EQ(database.prefix_miss_cache_.size(), 2); + + // Update the database. + PopulateDatabaseForCacheTest(&database); + + // Prefix miss cache should be cleared. + EXPECT_EQ(database.prefix_miss_cache_.size(), 0); } void PrintStat(const wchar_t* name) { diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc index 11cfad0..4dbb70f 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.cc +++ b/chrome/browser/safe_browsing/safe_browsing_service.cc @@ -313,17 +313,20 @@ SafeBrowsingDatabase* SafeBrowsingService::GetDatabase() { // prefix hits detected in the database. void SafeBrowsingService::HandleGetHashResults( SafeBrowsingCheck* check, - const std::vector& full_hashes) { + const std::vector& full_hashes, + bool can_cache) { if (checks_.find(check) == checks_.end()) return; DCHECK(enabled_); + std::vector prefixes = check->prefix_hits; UMA_HISTOGRAM_LONG_TIMES(L"SB.Network", Time::Now() - check->start); OnHandleGetHashResults(check, full_hashes); // 'check' is deleted here. - db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( - this, &SafeBrowsingService::CacheHashResults, full_hashes)); + if (can_cache) + db_thread_->message_loop()->PostTask(FROM_HERE, NewRunnableMethod( + this, &SafeBrowsingService::CacheHashResults, prefixes, full_hashes)); } void SafeBrowsingService::OnHandleGetHashResults( @@ -516,9 +519,10 @@ void SafeBrowsingService::LogPauseDelay(TimeDelta time) { } void SafeBrowsingService::CacheHashResults( + const std::vector& prefixes, const std::vector& full_hashes) { DCHECK(MessageLoop::current() == db_thread_->message_loop()); - GetDatabase()->CacheHashResults(full_hashes); + GetDatabase()->CacheHashResults(prefixes, full_hashes); } void SafeBrowsingService::OnSuspend() { diff --git a/chrome/browser/safe_browsing/safe_browsing_service.h b/chrome/browser/safe_browsing/safe_browsing_service.h index fe7d0e7..6f10b81 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.h +++ b/chrome/browser/safe_browsing/safe_browsing_service.h @@ -117,7 +117,8 @@ class SafeBrowsingService // SafeBrowsing storage system. void HandleGetHashResults( SafeBrowsingCheck* check, - const std::vector& full_hashes); + const std::vector& full_hashes, + bool can_cache); void HandleChunk(const std::string& list, std::deque* chunks); void HandleChunkDelete(std::vector* chunk_deletes); void GetAllChunks(); @@ -198,7 +199,8 @@ class SafeBrowsingService void OnResetComplete(); // Store the results of a GetHash request. Runs on the database thread. - void CacheHashResults(const std::vector& full_hashes); + void CacheHashResults(const std::vector& prefixes, + const std::vector& full_hashes); // Internal worker function for processing full hashes. void OnHandleGetHashResults(SafeBrowsingCheck* check, -- cgit v1.1