author     gcasto@chromium.org <gcasto@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2011-02-14 23:36:51 +0000
committer  gcasto@chromium.org <gcasto@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2011-02-14 23:36:51 +0000
commit     3f07eb3426053761be33e665a3aba40282b885a1 (patch)
tree       c931ebbc0cf9d8a87a8cd9f6ab16c3d80fbf900a /chrome/browser/safe_browsing
parent     4f792ff35755c0e7d2df93455003373c95a2504f (diff)
Add caching to client-side phishing detection.
BUG=None
TEST=Ran client_side_detection_service_unittest
Review URL: http://codereview.chromium.org/6374017
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@74874 0039d316-1c4b-4281-b951-d872f2087c98
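In short, the service now remembers recent server verdicts per URL and answers repeat lookups locally. Below is a rough standalone sketch of the validity rule this patch adds (plain C++ with std::chrono and std::map rather than Chromium's base::Time, base::TimeDelta, and linked_ptr; the struct and function names and the interval values mirror the patch, the rest is illustrative):

// Standalone sketch (not Chromium code) of the cache-validity rule added in
// this patch. Interval values mirror kPositiveCacheInterval (30 minutes) and
// kNegativeCacheInterval (1 day); the std::chrono/std::map plumbing is
// illustrative only.
#include <chrono>
#include <iostream>
#include <map>
#include <string>

using Clock = std::chrono::system_clock;

struct CacheState {
  bool is_phishing;             // last verdict returned by the server
  Clock::time_point timestamp;  // when that verdict was received
};

constexpr std::chrono::minutes kPositiveCacheInterval{30};
constexpr std::chrono::hours kNegativeCacheInterval{24};

// Returns true and fills *is_phishing if a still-valid entry exists for url.
bool GetCachedResult(const std::map<std::string, CacheState>& cache,
                     const std::string& url, bool* is_phishing) {
  auto it = cache.find(url);
  if (it == cache.end())
    return false;
  const CacheState& state = it->second;
  const auto age = Clock::now() - state.timestamp;
  // Positive (phishing) verdicts expire quickly so false positives can be
  // corrected; negative verdicts are trusted for a full day.
  const bool valid = state.is_phishing ? age < kPositiveCacheInterval
                                       : age < kNegativeCacheInterval;
  if (valid)
    *is_phishing = state.is_phishing;
  return valid;
}

int main() {
  std::map<std::string, CacheState> cache;
  cache["http://a.com/"] = {true, Clock::now() - std::chrono::minutes(5)};
  cache["http://b.com/"] = {true, Clock::now() - std::chrono::hours(2)};
  bool phishy = false;
  std::cout << GetCachedResult(cache, "http://a.com/", &phishy) << "\n";  // 1: fresh positive hit
  std::cout << GetCachedResult(cache, "http://b.com/", &phishy) << "\n";  // 0: positive entry expired
  return 0;
}

Positive verdicts are deliberately short-lived so a false positive clears itself within half an hour, while negative verdicts suppress repeat pings for a day.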
Diffstat (limited to 'chrome/browser/safe_browsing')
3 files changed, 224 insertions, 22 deletions
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc
index 9d2bb07..6a0a99e 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc
@@ -32,7 +32,14 @@ namespace safe_browsing {
-const int ClientSideDetectionService::kMaxReportsPerDay = 3;
+const int ClientSideDetectionService::kMaxReportsPerInterval = 3;
+
+const base::TimeDelta ClientSideDetectionService::kReportsInterval =
+    base::TimeDelta::FromDays(1);
+const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval =
+    base::TimeDelta::FromDays(1);
+const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval =
+    base::TimeDelta::FromMinutes(30);
 
 const char ClientSideDetectionService::kClientReportPhishingUrl[] =
     "https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
@@ -44,6 +51,10 @@ struct ClientSideDetectionService::ClientReportInfo {
   GURL phishing_url;
 };
 
+ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
+    : is_phishing(phish),
+      timestamp(time) {}
+
 // ShouldClassifyUrlRequest tracks the pre-classification checks for a
 // toplevel URL that has started loading into a renderer. When these
 // checks are complete, the renderer is notified if it should run
@@ -330,9 +341,26 @@ void ClientSideDetectionService::StartClientReportPhishingRequest(
   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
   scoped_ptr<ClientReportPhishingRequestCallback> cb(callback);
 
-  if (GetNumReportsPerDay() > kMaxReportsPerDay) {
-    LOG(WARNING) << "Too many report phishing requests sent in the last day, "
-                 << "not checking " << phishing_url;
+  bool is_phishing;
+  if (GetCachedResult(phishing_url, &is_phishing)) {
+    VLOG(1) << "Satisfying request for " << phishing_url << " from cache";
+    UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
+    cb->Run(phishing_url, is_phishing);
+    return;
+  }
+
+  // We limit the number of distinct pings to kMaxReports, but we don't count
+  // urls already in the cache against this number. We don't want to start
+  // classifying too many pages as phishing, but for those that we already
+  // think are phishing we want to give ourselves a chance to fix false
+  // positives.
+  if (cache_.find(phishing_url) != cache_.end()) {
+    VLOG(1) << "Refreshing cache for " << phishing_url;
+    UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1);
+  } else if (GetNumReports() > kMaxReportsPerInterval) {
+    VLOG(1) << "Too many report phishing requests sent in the last "
+            << kReportsInterval.InHours() << " hours, not checking "
+            << phishing_url;
     UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1);
     cb->Run(phishing_url, false);
     return;
@@ -343,9 +371,8 @@ void ClientSideDetectionService::StartClientReportPhishingRequest(
   request.set_client_score(static_cast<float>(score));
   std::string request_data;
   if (!request.SerializeToString(&request_data)) {
-    // For consistency, we always call the callback asynchronously, rather than
-    // directly from this method.
-    LOG(ERROR) << "Unable to serialize the CSD request. Proto file changed?";
+    UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1);
+    VLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
     cb->Run(phishing_url, false);
     return;
   }
@@ -410,8 +437,11 @@ void ClientSideDetectionService::HandlePhishingVerdict(
     const std::string& data) {
   ClientPhishingResponse response;
   scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
-  if (status.is_success() && RC_REQUEST_OK == response_code &&
+  if (status.is_success() && RC_REQUEST_OK == response_code &&
       response.ParseFromString(data)) {
+    // Cache response, possibly flushing an old one.
+    cache_[info->phishing_url] =
+        make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
     info->callback->Run(info->phishing_url, response.phishy());
   } else {
     DLOG(ERROR) << "Unable to get the server verdict for URL: "
@@ -422,11 +452,53 @@ void ClientSideDetectionService::HandlePhishingVerdict(
   delete source;
 }
 
-int ClientSideDetectionService::GetNumReportsPerDay() {
-  base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1);
+bool ClientSideDetectionService::GetCachedResult(const GURL& url,
+                                                 bool* is_phishing) {
+  UpdateCache();
+
+  PhishingCache::iterator it = cache_.find(url);
+  if (it == cache_.end()) {
+    return false;
+  }
+
+  // We still need to check if the result is valid.
+  const CacheState& cache_state = *it->second;
+  if (cache_state.is_phishing ?
+      cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval :
+      cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) {
+    *is_phishing = cache_state.is_phishing;
+    return true;
+  }
+  return false;
+}
+
+void ClientSideDetectionService::UpdateCache() {
+  // Since we limit the number of requests but allow pass-through for cache
+  // refreshes, we don't want to remove elements from the cache if they
+  // could be used for this purpose even if we will not use the entry to
+  // satisfy the request from the cache.
+  base::TimeDelta positive_cache_interval =
+      std::max(kPositiveCacheInterval, kReportsInterval);
+  base::TimeDelta negative_cache_interval =
+      std::max(kNegativeCacheInterval, kReportsInterval);
+
+  // Remove elements from the cache that will no longer be used.
+  for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
+    const CacheState& cache_state = *it->second;
+    if (cache_state.is_phishing ?
+        cache_state.timestamp > base::Time::Now() - positive_cache_interval :
+        cache_state.timestamp > base::Time::Now() - negative_cache_interval) {
+      ++it;
+    } else {
+      cache_.erase(it++);
+    }
+  }
+}
+
+int ClientSideDetectionService::GetNumReports() {
+  base::Time cutoff = base::Time::Now() - kReportsInterval;
 
-  // Erase elements older than a day because we will never care about them
-  // again.
+  // Erase items older than cutoff because we will never care about them again.
   while (!phishing_report_times_.empty() &&
          phishing_report_times_.front() < cutoff) {
     phishing_report_times_.pop();
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h
index c299ca6..fcc823d 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.h
+++ b/chrome/browser/safe_browsing/client_side_detection_service.h
@@ -25,6 +25,7 @@
 #include "base/callback.h"
 #include "base/file_path.h"
 #include "base/gtest_prod_util.h"
+#include "base/linked_ptr.h"
 #include "base/platform_file.h"
 #include "base/ref_counted.h"
 #include "base/scoped_callback_factory.h"
@@ -109,9 +110,22 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
     ERROR_STATUS,
   };
 
+  // CacheState holds all information necessary to respond to a caller without
+  // actually making a HTTP request.
+  struct CacheState {
+    bool is_phishing;
+    base::Time timestamp;
+
+    CacheState(bool phish, base::Time time);
+  };
+  typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;
+
   static const char kClientReportPhishingUrl[];
   static const char kClientModelUrl[];
-  static const int kMaxReportsPerDay;
+  static const int kMaxReportsPerInterval;
+  static const base::TimeDelta kReportsInterval;
+  static const base::TimeDelta kNegativeCacheInterval;
+  static const base::TimeDelta kPositiveCacheInterval;
 
   // Use Create() method to create an instance of this object.
   ClientSideDetectionService(const FilePath& model_path,
@@ -175,9 +189,14 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
                              const ResponseCookies& cookies,
                              const std::string& data);
 
-  // Get the number of phishing reports that we have sent over the last 24
-  // hours.
-  int GetNumReportsPerDay();
+  // Returns true and sets is_phishing if url is in the cache and valid.
+  bool GetCachedResult(const GURL& url, bool* is_phishing);
+
+  // Invalidate cache results which are no longer useful.
+  void UpdateCache();
+
+  // Get the number of phishing reports that we have sent over kReportsInterval
+  int GetNumReports();
 
   FilePath model_path_;
   ModelStatus model_status_;
@@ -191,6 +210,14 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
   struct ClientReportInfo;
   std::map<const URLFetcher*, ClientReportInfo*> client_phishing_reports_;
 
+  // Cache of completed requests. Used to satisfy requests for the same urls
+  // as long as the next request falls within our caching window (which is
+  // determined by kNegativeCacheInterval and kPositiveCacheInterval). The
+  // size of this cache is limited by kMaxReportsPerDay *
+  // ceil(InDays(max(kNegativeCacheInterval, kPositiveCacheInterval))).
+  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
+  PhishingCache cache_;
+
   // Timestamp of when we sent a phishing request. Used to limit the number
   // of phishing requests that we send in a day.
   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
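Before the unit-test diff below, the request-throttling decision from StartClientReportPhishingRequest above can be restated as a small standalone sketch (plain C++; Decide and its parameters are hypothetical names, only kMaxReportsPerInterval comes from the patch): a fresh cache entry answers the request locally, a stale-but-present entry may always be re-pinged to refresh the cache, and a never-seen URL is dropped once the ping budget for the interval is used up.

// Standalone sketch (hypothetical names, not the actual Chromium API) of the
// throttling rule: cached URLs may always refresh; unknown URLs are rate limited.
#include <cassert>

constexpr int kMaxReportsPerInterval = 3;  // mirrors the constant added above

enum class Decision { kAnswerFromCache, kSendPing, kDropRequest };

// `in_cache`/`entry_valid` stand in for the cache_ lookup and GetCachedResult();
// `reports_in_interval` stands in for GetNumReports().
Decision Decide(bool in_cache, bool entry_valid, int reports_in_interval) {
  if (in_cache && entry_valid)
    return Decision::kAnswerFromCache;   // no network traffic at all
  if (in_cache)
    return Decision::kSendPing;          // cache refresh bypasses the limit
  if (reports_in_interval > kMaxReportsPerInterval)
    return Decision::kDropRequest;       // new URL over the rate limit
  return Decision::kSendPing;
}

int main() {
  assert(Decide(true, true, 100) == Decision::kAnswerFromCache);
  assert(Decide(true, false, 100) == Decision::kSendPing);      // refresh allowed
  assert(Decide(false, false, 100) == Decision::kDropRequest);  // throttled
  assert(Decide(false, false, 0) == Decision::kSendPing);
  return 0;
}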
diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
index b87758c..0e7cdbd 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
@@ -90,14 +90,66 @@ class ClientSideDetectionServiceTest : public testing::Test {
                                      response_data, success);
   }
 
-  int GetNumReportsPerDay() {
-    return csd_service_->GetNumReportsPerDay();
+  int GetNumReports() {
+    return csd_service_->GetNumReports();
   }
 
   std::queue<base::Time>& GetPhishingReportTimes() {
     return csd_service_->phishing_report_times_;
   }
 
+  void SetCache(const GURL& gurl, bool is_phishing, base::Time time) {
+    csd_service_->cache_[gurl] =
+        make_linked_ptr(new ClientSideDetectionService::CacheState(is_phishing,
+                                                                   time));
+  }
+
+  void TestCache() {
+    ClientSideDetectionService::PhishingCache& cache = csd_service_->cache_;
+    base::Time now = base::Time::Now();
+    base::Time time = now - ClientSideDetectionService::kNegativeCacheInterval +
+        base::TimeDelta::FromMinutes(5);
+    cache[GURL("http://first.url.com/")] =
+        make_linked_ptr(new ClientSideDetectionService::CacheState(false,
+                                                                   time));
+
+    time = now - ClientSideDetectionService::kNegativeCacheInterval -
+        base::TimeDelta::FromHours(1);
+    cache[GURL("http://second.url.com/")] =
+        make_linked_ptr(new ClientSideDetectionService::CacheState(false,
+                                                                   time));
+
+    time = now - ClientSideDetectionService::kPositiveCacheInterval -
+        base::TimeDelta::FromMinutes(5);
+    cache[GURL("http://third.url.com/")] =
+        make_linked_ptr(new ClientSideDetectionService::CacheState(true, time));
+
+    time = now - ClientSideDetectionService::kPositiveCacheInterval +
+        base::TimeDelta::FromMinutes(5);
+    cache[GURL("http://fourth.url.com/")] =
+        make_linked_ptr(new ClientSideDetectionService::CacheState(true, time));
+
+    csd_service_->UpdateCache();
+
+    // 3 elements should be in the cache, the first, third, and fourth.
+    EXPECT_EQ(3U, cache.size());
+    EXPECT_TRUE(cache.find(GURL("http://first.url.com/")) != cache.end());
+    EXPECT_TRUE(cache.find(GURL("http://third.url.com/")) != cache.end());
+    EXPECT_TRUE(cache.find(GURL("http://fourth.url.com/")) != cache.end());
+
+    // While 3 elements remain, only the first and the fourth are actually
+    // valid.
+    bool is_phishing;
+    EXPECT_TRUE(csd_service_->GetCachedResult(GURL("http://first.url.com"),
+                                              &is_phishing));
+    EXPECT_FALSE(is_phishing);
+    EXPECT_FALSE(csd_service_->GetCachedResult(GURL("http://third.url.com"),
+                                               &is_phishing));
+    EXPECT_TRUE(csd_service_->GetCachedResult(GURL("http://fourth.url.com"),
+                                              &is_phishing));
+    EXPECT_TRUE(is_phishing);
+  }
+
 protected:
   scoped_ptr<ClientSideDetectionService> csd_service_;
   scoped_ptr<FakeURLFetcherFactory> factory_;
@@ -192,16 +244,57 @@ TEST_F(ClientSideDetectionServiceTest, SendClientReportPhishingRequest) {
   SetClientReportPhishingResponse(response.SerializeAsString(),
                                   true /* success */);
   EXPECT_TRUE(SendClientReportPhishingRequest(url, score));
+
+  // Caching causes this to still count as phishy.
   response.set_phishy(false);
   SetClientReportPhishingResponse(response.SerializeAsString(),
                                   true /* success */);
-  EXPECT_FALSE(SendClientReportPhishingRequest(url, score));
+  EXPECT_TRUE(SendClientReportPhishingRequest(url, score));
+
+  // This request will fail and should not be cached.
+  GURL second_url("http://b.com/");
+  response.set_phishy(false);
+  SetClientReportPhishingResponse(response.SerializeAsString(),
+                                  false /* success*/);
+  EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score));
+
+  // Verify that the previous request was not cached.
+  response.set_phishy(true);
+  SetClientReportPhishingResponse(response.SerializeAsString(),
+                                  true /* success */);
+  EXPECT_TRUE(SendClientReportPhishingRequest(second_url, score));
+
+  // This request is blocked because it's not in the cache and we have more
+  // than 3 requests.
+  GURL third_url("http://c.com");
+  response.set_phishy(true);
+  SetClientReportPhishingResponse(response.SerializeAsString(),
+                                  true /* success */);
+  EXPECT_FALSE(SendClientReportPhishingRequest(third_url, score));
+
+  // Verify that caching still works even when new requests are blocked.
+  response.set_phishy(true);
+  SetClientReportPhishingResponse(response.SerializeAsString(),
+                                  true /* success */);
+  EXPECT_TRUE(SendClientReportPhishingRequest(url, score));
+
+  // Verify that we allow cache refreshing even when requests are blocked.
+  base::Time cache_time = base::Time::Now() - base::TimeDelta::FromHours(1);
+  SetCache(second_url, true, cache_time);
+
+  // Even though this element is in the cache, it's not currently valid so
+  // we make request and return that value instead.
+  response.set_phishy(false);
+  SetClientReportPhishingResponse(response.SerializeAsString(),
+                                  true /* success */);
+  EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score));
 
   base::Time after = base::Time::Now();
 
-  // Check that we have recorded 3 requests, all within the correct time range.
+  // Check that we have recorded 5 requests, all within the correct time range.
+  // The blocked request and the cached requests should not be present.
   std::queue<base::Time>& report_times = GetPhishingReportTimes();
-  EXPECT_EQ(3U, report_times.size());
+  EXPECT_EQ(5U, report_times.size());
   while (!report_times.empty()) {
     base::Time time = report_times.back();
     report_times.pop();
@@ -225,7 +318,17 @@ TEST_F(ClientSideDetectionServiceTest, GetNumReportTest) {
   report_times.push(now);
   report_times.push(now);
 
-  EXPECT_EQ(2, GetNumReportsPerDay());
+  EXPECT_EQ(2, GetNumReports());
+}
+
+TEST_F(ClientSideDetectionServiceTest, CacheTest) {
+  SetModelFetchResponse("bogus model", true /* success */);
+  ScopedTempDir tmp_dir;
+  ASSERT_TRUE(tmp_dir.CreateUniqueTempDir());
+  csd_service_.reset(ClientSideDetectionService::Create(
+      tmp_dir.path().AppendASCII("model"), NULL));
+
+  TestCache();
 }
 
 // We use a separate test fixture for testing the ClientSideDetectionService's