author     gcasto@chromium.org <gcasto@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2011-02-14 23:36:51 +0000
committer  gcasto@chromium.org <gcasto@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2011-02-14 23:36:51 +0000
commit     3f07eb3426053761be33e665a3aba40282b885a1 (patch)
tree       c931ebbc0cf9d8a87a8cd9f6ab16c3d80fbf900a /chrome/browser/safe_browsing
parent     4f792ff35755c0e7d2df93455003373c95a2504f (diff)
Add caching to phishing client side detection.

BUG=None
TEST=Ran client_side_detection_service_unittest

Review URL: http://codereview.chromium.org/6374017

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@74874 0039d316-1c4b-4281-b951-d872f2087c98
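In outline, the patch keeps a per-URL map of the last server verdict with asymmetric expiry: phishing ("positive") verdicts are trusted for only 30 minutes, so corrected false positives propagate quickly, while non-phishing ("negative") verdicts are kept for a full day. A minimal standalone sketch of that idea, using std::chrono and std::map in place of Chromium's base::Time and linked_ptr (all names here are illustrative, not the classes from this patch):

// Standalone sketch of the verdict cache added by this patch. Illustrative
// only: std::chrono and std::map stand in for base::Time and linked_ptr.
#include <chrono>
#include <map>
#include <string>

using Clock = std::chrono::system_clock;

// Positive verdicts expire quickly so false positives can be fixed;
// negative verdicts are trusted for much longer (mirroring the
// kPositiveCacheInterval and kNegativeCacheInterval constants in the
// diff below).
constexpr std::chrono::minutes kPositiveTtl{30};
constexpr std::chrono::hours kNegativeTtl{24};

struct CacheState {
  bool is_phishing;
  Clock::time_point timestamp;
};

class VerdictCache {
 public:
  void Put(const std::string& url, bool is_phishing) {
    cache_[url] = CacheState{is_phishing, Clock::now()};
  }

  // Returns true and fills *is_phishing if a still-valid entry exists;
  // expired or missing entries return false, forcing a fresh request.
  bool Get(const std::string& url, bool* is_phishing) const {
    const auto it = cache_.find(url);
    if (it == cache_.end())
      return false;
    const CacheState& state = it->second;
    const auto age = Clock::now() - state.timestamp;
    const bool valid =
        state.is_phishing ? age < kPositiveTtl : age < kNegativeTtl;
    if (!valid)
      return false;
    *is_phishing = state.is_phishing;
    return true;
  }

 private:
  std::map<std::string, CacheState> cache_;
};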
Diffstat (limited to 'chrome/browser/safe_browsing')
-rw-r--r--  chrome/browser/safe_browsing/client_side_detection_service.cc            96
-rw-r--r--  chrome/browser/safe_browsing/client_side_detection_service.h             35
-rw-r--r--  chrome/browser/safe_browsing/client_side_detection_service_unittest.cc  115
3 files changed, 224 insertions, 22 deletions
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc
index 9d2bb07..6a0a99e 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc
@@ -32,7 +32,14 @@
namespace safe_browsing {
-const int ClientSideDetectionService::kMaxReportsPerDay = 3;
+const int ClientSideDetectionService::kMaxReportsPerInterval = 3;
+
+const base::TimeDelta ClientSideDetectionService::kReportsInterval =
+ base::TimeDelta::FromDays(1);
+const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval =
+ base::TimeDelta::FromDays(1);
+const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval =
+ base::TimeDelta::FromMinutes(30);
const char ClientSideDetectionService::kClientReportPhishingUrl[] =
"https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
@@ -44,6 +51,10 @@ struct ClientSideDetectionService::ClientReportInfo {
GURL phishing_url;
};
+ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
+ : is_phishing(phish),
+ timestamp(time) {}
+
// ShouldClassifyUrlRequest tracks the pre-classification checks for a
// toplevel URL that has started loading into a renderer. When these
// checks are complete, the renderer is notified if it should run
@@ -330,9 +341,26 @@ void ClientSideDetectionService::StartClientReportPhishingRequest(
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
scoped_ptr<ClientReportPhishingRequestCallback> cb(callback);
- if (GetNumReportsPerDay() > kMaxReportsPerDay) {
- LOG(WARNING) << "Too many report phishing requests sent in the last day, "
- << "not checking " << phishing_url;
+ bool is_phishing;
+ if (GetCachedResult(phishing_url, &is_phishing)) {
+ VLOG(1) << "Satisfying request for " << phishing_url << " from cache";
+ UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
+ cb->Run(phishing_url, is_phishing);
+ return;
+ }
+
+ // We limit the number of distinct pings to kMaxReportsPerInterval, but we don't count
+ // urls already in the cache against this number. We don't want to start
+ // classifying too many pages as phishing, but for those that we already
+ // think are phishing we want to give ourselves a chance to fix false
+ // positives.
+ if (cache_.find(phishing_url) != cache_.end()) {
+ VLOG(1) << "Refreshing cache for " << phishing_url;
+ UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1);
+ } else if (GetNumReports() > kMaxReportsPerInterval) {
+ VLOG(1) << "Too many report phishing requests sent in the last "
+ << kReportsInterval.InHours() << " hours, not checking "
+ << phishing_url;
UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1);
cb->Run(phishing_url, false);
return;
@@ -343,9 +371,8 @@ void ClientSideDetectionService::StartClientReportPhishingRequest(
request.set_client_score(static_cast<float>(score));
std::string request_data;
if (!request.SerializeToString(&request_data)) {
- // For consistency, we always call the callback asynchronously, rather than
- // directly from this method.
- LOG(ERROR) << "Unable to serialize the CSD request. Proto file changed?";
+ UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1);
+ VLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
cb->Run(phishing_url, false);
return;
}
@@ -410,8 +437,11 @@ void ClientSideDetectionService::HandlePhishingVerdict(
const std::string& data) {
ClientPhishingResponse response;
scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
- if (status.is_success() && RC_REQUEST_OK == response_code &&
+ if (status.is_success() && RC_REQUEST_OK == response_code &&
response.ParseFromString(data)) {
+ // Cache response, possibly flushing an old one.
+ cache_[info->phishing_url] =
+ make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
info->callback->Run(info->phishing_url, response.phishy());
} else {
DLOG(ERROR) << "Unable to get the server verdict for URL: "
@@ -422,11 +452,53 @@ void ClientSideDetectionService::HandlePhishingVerdict(
delete source;
}
-int ClientSideDetectionService::GetNumReportsPerDay() {
- base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1);
+bool ClientSideDetectionService::GetCachedResult(const GURL& url,
+ bool* is_phishing) {
+ UpdateCache();
+
+ PhishingCache::iterator it = cache_.find(url);
+ if (it == cache_.end()) {
+ return false;
+ }
+
+ // We still need to check if the result is valid.
+ const CacheState& cache_state = *it->second;
+ if (cache_state.is_phishing ?
+ cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval :
+ cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) {
+ *is_phishing = cache_state.is_phishing;
+ return true;
+ }
+ return false;
+}
+
+void ClientSideDetectionService::UpdateCache() {
+ // Since we limit the number of requests but allow pass-through for cache
+ // refreshes, we keep an entry alive as long as it could still be used for
+ // a refresh, even after it is too old to satisfy a request directly from
+ // the cache.
+ base::TimeDelta positive_cache_interval =
+ std::max(kPositiveCacheInterval, kReportsInterval);
+ base::TimeDelta negative_cache_interval =
+ std::max(kNegativeCacheInterval, kReportsInterval);
+
+ // Remove elements from the cache that will no longer be used.
+ for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
+ const CacheState& cache_state = *it->second;
+ if (cache_state.is_phishing ?
+ cache_state.timestamp > base::Time::Now() - positive_cache_interval :
+ cache_state.timestamp > base::Time::Now() - negative_cache_interval) {
+ ++it;
+ } else {
+ cache_.erase(it++);
+ }
+ }
+}
+
+int ClientSideDetectionService::GetNumReports() {
+ base::Time cutoff = base::Time::Now() - kReportsInterval;
- // Erase elements older than a day because we will never care about them
- // again.
+ // Erase items older than cutoff because we will never care about them again.
while (!phishing_report_times_.empty() &&
phishing_report_times_.front() < cutoff) {
phishing_report_times_.pop();
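The per-interval report limit is enforced with a sliding window: every ping's timestamp goes into a queue, GetNumReports() prunes entries older than kReportsInterval from the front, and the remaining queue size is the current count. The same pattern in a self-contained form (std::chrono stands in for base::Time; ReportLimiter is an illustrative name, not part of the patch):

#include <cassert>
#include <chrono>
#include <queue>

using Clock = std::chrono::system_clock;

class ReportLimiter {
 public:
  explicit ReportLimiter(Clock::duration interval) : interval_(interval) {}

  // The timestamp parameter (defaulting to now) keeps the window testable
  // without sleeping, much as the unit tests below pre-load
  // phishing_report_times_ with back-dated values.
  void RecordReport(Clock::time_point t = Clock::now()) {
    report_times_.push(t);
  }

  // Number of reports recorded within the last |interval_|. Entries older
  // than the cutoff are popped because they can never matter again.
  int GetNumReports() {
    const Clock::time_point cutoff = Clock::now() - interval_;
    while (!report_times_.empty() && report_times_.front() < cutoff) {
      report_times_.pop();
    }
    return static_cast<int>(report_times_.size());
  }

 private:
  Clock::duration interval_;
  std::queue<Clock::time_point> report_times_;
};

int main() {
  using std::chrono::hours;
  ReportLimiter limiter(hours(24));
  const Clock::time_point now = Clock::now();
  limiter.RecordReport(now - hours(25));  // outside the 24-hour window
  limiter.RecordReport(now);
  limiter.RecordReport(now);
  assert(limiter.GetNumReports() == 2);
  return 0;
}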
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h
index c299ca6..fcc823d 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.h
+++ b/chrome/browser/safe_browsing/client_side_detection_service.h
@@ -25,6 +25,7 @@
#include "base/callback.h"
#include "base/file_path.h"
#include "base/gtest_prod_util.h"
+#include "base/linked_ptr.h"
#include "base/platform_file.h"
#include "base/ref_counted.h"
#include "base/scoped_callback_factory.h"
@@ -109,9 +110,22 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
ERROR_STATUS,
};
+ // CacheState holds all information necessary to respond to a caller without
+ // actually making an HTTP request.
+ struct CacheState {
+ bool is_phishing;
+ base::Time timestamp;
+
+ CacheState(bool phish, base::Time time);
+ };
+ typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;
+
static const char kClientReportPhishingUrl[];
static const char kClientModelUrl[];
- static const int kMaxReportsPerDay;
+ static const int kMaxReportsPerInterval;
+ static const base::TimeDelta kReportsInterval;
+ static const base::TimeDelta kNegativeCacheInterval;
+ static const base::TimeDelta kPositiveCacheInterval;
// Use Create() method to create an instance of this object.
ClientSideDetectionService(const FilePath& model_path,
@@ -175,9 +189,14 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
const ResponseCookies& cookies,
const std::string& data);
- // Get the number of phishing reports that we have sent over the last 24
- // hours.
- int GetNumReportsPerDay();
+ // Returns true and sets is_phishing if url is in the cache and valid.
+ bool GetCachedResult(const GURL& url, bool* is_phishing);
+
+ // Invalidate cache results which are no longer useful.
+ void UpdateCache();
+
+ // Get the number of phishing reports that we have sent over the last
+ // kReportsInterval.
+ int GetNumReports();
FilePath model_path_;
ModelStatus model_status_;
@@ -191,6 +210,14 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
struct ClientReportInfo;
std::map<const URLFetcher*, ClientReportInfo*> client_phishing_reports_;
+ // Cache of completed requests. Used to satisfy requests for the same urls
+ // as long as the next request falls within our caching window (which is
+ // determined by kNegativeCacheInterval and kPositiveCacheInterval). The
+ // size of this cache is limited by kMaxReportsPerInterval *
+ // ceil(InDays(max(kNegativeCacheInterval, kPositiveCacheInterval))).
+ // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
+ PhishingCache cache_;
+
// Timestamp of when we sent a phishing request. Used to limit the number
// of phishing requests that we send in a day.
// TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
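For scale: with the constants introduced here (kMaxReportsPerInterval = 3, kReportsInterval = kNegativeCacheInterval = one day, kPositiveCacheInterval = 30 minutes), that bound works out to roughly 3 * ceil(1) = 3 distinct entries per day, since a cache refresh overwrites its entry in place rather than adding a new one.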
diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
index b87758c..0e7cdbd 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
@@ -90,14 +90,66 @@ class ClientSideDetectionServiceTest : public testing::Test {
response_data, success);
}
- int GetNumReportsPerDay() {
- return csd_service_->GetNumReportsPerDay();
+ int GetNumReports() {
+ return csd_service_->GetNumReports();
}
std::queue<base::Time>& GetPhishingReportTimes() {
return csd_service_->phishing_report_times_;
}
+ void SetCache(const GURL& gurl, bool is_phishing, base::Time time) {
+ csd_service_->cache_[gurl] =
+ make_linked_ptr(new ClientSideDetectionService::CacheState(is_phishing,
+ time));
+ }
+
+ void TestCache() {
+ ClientSideDetectionService::PhishingCache& cache = csd_service_->cache_;
+ base::Time now = base::Time::Now();
+ base::Time time = now - ClientSideDetectionService::kNegativeCacheInterval +
+ base::TimeDelta::FromMinutes(5);
+ cache[GURL("http://first.url.com/")] =
+ make_linked_ptr(new ClientSideDetectionService::CacheState(false,
+ time));
+
+ time = now - ClientSideDetectionService::kNegativeCacheInterval -
+ base::TimeDelta::FromHours(1);
+ cache[GURL("http://second.url.com/")] =
+ make_linked_ptr(new ClientSideDetectionService::CacheState(false,
+ time));
+
+ time = now - ClientSideDetectionService::kPositiveCacheInterval -
+ base::TimeDelta::FromMinutes(5);
+ cache[GURL("http://third.url.com/")] =
+ make_linked_ptr(new ClientSideDetectionService::CacheState(true, time));
+
+ time = now - ClientSideDetectionService::kPositiveCacheInterval +
+ base::TimeDelta::FromMinutes(5);
+ cache[GURL("http://fourth.url.com/")] =
+ make_linked_ptr(new ClientSideDetectionService::CacheState(true, time));
+
+ csd_service_->UpdateCache();
+
+ // 3 elements should be in the cache: the first, third, and fourth.
+ EXPECT_EQ(3U, cache.size());
+ EXPECT_TRUE(cache.find(GURL("http://first.url.com/")) != cache.end());
+ EXPECT_TRUE(cache.find(GURL("http://third.url.com/")) != cache.end());
+ EXPECT_TRUE(cache.find(GURL("http://fourth.url.com/")) != cache.end());
+
+ // While 3 elements remain, only the first and the fourth are actually
+ // valid.
+ bool is_phishing;
+ EXPECT_TRUE(csd_service_->GetCachedResult(GURL("http://first.url.com"),
+ &is_phishing));
+ EXPECT_FALSE(is_phishing);
+ EXPECT_FALSE(csd_service_->GetCachedResult(GURL("http://third.url.com"),
+ &is_phishing));
+ EXPECT_TRUE(csd_service_->GetCachedResult(GURL("http://fourth.url.com"),
+ &is_phishing));
+ EXPECT_TRUE(is_phishing);
+ }
+
protected:
scoped_ptr<ClientSideDetectionService> csd_service_;
scoped_ptr<FakeURLFetcherFactory> factory_;
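TestCache makes expiry deterministic by back-dating entry timestamps around the interval boundaries instead of waiting for real time to pass. The same technique in a self-contained form (toy types, standard library only, not the fixture above):

#include <cassert>
#include <chrono>
#include <map>
#include <string>

using Clock = std::chrono::system_clock;

struct Entry {
  bool is_phishing;
  Clock::time_point timestamp;
};

int main() {
  using std::chrono::minutes;
  const minutes kPositiveTtl(30);  // mirrors kPositiveCacheInterval
  std::map<std::string, Entry> cache;
  const Clock::time_point now = Clock::now();

  // An entry just inside the positive window is still valid...
  cache["http://fresh.example/"] = Entry{true, now - kPositiveTtl + minutes(5)};
  // ...while one just outside it has expired.
  cache["http://stale.example/"] = Entry{true, now - kPositiveTtl - minutes(5)};

  for (const auto& kv : cache) {
    const bool valid = now - kv.second.timestamp < kPositiveTtl;
    assert(valid == (kv.first == "http://fresh.example/"));
  }
  return 0;
}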
@@ -192,16 +244,57 @@ TEST_F(ClientSideDetectionServiceTest, SendClientReportPhishingRequest) {
SetClientReportPhishingResponse(response.SerializeAsString(),
true /* success */);
EXPECT_TRUE(SendClientReportPhishingRequest(url, score));
+
+ // Caching causes this to still count as phishy.
response.set_phishy(false);
SetClientReportPhishingResponse(response.SerializeAsString(),
true /* success */);
- EXPECT_FALSE(SendClientReportPhishingRequest(url, score));
+ EXPECT_TRUE(SendClientReportPhishingRequest(url, score));
+
+ // This request will fail and should not be cached.
+ GURL second_url("http://b.com/");
+ response.set_phishy(false);
+ SetClientReportPhishingResponse(response.SerializeAsString(),
+ false /* success */);
+ EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score));
+
+ // Verify that the previous request was not cached.
+ response.set_phishy(true);
+ SetClientReportPhishingResponse(response.SerializeAsString(),
+ true /* success */);
+ EXPECT_TRUE(SendClientReportPhishingRequest(second_url, score));
+
+ // This request is blocked because its URL is not in the cache and we have
+ // already sent more than kMaxReportsPerInterval requests in the current
+ // reporting interval.
+ GURL third_url("http://c.com");
+ response.set_phishy(true);
+ SetClientReportPhishingResponse(response.SerializeAsString(),
+ true /* success */);
+ EXPECT_FALSE(SendClientReportPhishingRequest(third_url, score));
+
+ // Verify that caching still works even when new requests are blocked.
+ response.set_phishy(true);
+ SetClientReportPhishingResponse(response.SerializeAsString(),
+ true /* success */);
+ EXPECT_TRUE(SendClientReportPhishingRequest(url, score));
+
+ // Verify that we allow cache refreshing even when requests are blocked.
+ base::Time cache_time = base::Time::Now() - base::TimeDelta::FromHours(1);
+ SetCache(second_url, true, cache_time);
+
+ // Even though this element is in the cache, it's not currently valid so
+ // we make a request and return that verdict instead.
+ response.set_phishy(false);
+ SetClientReportPhishingResponse(response.SerializeAsString(),
+ true /* success */);
+ EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score));
base::Time after = base::Time::Now();
- // Check that we have recorded 3 requests, all within the correct time range.
+ // Check that we have recorded 5 requests, all within the correct time range.
+ // The blocked request and the cached requests should not be present.
std::queue<base::Time>& report_times = GetPhishingReportTimes();
- EXPECT_EQ(3U, report_times.size());
+ EXPECT_EQ(5U, report_times.size());
while (!report_times.empty()) {
base::Time time = report_times.back();
report_times.pop();
@@ -225,7 +318,17 @@ TEST_F(ClientSideDetectionServiceTest, GetNumReportTest) {
report_times.push(now);
report_times.push(now);
- EXPECT_EQ(2, GetNumReportsPerDay());
+ EXPECT_EQ(2, GetNumReports());
+}
+
+TEST_F(ClientSideDetectionServiceTest, CacheTest) {
+ SetModelFetchResponse("bogus model", true /* success */);
+ ScopedTempDir tmp_dir;
+ ASSERT_TRUE(tmp_dir.CreateUniqueTempDir());
+ csd_service_.reset(ClientSideDetectionService::Create(
+ tmp_dir.path().AppendASCII("model"), NULL));
+
+ TestCache();
}
// We use a separate test fixture for testing the ClientSideDetectionService's