Remove the check of the whitelist entries in ClientPhishingResponse.

The URL is always included in the pingback now, so this whitelist checking can happen on the server. Also, mark this and the ClientPhishingRequest.hash_prefix field as OBSOLETE in the protocol buffer file. BUG=none TEST=updated ClientSideDetectionServiceTest Review URL: http://codereview.chromium.org/8854004 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@113561 0039d316-1c4b-4281-b951-d872f2087c98
author: bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-12-08 04:29:25 +0000
committer: bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-12-08 04:29:25 +0000
commit: f508e4e37229721672f9f567b856986859ff323c (patch)
tree: c66f307681bd976ff3474a540649cc05476ed06d
parent: b1c8b494a5a11f64d452d9ace5c98bd599bb1a38 (diff)
download: chromium_src-f508e4e37229721672f9f567b856986859ff323c.zip
chromium_src-f508e4e37229721672f9f567b856986859ff323c.tar.gz
chromium_src-f508e4e37229721672f9f567b856986859ff323c.tar.bz2
4 files changed, 11 insertions, 103 deletions
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc
index 341395b..05d6dd81 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc
@@ -18,7 +18,6 @@
 #include "chrome/browser/browser_process.h"
 #include "chrome/browser/prefs/pref_service.h"
 #include "chrome/browser/profiles/profile.h"
-#include "chrome/browser/safe_browsing/safe_browsing_util.h"
 #include "chrome/common/net/http_return.h"
 #include "chrome/common/pref_names.h"
 #include "chrome/common/safe_browsing/client_model.pb.h"
@@ -383,8 +382,7 @@ void ClientSideDetectionService::HandlePhishingVerdict(
     // Cache response, possibly flushing an old one.
     cache_[info->phishing_url] =
         make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
-    is_phishing = (response.phishy() &&
-                   !IsFalsePositiveResponse(info->phishing_url, response));
+    is_phishing = response.phishy();
   } else {
     DLOG(ERROR) << "Unable to get the server verdict for URL: "
                 << info->phishing_url << " status: " << status.status() << " "
@@ -534,50 +532,4 @@ bool ClientSideDetectionService::ModelHasValidHashIds(
   }
   return true;
 }
-
-// static
-bool ClientSideDetectionService::IsFalsePositiveResponse(
-    const GURL& url,
-    const ClientPhishingResponse& response) {
-  if (!response.phishy() || response.whitelist_expression_size() == 0) {
-    return false;
-  }
-  // This whitelist is special.  A particular URL gets whitelisted if it
-  // matches any of the expressions on the whitelist or if any of the whitelist
-  // entries matches the URL.
-
-  std::string host, path, query;
-  safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
-  std::string canonical_url_as_pattern = host + path + query;
-
-  std::vector<std::string> url_patterns;
-  safe_browsing_util::GeneratePatternsToCheck(url, &url_patterns);
-
-  for (int i = 0; i < response.whitelist_expression_size(); ++i) {
-    GURL whitelisted_url(std::string("http://") +
-                         response.whitelist_expression(i));
-    if (!whitelisted_url.is_valid()) {
-      UMA_HISTOGRAM_COUNTS("SBClientPhishing.InvalidWhitelistExpression", 1);
-      continue;  // Skip invalid whitelist expressions.
-    }
-    // First, we check whether the canonical URL matches any of the whitelisted
-    // expressions.
-    for (size_t j = 0; j < url_patterns.size(); ++j) {
-      if (url_patterns[j] == response.whitelist_expression(i)) {
-        return true;
-      }
-    }
-    // Second, we consider the canonical URL as an expression and we check
-    // whether any of the whitelist entries matches that expression.
-    std::vector<std::string> whitelist_patterns;
-    safe_browsing_util::GeneratePatternsToCheck(whitelisted_url,
-                                                &whitelist_patterns);
-    for (size_t j = 0; j < whitelist_patterns.size(); ++j) {
-      if (whitelist_patterns[j] == canonical_url_as_pattern) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
 }  // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h
index 380aa72..e04456d 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.h
+++ b/chrome/browser/safe_browsing/client_side_detection_service.h
@@ -169,8 +169,6 @@ class ClientSideDetectionService : public content::URLFetcherDelegate,
                            SetEnabledAndRefreshState);
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
-                           IsFalsePositiveResponse);
-  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                            ModelHasValidHashIds);
 
   // CacheState holds all information necessary to respond to a caller without
@@ -253,12 +251,6 @@ class ClientSideDetectionService : public content::URLFetcherDelegate,
   // valid hashes in the model.
   static bool ModelHasValidHashIds(const ClientSideModel& model);
 
-  // Returns true iff the response is phishing (phishy() is true) and if the
-  // given URL matches one of the whitelisted expressions in the given
-  // ClientPhishingResponse.
-  static bool IsFalsePositiveResponse(const GURL& url,
-                                      const ClientPhishingResponse& response);
-
   // Whether the service is running or not.  When the service is not running,
   // it won't download the model nor report detected phishing URLs.
   bool enabled_;
diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
index 2aa7596..56dc133 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
@@ -354,19 +354,11 @@ TEST_F(ClientSideDetectionServiceTest, SendClientReportPhishingRequest) {
                                   false /* success */);
   EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score));
 
-  // This is a false positive.
-  response.set_phishy(true);
-  response.add_whitelist_expression("c.com/a.html");
-  SetClientReportPhishingResponse(response.SerializeAsString(),
-                                  true /* success */);
-  GURL third_url("http://c.com/");
-  EXPECT_FALSE(SendClientReportPhishingRequest(third_url, score));
-
   base::Time after = base::Time::Now();
 
-  // Check that we have recorded all 4 requests within the correct time range.
+  // Check that we have recorded all 3 requests within the correct time range.
   std::queue<base::Time>& report_times = GetPhishingReportTimes();
-  EXPECT_EQ(4U, report_times.size());
+  EXPECT_EQ(3U, report_times.size());
   while (!report_times.empty()) {
     base::Time time = report_times.back();
     report_times.pop();
@@ -663,37 +655,4 @@ TEST_F(ClientSideDetectionServiceTest, SetEnabledAndRefreshState) {
   EXPECT_FALSE(SendClientReportPhishingRequest(GURL("http://a.com/"), 0.4f));
   Mock::VerifyAndClearExpectations(service);
 }
-
-TEST_F(ClientSideDetectionServiceTest, IsFalsePositiveResponse) {
-  GURL url("http://www.google.com/");
-  ClientPhishingResponse response;
-
-  // If the response is not phishing is should never be a false positive.
-  response.set_phishy(false);
-  response.add_whitelist_expression("www.google.com/");
-  EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
-      url, response));
-
-  // If there are no entries in the whitelist it should always return false.
-  response.clear_whitelist_expression();
-  response.set_phishy(true);
-  EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
-      url, response));
-
-  // If the URL doesn't match any whitelist entries it whould return false.
-  response.add_whitelist_expression("www.yahoo.com/");
-  EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
-      url, response));
-
-  // If the URL matches the whitelist it should return true.
-  response.add_whitelist_expression("google.com/");
-  EXPECT_TRUE(ClientSideDetectionService::IsFalsePositiveResponse(
-      url, response));
-
-  // If an entry in the whitelist matches the URL it should return true.
-  response.clear_whitelist_expression();
-  response.add_whitelist_expression("www.google.com/a/b.html");
-  EXPECT_TRUE(ClientSideDetectionService::IsFalsePositiveResponse(
-      url, response));
-}
 }  // namespace safe_browsing
diff --git a/chrome/common/safe_browsing/csd.proto b/chrome/common/safe_browsing/csd.proto
index 8a55839..5fc9915 100644
--- a/chrome/common/safe_browsing/csd.proto
+++ b/chrome/common/safe_browsing/csd.proto
@@ -23,8 +23,10 @@ message ClientPhishingRequest {
   // A 5-byte SHA-256 hash prefix of the URL.  Before hashing the URL is
   // canonicalized, converted to a suffix-prefix expression and broadened
   // (www prefix is removed and everything past the last '/' is stripped).
-  // Unlike "url", this is sent for all users.
-  optional bytes hash_prefix = 10;
+  //
+  // Marked OBSOLETE because the URL is sent for all users, making the hash
+  // prefix unnecessary.
+  optional bytes OBSOLETE_hash_prefix = 10;
 
   // Score that was computed on the client.  Value is between 0.0 and 1.0.
   // The larger the value the more likely the url is phishing.
@@ -77,7 +79,10 @@ message ClientPhishingResponse {
   // expressions.  This whitelist will be empty for the vast majority
   // of the responses but might contain up to 100 entries in emergency
   // situations.
-  repeated string whitelist_expression = 2;
+  //
+  // Marked OBSOLETE because the URL is sent for all users, so the server
+  // can do whitelist matching.
+  repeated string OBSOLETE_whitelist_expression = 2;
 }
 
 message ClientDownloadRequest {
author	bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-12-08 04:29:25 +0000
committer	bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-12-08 04:29:25 +0000
commit	f508e4e37229721672f9f567b856986859ff323c (patch)
tree	c66f307681bd976ff3474a540649cc05476ed06d
parent	b1c8b494a5a11f64d452d9ace5c98bd599bb1a38 (diff)
download	chromium_src-f508e4e37229721672f9f567b856986859ff323c.zip chromium_src-f508e4e37229721672f9f567b856986859ff323c.tar.gz chromium_src-f508e4e37229721672f9f567b856986859ff323c.tar.bz2