6 files changed, 132 insertions, 17 deletions
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc
index 9e800ce..2ae761b 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc
@@ -413,7 +413,8 @@ void ClientSideDetectionService::HandlePhishingVerdict(
     // Cache response, possibly flushing an old one.
     cache_[info->phishing_url] =
         make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
-    is_phishing = response.phishy();
+    is_phishing = (response.phishy() &&
+                   !IsFalsePositiveResponse(info->phishing_url, response));
   } else {
     DLOG(ERROR) << "Unable to get the server verdict for URL: "
                 << info->phishing_url << " status: " << status.status() << " "
@@ -607,4 +608,50 @@ bool ClientSideDetectionService::ModelHasValidHashIds(
   }
   return true;
 }
+
+// static
+bool ClientSideDetectionService::IsFalsePositiveResponse(
+    const GURL& url,
+    const ClientPhishingResponse& response) {
+  if (!response.phishy() || response.whitelist_expression_size() == 0) {
+    return false;
+  }
+  // This whitelist is special.  A particular URL gets whitelisted if it
+  // matches any of the expressions on the whitelist or if any of the
+  // whitelist entries matches the URL.
+
+  std::string host, path, query;
+  safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
+  std::string canonical_url_as_pattern = host + path + query;
+
+  std::vector<std::string> url_patterns;
+  safe_browsing_util::GeneratePatternsToCheck(url, &url_patterns);
+
+  for (int i = 0; i < response.whitelist_expression_size(); ++i) {
+    GURL whitelisted_url(std::string("http://") +
+                         response.whitelist_expression(i));
+    if (!whitelisted_url.is_valid()) {
+      UMA_HISTOGRAM_COUNTS("SBClientPhishing.InvalidWhitelistExpression", 1);
+      continue;  // Skip invalid whitelist expressions.
+    }
+    // First, we check whether the canonical URL matches any of the
+    // whitelisted expressions.
+    for (size_t j = 0; j < url_patterns.size(); ++j) {
+      if (url_patterns[j] == response.whitelist_expression(i)) {
+        return true;
+      }
+    }
+    // Second, we consider the canonical URL as an expression and we check
+    // whether any of the whitelist entries matches that expression.
+    std::vector<std::string> whitelist_patterns;
+    safe_browsing_util::GeneratePatternsToCheck(whitelisted_url,
+                                                &whitelist_patterns);
+    for (size_t j = 0; j < whitelist_patterns.size(); ++j) {
+      if (whitelist_patterns[j] == canonical_url_as_pattern) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
 }  // namespace safe_browsing
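The two-way match above is easier to see in isolation. The following is a minimal, self-contained sketch of the same idea; HostsToCheck and PathsToCheck are simplified stand-ins for safe_browsing_util::GenerateHostsToCheck and GeneratePathsToCheck (the real rules also cap the number of host suffixes and path prefixes, special-case IP hosts, and handle the query string separately), and all names and the data in main() are illustrative assumptions, not part of this change.

// Toy sketch (not Chromium code) of the two-way whitelist match in
// IsFalsePositiveResponse().
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// All trailing host suffixes with at least two components, e.g.
// "www.google.com" -> {"www.google.com", "google.com"}.
std::vector<std::string> HostsToCheck(const std::string& host) {
  std::vector<std::string> parts;
  std::stringstream ss(host);
  std::string part;
  while (std::getline(ss, part, '.'))
    parts.push_back(part);
  std::vector<std::string> hosts;
  for (size_t i = 0; i + 2 <= parts.size(); ++i) {
    std::string suffix = parts[i];
    for (size_t j = i + 1; j < parts.size(); ++j)
      suffix += "." + parts[j];
    hosts.push_back(suffix);
  }
  return hosts;
}

// The exact path plus every directory prefix, e.g.
// "/a/b.html" -> {"/a/b.html", "/a/", "/"}.
std::vector<std::string> PathsToCheck(const std::string& path) {
  std::vector<std::string> paths;
  paths.push_back(path);
  for (size_t pos = path.find('/', 1); pos != std::string::npos;
       pos = path.find('/', pos + 1)) {
    std::string prefix = path.substr(0, pos + 1);
    if (prefix != path)
      paths.push_back(prefix);
  }
  if (path != "/")
    paths.push_back("/");
  return paths;
}

// Cross product of host suffixes and path prefixes, like the new
// GeneratePatternsToCheck() below.
std::vector<std::string> PatternsToCheck(const std::string& host,
                                         const std::string& path) {
  std::vector<std::string> hosts = HostsToCheck(host);
  std::vector<std::string> paths = PathsToCheck(path);
  std::vector<std::string> patterns;
  for (size_t h = 0; h < hosts.size(); ++h)
    for (size_t p = 0; p < paths.size(); ++p)
      patterns.push_back(hosts[h] + paths[p]);
  return patterns;
}

// The two-way check: first the URL's patterns against the whitelist
// expression, then the expression's patterns against the canonical URL.
bool MatchesWhitelistEntry(const std::string& url_host,
                           const std::string& url_path,
                           const std::string& entry_host,
                           const std::string& entry_path) {
  const std::string canonical_url = url_host + url_path;
  const std::string entry = entry_host + entry_path;
  std::vector<std::string> url_patterns = PatternsToCheck(url_host, url_path);
  for (size_t i = 0; i < url_patterns.size(); ++i)
    if (url_patterns[i] == entry)
      return true;  // The URL matches the whitelist expression.
  std::vector<std::string> entry_patterns =
      PatternsToCheck(entry_host, entry_path);
  for (size_t i = 0; i < entry_patterns.size(); ++i)
    if (entry_patterns[i] == canonical_url)
      return true;  // The whitelist expression matches the URL.
  return false;
}

int main() {
  // URL pattern "google.com/a/" matches the expression: prints 1.
  std::cout << MatchesWhitelistEntry("www.google.com", "/a/b.html",
                                     "google.com", "/a/") << "\n";
  // Expression pattern "www.google.com/" matches the URL: prints 1.
  std::cout << MatchesWhitelistEntry("www.google.com", "/",
                                     "www.google.com", "/a/b.html") << "\n";
  // Unrelated sites: prints 0.
  std::cout << MatchesWhitelistEntry("www.google.com", "/",
                                     "www.yahoo.com", "/") << "\n";
  return 0;
}

Compiled standalone this prints 1, 1, 0; the second case mirrors the last expectation in the IsFalsePositiveResponse unit test below.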
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h
index 002ced1..7afe1cd 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.h
+++ b/chrome/browser/safe_browsing/client_side_detection_service.h
@@ -50,6 +50,7 @@ class URLRequestStatus;
 namespace safe_browsing {
 class ClientPhishingRequest;
+class ClientPhishingResponse;
 class ClientSideModel;
 
 class ClientSideDetectionService : public URLFetcher::Delegate,
@@ -167,6 +168,8 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, SetEnabled);
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
+                           IsFalsePositiveResponse);
+  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                            ModelHasValidHashIds);
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                            SanitizeRequestForPingback);
@@ -263,6 +266,12 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
   // valid hashes in the model.
   static bool ModelHasValidHashIds(const ClientSideModel& model);
 
+  // Returns true iff the response verdict is phishing (phishy() is true)
+  // and the given URL matches one of the whitelisted expressions in the
+  // given ClientPhishingResponse.
+  static bool IsFalsePositiveResponse(const GURL& url,
+                                      const ClientPhishingResponse& response);
+
   // Whether the service is running or not.  When the service is not running,
   // it won't download the model nor report detected phishing URLs.
   bool enabled_;
diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
index 324f170..55c0f0e 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
@@ -350,14 +350,22 @@ TEST_F(ClientSideDetectionServiceTest, SendClientReportPhishingRequest) {
   GURL second_url("http://b.com/");
   response.set_phishy(false);
   SetClientReportPhishingResponse(response.SerializeAsString(),
-                                  false /* success*/);
+                                  false /* success */);
   EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score));
 
+  // This is a false positive.
+  response.set_phishy(true);
+  response.add_whitelist_expression("c.com/a.html");
+  SetClientReportPhishingResponse(response.SerializeAsString(),
+                                  true /* success */);
+  GURL third_url("http://c.com/");
+  EXPECT_FALSE(SendClientReportPhishingRequest(third_url, score));
+
   base::Time after = base::Time::Now();
 
-  // Check that we have recorded all 3 requests within the correct time range.
+  // Check that we have recorded all 4 requests within the correct time range.
   std::queue<base::Time>& report_times = GetPhishingReportTimes();
-  EXPECT_EQ(3U, report_times.size());
+  EXPECT_EQ(4U, report_times.size());
   while (!report_times.empty()) {
     base::Time time = report_times.back();
     report_times.pop();
@@ -749,4 +757,36 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
             sanitized_request.SerializeAsString());
 }
 
+TEST_F(ClientSideDetectionServiceTest, IsFalsePositiveResponse) {
+  GURL url("http://www.google.com/");
+  ClientPhishingResponse response;
+
+  // If the response is not phishing it should never be a false positive.
+  response.set_phishy(false);
+  response.add_whitelist_expression("www.google.com/");
+  EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+
+  // If there are no entries in the whitelist it should always return false.
+  response.clear_whitelist_expression();
+  response.set_phishy(true);
+  EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+
+  // If the URL doesn't match any whitelist entries it should return false.
+  response.add_whitelist_expression("www.yahoo.com/");
+  EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+
+  // If the URL matches the whitelist it should return true.
+  response.add_whitelist_expression("google.com/");
+  EXPECT_TRUE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+
+  // If an entry in the whitelist matches the URL it should return true.
+  response.clear_whitelist_expression();
+  response.add_whitelist_expression("www.google.com/a/b.html");
+  EXPECT_TRUE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+}
 }  // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/safe_browsing_util.cc b/chrome/browser/safe_browsing/safe_browsing_util.cc
index 9fe10ed..bb13a00 100644
--- a/chrome/browser/safe_browsing/safe_browsing_util.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_util.cc
@@ -417,6 +417,17 @@ void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths) {
     paths->push_back(path + "?" + query);
 }
 
+void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls) {
+  std::vector<std::string> hosts, paths;
+  GenerateHostsToCheck(url, &hosts);
+  GeneratePathsToCheck(url, &paths);
+  for (size_t h = 0; h < hosts.size(); ++h) {
+    for (size_t p = 0; p < paths.size(); ++p) {
+      urls->push_back(hosts[h] + paths[p]);
+    }
+  }
+}
+
 int GetHashIndex(const SBFullHash& hash,
                  const std::vector<SBFullHashResult>& full_hashes) {
   for (size_t i = 0; i < full_hashes.size(); ++i) {
@@ -431,21 +442,16 @@ int GetUrlHashIndex(const GURL& url,
   if (full_hashes.empty())
     return -1;
 
-  std::vector<std::string> hosts, paths;
-  GenerateHostsToCheck(url, &hosts);
-  GeneratePathsToCheck(url, &paths);
+  std::vector<std::string> patterns;
+  GeneratePatternsToCheck(url, &patterns);
 
-  for (size_t h = 0; h < hosts.size(); ++h) {
-    for (size_t p = 0; p < paths.size(); ++p) {
-      SBFullHash key;
-      crypto::SHA256HashString(hosts[h] + paths[p],
-                               key.full_hash,
-                               sizeof(SBFullHash));
-      int index = GetHashIndex(key, full_hashes);
-      if (index != -1) return index;
-    }
+  for (size_t i = 0; i < patterns.size(); ++i) {
+    SBFullHash key;
+    crypto::SHA256HashString(patterns[i], key.full_hash, sizeof(SBFullHash));
+    int index = GetHashIndex(key, full_hashes);
+    if (index != -1)
+      return index;
   }
-
   return -1;
 }
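A note on the new helper's output: for the canonical SafeBrowsing example URL http://a.b.c/1/2.html?param=1, the hosts to check are a.b.c and b.c and the paths are /1/2.html?param=1, /1/2.html, /1/, and /, so GeneratePatternsToCheck() emits their 2 x 4 = 8 combinations. A hypothetical test sketch (not part of this change, and buildable only inside the Chromium tree of this era) could pin that down:

// Hypothetical test; the expected values follow the SafeBrowsing
// host-suffix / path-prefix rules.
#include <algorithm>
#include <string>
#include <vector>

#include "chrome/browser/safe_browsing/safe_browsing_util.h"
#include "googleurl/src/gurl.h"
#include "testing/gtest/include/gtest/gtest.h"

TEST(SafeBrowsingUtilTest, GeneratePatternsToCheck) {
  std::vector<std::string> patterns;
  safe_browsing_util::GeneratePatternsToCheck(
      GURL("http://a.b.c/1/2.html?param=1"), &patterns);
  // 2 hosts ("a.b.c", "b.c") x 4 paths ("/1/2.html?param=1",
  // "/1/2.html", "/1/", "/") = 8 patterns.
  EXPECT_EQ(8U, patterns.size());
  // Element order depends on GenerateHostsToCheck/GeneratePathsToCheck,
  // so only check membership of two representative patterns.
  EXPECT_TRUE(std::find(patterns.begin(), patterns.end(),
                        "a.b.c/1/2.html?param=1") != patterns.end());
  EXPECT_TRUE(std::find(patterns.begin(), patterns.end(),
                        "b.c/") != patterns.end());
}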
diff --git a/chrome/browser/safe_browsing/safe_browsing_util.h b/chrome/browser/safe_browsing/safe_browsing_util.h
index bec4dc8..2911f46 100644
--- a/chrome/browser/safe_browsing/safe_browsing_util.h
+++ b/chrome/browser/safe_browsing/safe_browsing_util.h
@@ -296,6 +296,9 @@ void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
 // Given a URL, returns all the paths we need to check.
 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
 
+// Given a URL, returns all the patterns we need to check.
+void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
+
 int GetHashIndex(const SBFullHash& hash,
                  const std::vector<SBFullHashResult>& full_hashes);
diff --git a/chrome/common/safe_browsing/csd.proto b/chrome/common/safe_browsing/csd.proto
index 8496eb7..35e4302 100644
--- a/chrome/common/safe_browsing/csd.proto
+++ b/chrome/common/safe_browsing/csd.proto
@@ -68,4 +68,14 @@ message ClientPhishingRequest {
 
 message ClientPhishingResponse {
   required bool phishy = 1;
+
+  // A list of SafeBrowsing host-suffix / path-prefix expressions that
+  // are whitelisted.  The client must match the current top-level URL
+  // against these whitelisted expressions and only apply a positive
+  // phishing verdict above if the URL does not match any expression
+  // on this whitelist.  The client must not cache these whitelisted
+  // expressions.  This whitelist will be empty for the vast majority
+  // of the responses but might contain up to 100 entries in emergency
+  // situations.
+  repeated string whitelist_expression = 2;
 }
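To make the new field concrete, here is a hedged sketch of what an emergency whitelist response could look like on the wire and how it interacts with the client logic above; the expression values are invented for illustration, and the generated-header path assumes the usual protoc output location.

// Illustrative only: a server pushing an emergency whitelist.
#include "chrome/common/safe_browsing/csd.pb.h"

void BuildEmergencyResponse(safe_browsing::ClientPhishingResponse* response) {
  // The raw classifier verdict is still phishy...
  response->set_phishy(true);
  // ...but these expressions (hypothetical values) tell the client to
  // suppress the verdict for matching URLs.  Per the proto comment above,
  // the client must not cache them.
  response->add_whitelist_expression("login.example.com/");
  response->add_whitelist_expression("example.com/signin/");
}

A client receiving this response for http://login.example.com/ matches the first expression, so IsFalsePositiveResponse() returns true and the phishy verdict is suppressed for that URL while still applying to unrelated URLs.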