summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--chrome/browser/safe_browsing/client_side_detection_service.cc49
-rw-r--r--chrome/browser/safe_browsing/client_side_detection_service.h9
-rw-r--r--chrome/browser/safe_browsing/client_side_detection_service_unittest.cc46
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_util.cc32
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_util.h3
-rw-r--r--chrome/common/safe_browsing/csd.proto10
6 files changed, 132 insertions, 17 deletions
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc
index 9e800ce..2ae761b 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc
@@ -413,7 +413,8 @@ void ClientSideDetectionService::HandlePhishingVerdict(
// Cache response, possibly flushing an old one.
cache_[info->phishing_url] =
make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
- is_phishing = response.phishy();
+ is_phishing = (response.phishy() &&
+ !IsFalsePositiveResponse(info->phishing_url, response));
} else {
DLOG(ERROR) << "Unable to get the server verdict for URL: "
<< info->phishing_url << " status: " << status.status() << " "
@@ -607,4 +608,50 @@ bool ClientSideDetectionService::ModelHasValidHashIds(
}
return true;
}
+
+// static
+bool ClientSideDetectionService::IsFalsePositiveResponse(
+ const GURL& url,
+ const ClientPhishingResponse& response) {
+ if (!response.phishy() || response.whitelist_expression_size() == 0) {
+ return false;
+ }
+ // This whitelist is special. A particular URL gets whitelisted if it
+ // matches any of the expressions on the whitelist or if any of the whitelist
+ // entries matches the URL.
+
+ std::string host, path, query;
+ safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
+ std::string canonical_url_as_pattern = host + path + query;
+
+ std::vector<std::string> url_patterns;
+ safe_browsing_util::GeneratePatternsToCheck(url, &url_patterns);
+
+ for (int i = 0; i < response.whitelist_expression_size(); ++i) {
+ GURL whitelisted_url(std::string("http://") +
+ response.whitelist_expression(i));
+ if (!whitelisted_url.is_valid()) {
+ UMA_HISTOGRAM_COUNTS("SBClientPhishing.InvalidWhitelistExpression", 1);
+ continue; // Skip invalid whitelist expressions.
+ }
+ // First, we check whether the canonical URL matches any of the whitelisted
+ // expressions.
+ for (size_t j = 0; j < url_patterns.size(); ++j) {
+ if (url_patterns[j] == response.whitelist_expression(i)) {
+ return true;
+ }
+ }
+ // Second, we consider the canonical URL as an expression and we check
+ // whether any of the whitelist entries matches that expression.
+ std::vector<std::string> whitelist_patterns;
+ safe_browsing_util::GeneratePatternsToCheck(whitelisted_url,
+ &whitelist_patterns);
+ for (size_t j = 0; j < whitelist_patterns.size(); ++j) {
+ if (whitelist_patterns[j] == canonical_url_as_pattern) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
} // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h
index 002ced1..7afe1cd 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.h
+++ b/chrome/browser/safe_browsing/client_side_detection_service.h
@@ -50,6 +50,7 @@ class URLRequestStatus;
namespace safe_browsing {
class ClientPhishingRequest;
+class ClientPhishingResponse;
class ClientSideModel;
class ClientSideDetectionService : public URLFetcher::Delegate,
@@ -167,6 +168,8 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, SetEnabled);
FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
+ IsFalsePositiveResponse);
+ FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
ModelHasValidHashIds);
FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
SanitizeRequestForPingback);
@@ -263,6 +266,12 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
// valid hashes in the model.
static bool ModelHasValidHashIds(const ClientSideModel& model);
+ // Returns true iff the response is phishing (phishy() is true) and if the
+ // given URL matches one of the whitelisted expressions in the given
+ // ClientPhishingResponse.
+ static bool IsFalsePositiveResponse(const GURL& url,
+ const ClientPhishingResponse& response);
+
// Whether the service is running or not. When the service is not running,
// it won't download the model nor report detected phishing URLs.
bool enabled_;
diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
index 324f170..55c0f0e 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
@@ -350,14 +350,22 @@ TEST_F(ClientSideDetectionServiceTest, SendClientReportPhishingRequest) {
GURL second_url("http://b.com/");
response.set_phishy(false);
SetClientReportPhishingResponse(response.SerializeAsString(),
- false /* success*/);
+ false /* success */);
EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score));
+ // This is a false positive.
+ response.set_phishy(true);
+ response.add_whitelist_expression("c.com/a.html");
+ SetClientReportPhishingResponse(response.SerializeAsString(),
+ true /* success */);
+ GURL third_url("http://c.com/");
+ EXPECT_FALSE(SendClientReportPhishingRequest(third_url, score));
+
base::Time after = base::Time::Now();
- // Check that we have recorded all 3 requests within the correct time range.
+ // Check that we have recorded all 4 requests within the correct time range.
std::queue<base::Time>& report_times = GetPhishingReportTimes();
- EXPECT_EQ(3U, report_times.size());
+ EXPECT_EQ(4U, report_times.size());
while (!report_times.empty()) {
base::Time time = report_times.back();
report_times.pop();
@@ -749,4 +757,36 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
sanitized_request.SerializeAsString());
}
+TEST_F(ClientSideDetectionServiceTest, IsFalsePositiveResponse) {
+ GURL url("http://www.google.com/");
+ ClientPhishingResponse response;
+
+ // If the response is not phishing it should never be a false positive.
+ response.set_phishy(false);
+ response.add_whitelist_expression("www.google.com/");
+ EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
+ url, response));
+
+ // If there are no entries in the whitelist it should always return false.
+ response.clear_whitelist_expression();
+ response.set_phishy(true);
+ EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
+ url, response));
+
+ // If the URL doesn't match any whitelist entries it would return false.
+ response.add_whitelist_expression("www.yahoo.com/");
+ EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
+ url, response));
+
+ // If the URL matches the whitelist it should return true.
+ response.add_whitelist_expression("google.com/");
+ EXPECT_TRUE(ClientSideDetectionService::IsFalsePositiveResponse(
+ url, response));
+
+ // If an entry in the whitelist matches the URL it should return true.
+ response.clear_whitelist_expression();
+ response.add_whitelist_expression("www.google.com/a/b.html");
+ EXPECT_TRUE(ClientSideDetectionService::IsFalsePositiveResponse(
+ url, response));
+}
} // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/safe_browsing_util.cc b/chrome/browser/safe_browsing/safe_browsing_util.cc
index 9fe10ed..bb13a00 100644
--- a/chrome/browser/safe_browsing/safe_browsing_util.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_util.cc
@@ -417,6 +417,17 @@ void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths) {
paths->push_back(path + "?" + query);
}
+void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls) {
+ std::vector<std::string> hosts, paths;
+ GenerateHostsToCheck(url, &hosts);
+ GeneratePathsToCheck(url, &paths);
+ for (size_t h = 0; h < hosts.size(); ++h) {
+ for (size_t p = 0; p < paths.size(); ++p) {
+ urls->push_back(hosts[h] + paths[p]);
+ }
+ }
+}
+
int GetHashIndex(const SBFullHash& hash,
const std::vector<SBFullHashResult>& full_hashes) {
for (size_t i = 0; i < full_hashes.size(); ++i) {
@@ -431,21 +442,16 @@ int GetUrlHashIndex(const GURL& url,
if (full_hashes.empty())
return -1;
- std::vector<std::string> hosts, paths;
- GenerateHostsToCheck(url, &hosts);
- GeneratePathsToCheck(url, &paths);
+ std::vector<std::string> patterns;
+ GeneratePatternsToCheck(url, &patterns);
- for (size_t h = 0; h < hosts.size(); ++h) {
- for (size_t p = 0; p < paths.size(); ++p) {
- SBFullHash key;
- crypto::SHA256HashString(hosts[h] + paths[p],
- key.full_hash,
- sizeof(SBFullHash));
- int index = GetHashIndex(key, full_hashes);
- if (index != -1) return index;
- }
+ for (size_t i = 0; i < patterns.size(); ++i) {
+ SBFullHash key;
+ crypto::SHA256HashString(patterns[i], key.full_hash, sizeof(SBFullHash));
+ int index = GetHashIndex(key, full_hashes);
+ if (index != -1)
+ return index;
}
-
return -1;
}
diff --git a/chrome/browser/safe_browsing/safe_browsing_util.h b/chrome/browser/safe_browsing/safe_browsing_util.h
index bec4dc8..2911f46 100644
--- a/chrome/browser/safe_browsing/safe_browsing_util.h
+++ b/chrome/browser/safe_browsing/safe_browsing_util.h
@@ -296,6 +296,9 @@ void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
// Given a URL, returns all the paths we need to check.
void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
+// Given a URL, returns all the patterns we need to check.
+void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
+
int GetHashIndex(const SBFullHash& hash,
const std::vector<SBFullHashResult>& full_hashes);
diff --git a/chrome/common/safe_browsing/csd.proto b/chrome/common/safe_browsing/csd.proto
index 8496eb7..35e4302 100644
--- a/chrome/common/safe_browsing/csd.proto
+++ b/chrome/common/safe_browsing/csd.proto
@@ -68,4 +68,14 @@ message ClientPhishingRequest {
message ClientPhishingResponse {
required bool phishy = 1;
+
+ // A list of SafeBrowsing host-suffix / path-prefix expressions that
+ // are whitelisted. The client must match the current top-level URL
+ // against these whitelisted expressions and only apply a positive
+ // phishing verdict above if the URL does not match any expression
+ // on this whitelist. The client must not cache these whitelisted
+ // expressions. This whitelist will be empty for the vast majority
+ // of the responses but might contain up to 100 entries in emergency
+ // situations.
+ repeated string whitelist_expression = 2;
}