6 files changed, 132 insertions, 17 deletions
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc
index 9e800ce..2ae761b 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc
@@ -413,7 +413,8 @@ void ClientSideDetectionService::HandlePhishingVerdict(
     // Cache response, possibly flushing an old one.
     cache_[info->phishing_url] =
         make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
-    is_phishing = response.phishy();
+    is_phishing = (response.phishy() &&
+                   !IsFalsePositiveResponse(info->phishing_url, response));
   } else {
     DLOG(ERROR) << "Unable to get the server verdict for URL: "
                 << info->phishing_url << " status: " << status.status() << " "
@@ -607,4 +608,50 @@ bool ClientSideDetectionService::ModelHasValidHashIds(
   }
   return true;
 }
+
+// static
+bool ClientSideDetectionService::IsFalsePositiveResponse(
+    const GURL& url,
+    const ClientPhishingResponse& response) {
+  if (!response.phishy() || response.whitelist_expression_size() == 0) {
+    return false;
+  }
+  // This whitelist is special.  A particular URL gets whitelisted if it
+  // matches any of the expressions on the whitelist or if any of the
+  // whitelist entries matches the URL.
+
+  std::string host, path, query;
+  safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
+  std::string canonical_url_as_pattern = host + path + query;
+
+  std::vector<std::string> url_patterns;
+  safe_browsing_util::GeneratePatternsToCheck(url, &url_patterns);
+
+  for (int i = 0; i < response.whitelist_expression_size(); ++i) {
+    GURL whitelisted_url(std::string("http://") +
+                         response.whitelist_expression(i));
+    if (!whitelisted_url.is_valid()) {
+      UMA_HISTOGRAM_COUNTS("SBClientPhishing.InvalidWhitelistExpression", 1);
+      continue;  // Skip invalid whitelist expressions.
+    }
+    // First, we check whether the canonical URL matches any of the
+    // whitelisted expressions.
+    for (size_t j = 0; j < url_patterns.size(); ++j) {
+      if (url_patterns[j] == response.whitelist_expression(i)) {
+        return true;
+      }
+    }
+    // Second, we consider the canonical URL as an expression and we check
+    // whether any of the whitelist entries matches that expression.
+    std::vector<std::string> whitelist_patterns;
+    safe_browsing_util::GeneratePatternsToCheck(whitelisted_url,
+                                                &whitelist_patterns);
+    for (size_t j = 0; j < whitelist_patterns.size(); ++j) {
+      if (whitelist_patterns[j] == canonical_url_as_pattern) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
 }  // namespace safe_browsing
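The two-way match above is easier to see in isolation. The following is a minimal, self-contained sketch of the same idea; HostsToCheck and PathsToCheck are simplified stand-ins for safe_browsing_util::GenerateHostsToCheck and GeneratePathsToCheck (the real rules also cap the number of host suffixes and path prefixes, special-case IP hosts, and handle the query string separately), and all names and the data in main() are illustrative assumptions, not part of this change.

// Toy sketch (not Chromium code) of the two-way whitelist match in
// IsFalsePositiveResponse().
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// All trailing host suffixes with at least two components, e.g.
// "www.google.com" -> {"www.google.com", "google.com"}.
std::vector<std::string> HostsToCheck(const std::string& host) {
  std::vector<std::string> parts;
  std::stringstream ss(host);
  std::string part;
  while (std::getline(ss, part, '.'))
    parts.push_back(part);
  std::vector<std::string> hosts;
  for (size_t i = 0; i + 2 <= parts.size(); ++i) {
    std::string suffix = parts[i];
    for (size_t j = i + 1; j < parts.size(); ++j)
      suffix += "." + parts[j];
    hosts.push_back(suffix);
  }
  return hosts;
}

// The exact path plus every directory prefix, e.g.
// "/a/b.html" -> {"/a/b.html", "/a/", "/"}.
std::vector<std::string> PathsToCheck(const std::string& path) {
  std::vector<std::string> paths;
  paths.push_back(path);
  for (size_t pos = path.find('/', 1); pos != std::string::npos;
       pos = path.find('/', pos + 1)) {
    std::string prefix = path.substr(0, pos + 1);
    if (prefix != path)
      paths.push_back(prefix);
  }
  if (path != "/")
    paths.push_back("/");
  return paths;
}

// Cross product of host suffixes and path prefixes, like the new
// GeneratePatternsToCheck() below.
std::vector<std::string> PatternsToCheck(const std::string& host,
                                         const std::string& path) {
  std::vector<std::string> hosts = HostsToCheck(host);
  std::vector<std::string> paths = PathsToCheck(path);
  std::vector<std::string> patterns;
  for (size_t h = 0; h < hosts.size(); ++h)
    for (size_t p = 0; p < paths.size(); ++p)
      patterns.push_back(hosts[h] + paths[p]);
  return patterns;
}

// The two-way check: first the URL's patterns against the whitelist
// expression, then the expression's patterns against the canonical URL.
bool MatchesWhitelistEntry(const std::string& url_host,
                           const std::string& url_path,
                           const std::string& entry_host,
                           const std::string& entry_path) {
  const std::string canonical_url = url_host + url_path;
  const std::string entry = entry_host + entry_path;
  std::vector<std::string> url_patterns = PatternsToCheck(url_host, url_path);
  for (size_t i = 0; i < url_patterns.size(); ++i)
    if (url_patterns[i] == entry)
      return true;  // The URL matches the whitelist expression.
  std::vector<std::string> entry_patterns =
      PatternsToCheck(entry_host, entry_path);
  for (size_t i = 0; i < entry_patterns.size(); ++i)
    if (entry_patterns[i] == canonical_url)
      return true;  // The whitelist expression matches the URL.
  return false;
}

int main() {
  // URL pattern "google.com/a/" matches the expression: prints 1.
  std::cout << MatchesWhitelistEntry("www.google.com", "/a/b.html",
                                     "google.com", "/a/") << "\n";
  // Expression pattern "www.google.com/" matches the URL: prints 1.
  std::cout << MatchesWhitelistEntry("www.google.com", "/",
                                     "www.google.com", "/a/b.html") << "\n";
  // Unrelated sites: prints 0.
  std::cout << MatchesWhitelistEntry("www.google.com", "/",
                                     "www.yahoo.com", "/") << "\n";
  return 0;
}

Compiled standalone this prints 1, 1, 0; the second case mirrors the last expectation in the IsFalsePositiveResponse unit test below.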
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h
index 002ced1..7afe1cd 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.h
+++ b/chrome/browser/safe_browsing/client_side_detection_service.h
@@ -50,6 +50,7 @@ class URLRequestStatus;
 namespace safe_browsing {
 class ClientPhishingRequest;
+class ClientPhishingResponse;
 class ClientSideModel;
 
 class ClientSideDetectionService : public URLFetcher::Delegate,
@@ -167,6 +168,8 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, SetEnabled);
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
+                           IsFalsePositiveResponse);
+  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                            ModelHasValidHashIds);
   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                            SanitizeRequestForPingback);
@@ -263,6 +266,12 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
   // valid hashes in the model.
   static bool ModelHasValidHashIds(const ClientSideModel& model);
 
+  // Returns true iff the response verdict is phishing (phishy() is true)
+  // and the given URL matches one of the whitelisted expressions in the
+  // given ClientPhishingResponse.
+  static bool IsFalsePositiveResponse(const GURL& url,
+                                      const ClientPhishingResponse& response);
+
   // Whether the service is running or not.  When the service is not running,
   // it won't download the model nor report detected phishing URLs.
   bool enabled_;
diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
index 324f170..55c0f0e 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
@@ -350,14 +350,22 @@ TEST_F(ClientSideDetectionServiceTest, SendClientReportPhishingRequest) {
   GURL second_url("http://b.com/");
   response.set_phishy(false);
   SetClientReportPhishingResponse(response.SerializeAsString(),
-                                  false /* success*/);
+                                  false /* success */);
   EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score));
 
+  // This is a false positive.
+  response.set_phishy(true);
+  response.add_whitelist_expression("c.com/a.html");
+  SetClientReportPhishingResponse(response.SerializeAsString(),
+                                  true /* success */);
+  GURL third_url("http://c.com/");
+  EXPECT_FALSE(SendClientReportPhishingRequest(third_url, score));
+
   base::Time after = base::Time::Now();
 
-  // Check that we have recorded all 3 requests within the correct time range.
+  // Check that we have recorded all 4 requests within the correct time range.
   std::queue<base::Time>& report_times = GetPhishingReportTimes();
-  EXPECT_EQ(3U, report_times.size());
+  EXPECT_EQ(4U, report_times.size());
   while (!report_times.empty()) {
     base::Time time = report_times.back();
     report_times.pop();
@@ -749,4 +757,36 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
             sanitized_request.SerializeAsString());
 }
 
+TEST_F(ClientSideDetectionServiceTest, IsFalsePositiveResponse) {
+  GURL url("http://www.google.com/");
+  ClientPhishingResponse response;
+
+  // If the response is not phishing it should never be a false positive.
+  response.set_phishy(false);
+  response.add_whitelist_expression("www.google.com/");
+  EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+
+  // If there are no entries in the whitelist it should always return false.
+  response.clear_whitelist_expression();
+  response.set_phishy(true);
+  EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+
+  // If the URL doesn't match any whitelist entries it should return false.
+  response.add_whitelist_expression("www.yahoo.com/");
+  EXPECT_FALSE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+
+  // If the URL matches the whitelist it should return true.
+  response.add_whitelist_expression("google.com/");
+  EXPECT_TRUE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+
+  // If an entry in the whitelist matches the URL it should return true.
+  response.clear_whitelist_expression();
+  response.add_whitelist_expression("www.google.com/a/b.html");
+  EXPECT_TRUE(ClientSideDetectionService::IsFalsePositiveResponse(
+      url, response));
+}
 }  // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/safe_browsing_util.cc b/chrome/browser/safe_browsing/safe_browsing_util.cc
index 9fe10ed..bb13a00 100644
--- a/chrome/browser/safe_browsing/safe_browsing_util.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_util.cc
@@ -417,6 +417,17 @@ void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths) {
     paths->push_back(path + "?" + query);
 }
 
+void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls) {
+  std::vector<std::string> hosts, paths;
+  GenerateHostsToCheck(url, &hosts);
+  GeneratePathsToCheck(url, &paths);
+  for (size_t h = 0; h < hosts.size(); ++h) {
+    for (size_t p = 0; p < paths.size(); ++p) {
+      urls->push_back(hosts[h] + paths[p]);
+    }
+  }
+}
+
 int GetHashIndex(const SBFullHash& hash,
                  const std::vector<SBFullHashResult>& full_hashes) {
   for (size_t i = 0; i < full_hashes.size(); ++i) {
@@ -431,21 +442,16 @@ int GetUrlHashIndex(const GURL& url,
   if (full_hashes.empty())
     return -1;
 
-  std::vector<std::string> hosts, paths;
-  GenerateHostsToCheck(url, &hosts);
-  GeneratePathsToCheck(url, &paths);
+  std::vector<std::string> patterns;
+  GeneratePatternsToCheck(url, &patterns);
 
-  for (size_t h = 0; h < hosts.size(); ++h) {
-    for (size_t p = 0; p < paths.size(); ++p) {
-      SBFullHash key;
-      crypto::SHA256HashString(hosts[h] + paths[p],
-                               key.full_hash,
-                               sizeof(SBFullHash));
-      int index = GetHashIndex(key, full_hashes);
-      if (index != -1) return index;
-    }
+  for (size_t i = 0; i < patterns.size(); ++i) {
+    SBFullHash key;
+    crypto::SHA256HashString(patterns[i], key.full_hash, sizeof(SBFullHash));
+    int index = GetHashIndex(key, full_hashes);
+    if (index != -1)
+      return index;
   }
-
   return -1;
 }
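A note on the new helper's output: for the canonical SafeBrowsing example URL http://a.b.c/1/2.html?param=1, the hosts to check are a.b.c and b.c and the paths are /1/2.html?param=1, /1/2.html, /1/, and /, so GeneratePatternsToCheck() emits their 2 x 4 = 8 combinations. A hypothetical test sketch (not part of this change, and buildable only inside the Chromium tree of this era) could pin that down:

// Hypothetical test; the expected values follow the SafeBrowsing
// host-suffix / path-prefix rules.
#include <algorithm>
#include <string>
#include <vector>

#include "chrome/browser/safe_browsing/safe_browsing_util.h"
#include "googleurl/src/gurl.h"
#include "testing/gtest/include/gtest/gtest.h"

TEST(SafeBrowsingUtilTest, GeneratePatternsToCheck) {
  std::vector<std::string> patterns;
  safe_browsing_util::GeneratePatternsToCheck(
      GURL("http://a.b.c/1/2.html?param=1"), &patterns);
  // 2 hosts ("a.b.c", "b.c") x 4 paths ("/1/2.html?param=1",
  // "/1/2.html", "/1/", "/") = 8 patterns.
  EXPECT_EQ(8U, patterns.size());
  // Element order depends on GenerateHostsToCheck/GeneratePathsToCheck,
  // so only check membership of two representative patterns.
  EXPECT_TRUE(std::find(patterns.begin(), patterns.end(),
                        "a.b.c/1/2.html?param=1") != patterns.end());
  EXPECT_TRUE(std::find(patterns.begin(), patterns.end(),
                        "b.c/") != patterns.end());
}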
diff --git a/chrome/browser/safe_browsing/safe_browsing_util.h b/chrome/browser/safe_browsing/safe_browsing_util.h
index bec4dc8..2911f46 100644
--- a/chrome/browser/safe_browsing/safe_browsing_util.h
+++ b/chrome/browser/safe_browsing/safe_browsing_util.h
@@ -296,6 +296,9 @@ void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
 // Given a URL, returns all the paths we need to check.
 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
 
+// Given a URL, returns all the patterns we need to check.
+void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
+
 int GetHashIndex(const SBFullHash& hash,
                  const std::vector<SBFullHashResult>& full_hashes);
diff --git a/chrome/common/safe_browsing/csd.proto b/chrome/common/safe_browsing/csd.proto
index 8496eb7..35e4302 100644
--- a/chrome/common/safe_browsing/csd.proto
+++ b/chrome/common/safe_browsing/csd.proto
@@ -68,4 +68,14 @@ message ClientPhishingRequest {
 
 message ClientPhishingResponse {
   required bool phishy = 1;
+
+  // A list of SafeBrowsing host-suffix / path-prefix expressions that
+  // are whitelisted.  The client must match the current top-level URL
+  // against these whitelisted expressions and only apply a positive
+  // phishing verdict above if the URL does not match any expression
+  // on this whitelist.  The client must not cache these whitelisted
+  // expressions.  This whitelist will be empty for the vast majority
+  // of the responses but might contain up to 100 entries in emergency
+  // situations.
+  repeated string whitelist_expression = 2;
 }
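To make the new field concrete, here is a hedged sketch of what an emergency whitelist response could look like on the wire and how it interacts with the client logic above; the expression values are invented for illustration, and the generated-header path assumes the usual protoc output location.

// Illustrative only: a server pushing an emergency whitelist.
#include "chrome/common/safe_browsing/csd.pb.h"

void BuildEmergencyResponse(safe_browsing::ClientPhishingResponse* response) {
  // The raw classifier verdict is still phishy...
  response->set_phishy(true);
  // ...but these expressions (hypothetical values) tell the client to
  // suppress the verdict for matching URLs.  Per the proto comment above,
  // the client must not cache them.
  response->add_whitelist_expression("login.example.com/");
  response->add_whitelist_expression("example.com/signin/");
}

A client receiving this response for http://login.example.com/ matches the first expression, so IsFalsePositiveResponse() returns true and the phishy verdict is suppressed for that URL while still applying to unrelated URLs.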