diff options
author | noelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-30 18:23:25 +0000 |
---|---|---|
committer | noelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-30 18:23:25 +0000 |
commit | 0d069b50d0c2f10ecddb64f92223ec7cf799698f (patch) | |
tree | d13c3480d41795fa46a1eb12e1882d0fb08066a1 /chrome | |
parent | 331f7d23388b2ae18caa8a31219a4f89326c31a9 (diff) | |
download | chromium_src-0d069b50d0c2f10ecddb64f92223ec7cf799698f.zip chromium_src-0d069b50d0c2f10ecddb64f92223ec7cf799698f.tar.gz chromium_src-0d069b50d0c2f10ecddb64f92223ec7cf799698f.tar.bz2 |
Change the client-side phishing detection hashing function to
mimic what we're doing on the server.
BUG=
TEST=BrowserFeatureExtractorTest
Review URL: http://codereview.chromium.org/7793012
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@98848 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
6 files changed, 46 insertions, 28 deletions
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc index 9cf4770..69415a5 100644 --- a/chrome/browser/safe_browsing/browser_feature_extractor.cc +++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc @@ -9,6 +9,7 @@ #include "base/stl_util.h" #include "base/stringprintf.h" +#include "base/string_util.h" #include "base/task.h" #include "base/time.h" #include "chrome/common/safe_browsing/csd.pb.h" @@ -27,7 +28,7 @@ namespace safe_browsing { -const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5; +const int BrowserFeatureExtractor::kHashPrefixLength = 5; BrowseInfo::BrowseInfo() {} @@ -453,9 +454,27 @@ void BrowserFeatureExtractor::ComputeURLHash( &host, &path, &query); DCHECK(!host.empty()) << request->url(); DCHECK(!path.empty()) << request->url(); - request->set_suffix_prefix_hash( - crypto::SHA256HashString(host + path).substr( - 0, kSuffixPrefixHashLength)); + + // Lowercase the URL. Note: canonicalization converts the URL to ASCII. + // Percent encoded characters will not be lowercased but this is consistent + // with what we're doing on the server side. + StringToLowerASCII(&host); + StringToLowerASCII(&path); + + // Remove leading 'www.' from the host. + if (host.size() > 4 && host.substr(0, 4) == "www.") { + host.erase(0, 4); + } + // Remove everything after the last '/' to broaden the pattern. + if (path.size() > 1 && *(path.rbegin()) != '/') { + // The pattern never ends in foo.com/test? because we stripped CGI params. + // Remove everything that comes after the last '/'. + size_t last_path = path.rfind("/"); + path.erase(last_path + 1); + } + + request->set_hash_prefix(crypto::SHA256HashString(host + path).substr( + 0, kHashPrefixLength)); } }; // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.h b/chrome/browser/safe_browsing/browser_feature_extractor.h index a67438c..dd02c20 100644 --- a/chrome/browser/safe_browsing/browser_feature_extractor.h +++ b/chrome/browser/safe_browsing/browser_feature_extractor.h @@ -74,9 +74,9 @@ class BrowserFeatureExtractor { ClientPhishingRequest* request, DoneCallback* callback); - // The size of hash prefix to use for - // ClientPhishingRequest.suffix_prefix_hash. Public for testing. - static const int kSuffixPrefixHashLength; + // The size of hash prefix to use for ClientPhishingRequest.hash_prefix. + // Public for testing. + static const int kHashPrefixLength; private: friend class DeleteTask<BrowserFeatureExtractor>; diff --git a/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc index 3156f28..71b7b46 100644 --- a/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc +++ b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc @@ -488,8 +488,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) { EXPECT_TRUE(ExtractFeatures(&request)); EXPECT_EQ(crypto::SHA256HashString("host.com/").substr( - 0, BrowserFeatureExtractor::kSuffixPrefixHashLength), - request.suffix_prefix_hash()); + 0, BrowserFeatureExtractor::kHashPrefixLength), + request.hash_prefix()); request.set_url("http://www.host.com/path/"); history_service()->AddPage(GURL("http://www.host.com/path/"), @@ -497,9 +497,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) { contents()->NavigateAndCommit(GURL("http://www.host.com/path/")); EXPECT_TRUE(ExtractFeatures(&request)); - EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/").substr( - 0, BrowserFeatureExtractor::kSuffixPrefixHashLength), - request.suffix_prefix_hash()); + EXPECT_EQ(crypto::SHA256HashString("host.com/path/").substr( + 0, BrowserFeatureExtractor::kHashPrefixLength), + request.hash_prefix()); request.set_url("http://user@www.host.com:1111/path/123?args"); history_service()->AddPage( @@ -509,9 +509,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) { GURL("http://user@www.host.com:1111/path/123?args")); EXPECT_TRUE(ExtractFeatures(&request)); - EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/123").substr( - 0, BrowserFeatureExtractor::kSuffixPrefixHashLength), - request.suffix_prefix_hash()); + EXPECT_EQ(crypto::SHA256HashString("host.com/path/").substr( + 0, BrowserFeatureExtractor::kHashPrefixLength), + request.hash_prefix()); // Check that escaping matches the SafeBrowsing specification. request.set_url("http://www.host.com/A%21//B"); @@ -520,8 +520,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) { contents()->NavigateAndCommit(GURL("http://www.host.com/A%21//B")); EXPECT_TRUE(ExtractFeatures(&request)); - EXPECT_EQ(crypto::SHA256HashString("www.host.com/A!/B").substr( - 0, BrowserFeatureExtractor::kSuffixPrefixHashLength), - request.suffix_prefix_hash()); + EXPECT_EQ(crypto::SHA256HashString("host.com/a!/").substr( + 0, BrowserFeatureExtractor::kHashPrefixLength), + request.hash_prefix()); } } // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc index 30c5dee..9e800ce 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service.cc +++ b/chrome/browser/safe_browsing/client_side_detection_service.cc @@ -278,9 +278,8 @@ void ClientSideDetectionService::SanitizeRequestForPingback( ClientPhishingRequest* sanitized_request) { DCHECK(full_request.IsInitialized()); sanitized_request->Clear(); - if (full_request.has_suffix_prefix_hash()) { - sanitized_request->set_suffix_prefix_hash( - full_request.suffix_prefix_hash()); + if (full_request.has_hash_prefix()) { + sanitized_request->set_hash_prefix(full_request.hash_prefix()); } sanitized_request->set_client_score(full_request.client_score()); if (full_request.has_is_phishing()) { diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc index 83ce5aa..324f170 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc +++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc @@ -666,7 +666,7 @@ TEST_F(ClientSideDetectionServiceTest, SetEnabled) { TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) { ClientPhishingRequest request; request.set_url("http://www.us.host.com/blah"); - request.set_suffix_prefix_hash("hash"); + request.set_hash_prefix("hash"); request.set_client_score(0.8f); request.set_is_phishing(true); AddFeature(std::string(features::kUrlTldToken) + "com", 1.0, &request); @@ -709,7 +709,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) { // For easier debugging, we'll check the output protobuf fields individually. ClientPhishingRequest expected; - expected.set_suffix_prefix_hash(request.suffix_prefix_hash()); + expected.set_hash_prefix(request.hash_prefix()); expected.set_client_score(request.client_score()); expected.set_is_phishing(request.is_phishing()); AddFeature(features::kUrlNumOtherHostTokensGTOne, 1.0, &expected); @@ -719,8 +719,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) { AddNonModelFeature(features::kUrlHistoryVisitCount, 5.0, &expected); EXPECT_FALSE(sanitized_request.has_url()); - EXPECT_EQ(expected.suffix_prefix_hash(), - sanitized_request.suffix_prefix_hash()); + EXPECT_EQ(expected.hash_prefix(), sanitized_request.hash_prefix()); EXPECT_FLOAT_EQ(expected.client_score(), sanitized_request.client_score()); EXPECT_EQ(expected.is_phishing(), sanitized_request.is_phishing()); diff --git a/chrome/common/safe_browsing/csd.proto b/chrome/common/safe_browsing/csd.proto index b700326..8496eb7 100644 --- a/chrome/common/safe_browsing/csd.proto +++ b/chrome/common/safe_browsing/csd.proto @@ -20,10 +20,11 @@ message ClientPhishingRequest { // client. This field is ONLY set for UMA-enabled users. optional string url = 1; - // A 5-byte SHA-256 hash prefix of the URL, in SafeBrowsing host sufffix/path - // prefix form with query parameters stripped (i.e. "www.example.com/1/2/"). + // A 5-byte SHA-256 hash prefix of the URL. Before hashing the URL is + // canonicalized, converted to a suffix-prefix expression and broadened + // (www prefix is removed and everything past the last '/' is stripped). // Unlike "url", this is sent for all users. - optional bytes suffix_prefix_hash = 10; + optional bytes hash_prefix = 10; // Score that was computed on the client. Value is between 0.0 and 1.0. // The larger the value the more likely the url is phishing. |