summaryrefslogtreecommitdiffstats
path: root/chrome
diff options
context:
space:
mode:
authornoelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2011-08-30 18:23:25 +0000
committernoelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2011-08-30 18:23:25 +0000
commit0d069b50d0c2f10ecddb64f92223ec7cf799698f (patch)
treed13c3480d41795fa46a1eb12e1882d0fb08066a1 /chrome
parent331f7d23388b2ae18caa8a31219a4f89326c31a9 (diff)
downloadchromium_src-0d069b50d0c2f10ecddb64f92223ec7cf799698f.zip
chromium_src-0d069b50d0c2f10ecddb64f92223ec7cf799698f.tar.gz
chromium_src-0d069b50d0c2f10ecddb64f92223ec7cf799698f.tar.bz2
Change the client-side phishing detection hashing function to
mimic what we're doing on the server. BUG= TEST=BrowserFeatureExtractorTest Review URL: http://codereview.chromium.org/7793012 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@98848 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r--chrome/browser/safe_browsing/browser_feature_extractor.cc27
-rw-r--r--chrome/browser/safe_browsing/browser_feature_extractor.h6
-rw-r--r--chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc22
-rw-r--r--chrome/browser/safe_browsing/client_side_detection_service.cc5
-rw-r--r--chrome/browser/safe_browsing/client_side_detection_service_unittest.cc7
-rw-r--r--chrome/common/safe_browsing/csd.proto7
6 files changed, 46 insertions, 28 deletions
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc
index 9cf4770..69415a5 100644
--- a/chrome/browser/safe_browsing/browser_feature_extractor.cc
+++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc
@@ -9,6 +9,7 @@
#include "base/stl_util.h"
#include "base/stringprintf.h"
+#include "base/string_util.h"
#include "base/task.h"
#include "base/time.h"
#include "chrome/common/safe_browsing/csd.pb.h"
@@ -27,7 +28,7 @@
namespace safe_browsing {
-const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5;
+const int BrowserFeatureExtractor::kHashPrefixLength = 5;
BrowseInfo::BrowseInfo() {}
@@ -453,9 +454,27 @@ void BrowserFeatureExtractor::ComputeURLHash(
&host, &path, &query);
DCHECK(!host.empty()) << request->url();
DCHECK(!path.empty()) << request->url();
- request->set_suffix_prefix_hash(
- crypto::SHA256HashString(host + path).substr(
- 0, kSuffixPrefixHashLength));
+
+ // Lowercase the URL. Note: canonicalization converts the URL to ASCII.
+ // Percent encoded characters will not be lowercased but this is consistent
+ // with what we're doing on the server side.
+ StringToLowerASCII(&host);
+ StringToLowerASCII(&path);
+
+ // Remove leading 'www.' from the host.
+ if (host.size() > 4 && host.substr(0, 4) == "www.") {
+ host.erase(0, 4);
+ }
+ // Remove everything after the last '/' to broaden the pattern.
+ if (path.size() > 1 && *(path.rbegin()) != '/') {
+ // The pattern never ends in foo.com/test? because we stripped CGI params.
+ // Remove everything that comes after the last '/'.
+ size_t last_path = path.rfind("/");
+ path.erase(last_path + 1);
+ }
+
+ request->set_hash_prefix(crypto::SHA256HashString(host + path).substr(
+ 0, kHashPrefixLength));
}
}; // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.h b/chrome/browser/safe_browsing/browser_feature_extractor.h
index a67438c..dd02c20 100644
--- a/chrome/browser/safe_browsing/browser_feature_extractor.h
+++ b/chrome/browser/safe_browsing/browser_feature_extractor.h
@@ -74,9 +74,9 @@ class BrowserFeatureExtractor {
ClientPhishingRequest* request,
DoneCallback* callback);
- // The size of hash prefix to use for
- // ClientPhishingRequest.suffix_prefix_hash. Public for testing.
- static const int kSuffixPrefixHashLength;
+ // The size of hash prefix to use for ClientPhishingRequest.hash_prefix.
+ // Public for testing.
+ static const int kHashPrefixLength;
private:
friend class DeleteTask<BrowserFeatureExtractor>;
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc
index 3156f28..71b7b46 100644
--- a/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc
+++ b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc
@@ -488,8 +488,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
EXPECT_TRUE(ExtractFeatures(&request));
EXPECT_EQ(crypto::SHA256HashString("host.com/").substr(
- 0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
- request.suffix_prefix_hash());
+ 0, BrowserFeatureExtractor::kHashPrefixLength),
+ request.hash_prefix());
request.set_url("http://www.host.com/path/");
history_service()->AddPage(GURL("http://www.host.com/path/"),
@@ -497,9 +497,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
contents()->NavigateAndCommit(GURL("http://www.host.com/path/"));
EXPECT_TRUE(ExtractFeatures(&request));
- EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/").substr(
- 0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
- request.suffix_prefix_hash());
+ EXPECT_EQ(crypto::SHA256HashString("host.com/path/").substr(
+ 0, BrowserFeatureExtractor::kHashPrefixLength),
+ request.hash_prefix());
request.set_url("http://user@www.host.com:1111/path/123?args");
history_service()->AddPage(
@@ -509,9 +509,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
GURL("http://user@www.host.com:1111/path/123?args"));
EXPECT_TRUE(ExtractFeatures(&request));
- EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/123").substr(
- 0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
- request.suffix_prefix_hash());
+ EXPECT_EQ(crypto::SHA256HashString("host.com/path/").substr(
+ 0, BrowserFeatureExtractor::kHashPrefixLength),
+ request.hash_prefix());
// Check that escaping matches the SafeBrowsing specification.
request.set_url("http://www.host.com/A%21//B");
@@ -520,8 +520,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
contents()->NavigateAndCommit(GURL("http://www.host.com/A%21//B"));
EXPECT_TRUE(ExtractFeatures(&request));
- EXPECT_EQ(crypto::SHA256HashString("www.host.com/A!/B").substr(
- 0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
- request.suffix_prefix_hash());
+ EXPECT_EQ(crypto::SHA256HashString("host.com/a!/").substr(
+ 0, BrowserFeatureExtractor::kHashPrefixLength),
+ request.hash_prefix());
}
} // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc
index 30c5dee..9e800ce 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc
@@ -278,9 +278,8 @@ void ClientSideDetectionService::SanitizeRequestForPingback(
ClientPhishingRequest* sanitized_request) {
DCHECK(full_request.IsInitialized());
sanitized_request->Clear();
- if (full_request.has_suffix_prefix_hash()) {
- sanitized_request->set_suffix_prefix_hash(
- full_request.suffix_prefix_hash());
+ if (full_request.has_hash_prefix()) {
+ sanitized_request->set_hash_prefix(full_request.hash_prefix());
}
sanitized_request->set_client_score(full_request.client_score());
if (full_request.has_is_phishing()) {
diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
index 83ce5aa..324f170 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
@@ -666,7 +666,7 @@ TEST_F(ClientSideDetectionServiceTest, SetEnabled) {
TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
ClientPhishingRequest request;
request.set_url("http://www.us.host.com/blah");
- request.set_suffix_prefix_hash("hash");
+ request.set_hash_prefix("hash");
request.set_client_score(0.8f);
request.set_is_phishing(true);
AddFeature(std::string(features::kUrlTldToken) + "com", 1.0, &request);
@@ -709,7 +709,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
// For easier debugging, we'll check the output protobuf fields individually.
ClientPhishingRequest expected;
- expected.set_suffix_prefix_hash(request.suffix_prefix_hash());
+ expected.set_hash_prefix(request.hash_prefix());
expected.set_client_score(request.client_score());
expected.set_is_phishing(request.is_phishing());
AddFeature(features::kUrlNumOtherHostTokensGTOne, 1.0, &expected);
@@ -719,8 +719,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
AddNonModelFeature(features::kUrlHistoryVisitCount, 5.0, &expected);
EXPECT_FALSE(sanitized_request.has_url());
- EXPECT_EQ(expected.suffix_prefix_hash(),
- sanitized_request.suffix_prefix_hash());
+ EXPECT_EQ(expected.hash_prefix(), sanitized_request.hash_prefix());
EXPECT_FLOAT_EQ(expected.client_score(), sanitized_request.client_score());
EXPECT_EQ(expected.is_phishing(), sanitized_request.is_phishing());
diff --git a/chrome/common/safe_browsing/csd.proto b/chrome/common/safe_browsing/csd.proto
index b700326..8496eb7 100644
--- a/chrome/common/safe_browsing/csd.proto
+++ b/chrome/common/safe_browsing/csd.proto
@@ -20,10 +20,11 @@ message ClientPhishingRequest {
// client. This field is ONLY set for UMA-enabled users.
optional string url = 1;
- // A 5-byte SHA-256 hash prefix of the URL, in SafeBrowsing host sufffix/path
- // prefix form with query parameters stripped (i.e. "www.example.com/1/2/").
+ // A 5-byte SHA-256 hash prefix of the URL. Before hashing the URL is
+ // canonicalized, converted to a suffix-prefix expression and broadened
+ // (www prefix is removed and everything past the last '/' is stripped).
// Unlike "url", this is sent for all users.
- optional bytes suffix_prefix_hash = 10;
+ optional bytes hash_prefix = 10;
// Score that was computed on the client. Value is between 0.0 and 1.0.
// The larger the value the more likely the url is phishing.