summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-08-25 02:13:52 +0000
committer: bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-08-25 02:13:52 +0000
commit: c62634677c6859d03ca82fd90d9382c4d0045e97 (patch)
tree: 84a96eac2bedab371281b34ccc20f714be9a0c57
parent: ef23cdd535353ea3749ffd92d32bfbe68420b56b (diff)
download: chromium_src-c62634677c6859d03ca82fd90d9382c4d0045e97.zip
chromium_src-c62634677c6859d03ca82fd90d9382c4d0045e97.tar.gz
chromium_src-c62634677c6859d03ca82fd90d9382c4d0045e97.tar.bz2
Add support for client-side phishing detection for non-UMA users.
In this mode, a sanitized pingback is sent that does not include the URL or any tokens extracted from the URL or page content. Currently, this feature is behind a command-line flag.

BUG=none
TEST=ClientSideDetectionServiceTest,BrowserFeatureExtractorTest

Review URL: http://codereview.chromium.org/7635010

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@98168 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- chrome/browser/safe_browsing/browser_feature_extractor.cc 45
-rw-r--r-- chrome/browser/safe_browsing/browser_feature_extractor.h 68
-rw-r--r-- chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc 50
-rw-r--r-- chrome/browser/safe_browsing/browser_features.cc 33
-rw-r--r-- chrome/browser/safe_browsing/browser_features.h 76
-rw-r--r-- chrome/browser/safe_browsing/client_side_detection_service.cc 89
-rw-r--r-- chrome/browser/safe_browsing/client_side_detection_service.h 22
-rw-r--r-- chrome/browser/safe_browsing/client_side_detection_service_unittest.cc 105
-rw-r--r-- chrome/browser/safe_browsing/safe_browsing_service.cc 9
-rw-r--r-- chrome/chrome_browser.gypi 2
-rw-r--r-- chrome/common/chrome_switches.cc 6
-rw-r--r-- chrome/common/chrome_switches.h 1
-rw-r--r-- chrome/common/safe_browsing/csd.proto 9
-rw-r--r-- chrome/renderer/safe_browsing/features.h 6
14 files changed, 431 insertions, 90 deletions
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc
index fc7933e..9cf4770 100644
--- a/chrome/browser/safe_browsing/browser_feature_extractor.cc
+++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc
@@ -15,39 +15,19 @@
#include "chrome/browser/history/history.h"
#include "chrome/browser/history/history_types.h"
#include "chrome/browser/profiles/profile.h"
+#include "chrome/browser/safe_browsing/browser_features.h"
#include "chrome/browser/safe_browsing/client_side_detection_service.h"
+#include "chrome/browser/safe_browsing/safe_browsing_util.h"
#include "content/common/page_transition_types.h"
#include "content/browser/browser_thread.h"
#include "content/browser/cancelable_request.h"
#include "content/browser/tab_contents/tab_contents.h"
+#include "crypto/sha2.h"
#include "googleurl/src/gurl.h"
namespace safe_browsing {
-namespace features {
-const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
-const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
-const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";
-const char kUrlHistoryVisitCountMoreThan24hAgo[] =
- "UrlHistoryVisitCountMoreThan24hAgo";
-const char kHttpHostVisitCount[] = "HttpHostVisitCount";
-const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
-const char kFirstHttpHostVisitMoreThan24hAgo[] =
- "FirstHttpHostVisitMoreThan24hAgo";
-const char kFirstHttpsHostVisitMoreThan24hAgo[] =
- "FirstHttpsHostVisitMoreThan24hAgo";
-
-const char kHostPrefix[] = "Host";
-const char kRedirectPrefix[] = "Redirect";
-const char kReferrer[] = "Referrer";
-const char kHasSSLReferrer[] = "HasSSLReferrer";
-const char kPageTransitionType[] = "PageTransitionType";
-const char kIsFirstNavigation[] = "IsFirstNavigation";
-const char kBadIpFetch[] = "BadIpFetch=";
-const char kSafeBrowsingMaliciousUrl[] = "SafeBrowsingMaliciousUrl=";
-const char kSafeBrowsingOriginalUrl[] = "SafeBrowsingOriginalUrl=";
-const char kSafeBrowsingIsSubresource[] = "SafeBrowsingIsSubresource";
-const char kSafeBrowsingThreatType[] = "SafeBrowsingThreatType";
-} // namespace features
+
+const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5;
BrowseInfo::BrowseInfo() {}
@@ -211,6 +191,7 @@ void BrowserFeatureExtractor::ExtractFeatures(const BrowseInfo* info,
}
ExtractBrowseInfoFeatures(*info, request);
+ ComputeURLHash(request);
pending_extractions_.insert(std::make_pair(request, callback));
MessageLoop::current()->PostTask(
FROM_HERE,
@@ -463,4 +444,18 @@ bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
return false;
}
+void BrowserFeatureExtractor::ComputeURLHash(
+ ClientPhishingRequest* request) {
+ // Put the URL into SafeBrowsing host suffix / path prefix format. Only the
+ // canonical host and path are hashed; the query parameters are stripped.
+ std::string host, path, query;  // |query| is filled in but never hashed.
+ safe_browsing_util::CanonicalizeUrl(GURL(request->url()),
+ &host, &path, &query);
+ DCHECK(!host.empty()) << request->url();
+ DCHECK(!path.empty()) << request->url();
+ request->set_suffix_prefix_hash(
+ crypto::SHA256HashString(host + path).substr(
+ 0, kSuffixPrefixHashLength));  // Keep only a short prefix of the hash.
+}
+
}; // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.h b/chrome/browser/safe_browsing/browser_feature_extractor.h
index 1317241..a67438c 100644
--- a/chrome/browser/safe_browsing/browser_feature_extractor.h
+++ b/chrome/browser/safe_browsing/browser_feature_extractor.h
@@ -13,6 +13,7 @@
#include <map>
#include <set>
+#include <string>
#include <utility>
#include "base/basictypes.h"
@@ -44,65 +45,6 @@ struct BrowseInfo {
~BrowseInfo();
};
-namespace features {
-
-// TODO(noelutz): move renderer/safe_browsing/features.h to common.
-////////////////////////////////////////////////////
-// History features.
-////////////////////////////////////////////////////
-
-// Number of visits to that URL stored in the browser history.
-// Should always be an integer larger than 1 because by the time
-// we lookup the history the current URL should already be stored there.
-extern const char kUrlHistoryVisitCount[];
-
-// Number of times the URL was typed in the Omnibox.
-extern const char kUrlHistoryTypedCount[];
-
-// Number of times the URL was reached by clicking a link.
-extern const char kUrlHistoryLinkCount[];
-
-// Number of times URL was visited more than 24h ago.
-extern const char kUrlHistoryVisitCountMoreThan24hAgo[];
-
-// Number of user-visible visits to all URLs on the same host/port as
-// the URL for HTTP and HTTPs.
-extern const char kHttpHostVisitCount[];
-extern const char kHttpsHostVisitCount[];
-
-// Boolean feature which is true if the host was visited for the first
-// time more than 24h ago (only considers user-visible visits like above).
-extern const char kFirstHttpHostVisitMoreThan24hAgo[];
-extern const char kFirstHttpsHostVisitMoreThan24hAgo[];
-
-////////////////////////////////////////////////////
-// Browse features.
-////////////////////////////////////////////////////
-// Note that these features may have the following prefixes appended to them
-// that tell for which page type the feature pertains.
-extern const char kHostPrefix[];
-extern const char kRedirectPrefix[];
-
-// Referrer
-extern const char kReferrer[];
-// True if the referrer was stripped because it is an SSL referrer.
-extern const char kHasSSLReferrer[];
-// Stores the page transition. See: PageTransition. We strip the qualifier.
-extern const char kPageTransitionType[];
-// True if this navigation is the first for this tab.
-extern const char kIsFirstNavigation[];
-
-// Resource was fetched from a known bad IP address.
-extern const char kBadIpFetch[];
-
-// SafeBrowsing related featues. Fields from the UnsafeResource if there is
-// any.
-extern const char kSafeBrowsingMaliciousUrl[];
-extern const char kSafeBrowsingOriginalUrl[];
-extern const char kSafeBrowsingIsSubresource[];
-extern const char kSafeBrowsingThreatType[];
-} // namespace features
-
// All methods of this class must be called on the UI thread (including
// the constructor).
class BrowserFeatureExtractor {
@@ -132,6 +74,10 @@ class BrowserFeatureExtractor {
ClientPhishingRequest* request,
DoneCallback* callback);
+ // The size of hash prefix to use for
+ // ClientPhishingRequest.suffix_prefix_hash. Public for testing.
+ static const int kSuffixPrefixHashLength;
+
private:
friend class DeleteTask<BrowserFeatureExtractor>;
typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData;
@@ -193,6 +139,10 @@ class BrowserFeatureExtractor {
// is set it will return true and false otherwise.
bool GetHistoryService(HistoryService** history);
+ // Computes the SHA-256 hash prefix for the URL and adds it to the
+ // ClientPhishingRequest.
+ void ComputeURLHash(ClientPhishingRequest* request);
+
TabContents* tab_;
ClientSideDetectionService* service_;
CancelableRequestConsumer request_consumer_;
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc
index f333755..3156f28 100644
--- a/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc
+++ b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc
@@ -15,6 +15,7 @@
#include "chrome/browser/history/history.h"
#include "chrome/browser/history/history_backend.h"
#include "chrome/browser/profiles/profile.h"
+#include "chrome/browser/safe_browsing/browser_features.h"
#include "chrome/browser/safe_browsing/client_side_detection_service.h"
#include "chrome/test/base/testing_profile.h"
#include "content/browser/browser_thread.h"
@@ -23,6 +24,7 @@
#include "content/browser/tab_contents/test_tab_contents.h"
#include "content/common/page_transition_types.h"
#include "content/common/view_messages.h"
+#include "crypto/sha2.h"
#include "googleurl/src/gurl.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -474,4 +476,52 @@ TEST_F(BrowserFeatureExtractorTest, SafeBrowsingFeatures) {
EXPECT_DOUBLE_EQ(1.0, features[features::kSafeBrowsingIsSubresource]);
EXPECT_DOUBLE_EQ(2.0, features[features::kSafeBrowsingThreatType]);
}
+
+TEST_F(BrowserFeatureExtractorTest, URLHashes) {
+ ClientPhishingRequest request;
+ request.set_url("http://host.com/");  // The scheme is not part of the hashed string.
+ request.set_client_score(0.8f);
+
+ history_service()->AddPage(GURL("http://host.com/"),
+ history::SOURCE_BROWSED);
+ contents()->NavigateAndCommit(GURL("http://host.com/"));
+
+ EXPECT_TRUE(ExtractFeatures(&request));
+ EXPECT_EQ(crypto::SHA256HashString("host.com/").substr(
+ 0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
+ request.suffix_prefix_hash());
+
+ request.set_url("http://www.host.com/path/");  // Full host name and path are both kept.
+ history_service()->AddPage(GURL("http://www.host.com/path/"),
+ history::SOURCE_BROWSED);
+ contents()->NavigateAndCommit(GURL("http://www.host.com/path/"));
+
+ EXPECT_TRUE(ExtractFeatures(&request));
+ EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/").substr(
+ 0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
+ request.suffix_prefix_hash());
+
+ request.set_url("http://user@www.host.com:1111/path/123?args");  // User, port and query must be dropped.
+ history_service()->AddPage(
+ GURL("http://user@www.host.com:1111/path/123?args"),
+ history::SOURCE_BROWSED);
+ contents()->NavigateAndCommit(
+ GURL("http://user@www.host.com:1111/path/123?args"));
+
+ EXPECT_TRUE(ExtractFeatures(&request));
+ EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/123").substr(
+ 0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
+ request.suffix_prefix_hash());
+
+ // Check that escaping matches the SafeBrowsing specification: "%21" is expected to become "!" and "//" in the path to become "/".
+ request.set_url("http://www.host.com/A%21//B");
+ history_service()->AddPage(GURL("http://www.host.com/A%21//B"),
+ history::SOURCE_BROWSED);
+ contents()->NavigateAndCommit(GURL("http://www.host.com/A%21//B"));
+
+ EXPECT_TRUE(ExtractFeatures(&request));
+ EXPECT_EQ(crypto::SHA256HashString("www.host.com/A!/B").substr(
+ 0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
+ request.suffix_prefix_hash());
+}
} // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/browser_features.cc b/chrome/browser/safe_browsing/browser_features.cc
new file mode 100644
index 0000000..0d43679
--- /dev/null
+++ b/chrome/browser/safe_browsing/browser_features.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/safe_browsing/browser_features.h"
+
+namespace safe_browsing {
+namespace features {  // Feature-name strings; each one is documented in browser_features.h.
+const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount";
+const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount";
+const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount";
+const char kUrlHistoryVisitCountMoreThan24hAgo[] =
+ "UrlHistoryVisitCountMoreThan24hAgo";
+const char kHttpHostVisitCount[] = "HttpHostVisitCount";
+const char kHttpsHostVisitCount[] = "HttpsHostVisitCount";
+const char kFirstHttpHostVisitMoreThan24hAgo[] =
+ "FirstHttpHostVisitMoreThan24hAgo";
+const char kFirstHttpsHostVisitMoreThan24hAgo[] =
+ "FirstHttpsHostVisitMoreThan24hAgo";
+
+const char kHostPrefix[] = "Host";  // Prefix placed in front of other feature names.
+const char kRedirectPrefix[] = "Redirect";  // Prefix placed in front of other feature names.
+const char kReferrer[] = "Referrer";
+const char kHasSSLReferrer[] = "HasSSLReferrer";
+const char kPageTransitionType[] = "PageTransitionType";
+const char kIsFirstNavigation[] = "IsFirstNavigation";
+const char kBadIpFetch[] = "BadIpFetch=";  // Ends in '='; the unit test in this change appends an IP address.
+const char kSafeBrowsingMaliciousUrl[] = "SafeBrowsingMaliciousUrl=";  // Ends in '='; the unit test appends a URL.
+const char kSafeBrowsingOriginalUrl[] = "SafeBrowsingOriginalUrl=";  // Ends in '='; the unit test appends a URL.
+const char kSafeBrowsingIsSubresource[] = "SafeBrowsingIsSubresource";
+const char kSafeBrowsingThreatType[] = "SafeBrowsingThreatType";
+} // namespace features
+} // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/browser_features.h b/chrome/browser/safe_browsing/browser_features.h
new file mode 100644
index 0000000..cae9f5c
--- /dev/null
+++ b/chrome/browser/safe_browsing/browser_features.h
@@ -0,0 +1,76 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// Client-side phishing features that are extracted by the browser, after
+// receiving a score from the renderer.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURES_H_
+#define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURES_H_
+#pragma once
+
+namespace safe_browsing {
+namespace features {
+
+// IMPORTANT: when adding new features, you must update kAllowedFeatures in
+// chrome/browser/safe_browsing/client_side_detection_service.cc if the feature
+// should be sent in sanitized pingbacks.
+//
+////////////////////////////////////////////////////
+// History features.
+////////////////////////////////////////////////////
+
+// Number of visits to that URL stored in the browser history.
+// Should always be an integer larger than 1 because by the time
+// we look up the history the current URL should already be stored there.
+extern const char kUrlHistoryVisitCount[];
+
+// Number of times the URL was typed in the Omnibox.
+extern const char kUrlHistoryTypedCount[];
+
+// Number of times the URL was reached by clicking a link.
+extern const char kUrlHistoryLinkCount[];
+
+// Number of times the URL was visited more than 24h ago.
+extern const char kUrlHistoryVisitCountMoreThan24hAgo[];
+
+// Number of user-visible visits to all URLs on the same host/port as
+// the URL, for HTTP and HTTPS respectively.
+extern const char kHttpHostVisitCount[];
+extern const char kHttpsHostVisitCount[];
+
+// Boolean feature which is true if the host was visited for the first
+// time more than 24h ago (only considers user-visible visits like above).
+extern const char kFirstHttpHostVisitMoreThan24hAgo[];
+extern const char kFirstHttpsHostVisitMoreThan24hAgo[];
+
+////////////////////////////////////////////////////
+// Browse features.
+////////////////////////////////////////////////////
+// Note that these features may have the following prefixes prepended to them
+// to indicate which page type the feature pertains to.
+extern const char kHostPrefix[];
+extern const char kRedirectPrefix[];
+
+// Referrer
+extern const char kReferrer[];
+// True if the referrer was stripped because it is an SSL referrer.
+extern const char kHasSSLReferrer[];
+// Stores the page transition. See: PageTransition. We strip the qualifier.
+extern const char kPageTransitionType[];
+// True if this navigation is the first for this tab.
+extern const char kIsFirstNavigation[];
+
+// Resource was fetched from a known bad IP address.
+extern const char kBadIpFetch[];
+
+// SafeBrowsing related features. Fields from the UnsafeResource, if there is
+// one.
+extern const char kSafeBrowsingMaliciousUrl[];
+extern const char kSafeBrowsingOriginalUrl[];
+extern const char kSafeBrowsingIsSubresource[];
+extern const char kSafeBrowsingThreatType[];
+} // namespace features
+} // namespace safe_browsing
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURES_H_
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc
index 1677353..30c5dee 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc
@@ -10,13 +10,18 @@
#include "base/memory/scoped_ptr.h"
#include "base/message_loop.h"
#include "base/metrics/histogram.h"
+#include "base/string_util.h"
#include "base/stl_util.h"
#include "base/task.h"
#include "base/time.h"
+#include "chrome/browser/browser_process.h"
+#include "chrome/browser/safe_browsing/browser_features.h"
+#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/common/net/http_return.h"
#include "chrome/common/safe_browsing/client_model.pb.h"
#include "chrome/common/safe_browsing/csd.pb.h"
#include "chrome/common/safe_browsing/safebrowsing_messages.h"
+#include "chrome/renderer/safe_browsing/features.h"
#include "content/browser/browser_thread.h"
#include "content/browser/renderer_host/render_process_host.h"
#include "content/common/notification_service.h"
@@ -64,8 +69,10 @@ ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
ClientSideDetectionService::ClientSideDetectionService(
net::URLRequestContextGetter* request_context_getter)
: enabled_(false),
+ sb_service_(g_browser_process->safe_browsing_service()),
ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)),
request_context_getter_(request_context_getter) {
+ InitializeAllowedFeatures();
registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_CREATED,
NotificationService::AllSources());
}
@@ -266,6 +273,40 @@ void ClientSideDetectionService::EndFetchModel(ClientModelStatus status) {
ScheduleFetchModel(delay_ms);
}
+void ClientSideDetectionService::SanitizeRequestForPingback(
+ const ClientPhishingRequest& full_request,
+ ClientPhishingRequest* sanitized_request) {
+ DCHECK(full_request.IsInitialized());
+ sanitized_request->Clear();  // Start empty; only the fields copied below are kept (the URL is never copied).
+ if (full_request.has_suffix_prefix_hash()) {
+ sanitized_request->set_suffix_prefix_hash(
+ full_request.suffix_prefix_hash());
+ }
+ sanitized_request->set_client_score(full_request.client_score());  // Copied unconditionally, unlike the has_*() guarded fields.
+ if (full_request.has_is_phishing()) {
+ sanitized_request->set_is_phishing(full_request.is_phishing());
+ }
+
+ for (int i = 0; i < full_request.feature_map_size(); ++i) {
+ const ClientPhishingRequest_Feature& feature = full_request.feature_map(i);
+ if (allowed_features_.find(feature.name()) != allowed_features_.end()) {  // Exact-name match against the allowed set.
+ sanitized_request->add_feature_map()->CopyFrom(feature);
+ }
+ }
+
+ if (full_request.has_model_version()) {
+ sanitized_request->set_model_version(full_request.model_version());
+ }
+
+ for (int i = 0; i < full_request.non_model_feature_map_size(); ++i) {
+ const ClientPhishingRequest_Feature& feature =
+ full_request.non_model_feature_map(i);
+ if (allowed_features_.find(feature.name()) != allowed_features_.end()) {  // Same allowed set as for the model features.
+ sanitized_request->add_non_model_feature_map()->CopyFrom(feature);
+ }
+ }
+}
+
void ClientSideDetectionService::StartClientReportPhishingRequest(
ClientPhishingRequest* verdict,
ClientReportPhishingRequestCallback* callback) {
@@ -279,8 +320,16 @@ void ClientSideDetectionService::StartClientReportPhishingRequest(
return;
}
+ // Create the version of the request proto that we'll send over the network.
+ ClientPhishingRequest request_to_send;
+ if (sb_service_ && sb_service_->CanReportStats()) {
+ request_to_send.CopyFrom(*request);
+ } else {
+ SanitizeRequestForPingback(*request, &request_to_send);
+ }
+
std::string request_data;
- if (!request->SerializeToString(&request_data)) {
+ if (!request_to_send.SerializeToString(&request_data)) {
UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1);
VLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
if (cb.get()) {
@@ -470,6 +519,44 @@ bool ClientSideDetectionService::InitializePrivateNetworks() {
return true;
}
+void ClientSideDetectionService::InitializeAllowedFeatures() {
+ static const char* const kAllowedFeatures[] = {
+ // Renderer (model) features; presumably declared in chrome/renderer/safe_browsing/features.h (confirm).
+ features::kUrlHostIsIpAddress,
+ features::kUrlNumOtherHostTokensGTOne,
+ features::kUrlNumOtherHostTokensGTThree,
+ features::kPageHasForms,
+ features::kPageActionOtherDomainFreq,
+ features::kPageHasTextInputs,
+ features::kPageHasPswdInputs,
+ features::kPageHasRadioInputs,
+ features::kPageHasCheckInputs,
+ features::kPageExternalLinksFreq,
+ features::kPageSecureLinksFreq,
+ features::kPageNumScriptTagsGTOne,
+ features::kPageNumScriptTagsGTSix,
+ features::kPageImgOtherDomainFreq,
+ // Browser (non-model) features, declared in browser_features.h.
+ features::kUrlHistoryVisitCount,
+ features::kUrlHistoryTypedCount,
+ features::kUrlHistoryLinkCount,
+ features::kUrlHistoryVisitCountMoreThan24hAgo,
+ features::kHttpHostVisitCount,
+ features::kHttpsHostVisitCount,
+ features::kFirstHttpHostVisitMoreThan24hAgo,
+ features::kFirstHttpsHostVisitMoreThan24hAgo,
+ features::kHasSSLReferrer,
+ features::kPageTransitionType,
+ features::kIsFirstNavigation,
+ features::kSafeBrowsingIsSubresource,
+ features::kSafeBrowsingThreatType,
+ };  // Not listed: kReferrer, kBadIpFetch, kSafeBrowsingMaliciousUrl, kSafeBrowsingOriginalUrl.
+
+ for (size_t i = 0; i < arraysize(kAllowedFeatures); ++i) {  // Copy the table into |allowed_features_|.
+ allowed_features_.insert(kAllowedFeatures[i]);
+ }
+}
+
// static
void ClientSideDetectionService::SetBadSubnets(const ClientSideModel& model,
BadSubnetMap* bad_subnets) {
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h
index 98a1fe2..002ced1 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.h
+++ b/chrome/browser/safe_browsing/client_side_detection_service.h
@@ -24,6 +24,7 @@
#include "base/basictypes.h"
#include "base/callback_old.h"
#include "base/gtest_prod_util.h"
+#include "base/hash_tables.h"
#include "base/memory/linked_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
@@ -36,6 +37,7 @@
#include "net/base/net_util.h"
class RenderProcessHost;
+class SafeBrowsingService;
namespace base {
class TimeDelta;
@@ -166,6 +168,8 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
ModelHasValidHashIds);
+ FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
+ SanitizeRequestForPingback);
// CacheState holds all information necessary to respond to a caller without
// actually making a HTTP request.
@@ -196,6 +200,14 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
static const base::TimeDelta kNegativeCacheInterval;
static const base::TimeDelta kPositiveCacheInterval;
+ // Given a ClientPhishingRequest populated by the renderer and browser
+ // feature extractors, fills |sanitized_request| with a copy in which no data
+ // specifically identifying the URL or page content is included. This is used
+ // when sending a pingback if the user is not opted in to UMA.
+ void SanitizeRequestForPingback(
+ const ClientPhishingRequest& original_request,
+ ClientPhishingRequest* sanitized_request);
+
// Starts sending the request to the client-side detection frontends.
// This method takes ownership of both pointers.
void StartClientReportPhishingRequest(
@@ -230,6 +242,10 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
// that we consider non-public IP addresses. Returns true on success.
bool InitializePrivateNetworks();
+ // Initializes the |allowed_features_| hash_set with the features that
+ // can be sent in sanitized pingbacks.
+ void InitializeAllowedFeatures();
+
// Send the model to the given renderer.
void SendModelToProcess(RenderProcessHost* process);
@@ -256,6 +272,9 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
scoped_ptr<base::TimeDelta> model_max_age_;
scoped_ptr<URLFetcher> model_fetcher_;
+ // This pointer may be NULL if SafeBrowsing is disabled.
+ scoped_refptr<SafeBrowsingService> sb_service_;
+
// Map of client report phishing request to the corresponding callback that
// has to be invoked when the request is done.
struct ClientReportInfo;
@@ -284,6 +303,9 @@ class ClientSideDetectionService : public URLFetcher::Delegate,
// The network blocks that we consider private IP address ranges.
std::vector<AddressRange> private_networks_;
+ // Features which are allowed to be sent in sanitized pingbacks.
+ base::hash_set<std::string> allowed_features_;
+
// Map of bad subnets which are copied from the client model and put into
// this map to speed up lookups.
BadSubnetMap bad_subnets_;
diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
index 760b2a8..dcf2076 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc
@@ -10,11 +10,14 @@
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/message_loop.h"
+#include "base/stringprintf.h"
#include "base/task.h"
#include "base/time.h"
+#include "chrome/browser/safe_browsing/browser_features.h"
#include "chrome/browser/safe_browsing/client_side_detection_service.h"
#include "chrome/common/safe_browsing/client_model.pb.h"
#include "chrome/common/safe_browsing/csd.pb.h"
+#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/test/base/testing_browser_process_test.h"
#include "content/browser/browser_thread.h"
#include "content/common/url_fetcher.h"
@@ -163,6 +166,21 @@ class ClientSideDetectionServiceTest : public TestingBrowserProcessTest {
EXPECT_TRUE(is_phishing);
}
+ void AddFeature(const std::string& name, double value,
+ ClientPhishingRequest* request) {
+ ClientPhishingRequest_Feature* feature = request->add_feature_map();  // Appends a new entry to |request|'s feature_map.
+ feature->set_name(name);
+ feature->set_value(value);
+ }
+
+ void AddNonModelFeature(const std::string& name, double value,
+ ClientPhishingRequest* request) {
+ ClientPhishingRequest_Feature* feature =
+ request->add_non_model_feature_map();  // Appends a new entry to |request|'s non_model_feature_map.
+ feature->set_name(name);
+ feature->set_value(value);
+ }
+
protected:
scoped_ptr<ClientSideDetectionService> csd_service_;
scoped_ptr<FakeURLFetcherFactory> factory_;
@@ -646,4 +664,91 @@ TEST_F(ClientSideDetectionServiceTest, SetEnabled) {
Mock::VerifyAndClearExpectations(service);
}
+TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
+ ClientPhishingRequest request;
+ request.set_url("http://www.us.host.com/blah");  // Must not survive sanitization.
+ request.set_suffix_prefix_hash("hash");
+ request.set_client_score(0.8f);
+ request.set_is_phishing(true);
+ AddFeature(std::string(features::kUrlTldToken) + "com", 1.0, &request);
+ AddFeature(std::string(features::kUrlDomainToken) + "host", 1.0, &request);
+ AddFeature(std::string(features::kUrlOtherHostToken) + "us", 1.0, &request);
+ AddFeature(std::string(features::kUrlOtherHostToken) + "www", 1.0, &request);
+ AddFeature(features::kUrlNumOtherHostTokensGTOne, 1.0, &request);  // Expected to be kept.
+ AddFeature(std::string(features::kUrlPathToken) + "blah", 1.0, &request);
+ AddFeature(features::kPageHasForms, 1.0, &request);  // Expected to be kept.
+ AddFeature(std::string(features::kPageTerm) + "term", 1.0, &request);
+ AddFeature(features::kPageImgOtherDomainFreq, 0.5, &request);  // Expected to be kept.
+ request.set_model_version(3);
+ AddNonModelFeature(features::kUrlHistoryVisitCount, 5.0, &request);  // The only non-model feature expected to be kept.
+ AddNonModelFeature(StringPrintf("%s=http://referrer.com/",
+ features::kReferrer),
+ 1.0, &request);
+ AddNonModelFeature(StringPrintf("%s%s=http://redirreferrer.com/",
+ features::kRedirectPrefix,
+ features::kReferrer),
+ 1.0, &request);
+ AddNonModelFeature(StringPrintf("%s%s=http://hostreferrer.com/",
+ features::kHostPrefix, features::kReferrer),
+ 1.0, &request);
+ AddNonModelFeature(StringPrintf("%s%s%s=http://hostredirreferrer.com/",
+ features::kHostPrefix,
+ features::kRedirectPrefix,
+ features::kReferrer),
+ 1.0, &request);
+ AddNonModelFeature(std::string(features::kBadIpFetch) + "1.2.3.4",
+ 1.0, &request);
+ AddNonModelFeature(std::string(features::kSafeBrowsingMaliciousUrl) +
+ "http://malicious.com/", 1.0, &request);
+ AddNonModelFeature(std::string(features::kSafeBrowsingOriginalUrl) +
+ "http://original.com/", 1.0, &request);
+
+ csd_service_.reset(ClientSideDetectionService::Create(NULL));
+
+ ClientPhishingRequest sanitized_request;
+ csd_service_->SanitizeRequestForPingback(request, &sanitized_request);
+
+ // Build the expected sanitized proto. For easier debugging, we'll check the
+ // output protobuf fields individually.
+ ClientPhishingRequest expected;
+ expected.set_suffix_prefix_hash(request.suffix_prefix_hash());
+ expected.set_client_score(request.client_score());
+ expected.set_is_phishing(request.is_phishing());
+ AddFeature(features::kUrlNumOtherHostTokensGTOne, 1.0, &expected);
+ AddFeature(features::kPageHasForms, 1.0, &expected);
+ AddFeature(features::kPageImgOtherDomainFreq, 0.5, &expected);
+ expected.set_model_version(3);
+ AddNonModelFeature(features::kUrlHistoryVisitCount, 5.0, &expected);
+
+ EXPECT_FALSE(sanitized_request.has_url());
+ EXPECT_EQ(expected.suffix_prefix_hash(),
+ sanitized_request.suffix_prefix_hash());
+ EXPECT_FLOAT_EQ(expected.client_score(), sanitized_request.client_score());
+ EXPECT_EQ(expected.is_phishing(), sanitized_request.is_phishing());
+
+ ASSERT_EQ(expected.feature_map_size(), sanitized_request.feature_map_size());
+ for (int i = 0; i < expected.feature_map_size(); ++i) {
+ EXPECT_EQ(expected.feature_map(i).name(),
+ sanitized_request.feature_map(i).name()) << "Feature " << i;
+ EXPECT_DOUBLE_EQ(expected.feature_map(i).value(),
+ sanitized_request.feature_map(i).value())
+ << "Feature " << i;
+ }
+ EXPECT_EQ(expected.model_version(), sanitized_request.model_version());
+ ASSERT_EQ(expected.non_model_feature_map_size(),
+ sanitized_request.non_model_feature_map_size());
+ for (int i = 0; i < expected.non_model_feature_map_size(); ++i) {
+ EXPECT_EQ(expected.non_model_feature_map(i).name(),
+ sanitized_request.non_model_feature_map(i).name())
+ << "Non-model feature " << i;
+ EXPECT_DOUBLE_EQ(expected.non_model_feature_map(i).value(),
+ sanitized_request.non_model_feature_map(i).value())
+ << "Non-model feature " << i;
+ }
+
+ // Also compare the serialized forms, in case there's a field that we forgot
+ // to check above.
+ EXPECT_EQ(expected.SerializeAsString(),
+ sanitized_request.SerializeAsString());
+}
+
} // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc
index c8f1851..256b0e1 100644
--- a/chrome/browser/safe_browsing/safe_browsing_service.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_service.cc
@@ -169,7 +169,9 @@ SafeBrowsingService::SafeBrowsingService()
#if !defined(OS_CHROMEOS)
if (!CommandLine::ForCurrentProcess()->HasSwitch(
switches::kDisableClientSidePhishingDetection) &&
- CanReportStats()) {
+ (CommandLine::ForCurrentProcess()->HasSwitch(
+ switches::kEnableSanitizedClientSidePhishingDetection) ||
+ CanReportStats())) {
csd_service_.reset(
safe_browsing::ClientSideDetectionService::Create(
g_browser_process->system_request_context()));
@@ -899,7 +901,10 @@ void SafeBrowsingService::Start() {
#else
enable_csd_whitelist_ =
(!cmdline->HasSwitch(switches::kDisableClientSidePhishingDetection) &&
- local_state && local_state->GetBoolean(prefs::kMetricsReportingEnabled));
+ (cmdline->HasSwitch(
+ switches::kEnableSanitizedClientSidePhishingDetection) ||
+ (local_state &&
+ local_state->GetBoolean(prefs::kMetricsReportingEnabled))));
#endif
BrowserThread::PostTask(
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi
index 5e8829d..ea93ea5 100644
--- a/chrome/chrome_browser.gypi
+++ b/chrome/chrome_browser.gypi
@@ -1892,6 +1892,8 @@
'browser/safe_browsing/bloom_filter.h',
'browser/safe_browsing/browser_feature_extractor.cc',
'browser/safe_browsing/browser_feature_extractor.h',
+ 'browser/safe_browsing/browser_features.cc',
+ 'browser/safe_browsing/browser_features.h',
'browser/safe_browsing/chunk_range.cc',
'browser/safe_browsing/chunk_range.h',
'browser/safe_browsing/client_side_detection_host.cc',
diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc
index e03bf0f..73187ef 100644
--- a/chrome/common/chrome_switches.cc
+++ b/chrome/common/chrome_switches.cc
@@ -497,6 +497,12 @@ const char kEnablePanels[] = "enable-panels";
// Enable speculative TCP/IP preconnection.
const char kEnablePreconnect[] = "enable-preconnect";
+// Enables the sanitized version of client-side phishing detection, for use by
+// non-UMA users. Any features containing portions of the URL or page content
+// are not sent as part of the pingback in this mode.
+const char kEnableSanitizedClientSidePhishingDetection[] =
+ "enable-sanitized-client-side-phishing-detection";
+
// Enable the IsSearchProviderInstalled and InstallSearchProvider with an extra
// parameter to indicate if the provider should be the default.
const char kEnableSearchProviderApiV2[] = "enable-search-provider-api-v2";
diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h
index 4e7785e..7287dd9 100644
--- a/chrome/common/chrome_switches.h
+++ b/chrome/common/chrome_switches.h
@@ -144,6 +144,7 @@ extern const char kEnableOriginBoundCerts[];
extern const char kEnablePanels[];
extern const char kEnablePreconnect[];
extern const char kEnableResourceContentSettings[];
+extern const char kEnableSanitizedClientSidePhishingDetection[];
extern const char kEnableSearchProviderApiV2[];
extern const char kEnableShortcutsProvider[];
extern const char kEnableSmoothScrolling[];
diff --git a/chrome/common/safe_browsing/csd.proto b/chrome/common/safe_browsing/csd.proto
index 983b79c..b700326 100644
--- a/chrome/common/safe_browsing/csd.proto
+++ b/chrome/common/safe_browsing/csd.proto
@@ -17,8 +17,13 @@ package safe_browsing;
message ClientPhishingRequest {
// URL that the client visited. The CGI parameters are stripped by the
- // client.
- required string url = 1;
+ // client. This field is ONLY set for UMA-enabled users.
+ optional string url = 1;
+
+ // A 5-byte SHA-256 hash prefix of the URL, in SafeBrowsing host suffix/path
+ // prefix form with query parameters stripped (i.e. "www.example.com/1/2/").
+ // Unlike "url", this is sent for all users.
+ optional bytes suffix_prefix_hash = 10;
// Score that was computed on the client. Value is between 0.0 and 1.0.
// The larger the value the more likely the url is phishing.
diff --git a/chrome/renderer/safe_browsing/features.h b/chrome/renderer/safe_browsing/features.h
index 82370c1..a8067f7 100644
--- a/chrome/renderer/safe_browsing/features.h
+++ b/chrome/renderer/safe_browsing/features.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
@@ -71,6 +71,10 @@ class FeatureMap {
namespace features {
// Constants for the various feature names that we use.
+//
+// IMPORTANT: when adding new features, you must update kAllowedFeatures in
+// chrome/browser/safe_browsing/client_side_detection_service.cc if the feature
+// should be sent in sanitized pingbacks.
////////////////////////////////////////////////////
// URL host features