diff options
author | bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-25 02:13:52 +0000 |
---|---|---|
committer | bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-25 02:13:52 +0000 |
commit | c62634677c6859d03ca82fd90d9382c4d0045e97 (patch) | |
tree | 84a96eac2bedab371281b34ccc20f714be9a0c57 | |
parent | ef23cdd535353ea3749ffd92d32bfbe68420b56b (diff) | |
download | chromium_src-c62634677c6859d03ca82fd90d9382c4d0045e97.zip chromium_src-c62634677c6859d03ca82fd90d9382c4d0045e97.tar.gz chromium_src-c62634677c6859d03ca82fd90d9382c4d0045e97.tar.bz2 |
Add support for client-side phishing detection for non-UMA users.
In this mode, a sanitized pingback is sent that does not include the URL or any
tokens extracted from the URL or page content. Currently, this feature is
behind a command-line flag.
BUG=none
TEST=ClientSideDetectionServiceTest,BrowserFeatureExtractorTest
Review URL: http://codereview.chromium.org/7635010
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@98168 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/safe_browsing/browser_feature_extractor.cc | 45 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/browser_feature_extractor.h | 68 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc | 50 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/browser_features.cc | 33 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/browser_features.h | 76 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/client_side_detection_service.cc | 89 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/client_side_detection_service.h | 22 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/client_side_detection_service_unittest.cc | 105 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_service.cc | 9 | ||||
-rw-r--r-- | chrome/chrome_browser.gypi | 2 | ||||
-rw-r--r-- | chrome/common/chrome_switches.cc | 6 | ||||
-rw-r--r-- | chrome/common/chrome_switches.h | 1 | ||||
-rw-r--r-- | chrome/common/safe_browsing/csd.proto | 9 | ||||
-rw-r--r-- | chrome/renderer/safe_browsing/features.h | 6 |
14 files changed, 431 insertions, 90 deletions
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc index fc7933e..9cf4770 100644 --- a/chrome/browser/safe_browsing/browser_feature_extractor.cc +++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc @@ -15,39 +15,19 @@ #include "chrome/browser/history/history.h" #include "chrome/browser/history/history_types.h" #include "chrome/browser/profiles/profile.h" +#include "chrome/browser/safe_browsing/browser_features.h" #include "chrome/browser/safe_browsing/client_side_detection_service.h" +#include "chrome/browser/safe_browsing/safe_browsing_util.h" #include "content/common/page_transition_types.h" #include "content/browser/browser_thread.h" #include "content/browser/cancelable_request.h" #include "content/browser/tab_contents/tab_contents.h" +#include "crypto/sha2.h" #include "googleurl/src/gurl.h" namespace safe_browsing { -namespace features { -const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount"; -const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount"; -const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount"; -const char kUrlHistoryVisitCountMoreThan24hAgo[] = - "UrlHistoryVisitCountMoreThan24hAgo"; -const char kHttpHostVisitCount[] = "HttpHostVisitCount"; -const char kHttpsHostVisitCount[] = "HttpsHostVisitCount"; -const char kFirstHttpHostVisitMoreThan24hAgo[] = - "FirstHttpHostVisitMoreThan24hAgo"; -const char kFirstHttpsHostVisitMoreThan24hAgo[] = - "FirstHttpsHostVisitMoreThan24hAgo"; - -const char kHostPrefix[] = "Host"; -const char kRedirectPrefix[] = "Redirect"; -const char kReferrer[] = "Referrer"; -const char kHasSSLReferrer[] = "HasSSLReferrer"; -const char kPageTransitionType[] = "PageTransitionType"; -const char kIsFirstNavigation[] = "IsFirstNavigation"; -const char kBadIpFetch[] = "BadIpFetch="; -const char kSafeBrowsingMaliciousUrl[] = "SafeBrowsingMaliciousUrl="; -const char kSafeBrowsingOriginalUrl[] = "SafeBrowsingOriginalUrl="; -const char kSafeBrowsingIsSubresource[] = "SafeBrowsingIsSubresource"; -const char kSafeBrowsingThreatType[] = "SafeBrowsingThreatType"; -} // namespace features + +const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5; BrowseInfo::BrowseInfo() {} @@ -211,6 +191,7 @@ void BrowserFeatureExtractor::ExtractFeatures(const BrowseInfo* info, } ExtractBrowseInfoFeatures(*info, request); + ComputeURLHash(request); pending_extractions_.insert(std::make_pair(request, callback)); MessageLoop::current()->PostTask( FROM_HERE, @@ -463,4 +444,18 @@ bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) { return false; } +void BrowserFeatureExtractor::ComputeURLHash( + ClientPhishingRequest* request) { + // Put the url into SafeBrowsing host suffix / path prefix format, with + // query parameters stripped. + std::string host, path, query; + safe_browsing_util::CanonicalizeUrl(GURL(request->url()), + &host, &path, &query); + DCHECK(!host.empty()) << request->url(); + DCHECK(!path.empty()) << request->url(); + request->set_suffix_prefix_hash( + crypto::SHA256HashString(host + path).substr( + 0, kSuffixPrefixHashLength)); +} + }; // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.h b/chrome/browser/safe_browsing/browser_feature_extractor.h index 1317241..a67438c 100644 --- a/chrome/browser/safe_browsing/browser_feature_extractor.h +++ b/chrome/browser/safe_browsing/browser_feature_extractor.h @@ -13,6 +13,7 @@ #include <map> #include <set> +#include <string> #include <utility> #include "base/basictypes.h" @@ -44,65 +45,6 @@ struct BrowseInfo { ~BrowseInfo(); }; -namespace features { - -// TODO(noelutz): move renderer/safe_browsing/features.h to common. -//////////////////////////////////////////////////// -// History features. -//////////////////////////////////////////////////// - -// Number of visits to that URL stored in the browser history. -// Should always be an integer larger than 1 because by the time -// we lookup the history the current URL should already be stored there. -extern const char kUrlHistoryVisitCount[]; - -// Number of times the URL was typed in the Omnibox. -extern const char kUrlHistoryTypedCount[]; - -// Number of times the URL was reached by clicking a link. -extern const char kUrlHistoryLinkCount[]; - -// Number of times URL was visited more than 24h ago. -extern const char kUrlHistoryVisitCountMoreThan24hAgo[]; - -// Number of user-visible visits to all URLs on the same host/port as -// the URL for HTTP and HTTPs. -extern const char kHttpHostVisitCount[]; -extern const char kHttpsHostVisitCount[]; - -// Boolean feature which is true if the host was visited for the first -// time more than 24h ago (only considers user-visible visits like above). -extern const char kFirstHttpHostVisitMoreThan24hAgo[]; -extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; - -//////////////////////////////////////////////////// -// Browse features. -//////////////////////////////////////////////////// -// Note that these features may have the following prefixes appended to them -// that tell for which page type the feature pertains. -extern const char kHostPrefix[]; -extern const char kRedirectPrefix[]; - -// Referrer -extern const char kReferrer[]; -// True if the referrer was stripped because it is an SSL referrer. -extern const char kHasSSLReferrer[]; -// Stores the page transition. See: PageTransition. We strip the qualifier. -extern const char kPageTransitionType[]; -// True if this navigation is the first for this tab. -extern const char kIsFirstNavigation[]; - -// Resource was fetched from a known bad IP address. -extern const char kBadIpFetch[]; - -// SafeBrowsing related featues. Fields from the UnsafeResource if there is -// any. -extern const char kSafeBrowsingMaliciousUrl[]; -extern const char kSafeBrowsingOriginalUrl[]; -extern const char kSafeBrowsingIsSubresource[]; -extern const char kSafeBrowsingThreatType[]; -} // namespace features - // All methods of this class must be called on the UI thread (including // the constructor). class BrowserFeatureExtractor { @@ -132,6 +74,10 @@ class BrowserFeatureExtractor { ClientPhishingRequest* request, DoneCallback* callback); + // The size of hash prefix to use for + // ClientPhishingRequest.suffix_prefix_hash. Public for testing. + static const int kSuffixPrefixHashLength; + private: friend class DeleteTask<BrowserFeatureExtractor>; typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; @@ -193,6 +139,10 @@ class BrowserFeatureExtractor { // is set it will return true and false otherwise. bool GetHistoryService(HistoryService** history); + // Computes the SHA-256 hash prefix for the URL and adds it to the + // ClientPhishingRequest. + void ComputeURLHash(ClientPhishingRequest* request); + TabContents* tab_; ClientSideDetectionService* service_; CancelableRequestConsumer request_consumer_; diff --git a/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc index f333755..3156f28 100644 --- a/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc +++ b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc @@ -15,6 +15,7 @@ #include "chrome/browser/history/history.h" #include "chrome/browser/history/history_backend.h" #include "chrome/browser/profiles/profile.h" +#include "chrome/browser/safe_browsing/browser_features.h" #include "chrome/browser/safe_browsing/client_side_detection_service.h" #include "chrome/test/base/testing_profile.h" #include "content/browser/browser_thread.h" @@ -23,6 +24,7 @@ #include "content/browser/tab_contents/test_tab_contents.h" #include "content/common/page_transition_types.h" #include "content/common/view_messages.h" +#include "crypto/sha2.h" #include "googleurl/src/gurl.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" @@ -474,4 +476,52 @@ TEST_F(BrowserFeatureExtractorTest, SafeBrowsingFeatures) { EXPECT_DOUBLE_EQ(1.0, features[features::kSafeBrowsingIsSubresource]); EXPECT_DOUBLE_EQ(2.0, features[features::kSafeBrowsingThreatType]); } + +TEST_F(BrowserFeatureExtractorTest, URLHashes) { + ClientPhishingRequest request; + request.set_url("http://host.com/"); + request.set_client_score(0.8f); + + history_service()->AddPage(GURL("http://host.com/"), + history::SOURCE_BROWSED); + contents()->NavigateAndCommit(GURL("http://host.com/")); + + EXPECT_TRUE(ExtractFeatures(&request)); + EXPECT_EQ(crypto::SHA256HashString("host.com/").substr( + 0, BrowserFeatureExtractor::kSuffixPrefixHashLength), + request.suffix_prefix_hash()); + + request.set_url("http://www.host.com/path/"); + history_service()->AddPage(GURL("http://www.host.com/path/"), + history::SOURCE_BROWSED); + contents()->NavigateAndCommit(GURL("http://www.host.com/path/")); + + EXPECT_TRUE(ExtractFeatures(&request)); + EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/").substr( + 0, BrowserFeatureExtractor::kSuffixPrefixHashLength), + request.suffix_prefix_hash()); + + request.set_url("http://user@www.host.com:1111/path/123?args"); + history_service()->AddPage( + GURL("http://user@www.host.com:1111/path/123?args"), + history::SOURCE_BROWSED); + contents()->NavigateAndCommit( + GURL("http://user@www.host.com:1111/path/123?args")); + + EXPECT_TRUE(ExtractFeatures(&request)); + EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/123").substr( + 0, BrowserFeatureExtractor::kSuffixPrefixHashLength), + request.suffix_prefix_hash()); + + // Check that escaping matches the SafeBrowsing specification. + request.set_url("http://www.host.com/A%21//B"); + history_service()->AddPage(GURL("http://www.host.com/A%21//B"), + history::SOURCE_BROWSED); + contents()->NavigateAndCommit(GURL("http://www.host.com/A%21//B")); + + EXPECT_TRUE(ExtractFeatures(&request)); + EXPECT_EQ(crypto::SHA256HashString("www.host.com/A!/B").substr( + 0, BrowserFeatureExtractor::kSuffixPrefixHashLength), + request.suffix_prefix_hash()); +} } // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/browser_features.cc b/chrome/browser/safe_browsing/browser_features.cc new file mode 100644 index 0000000..0d43679 --- /dev/null +++ b/chrome/browser/safe_browsing/browser_features.cc @@ -0,0 +1,33 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/browser_features.h" + +namespace safe_browsing { +namespace features { +const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount"; +const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount"; +const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount"; +const char kUrlHistoryVisitCountMoreThan24hAgo[] = + "UrlHistoryVisitCountMoreThan24hAgo"; +const char kHttpHostVisitCount[] = "HttpHostVisitCount"; +const char kHttpsHostVisitCount[] = "HttpsHostVisitCount"; +const char kFirstHttpHostVisitMoreThan24hAgo[] = + "FirstHttpHostVisitMoreThan24hAgo"; +const char kFirstHttpsHostVisitMoreThan24hAgo[] = + "FirstHttpsHostVisitMoreThan24hAgo"; + +const char kHostPrefix[] = "Host"; +const char kRedirectPrefix[] = "Redirect"; +const char kReferrer[] = "Referrer"; +const char kHasSSLReferrer[] = "HasSSLReferrer"; +const char kPageTransitionType[] = "PageTransitionType"; +const char kIsFirstNavigation[] = "IsFirstNavigation"; +const char kBadIpFetch[] = "BadIpFetch="; +const char kSafeBrowsingMaliciousUrl[] = "SafeBrowsingMaliciousUrl="; +const char kSafeBrowsingOriginalUrl[] = "SafeBrowsingOriginalUrl="; +const char kSafeBrowsingIsSubresource[] = "SafeBrowsingIsSubresource"; +const char kSafeBrowsingThreatType[] = "SafeBrowsingThreatType"; +} // namespace features +} // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/browser_features.h b/chrome/browser/safe_browsing/browser_features.h new file mode 100644 index 0000000..cae9f5c --- /dev/null +++ b/chrome/browser/safe_browsing/browser_features.h @@ -0,0 +1,76 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Client-side phishing features that are extracted by the browser, after +// receiving a score from the renderer. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURES_H_ +#define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURES_H_ +#pragma once + +namespace safe_browsing { +namespace features { + +// IMPORTANT: when adding new features, you must update kAllowedFeatures in +// chrome/browser/safe_browsing/client_side_detection_service.cc if the feature +// should be sent in sanitized pingbacks. +// +//////////////////////////////////////////////////// +// History features. +//////////////////////////////////////////////////// + +// Number of visits to that URL stored in the browser history. +// Should always be an integer larger than 1 because by the time +// we lookup the history the current URL should already be stored there. +extern const char kUrlHistoryVisitCount[]; + +// Number of times the URL was typed in the Omnibox. +extern const char kUrlHistoryTypedCount[]; + +// Number of times the URL was reached by clicking a link. +extern const char kUrlHistoryLinkCount[]; + +// Number of times URL was visited more than 24h ago. +extern const char kUrlHistoryVisitCountMoreThan24hAgo[]; + +// Number of user-visible visits to all URLs on the same host/port as +// the URL for HTTP and HTTPs. +extern const char kHttpHostVisitCount[]; +extern const char kHttpsHostVisitCount[]; + +// Boolean feature which is true if the host was visited for the first +// time more than 24h ago (only considers user-visible visits like above). +extern const char kFirstHttpHostVisitMoreThan24hAgo[]; +extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; + +//////////////////////////////////////////////////// +// Browse features. +//////////////////////////////////////////////////// +// Note that these features may have the following prefixes appended to them +// that tell for which page type the feature pertains. +extern const char kHostPrefix[]; +extern const char kRedirectPrefix[]; + +// Referrer +extern const char kReferrer[]; +// True if the referrer was stripped because it is an SSL referrer. +extern const char kHasSSLReferrer[]; +// Stores the page transition. See: PageTransition. We strip the qualifier. +extern const char kPageTransitionType[]; +// True if this navigation is the first for this tab. +extern const char kIsFirstNavigation[]; + +// Resource was fetched from a known bad IP address. +extern const char kBadIpFetch[]; + +// SafeBrowsing related featues. Fields from the UnsafeResource if there is +// any. +extern const char kSafeBrowsingMaliciousUrl[]; +extern const char kSafeBrowsingOriginalUrl[]; +extern const char kSafeBrowsingIsSubresource[]; +extern const char kSafeBrowsingThreatType[]; +} // namespace features +} // namespace safe_browsing + +#endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURES_H_ diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc index 1677353..30c5dee 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service.cc +++ b/chrome/browser/safe_browsing/client_side_detection_service.cc @@ -10,13 +10,18 @@ #include "base/memory/scoped_ptr.h" #include "base/message_loop.h" #include "base/metrics/histogram.h" +#include "base/string_util.h" #include "base/stl_util.h" #include "base/task.h" #include "base/time.h" +#include "chrome/browser/browser_process.h" +#include "chrome/browser/safe_browsing/browser_features.h" +#include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "chrome/common/net/http_return.h" #include "chrome/common/safe_browsing/client_model.pb.h" #include "chrome/common/safe_browsing/csd.pb.h" #include "chrome/common/safe_browsing/safebrowsing_messages.h" +#include "chrome/renderer/safe_browsing/features.h" #include "content/browser/browser_thread.h" #include "content/browser/renderer_host/render_process_host.h" #include "content/common/notification_service.h" @@ -64,8 +69,10 @@ ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time) ClientSideDetectionService::ClientSideDetectionService( net::URLRequestContextGetter* request_context_getter) : enabled_(false), + sb_service_(g_browser_process->safe_browsing_service()), ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), request_context_getter_(request_context_getter) { + InitializeAllowedFeatures(); registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_CREATED, NotificationService::AllSources()); } @@ -266,6 +273,40 @@ void ClientSideDetectionService::EndFetchModel(ClientModelStatus status) { ScheduleFetchModel(delay_ms); } +void ClientSideDetectionService::SanitizeRequestForPingback( + const ClientPhishingRequest& full_request, + ClientPhishingRequest* sanitized_request) { + DCHECK(full_request.IsInitialized()); + sanitized_request->Clear(); + if (full_request.has_suffix_prefix_hash()) { + sanitized_request->set_suffix_prefix_hash( + full_request.suffix_prefix_hash()); + } + sanitized_request->set_client_score(full_request.client_score()); + if (full_request.has_is_phishing()) { + sanitized_request->set_is_phishing(full_request.is_phishing()); + } + + for (int i = 0; i < full_request.feature_map_size(); ++i) { + const ClientPhishingRequest_Feature& feature = full_request.feature_map(i); + if (allowed_features_.find(feature.name()) != allowed_features_.end()) { + sanitized_request->add_feature_map()->CopyFrom(feature); + } + } + + if (full_request.has_model_version()) { + sanitized_request->set_model_version(full_request.model_version()); + } + + for (int i = 0; i < full_request.non_model_feature_map_size(); ++i) { + const ClientPhishingRequest_Feature& feature = + full_request.non_model_feature_map(i); + if (allowed_features_.find(feature.name()) != allowed_features_.end()) { + sanitized_request->add_non_model_feature_map()->CopyFrom(feature); + } + } +} + void ClientSideDetectionService::StartClientReportPhishingRequest( ClientPhishingRequest* verdict, ClientReportPhishingRequestCallback* callback) { @@ -279,8 +320,16 @@ void ClientSideDetectionService::StartClientReportPhishingRequest( return; } + // Create the version of the request proto that we'll send over the network. + ClientPhishingRequest request_to_send; + if (sb_service_ && sb_service_->CanReportStats()) { + request_to_send.CopyFrom(*request); + } else { + SanitizeRequestForPingback(*request, &request_to_send); + } + std::string request_data; - if (!request->SerializeToString(&request_data)) { + if (!request_to_send.SerializeToString(&request_data)) { UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1); VLOG(1) << "Unable to serialize the CSD request. Proto file changed?"; if (cb.get()) { @@ -470,6 +519,44 @@ bool ClientSideDetectionService::InitializePrivateNetworks() { return true; } +void ClientSideDetectionService::InitializeAllowedFeatures() { + static const char* const kAllowedFeatures[] = { + // Renderer (model) features. + features::kUrlHostIsIpAddress, + features::kUrlNumOtherHostTokensGTOne, + features::kUrlNumOtherHostTokensGTThree, + features::kPageHasForms, + features::kPageActionOtherDomainFreq, + features::kPageHasTextInputs, + features::kPageHasPswdInputs, + features::kPageHasRadioInputs, + features::kPageHasCheckInputs, + features::kPageExternalLinksFreq, + features::kPageSecureLinksFreq, + features::kPageNumScriptTagsGTOne, + features::kPageNumScriptTagsGTSix, + features::kPageImgOtherDomainFreq, + // Browser (non-model) features. + features::kUrlHistoryVisitCount, + features::kUrlHistoryTypedCount, + features::kUrlHistoryLinkCount, + features::kUrlHistoryVisitCountMoreThan24hAgo, + features::kHttpHostVisitCount, + features::kHttpsHostVisitCount, + features::kFirstHttpHostVisitMoreThan24hAgo, + features::kFirstHttpsHostVisitMoreThan24hAgo, + features::kHasSSLReferrer, + features::kPageTransitionType, + features::kIsFirstNavigation, + features::kSafeBrowsingIsSubresource, + features::kSafeBrowsingThreatType, + }; + + for (size_t i = 0; i < arraysize(kAllowedFeatures); ++i) { + allowed_features_.insert(kAllowedFeatures[i]); + } +} + // static void ClientSideDetectionService::SetBadSubnets(const ClientSideModel& model, BadSubnetMap* bad_subnets) { diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h index 98a1fe2..002ced1 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service.h +++ b/chrome/browser/safe_browsing/client_side_detection_service.h @@ -24,6 +24,7 @@ #include "base/basictypes.h" #include "base/callback_old.h" #include "base/gtest_prod_util.h" +#include "base/hash_tables.h" #include "base/memory/linked_ptr.h" #include "base/memory/ref_counted.h" #include "base/memory/scoped_ptr.h" @@ -36,6 +37,7 @@ #include "net/base/net_util.h" class RenderProcessHost; +class SafeBrowsingService; namespace base { class TimeDelta; @@ -166,6 +168,8 @@ class ClientSideDetectionService : public URLFetcher::Delegate, FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress); FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, ModelHasValidHashIds); + FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, + SanitizeRequestForPingback); // CacheState holds all information necessary to respond to a caller without // actually making a HTTP request. @@ -196,6 +200,14 @@ class ClientSideDetectionService : public URLFetcher::Delegate, static const base::TimeDelta kNegativeCacheInterval; static const base::TimeDelta kPositiveCacheInterval; + // Given a ClientSidePhishingRequest populated by the renderer and browser + // feature extractors, sanitizes it so that no data specifically identifying + // the URL or page content is included. This is used when sending a pingback + // if the user is not opted in to UMA. + void SanitizeRequestForPingback( + const ClientPhishingRequest& original_request, + ClientPhishingRequest* sanitized_request); + // Starts sending the request to the client-side detection frontends. // This method takes ownership of both pointers. void StartClientReportPhishingRequest( @@ -230,6 +242,10 @@ class ClientSideDetectionService : public URLFetcher::Delegate, // that we consider non-public IP addresses. Returns true on success. bool InitializePrivateNetworks(); + // Initializes the |allowed_features_| hash_set with the features that + // can be sent in sanitized pingbacks. + void InitializeAllowedFeatures(); + // Send the model to the given renderer. void SendModelToProcess(RenderProcessHost* process); @@ -256,6 +272,9 @@ class ClientSideDetectionService : public URLFetcher::Delegate, scoped_ptr<base::TimeDelta> model_max_age_; scoped_ptr<URLFetcher> model_fetcher_; + // This pointer may be NULL if SafeBrowsing is disabled. + scoped_refptr<SafeBrowsingService> sb_service_; + // Map of client report phishing request to the corresponding callback that // has to be invoked when the request is done. struct ClientReportInfo; @@ -284,6 +303,9 @@ class ClientSideDetectionService : public URLFetcher::Delegate, // The network blocks that we consider private IP address ranges. std::vector<AddressRange> private_networks_; + // Features which are allowed to be sent in sanitized pingbacks. + base::hash_set<std::string> allowed_features_; + // Map of bad subnets which are copied from the client model and put into // this map to speed up lookups. BadSubnetMap bad_subnets_; diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc index 760b2a8..dcf2076 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc +++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc @@ -10,11 +10,14 @@ #include "base/logging.h" #include "base/memory/scoped_ptr.h" #include "base/message_loop.h" +#include "base/stringprintf.h" #include "base/task.h" #include "base/time.h" +#include "chrome/browser/safe_browsing/browser_features.h" #include "chrome/browser/safe_browsing/client_side_detection_service.h" #include "chrome/common/safe_browsing/client_model.pb.h" #include "chrome/common/safe_browsing/csd.pb.h" +#include "chrome/renderer/safe_browsing/features.h" #include "chrome/test/base/testing_browser_process_test.h" #include "content/browser/browser_thread.h" #include "content/common/url_fetcher.h" @@ -163,6 +166,21 @@ class ClientSideDetectionServiceTest : public TestingBrowserProcessTest { EXPECT_TRUE(is_phishing); } + void AddFeature(const std::string& name, double value, + ClientPhishingRequest* request) { + ClientPhishingRequest_Feature* feature = request->add_feature_map(); + feature->set_name(name); + feature->set_value(value); + } + + void AddNonModelFeature(const std::string& name, double value, + ClientPhishingRequest* request) { + ClientPhishingRequest_Feature* feature = + request->add_non_model_feature_map(); + feature->set_name(name); + feature->set_value(value); + } + protected: scoped_ptr<ClientSideDetectionService> csd_service_; scoped_ptr<FakeURLFetcherFactory> factory_; @@ -646,4 +664,91 @@ TEST_F(ClientSideDetectionServiceTest, SetEnabled) { Mock::VerifyAndClearExpectations(service); } +TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) { + ClientPhishingRequest request; + request.set_url("http://www.us.host.com/blah"); + request.set_suffix_prefix_hash("hash"); + request.set_client_score(0.8f); + request.set_is_phishing(true); + AddFeature(std::string(features::kUrlTldToken) + "com", 1.0, &request); + AddFeature(std::string(features::kUrlDomainToken) + "host", 1.0, &request); + AddFeature(std::string(features::kUrlOtherHostToken) + "us", 1.0, &request); + AddFeature(std::string(features::kUrlOtherHostToken) + "www", 1.0, &request); + AddFeature(features::kUrlNumOtherHostTokensGTOne, 1.0, &request); + AddFeature(std::string(features::kUrlPathToken) + "blah", 1.0, &request); + AddFeature(features::kPageHasForms, 1.0, &request); + AddFeature(std::string(features::kPageTerm) + "term", 1.0, &request); + AddFeature(features::kPageImgOtherDomainFreq, 0.5, &request); + request.set_model_version(3); + AddNonModelFeature(features::kUrlHistoryVisitCount, 5.0, &request); + AddNonModelFeature(StringPrintf("%s=http://referrer.com/", + features::kReferrer), + 1.0, &request); + AddNonModelFeature(StringPrintf("%s%s=http://redirreferrer.com/", + features::kRedirectPrefix, + features::kReferrer), + 1.0, &request); + AddNonModelFeature(StringPrintf("%s%s=http://hostreferrer.com/", + features::kHostPrefix, features::kReferrer), + 1.0, &request); + AddNonModelFeature(StringPrintf("%s%s%s=http://hostredirreferrer.com/", + features::kHostPrefix, + features::kRedirectPrefix, + features::kReferrer), + 1.0, &request); + AddNonModelFeature(std::string(features::kBadIpFetch) + "1.2.3.4", + 1.0, &request); + AddNonModelFeature(std::string(features::kSafeBrowsingMaliciousUrl) + + "http://malicious.com/", 1.0, &request); + AddNonModelFeature(std::string(features::kSafeBrowsingOriginalUrl) + + "http://original.com/", 1.0, &request); + + csd_service_.reset(ClientSideDetectionService::Create(NULL)); + + ClientPhishingRequest sanitized_request; + csd_service_->SanitizeRequestForPingback(request, &sanitized_request); + + // For easier debugging, we'll check the output protobuf fields individually. + ClientPhishingRequest expected; + expected.set_suffix_prefix_hash(request.suffix_prefix_hash()); + expected.set_client_score(request.client_score()); + expected.set_is_phishing(request.is_phishing()); + AddFeature(features::kUrlNumOtherHostTokensGTOne, 1.0, &expected); + AddFeature(features::kPageHasForms, 1.0, &expected); + AddFeature(features::kPageImgOtherDomainFreq, 0.5, &expected); + expected.set_model_version(3); + AddNonModelFeature(features::kUrlHistoryVisitCount, 5.0, &expected); + + EXPECT_FALSE(sanitized_request.has_url()); + EXPECT_EQ(expected.suffix_prefix_hash(), + sanitized_request.suffix_prefix_hash()); + EXPECT_FLOAT_EQ(expected.client_score(), sanitized_request.client_score()); + EXPECT_EQ(expected.is_phishing(), sanitized_request.is_phishing()); + + ASSERT_EQ(expected.feature_map_size(), sanitized_request.feature_map_size()); + for (int i = 0; i < expected.feature_map_size(); ++i) { + EXPECT_EQ(expected.feature_map(i).name(), + sanitized_request.feature_map(i).name()) << "Feature " << i; + EXPECT_DOUBLE_EQ(expected.feature_map(i).value(), + sanitized_request.feature_map(i).value()) + << "Feature " << i; + } + EXPECT_EQ(expected.model_version(), sanitized_request.model_version()); + ASSERT_EQ(expected.non_model_feature_map_size(), + sanitized_request.non_model_feature_map_size()); + for (int i = 0; i < expected.non_model_feature_map_size(); ++i) { + EXPECT_EQ(expected.non_model_feature_map(i).name(), + sanitized_request.non_model_feature_map(i).name()) + << "Non-model feature " << i; + EXPECT_DOUBLE_EQ(expected.non_model_feature_map(i).value(), + sanitized_request.non_model_feature_map(i).value()) + << "Non-model feature " << i; + } + + // Also check the serialized forms in case there's a field that we forget + // to add above. + EXPECT_EQ(expected.SerializeAsString(), + sanitized_request.SerializeAsString()); +} + } // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc index c8f1851..256b0e1 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.cc +++ b/chrome/browser/safe_browsing/safe_browsing_service.cc @@ -169,7 +169,9 @@ SafeBrowsingService::SafeBrowsingService() #if !defined(OS_CHROMEOS) if (!CommandLine::ForCurrentProcess()->HasSwitch( switches::kDisableClientSidePhishingDetection) && - CanReportStats()) { + (CommandLine::ForCurrentProcess()->HasSwitch( + switches::kEnableSanitizedClientSidePhishingDetection) || + CanReportStats())) { csd_service_.reset( safe_browsing::ClientSideDetectionService::Create( g_browser_process->system_request_context())); @@ -899,7 +901,10 @@ void SafeBrowsingService::Start() { #else enable_csd_whitelist_ = (!cmdline->HasSwitch(switches::kDisableClientSidePhishingDetection) && - local_state && local_state->GetBoolean(prefs::kMetricsReportingEnabled)); + (cmdline->HasSwitch( + switches::kEnableSanitizedClientSidePhishingDetection) || + (local_state && + local_state->GetBoolean(prefs::kMetricsReportingEnabled)))); #endif BrowserThread::PostTask( diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 5e8829d..ea93ea5 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -1892,6 +1892,8 @@ 'browser/safe_browsing/bloom_filter.h', 'browser/safe_browsing/browser_feature_extractor.cc', 'browser/safe_browsing/browser_feature_extractor.h', + 'browser/safe_browsing/browser_features.cc', + 'browser/safe_browsing/browser_features.h', 'browser/safe_browsing/chunk_range.cc', 'browser/safe_browsing/chunk_range.h', 'browser/safe_browsing/client_side_detection_host.cc', diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc index e03bf0f..73187ef 100644 --- a/chrome/common/chrome_switches.cc +++ b/chrome/common/chrome_switches.cc @@ -497,6 +497,12 @@ const char kEnablePanels[] = "enable-panels"; // Enable speculative TCP/IP preconnection. const char kEnablePreconnect[] = "enable-preconnect"; +// Enables the sanitized version of client-side phishing detection, for use by +// non-UMA users. Any features containing portions of the URL or page content +// are not sent as part of the pingback in this mode. +const char kEnableSanitizedClientSidePhishingDetection[] = + "enable-sanitized-client-side-phishing-detection"; + // Enable the IsSearchProviderInstalled and InstallSearchProvider with an extra // parameter to indicate if the provider should be the default. const char kEnableSearchProviderApiV2[] = "enable-search-provider-api-v2"; diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h index 4e7785e..7287dd9 100644 --- a/chrome/common/chrome_switches.h +++ b/chrome/common/chrome_switches.h @@ -144,6 +144,7 @@ extern const char kEnableOriginBoundCerts[]; extern const char kEnablePanels[]; extern const char kEnablePreconnect[]; extern const char kEnableResourceContentSettings[]; +extern const char kEnableSanitizedClientSidePhishingDetection[]; extern const char kEnableSearchProviderApiV2[]; extern const char kEnableShortcutsProvider[]; extern const char kEnableSmoothScrolling[]; diff --git a/chrome/common/safe_browsing/csd.proto b/chrome/common/safe_browsing/csd.proto index 983b79c..b700326 100644 --- a/chrome/common/safe_browsing/csd.proto +++ b/chrome/common/safe_browsing/csd.proto @@ -17,8 +17,13 @@ package safe_browsing; message ClientPhishingRequest { // URL that the client visited. The CGI parameters are stripped by the - // client. - required string url = 1; + // client. This field is ONLY set for UMA-enabled users. + optional string url = 1; + + // A 5-byte SHA-256 hash prefix of the URL, in SafeBrowsing host sufffix/path + // prefix form with query parameters stripped (i.e. "www.example.com/1/2/"). + // Unlike "url", this is sent for all users. + optional bytes suffix_prefix_hash = 10; // Score that was computed on the client. Value is between 0.0 and 1.0. // The larger the value the more likely the url is phishing. diff --git a/chrome/renderer/safe_browsing/features.h b/chrome/renderer/safe_browsing/features.h index 82370c1..a8067f7 100644 --- a/chrome/renderer/safe_browsing/features.h +++ b/chrome/renderer/safe_browsing/features.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -71,6 +71,10 @@ class FeatureMap { namespace features { // Constants for the various feature names that we use. +// +// IMPORTANT: when adding new features, you must update kAllowedFeatures in +// chrome/browser/safe_browsing/client_side_detection_service.cc if the feature +// should be sent in sanitized pingbacks. //////////////////////////////////////////////////// // URL host features |