diff options
author | Kristian Monsen <kristianm@google.com> | 2011-06-28 21:49:31 +0100 |
---|---|---|
committer | Kristian Monsen <kristianm@google.com> | 2011-07-08 17:55:00 +0100 |
commit | ddb351dbec246cf1fab5ec20d2d5520909041de1 (patch) | |
tree | 158e3fb57bdcac07c7f1e767fde3c70687c9fbb1 /chrome/browser/safe_browsing | |
parent | 6b92e04f5f151c896e3088e86f70db7081009308 (diff) | |
download | external_chromium-ddb351dbec246cf1fab5ec20d2d5520909041de1.zip external_chromium-ddb351dbec246cf1fab5ec20d2d5520909041de1.tar.gz external_chromium-ddb351dbec246cf1fab5ec20d2d5520909041de1.tar.bz2 |
Merge Chromium at r12.0.742.93: Initial merge by git
Change-Id: Ic5ee2fec31358bbee305f7e915442377bfa6cda6
Diffstat (limited to 'chrome/browser/safe_browsing')
39 files changed, 3326 insertions, 888 deletions
diff --git a/chrome/browser/safe_browsing/bloom_filter.h b/chrome/browser/safe_browsing/bloom_filter.h index 60cb4fc..76f5149 100644 --- a/chrome/browser/safe_browsing/bloom_filter.h +++ b/chrome/browser/safe_browsing/bloom_filter.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -21,8 +21,8 @@ #include <vector> #include "base/gtest_prod_util.h" -#include "base/ref_counted.h" -#include "base/scoped_ptr.h" +#include "base/memory/ref_counted.h" +#include "base/memory/scoped_ptr.h" #include "chrome/browser/safe_browsing/safe_browsing_util.h" class FilePath; diff --git a/chrome/browser/safe_browsing/bloom_filter_unittest.cc b/chrome/browser/safe_browsing/bloom_filter_unittest.cc index 97c1558..62c5f46 100644 --- a/chrome/browser/safe_browsing/bloom_filter_unittest.cc +++ b/chrome/browser/safe_browsing/bloom_filter_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -11,10 +11,11 @@ #include "base/file_util.h" #include "base/logging.h" +#include "base/memory/ref_counted.h" #include "base/path_service.h" #include "base/rand_util.h" -#include "base/ref_counted.h" #include "base/string_util.h" +#include "base/memory/scoped_temp_dir.h" #include "testing/gtest/include/gtest/gtest.h" namespace { @@ -89,11 +90,9 @@ TEST(SafeBrowsingBloomFilter, BloomFilterFile) { filter_write->Insert(GenHash()); // Remove any left over test filters and serialize. - FilePath filter_path; - PathService::Get(base::DIR_TEMP, &filter_path); - filter_path = filter_path.AppendASCII("SafeBrowsingTestFilter"); - file_util::Delete(filter_path, false); - ASSERT_FALSE(file_util::PathExists(filter_path)); + ScopedTempDir temp_dir; + ASSERT_TRUE(temp_dir.CreateUniqueTempDir()); + FilePath filter_path = temp_dir.path().AppendASCII("SafeBrowsingTestFilter"); ASSERT_TRUE(filter_write->WriteFile(filter_path)); // Create new empty filter and load from disk. @@ -111,6 +110,4 @@ TEST(SafeBrowsingBloomFilter, BloomFilterFile) { EXPECT_EQ(0, memcmp(filter_write->data(), filter_read->data(), filter_read->size())); - - file_util::Delete(filter_path, false); } diff --git a/chrome/browser/safe_browsing/client_side_detection_host.cc b/chrome/browser/safe_browsing/client_side_detection_host.cc index 09d3071..26d241a 100644 --- a/chrome/browser/safe_browsing/client_side_detection_host.cc +++ b/chrome/browser/safe_browsing/client_side_detection_host.cc @@ -8,23 +8,26 @@ #include "base/command_line.h" #include "base/logging.h" +#include "base/memory/ref_counted.h" +#include "base/memory/scoped_ptr.h" #include "base/metrics/histogram.h" #include "base/task.h" #include "chrome/browser/browser_process.h" +#include "chrome/browser/profiles/profile.h" #include "chrome/browser/safe_browsing/client_side_detection_service.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" -#include "chrome/common/safebrowsing_messages.h" #include "chrome/common/chrome_switches.h" -#include "chrome/common/notification_service.h" -#include "chrome/common/notification_type.h" -#include "chrome/common/render_messages.h" -#include "chrome/common/render_messages_params.h" +#include "chrome/common/safe_browsing/csd.pb.h" +#include "chrome/common/safe_browsing/safebrowsing_messages.h" #include "content/browser/browser_thread.h" #include "content/browser/renderer_host/render_process_host.h" #include "content/browser/renderer_host/render_view_host.h" #include "content/browser/renderer_host/resource_dispatcher_host.h" #include "content/browser/tab_contents/navigation_controller.h" #include "content/browser/tab_contents/tab_contents.h" +#include "content/common/notification_service.h" +#include "content/common/notification_type.h" +#include "content/common/view_messages.h" #include "googleurl/src/gurl.h" namespace safe_browsing { @@ -32,52 +35,50 @@ namespace safe_browsing { // This class is instantiated each time a new toplevel URL loads, and // asynchronously checks whether the phishing classifier should run for this // URL. If so, it notifies the renderer with a StartPhishingDetection IPC. +// Objects of this class are ref-counted and will be destroyed once nobody +// uses it anymore. If |tab_contents|, |csd_service| or |host| go away you need +// to call Cancel(). We keep the |sb_service| alive in a ref pointer for as +// long as it takes. class ClientSideDetectionHost::ShouldClassifyUrlRequest - : public NotificationObserver { + : public base::RefCountedThreadSafe< + ClientSideDetectionHost::ShouldClassifyUrlRequest> { public: ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params& params, TabContents* tab_contents, - ClientSideDetectionService* service) - : params_(params), + ClientSideDetectionService* csd_service, + SafeBrowsingService* sb_service, + ClientSideDetectionHost* host) + : canceled_(false), + params_(params), tab_contents_(tab_contents), - service_(service), - ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { + csd_service_(csd_service), + sb_service_(sb_service), + host_(host) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); DCHECK(tab_contents_); - DCHECK(service_); - registrar_.Add(this, - NotificationType::TAB_CONTENTS_DESTROYED, - Source<TabContents>(tab_contents)); - } - - virtual void Observe(NotificationType type, - const NotificationSource& source, - const NotificationDetails& details) { - switch (type.value) { - case NotificationType::TAB_CONTENTS_DESTROYED: - Finish(false); - break; - default: - NOTREACHED(); - }; + DCHECK(csd_service_); + DCHECK(sb_service_); + DCHECK(host_); } void Start() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); - // For consistency, always run the pre-classification checks - // asynchronously. - BrowserThread::PostTask(BrowserThread::UI, - FROM_HERE, - method_factory_.NewRunnableMethod( - &ShouldClassifyUrlRequest::Run)); - } + // We start by doing some simple checks that can run on the UI thread. + UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1); - private: - // This object always deletes itself, so make the destructor private. - virtual ~ShouldClassifyUrlRequest() {} + // Only classify [X]HTML documents. + if (params_.contents_mime_type != "text/html" && + params_.contents_mime_type != "application/xhtml+xml") { + VLOG(1) << "Skipping phishing classification for URL: " << params_.url + << " because it has an unsupported MIME type: " + << params_.contents_mime_type; + UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", + NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, + NO_CLASSIFY_MAX); + return; + } - void Run() { // Don't run the phishing classifier if the URL came from a private // network, since we don't want to ping back in this case. We also need // to check whether the connection was proxied -- if so, we won't have the @@ -85,37 +86,142 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest if (params_.was_fetched_via_proxy) { VLOG(1) << "Skipping phishing classification for URL: " << params_.url << " because it was fetched via a proxy."; - UMA_HISTOGRAM_COUNTS("SBClientPhishing.NoClassifyProxyFetch", 1); - Finish(false); + UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", + NO_CLASSIFY_PROXY_FETCH, + NO_CLASSIFY_MAX); return; } - if (service_->IsPrivateIPAddress(params_.socket_address.host())) { + if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) { VLOG(1) << "Skipping phishing classification for URL: " << params_.url << " because of hosting on private IP: " << params_.socket_address.host(); - UMA_HISTOGRAM_COUNTS("SBClientPhishing.NoClassifyPrivateIP", 1); - Finish(false); + UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", + NO_CLASSIFY_PRIVATE_IP, + NO_CLASSIFY_MAX); + return; + } + + // Don't run the phishing classifier if the tab is incognito. + if (tab_contents_->profile()->IsOffTheRecord()) { + VLOG(1) << "Skipping phishing classification for URL: " << params_.url + << " because we're browsing incognito."; + UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", + NO_CLASSIFY_OFF_THE_RECORD, + NO_CLASSIFY_MAX); + + return; + } + + // We lookup the csd-whitelist before we lookup the cache because + // a URL may have recently been whitelisted. If the URL matches + // the csd-whitelist we won't start classification. The + // csd-whitelist check has to be done on the IO thread because it + // uses the SafeBrowsing service class. + BrowserThread::PostTask( + BrowserThread::IO, + FROM_HERE, + NewRunnableMethod(this, + &ShouldClassifyUrlRequest::CheckCsdWhitelist, + params_.url)); + } + + void Cancel() { + canceled_ = true; + // Just to make sure we don't do anything stupid we reset all these + // pointers except for the safebrowsing service class which may be + // accessed by CheckCsdWhitelist(). + tab_contents_ = NULL; + csd_service_ = NULL; + host_ = NULL; + } + + private: + friend class base::RefCountedThreadSafe< + ClientSideDetectionHost::ShouldClassifyUrlRequest>; + + // Enum used to keep stats about why the pre-classification check failed. + enum PreClassificationCheckFailures { + NO_CLASSIFY_PROXY_FETCH, + NO_CLASSIFY_PRIVATE_IP, + NO_CLASSIFY_OFF_THE_RECORD, + NO_CLASSIFY_MATCH_CSD_WHITELIST, + NO_CLASSIFY_TOO_MANY_REPORTS, + NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, + + NO_CLASSIFY_MAX // Always add new values before this one. + }; + + // The destructor can be called either from the UI or the IO thread. + virtual ~ShouldClassifyUrlRequest() { } + + void CheckCsdWhitelist(const GURL& url) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!sb_service_ || sb_service_->MatchCsdWhitelistUrl(url)) { + // We're done. There is no point in going back to the UI thread. + UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", + NO_CLASSIFY_MATCH_CSD_WHITELIST, + NO_CLASSIFY_MAX); return; } - Finish(true); + BrowserThread::PostTask( + BrowserThread::UI, + FROM_HERE, + NewRunnableMethod(this, + &ShouldClassifyUrlRequest::CheckCache)); } - void Finish(bool should_classify) { + void CheckCache() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); - if (should_classify) { - RenderViewHost* rvh = tab_contents_->render_view_host(); - rvh->Send(new ViewMsg_StartPhishingDetection( - rvh->routing_id(), params_.url)); + if (canceled_) { + return; } - delete this; + + // If result is cached, we don't want to run classification again + bool is_phishing; + if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) { + VLOG(1) << "Satisfying request for " << params_.url << " from cache"; + UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); + // Since we are already on the UI thread, this is safe. + host_->MaybeShowPhishingWarning(params_.url, is_phishing); + return; + } + + // We want to limit the number of requests, though we will ignore the + // limit for urls in the cache. We don't want to start classifying + // too many pages as phishing, but for those that we already think are + // phishing we want to give ourselves a chance to fix false positives. + if (csd_service_->IsInCache(params_.url)) { + VLOG(1) << "Reporting limit skipped for " << params_.url + << " as it was in the cache."; + UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1); + } else if (csd_service_->OverReportLimit()) { + VLOG(1) << "Too many report phishing requests sent recently, " + << "not running classification for " << params_.url; + UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", + NO_CLASSIFY_TOO_MANY_REPORTS, + NO_CLASSIFY_MAX); + return; + } + + // Everything checks out, so start classification. + // |tab_contents_| is safe to call as we will be destructed + // before it is. + RenderViewHost* rvh = tab_contents_->render_view_host(); + rvh->Send(new SafeBrowsingMsg_StartPhishingDetection( + rvh->routing_id(), params_.url)); } + // No need to protect |canceled_| with a lock because it is only read and + // written by the UI thread. + bool canceled_; ViewHostMsg_FrameNavigate_Params params_; TabContents* tab_contents_; - ClientSideDetectionService* service_; - NotificationRegistrar registrar_; - ScopedRunnableMethodFactory<ShouldClassifyUrlRequest> method_factory_; + ClientSideDetectionService* csd_service_; + // We keep a ref pointer here just to make sure the service class stays alive + // long enough. + scoped_refptr<SafeBrowsingService> sb_service_; + ClientSideDetectionHost* host_; DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest); }; @@ -148,10 +254,10 @@ class CsdClient : public SafeBrowsingService::Client { ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab) : TabContentsObserver(tab), - service_(g_browser_process->safe_browsing_detection_service()), + csd_service_(g_browser_process->safe_browsing_detection_service()), cb_factory_(ALLOW_THIS_IN_INITIALIZER_LIST(this)) { DCHECK(tab); - // Note: service_ and sb_service_ might be NULL. + // Note: csd_service_ and sb_service_ might be NULL. ResourceDispatcherHost* resource = g_browser_process->resource_dispatcher_host(); if (resource) { @@ -160,12 +266,16 @@ ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab) } ClientSideDetectionHost::~ClientSideDetectionHost() { + // Tell any pending classification request that it is being canceled. + if (classification_request_.get()) { + classification_request_->Cancel(); + } } bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) { bool handled = true; IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message) - IPC_MESSAGE_HANDLER(SafeBrowsingDetectionHostMsg_DetectedPhishingSite, + IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_DetectedPhishingSite, OnDetectedPhishingSite) IPC_MESSAGE_UNHANDLED(handled = false) IPC_END_MESSAGE_MAP() @@ -193,28 +303,40 @@ void ClientSideDetectionHost::DidNavigateMainFramePostCommit( // interstial. cb_factory_.RevokeAll(); - if (service_) { + if (csd_service_) { + // Cancel any pending classification request. + if (classification_request_.get()) { + classification_request_->Cancel(); + } + // Notify the renderer if it should classify this URL. - ShouldClassifyUrlRequest* request = - new ShouldClassifyUrlRequest(params, tab_contents(), service_); - request->Start(); + classification_request_ = new ShouldClassifyUrlRequest(params, + tab_contents(), + csd_service_, + sb_service_, + this); + classification_request_->Start(); } } -void ClientSideDetectionHost::OnDetectedPhishingSite(const GURL& phishing_url, - double phishing_score) { +void ClientSideDetectionHost::OnDetectedPhishingSite( + const std::string& verdict_str) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); // There is something seriously wrong if there is no service class but // this method is called. The renderer should not start phishing detection // if there isn't any service class in the browser. - DCHECK(service_); - if (service_) { + DCHECK(csd_service_); + // We parse the protocol buffer here. If we're unable to parse it we won't + // send the verdict further. + scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest); + if (csd_service_ && + verdict->ParseFromString(verdict_str) && + verdict->IsInitialized()) { // There shouldn't be any pending requests because we revoke them everytime // we navigate away. DCHECK(!cb_factory_.HasPendingCallbacks()); - service_->SendClientReportPhishingRequest( - phishing_url, - phishing_score, + csd_service_->SendClientReportPhishingRequest( + verdict.release(), // The service takes ownership of the verdict. cb_factory_.NewCallback( &ClientSideDetectionHost::MaybeShowPhishingWarning)); } @@ -257,7 +379,7 @@ void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url, void ClientSideDetectionHost::set_client_side_detection_service( ClientSideDetectionService* service) { - service_ = service; + csd_service_ = service; } void ClientSideDetectionHost::set_safe_browsing_service( diff --git a/chrome/browser/safe_browsing/client_side_detection_host.h b/chrome/browser/safe_browsing/client_side_detection_host.h index 5b38872..01cab97 100644 --- a/chrome/browser/safe_browsing/client_side_detection_host.h +++ b/chrome/browser/safe_browsing/client_side_detection_host.h @@ -7,8 +7,9 @@ #pragma once #include "base/basictypes.h" -#include "base/ref_counted.h" -#include "base/scoped_callback_factory.h" +#include "base/memory/ref_counted.h" +#include "base/memory/scoped_callback_factory.h" +#include "base/memory/scoped_ptr.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "content/browser/tab_contents/tab_contents_observer.h" #include "googleurl/src/gurl.h" @@ -44,12 +45,14 @@ class ClientSideDetectionHost : public TabContentsObserver { private: friend class ClientSideDetectionHostTest; class ShouldClassifyUrlRequest; + friend class ShouldClassifyUrlRequest; - void OnDetectedPhishingSite(const GURL& phishing_url, double phishing_score); + // Verdict is an encoded ClientPhishingRequest protocol message. + void OnDetectedPhishingSite(const std::string& verdict); // Callback that is called when the server ping back is // done. Display an interstitial if |is_phishing| is true. - // Otherwise, we do nothgin. Called in UI thread. + // Otherwise, we do nothing. Called in UI thread. void MaybeShowPhishingWarning(GURL phishing_url, bool is_phishing); // Used for testing. This function does not take ownership of the service @@ -61,9 +64,12 @@ class ClientSideDetectionHost : public TabContentsObserver { void set_safe_browsing_service(SafeBrowsingService* service); // This pointer may be NULL if client-side phishing detection is disabled. - ClientSideDetectionService* service_; + ClientSideDetectionService* csd_service_; // This pointer may be NULL if SafeBrowsing is disabled. scoped_refptr<SafeBrowsingService> sb_service_; + // Keep a handle to the latest classification request so that we can cancel + // it if necessary. + scoped_refptr<ShouldClassifyUrlRequest> classification_request_; base::ScopedCallbackFactory<ClientSideDetectionHost> cb_factory_; diff --git a/chrome/browser/safe_browsing/client_side_detection_host_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_host_unittest.cc index 22cee1b..7a76bbd 100644 --- a/chrome/browser/safe_browsing/client_side_detection_host_unittest.cc +++ b/chrome/browser/safe_browsing/client_side_detection_host_unittest.cc @@ -4,15 +4,17 @@ #include "base/command_line.h" #include "base/file_path.h" -#include "base/ref_counted.h" -#include "base/scoped_ptr.h" -#include "base/scoped_temp_dir.h" +#include "base/memory/ref_counted.h" +#include "base/memory/scoped_ptr.h" +#include "base/memory/scoped_temp_dir.h" #include "base/task.h" #include "chrome/browser/safe_browsing/client_side_detection_host.h" #include "chrome/browser/safe_browsing/client_side_detection_service.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "chrome/common/chrome_switches.h" -#include "chrome/common/render_messages.h" +#include "chrome/common/safe_browsing/csd.pb.h" +#include "chrome/common/safe_browsing/safebrowsing_messages.h" +#include "chrome/test/testing_profile.h" #include "chrome/test/ui_test_utils.h" #include "content/browser/browser_thread.h" #include "content/browser/renderer_host/test_render_view_host.h" @@ -20,25 +22,45 @@ #include "googleurl/src/gurl.h" #include "ipc/ipc_test_sink.h" #include "testing/gmock/include/gmock/gmock.h" -#include "testing/gmock/include/gmock/gmock-spec-builders.h" +#include "testing/gtest/include/gtest/gtest.h" using ::testing::_; +using ::testing::DeleteArg; using ::testing::DoAll; +using ::testing::Eq; using ::testing::Mock; +using ::testing::NiceMock; +using ::testing::NotNull; +using ::testing::Pointee; using ::testing::Return; using ::testing::SaveArg; +using ::testing::SetArgumentPointee; +using ::testing::StrictMock; + +namespace { +const bool kFalse = false; +const bool kTrue = true; +} namespace safe_browsing { +MATCHER_P(EqualsProto, other, "") { + return other.SerializeAsString() == arg.SerializeAsString(); +} + class MockClientSideDetectionService : public ClientSideDetectionService { public: explicit MockClientSideDetectionService(const FilePath& model_path) : ClientSideDetectionService(model_path, NULL) {} virtual ~MockClientSideDetectionService() {}; - MOCK_METHOD3(SendClientReportPhishingRequest, - void(const GURL&, double, ClientReportPhishingRequestCallback*)); + MOCK_METHOD2(SendClientReportPhishingRequest, + void(ClientPhishingRequest*, + ClientReportPhishingRequestCallback*)); MOCK_CONST_METHOD1(IsPrivateIPAddress, bool(const std::string&)); + MOCK_METHOD2(GetValidCachedResult, bool(const GURL&, bool*)); + MOCK_METHOD1(IsInCache, bool(const GURL&)); + MOCK_METHOD0(OverReportLimit, bool()); private: DISALLOW_COPY_AND_ASSIGN(MockClientSideDetectionService); @@ -52,6 +74,7 @@ class MockSafeBrowsingService : public SafeBrowsingService { MOCK_METHOD8(DisplayBlockingPage, void(const GURL&, const GURL&, const std::vector<GURL>&, ResourceType::Type, UrlCheckResult, Client*, int, int)); + MOCK_METHOD1(MatchCsdWhitelistUrl, bool(const GURL&)); // Helper function which calls OnBlockingPageComplete for this client // object. @@ -67,6 +90,14 @@ class MockSafeBrowsingService : public SafeBrowsingService { DISALLOW_COPY_AND_ASSIGN(MockSafeBrowsingService); }; +class MockTestingProfile : public TestingProfile { + public: + MockTestingProfile() {} + virtual ~MockTestingProfile() {} + + MOCK_METHOD0(IsOffTheRecord, bool()); +}; + // Helper function which quits the UI message loop from the IO message loop. void QuitUIMessageLoop() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); @@ -78,6 +109,12 @@ void QuitUIMessageLoop() { class ClientSideDetectionHostTest : public RenderViewHostTestHarness { public: virtual void SetUp() { + // Set custom profile object so that we can mock calls to IsOffTheRecord. + // This needs to happen before we call the parent SetUp() function. We use + // a nice mock because other parts of the code are calling IsOffTheRecord. + mock_profile_ = new NiceMock<MockTestingProfile>(); + profile_.reset(mock_profile_); + RenderViewHostTestHarness::SetUp(); ui_thread_.reset(new BrowserThread(BrowserThread::UI, &message_loop_)); // Note: we're starting a real IO thread to make sure our DCHECKs that @@ -90,21 +127,28 @@ class ClientSideDetectionHostTest : public RenderViewHostTestHarness { ASSERT_TRUE(tmp_dir.CreateUniqueTempDir()); FilePath model_path = tmp_dir.path().AppendASCII("model"); - csd_service_.reset(new MockClientSideDetectionService(model_path)); - sb_service_ = new MockSafeBrowsingService(); + csd_service_.reset(new StrictMock<MockClientSideDetectionService>( + model_path)); + sb_service_ = new StrictMock<MockSafeBrowsingService>(); csd_host_ = contents()->safebrowsing_detection_host(); csd_host_->set_client_side_detection_service(csd_service_.get()); csd_host_->set_safe_browsing_service(sb_service_.get()); + + // Save command-line so that it can be restored for every test. + original_cmd_line_.reset( + new CommandLine(*CommandLine::ForCurrentProcess())); } virtual void TearDown() { io_thread_.reset(); ui_thread_.reset(); RenderViewHostTestHarness::TearDown(); + // Restore the command-line like it was before we ran the test. + *CommandLine::ForCurrentProcess() = *original_cmd_line_; } - void OnDetectedPhishingSite(const GURL& phishing_url, double phishing_score) { - csd_host_->OnDetectedPhishingSite(phishing_url, phishing_score); + void OnDetectedPhishingSite(const std::string& verdict_str) { + csd_host_->OnDetectedPhishingSite(verdict_str); } void FlushIOMessageLoop() { @@ -117,33 +161,89 @@ class ClientSideDetectionHostTest : public RenderViewHostTestHarness { MessageLoop::current()->Run(); } + void ExpectPreClassificationChecks(const GURL& url, + const bool* is_private, + const bool* is_incognito, + const bool* match_csd_whitelist, + const bool* get_valid_cached_result, + const bool* is_in_cache, + const bool* over_report_limit) { + if (is_private) { + EXPECT_CALL(*csd_service_, IsPrivateIPAddress(_)) + .WillOnce(Return(*is_private)); + } + if (is_incognito) { + EXPECT_CALL(*mock_profile_, IsOffTheRecord()) + .WillRepeatedly(Return(*is_incognito)); + } + if (match_csd_whitelist) { + EXPECT_CALL(*sb_service_, MatchCsdWhitelistUrl(url)) + .WillOnce(Return(*match_csd_whitelist)); + } + if (get_valid_cached_result) { + EXPECT_CALL(*csd_service_, GetValidCachedResult(url, NotNull())) + .WillOnce(DoAll(SetArgumentPointee<1>(true), + Return(*get_valid_cached_result))); + } + if (is_in_cache) { + EXPECT_CALL(*csd_service_, IsInCache(url)).WillOnce(Return(*is_in_cache)); + } + if (over_report_limit) { + EXPECT_CALL(*csd_service_, OverReportLimit()) + .WillOnce(Return(*over_report_limit)); + } + } + + void WaitAndCheckPreClassificationChecks() { + // Wait for CheckCsdWhitelist to be called if at all. + FlushIOMessageLoop(); + // Checks for CheckCache() to be called if at all. + MessageLoop::current()->RunAllPending(); + EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); + EXPECT_TRUE(Mock::VerifyAndClear(sb_service_.get())); + EXPECT_TRUE(Mock::VerifyAndClear(mock_profile_)); + } + protected: ClientSideDetectionHost* csd_host_; - scoped_ptr<MockClientSideDetectionService> csd_service_; - scoped_refptr<MockSafeBrowsingService> sb_service_; + scoped_ptr<StrictMock<MockClientSideDetectionService> > csd_service_; + scoped_refptr<StrictMock<MockSafeBrowsingService> > sb_service_; + MockTestingProfile* mock_profile_; // We don't own this object private: scoped_ptr<BrowserThread> ui_thread_; scoped_ptr<BrowserThread> io_thread_; + scoped_ptr<CommandLine> original_cmd_line_; }; +TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteInvalidVerdict) { + // Case 0: renderer sends an invalid verdict string that we're unable to + // parse. + EXPECT_CALL(*csd_service_, SendClientReportPhishingRequest(_, _)).Times(0); + OnDetectedPhishingSite("Invalid Protocol Buffer"); + EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); +} + TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteNotPhishing) { // Case 1: client thinks the page is phishing. The server does not agree. // No interstitial is shown. ClientSideDetectionService::ClientReportPhishingRequestCallback* cb; - GURL phishing_url("http://phishingurl.com/"); + ClientPhishingRequest verdict; + verdict.set_url("http://phishingurl.com/"); + verdict.set_client_score(1.0f); + verdict.set_is_phishing(true); EXPECT_CALL(*csd_service_, - SendClientReportPhishingRequest(phishing_url, 1.0, _)) - .WillOnce(SaveArg<2>(&cb)); - OnDetectedPhishingSite(phishing_url, 1.0); + SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) + .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb))); + OnDetectedPhishingSite(verdict.SerializeAsString()); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); - ASSERT_TRUE(cb != NULL); + ASSERT_TRUE(cb); // Make sure DisplayBlockingPage is not going to be called. EXPECT_CALL(*sb_service_, DisplayBlockingPage(_, _, _, _, _, _, _, _)).Times(0); - cb->Run(phishing_url, false); + cb->Run(GURL(verdict.url()), false); delete cb; // If there was a message posted on the IO thread to display the // interstitial page we know that it would have been posted before @@ -159,19 +259,22 @@ TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteDisabled) { // Case 2: client thinks the page is phishing and so does the server but // showing the interstitial is disabled => no interstitial is shown. ClientSideDetectionService::ClientReportPhishingRequestCallback* cb; - GURL phishing_url("http://phishingurl.com/"); + ClientPhishingRequest verdict; + verdict.set_url("http://phishingurl.com/"); + verdict.set_client_score(1.0f); + verdict.set_is_phishing(true); EXPECT_CALL(*csd_service_, - SendClientReportPhishingRequest(phishing_url, 1.0, _)) - .WillOnce(SaveArg<2>(&cb)); - OnDetectedPhishingSite(phishing_url, 1.0); + SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) + .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb))); + OnDetectedPhishingSite(verdict.SerializeAsString()); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); - ASSERT_TRUE(cb != NULL); + ASSERT_TRUE(cb); // Make sure DisplayBlockingPage is not going to be called. EXPECT_CALL(*sb_service_, DisplayBlockingPage(_, _, _, _, _, _, _, _)).Times(0); - cb->Run(phishing_url, false); + cb->Run(GURL(verdict.url()), false); delete cb; FlushIOMessageLoop(); @@ -183,16 +286,20 @@ TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteShowInterstitial) { // We show an interstitial. ClientSideDetectionService::ClientReportPhishingRequestCallback* cb; GURL phishing_url("http://phishingurl.com/"); + ClientPhishingRequest verdict; + verdict.set_url(phishing_url.spec()); + verdict.set_client_score(1.0f); + verdict.set_is_phishing(true); CommandLine::ForCurrentProcess()->AppendSwitch( switches::kEnableClientSidePhishingInterstitial); EXPECT_CALL(*csd_service_, - SendClientReportPhishingRequest(phishing_url, 1.0, _)) - .WillOnce(SaveArg<2>(&cb)); - OnDetectedPhishingSite(phishing_url, 1.0); + SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) + .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb))); + OnDetectedPhishingSite(verdict.SerializeAsString()); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); - ASSERT_TRUE(cb != NULL); + ASSERT_TRUE(cb); SafeBrowsingService::Client* client; EXPECT_CALL(*sb_service_, @@ -235,28 +342,35 @@ TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteMultiplePings) { // a single interstitial is shown for the second URL. ClientSideDetectionService::ClientReportPhishingRequestCallback* cb; GURL phishing_url("http://phishingurl.com/"); + ClientPhishingRequest verdict; + verdict.set_url(phishing_url.spec()); + verdict.set_client_score(1.0f); + verdict.set_is_phishing(true); CommandLine::ForCurrentProcess()->AppendSwitch( switches::kEnableClientSidePhishingInterstitial); EXPECT_CALL(*csd_service_, - SendClientReportPhishingRequest(phishing_url, 1.0, _)) - .WillOnce(SaveArg<2>(&cb)); - OnDetectedPhishingSite(phishing_url, 1.0); + SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) + .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb))); + OnDetectedPhishingSite(verdict.SerializeAsString()); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); - ASSERT_TRUE(cb != NULL); + ASSERT_TRUE(cb); GURL other_phishing_url("http://other_phishing_url.com/bla"); - EXPECT_CALL(*csd_service_, IsPrivateIPAddress(_)).WillOnce(Return(false)); + ExpectPreClassificationChecks(other_phishing_url, &kFalse, &kFalse, &kFalse, + &kFalse, &kFalse, &kFalse); // We navigate away. The callback cb should be revoked. NavigateAndCommit(other_phishing_url); // Wait for the pre-classification checks to finish for other_phishing_url. - FlushIOMessageLoop(); + WaitAndCheckPreClassificationChecks(); ClientSideDetectionService::ClientReportPhishingRequestCallback* cb_other; + verdict.set_url(other_phishing_url.spec()); + verdict.set_client_score(0.8f); EXPECT_CALL(*csd_service_, - SendClientReportPhishingRequest(other_phishing_url, 0.8, _)) - .WillOnce(SaveArg<2>(&cb_other)); - OnDetectedPhishingSite(other_phishing_url, 0.8); + SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) + .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb_other))); + OnDetectedPhishingSite(verdict.SerializeAsString()); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); ASSERT_TRUE(cb_other); @@ -300,58 +414,189 @@ TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteMultiplePings) { FlushIOMessageLoop(); } +TEST_F(ClientSideDetectionHostTest, NavigationCancelsShouldClassifyUrl) { + // Test that canceling pending should classify requests works as expected. + + GURL first_url("http://first.phishy.url.com"); + // The proxy checks is done synchronously so check that it has been done + // for the first URL. + ExpectPreClassificationChecks(first_url, &kFalse, &kFalse, &kFalse, NULL, + NULL, NULL); + NavigateAndCommit(first_url); + + // Don't flush the message loop, as we want to navigate to a different + // url before the final pre-classification checks are run. + GURL second_url("http://second.url.com/"); + ExpectPreClassificationChecks(second_url, &kFalse, &kFalse, &kFalse, &kFalse, + &kFalse, &kFalse); + NavigateAndCommit(second_url); + WaitAndCheckPreClassificationChecks(); +} + TEST_F(ClientSideDetectionHostTest, ShouldClassifyUrl) { // Navigate the tab to a page. We should see a StartPhishingDetection IPC. - EXPECT_CALL(*csd_service_, IsPrivateIPAddress(_)).WillOnce(Return(false)); - NavigateAndCommit(GURL("http://host.com/")); - FlushIOMessageLoop(); + GURL url("http://host.com/"); + ExpectPreClassificationChecks(url, &kFalse, &kFalse, &kFalse, &kFalse, + &kFalse, &kFalse); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); + const IPC::Message* msg = process()->sink().GetFirstMessageMatching( - ViewMsg_StartPhishingDetection::ID); + SafeBrowsingMsg_StartPhishingDetection::ID); ASSERT_TRUE(msg); - Tuple1<GURL> url; - ViewMsg_StartPhishingDetection::Read(msg, &url); - EXPECT_EQ(GURL("http://host.com/"), url.a); + Tuple1<GURL> actual_url; + SafeBrowsingMsg_StartPhishingDetection::Read(msg, &actual_url); + EXPECT_EQ(url, actual_url.a); EXPECT_EQ(rvh()->routing_id(), msg->routing_id()); process()->sink().ClearMessages(); // Now try an in-page navigation. This should not trigger an IPC. EXPECT_CALL(*csd_service_, IsPrivateIPAddress(_)).Times(0); - NavigateAndCommit(GURL("http://host.com/#foo")); - FlushIOMessageLoop(); + url = GURL("http://host.com/#foo"); + ExpectPreClassificationChecks(url, NULL, NULL, NULL, NULL, NULL, NULL); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); + msg = process()->sink().GetFirstMessageMatching( - ViewMsg_StartPhishingDetection::ID); + SafeBrowsingMsg_StartPhishingDetection::ID); ASSERT_FALSE(msg); + // Check that XHTML is supported, in addition to the default HTML type. + // Note: for this test to work correctly, the new URL must be on the + // same domain as the previous URL, otherwise it will create a new + // RenderViewHost that won't have the mime type set. + url = GURL("http://host.com/xhtml"); + rvh()->set_contents_mime_type("application/xhtml+xml"); + ExpectPreClassificationChecks(url, &kFalse, &kFalse, &kFalse, &kFalse, + &kFalse, &kFalse); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); + msg = process()->sink().GetFirstMessageMatching( + SafeBrowsingMsg_StartPhishingDetection::ID); + ASSERT_TRUE(msg); + SafeBrowsingMsg_StartPhishingDetection::Read(msg, &actual_url); + EXPECT_EQ(url, actual_url.a); + EXPECT_EQ(rvh()->routing_id(), msg->routing_id()); + process()->sink().ClearMessages(); + // Navigate to a new host, which should cause another IPC. - EXPECT_CALL(*csd_service_, IsPrivateIPAddress(_)).WillOnce(Return(false)); - NavigateAndCommit(GURL("http://host2.com/")); - FlushIOMessageLoop(); + url = GURL("http://host2.com/"); + ExpectPreClassificationChecks(url, &kFalse, &kFalse, &kFalse, &kFalse, + &kFalse, &kFalse); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); msg = process()->sink().GetFirstMessageMatching( - ViewMsg_StartPhishingDetection::ID); + SafeBrowsingMsg_StartPhishingDetection::ID); ASSERT_TRUE(msg); - ViewMsg_StartPhishingDetection::Read(msg, &url); - EXPECT_EQ(GURL("http://host2.com/"), url.a); + SafeBrowsingMsg_StartPhishingDetection::Read(msg, &actual_url); + EXPECT_EQ(url, actual_url.a); EXPECT_EQ(rvh()->routing_id(), msg->routing_id()); process()->sink().ClearMessages(); + // If the mime type is not one that we support, no IPC should be triggered. + // Note: for this test to work correctly, the new URL must be on the + // same domain as the previous URL, otherwise it will create a new + // RenderViewHost that won't have the mime type set. + url = GURL("http://host2.com/image.jpg"); + rvh()->set_contents_mime_type("image/jpeg"); + ExpectPreClassificationChecks(url, NULL, NULL, NULL, NULL, NULL, NULL); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); + msg = process()->sink().GetFirstMessageMatching( + SafeBrowsingMsg_StartPhishingDetection::ID); + ASSERT_FALSE(msg); + // If IsPrivateIPAddress returns true, no IPC should be triggered. - EXPECT_CALL(*csd_service_, IsPrivateIPAddress(_)).WillOnce(Return(true)); - NavigateAndCommit(GURL("http://host3.com/")); - FlushIOMessageLoop(); + url = GURL("http://host3.com/"); + ExpectPreClassificationChecks(url, &kTrue, NULL, NULL, NULL, NULL, NULL); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); msg = process()->sink().GetFirstMessageMatching( - ViewMsg_StartPhishingDetection::ID); + SafeBrowsingMsg_StartPhishingDetection::ID); ASSERT_FALSE(msg); // If the connection is proxied, no IPC should be triggered. // Note: for this test to work correctly, the new URL must be on the // same domain as the previous URL, otherwise it will create a new // RenderViewHost that won't have simulate_fetch_via_proxy set. + url = GURL("http://host3.com/abc"); rvh()->set_simulate_fetch_via_proxy(true); - EXPECT_CALL(*csd_service_, IsPrivateIPAddress(_)).Times(0); - NavigateAndCommit(GURL("http://host3.com/abc")); + ExpectPreClassificationChecks(url, NULL, NULL, NULL, NULL, NULL, NULL); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); + msg = process()->sink().GetFirstMessageMatching( + SafeBrowsingMsg_StartPhishingDetection::ID); + ASSERT_FALSE(msg); + + // If the tab is incognito there should be no IPC. Also, we shouldn't + // even check the csd-whitelist. + url = GURL("http://host4.com/"); + ExpectPreClassificationChecks(url, &kFalse, &kTrue, NULL, NULL, NULL, NULL); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); + msg = process()->sink().GetFirstMessageMatching( + SafeBrowsingMsg_StartPhishingDetection::ID); + ASSERT_FALSE(msg); + + // If the URL is on the csd whitelist, no IPC should be triggered. + url = GURL("http://host5.com/"); + ExpectPreClassificationChecks(url, &kFalse, &kFalse, &kTrue, NULL, NULL, + NULL); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); + msg = process()->sink().GetFirstMessageMatching( + SafeBrowsingMsg_StartPhishingDetection::ID); + ASSERT_FALSE(msg); + + // If item is in the cache but it isn't valid, we will classify regardless + // of whether we are over the reporting limit. + url = GURL("http://host6.com/"); + ExpectPreClassificationChecks(url, &kFalse, &kFalse, &kFalse, &kFalse, &kTrue, + NULL); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); + msg = process()->sink().GetFirstMessageMatching( + SafeBrowsingMsg_StartPhishingDetection::ID); + ASSERT_TRUE(msg); + SafeBrowsingMsg_StartPhishingDetection::Read(msg, &actual_url); + EXPECT_EQ(url, actual_url.a); + EXPECT_EQ(rvh()->routing_id(), msg->routing_id()); + process()->sink().ClearMessages(); + + // If the url isn't in the cache and we are over the reporting limit, we + // don't do classification. + url = GURL("http://host7.com/"); + ExpectPreClassificationChecks(url, &kFalse, &kFalse, &kFalse, &kFalse, + &kFalse, &kTrue); + NavigateAndCommit(url); + WaitAndCheckPreClassificationChecks(); + msg = process()->sink().GetFirstMessageMatching( + SafeBrowsingMsg_StartPhishingDetection::ID); + ASSERT_FALSE(msg); + + // If result is cached, we will try and display the blocking page directly + // with no start classification message. + CommandLine::ForCurrentProcess()->AppendSwitch( + switches::kEnableClientSidePhishingInterstitial); + url = GURL("http://host8.com/"); + ExpectPreClassificationChecks(url, &kFalse, &kFalse, &kFalse, &kTrue, NULL, + NULL); + EXPECT_CALL(*sb_service_, + DisplayBlockingPage(Eq(url), Eq(url), _, _, _, _, _, _)) + .WillOnce(DeleteArg<5>()); + NavigateAndCommit(url); + // Wait for CheckCsdWhitelist to be called on the IO thread. + FlushIOMessageLoop(); + // Wait for CheckCache() to be called on the UI thread. + MessageLoop::current()->RunAllPending(); + // Wait for DisplayBlockingPage to be called on the IO thread. FlushIOMessageLoop(); + // Now we check that all expected functions were indeed called on the two + // service objects. + EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); + EXPECT_TRUE(Mock::VerifyAndClear(sb_service_.get())); msg = process()->sink().GetFirstMessageMatching( - ViewMsg_StartPhishingDetection::ID); + SafeBrowsingMsg_StartPhishingDetection::ID); ASSERT_FALSE(msg); } diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc index 1bc4f6d..e88b143 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service.cc +++ b/chrome/browser/safe_browsing/client_side_detection_service.cc @@ -8,20 +8,20 @@ #include "base/file_path.h" #include "base/file_util_proxy.h" #include "base/logging.h" +#include "base/memory/scoped_ptr.h" #include "base/message_loop.h" #include "base/metrics/histogram.h" #include "base/platform_file.h" -#include "base/scoped_ptr.h" #include "base/stl_util-inl.h" #include "base/task.h" #include "base/time.h" -#include "chrome/browser/safe_browsing/csd.pb.h" #include "chrome/common/net/http_return.h" #include "chrome/common/net/url_fetcher.h" -#include "chrome/common/net/url_request_context_getter.h" +#include "chrome/common/safe_browsing/csd.pb.h" #include "content/browser/browser_thread.h" #include "googleurl/src/gurl.h" #include "net/base/load_flags.h" +#include "net/url_request/url_request_context_getter.h" #include "net/url_request/url_request_status.h" namespace safe_browsing { @@ -37,8 +37,14 @@ const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval = const char ClientSideDetectionService::kClientReportPhishingUrl[] = "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; +// Note: when updatng the model version, don't forget to change the filename +// in chrome/common/chrome_constants.cc as well, or else existing users won't +// download the new model. +// +// TODO(bryner): add version metadata so that clients can download new models +// without needing a new model filename. const char ClientSideDetectionService::kClientModelUrl[] = - "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; + "https://ssl.gstatic.com/safebrowsing/csd/client_model_v1.pb"; struct ClientSideDetectionService::ClientReportInfo { scoped_ptr<ClientReportPhishingRequestCallback> callback; @@ -51,7 +57,7 @@ ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time) ClientSideDetectionService::ClientSideDetectionService( const FilePath& model_path, - URLRequestContextGetter* request_context_getter) + net::URLRequestContextGetter* request_context_getter) : model_path_(model_path), model_status_(UNKNOWN_STATUS), model_file_(base::kInvalidPlatformFileValue), @@ -71,7 +77,7 @@ ClientSideDetectionService::~ClientSideDetectionService() { /* static */ ClientSideDetectionService* ClientSideDetectionService::Create( const FilePath& model_path, - URLRequestContextGetter* request_context_getter) { + net::URLRequestContextGetter* request_context_getter) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); scoped_ptr<ClientSideDetectionService> service( new ClientSideDetectionService(model_path, request_context_getter)); @@ -93,6 +99,14 @@ ClientSideDetectionService* ClientSideDetectionService::Create( delete cb; return NULL; } + + // Delete the previous-version model file. + // TODO(bryner): Remove this for M14. + base::FileUtilProxy::Delete( + BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE), + model_path.DirName().AppendASCII("Safe Browsing Phishing Model"), + false /* not recursive */, + NULL /* not interested in result */); return service.release(); } @@ -105,15 +119,14 @@ void ClientSideDetectionService::GetModelFile(OpenModelDoneCallback* callback) { } void ClientSideDetectionService::SendClientReportPhishingRequest( - const GURL& phishing_url, - double score, + ClientPhishingRequest* verdict, ClientReportPhishingRequestCallback* callback) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); MessageLoop::current()->PostTask( FROM_HERE, method_factory_.NewRunnableMethod( &ClientSideDetectionService::StartClientReportPhishingRequest, - phishing_url, score, callback)); + verdict, callback)); } bool ClientSideDetectionService::IsPrivateIPAddress( @@ -254,45 +267,17 @@ void ClientSideDetectionService::StartGetModelFile( } void ClientSideDetectionService::StartClientReportPhishingRequest( - const GURL& phishing_url, - double score, + ClientPhishingRequest* verdict, ClientReportPhishingRequestCallback* callback) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); + scoped_ptr<ClientPhishingRequest> request(verdict); scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); - bool is_phishing; - if (GetCachedResult(phishing_url, &is_phishing)) { - VLOG(1) << "Satisfying request for " << phishing_url << " from cache"; - UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); - cb->Run(phishing_url, is_phishing); - return; - } - - // We limit the number of distinct pings to kMaxReports, but we don't count - // urls already in the cache against this number. We don't want to start - // classifying too many pages as phishing, but for those that we already - // think are phishing we want to give ourselves a chance to fix false - // positives. - if (cache_.find(phishing_url) != cache_.end()) { - VLOG(1) << "Refreshing cache for " << phishing_url; - UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1); - } else if (GetNumReports() > kMaxReportsPerInterval) { - VLOG(1) << "Too many report phishing requests sent in the last " - << kReportsInterval.InHours() << " hours, not checking " - << phishing_url; - UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); - cb->Run(phishing_url, false); - return; - } - - ClientPhishingRequest request; - request.set_url(phishing_url.spec()); - request.set_client_score(static_cast<float>(score)); std::string request_data; - if (!request.SerializeToString(&request_data)) { + if (!request->SerializeToString(&request_data)) { UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1); VLOG(1) << "Unable to serialize the CSD request. Proto file changed?"; - cb->Run(phishing_url, false); + cb->Run(GURL(request->url()), false); return; } @@ -304,7 +289,7 @@ void ClientSideDetectionService::StartClientReportPhishingRequest( // Remember which callback and URL correspond to the current fetcher object. ClientReportInfo* info = new ClientReportInfo; info->callback.swap(cb); // takes ownership of the callback. - info->phishing_url = phishing_url; + info->phishing_url = GURL(request->url()); client_phishing_reports_[fetcher] = info; fetcher->set_load_flags(net::LOAD_DISABLE_CACHE); @@ -372,8 +357,14 @@ void ClientSideDetectionService::HandlePhishingVerdict( delete source; } -bool ClientSideDetectionService::GetCachedResult(const GURL& url, - bool* is_phishing) { +bool ClientSideDetectionService::IsInCache(const GURL& url) { + UpdateCache(); + + return cache_.find(url) != cache_.end(); +} + +bool ClientSideDetectionService::GetValidCachedResult(const GURL& url, + bool* is_phishing) { UpdateCache(); PhishingCache::iterator it = cache_.find(url); @@ -415,6 +406,10 @@ void ClientSideDetectionService::UpdateCache() { } } +bool ClientSideDetectionService::OverReportLimit() { + return GetNumReports() > kMaxReportsPerInterval; +} + int ClientSideDetectionService::GetNumReports() { base::Time cutoff = base::Time::Now() - kReportsInterval; diff --git a/chrome/browser/safe_browsing/client_side_detection_service.h b/chrome/browser/safe_browsing/client_side_detection_service.h index e7fbd22..ce8dfab 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service.h +++ b/chrome/browser/safe_browsing/client_side_detection_service.h @@ -26,25 +26,24 @@ #include "base/callback.h" #include "base/file_path.h" #include "base/gtest_prod_util.h" -#include "base/linked_ptr.h" +#include "base/memory/linked_ptr.h" +#include "base/memory/ref_counted.h" +#include "base/memory/scoped_callback_factory.h" +#include "base/memory/scoped_ptr.h" #include "base/platform_file.h" -#include "base/ref_counted.h" -#include "base/scoped_callback_factory.h" -#include "base/scoped_ptr.h" #include "base/task.h" #include "base/time.h" -#include "chrome/browser/safe_browsing/csd.pb.h" #include "chrome/common/net/url_fetcher.h" #include "googleurl/src/gurl.h" #include "net/base/net_util.h" -class URLRequestContextGetter; - namespace net { +class URLRequestContextGetter; class URLRequestStatus; } // namespace net namespace safe_browsing { +class ClientPhishingRequest; class ClientSideDetectionService : public URLFetcher::Delegate { public: @@ -60,7 +59,7 @@ class ClientSideDetectionService : public URLFetcher::Delegate { // The caller takes ownership of the object. This function may return NULL. static ClientSideDetectionService* Create( const FilePath& model_path, - URLRequestContextGetter* request_context_getter); + net::URLRequestContextGetter* request_context_getter); // From the URLFetcher::Delegate interface. virtual void OnURLFetchComplete(const URLFetcher* source, @@ -78,32 +77,43 @@ class ClientSideDetectionService : public URLFetcher::Delegate { // same thread as GetModelFile() was called. void GetModelFile(OpenModelDoneCallback* callback); - // Sends a request to the SafeBrowsing servers with the potentially phishing - // URL and the client-side phishing score. The |phishing_url| scheme should - // be HTTP. This method takes ownership of the |callback| and calls it once - // the result has come back from the server or if an error occurs during the - // fetch. If an error occurs the phishing verdict will always be false. The - // callback is always called after SendClientReportPhishingRequest() returns - // and on the same thread as SendClientReportPhishingRequest() was called. + // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest. + // The URL scheme of the |url()| in the request should be HTTP. This method + // takes ownership of the |verdict| as well as the |callback| and calls the + // the callback once the result has come back from the server or if an error + // occurs during the fetch. If an error occurs the phishing verdict will + // always be false. The callback is always called after + // SendClientReportPhishingRequest() returns and on the same thread as + // SendClientReportPhishingRequest() was called. virtual void SendClientReportPhishingRequest( - const GURL& phishing_url, - double score, + ClientPhishingRequest* verdict, ClientReportPhishingRequestCallback* callback); // Returns true if the given IP address string falls within a private // (unroutable) network block. Pages which are hosted on these IP addresses // are exempt from client-side phishing detection. This is called by the // ClientSideDetectionHost prior to sending the renderer a - // ViewMsg_StartPhishingDetection IPC. + // SafeBrowsingMsg_StartPhishingDetection IPC. // // ip_address should be a dotted IPv4 address, or an unbracketed IPv6 // address. virtual bool IsPrivateIPAddress(const std::string& ip_address) const; + // Returns true and sets is_phishing if url is in the cache and valid. + virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing); + + // Returns true if the url is in the cache. + virtual bool IsInCache(const GURL& url); + + // Returns true if we have sent more than kMaxReportsPerInterval in the last + // kReportsInterval. + virtual bool OverReportLimit(); + protected: // Use Create() method to create an instance of this object. - ClientSideDetectionService(const FilePath& model_path, - URLRequestContextGetter* request_context_getter); + ClientSideDetectionService( + const FilePath& model_path, + net::URLRequestContextGetter* request_context_getter); private: friend class ClientSideDetectionServiceTest; @@ -168,11 +178,10 @@ class ClientSideDetectionService : public URLFetcher::Delegate { // Helper function which closes the |model_file_| if necessary. void CloseModelFile(); - // Starts preparing the request to be sent to the client-side detection - // frontends. + // Starts sending the request to the client-side detection frontends. + // This method takes ownership of both pointers. void StartClientReportPhishingRequest( - const GURL& phishing_url, - double score, + ClientPhishingRequest* verdict, ClientReportPhishingRequestCallback* callback); // Starts getting the model file. @@ -196,9 +205,6 @@ class ClientSideDetectionService : public URLFetcher::Delegate { const ResponseCookies& cookies, const std::string& data); - // Returns true and sets is_phishing if url is in the cache and valid. - bool GetCachedResult(const GURL& url, bool* is_phishing); - // Invalidate cache results which are no longer useful. void UpdateCache(); @@ -245,7 +251,7 @@ class ClientSideDetectionService : public URLFetcher::Delegate { base::ScopedCallbackFactory<ClientSideDetectionService> callback_factory_; // The context we use to issue network requests. - scoped_refptr<URLRequestContextGetter> request_context_getter_; + scoped_refptr<net::URLRequestContextGetter> request_context_getter_; // The network blocks that we consider private IP address ranges. std::vector<AddressRange> private_networks_; diff --git a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc index 1ce8b03..203dd93 100644 --- a/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc +++ b/chrome/browser/safe_browsing/client_side_detection_service_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -11,16 +11,16 @@ #include "base/file_util.h" #include "base/file_util_proxy.h" #include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "base/memory/scoped_temp_dir.h" #include "base/message_loop.h" #include "base/platform_file.h" -#include "base/scoped_ptr.h" -#include "base/scoped_temp_dir.h" #include "base/task.h" #include "base/time.h" #include "chrome/browser/safe_browsing/client_side_detection_service.h" -#include "chrome/browser/safe_browsing/csd.pb.h" #include "chrome/common/net/test_url_fetcher_factory.h" #include "chrome/common/net/url_fetcher.h" +#include "chrome/common/safe_browsing/csd.pb.h" #include "content/browser/browser_thread.h" #include "googleurl/src/gurl.h" #include "net/url_request/url_request_status.h" @@ -64,10 +64,13 @@ class ClientSideDetectionServiceTest : public testing::Test { } bool SendClientReportPhishingRequest(const GURL& phishing_url, - double score) { + float score) { + ClientPhishingRequest* request = new ClientPhishingRequest(); + request->set_url(phishing_url.spec()); + request->set_client_score(score); + request->set_is_phishing(true); // client thinks the URL is phishing. csd_service_->SendClientReportPhishingRequest( - phishing_url, - score, + request, NewCallback(this, &ClientSideDetectionServiceTest::SendRequestDone)); phishing_url_ = phishing_url; msg_loop_.Run(); // Waits until callback is called. @@ -136,13 +139,13 @@ class ClientSideDetectionServiceTest : public testing::Test { // While 3 elements remain, only the first and the fourth are actually // valid. bool is_phishing; - EXPECT_TRUE(csd_service_->GetCachedResult(GURL("http://first.url.com"), - &is_phishing)); + EXPECT_TRUE(csd_service_->GetValidCachedResult( + GURL("http://first.url.com"), &is_phishing)); EXPECT_FALSE(is_phishing); - EXPECT_FALSE(csd_service_->GetCachedResult(GURL("http://third.url.com"), - &is_phishing)); - EXPECT_TRUE(csd_service_->GetCachedResult(GURL("http://fourth.url.com"), - &is_phishing)); + EXPECT_FALSE(csd_service_->GetValidCachedResult( + GURL("http://third.url.com"), &is_phishing)); + EXPECT_TRUE(csd_service_->GetValidCachedResult( + GURL("http://fourth.url.com"), &is_phishing)); EXPECT_TRUE(is_phishing); } @@ -226,7 +229,7 @@ TEST_F(ClientSideDetectionServiceTest, SendClientReportPhishingRequest) { tmp_dir.path().AppendASCII("model"), NULL)); GURL url("http://a.com/"); - double score = 0.4; // Some random client score. + float score = 0.4f; // Some random client score. base::Time before = base::Time::Now(); @@ -241,62 +244,31 @@ TEST_F(ClientSideDetectionServiceTest, SendClientReportPhishingRequest) { true /* success */); EXPECT_TRUE(SendClientReportPhishingRequest(url, score)); - // Caching causes this to still count as phishy. - response.set_phishy(false); - SetClientReportPhishingResponse(response.SerializeAsString(), - true /* success */); - EXPECT_TRUE(SendClientReportPhishingRequest(url, score)); - - // This request will fail and should not be cached. + // This request will fail GURL second_url("http://b.com/"); response.set_phishy(false); SetClientReportPhishingResponse(response.SerializeAsString(), false /* success*/); EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score)); - // Verify that the previous request was not cached. - response.set_phishy(true); - SetClientReportPhishingResponse(response.SerializeAsString(), - true /* success */); - EXPECT_TRUE(SendClientReportPhishingRequest(second_url, score)); - - // This request is blocked because it's not in the cache and we have more - // than 3 requests. - GURL third_url("http://c.com"); - response.set_phishy(true); - SetClientReportPhishingResponse(response.SerializeAsString(), - true /* success */); - EXPECT_FALSE(SendClientReportPhishingRequest(third_url, score)); - - // Verify that caching still works even when new requests are blocked. - response.set_phishy(true); - SetClientReportPhishingResponse(response.SerializeAsString(), - true /* success */); - EXPECT_TRUE(SendClientReportPhishingRequest(url, score)); - - // Verify that we allow cache refreshing even when requests are blocked. - base::Time cache_time = base::Time::Now() - base::TimeDelta::FromHours(1); - SetCache(second_url, true, cache_time); - - // Even though this element is in the cache, it's not currently valid so - // we make request and return that value instead. - response.set_phishy(false); - SetClientReportPhishingResponse(response.SerializeAsString(), - true /* success */); - EXPECT_FALSE(SendClientReportPhishingRequest(second_url, score)); - base::Time after = base::Time::Now(); - // Check that we have recorded 5 requests, all within the correct time range. - // The blocked request and the cached requests should not be present. + // Check that we have recorded all 3 requests within the correct time range. std::queue<base::Time>& report_times = GetPhishingReportTimes(); - EXPECT_EQ(5U, report_times.size()); + EXPECT_EQ(3U, report_times.size()); while (!report_times.empty()) { base::Time time = report_times.back(); report_times.pop(); EXPECT_LE(before, time); EXPECT_GE(after, time); } + + // Only the first url should be in the cache. + bool is_phishing; + EXPECT_TRUE(csd_service_->IsInCache(url)); + EXPECT_TRUE(csd_service_->GetValidCachedResult(url, &is_phishing)); + EXPECT_TRUE(is_phishing); + EXPECT_FALSE(csd_service_->IsInCache(second_url)); } TEST_F(ClientSideDetectionServiceTest, GetNumReportTest) { diff --git a/chrome/browser/safe_browsing/csd.proto b/chrome/browser/safe_browsing/csd.proto deleted file mode 100644 index fc01f96..0000000 --- a/chrome/browser/safe_browsing/csd.proto +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// Client side phishing and malware detection request and response -// protocol buffers. Those protocol messages should be kept in sync -// with the server implementation. -// -// If you want to change this protocol definition or you have questions -// regarding its format please contact chrome-anti-phishing@googlegroups.com. - -syntax = "proto2"; - -option optimize_for = LITE_RUNTIME; - -package safe_browsing; - -message ClientPhishingRequest { - // URL that the client visited. The CGI parameters are stripped by the - // client. - required string url = 1; - - // Score that was computed on the client. Value is between 0.0 and 1.0. - // The larger the value the more likely the url is phishing. - required float client_score = 2; -} - -message ClientPhishingResponse { - required bool phishy = 1; -} diff --git a/chrome/browser/safe_browsing/filter_false_positive_perftest.cc b/chrome/browser/safe_browsing/filter_false_positive_perftest.cc index 4afa953..e69a59d 100644 --- a/chrome/browser/safe_browsing/filter_false_positive_perftest.cc +++ b/chrome/browser/safe_browsing/filter_false_positive_perftest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -56,22 +56,21 @@ #include <fstream> #include <vector> +#include "app/sql/statement.h" #include "base/command_line.h" #include "base/file_path.h" #include "base/file_util.h" #include "base/logging.h" +#include "base/memory/scoped_ptr.h" #include "base/path_service.h" #include "base/rand_util.h" -#include "base/scoped_ptr.h" -#include "base/sha2.h" #include "base/string_number_conversions.h" #include "base/string_util.h" #include "base/time.h" +#include "crypto/sha2.h" #include "chrome/browser/safe_browsing/bloom_filter.h" #include "chrome/browser/safe_browsing/safe_browsing_util.h" #include "chrome/common/chrome_paths.h" -#include "chrome/common/sqlite_compiled_statement.h" -#include "chrome/common/sqlite_utils.h" #include "googleurl/src/gurl.h" #include "testing/gtest/include/gtest/gtest.h" @@ -80,20 +79,6 @@ using base::TimeDelta; namespace { -// Ensures the SafeBrowsing database is closed properly. -class ScopedPerfDatabase { - public: - explicit ScopedPerfDatabase(sqlite3* db) : db_(db) {} - ~ScopedPerfDatabase() { - sqlite3_close(db_); - } - - private: - sqlite3* db_; - - DISALLOW_COPY_AND_ASSIGN(ScopedPerfDatabase); -}; - // Command line flags. const char kFilterVerbose[] = "filter-verbose"; const char kFilterStart[] = "filter-start"; @@ -102,7 +87,7 @@ const char kFilterCsv[] = "filter-csv"; const char kFilterNumChecks[] = "filter-num-checks"; // Number of hash checks to make during performance testing. -static const int kNumHashChecks = 10000000; +const int kNumHashChecks = 10000000; // Returns the path to the data used in this test, relative to the top of the // source directory. @@ -140,35 +125,30 @@ void BuildBloomFilter(int size_multiplier, // to look up a given prefix than performing SQL queries. bool ReadDatabase(const FilePath& path, std::vector<SBPrefix>* prefixes) { FilePath database_file = path.Append(FILE_PATH_LITERAL("database")); - sqlite3* db = NULL; - if (sqlite_utils::OpenSqliteDb(database_file, &db) != SQLITE_OK) { - sqlite3_close(db); + sql::Connection db; + if (!db.Open(database_file)) return false; - } - - ScopedPerfDatabase database(db); - scoped_ptr<SqliteStatementCache> sql_cache(new SqliteStatementCache(db)); // Get the number of items in the add_prefix table. - std::string sql = "SELECT COUNT(*) FROM add_prefix"; - SQLITE_UNIQUE_STATEMENT(count_statement, *sql_cache, sql.c_str()); - if (!count_statement.is_valid()) + const char* query = "SELECT COUNT(*) FROM add_prefix"; + sql::Statement count_statement(db.GetCachedStatement(SQL_FROM_HERE, query)); + if (!count_statement) return false; - if (count_statement->step() != SQLITE_ROW) + if (!count_statement.Step()) return false; - const int count = count_statement->column_int(0); + const int count = count_statement.ColumnInt(0); - // Load them into a prefix vector and sort + // Load them into a prefix vector and sort. prefixes->reserve(count); - sql = "SELECT prefix FROM add_prefix"; - SQLITE_UNIQUE_STATEMENT(prefix_statement, *sql_cache, sql.c_str()); - if (!prefix_statement.is_valid()) + query = "SELECT prefix FROM add_prefix"; + sql::Statement prefix_statement(db.GetCachedStatement(SQL_FROM_HERE, query)); + if (!prefix_statement) return false; - while (prefix_statement->step() == SQLITE_ROW) - prefixes->push_back(prefix_statement->column_int(0)); + while (prefix_statement.Step()) + prefixes->push_back(prefix_statement.ColumnInt(0)); DCHECK(static_cast<int>(prefixes->size()) == count); sort(prefixes->begin(), prefixes->end()); @@ -196,7 +176,7 @@ int GeneratePrefixHits(const std::string url, for (size_t i = 0; i < hosts.size(); ++i) { for (size_t j = 0; j < paths.size(); ++j) { SBPrefix prefix; - base::SHA256HashString(hosts[i] + paths[j], &prefix, sizeof(prefix)); + crypto::SHA256HashString(hosts[i] + paths[j], &prefix, sizeof(prefix)); if (bloom_filter->Exists(prefix)) prefixes->push_back(prefix); } diff --git a/chrome/browser/safe_browsing/malware_details.cc b/chrome/browser/safe_browsing/malware_details.cc index da6c848..cb2c318 100644 --- a/chrome/browser/safe_browsing/malware_details.cc +++ b/chrome/browser/safe_browsing/malware_details.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -6,15 +6,21 @@ #include "chrome/browser/safe_browsing/malware_details.h" +#include "base/callback.h" #include "base/lazy_instance.h" -#include "chrome/browser/safe_browsing/safe_browsing_service.h" +#include "chrome/browser/net/chrome_url_request_context.h" +#include "chrome/browser/profiles/profile.h" +#include "chrome/browser/safe_browsing/malware_details_cache.h" #include "chrome/browser/safe_browsing/report.pb.h" -#include "chrome/common/render_messages.h" -#include "chrome/common/render_messages_params.h" +#include "chrome/browser/safe_browsing/safe_browsing_service.h" +#include "chrome/common/safe_browsing/safebrowsing_messages.h" #include "content/browser/browser_thread.h" #include "content/browser/renderer_host/render_view_host.h" #include "content/browser/tab_contents/navigation_entry.h" #include "content/browser/tab_contents/tab_contents.h" +#include "net/base/io_buffer.h" +#include "net/disk_cache/disk_cache.h" +#include "net/url_request/url_request_context_getter.h" using safe_browsing::ClientMalwareReportRequest; @@ -30,9 +36,10 @@ class MalwareDetailsFactoryImpl : public MalwareDetailsFactory { public: MalwareDetails* CreateMalwareDetails( + SafeBrowsingService* sb_service, TabContents* tab_contents, const SafeBrowsingService::UnsafeResource& unsafe_resource) { - return new MalwareDetails(tab_contents, unsafe_resource); + return new MalwareDetails(sb_service, tab_contents, unsafe_resource); } private: @@ -50,30 +57,36 @@ static base::LazyInstance<MalwareDetailsFactoryImpl> // Create a MalwareDetails for the given tab. /* static */ MalwareDetails* MalwareDetails::NewMalwareDetails( + SafeBrowsingService* sb_service, TabContents* tab_contents, const SafeBrowsingService::UnsafeResource& resource) { // Set up the factory if this has not been done already (tests do that // before this method is called). if (!factory_) factory_ = g_malware_details_factory_impl.Pointer(); - return factory_->CreateMalwareDetails(tab_contents, resource); + return factory_->CreateMalwareDetails(sb_service, tab_contents, resource); } // Create a MalwareDetails for the given tab. Runs in the UI thread. MalwareDetails::MalwareDetails( + SafeBrowsingService* sb_service, TabContents* tab_contents, - const SafeBrowsingService::UnsafeResource resource) + const SafeBrowsingService::UnsafeResource& resource) : TabContentsObserver(tab_contents), - resource_(resource) { + request_context_getter_(tab_contents->profile()->GetRequestContext()), + sb_service_(sb_service), + resource_(resource), + cache_collector_(new MalwareDetailsCacheCollector) { StartCollection(); } -MalwareDetails::~MalwareDetails() {} +MalwareDetails::~MalwareDetails() { +} bool MalwareDetails::OnMessageReceived(const IPC::Message& message) { bool handled = true; IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message) - IPC_MESSAGE_HANDLER(ViewHostMsg_MalwareDOMDetails, + IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails, OnReceivedMalwareDOMDetails) IPC_MESSAGE_UNHANDLED(handled = false) IPC_END_MESSAGE_MAP() @@ -89,7 +102,7 @@ bool MalwareDetails::IsPublicUrl(const GURL& url) const { // resources_ and updates |resource| to point to it. ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource( const GURL& url) { - ResourceMap::iterator it = resources_.find(url.spec()); + safe_browsing::ResourceMap::iterator it = resources_.find(url.spec()); if (it != resources_.end()) { return it->second.get(); } @@ -196,7 +209,7 @@ void MalwareDetails::StartCollection() { // When the renderer is done, this is called. void MalwareDetails::OnReceivedMalwareDOMDetails( - const ViewHostMsg_MalwareDOMDetails_Params& params) { + const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) { // Schedule this in IO thread, so it doesn't conflict with future users // of our data structures (eg GetSerializedReport). BrowserThread::PostTask( @@ -206,13 +219,19 @@ void MalwareDetails::OnReceivedMalwareDOMDetails( } void MalwareDetails::AddDOMDetails( - const ViewHostMsg_MalwareDOMDetails_Params& params) { + const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + DVLOG(1) << "Nodes from the DOM: " << params.size(); + + // If we have already started collecting data from the HTTP cache, don't + // modify our state. + if (cache_collector_->HasStarted()) + return; + // Add the urls from the DOM to |resources_|. The renderer could be // sending bogus messages, so limit the number of nodes we accept. - DVLOG(1) << "Nodes from the DOM: " << params.nodes.size(); - for (uint32 i = 0; i < params.nodes.size() && i < kMaxDomNodes; ++i) { - ViewHostMsg_MalwareDOMDetails_Node node = params.nodes[i]; + for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) { + SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i]; DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; AddUrl(node.url, node.parent, node.tag_name, &(node.children)); } @@ -223,21 +242,34 @@ void MalwareDetails::AddDOMDetails( // to take an action, we expect this to be called after // OnReceivedMalwareDOMDetails in most cases. If not, we don't include // the DOM data in our report. -const std::string* MalwareDetails::GetSerializedReport() { +void MalwareDetails::FinishCollection() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); - // The |report_| protocol buffer is now generated: We add all the - // urls in our |resources_| maps. - for (ResourceMap::const_iterator it = resources_.begin(); + + cache_collector_->StartCacheCollection( + request_context_getter_, + &resources_, + &cache_result_, + NewRunnableMethod(this, &MalwareDetails::OnCacheCollectionReady)); +} + +void MalwareDetails::OnCacheCollectionReady() { + DVLOG(1) << "OnCacheCollectionReady."; + // Add all the urls in our |resources_| maps to the |report_| protocol buffer. + for (safe_browsing::ResourceMap::const_iterator it = resources_.begin(); it != resources_.end(); it++) { ClientMalwareReportRequest::Resource* pb_resource = report_->add_resources(); pb_resource->CopyFrom(*(it->second)); } - scoped_ptr<std::string> request_data(new std::string()); - if (!report_->SerializeToString(request_data.get())) { + report_->set_complete(cache_result_); + + // Send the report, using the SafeBrowsingService. + std::string serialized; + if (!report_->SerializeToString(&serialized)) { DLOG(ERROR) << "Unable to serialize the malware report."; + return; } - return request_data.release(); + sb_service_->SendSerializedMalwareDetails(serialized); } diff --git a/chrome/browser/safe_browsing/malware_details.h b/chrome/browser/safe_browsing/malware_details.h index efb3eef..494837e 100644 --- a/chrome/browser/safe_browsing/malware_details.h +++ b/chrome/browser/safe_browsing/malware_details.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -10,29 +10,39 @@ // users opt-in to do so from the malware warning page. // An instance of this class is generated when a malware warning page -// is shown (SafeBrowsingBlockingPage). It is passed on to the -// SafeBrowsing service when the warning goes away. +// is shown (SafeBrowsingBlockingPage). #include <string> #include <vector> #include "base/hash_tables.h" -#include "base/linked_ptr.h" -#include "base/scoped_ptr.h" +#include "base/memory/linked_ptr.h" +#include "base/memory/ref_counted.h" +#include "base/memory/scoped_ptr.h" #include "chrome/browser/safe_browsing/report.pb.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "content/browser/tab_contents/tab_contents_observer.h" +#include "net/base/completion_callback.h" class TabContents; -struct ViewHostMsg_MalwareDOMDetails_Params; +struct SafeBrowsingHostMsg_MalwareDOMDetails_Node; +class MalwareDetailsCacheCollector; class MalwareDetailsFactory; +namespace safe_browsing { +// Maps a URL to its Resource. +typedef base::hash_map< + std::string, + linked_ptr<safe_browsing::ClientMalwareReportRequest::Resource> > ResourceMap; +} + class MalwareDetails : public base::RefCountedThreadSafe<MalwareDetails>, public TabContentsObserver { public: // Constructs a new MalwareDetails instance, using the factory. static MalwareDetails* NewMalwareDetails( + SafeBrowsingService* sb_service, TabContents* tab_contents, const SafeBrowsingService::UnsafeResource& resource); @@ -42,9 +52,13 @@ class MalwareDetails : public base::RefCountedThreadSafe<MalwareDetails>, factory_ = factory; } - // The SafeBrowsingService calls this from the IO thread, to get the - // serialized report as a string and send it over. - const std::string* GetSerializedReport(); + // The SafeBrowsingBlockingPage calls this from the IO thread when + // the user is leaving the blocking page and has opted-in to sending + // the report. We start the cache collection, and when we are done, + // we send the report. + void FinishCollection(); + + void OnCacheCollectionReady(); // TabContentsObserver implementation. virtual bool OnMessageReceived(const IPC::Message& message); @@ -52,24 +66,25 @@ class MalwareDetails : public base::RefCountedThreadSafe<MalwareDetails>, protected: friend class MalwareDetailsFactoryImpl; - MalwareDetails(TabContents* tab_contents, - const SafeBrowsingService::UnsafeResource resource); + MalwareDetails(SafeBrowsingService* sb_service, + TabContents* tab_contents, + const SafeBrowsingService::UnsafeResource& resource); + + virtual ~MalwareDetails(); // Called on the IO thread with the DOM details. virtual void AddDOMDetails( - const ViewHostMsg_MalwareDOMDetails_Params& params); + const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params); - virtual ~MalwareDetails(); + // The report protocol buffer. + scoped_ptr<safe_browsing::ClientMalwareReportRequest> report_; + + // Used to get a pointer to the HTTP cache. + scoped_refptr<net::URLRequestContextGetter> request_context_getter_; private: friend class base::RefCountedThreadSafe<MalwareDetails>; - // Maps a URL to its Resource. - typedef base::hash_map< - std::string, - linked_ptr<safe_browsing::ClientMalwareReportRequest::Resource> > - ResourceMap; - // Starts the collection of the report. void StartCollection(); @@ -91,23 +106,30 @@ class MalwareDetails : public base::RefCountedThreadSafe<MalwareDetails>, // Message handler. void OnReceivedMalwareDOMDetails( - const ViewHostMsg_MalwareDOMDetails_Params& params); + const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params); + + scoped_refptr<SafeBrowsingService> sb_service_; const SafeBrowsingService::UnsafeResource resource_; // For every Url we collect we create a Resource message. We keep // them in a map so we can avoid duplicates. - ResourceMap resources_; + safe_browsing::ResourceMap resources_; - // The report protocol buffer. - scoped_ptr<safe_browsing::ClientMalwareReportRequest> report_; + // Result from the cache extractor. + bool cache_result_; // The factory used to instanciate SafeBrowsingBlockingPage objects. // Usefull for tests, so they can provide their own implementation of // SafeBrowsingBlockingPage. static MalwareDetailsFactory* factory_; + // Used to collect details from the HTTP Cache. + scoped_refptr<MalwareDetailsCacheCollector> cache_collector_; + FRIEND_TEST_ALL_PREFIXES(MalwareDetailsTest, MalwareDOMDetails); + FRIEND_TEST_ALL_PREFIXES(MalwareDetailsTest, HTTPCache); + FRIEND_TEST_ALL_PREFIXES(MalwareDetailsTest, HTTPCacheNoEntries); DISALLOW_COPY_AND_ASSIGN(MalwareDetails); }; @@ -118,6 +140,7 @@ class MalwareDetailsFactory { virtual ~MalwareDetailsFactory() { } virtual MalwareDetails* CreateMalwareDetails( + SafeBrowsingService* sb_service, TabContents* tab_contents, const SafeBrowsingService::UnsafeResource& unsafe_resource) = 0; }; diff --git a/chrome/browser/safe_browsing/malware_details_cache.cc b/chrome/browser/safe_browsing/malware_details_cache.cc new file mode 100644 index 0000000..e41a792 --- /dev/null +++ b/chrome/browser/safe_browsing/malware_details_cache.cc @@ -0,0 +1,204 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Implementation of the MalwareDetails class. + +#include "chrome/browser/safe_browsing/malware_details.h" + +#include "base/callback.h" +#include "base/lazy_instance.h" +#include "base/md5.h" +#include "base/string_util.h" +#include "chrome/browser/net/chrome_url_request_context.h" +#include "chrome/browser/safe_browsing/malware_details_cache.h" +#include "chrome/browser/safe_browsing/safe_browsing_service.h" +#include "chrome/browser/safe_browsing/report.pb.h" +#include "content/browser/browser_thread.h" +#include "net/base/load_flags.h" +#include "net/http/http_response_headers.h" +#include "net/url_request/url_request_context_getter.h" +#include "net/url_request/url_request_status.h" + +using safe_browsing::ClientMalwareReportRequest; + +// Only send small files for now, a better strategy would use the size +// of the whole report and the user's bandwidth. +static const uint32 kMaxBodySizeBytes = 1024; + +MalwareDetailsCacheCollector::MalwareDetailsCacheCollector() + : has_started_(false), + current_fetch_(NULL) { +} + +MalwareDetailsCacheCollector::~MalwareDetailsCacheCollector() { +} + +void MalwareDetailsCacheCollector::StartCacheCollection( + net::URLRequestContextGetter* request_context_getter, + safe_browsing::ResourceMap* resources, + bool* result, + Task* callback) { + // Start the data collection from the HTTP cache. We use a URLFetcher + // and set the right flags so we only hit the cache. + DVLOG(1) << "Getting cache data for all urls..."; + request_context_getter_ = request_context_getter; + resources_ = resources; + resources_it_ = resources_->begin(); + result_ = result; + callback_ = callback; + has_started_ = true; + + // Post a task in the message loop, so the callers don't need to + // check if we call their callback immediately. + BrowserThread::PostTask( + BrowserThread::IO, FROM_HERE, + NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry)); +} + +bool MalwareDetailsCacheCollector::HasStarted() { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + return has_started_; +} + +// Fetch a URL and advance to the next one when done. +void MalwareDetailsCacheCollector::OpenEntry() { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + DVLOG(1) << "OpenEntry"; + + if (resources_it_ == resources_->end()) { // We are done. + AllDone(true); + return; + } + + if (!request_context_getter_) { + DVLOG(1) << "Missing request context getter"; + AllDone(false); + return; + } + + current_fetch_.reset(new URLFetcher( + GURL(resources_it_->first), + URLFetcher::GET, + this)); + current_fetch_->set_request_context(request_context_getter_); + // Only from cache, and don't save cookies. + current_fetch_->set_load_flags(net::LOAD_ONLY_FROM_CACHE | + net::LOAD_DO_NOT_SAVE_COOKIES); + current_fetch_->set_automatically_retry_on_5xx(false); // No retries. + current_fetch_->Start(); // OnURLFetchComplete will be called when done. +} + +ClientMalwareReportRequest::Resource* MalwareDetailsCacheCollector::GetResource( + const GURL& url) { + safe_browsing::ResourceMap::iterator it = resources_->find(url.spec()); + if (it != resources_->end()) { + return it->second.get(); + } + return NULL; +} + +void MalwareDetailsCacheCollector::OnURLFetchComplete( + const URLFetcher* source, + const GURL& url, + const net::URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data) { + DVLOG(1) << "OnUrlFetchComplete"; + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + DCHECK(current_fetch_.get()); + if (status.status() != net::URLRequestStatus::SUCCESS && + status.os_error() == net::ERR_CACHE_MISS) { + // Cache miss, skip this resource. + DVLOG(1) << "Cache miss for url: " << url; + AdvanceEntry(); + return; + } + + if (status.status() != net::URLRequestStatus::SUCCESS) { + // Some other error occurred, e.g. the request could have been cancelled. + DVLOG(1) << "Unsuccessful fetch: " << url; + AdvanceEntry(); + return; + } + + // Set the response headers and body to the right resource, which + // might not be the same as the one we asked for. + // For redirects, resources_it_->first != url.spec(). + ClientMalwareReportRequest::Resource* resource = GetResource(url); + if (!resource) { + DVLOG(1) << "Cannot find resource for url:" << url; + AdvanceEntry(); + return; + } + + ReadResponse(resource, source); + ReadData(resource, data); + AdvanceEntry(); +} + +void MalwareDetailsCacheCollector::ReadResponse( + ClientMalwareReportRequest::Resource* pb_resource, + const URLFetcher* source) { + DVLOG(1) << "ReadResponse"; + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + net::HttpResponseHeaders* headers = source->response_headers(); + if (!headers) { + DVLOG(1) << "Missing response headers."; + return; + } + + ClientMalwareReportRequest::HTTPResponse* pb_response = + pb_resource->mutable_response(); + pb_response->mutable_firstline()->set_code(headers->response_code()); + void* iter = NULL; + std::string name, value; + while (headers->EnumerateHeaderLines(&iter, &name, &value)) { + ClientMalwareReportRequest::HTTPHeader* pb_header = + pb_response->add_headers(); + pb_header->set_name(name); + // Strip any Set-Cookie headers. + if (LowerCaseEqualsASCII(name, "set-cookie")) { + pb_header->set_value(""); + } else { + pb_header->set_value(value); + } + } +} + +void MalwareDetailsCacheCollector::ReadData( + ClientMalwareReportRequest::Resource* pb_resource, + const std::string& data) { + DVLOG(1) << "ReadData"; + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + ClientMalwareReportRequest::HTTPResponse* pb_response = + pb_resource->mutable_response(); + if (data.size() <= kMaxBodySizeBytes) { // Only send small bodies for now. + pb_response->set_body(data); + } + pb_response->set_bodylength(data.size()); + MD5Digest digest; + MD5Sum(data.c_str(), data.size(), &digest); + pb_response->set_bodydigest(MD5DigestToBase16(digest)); +} + +void MalwareDetailsCacheCollector::AdvanceEntry() { + DVLOG(1) << "AdvanceEntry"; + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + // Advance to the next resource. + ++resources_it_; + current_fetch_.reset(NULL); + + // Create a task so we don't take over the IO thread for too long. + BrowserThread::PostTask( + BrowserThread::IO, FROM_HERE, + NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry)); +} + +void MalwareDetailsCacheCollector::AllDone(bool success) { + DVLOG(1) << "AllDone"; + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + *result_ = success; + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, callback_); +} diff --git a/chrome/browser/safe_browsing/malware_details_cache.h b/chrome/browser/safe_browsing/malware_details_cache.h new file mode 100644 index 0000000..3f002c7 --- /dev/null +++ b/chrome/browser/safe_browsing/malware_details_cache.h @@ -0,0 +1,114 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_MALWARE_DETAILS_CACHE_H_ +#define CHROME_BROWSER_SAFE_BROWSING_MALWARE_DETAILS_CACHE_H_ +#pragma once + +// A class that gets malware details from the HTTP Cache. +// An instance of this class is generated by MalwareDetails. + +#include <string> +#include <vector> + +#include "base/hash_tables.h" +#include "base/memory/linked_ptr.h" +#include "base/memory/ref_counted.h" +#include "chrome/browser/safe_browsing/report.pb.h" +#include "chrome/common/net/url_fetcher.h" +#include "net/base/completion_callback.h" + +namespace net { +class URLRequestContext; +} + +class MalwareDetailsFactory; + +namespace safe_browsing { + +// Maps a URL to its Resource. +typedef base::hash_map< + std::string, + linked_ptr<safe_browsing::ClientMalwareReportRequest::Resource> > ResourceMap; +} + +class MalwareDetailsCacheCollector + : public base::RefCountedThreadSafe<MalwareDetailsCacheCollector>, + public URLFetcher::Delegate { + + public: + MalwareDetailsCacheCollector(); + virtual ~MalwareDetailsCacheCollector(); + + // We use |request_context_getter|, we modify |resources| and + // |result|, and we call |callback|, so they must all remain alive + // for the lifetime of this object. + void StartCacheCollection( + net::URLRequestContextGetter* request_context_getter, + safe_browsing::ResourceMap* resources, + bool* result, + Task* callback); + + // Returns whether or not StartCacheCollection has been called. + bool HasStarted(); + + protected: + // Implementation of URLFetcher::Delegate. Called after the request + // completes (either successfully or with failure). + virtual void OnURLFetchComplete(const URLFetcher* source, + const GURL& url, + const net::URLRequestStatus& status, + int response_code, + const ResponseCookies& cookies, + const std::string& data); + + private: + // Points to the url for which we are fetching the HTTP cache entry or + // redirect chain. + safe_browsing::ResourceMap::iterator resources_it_; + + // Points to the resources_ map in the MalwareDetails. + safe_browsing::ResourceMap* resources_; + + // Points to the cache_result_ in the MalwareDetails. + bool* result_; + + // Method we call when we are done. The caller must be alive for the + // whole time, we are modifying its state (see above). + Task* callback_; + + // Set to true as soon as StartCacheCollection is called. + bool has_started_; + + // Used to get a pointer to the current request context. + scoped_refptr<net::URLRequestContextGetter> request_context_getter_; + + // The current URLFetcher. + scoped_ptr<URLFetcher> current_fetch_; + + // Returns the resource from resources_ that corresponds to |url| + safe_browsing::ClientMalwareReportRequest::Resource* GetResource( + const GURL& url); + + // Creates a new URLFetcher and starts it. + void OpenEntry(); + + // Read the HTTP response from |source| and add it to |pb_resource|. + void ReadResponse( + safe_browsing::ClientMalwareReportRequest::Resource* pb_resource, + const URLFetcher* source); + + // Read the body |data| and add it to |pb_resource|. + void ReadData( + safe_browsing::ClientMalwareReportRequest::Resource* pb_resource, + const std::string& data); + + // Called when we are done. + void AllDone(bool success); + + // Advances to the next entry in resources_it_. + void AdvanceEntry(); +}; + +#endif // CHROME_BROWSER_SAFE_BROWSING_MALWARE_DETAILS_CACHE_H_ diff --git a/chrome/browser/safe_browsing/malware_details_unittest.cc b/chrome/browser/safe_browsing/malware_details_unittest.cc index 1ad936a..f13469b 100644 --- a/chrome/browser/safe_browsing/malware_details_unittest.cc +++ b/chrome/browser/safe_browsing/malware_details_unittest.cc @@ -1,38 +1,185 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include <algorithm> +#include "base/pickle.h" +#include "base/time.h" +#include "chrome/browser/profiles/profile.h" #include "chrome/browser/safe_browsing/malware_details.h" #include "chrome/browser/safe_browsing/report.pb.h" #include "chrome/common/render_messages.h" -#include "chrome/common/render_messages_params.h" +#include "chrome/common/safe_browsing/safebrowsing_messages.h" +#include "chrome/test/test_url_request_context_getter.h" +#include "chrome/test/testing_profile.h" #include "content/browser/browser_thread.h" #include "content/browser/renderer_host/test_render_view_host.h" #include "content/browser/tab_contents/navigation_entry.h" #include "content/browser/tab_contents/test_tab_contents.h" +#include "net/base/io_buffer.h" +#include "net/base/test_completion_callback.h" +#include "net/disk_cache/disk_cache.h" +#include "net/http/http_cache.h" +#include "net/http/http_response_headers.h" +#include "net/http/http_response_info.h" +#include "net/http/http_util.h" +#include "net/url_request/url_request_context.h" +#include "net/url_request/url_request_context_getter.h" static const char* kOriginalLandingURL = "http://www.originallandingpage.com/"; -static const char* kLandingURL = "http://www.landingpage.com/"; -static const char* kMalwareURL = "http://www.malware.com/"; static const char* kHttpsURL = "https://www.url.com/"; static const char* kDOMChildURL = "http://www.domparent.com/"; static const char* kDOMParentURL = "http://www.domchild.com/"; static const char* kFirstRedirectURL = "http://redirectone.com/"; static const char* kSecondRedirectURL = "http://redirecttwo.com/"; +static const char* kMalwareURL = "http://www.malware.com/"; +static const char* kMalwareHeaders = + "HTTP/1.1 200 OK\n" + "Content-Type: image/jpeg\n"; +static const char* kMalwareData = "exploit();"; + +static const char* kLandingURL = "http://www.landingpage.com/"; +static const char* kLandingHeaders = + "HTTP/1.1 200 OK\n" + "Content-Type: text/html\n" + "Content-Length: 1024\n" + "Set-Cookie: tastycookie\n"; // This header is stripped. +static const char* kLandingData = "<iframe src='http://www.malware.com'>"; + using safe_browsing::ClientMalwareReportRequest; +namespace { + +void WriteHeaders(disk_cache::Entry* entry, const std::string headers) { + net::HttpResponseInfo responseinfo; + std::string raw_headers = net::HttpUtil::AssembleRawHeaders( + headers.c_str(), headers.size()); + responseinfo.headers = new net::HttpResponseHeaders(raw_headers); + + Pickle pickle; + responseinfo.Persist(&pickle, false, false); + + scoped_refptr<net::WrappedIOBuffer> buf(new net::WrappedIOBuffer( + reinterpret_cast<const char*>(pickle.data()))); + int len = static_cast<int>(pickle.size()); + + TestCompletionCallback cb; + int rv = entry->WriteData(0, 0, buf, len, &cb, true); + ASSERT_EQ(len, cb.GetResult(rv)); +} + +void WriteData(disk_cache::Entry* entry, const std::string data) { + if (data.empty()) + return; + + int len = data.length(); + scoped_refptr<net::IOBuffer> buf(new net::IOBuffer(len)); + memcpy(buf->data(), data.data(), data.length()); + + TestCompletionCallback cb; + int rv = entry->WriteData(1, 0, buf, len, &cb, true); + ASSERT_EQ(len, cb.GetResult(rv)); +} + +void WriteToEntry(disk_cache::Backend* cache, const std::string key, + const std::string headers, const std::string data) { + TestCompletionCallback cb; + disk_cache::Entry* entry; + int rv = cache->CreateEntry(key, &entry, &cb); + rv = cb.GetResult(rv); + if (rv != net::OK) { + rv = cache->OpenEntry(key, &entry, &cb); + ASSERT_EQ(net::OK, cb.GetResult(rv)); + } + + WriteHeaders(entry, headers); + WriteData(entry, data); + + entry->Close(); +} + +void FillCache(net::URLRequestContext* context) { + TestCompletionCallback cb; + disk_cache::Backend* cache; + int rv = + context->http_transaction_factory()->GetCache()->GetBackend(&cache, &cb); + ASSERT_EQ(net::OK, cb.GetResult(rv)); + + std::string empty; + WriteToEntry(cache, kMalwareURL, kMalwareHeaders, kMalwareData); + WriteToEntry(cache, kLandingURL, kLandingHeaders, kLandingData); +} + +void QuitUIMessageLoop() { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + BrowserThread::PostTask(BrowserThread::UI, + FROM_HERE, + new MessageLoop::QuitTask()); +} + +// Lets us provide a MockURLRequestContext with an HTTP Cache we pre-populate. +// Also exposes the constructor. +class MalwareDetailsWrap : public MalwareDetails { + public: + MalwareDetailsWrap(SafeBrowsingService* sb_service, + TabContents* tab_contents, + const SafeBrowsingService::UnsafeResource& unsafe_resource, + net::URLRequestContextGetter* request_context_getter) + : MalwareDetails(sb_service, tab_contents, unsafe_resource) { + request_context_getter_ = request_context_getter; + } + + virtual ~MalwareDetailsWrap() {} +}; + +class MockSafeBrowsingService : public SafeBrowsingService { + public: + MockSafeBrowsingService() {} + virtual ~MockSafeBrowsingService() {} + + // When the MalwareDetails is done, this is called. + virtual void SendSerializedMalwareDetails(const std::string& serialized) { + DVLOG(1) << "SendSerializedMalwareDetails"; + // Notify WaitForSerializedReport. + BrowserThread::PostTask(BrowserThread::IO, + FROM_HERE, + NewRunnableFunction(&QuitUIMessageLoop)); + serialized_ = serialized; + } + + const std::string& GetSerialized() { + return serialized_; + } + + private: + std::string serialized_; + DISALLOW_COPY_AND_ASSIGN(MockSafeBrowsingService); +}; + +} // namespace. + class MalwareDetailsTest : public RenderViewHostTestHarness { public: MalwareDetailsTest() - : ui_thread_(BrowserThread::UI, MessageLoop::current()), - io_thread_(BrowserThread::IO, MessageLoop::current()) { + : ui_thread_(BrowserThread::UI, &message_loop_), + io_thread_(BrowserThread::IO), + sb_service_(new MockSafeBrowsingService()) { } virtual void SetUp() { RenderViewHostTestHarness::SetUp(); + // request_context_getter_ = new TestURLRequestContextGetter(); + + // The URLFetcher checks that the messageloop type is IO. + ASSERT_TRUE(io_thread_.StartWithOptions( + base::Thread::Options(MessageLoop::TYPE_IO, 0))); + } + + virtual void TearDown() { + io_thread_.Stop(); + RenderViewHostTestHarness::TearDown(); } static bool ResourceLessThan( @@ -41,6 +188,18 @@ class MalwareDetailsTest : public RenderViewHostTestHarness { return lhs->id() < rhs->id(); } + std::string WaitForSerializedReport(MalwareDetails* report) { + BrowserThread::PostTask( + BrowserThread::IO, + FROM_HERE, + NewRunnableMethod( + report, &MalwareDetails::FinishCollection)); + // Wait for the callback (SendSerializedMalwareDetails). + DVLOG(1) << "Waiting for SendSerializedMalwareDetails"; + MessageLoop::current()->Run(); + return sb_service_->GetSerialized(); + } + protected: void InitResource(SafeBrowsingService::UnsafeResource* resource, ResourceType::Type resource_type, @@ -49,8 +208,8 @@ class MalwareDetailsTest : public RenderViewHostTestHarness { resource->url = url; resource->resource_type = resource_type; resource->threat_type = SafeBrowsingService::URL_MALWARE; - resource->render_process_host_id = contents_->GetRenderProcessHost()->id(); - resource->render_view_id = contents_->render_view_host()->routing_id(); + resource->render_process_host_id = contents()->GetRenderProcessHost()->id(); + resource->render_view_id = contents()->render_view_host()->routing_id(); } void VerifyResults(const ClientMalwareReportRequest& report_pb, @@ -80,18 +239,48 @@ class MalwareDetailsTest : public RenderViewHostTestHarness { &MalwareDetailsTest::ResourceLessThan); for (uint32 i = 0; i < expected.size(); ++i) { - EXPECT_EQ(expected[i]->id(), resources[i]->id()); - EXPECT_EQ(expected[i]->url(), resources[i]->url()); - EXPECT_EQ(expected[i]->parent_id(), resources[i]->parent_id()); - ASSERT_EQ(expected[i]->child_ids_size(), resources[i]->child_ids_size()); - for (int j = 0; j < expected[i]->child_ids_size(); j++) { - EXPECT_EQ(expected[i]->child_ids(j), resources[i]->child_ids(j)); + VerifyResource(resources[i], expected[i]); + } + + EXPECT_EQ(expected_pb.complete(), report_pb.complete()); + } + + void VerifyResource(const ClientMalwareReportRequest::Resource* resource, + const ClientMalwareReportRequest::Resource* expected) { + EXPECT_EQ(expected->id(), resource->id()); + EXPECT_EQ(expected->url(), resource->url()); + EXPECT_EQ(expected->parent_id(), resource->parent_id()); + ASSERT_EQ(expected->child_ids_size(), resource->child_ids_size()); + for (int i = 0; i < expected->child_ids_size(); i++) { + EXPECT_EQ(expected->child_ids(i), resource->child_ids(i)); + } + + // Verify HTTP Responses + if (expected->has_response()) { + ASSERT_TRUE(resource->has_response()); + EXPECT_EQ(expected->response().firstline().code(), + resource->response().firstline().code()); + + ASSERT_EQ(expected->response().headers_size(), + resource->response().headers_size()); + for (int i = 0; i < expected->response().headers_size(); ++i) { + EXPECT_EQ(expected->response().headers(i).name(), + resource->response().headers(i).name()); + EXPECT_EQ(expected->response().headers(i).value(), + resource->response().headers(i).value()); } + + EXPECT_EQ(expected->response().body(), resource->response().body()); + EXPECT_EQ(expected->response().bodylength(), + resource->response().bodylength()); + EXPECT_EQ(expected->response().bodydigest(), + resource->response().bodydigest()); } } BrowserThread ui_thread_; BrowserThread io_thread_; + scoped_refptr<MockSafeBrowsingService> sb_service_; }; // Tests creating a simple malware report. @@ -102,12 +291,13 @@ TEST_F(MalwareDetailsTest, MalwareSubResource) { SafeBrowsingService::UnsafeResource resource; InitResource(&resource, ResourceType::SUB_RESOURCE, GURL(kMalwareURL)); - scoped_refptr<MalwareDetails> report = MalwareDetails::NewMalwareDetails( - contents(), resource); + scoped_refptr<MalwareDetailsWrap> report = new MalwareDetailsWrap( + sb_service_, contents(), resource, NULL); + + std::string serialized = WaitForSerializedReport(report); - scoped_ptr<const std::string> serialized(report->GetSerializedReport()); ClientMalwareReportRequest actual; - actual.ParseFromString(*serialized); + actual.ParseFromString(serialized); ClientMalwareReportRequest expected; expected.set_malware_url(kMalwareURL); @@ -133,12 +323,13 @@ TEST_F(MalwareDetailsTest, MalwareSubResourceWithOriginalUrl) { InitResource(&resource, ResourceType::SUB_RESOURCE, GURL(kMalwareURL)); resource.original_url = GURL(kOriginalLandingURL); - scoped_refptr<MalwareDetails> report = MalwareDetails::NewMalwareDetails( - contents(), resource); + scoped_refptr<MalwareDetailsWrap> report = new MalwareDetailsWrap( + sb_service_.get(), contents(), resource, NULL); + + std::string serialized = WaitForSerializedReport(report); - scoped_ptr<const std::string> serialized(report->GetSerializedReport()); ClientMalwareReportRequest actual; - actual.ParseFromString(*serialized); + actual.ParseFromString(serialized); ClientMalwareReportRequest expected; expected.set_malware_url(kMalwareURL); @@ -170,27 +361,27 @@ TEST_F(MalwareDetailsTest, MalwareDOMDetails) { SafeBrowsingService::UnsafeResource resource; InitResource(&resource, ResourceType::SUB_RESOURCE, GURL(kMalwareURL)); - scoped_refptr<MalwareDetails> report = MalwareDetails::NewMalwareDetails( - contents(), resource); + scoped_refptr<MalwareDetailsWrap> report = new MalwareDetailsWrap( + sb_service_.get(), contents(), resource, NULL); // Send a message from the DOM, with 2 nodes, a parent and a child. - ViewHostMsg_MalwareDOMDetails_Params params; - ViewHostMsg_MalwareDOMDetails_Node child_node; + std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node> params; + SafeBrowsingHostMsg_MalwareDOMDetails_Node child_node; child_node.url = GURL(kDOMChildURL); child_node.tag_name = "iframe"; child_node.parent = GURL(kDOMParentURL); - params.nodes.push_back(child_node); - ViewHostMsg_MalwareDOMDetails_Node parent_node; + params.push_back(child_node); + SafeBrowsingHostMsg_MalwareDOMDetails_Node parent_node; parent_node.url = GURL(kDOMParentURL); parent_node.children.push_back(GURL(kDOMChildURL)); - params.nodes.push_back(parent_node); + params.push_back(parent_node); report->OnReceivedMalwareDOMDetails(params); MessageLoop::current()->RunAllPending(); - scoped_ptr<const std::string> serialized(report->GetSerializedReport()); + std::string serialized = WaitForSerializedReport(report); ClientMalwareReportRequest actual; - actual.ParseFromString(*serialized); + actual.ParseFromString(serialized); ClientMalwareReportRequest expected; expected.set_malware_url(kMalwareURL); @@ -214,6 +405,7 @@ TEST_F(MalwareDetailsTest, MalwareDOMDetails) { pb_resource->set_id(3); pb_resource->set_url(kDOMParentURL); pb_resource->add_child_ids(2); + expected.set_complete(false); // Since the cache was missing. VerifyResults(actual, expected); } @@ -223,12 +415,12 @@ TEST_F(MalwareDetailsTest, NotPublicUrl) { controller().LoadURL(GURL(kHttpsURL), GURL(), PageTransition::TYPED); SafeBrowsingService::UnsafeResource resource; InitResource(&resource, ResourceType::SUB_RESOURCE, GURL(kMalwareURL)); - scoped_refptr<MalwareDetails> report = MalwareDetails::NewMalwareDetails( - contents(), resource); + scoped_refptr<MalwareDetailsWrap> report = new MalwareDetailsWrap( + sb_service_.get(), contents(), resource, NULL); - scoped_ptr<const std::string> serialized(report->GetSerializedReport()); + std::string serialized = WaitForSerializedReport(report); ClientMalwareReportRequest actual; - actual.ParseFromString(*serialized); + actual.ParseFromString(serialized); ClientMalwareReportRequest expected; expected.set_malware_url(kMalwareURL); // No page_url @@ -254,12 +446,12 @@ TEST_F(MalwareDetailsTest, MalwareWithRedirectUrl) { resource.redirect_urls.push_back(GURL(kSecondRedirectURL)); resource.redirect_urls.push_back(GURL(kMalwareURL)); - scoped_refptr<MalwareDetails> report = MalwareDetails::NewMalwareDetails( - contents(), resource); + scoped_refptr<MalwareDetailsWrap> report = new MalwareDetailsWrap( + sb_service_.get(), contents(), resource, NULL); - scoped_ptr<const std::string> serialized(report->GetSerializedReport()); + std::string serialized = WaitForSerializedReport(report); ClientMalwareReportRequest actual; - actual.ParseFromString(*serialized); + actual.ParseFromString(serialized); ClientMalwareReportRequest expected; expected.set_malware_url(kMalwareURL); @@ -291,3 +483,112 @@ TEST_F(MalwareDetailsTest, MalwareWithRedirectUrl) { VerifyResults(actual, expected); } + +// Tests the interaction with the HTTP cache. +TEST_F(MalwareDetailsTest, HTTPCache) { + controller().LoadURL(GURL(kLandingURL), GURL(), PageTransition::TYPED); + + SafeBrowsingService::UnsafeResource resource; + InitResource(&resource, ResourceType::SUB_RESOURCE, GURL(kMalwareURL)); + + profile()->CreateRequestContext(); + scoped_refptr<MalwareDetailsWrap> report = new MalwareDetailsWrap( + sb_service_.get(), contents(), resource + , profile()->GetRequestContext()); + + FillCache(profile()->GetRequestContext()->GetURLRequestContext()); + + // The cache collection starts after the IPC from the DOM is fired. + std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node> params; + report->OnReceivedMalwareDOMDetails(params); + + // Let the cache callbacks complete + MessageLoop::current()->RunAllPending(); + + DVLOG(1) << "Getting serialized report"; + std::string serialized = WaitForSerializedReport(report); + ClientMalwareReportRequest actual; + actual.ParseFromString(serialized); + + ClientMalwareReportRequest expected; + expected.set_malware_url(kMalwareURL); + expected.set_page_url(kLandingURL); + expected.set_referrer_url(""); + + ClientMalwareReportRequest::Resource* pb_resource = expected.add_resources(); + pb_resource->set_id(0); + pb_resource->set_url(kLandingURL); + safe_browsing::ClientMalwareReportRequest::HTTPResponse* pb_response = + pb_resource->mutable_response(); + pb_response->mutable_firstline()->set_code(200); + safe_browsing::ClientMalwareReportRequest::HTTPHeader* pb_header = + pb_response->add_headers(); + pb_header->set_name("Content-Type"); + pb_header->set_value("text/html"); + pb_header = pb_response->add_headers(); + pb_header->set_name("Content-Length"); + pb_header->set_value("1024"); + pb_header = pb_response->add_headers(); + pb_header->set_name("Set-Cookie"); + pb_header->set_value(""); // The cookie is dropped. + pb_response->set_body(kLandingData); + pb_response->set_bodylength(37); + pb_response->set_bodydigest("9ca97475598a79bc1e8fc9bd6c72cd35"); + + pb_resource = expected.add_resources(); + pb_resource->set_id(1); + pb_resource->set_url(kMalwareURL); + pb_response = pb_resource->mutable_response(); + pb_response->mutable_firstline()->set_code(200); + pb_header = pb_response->add_headers(); + pb_header->set_name("Content-Type"); + pb_header->set_value("image/jpeg"); + pb_response->set_body(kMalwareData); + pb_response->set_bodylength(10); + pb_response->set_bodydigest("581373551c43d4cf33bfb3b26838ff95"); + expected.set_complete(true); + + VerifyResults(actual, expected); +} + +// Tests the interaction with the HTTP cache (where the cache is empty). +TEST_F(MalwareDetailsTest, HTTPCacheNoEntries) { + controller().LoadURL(GURL(kLandingURL), GURL(), PageTransition::TYPED); + + SafeBrowsingService::UnsafeResource resource; + InitResource(&resource, ResourceType::SUB_RESOURCE, GURL(kMalwareURL)); + + profile()->CreateRequestContext(); + scoped_refptr<MalwareDetailsWrap> report = new MalwareDetailsWrap( + sb_service_.get(), contents(), resource, + profile()->GetRequestContext()); + + // No call to FillCache + + // The cache collection starts after the IPC from the DOM is fired. + std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node> params; + report->OnReceivedMalwareDOMDetails(params); + + // Let the cache callbacks complete + MessageLoop::current()->RunAllPending(); + + DVLOG(1) << "Getting serialized report"; + std::string serialized = WaitForSerializedReport(report); + ClientMalwareReportRequest actual; + actual.ParseFromString(serialized); + + ClientMalwareReportRequest expected; + expected.set_malware_url(kMalwareURL); + expected.set_page_url(kLandingURL); + expected.set_referrer_url(""); + + ClientMalwareReportRequest::Resource* pb_resource = expected.add_resources(); + pb_resource->set_id(0); + pb_resource->set_url(kLandingURL); + pb_resource = expected.add_resources(); + pb_resource->set_id(1); + pb_resource->set_url(kMalwareURL); + expected.set_complete(true); + + VerifyResults(actual, expected); +} diff --git a/chrome/browser/safe_browsing/prefix_set.cc b/chrome/browser/safe_browsing/prefix_set.cc index 9ddd123..7aa19a0 100644 --- a/chrome/browser/safe_browsing/prefix_set.cc +++ b/chrome/browser/safe_browsing/prefix_set.cc @@ -7,11 +7,28 @@ #include <algorithm> #include <math.h> +#include "base/file_util.h" #include "base/logging.h" +#include "base/md5.h" #include "base/metrics/histogram.h" namespace { +// |kMagic| should be reasonably unique, and not match itself across +// endianness changes. I generated this value with: +// md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 +static uint32 kMagic = 0x864088dd; + +// Current version the code writes out. +static uint32 kVersion = 0x1; + +typedef struct { + uint32 magic; + uint32 version; + uint32 index_size; + uint32 deltas_size; +} FileHeader; + // For |std::upper_bound()| to find a prefix w/in a vector of pairs. bool PrefixLess(const std::pair<SBPrefix,size_t>& a, const std::pair<SBPrefix,size_t>& b) { @@ -29,6 +46,13 @@ PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) { size_t run_length = 0; index_.push_back(std::make_pair(prev_prefix, deltas_.size())); + // Used to build a checksum from the data used to construct the + // structures. Since the data is a bunch of uniform hashes, it + // seems reasonable to just xor most of it in, rather than trying + // to use a more complicated algorithm. + uint32 checksum = static_cast<uint32>(sorted_prefixes[0]); + checksum ^= static_cast<uint32>(deltas_.size()); + for (size_t i = 1; i < sorted_prefixes.size(); ++i) { // Skip duplicates. if (sorted_prefixes[i] == prev_prefix) @@ -37,22 +61,28 @@ PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) { // Calculate the delta. |unsigned| is mandatory, because the // sorted_prefixes could be more than INT_MAX apart. DCHECK_GT(sorted_prefixes[i], prev_prefix); - unsigned delta = sorted_prefixes[i] - prev_prefix; + const unsigned delta = sorted_prefixes[i] - prev_prefix; + const uint16 delta16 = static_cast<uint16>(delta); - // New index ref if the delta is too wide, or if too many - // consecutive deltas have been encoded. Note that - // |kMaxDelta| cannot itself be encoded. - if (delta >= kMaxDelta || run_length >= kMaxRun) { + // New index ref if the delta doesn't fit, or if too many + // consecutive deltas have been encoded. + if (delta != static_cast<unsigned>(delta16) || run_length >= kMaxRun) { + checksum ^= static_cast<uint32>(sorted_prefixes[i]); + checksum ^= static_cast<uint32>(deltas_.size()); index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size())); run_length = 0; } else { + checksum ^= static_cast<uint32>(delta16); // Continue the run of deltas. - deltas_.push_back(static_cast<uint16>(delta)); + deltas_.push_back(delta16); + DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta); ++run_length; } prev_prefix = sorted_prefixes[i]; } + checksum_ = checksum; + DCHECK(CheckChecksum()); // Send up some memory-usage stats. Bits because fractional bytes // are weird. @@ -66,6 +96,13 @@ PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) { } } +PrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index, + std::vector<uint16> *deltas) { + DCHECK(index && deltas); + index_.swap(*index); + deltas_.swap(*deltas); +} + PrefixSet::~PrefixSet() {} bool PrefixSet::Exists(SBPrefix prefix) const { @@ -101,7 +138,7 @@ bool PrefixSet::Exists(SBPrefix prefix) const { return current == prefix; } -void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) { +void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) const { for (size_t ii = 0; ii < index_.size(); ++ii) { // The deltas for this |index_| entry run to the next index entry, // or the end of the deltas. @@ -117,4 +154,193 @@ void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) { } } +// static +PrefixSet* PrefixSet::LoadFile(const FilePath& filter_name) { + int64 size_64; + if (!file_util::GetFileSize(filter_name, &size_64)) + return NULL; + if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest))) + return NULL; + + file_util::ScopedFILE file(file_util::OpenFile(filter_name, "rb")); + if (!file.get()) + return NULL; + + FileHeader header; + size_t read = fread(&header, sizeof(header), 1, file.get()); + if (read != 1) + return NULL; + + if (header.magic != kMagic || header.version != kVersion) + return NULL; + + std::vector<std::pair<SBPrefix,size_t> > index; + const size_t index_bytes = sizeof(index[0]) * header.index_size; + + std::vector<uint16> deltas; + const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; + + // Check for bogus sizes before allocating any space. + const size_t expected_bytes = + sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest); + if (static_cast<int64>(expected_bytes) != size_64) + return NULL; + + // The file looks valid, start building the digest. + MD5Context context; + MD5Init(&context); + MD5Update(&context, &header, sizeof(header)); + + // Read the index vector. Herb Sutter indicates that vectors are + // guaranteed to be contiuguous, so reading to where element 0 lives + // is valid. + index.resize(header.index_size); + read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); + if (read != index.size()) + return NULL; + MD5Update(&context, &(index[0]), index_bytes); + + // Read vector of deltas. + deltas.resize(header.deltas_size); + read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); + if (read != deltas.size()) + return NULL; + MD5Update(&context, &(deltas[0]), deltas_bytes); + + MD5Digest calculated_digest; + MD5Final(&calculated_digest, &context); + + MD5Digest file_digest; + read = fread(&file_digest, sizeof(file_digest), 1, file.get()); + if (read != 1) + return NULL; + + if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) + return NULL; + + // Steals contents of |index| and |deltas| via swap(). + return new PrefixSet(&index, &deltas); +} + +bool PrefixSet::WriteFile(const FilePath& filter_name) const { + FileHeader header; + header.magic = kMagic; + header.version = kVersion; + header.index_size = static_cast<uint32>(index_.size()); + header.deltas_size = static_cast<uint32>(deltas_.size()); + + // Sanity check that the 32-bit values never mess things up. + if (static_cast<size_t>(header.index_size) != index_.size() || + static_cast<size_t>(header.deltas_size) != deltas_.size()) { + NOTREACHED(); + return false; + } + + file_util::ScopedFILE file(file_util::OpenFile(filter_name, "wb")); + if (!file.get()) + return false; + + MD5Context context; + MD5Init(&context); + + // TODO(shess): The I/O code in safe_browsing_store_file.cc would + // sure be useful about now. + size_t written = fwrite(&header, sizeof(header), 1, file.get()); + if (written != 1) + return false; + MD5Update(&context, &header, sizeof(header)); + + // As for reads, the standard guarantees the ability to access the + // contents of the vector by a pointer to an element. + const size_t index_bytes = sizeof(index_[0]) * index_.size(); + written = fwrite(&(index_[0]), sizeof(index_[0]), index_.size(), file.get()); + if (written != index_.size()) + return false; + MD5Update(&context, &(index_[0]), index_bytes); + + const size_t deltas_bytes = sizeof(deltas_[0]) * deltas_.size(); + written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), + file.get()); + if (written != deltas_.size()) + return false; + MD5Update(&context, &(deltas_[0]), deltas_bytes); + + MD5Digest digest; + MD5Final(&digest, &context); + written = fwrite(&digest, sizeof(digest), 1, file.get()); + if (written != 1) + return false; + + // TODO(shess): Can this code check that the close was successful? + file.reset(); + + return true; +} + +size_t PrefixSet::IndexBinFor(size_t target_index) const { + // The items in |index_| have the logical index of each previous + // item in |index_| plus the count of deltas between the items. + // Since the indices into |deltas_| are absolute, the logical index + // is then the sum of the two indices. + size_t lo = 0; + size_t hi = index_.size(); + + // Binary search because linear search was too slow (really, the + // unit test sucked). Inline because the elements can't be compared + // in isolation (their position matters). + while (hi - lo > 1) { + const size_t i = (lo + hi) / 2; + + if (target_index < i + index_[i].second) { + DCHECK_LT(i, hi); // Always making progress. + hi = i; + } else { + DCHECK_GT(i, lo); // Always making progress. + lo = i; + } + } + return lo; +} + +size_t PrefixSet::GetSize() const { + return index_.size() + deltas_.size(); +} + +bool PrefixSet::IsDeltaAt(size_t target_index) const { + CHECK_LT(target_index, GetSize()); + + const size_t i = IndexBinFor(target_index); + return target_index > i + index_[i].second; +} + +uint16 PrefixSet::DeltaAt(size_t target_index) const { + CHECK_LT(target_index, GetSize()); + + // Find the |index_| entry which contains |target_index|. + const size_t i = IndexBinFor(target_index); + + // Exactly on the |index_| entry means no delta. + CHECK_GT(target_index, i + index_[i].second); + + // -i backs out the |index_| entries, -1 gets the delta that lead to + // the value at |target_index|. + CHECK_LT(target_index - i - 1, deltas_.size()); + return deltas_[target_index - i - 1]; +} + +bool PrefixSet::CheckChecksum() const { + uint32 checksum = 0; + + for (size_t ii = 0; ii < index_.size(); ++ii) { + checksum ^= static_cast<uint32>(index_[ii].first); + checksum ^= static_cast<uint32>(index_[ii].second); + } + + for (size_t di = 0; di < deltas_.size(); ++di) { + checksum ^= static_cast<uint32>(deltas_[di]); + } + + return checksum == checksum_; +} + } // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/prefix_set.h b/chrome/browser/safe_browsing/prefix_set.h index cb2cd9f..528cbea 100644 --- a/chrome/browser/safe_browsing/prefix_set.h +++ b/chrome/browser/safe_browsing/prefix_set.h @@ -37,8 +37,7 @@ // 2^16 apart, which would need 512k (versus 256k to store the raw // data). // -// TODO(shess): Write serialization code. Something like this should -// work: +// The on-disk format looks like: // 4 byte magic number // 4 byte version number // 4 byte |index_.size()| @@ -55,6 +54,8 @@ #include "chrome/browser/safe_browsing/safe_browsing_util.h" +class FilePath; + namespace safe_browsing { class PrefixSet { @@ -65,19 +66,45 @@ class PrefixSet { // |true| if |prefix| was in |prefixes| passed to the constructor. bool Exists(SBPrefix prefix) const; + // Persist the set on disk. + static PrefixSet* LoadFile(const FilePath& filter_name); + bool WriteFile(const FilePath& filter_name) const; + // Regenerate the vector of prefixes passed to the constructor into // |prefixes|. Prefixes will be added in sorted order. - void GetPrefixes(std::vector<SBPrefix>* prefixes); + void GetPrefixes(std::vector<SBPrefix>* prefixes) const; - private: - // Maximum delta that can be encoded in a 16-bit unsigned. - static const unsigned kMaxDelta = 256 * 256; + // TODO(shess): The following are debugging accessors. Delete once + // the encoding problem is figured out. + + size_t IndexBinFor(size_t target_index) const; + + // The number of prefixes represented. + size_t GetSize() const; + + // Returns |true| if the element at |target_index| is between items in the + // |index_| array. + bool IsDeltaAt(size_t target_index) const; + // Returns the delta used to calculate the element at + // |target_index|. Only call if |IsDeltaAt()| returned |true|. + uint16 DeltaAt(size_t target_index) const; + + // Check whether |index_| and |deltas_| still match the CRC + // generated during construction. + bool CheckChecksum() const; + + private: // Maximum number of consecutive deltas to encode before generating // a new index entry. This helps keep the worst-case performance // for |Exists()| under control. static const size_t kMaxRun = 100; + // Helper for |LoadFile()|. Steals the contents of |index| and + // |deltas| using |swap()|. + PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index, + std::vector<uint16> *deltas); + // Top-level index of prefix to offset in |deltas_|. Each pair // indicates a base prefix and where the deltas from that prefix // begin in |deltas_|. The deltas for a pair end at the next pair's @@ -89,6 +116,11 @@ class PrefixSet { // |index_|, or the end of |deltas_| for the last |index_| pair. std::vector<uint16> deltas_; + // For debugging, used to verify that |index_| and |deltas| were not + // changed after generation during construction. |checksum_| is + // calculated from the data used to construct those vectors. + uint32 checksum_; + DISALLOW_COPY_AND_ASSIGN(PrefixSet); }; diff --git a/chrome/browser/safe_browsing/prefix_set_unittest.cc b/chrome/browser/safe_browsing/prefix_set_unittest.cc index f7be5da..f0002b7 100644 --- a/chrome/browser/safe_browsing/prefix_set_unittest.cc +++ b/chrome/browser/safe_browsing/prefix_set_unittest.cc @@ -6,59 +6,231 @@ #include <algorithm> +#include "base/file_util.h" #include "base/logging.h" +#include "base/md5.h" +#include "base/memory/scoped_ptr.h" +#include "base/memory/scoped_temp_dir.h" #include "base/rand_util.h" #include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" namespace { -SBPrefix GenPrefix() { - return static_cast<SBPrefix>(base::RandUint64()); -} +class PrefixSetTest : public PlatformTest { + protected: + // Constants for the v1 format. + static const size_t kMagicOffset = 0 * sizeof(uint32); + static const size_t kVersionOffset = 1 * sizeof(uint32); + static const size_t kIndexSizeOffset = 2 * sizeof(uint32); + static const size_t kDeltasSizeOffset = 3 * sizeof(uint32); + static const size_t kPayloadOffset = 4 * sizeof(uint32); -// Test that a small sparse random input works. -TEST(PrefixSetTest, Baseline) { - std::vector<SBPrefix> prefixes; + // Generate a set of random prefixes to share between tests. For + // most tests this generation was a large fraction of the test time. + static void SetUpTestCase() { + for (size_t i = 0; i < 50000; ++i) { + const SBPrefix prefix = static_cast<SBPrefix>(base::RandUint64()); + shared_prefixes_.push_back(prefix); + } + + // Sort for use with PrefixSet constructor. + std::sort(shared_prefixes_.begin(), shared_prefixes_.end()); + } + + // Check that all elements of |prefixes| are in |prefix_set|, and + // that nearby elements are not (for lack of a more sensible set of + // items to check for absence). + static void CheckPrefixes(safe_browsing::PrefixSet* prefix_set, + const std::vector<SBPrefix> &prefixes) { + // The set can generate the prefixes it believes it has, so that's + // a good starting point. + std::set<SBPrefix> check(prefixes.begin(), prefixes.end()); + std::vector<SBPrefix> prefixes_copy; + prefix_set->GetPrefixes(&prefixes_copy); + EXPECT_EQ(prefixes_copy.size(), check.size()); + EXPECT_TRUE(std::equal(check.begin(), check.end(), prefixes_copy.begin())); + + for (size_t i = 0; i < prefixes.size(); ++i) { + EXPECT_TRUE(prefix_set->Exists(prefixes[i])); - static const size_t kCount = 50000; + const SBPrefix left_sibling = prefixes[i] - 1; + if (check.count(left_sibling) == 0) + EXPECT_FALSE(prefix_set->Exists(left_sibling)); - for (size_t i = 0; i < kCount; ++i) { - prefixes.push_back(GenPrefix()); + const SBPrefix right_sibling = prefixes[i] + 1; + if (check.count(right_sibling) == 0) + EXPECT_FALSE(prefix_set->Exists(right_sibling)); + } } - std::sort(prefixes.begin(), prefixes.end()); - safe_browsing::PrefixSet prefix_set(prefixes); + // Generate a |PrefixSet| file from |shared_prefixes_|, store it in + // a temporary file, and return the filename in |filenamep|. + // Returns |true| on success. + bool GetPrefixSetFile(FilePath* filenamep) { + if (!temp_dir_.IsValid() && !temp_dir_.CreateUniqueTempDir()) + return false; - // Check that |GetPrefixes()| returns exactly the same set of - // prefixes as was passed in. - std::set<SBPrefix> check(prefixes.begin(), prefixes.end()); - std::vector<SBPrefix> prefixes_copy; - prefix_set.GetPrefixes(&prefixes_copy); - EXPECT_EQ(prefixes_copy.size(), check.size()); - EXPECT_TRUE(std::equal(check.begin(), check.end(), prefixes_copy.begin())); + FilePath filename = temp_dir_.path().AppendASCII("PrefixSetTest"); - // Check that the set flags all of the inputs, and also check items - // just above and below the inputs to make sure they aren't there. - for (size_t i = 0; i < prefixes.size(); ++i) { - EXPECT_TRUE(prefix_set.Exists(prefixes[i])); + safe_browsing::PrefixSet prefix_set(shared_prefixes_); + if (!prefix_set.WriteFile(filename)) + return false; - const SBPrefix left_sibling = prefixes[i] - 1; - if (check.count(left_sibling) == 0) - EXPECT_FALSE(prefix_set.Exists(left_sibling)); + *filenamep = filename; + return true; + } - const SBPrefix right_sibling = prefixes[i] + 1; - if (check.count(right_sibling) == 0) - EXPECT_FALSE(prefix_set.Exists(right_sibling)); + // Helper function to read the int32 value at |offset|, increment it + // by |inc|, and write it back in place. |fp| should be opened in + // r+ mode. + static void IncrementIntAt(FILE* fp, long offset, int inc) { + int32 value = 0; + + ASSERT_NE(-1, fseek(fp, offset, SEEK_SET)); + ASSERT_EQ(1U, fread(&value, sizeof(value), 1, fp)); + + value += inc; + + ASSERT_NE(-1, fseek(fp, offset, SEEK_SET)); + ASSERT_EQ(1U, fwrite(&value, sizeof(value), 1, fp)); } + + // Helper function to re-generated |fp|'s checksum to be correct for + // the file's contents. |fp| should be opened in r+ mode. + static void CleanChecksum(FILE* fp) { + MD5Context context; + MD5Init(&context); + + ASSERT_NE(-1, fseek(fp, 0, SEEK_END)); + long file_size = ftell(fp); + + size_t payload_size = static_cast<size_t>(file_size) - sizeof(MD5Digest); + size_t digested_size = 0; + ASSERT_NE(-1, fseek(fp, 0, SEEK_SET)); + while (digested_size < payload_size) { + char buf[1024]; + size_t nitems = std::min(payload_size - digested_size, sizeof(buf)); + ASSERT_EQ(nitems, fread(buf, 1, nitems, fp)); + MD5Update(&context, &buf, nitems); + digested_size += nitems; + } + ASSERT_EQ(digested_size, payload_size); + ASSERT_EQ(static_cast<long>(digested_size), ftell(fp)); + + MD5Digest new_digest; + MD5Final(&new_digest, &context); + ASSERT_NE(-1, fseek(fp, digested_size, SEEK_SET)); + ASSERT_EQ(1U, fwrite(&new_digest, sizeof(new_digest), 1, fp)); + ASSERT_EQ(file_size, ftell(fp)); + } + + // Open |filename| and increment the int32 at |offset| by |inc|. + // Then re-generate the checksum to account for the new contents. + void ModifyAndCleanChecksum(const FilePath& filename, long offset, int inc) { + int64 size_64; + ASSERT_TRUE(file_util::GetFileSize(filename, &size_64)); + + file_util::ScopedFILE file(file_util::OpenFile(filename, "r+b")); + IncrementIntAt(file.get(), offset, inc); + CleanChecksum(file.get()); + file.reset(); + + int64 new_size_64; + ASSERT_TRUE(file_util::GetFileSize(filename, &new_size_64)); + ASSERT_EQ(new_size_64, size_64); + } + + // Tests should not modify this shared resource. + static std::vector<SBPrefix> shared_prefixes_; + + ScopedTempDir temp_dir_; +}; + +std::vector<SBPrefix> PrefixSetTest::shared_prefixes_; + +// Test that a small sparse random input works. +TEST_F(PrefixSetTest, Baseline) { + safe_browsing::PrefixSet prefix_set(shared_prefixes_); + CheckPrefixes(&prefix_set, shared_prefixes_); } // Test that the empty set doesn't appear to have anything in it. -TEST(PrefixSetTest, Empty) { +TEST_F(PrefixSetTest, Empty) { + const std::vector<SBPrefix> empty; + safe_browsing::PrefixSet prefix_set(empty); + for (size_t i = 0; i < shared_prefixes_.size(); ++i) { + EXPECT_FALSE(prefix_set.Exists(shared_prefixes_[i])); + } +} + +// Single-element set should work fine. +TEST_F(PrefixSetTest, OneElement) { + const std::vector<SBPrefix> prefixes(100, 0); + safe_browsing::PrefixSet prefix_set(prefixes); + EXPECT_FALSE(prefix_set.Exists(-1)); + EXPECT_TRUE(prefix_set.Exists(prefixes[0])); + EXPECT_FALSE(prefix_set.Exists(1)); + + // Check that |GetPrefixes()| returns the same set of prefixes as + // was passed in. + std::vector<SBPrefix> prefixes_copy; + prefix_set.GetPrefixes(&prefixes_copy); + EXPECT_EQ(1U, prefixes_copy.size()); + EXPECT_EQ(prefixes[0], prefixes_copy[0]); +} + +// Edges of the 32-bit integer range. +TEST_F(PrefixSetTest, IntMinMax) { std::vector<SBPrefix> prefixes; + + // Using bit patterns rather than portable constants because this + // really is testing how the entire 32-bit integer range is handled. + prefixes.push_back(0x00000000); + prefixes.push_back(0x0000FFFF); + prefixes.push_back(0x7FFF0000); + prefixes.push_back(0x7FFFFFFF); + prefixes.push_back(0x80000000); + prefixes.push_back(0x8000FFFF); + prefixes.push_back(0xFFFF0000); + prefixes.push_back(0xFFFFFFFF); + + std::sort(prefixes.begin(), prefixes.end()); safe_browsing::PrefixSet prefix_set(prefixes); - for (size_t i = 0; i < 500; ++i) { - EXPECT_FALSE(prefix_set.Exists(GenPrefix())); + + // Check that |GetPrefixes()| returns the same set of prefixes as + // was passed in. + std::vector<SBPrefix> prefixes_copy; + prefix_set.GetPrefixes(&prefixes_copy); + ASSERT_EQ(prefixes_copy.size(), prefixes.size()); + EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(), + prefixes_copy.begin())); +} + +// A range with only large deltas. +TEST_F(PrefixSetTest, AllBig) { + std::vector<SBPrefix> prefixes; + + const SBPrefix kVeryPositive = 1000 * 1000 * 1000; + const SBPrefix kVeryNegative = -kVeryPositive; + const unsigned kDelta = 10 * 1000 * 1000; + + for (SBPrefix prefix = kVeryNegative; + prefix < kVeryPositive; prefix += kDelta) { + prefixes.push_back(prefix); } + + std::sort(prefixes.begin(), prefixes.end()); + safe_browsing::PrefixSet prefix_set(prefixes); + + // Check that |GetPrefixes()| returns the same set of prefixes as + // was passed in. + std::vector<SBPrefix> prefixes_copy; + prefix_set.GetPrefixes(&prefixes_copy); + prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); + EXPECT_EQ(prefixes_copy.size(), prefixes.size()); + EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(), + prefixes_copy.begin())); } // Use artificial inputs to test various edge cases in Exists(). @@ -67,7 +239,7 @@ TEST(PrefixSetTest, Empty) { // deltas above and below 2^16, and make sure they're all present. // Create a very long sequence with deltas below 2^16 to test crossing // |kMaxRun|. -TEST(PrefixSetTest, EdgeCases) { +TEST_F(PrefixSetTest, EdgeCases) { std::vector<SBPrefix> prefixes; const SBPrefix kVeryPositive = 1000 * 1000 * 1000; @@ -132,4 +304,153 @@ TEST(PrefixSetTest, EdgeCases) { } } +// Similar to Baseline test, but write the set out to a file and read +// it back in before testing. +TEST_F(PrefixSetTest, ReadWrite) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_TRUE(prefix_set.get()); + + CheckPrefixes(prefix_set.get(), shared_prefixes_); +} + +// Check that |CleanChecksum()| makes an acceptable checksum. +TEST_F(PrefixSetTest, CorruptionHelpers) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + // This will modify data in |index_|, which will fail the digest check. + file_util::ScopedFILE file(file_util::OpenFile(filename, "r+b")); + IncrementIntAt(file.get(), kPayloadOffset, 1); + file.reset(); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); + + // Fix up the checksum and it will read successfully (though the + // data will be wrong). + file.reset(file_util::OpenFile(filename, "r+b")); + CleanChecksum(file.get()); + file.reset(); + prefix_set.reset(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_TRUE(prefix_set.get()); +} + +// Bad |index_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionMagic) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kMagicOffset, 1)); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad |index_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionVersion) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kVersionOffset, 1)); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad |index_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionIndexSize) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kIndexSizeOffset, 1)); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Bad |deltas_| size is caught by the sanity check. +TEST_F(PrefixSetTest, CorruptionDeltasSize) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + ASSERT_NO_FATAL_FAILURE( + ModifyAndCleanChecksum(filename, kDeltasSizeOffset, 1)); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Test that the digest catches corruption in the middle of the file +// (in the payload between the header and the digest). +TEST_F(PrefixSetTest, CorruptionPayload) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + file_util::ScopedFILE file(file_util::OpenFile(filename, "r+b")); + ASSERT_NO_FATAL_FAILURE(IncrementIntAt(file.get(), 666, 1)); + file.reset(); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Test corruption in the digest itself. +TEST_F(PrefixSetTest, CorruptionDigest) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + int64 size_64; + ASSERT_TRUE(file_util::GetFileSize(filename, &size_64)); + file_util::ScopedFILE file(file_util::OpenFile(filename, "r+b")); + long digest_offset = static_cast<long>(size_64 - sizeof(MD5Digest)); + ASSERT_NO_FATAL_FAILURE(IncrementIntAt(file.get(), digest_offset, 1)); + file.reset(); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// Test excess data after the digest (fails the size test). +TEST_F(PrefixSetTest, CorruptionExcess) { + FilePath filename; + ASSERT_TRUE(GetPrefixSetFile(&filename)); + + // Add some junk to the trunk. + file_util::ScopedFILE file(file_util::OpenFile(filename, "ab")); + const char buf[] = "im in ur base, killing ur d00dz."; + ASSERT_EQ(strlen(buf), fwrite(buf, 1, strlen(buf), file.get())); + file.reset(); + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + ASSERT_FALSE(prefix_set.get()); +} + +// TODO(shess): Remove once the problem is debugged. But, until then, +// make sure the accessors work! +TEST_F(PrefixSetTest, DebuggingAccessors) { + std::vector<SBPrefix> prefixes; + std::unique_copy(shared_prefixes_.begin(), shared_prefixes_.end(), + std::back_inserter(prefixes)); + safe_browsing::PrefixSet prefix_set(prefixes); + + EXPECT_EQ(prefixes.size(), prefix_set.GetSize()); + EXPECT_FALSE(prefix_set.IsDeltaAt(0)); + for (size_t i = 1; i < prefixes.size(); ++i) { + const int delta = prefixes[i] - prefixes[i - 1]; + if (delta > 0xFFFF) { + EXPECT_FALSE(prefix_set.IsDeltaAt(i)); + } else { + ASSERT_TRUE(prefix_set.IsDeltaAt(i)); + EXPECT_EQ(delta, prefix_set.DeltaAt(i)); + } + } +} + } // namespace diff --git a/chrome/browser/safe_browsing/protocol_manager.cc b/chrome/browser/safe_browsing/protocol_manager.cc index 86c1328..ef88875 100644 --- a/chrome/browser/safe_browsing/protocol_manager.cc +++ b/chrome/browser/safe_browsing/protocol_manager.cc @@ -19,10 +19,10 @@ #include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "chrome/common/chrome_version_info.h" #include "chrome/common/env_vars.h" -#include "chrome/common/net/url_request_context_getter.h" #include "content/browser/browser_thread.h" #include "net/base/escape.h" #include "net/base/load_flags.h" +#include "net/url_request/url_request_context_getter.h" #include "net/url_request/url_request_status.h" using base::Time; @@ -47,7 +47,7 @@ class SBProtocolManagerFactoryImpl : public SBProtocolManagerFactory { const std::string& client_name, const std::string& client_key, const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter, + net::URLRequestContextGetter* request_context_getter, const std::string& info_url_prefix, const std::string& mackey_url_prefix, bool disable_auto_update) { @@ -71,7 +71,7 @@ SafeBrowsingProtocolManager* SafeBrowsingProtocolManager::Create( const std::string& client_name, const std::string& client_key, const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter, + net::URLRequestContextGetter* request_context_getter, const std::string& info_url_prefix, const std::string& mackey_url_prefix, bool disable_auto_update) { @@ -89,7 +89,7 @@ SafeBrowsingProtocolManager::SafeBrowsingProtocolManager( const std::string& client_name, const std::string& client_key, const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter, + net::URLRequestContextGetter* request_context_getter, const std::string& http_url_prefix, const std::string& https_url_prefix, bool disable_auto_update) @@ -649,13 +649,17 @@ void SafeBrowsingProtocolManager::ReportSafeBrowsingHit( const GURL& page_url, const GURL& referrer_url, bool is_subresource, - SafeBrowsingService::UrlCheckResult threat_type) { + SafeBrowsingService::UrlCheckResult threat_type, + const std::string& post_data) { GURL report_url = SafeBrowsingHitUrl(malicious_url, page_url, referrer_url, is_subresource, threat_type); - URLFetcher* report = new URLFetcher(report_url, URLFetcher::GET, this); + URLFetcher* report = new URLFetcher( + report_url, post_data.empty() ? URLFetcher::GET : URLFetcher::POST, this); report->set_load_flags(net::LOAD_DISABLE_CACHE); report->set_request_context(request_context_getter_); + if (!post_data.empty()) + report->set_upload_data("text/plain", post_data); report->Start(); safebrowsing_reports_.insert(report); } @@ -763,7 +767,8 @@ GURL SafeBrowsingProtocolManager::SafeBrowsingHitUrl( SafeBrowsingService::UrlCheckResult threat_type) const { DCHECK(threat_type == SafeBrowsingService::URL_MALWARE || threat_type == SafeBrowsingService::URL_PHISHING || - threat_type == SafeBrowsingService::BINARY_MALWARE_URL); + threat_type == SafeBrowsingService::BINARY_MALWARE_URL || + threat_type == SafeBrowsingService::BINARY_MALWARE_HASH); // The malware and phishing hits go over HTTP. std::string url = ComposeUrl(http_url_prefix_, "report", client_name_, version_, additional_query_); @@ -778,6 +783,9 @@ GURL SafeBrowsingProtocolManager::SafeBrowsingHitUrl( case SafeBrowsingService::BINARY_MALWARE_URL: threat_list = "binurlhit"; break; + case SafeBrowsingService::BINARY_MALWARE_HASH: + threat_list = "binhashhit"; + break; default: NOTREACHED(); } diff --git a/chrome/browser/safe_browsing/protocol_manager.h b/chrome/browser/safe_browsing/protocol_manager.h index 994849a..10c18bc 100644 --- a/chrome/browser/safe_browsing/protocol_manager.h +++ b/chrome/browser/safe_browsing/protocol_manager.h @@ -18,7 +18,7 @@ #include "base/gtest_prod_util.h" #include "base/hash_tables.h" -#include "base/scoped_ptr.h" +#include "base/memory/scoped_ptr.h" #include "base/time.h" #include "base/timer.h" #include "chrome/browser/safe_browsing/chunk_range.h" @@ -55,7 +55,7 @@ class SBProtocolManagerFactory { const std::string& client_name, const std::string& client_key, const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter, + net::URLRequestContextGetter* request_context_getter, const std::string& info_url_prefix, const std::string& mackey_url_prefix, bool disable_auto_update) = 0; @@ -93,7 +93,7 @@ class SafeBrowsingProtocolManager : public URLFetcher::Delegate { const std::string& client_name, const std::string& client_key, const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter, + net::URLRequestContextGetter* request_context_getter, const std::string& info_url_prefix, const std::string& mackey_url_prefix, bool disable_auto_update); @@ -132,13 +132,14 @@ class SafeBrowsingProtocolManager : public URLFetcher::Delegate { void OnChunkInserted(); // For UMA users we report to Google when a SafeBrowsing interstitial is shown - // to the user. We assume that the threat type is either URL_MALWARE or - // URL_PHISHING. + // to the user. |threat_type| should be one of the types known by + // SafeBrowsingHitUrl. void ReportSafeBrowsingHit(const GURL& malicious_url, const GURL& page_url, const GURL& referrer_url, bool is_subresource, - SafeBrowsingService::UrlCheckResult threat_type); + SafeBrowsingService::UrlCheckResult threat_type, + const std::string& post_data); // Users can opt-in on the SafeBrowsing interstitial to send detailed // malware reports. |report| is the serialized report. @@ -198,14 +199,15 @@ class SafeBrowsingProtocolManager : public URLFetcher::Delegate { // network requests using |request_context_getter|. When |disable_auto_update| // is true, protocol manager won't schedule next update until // ForceScheduleNextUpdate is called. - SafeBrowsingProtocolManager(SafeBrowsingService* sb_service, - const std::string& client_name, - const std::string& client_key, - const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter, - const std::string& http_url_prefix, - const std::string& https_url_prefix, - bool disable_auto_update); + SafeBrowsingProtocolManager( + SafeBrowsingService* sb_service, + const std::string& client_name, + const std::string& client_key, + const std::string& wrapped_key, + net::URLRequestContextGetter* request_context_getter, + const std::string& http_url_prefix, + const std::string& https_url_prefix, + bool disable_auto_update); private: friend class SBProtocolManagerFactoryImpl; @@ -394,7 +396,7 @@ class SafeBrowsingProtocolManager : public URLFetcher::Delegate { std::string additional_query_; // The context we use to issue network requests. - scoped_refptr<URLRequestContextGetter> request_context_getter_; + scoped_refptr<net::URLRequestContextGetter> request_context_getter_; // URL prefix where browser fetches safebrowsing chunk updates, hashes, and // reports hits to the safebrowsing list for UMA users. diff --git a/chrome/browser/safe_browsing/protocol_manager_unittest.cc b/chrome/browser/safe_browsing/protocol_manager_unittest.cc index fc119ff..69bff62 100644 --- a/chrome/browser/safe_browsing/protocol_manager_unittest.cc +++ b/chrome/browser/safe_browsing/protocol_manager_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -218,6 +218,24 @@ TEST_F(SafeBrowsingProtocolManagerTest, TestSafeBrowsingHitUrl) { pm.SafeBrowsingHitUrl( malicious_url, page_url, referrer_url, false, SafeBrowsingService::URL_PHISHING).spec()); + + EXPECT_EQ("http://info.prefix.com/foo/report?client=unittest&appver=1.0&" + "pver=2.2&additional_query&evts=binurlhit&" + "evtd=http%3A%2F%2Fmalicious.url.com%2F&" + "evtr=http%3A%2F%2Fpage.url.com%2F&evhr=http%3A%2F%2Freferrer." + "url.com%2F&evtb=0", + pm.SafeBrowsingHitUrl( + malicious_url, page_url, referrer_url, + false, SafeBrowsingService::BINARY_MALWARE_URL).spec()); + + EXPECT_EQ("http://info.prefix.com/foo/report?client=unittest&appver=1.0&" + "pver=2.2&additional_query&evts=binhashhit&" + "evtd=http%3A%2F%2Fmalicious.url.com%2F&" + "evtr=http%3A%2F%2Fpage.url.com%2F&evhr=http%3A%2F%2Freferrer." + "url.com%2F&evtb=0", + pm.SafeBrowsingHitUrl( + malicious_url, page_url, referrer_url, + false, SafeBrowsingService::BINARY_MALWARE_HASH).spec()); } TEST_F(SafeBrowsingProtocolManagerTest, TestMalwareDetailsUrl) { diff --git a/chrome/browser/safe_browsing/report.proto b/chrome/browser/safe_browsing/report.proto index 719c918..5e203a7 100644 --- a/chrome/browser/safe_browsing/report.proto +++ b/chrome/browser/safe_browsing/report.proto @@ -87,4 +87,7 @@ message ClientMalwareReportRequest { optional string referrer_url = 3; repeated Resource resources = 4; + + // Whether the report has HTTP Responses. + optional bool complete = 5; } diff --git a/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc b/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc index 410f392..70d1ac5 100644 --- a/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc +++ b/chrome/browser/safe_browsing/safe_browsing_blocking_page.cc @@ -52,12 +52,12 @@ static const char* const kSbReportPhishingUrl = // URL for the "Learn more" link on the multi threat malware blocking page. static const char* const kLearnMoreMalwareUrl = - "http://www.google.com/support/bin/answer.py?answer=45449&topic=360" + "https://www.google.com/support/bin/answer.py?answer=45449&topic=360" "&sa=X&oi=malwarewarninglink&resnum=1&ct=help"; // URL for the "Learn more" link on the phishing blocking page. static const char* const kLearnMorePhishingUrl = - "http://www.google.com/support/bin/answer.py?answer=106318"; + "https://www.google.com/support/bin/answer.py?answer=106318"; // URL for the "Safe Browsing Privacy Policies" link on the blocking page. // Note: this page is not yet localized. @@ -147,7 +147,7 @@ SafeBrowsingBlockingPage::SafeBrowsingBlockingPage( malware_details_ == NULL && CanShowMalwareDetailsOption()) { malware_details_ = MalwareDetails::NewMalwareDetails( - tab(), unsafe_resources[0]); + sb_service_, tab(), unsafe_resources[0]); } } @@ -605,12 +605,11 @@ void SafeBrowsingBlockingPage::FinishMalwareDetails() { bool value; if (pref && pref->GetValue()->GetAsBoolean(&value) && value) { - // Give the details object to the service class, so it can send it. + // Finish the malware details collection, send it over. BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, NewRunnableMethod( - sb_service_, &SafeBrowsingService::ReportMalwareDetails, - malware_details_)); + malware_details_.get(), &MalwareDetails::FinishCollection)); } } diff --git a/chrome/browser/safe_browsing/safe_browsing_blocking_page_test.cc b/chrome/browser/safe_browsing/safe_browsing_blocking_page_test.cc index fcbf9a9..50c41bc 100644 --- a/chrome/browser/safe_browsing/safe_browsing_blocking_page_test.cc +++ b/chrome/browser/safe_browsing/safe_browsing_blocking_page_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -49,7 +49,7 @@ class FakeSafeBrowsingService : public SafeBrowsingService { void OnCheckBrowseURLDone(const GURL& gurl, Client* client) { SafeBrowsingService::SafeBrowsingCheck check; - check.url.reset(new GURL(gurl)); + check.urls.push_back(gurl); check.client = client; check.result = badurls[gurl.spec()]; client->OnSafeBrowsingResult(check); @@ -59,12 +59,14 @@ class FakeSafeBrowsingService : public SafeBrowsingService { badurls[url.spec()] = checkresult; } - virtual void ReportMalwareDetails(scoped_refptr<MalwareDetails> details) { - details_.push_back(details); - // Notify the UI thread, that we got a report. - BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, - NewRunnableMethod(this, - &FakeSafeBrowsingService::OnMalwareDetailsDone)); + // Overrides SafeBrowsingService. + virtual void SendSerializedMalwareDetails(const std::string& serialized) { + reports_.push_back(serialized); + // Notify the UI thread that we got a report. + BrowserThread::PostTask( + BrowserThread::UI, FROM_HERE, + NewRunnableMethod(this, + &FakeSafeBrowsingService::OnMalwareDetailsDone)); } void OnMalwareDetailsDone() { @@ -72,11 +74,12 @@ class FakeSafeBrowsingService : public SafeBrowsingService { MessageLoopForUI::current()->Quit(); } - std::list<scoped_refptr<MalwareDetails> >* GetDetails() { - return &details_; + std::string GetReport() { + EXPECT_TRUE(reports_.size() == 1); + return reports_[0]; } - std::list<scoped_refptr<MalwareDetails> > details_; + std::vector<std::string> reports_; private: base::hash_map<std::string, UrlCheckResult> badurls; @@ -96,14 +99,15 @@ class TestSafeBrowsingServiceFactory : public SafeBrowsingServiceFactory { // A MalwareDetails class lets us intercept calls from the renderer. class FakeMalwareDetails : public MalwareDetails { public: - FakeMalwareDetails(TabContents* tab_contents, + FakeMalwareDetails(SafeBrowsingService* sb_service, + TabContents* tab_contents, const SafeBrowsingService::UnsafeResource& unsafe_resource) - : MalwareDetails(tab_contents, unsafe_resource) { } + : MalwareDetails(sb_service, tab_contents, unsafe_resource) { } virtual ~FakeMalwareDetails() {} virtual void AddDOMDetails( - const ViewHostMsg_MalwareDOMDetails_Params& params) { + const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) { EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::IO)); MalwareDetails::AddDOMDetails(params); @@ -136,6 +140,10 @@ class FakeMalwareDetails : public MalwareDetails { waiting_ = waiting; } + safe_browsing::ClientMalwareReportRequest* get_report() { + return report_.get(); + } + private: // Some logic to figure out if we should wait for the dom details or not. // These variables should only be accessed in the UI thread. @@ -150,9 +158,11 @@ class TestMalwareDetailsFactory : public MalwareDetailsFactory { virtual ~TestMalwareDetailsFactory() { } virtual MalwareDetails* CreateMalwareDetails( + SafeBrowsingService* sb_service, TabContents* tab_contents, const SafeBrowsingService::UnsafeResource& unsafe_resource) { - details_ = new FakeMalwareDetails(tab_contents, unsafe_resource); + details_ = new FakeMalwareDetails(sb_service, tab_contents, + unsafe_resource); return details_; } @@ -164,6 +174,46 @@ class TestMalwareDetailsFactory : public MalwareDetailsFactory { FakeMalwareDetails* details_; }; +// A SafeBrowingBlockingPage class that lets us wait until it's hidden. +class TestSafeBrowsingBlockingPage : public SafeBrowsingBlockingPage { + public: + TestSafeBrowsingBlockingPage(SafeBrowsingService* service, + TabContents* tab_contents, + const UnsafeResourceList& unsafe_resources) + : SafeBrowsingBlockingPage(service, tab_contents, unsafe_resources) { + wait_for_delete_ = false; + } + + ~TestSafeBrowsingBlockingPage() { + if (wait_for_delete_) { + // Notify that we are gone + MessageLoopForUI::current()->Quit(); + } + } + + void set_wait_for_delete() { + wait_for_delete_ = true; + } + + private: + bool wait_for_delete_; +}; + +class TestSafeBrowsingBlockingPageFactory + : public SafeBrowsingBlockingPageFactory { + public: + TestSafeBrowsingBlockingPageFactory() { } + ~TestSafeBrowsingBlockingPageFactory() { } + + virtual SafeBrowsingBlockingPage* CreateSafeBrowsingPage( + SafeBrowsingService* service, + TabContents* tab_contents, + const SafeBrowsingBlockingPage::UnsafeResourceList& unsafe_resources) { + return new TestSafeBrowsingBlockingPage(service, tab_contents, + unsafe_resources); + } +}; + // Tests the safe browsing blocking page in a browser. class SafeBrowsingBlockingPageTest : public InProcessBrowserTest, public SafeBrowsingService::Client { @@ -173,12 +223,14 @@ class SafeBrowsingBlockingPageTest : public InProcessBrowserTest, virtual void SetUp() { SafeBrowsingService::RegisterFactory(&factory_); + SafeBrowsingBlockingPage::RegisterFactory(&blocking_page_factory_); MalwareDetails::RegisterFactory(&details_factory_); InProcessBrowserTest::SetUp(); } virtual void TearDown() { InProcessBrowserTest::TearDown(); + SafeBrowsingBlockingPage::RegisterFactory(NULL); SafeBrowsingService::RegisterFactory(NULL); MalwareDetails::RegisterFactory(NULL); } @@ -236,12 +288,34 @@ class SafeBrowsingBlockingPageTest : public InProcessBrowserTest, interstitial_page->Proceed(); } - void AssertNoInterstitial() { - // ui_test_utils::RunAllPendingInMessageLoop(); + void AssertNoInterstitial(bool wait_for_delete) { + TabContents* contents = browser()->GetSelectedTabContents(); + + if (contents->showing_interstitial_page() && wait_for_delete) { + // We'll get notified when the interstitial is deleted. + static_cast<TestSafeBrowsingBlockingPage*>( + contents->interstitial_page())->set_wait_for_delete(); + ui_test_utils::RunMessageLoop(); + } + + // Can't use InterstitialPage::GetInterstitialPage() because that + // gets updated after the TestSafeBrowsingBlockingPage destructor + ASSERT_FALSE(contents->showing_interstitial_page()); + } + + bool YesInterstitial() { TabContents* contents = browser()->GetSelectedTabContents(); InterstitialPage* interstitial_page = InterstitialPage::GetInterstitialPage( contents); - ASSERT_FALSE(interstitial_page); + return interstitial_page != NULL; + } + + void WaitForInterstitial() { + TabContents* contents = browser()->GetSelectedTabContents(); + if (!InterstitialPage::GetInterstitialPage(contents)) + ui_test_utils::WaitForNotificationFrom( + NotificationType::INTERSTITIAL_ATTACHED, + Source<TabContents>(contents)); } void WaitForNavigation() { @@ -258,24 +332,79 @@ class SafeBrowsingBlockingPageTest : public InProcessBrowserTest, static_cast<FakeSafeBrowsingService*>( g_browser_process->resource_dispatcher_host()-> safe_browsing_service()); - ASSERT_EQ(1u, service->GetDetails()->size()); + + std::string serialized = service->GetReport(); + + safe_browsing::ClientMalwareReportRequest report; + ASSERT_TRUE(report.ParseFromString(serialized)); + + // Verify the report is complete. + EXPECT_TRUE(report.complete()); } + void MalwareRedirectCancelAndProceed(const std::string open_function); + protected: TestMalwareDetailsFactory details_factory_; private: TestSafeBrowsingServiceFactory factory_; + TestSafeBrowsingBlockingPageFactory blocking_page_factory_; DISALLOW_COPY_AND_ASSIGN(SafeBrowsingBlockingPageTest); }; +void SafeBrowsingBlockingPageTest::MalwareRedirectCancelAndProceed( + const std::string open_function) { + GURL load_url = test_server()->GetURL( + "files/safe_browsing/interstitial_cancel.html"); + GURL malware_url("http://localhost/files/safe_browsing/malware.html"); + AddURLResult(malware_url, SafeBrowsingService::URL_MALWARE); + + // Load the test page. + ui_test_utils::NavigateToURL(browser(), load_url); + // Trigger the safe browsing interstitial page via a redirect in "openWin()". + ui_test_utils::NavigateToURLWithDisposition( + browser(), + GURL("javascript:" + open_function + "()"), + CURRENT_TAB, + ui_test_utils::BROWSER_TEST_WAIT_FOR_TAB); + WaitForInterstitial(); + // Cancel the redirect request while interstitial page is open. + browser()->ActivateTabAt(0, true); + ui_test_utils::NavigateToURLWithDisposition( + browser(), + GURL("javascript:stopWin()"), + CURRENT_TAB, + ui_test_utils::BROWSER_TEST_WAIT_FOR_NAVIGATION); + browser()->ActivateTabAt(1, true); + // Simulate the user clicking "proceed", there should be no crash. + SendCommand("\"proceed\""); +} + namespace { const char kEmptyPage[] = "files/empty.html"; const char kMalwarePage[] = "files/safe_browsing/malware.html"; const char kMalwareIframe[] = "files/safe_browsing/malware_iframe.html"; +IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, + MalwareRedirectInIFrameCanceled) { + // 1. Test the case that redirect is a subresource. + MalwareRedirectCancelAndProceed("openWinIFrame"); + // If the redirect was from subresource but canceled, "proceed" will continue + // with the rest of resources. + AssertNoInterstitial(true); +} + +IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, MalwareRedirectCanceled) { + // 2. Test the case that redirect is the only resource. + MalwareRedirectCancelAndProceed("openWin"); + // Clicking proceed won't do anything if the main request is cancelled + // already. See crbug.com/76460. + EXPECT_TRUE(YesInterstitial()); +} + IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, MalwareDontProceed) { GURL url = test_server()->GetURL(kEmptyPage); AddURLResult(url, SafeBrowsingService::URL_MALWARE); @@ -283,7 +412,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, MalwareDontProceed) { ui_test_utils::NavigateToURL(browser(), url); SendCommand("\"takeMeBack\""); // Simulate the user clicking "back" - AssertNoInterstitial(); // Assert the interstitial is gone + AssertNoInterstitial(false); // Assert the interstitial is gone EXPECT_EQ(GURL(chrome::kAboutBlankURL), // Back to "about:blank" browser()->GetSelectedTabContents()->GetURL()); } @@ -293,10 +422,9 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, MalwareProceed) { AddURLResult(url, SafeBrowsingService::URL_MALWARE); ui_test_utils::NavigateToURL(browser(), url); - SendCommand("\"proceed\""); // Simulate the user clicking "proceed" WaitForNavigation(); // Wait until we finish the navigation. - AssertNoInterstitial(); // Assert the interstitial is gone. + AssertNoInterstitial(true); // Assert the interstitial is gone. EXPECT_EQ(url, browser()->GetSelectedTabContents()->GetURL()); } @@ -307,7 +435,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, PhishingDontProceed) { ui_test_utils::NavigateToURL(browser(), url); SendCommand("\"takeMeBack\""); // Simulate the user clicking "proceed" - AssertNoInterstitial(); // Assert the interstitial is gone + AssertNoInterstitial(false); // Assert the interstitial is gone EXPECT_EQ(GURL(chrome::kAboutBlankURL), // We are back to "about:blank". browser()->GetSelectedTabContents()->GetURL()); } @@ -320,7 +448,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, PhishingProceed) { SendCommand("\"proceed\""); // Simulate the user clicking "proceed". WaitForNavigation(); // Wait until we finish the navigation. - AssertNoInterstitial(); // Assert the interstitial is gone + AssertNoInterstitial(true); // Assert the interstitial is gone EXPECT_EQ(url, browser()->GetSelectedTabContents()->GetURL()); } @@ -332,7 +460,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, PhishingReportError) { SendCommand("\"reportError\""); // Simulate the user clicking "report error" WaitForNavigation(); // Wait until we finish the navigation. - AssertNoInterstitial(); // Assert the interstitial is gone + AssertNoInterstitial(false); // Assert the interstitial is gone // We are in the error reporting page. EXPECT_EQ("/safebrowsing/report_error/", @@ -347,7 +475,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, PhishingLearnMore) { SendCommand("\"learnMore\""); // Simulate the user clicking "learn more" WaitForNavigation(); // Wait until we finish the navigation. - AssertNoInterstitial(); // Assert the interstitial is gone + AssertNoInterstitial(false); // Assert the interstitial is gone // We are in the help page. EXPECT_EQ("/support/bin/answer.py", @@ -362,7 +490,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, MalwareIframeDontProceed) { ui_test_utils::NavigateToURL(browser(), url); SendCommand("\"takeMeBack\""); // Simulate the user clicking "back" - AssertNoInterstitial(); // Assert the interstitial is gone + AssertNoInterstitial(false); // Assert the interstitial is gone EXPECT_EQ(GURL(chrome::kAboutBlankURL), // Back to "about:blank" browser()->GetSelectedTabContents()->GetURL()); @@ -378,7 +506,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, ui_test_utils::NavigateToURL(browser(), url); SendCommand("\"proceed\""); // Simulate the user clicking "proceed" - AssertNoInterstitial(); // Assert the interstitial is gone + AssertNoInterstitial(true); // Assert the interstitial is gone EXPECT_EQ(url, browser()->GetSelectedTabContents()->GetURL()); } @@ -407,7 +535,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingBlockingPageTest, prefs::kSafeBrowsingReportingEnabled)); SendCommand("\"proceed\""); // Simulate the user clicking "back" - AssertNoInterstitial(); // Assert the interstitial is gone + AssertNoInterstitial(true); // Assert the interstitial is gone EXPECT_EQ(url, browser()->GetSelectedTabContents()->GetURL()); AssertReportSent(); diff --git a/chrome/browser/safe_browsing/safe_browsing_blocking_page_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_blocking_page_unittest.cc index 25a9a43..12399ce 100644 --- a/chrome/browser/safe_browsing/safe_browsing_blocking_page_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_blocking_page_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -8,12 +8,11 @@ #include "chrome/browser/safe_browsing/malware_details.h" #include "chrome/browser/safe_browsing/safe_browsing_blocking_page.h" #include "chrome/common/pref_names.h" -#include "chrome/common/render_messages.h" -#include "chrome/common/render_messages_params.h" #include "content/browser/browser_thread.h" #include "content/browser/renderer_host/test_render_view_host.h" #include "content/browser/tab_contents/navigation_entry.h" #include "content/browser/tab_contents/test_tab_contents.h" +#include "content/common/view_messages.h" static const char* kGoogleURL = "http://www.google.com/"; static const char* kGoodURL = "http://www.goodguys.com/"; @@ -39,15 +38,16 @@ class TestSafeBrowsingBlockingPage : public SafeBrowsingBlockingPage { class TestSafeBrowsingService: public SafeBrowsingService { public: virtual ~TestSafeBrowsingService() {} - virtual void ReportMalwareDetails(scoped_refptr<MalwareDetails> details) { - details_.push_back(details); + + virtual void SendSerializedMalwareDetails(const std::string& serialized) { + details_.push_back(serialized); } - std::list<scoped_refptr<MalwareDetails> >* GetDetails() { + std::list<std::string>* GetDetails() { return &details_; } - std::list<scoped_refptr<MalwareDetails> > details_; + std::list<std::string> details_; }; class TestSafeBrowsingBlockingPageFactory @@ -102,14 +102,19 @@ class SafeBrowsingBlockingPageTest : public RenderViewHostTestHarness, void Navigate(const char* url, int page_id) { ViewHostMsg_FrameNavigate_Params params; InitNavigateParams(¶ms, page_id, GURL(url), PageTransition::TYPED); - contents()->TestDidNavigate(contents_->render_view_host(), params); + contents()->TestDidNavigate(contents()->render_view_host(), params); } - void GoBack() { + void GoBackCrossSite() { NavigationEntry* entry = contents()->controller().GetEntryAtOffset(-1); ASSERT_TRUE(entry); contents()->controller().GoBack(); - Navigate(entry->url().spec().c_str(), entry->page_id()); + + // The navigation should commit in the pending RVH. + ViewHostMsg_FrameNavigate_Params params; + InitNavigateParams(¶ms, entry->page_id(), GURL(entry->url()), + PageTransition::TYPED); + contents()->TestDidNavigate(contents()->pending_rvh(), params); } void ShowInterstitial(ResourceType::Type resource_type, @@ -156,8 +161,8 @@ class SafeBrowsingBlockingPageTest : public RenderViewHostTestHarness, resource->url = url; resource->resource_type = resource_type; resource->threat_type = SafeBrowsingService::URL_MALWARE; - resource->render_process_host_id = contents_->GetRenderProcessHost()->id(); - resource->render_view_id = contents_->render_view_host()->routing_id(); + resource->render_process_host_id = contents()->GetRenderProcessHost()->id(); + resource->render_view_id = contents()->render_view_host()->routing_id(); } UserResponse user_response_; @@ -457,7 +462,7 @@ TEST_F(SafeBrowsingBlockingPageTest, NavigatingBackAndForth) { // Proceed, then navigate back. ProceedThroughInterstitial(sb_interstitial); Navigate(kBadURL, 2); // Commit the navigation. - GoBack(); + GoBackCrossSite(); // We are back on the good page. sb_interstitial = GetSafeBrowsingBlockingPage(); diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index 00b16f7..16c4b38 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -1,19 +1,23 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/safe_browsing/safe_browsing_database.h" +#include <algorithm> +#include <iterator> + #include "base/file_util.h" #include "base/metrics/histogram.h" #include "base/metrics/stats_counters.h" #include "base/time.h" #include "base/message_loop.h" #include "base/process_util.h" -#include "base/sha2.h" +#include "crypto/sha2.h" #include "chrome/browser/safe_browsing/bloom_filter.h" #include "chrome/browser/safe_browsing/prefix_set.h" #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" +#include "content/browser/browser_thread.h" #include "googleurl/src/gurl.h" namespace { @@ -22,6 +26,9 @@ namespace { const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); // Filename suffix for download store. const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download"); +// Filename suffix for client-side phishing detection whitelist store. +const FilePath::CharType kCsdWhitelistDBFile[] = + FILE_PATH_LITERAL(" Csd Whitelist"); // Filename suffix for browse store. // TODO(lzheng): change to a better name when we change the file format. const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); @@ -29,6 +36,16 @@ const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); // The maximum staleness for a cached entry. const int kMaxStalenessMinutes = 45; +// Maximum number of entries we allow in the client-side phishing detection +// whitelist. If the whitelist on disk contains more entries then +// ContainsCsdWhitelistedUrl will always return true. +const size_t kMaxCsdWhitelistSize = 5000; + +// If the hash of this exact expression is on the csd whitelist then +// ContainsCsdWhitelistedUrl will always return true. +const char kCsdKillSwitchUrl[] = + "sb-ssl.google.com/safebrowsing/csd/killswitch"; + // To save space, the incoming |chunk_id| and |list_id| are combined // into an |encoded_chunk_id| for storage by shifting the |list_id| // into the low-order bits. These functions decode that information. @@ -46,26 +63,19 @@ int EncodeChunkId(const int chunk, const int list_id) { return chunk << 1 | list_id % 2; } -// Get the prefix for download url. -void GetDownloadUrlPrefix(const GURL& url, SBPrefix* prefix) { - std::string hostname; - std::string path; - std::string query; - safe_browsing_util::CanonicalizeUrl(url, &hostname, &path, &query); - - SBFullHash full_hash; - base::SHA256HashString(hostname + path + query, &full_hash, - sizeof(full_hash)); - *prefix = full_hash.prefix; -} - -// Generate the set of prefixes to check for |url|. +// Generate the set of full hashes to check for |url|. If +// |include_whitelist_hashes| is true we will generate additional path-prefixes +// to match against the csd whitelist. E.g., if the path-prefix /foo is on the +// whitelist it should also match /foo/bar which is not the case for all the +// other lists. // TODO(shess): This function is almost the same as // |CompareFullHashes()| in safe_browsing_util.cc, except that code // does an early exit on match. Since match should be the infrequent // case (phishing or malware found), consider combining this function // with that one. -void BrowsePrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { +void BrowseFullHashesToCheck(const GURL& url, + bool include_whitelist_hashes, + std::vector<SBFullHash>* full_hashes) { std::vector<std::string> hosts; if (url.HostIsIPAddress()) { hosts.push_back(url.host()); @@ -78,14 +88,37 @@ void BrowsePrefixesToCheck(const GURL& url, std::vector<SBPrefix>* prefixes) { for (size_t i = 0; i < hosts.size(); ++i) { for (size_t j = 0; j < paths.size(); ++j) { + const std::string& path = paths[j]; SBFullHash full_hash; - base::SHA256HashString(hosts[i] + paths[j], &full_hash, - sizeof(full_hash)); - prefixes->push_back(full_hash.prefix); + crypto::SHA256HashString(hosts[i] + path, &full_hash, + sizeof(full_hash)); + full_hashes->push_back(full_hash); + + // We may have /foo as path-prefix in the whitelist which should + // also match with /foo/bar and /foo?bar. Hence, for every path + // that ends in '/' we also add the path without the slash. + if (include_whitelist_hashes && + path.size() > 1 && + path[path.size() - 1] == '/') { + crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1), + &full_hash, sizeof(full_hash)); + full_hashes->push_back(full_hash); + } } } } +// Get the prefixes matching the download |urls|. +void GetDownloadUrlPrefixes(const std::vector<GURL>& urls, + std::vector<SBPrefix>* prefixes) { + std::vector<SBFullHash> full_hashes; + for (size_t i = 0; i < urls.size(); ++i) + BrowseFullHashesToCheck(urls[i], false, &full_hashes); + + for (size_t i = 0; i < full_hashes.size(); ++i) + prefixes->push_back(full_hashes[i].prefix); +} + // Find the entries in |full_hashes| with prefix in |prefix_hits|, and // add them to |full_hits| if not expired. "Not expired" is when // either |last_update| was recent enough, or the item has been @@ -129,47 +162,55 @@ void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits, } } +// This function generates a chunk range string for |chunks|. It +// outputs one chunk range string per list and writes it to the +// |list_ranges| vector. We expect |list_ranges| to already be of the +// right size. E.g., if |chunks| contains chunks with two different +// list ids then |list_ranges| must contain two elements. void GetChunkRanges(const std::vector<int>& chunks, - std::string* list0, - std::string* list1) { - std::vector<int> chunks0; - std::vector<int> chunks1; - + std::vector<std::string>* list_ranges) { + DCHECK_GT(list_ranges->size(), 0U); + DCHECK_LE(list_ranges->size(), 2U); + std::vector<std::vector<int> > decoded_chunks(list_ranges->size()); for (std::vector<int>::const_iterator iter = chunks.begin(); iter != chunks.end(); ++iter) { int mod_list_id = GetListIdBit(*iter); - if (0 == mod_list_id) { - chunks0.push_back(DecodeChunkId(*iter)); - } else { - DCHECK_EQ(1, mod_list_id); - chunks1.push_back(DecodeChunkId(*iter)); - } + DCHECK_GE(mod_list_id, 0); + DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size()); + decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); + } + for (size_t i = 0; i < decoded_chunks.size(); ++i) { + ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); } - - ChunksToRangeString(chunks0, list0); - ChunksToRangeString(chunks1, list1); } // Helper function to create chunk range lists for Browse related // lists. -void UpdateChunkRanges(const std::vector<int>& add_chunks, - const std::vector<int>& sub_chunks, - const std::string& list_name0, - const std::string& list_name1, +void UpdateChunkRanges(SafeBrowsingStore* store, + const std::vector<std::string>& listnames, std::vector<SBListChunkRanges>* lists) { - DCHECK_EQ(safe_browsing_util::GetListId(list_name0) % 2, 0); - DCHECK_EQ(safe_browsing_util::GetListId(list_name1) % 2, 1); - DCHECK_NE(safe_browsing_util::GetListId(list_name0), - safe_browsing_util::INVALID); - DCHECK_NE(safe_browsing_util::GetListId(list_name1), - safe_browsing_util::INVALID); - - SBListChunkRanges chunkrange0(list_name0); - SBListChunkRanges chunkrange1(list_name1); - GetChunkRanges(add_chunks, &chunkrange0.adds, &chunkrange1.adds); - GetChunkRanges(sub_chunks, &chunkrange0.subs, &chunkrange1.subs); - lists->push_back(chunkrange0); - lists->push_back(chunkrange1); + DCHECK_GT(listnames.size(), 0U); + DCHECK_LE(listnames.size(), 2U); + std::vector<int> add_chunks; + std::vector<int> sub_chunks; + store->GetAddChunks(&add_chunks); + store->GetSubChunks(&sub_chunks); + + std::vector<std::string> adds(listnames.size()); + std::vector<std::string> subs(listnames.size()); + GetChunkRanges(add_chunks, &adds); + GetChunkRanges(sub_chunks, &subs); + + for (size_t i = 0; i < listnames.size(); ++i) { + const std::string& listname = listnames[i]; + DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, + static_cast<int>(i % 2)); + DCHECK_NE(safe_browsing_util::GetListId(listname), + safe_browsing_util::INVALID); + lists->push_back(SBListChunkRanges(listname)); + lists->back().adds.swap(adds[i]); + lists->back().subs.swap(subs[i]); + } } // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from @@ -190,6 +231,14 @@ enum PrefixSetEvent { PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT, PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID, PREFIX_SET_GETPREFIXES_BROKEN, + PREFIX_SET_GETPREFIXES_BROKEN_SIZE, + PREFIX_SET_GETPREFIXES_FIRST_BROKEN, + PREFIX_SET_SBPREFIX_WAS_BROKEN, + PREFIX_SET_GETPREFIXES_BROKEN_SORTING, + PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION, + PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA, + PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX, + PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH, // Memory space for histograms is determined by the max. ALWAYS ADD // NEW VALUES BEFORE THIS ONE. @@ -201,20 +250,152 @@ void RecordPrefixSetInfo(PrefixSetEvent event_type) { PREFIX_SET_EVENT_MAX); } +// Generate a |PrefixSet| instance from the contents of +// |add_prefixes|. Additionally performs various checks to make sure +// that the resulting prefix set is valid, so that the +// PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in +// ContainsBrowseUrl() can be trustworthy. +safe_browsing::PrefixSet* PrefixSetFromAddPrefixes( + const std::vector<SBAddPrefix>& add_prefixes) { + // TODO(shess): If |add_prefixes| were sorted by the prefix, it + // could be passed directly to |PrefixSet()|, removing the need for + // |prefixes|. For now, |prefixes| is useful while debugging + // things. + std::vector<SBPrefix> prefixes; + for (size_t i = 0; i < add_prefixes.size(); ++i) { + prefixes.push_back(add_prefixes[i].prefix); + } + + std::sort(prefixes.begin(), prefixes.end()); + prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), + prefixes.end()); + + scoped_ptr<safe_browsing::PrefixSet> + prefix_set(new safe_browsing::PrefixSet(prefixes)); + + std::vector<SBPrefix> restored; + prefix_set->GetPrefixes(&restored); + + // Expect them to be equal. + if (restored.size() == prefixes.size() && + std::equal(prefixes.begin(), prefixes.end(), restored.begin())) + return prefix_set.release(); + + // Log BROKEN for continuity with previous release, and SIZE to + // distinguish which test failed. + NOTREACHED(); + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); + if (restored.size() != prefixes.size()) + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE); + + // Try to distinguish between updates from one broken user and a + // distributed problem. + static bool logged_broken = false; + if (!logged_broken) { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN); + logged_broken = true; + } + + // This seems so very very unlikely. But if it ever were true, then + // it could explain why GetPrefixes() seemed broken. + if (sizeof(int) != sizeof(int32)) + RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN); + + // Check if memory was corrupted during construction. + if (!prefix_set->CheckChecksum()) + RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH); + + // Check whether |restored| is unsorted, or has duplication. + if (restored.size()) { + size_t unsorted_count = 0; + bool duplicates = false; + SBPrefix prev = restored[0]; + for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) { + if (prev > restored[i]) { + unsorted_count++; + UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference", + prev - restored[i]); + + // When unsorted, how big is the set, and how far are we into + // it. If the set is very small or large, that might inform + // pursuit of a degenerate case. If the percentage is close + // to 0%, 100%, or 50%, then there might be an interesting + // degenerate case to explore. + UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size()); + UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent", + i * 100 / restored.size()); + + if (prefix_set->IsDeltaAt(i)) { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA); + + // Histograms require memory on the order of the number of + // buckets, making high-precision logging expensive. For + // now aim for a sense of the range of the problem. + UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta", + prefix_set->DeltaAt(i), 1, 0xFFFF, 50); + } else { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX); + } + } + if (prev == restored[i]) + duplicates = true; + } + + // Record findings. + if (unsorted_count) { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING); + UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count); + } + if (duplicates) + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION); + + // Fix the problems noted. If |restored| was unsorted, then + // |duplicates| may give a false negative. + if (unsorted_count) + std::sort(restored.begin(), restored.end()); + if (unsorted_count || duplicates) + restored.erase(std::unique(restored.begin(), restored.end()), + restored.end()); + } + + // NOTE(shess): The following could be done using a single + // uber-loop, but it's complicated by needing multiple parallel + // iterators. Didn't seem worthwhile for something that will only + // live for a short period and only fires for one in a million + // updates. + + // Find elements in |restored| which are not in |prefixes|. + std::vector<SBPrefix> difference; + std::set_difference(restored.begin(), restored.end(), + prefixes.begin(), prefixes.end(), + std::back_inserter(difference)); + if (difference.size()) + UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size()); + + // Find elements in |prefixes| which are not in |restored|. + difference.clear(); + std::set_difference(prefixes.begin(), prefixes.end(), + restored.begin(), restored.end(), + std::back_inserter(difference)); + if (difference.size()) + UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall", + difference.size()); + + return prefix_set.release(); +} + } // namespace // The default SafeBrowsingDatabaseFactory. class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { public: virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( - bool enable_download_protection) { - if (enable_download_protection) { - // Create database with browse url store and download store. - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, - new SafeBrowsingStoreFile); - } - // Create database with only browse url store. - return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, NULL); + bool enable_download_protection, + bool enable_client_side_whitelist) { + return new SafeBrowsingDatabaseNew( + new SafeBrowsingStoreFile, + enable_download_protection ? new SafeBrowsingStoreFile : NULL, + enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL); } SafeBrowsingDatabaseFactoryImpl() { } @@ -232,10 +413,12 @@ SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() // callers just construct things directly. SafeBrowsingDatabase* SafeBrowsingDatabase::Create( - bool enable_download_protection) { + bool enable_download_protection, + bool enable_client_side_whitelist) { if (!factory_) factory_ = new SafeBrowsingDatabaseFactoryImpl(); - return factory_->CreateSafeBrowsingDatabase(enable_download_protection); + return factory_->CreateSafeBrowsingDatabase(enable_download_protection, + enable_client_side_whitelist); } SafeBrowsingDatabase::~SafeBrowsingDatabase() { @@ -259,6 +442,12 @@ FilePath SafeBrowsingDatabase::BloomFilterForFilename( return FilePath(db_filename.value() + kBloomFilterFile); } +// static +FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( + const FilePath& db_filename) { + return FilePath(db_filename.value() + kCsdWhitelistDBFile); +} + SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { DVLOG(3) << "Get store for list: " << list_id; if (list_id == safe_browsing_util::PHISH || @@ -267,6 +456,8 @@ SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { } else if (list_id == safe_browsing_util::BINURL || list_id == safe_browsing_util::BINHASH) { return download_store_.get(); + } else if (list_id == safe_browsing_util::CSDWHITELIST) { + return csd_whitelist_store_.get(); } return NULL; } @@ -281,16 +472,21 @@ SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() : creation_loop_(MessageLoop::current()), browse_store_(new SafeBrowsingStoreFile), download_store_(NULL), + csd_whitelist_store_(NULL), ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) { DCHECK(browse_store_.get()); DCHECK(!download_store_.get()); + DCHECK(!csd_whitelist_store_.get()); } SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( - SafeBrowsingStore* browse_store, SafeBrowsingStore* download_store) + SafeBrowsingStore* browse_store, + SafeBrowsingStore* download_store, + SafeBrowsingStore* csd_whitelist_store) : creation_loop_(MessageLoop::current()), browse_store_(browse_store), download_store_(download_store), + csd_whitelist_store_(csd_whitelist_store), ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)), corruption_detected_(false) { DCHECK(browse_store_.get()); @@ -302,28 +498,30 @@ SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { DCHECK_EQ(creation_loop_, MessageLoop::current()); - - // NOTE: There is no need to grab the lock in this function, since - // until it returns, there are no pointers to this class on other - // threads. Then again, that means there is no possibility of - // contention on the lock... - base::AutoLock locked(lookup_lock_); - - DCHECK(browse_filename_.empty()); // Ensure we haven't been run before. - DCHECK(download_filename_.empty()); // Ensure we haven't been run before. + // Ensure we haven't been run before. + DCHECK(browse_filename_.empty()); + DCHECK(download_filename_.empty()); + DCHECK(csd_whitelist_filename_.empty()); browse_filename_ = BrowseDBFilename(filename_base); + bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); + browse_store_->Init( browse_filename_, NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); - - full_browse_hashes_.clear(); - pending_browse_hashes_.clear(); - - bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); - LoadBloomFilter(); DVLOG(1) << "Init browse store: " << browse_filename_.value(); + { + // NOTE: There is no need to grab the lock in this function, since + // until it returns, there are no pointers to this class on other + // threads. Then again, that means there is no possibility of + // contention on the lock... + base::AutoLock locked(lookup_lock_); + full_browse_hashes_.clear(); + pending_browse_hashes_.clear(); + LoadBloomFilter(); + } + if (download_store_.get()) { download_filename_ = DownloadDBFilename(filename_base); download_store_->Init( @@ -331,6 +529,22 @@ void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); DVLOG(1) << "Init download store: " << download_filename_.value(); } + + if (csd_whitelist_store_.get()) { + csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base); + csd_whitelist_store_->Init( + csd_whitelist_filename_, + NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); + DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value(); + std::vector<SBAddFullHash> full_hashes; + if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { + LoadCsdWhitelist(full_hashes); + } else { + CsdWhitelistAllUrls(); + } + } else { + CsdWhitelistAllUrls(); // Just to be safe. + } } bool SafeBrowsingDatabaseNew::ResetDatabase() { @@ -355,6 +569,8 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() { // of a bloom filter always implies presence of a prefix set. prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>())); } + // Wants to acquire the lock itself. + CsdWhitelistAllUrls(); return true; } @@ -371,9 +587,9 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( prefix_hits->clear(); full_hits->clear(); - std::vector<SBPrefix> prefixes; - BrowsePrefixesToCheck(url, &prefixes); - if (prefixes.empty()) + std::vector<SBFullHash> full_hashes; + BrowseFullHashesToCheck(url, false, &full_hashes); + if (full_hashes.empty()) return false; // This function is called on the I/O thread, prevent changes to @@ -388,15 +604,15 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( std::vector<SBPrefix> restored; size_t miss_count = 0; - for (size_t i = 0; i < prefixes.size(); ++i) { - bool found = prefix_set_->Exists(prefixes[i]); + for (size_t i = 0; i < full_hashes.size(); ++i) { + bool found = prefix_set_->Exists(full_hashes[i].prefix); - if (browse_bloom_filter_->Exists(prefixes[i])) { + if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT); if (found) RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT); - prefix_hits->push_back(prefixes[i]); - if (prefix_miss_cache_.count(prefixes[i]) > 0) + prefix_hits->push_back(full_hashes[i].prefix); + if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) ++miss_count; } else { // Bloom filter misses should never be in prefix set. Re-create @@ -413,7 +629,8 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( // If the item is not in the re-created list, then there is an // error in |PrefixSet::Exists()|. If the item is in the // re-created list, then the bloom filter was wrong. - if (std::binary_search(restored.begin(), restored.end(), prefixes[i])) { + if (std::binary_search(restored.begin(), restored.end(), + full_hashes[i].prefix)) { RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); } else { RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID); @@ -439,32 +656,39 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( } bool SafeBrowsingDatabaseNew::MatchDownloadAddPrefixes( - int list_bit, const SBPrefix& prefix, SBPrefix* prefix_hit) { + int list_bit, + const std::vector<SBPrefix>& prefixes, + std::vector<SBPrefix>* prefix_hits) { + prefix_hits->clear(); + std::vector<SBAddPrefix> add_prefixes; download_store_->GetAddPrefixes(&add_prefixes); for (size_t i = 0; i < add_prefixes.size(); ++i) { - if (prefix == add_prefixes[i].prefix && - GetListIdBit(add_prefixes[i].chunk_id) == list_bit) { - *prefix_hit = prefix; - return true; + for (size_t j = 0; j < prefixes.size(); ++j) { + const SBPrefix& prefix = prefixes[j]; + if (prefix == add_prefixes[i].prefix && + GetListIdBit(add_prefixes[i].chunk_id) == list_bit) { + prefix_hits->push_back(prefix); + } } } - return false; + return !prefix_hits->empty(); } -bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(const GURL& url, - SBPrefix* prefix_hit) { +bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( + const std::vector<GURL>& urls, + std::vector<SBPrefix>* prefix_hits) { DCHECK_EQ(creation_loop_, MessageLoop::current()); // Ignore this check when download checking is not enabled. if (!download_store_.get()) return false; - SBPrefix prefix; - GetDownloadUrlPrefix(url, &prefix); + std::vector<SBPrefix> prefixes; + GetDownloadUrlPrefixes(urls, &prefixes); return MatchDownloadAddPrefixes(safe_browsing_util::BINURL % 2, - prefix, - prefix_hit); + prefixes, + prefix_hits); } bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix( @@ -475,10 +699,29 @@ bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix( if (!download_store_.get()) return false; - SBPrefix prefix_hit; + std::vector<SBPrefix> prefixes(1, prefix); + std::vector<SBPrefix> prefix_hits; return MatchDownloadAddPrefixes(safe_browsing_util::BINHASH % 2, - prefix, - &prefix_hit); + prefixes, + &prefix_hits); +} + +bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { + // This method is theoretically thread-safe but we expect all calls to + // originate from the IO thread. + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + base::AutoLock l(lookup_lock_); + if (csd_whitelist_all_urls_) + return true; + + std::vector<SBFullHash> full_hashes; + BrowseFullHashesToCheck(url, true, &full_hashes); + for (std::vector<SBFullHash>::const_iterator it = full_hashes.begin(); + it != full_hashes.end(); ++it) { + if (std::binary_search(csd_whitelist_.begin(), csd_whitelist_.end(), *it)) + return true; + } + return false; } // Helper to insert entries for all of the prefixes or full hashes in @@ -732,24 +975,29 @@ bool SafeBrowsingDatabaseNew::UpdateStarted( return false; } - std::vector<int> browse_add_chunks; - browse_store_->GetAddChunks(&browse_add_chunks); - std::vector<int> browse_sub_chunks; - browse_store_->GetSubChunks(&browse_sub_chunks); - UpdateChunkRanges(browse_add_chunks, browse_sub_chunks, - safe_browsing_util::kMalwareList, - safe_browsing_util::kPhishingList, - lists); + if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { + RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN); + HandleCorruptDatabase(); + return false; + } + + std::vector<std::string> browse_listnames; + browse_listnames.push_back(safe_browsing_util::kMalwareList); + browse_listnames.push_back(safe_browsing_util::kPhishingList); + UpdateChunkRanges(browse_store_.get(), browse_listnames, lists); if (download_store_.get()) { - std::vector<int> download_add_chunks; - download_store_->GetAddChunks(&download_add_chunks); - std::vector<int> download_sub_chunks; - download_store_->GetSubChunks(&download_sub_chunks); - UpdateChunkRanges(download_add_chunks, download_sub_chunks, - safe_browsing_util::kBinUrlList, - safe_browsing_util::kBinHashList, - lists); + std::vector<std::string> download_listnames; + download_listnames.push_back(safe_browsing_util::kBinUrlList); + download_listnames.push_back(safe_browsing_util::kBinHashList); + UpdateChunkRanges(download_store_.get(), download_listnames, lists); + } + + if (csd_whitelist_store_.get()) { + std::vector<std::string> csd_whitelist_listnames; + csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList); + UpdateChunkRanges(csd_whitelist_store_.get(), + csd_whitelist_listnames, lists); } corruption_detected_ = false; @@ -772,6 +1020,8 @@ void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { browse_store_->CancelUpdate(); if (download_store_.get()) download_store_->CancelUpdate(); + if (csd_whitelist_store_.get()) + csd_whitelist_store_->CancelUpdate(); return; } @@ -779,9 +1029,37 @@ void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { UpdateDownloadStore(); // for browsing UpdateBrowseStore(); + // for csd whitelist + UpdateCsdWhitelistStore(); +} + +void SafeBrowsingDatabaseNew::UpdateCsdWhitelistStore() { + if (!csd_whitelist_store_.get()) + return; + + // For the csd whitelist, we don't cache and save full hashes since all + // hashes are already full. + std::vector<SBAddFullHash> empty_add_hashes; + + // Not needed for the csd whitelist. + std::set<SBPrefix> empty_miss_cache; + + // Note: prefixes will not be empty. The current data store implementation + // stores all full-length hashes as both full and prefix hashes. + std::vector<SBAddPrefix> prefixes; + std::vector<SBAddFullHash> full_hashes; + if (!csd_whitelist_store_->FinishUpdate(empty_add_hashes, + empty_miss_cache, + &prefixes, + &full_hashes)) { + RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH); + CsdWhitelistAllUrls(); + return; + } + LoadCsdWhitelist(full_hashes); } -void SafeBrowsingDatabaseNew:: UpdateDownloadStore() { +void SafeBrowsingDatabaseNew::UpdateDownloadStore() { if (!download_store_.get()) return; @@ -854,24 +1132,8 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { filter->Insert(add_prefixes[i].prefix); } - std::vector<SBPrefix> prefixes; - for (size_t i = 0; i < add_prefixes.size(); ++i) { - prefixes.push_back(add_prefixes[i].prefix); - } - std::sort(prefixes.begin(), prefixes.end()); scoped_ptr<safe_browsing::PrefixSet> - prefix_set(new safe_browsing::PrefixSet(prefixes)); - - // Verify that |GetPrefixes()| returns the same set of prefixes as - // was passed to the constructor. - std::vector<SBPrefix> restored; - prefix_set->GetPrefixes(&restored); - prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); - if (restored.size() != prefixes.size() || - !std::equal(prefixes.begin(), prefixes.end(), restored.begin())) { - NOTREACHED(); - RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); - } + prefix_set(PrefixSetFromAddPrefixes(add_prefixes)); // This needs to be in sorted order by prefix for efficient access. std::sort(add_full_hashes.begin(), add_full_hashes.end(), @@ -979,24 +1241,7 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() { // TODO(shess): Write/read for prefix set. std::vector<SBAddPrefix> add_prefixes; browse_store_->GetAddPrefixes(&add_prefixes); - std::vector<SBPrefix> prefixes; - for (size_t i = 0; i < add_prefixes.size(); ++i) { - prefixes.push_back(add_prefixes[i].prefix); - } - std::sort(prefixes.begin(), prefixes.end()); - prefix_set_.reset(new safe_browsing::PrefixSet(prefixes)); - - // Double-check the prefixes so that the - // PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in - // ContainsBrowseUrl() can be trustworthy. - std::vector<SBPrefix> restored; - prefix_set_->GetPrefixes(&restored); - std::set<SBPrefix> unique(prefixes.begin(), prefixes.end()); - if (restored.size() != unique.size() || - !std::equal(unique.begin(), unique.end(), restored.begin())) { - NOTREACHED(); - RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); - } + prefix_set_.reset(PrefixSetFromAddPrefixes(add_prefixes)); } bool SafeBrowsingDatabaseNew::Delete() { @@ -1010,10 +1255,15 @@ bool SafeBrowsingDatabaseNew::Delete() { if (!r2) RecordFailure(FAILURE_DATABASE_STORE_DELETE); - const bool r3 = file_util::Delete(bloom_filter_filename_, false); + const bool r3 = csd_whitelist_store_.get() ? + csd_whitelist_store_->Delete() : true; if (!r3) + RecordFailure(FAILURE_DATABASE_STORE_DELETE); + + const bool r4 = file_util::Delete(bloom_filter_filename_, false); + if (!r4) RecordFailure(FAILURE_DATABASE_FILTER_DELETE); - return r1 && r2 && r3; + return r1 && r2 && r3 && r4; } void SafeBrowsingDatabaseNew::WriteBloomFilter() { @@ -1030,3 +1280,38 @@ void SafeBrowsingDatabaseNew::WriteBloomFilter() { if (!write_ok) RecordFailure(FAILURE_DATABASE_FILTER_WRITE); } + +void SafeBrowsingDatabaseNew::CsdWhitelistAllUrls() { + base::AutoLock locked(lookup_lock_); + csd_whitelist_all_urls_ = true; + csd_whitelist_.clear(); +} + +void SafeBrowsingDatabaseNew::LoadCsdWhitelist( + const std::vector<SBAddFullHash>& full_hashes) { + DCHECK_EQ(creation_loop_, MessageLoop::current()); + if (full_hashes.size() > kMaxCsdWhitelistSize) { + CsdWhitelistAllUrls(); + return; + } + + std::vector<SBFullHash> new_csd_whitelist; + for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); + it != full_hashes.end(); ++it) { + new_csd_whitelist.push_back(it->full_hash); + } + std::sort(new_csd_whitelist.begin(), new_csd_whitelist.end()); + + SBFullHash kill_switch; + crypto::SHA256HashString(kCsdKillSwitchUrl, &kill_switch, + sizeof(kill_switch)); + if (std::binary_search(new_csd_whitelist.begin(), new_csd_whitelist.end(), + kill_switch)) { + // The kill switch is whitelisted hence we whitelist all URLs. + CsdWhitelistAllUrls(); + } else { + base::AutoLock locked(lookup_lock_); + csd_whitelist_all_urls_ = false; + csd_whitelist_.swap(new_csd_whitelist); + } +} diff --git a/chrome/browser/safe_browsing/safe_browsing_database.h b/chrome/browser/safe_browsing/safe_browsing_database.h index b1712f0..eeb83e0 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.h +++ b/chrome/browser/safe_browsing/safe_browsing_database.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -10,7 +10,7 @@ #include <vector> #include "base/file_path.h" -#include "base/scoped_ptr.h" +#include "base/memory/scoped_ptr.h" #include "base/synchronization/lock.h" #include "base/task.h" #include "chrome/browser/safe_browsing/safe_browsing_store.h" @@ -36,28 +36,34 @@ class SafeBrowsingDatabaseFactory { SafeBrowsingDatabaseFactory() { } virtual ~SafeBrowsingDatabaseFactory() { } virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( - bool enable_download_protection) = 0; + bool enable_download_protection, + bool enable_client_side_whitelist) = 0; private: DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory); }; - -// Encapsulates on-disk databases that for safebrowsing. There are two -// databases: browse database and download database. The browse database -// contains information about phishing and malware urls. The download -// database contains URLs for bad binaries (e.g: those containing virus) -// and hash of these downloaded contents. These on-disk databases are shared -// among all profiles, as it doesn't contain user-specific data. This object -// is not thread-safe, i.e. all its methods should be used on the same thread -// that it was created on. +// Encapsulates on-disk databases that for safebrowsing. There are +// three databases: browse, download and client-side detection (csd) +// whitelist databases. The browse database contains information +// about phishing and malware urls. The download database contains +// URLs for bad binaries (e.g: those containing virus) and hash of +// these downloaded contents. The csd whitelist database contains URLs +// that will never be considered as phishing by the client-side +// phishing detection. These on-disk databases are shared among all +// profiles, as it doesn't contain user-specific data. This object is +// not thread-safe, i.e. all its methods should be used on the same +// thread that it was created on. class SafeBrowsingDatabase { public: // Factory method for obtaining a SafeBrowsingDatabase implementation. // It is not thread safe. // |enable_download_protection| is used to control the download database // feature. - static SafeBrowsingDatabase* Create(bool enable_download_protection); + // |enable_client_side_whitelist| is used to control the csd whitelist + // database feature. + static SafeBrowsingDatabase* Create(bool enable_download_protection, + bool enable_client_side_whitelist); // Makes the passed |factory| the factory used to instantiate // a SafeBrowsingDatabase. This is used for tests. @@ -84,16 +90,22 @@ class SafeBrowsingDatabase { std::vector<SBFullHashResult>* full_hits, base::Time last_update) = 0; - // Returns false if |url| is not in Download database. If it returns true, - // |prefix_hit| should contain the prefix for |url|. - // This function could ONLY be accessed from creation thread. - virtual bool ContainsDownloadUrl(const GURL& url, - SBPrefix* prefix_hit) = 0; + // Returns false if none of |urls| are in Download database. If it returns + // true, |prefix_hits| should contain the prefixes for the URLs that were in + // the database. This function could ONLY be accessed from creation thread. + virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, + std::vector<SBPrefix>* prefix_hits) = 0; // Returns false if |prefix| is not in Download database. // This function could ONLY be accessed from creation thread. virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0; + // Returns false if |url| is not on the client-side phishing detection + // whitelist. Otherwise, this function returns true. Note: the whitelist + // only contains full-length hashes so we don't return any prefix hit. + // This function should only be called from the IO thread. + virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0; + // A database transaction should look like: // // std::vector<SBListChunkRanges> lists; @@ -138,6 +150,10 @@ class SafeBrowsingDatabase { // Filename for download URL and download binary hash database. static FilePath DownloadDBFilename(const FilePath& db_base_filename); + // Filename for client-side phishing detection whitelist databsae. + static FilePath CsdWhitelistDBFilename( + const FilePath& csd_whitelist_base_filename); + // Enumerate failures for histogramming purposes. DO NOT CHANGE THE // ORDERING OF THESE VALUES. enum FailureType { @@ -153,6 +169,8 @@ class SafeBrowsingDatabase { FAILURE_DATABASE_STORE_DELETE, FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN, FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH, + FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN, + FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH, // Memory space for histograms is determined by the max. ALWAYS // ADD NEW VALUES BEFORE THIS ONE. @@ -170,12 +188,14 @@ class SafeBrowsingDatabase { class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { public: - // Create a database with a browse store and download store. Takes ownership - // of browse_store and download_store. When |download_store| is NULL, - // the database will ignore any operations related download (url hashes and - // binary hashes). + // Create a database with a browse store, download store and + // csd_whitelist_store. Takes ownership of browse_store, download_store and + // csd_whitelist_store. When |download_store| is NULL, the database + // will ignore any operations related download (url hashes and + // binary hashes). Same for the |csd_whitelist_store|. SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store, - SafeBrowsingStore* download_store); + SafeBrowsingStore* download_store, + SafeBrowsingStore* csd_whitelist_store); // Create a database with a browse store. This is a legacy interface that // useds Sqlite. @@ -191,9 +211,10 @@ class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { std::vector<SBPrefix>* prefix_hits, std::vector<SBFullHashResult>* full_hits, base::Time last_update); - virtual bool ContainsDownloadUrl(const GURL& url, - SBPrefix* prefix_hit); + virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, + std::vector<SBPrefix>* prefix_hits); virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix); + virtual bool ContainsCsdWhitelistedUrl(const GURL& url); virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists); virtual void InsertChunks(const std::string& list_name, const SBChunkList& chunks); @@ -206,7 +227,8 @@ class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { friend class SafeBrowsingDatabaseTest; FRIEND_TEST(SafeBrowsingDatabaseTest, HashCaching); - // Return the browse_store_ or download_store_ based on list_id. + // Return the browse_store_, download_store_ or csd_whitelist_store_ + // based on list_id. SafeBrowsingStore* GetStore(int list_id); // Deletes the files on disk. @@ -218,6 +240,15 @@ class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { // Writes the current bloom filter to disk. void WriteBloomFilter(); + // Loads the given full-length hashes to the csd whitelist. If the number + // of hashes is too large or if the kill switch URL is on the whitelist + // we will whitelist all URLs. + void LoadCsdWhitelist(const std::vector<SBAddFullHash>& full_hashes); + + // Call this method if an error occured with the csd whitelist. This will + // result in all calls to ContainsCsdWhitelistedUrl() to returning true. + void CsdWhitelistAllUrls(); + // Helpers for handling database corruption. // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets // |corruption_detected_|, |HandleCorruptDatabase()| posts @@ -236,15 +267,16 @@ class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { void UpdateDownloadStore(); void UpdateBrowseStore(); + void UpdateCsdWhitelistStore(); - // Helper function to compare addprefixes in download_store_ with prefix. + // Helper function to compare addprefixes in download_store_ with |prefixes|. // The |list_bit| indicates which list (download url or download hash) // to compare. - // Returns true if there is a match, |*prefix_hit| will contain the actual - // matching prefix. + // Returns true if there is a match, |*prefix_hits| will contain the actual + // matching prefixes. bool MatchDownloadAddPrefixes(int list_bit, - const SBPrefix& prefix, - SBPrefix* prefix_hit); + const std::vector<SBPrefix>& prefixes, + std::vector<SBPrefix>* prefix_hits); // Used to verify that various calls are made from the thread the // object was created on. @@ -252,7 +284,8 @@ class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { // Lock for protecting access to variables that may be used on the // IO thread. This includes |browse_bloom_filter_|, |full_browse_hashes_|, - // |pending_browse_hashes_|, and |prefix_miss_cache_|. + // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|, and + // |csd_whitelist_all_urls_|. base::Lock lookup_lock_; // Underlying persistent store for chunk data. @@ -264,6 +297,21 @@ class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { FilePath download_filename_; scoped_ptr<SafeBrowsingStore> download_store_; + // For the client-side phishing detection whitelist chunks and full-length + // hashes. This list only contains 256 bit hashes. + FilePath csd_whitelist_filename_; + scoped_ptr<SafeBrowsingStore> csd_whitelist_store_; + + // All the client-side phishing detection whitelist entries are loaded in + // a sorted vector. + std::vector<SBFullHash> csd_whitelist_; + + // If true, ContainsCsdWhitelistedUrl will always return true for all URLs. + // This is set to true if the csd whitelist is too large to be stored in + // memory, if the kill switch URL is on the csd whitelist or if there was + // an error during the most recent update. + bool csd_whitelist_all_urls_; + // Bloom filter generated from the add-prefixes in |browse_store_|. // Only browse_store_ requires the BloomFilter for fast query. FilePath bloom_filter_filename_; diff --git a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc index a71a797..201edcb 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -8,13 +8,14 @@ #include "app/sql/statement.h" #include "base/file_util.h" #include "base/logging.h" +#include "base/memory/scoped_temp_dir.h" #include "base/message_loop.h" -#include "base/scoped_temp_dir.h" -#include "base/sha2.h" #include "base/time.h" +#include "crypto/sha2.h" #include "chrome/browser/safe_browsing/safe_browsing_database.h" #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" #include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h" +#include "content/browser/browser_thread.h" #include "googleurl/src/gurl.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/platform_test.h" @@ -25,13 +26,13 @@ namespace { SBPrefix Sha256Prefix(const std::string& str) { SBPrefix prefix; - base::SHA256HashString(str, &prefix, sizeof(prefix)); + crypto::SHA256HashString(str, &prefix, sizeof(prefix)); return prefix; } SBFullHash Sha256Hash(const std::string& str) { SBFullHash hash; - base::SHA256HashString(str, &hash, sizeof(hash)); + crypto::SHA256HashString(str, &hash, sizeof(hash)); return hash; } @@ -62,6 +63,21 @@ void InsertAddChunkHostPrefixUrl(SBChunk* chunk, Sha256Prefix(url)); } +// Same as InsertAddChunkHostPrefixUrl, but with full hashes. +void InsertAddChunkHostFullHashes(SBChunk* chunk, + int chunk_number, + const std::string& host_name, + const std::string& url) { + chunk->chunk_number = chunk_number; + chunk->is_add = true; + SBChunkHost host; + host.host = Sha256Prefix(host_name); + host.entry = SBEntry::Create(SBEntry::ADD_FULL_HASH, 1); + host.entry->set_chunk_id(chunk->chunk_number); + host.entry->SetFullHashAt(0, Sha256Hash(url)); + chunk->hosts.push_back(host); +} + // Same as InsertAddChunkHostPrefixUrl, but with two urls for prefixes. void InsertAddChunkHost2PrefixUrls(SBChunk* chunk, int chunk_number, @@ -369,7 +385,10 @@ TEST_F(SafeBrowsingDatabaseTest, ListNameForBrowseAndDownload) { MessageLoop loop(MessageLoop::TYPE_DEFAULT); SafeBrowsingStoreFile* browse_store = new SafeBrowsingStoreFile(); SafeBrowsingStoreFile* download_store = new SafeBrowsingStoreFile(); - database_.reset(new SafeBrowsingDatabaseNew(browse_store, download_store)); + SafeBrowsingStoreFile* csd_whitelist_store = new SafeBrowsingStoreFile(); + database_.reset(new SafeBrowsingDatabaseNew(browse_store, + download_store, + csd_whitelist_store)); database_->Init(database_filename_); SBChunkList chunks; @@ -404,10 +423,17 @@ TEST_F(SafeBrowsingDatabaseTest, ListNameForBrowseAndDownload) { chunks.push_back(chunk); database_->InsertChunks(safe_browsing_util::kBinHashList, chunks); + chunk.hosts.clear(); + InsertAddChunkHostFullHashes(&chunk, 5, "www.forwhitelist.com/", + "www.forwhitelist.com/a.html"); + chunks.clear(); + chunks.push_back(chunk); + database_->InsertChunks(safe_browsing_util::kCsdWhiteList, chunks); + database_->UpdateFinished(true); GetListsInfo(&lists); - EXPECT_EQ(4U, lists.size()); + EXPECT_EQ(5U, lists.size()); EXPECT_TRUE(lists[0].name == safe_browsing_util::kMalwareList); EXPECT_EQ(lists[0].adds, "1"); EXPECT_TRUE(lists[0].subs.empty()); @@ -420,6 +446,9 @@ TEST_F(SafeBrowsingDatabaseTest, ListNameForBrowseAndDownload) { EXPECT_TRUE(lists[3].name == safe_browsing_util::kBinHashList); EXPECT_EQ(lists[3].adds, "4"); EXPECT_TRUE(lists[3].subs.empty()); + EXPECT_TRUE(lists[4].name == safe_browsing_util::kCsdWhiteList); + EXPECT_EQ(lists[4].adds, "5"); + EXPECT_TRUE(lists[4].subs.empty()); database_.reset(); } @@ -1043,7 +1072,7 @@ TEST_F(SafeBrowsingDatabaseTest, DISABLED_FileCorruptionHandling) { database_.reset(); MessageLoop loop(MessageLoop::TYPE_DEFAULT); SafeBrowsingStoreFile* store = new SafeBrowsingStoreFile(); - database_.reset(new SafeBrowsingDatabaseNew(store, NULL)); + database_.reset(new SafeBrowsingDatabaseNew(store, NULL, NULL)); database_->Init(database_filename_); // This will cause an empty database to be created. @@ -1112,11 +1141,14 @@ TEST_F(SafeBrowsingDatabaseTest, ContainsDownloadUrl) { MessageLoop loop(MessageLoop::TYPE_DEFAULT); SafeBrowsingStoreFile* browse_store = new SafeBrowsingStoreFile(); SafeBrowsingStoreFile* download_store = new SafeBrowsingStoreFile(); - database_.reset(new SafeBrowsingDatabaseNew(browse_store, download_store)); + SafeBrowsingStoreFile* csd_whitelist_store = new SafeBrowsingStoreFile(); + database_.reset(new SafeBrowsingDatabaseNew(browse_store, + download_store, + csd_whitelist_store)); database_->Init(database_filename_); const char kEvil1Host[] = "www.evil1.com/"; - const char kEvil1Url1[] = "www.evil1.com/download1.html"; + const char kEvil1Url1[] = "www.evil1.com/download1/"; const char kEvil1Url2[] = "www.evil1.com/download2.html"; SBChunkList chunks; @@ -1130,28 +1162,191 @@ TEST_F(SafeBrowsingDatabaseTest, ContainsDownloadUrl) { database_->InsertChunks(safe_browsing_util::kBinUrlList, chunks); database_->UpdateFinished(true); - const Time now = Time::Now(); - SBPrefix prefix_hit; - std::string matching_list; + std::vector<SBPrefix> prefix_hits; + std::vector<GURL> urls(1); + + urls[0] = GURL(std::string("http://") + kEvil1Url1); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 1U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url1)); + + urls[0] = GURL(std::string("http://") + kEvil1Url2); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 1U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url2)); + + urls[0] = GURL(std::string("https://") + kEvil1Url2); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 1U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url2)); + + urls[0] = GURL(std::string("ftp://") + kEvil1Url2); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 1U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url2)); + + urls[0] = GURL("http://www.randomevil.com"); + EXPECT_FALSE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + + // Should match with query args stripped. + urls[0] = GURL(std::string("http://") + kEvil1Url2 + "?blah"); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 1U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url2)); + + // Should match with extra path stuff and query args stripped. + urls[0] = GURL(std::string("http://") + kEvil1Url1 + "foo/bar?blah"); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 1U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url1)); + + // First hit in redirect chain is malware. + urls.clear(); + urls.push_back(GURL(std::string("http://") + kEvil1Url1)); + urls.push_back(GURL("http://www.randomevil.com")); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 1U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url1)); + + // Middle hit in redirect chain is malware. + urls.clear(); + urls.push_back(GURL("http://www.randomevil.com")); + urls.push_back(GURL(std::string("http://") + kEvil1Url1)); + urls.push_back(GURL("http://www.randomevil2.com")); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 1U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url1)); + + // Final hit in redirect chain is malware. + urls.clear(); + urls.push_back(GURL("http://www.randomevil.com")); + urls.push_back(GURL(std::string("http://") + kEvil1Url1)); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 1U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url1)); + + // Multiple hits in redirect chain are in malware list. + urls.clear(); + urls.push_back(GURL(std::string("http://") + kEvil1Url1)); + urls.push_back(GURL(std::string("https://") + kEvil1Url2)); + EXPECT_TRUE(database_->ContainsDownloadUrl(urls, &prefix_hits)); + ASSERT_EQ(prefix_hits.size(), 2U); + EXPECT_EQ(prefix_hits[0], Sha256Prefix(kEvil1Url1)); + EXPECT_EQ(prefix_hits[1], Sha256Prefix(kEvil1Url2)); + database_.reset(); +} + +// Checks that the csd-whitelist is handled properly. +TEST_F(SafeBrowsingDatabaseTest, CsdWhitelist) { + database_.reset(); + MessageLoop loop(MessageLoop::TYPE_DEFAULT); + // We expect all calls to ContainsCsdWhitelistedUrl to be made from the IO + // thread. + BrowserThread io_thread(BrowserThread::IO, &loop); + + // If the whitelist is disabled everything should match the whitelist. + database_.reset(new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile(), + NULL, NULL)); + database_->Init(database_filename_); + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://www.phishig.com/")))); - EXPECT_TRUE(database_->ContainsDownloadUrl( - GURL(std::string("http://") + kEvil1Url1), &prefix_hit)); - EXPECT_EQ(prefix_hit, Sha256Prefix(kEvil1Url1)); + SafeBrowsingStoreFile* browse_store = new SafeBrowsingStoreFile(); + SafeBrowsingStoreFile* csd_whitelist_store = new SafeBrowsingStoreFile(); + database_.reset(new SafeBrowsingDatabaseNew(browse_store, NULL, + csd_whitelist_store)); + database_->Init(database_filename_); - EXPECT_TRUE(database_->ContainsDownloadUrl( - GURL(std::string("http://") + kEvil1Url2), &prefix_hit)); - EXPECT_EQ(prefix_hit, Sha256Prefix(kEvil1Url2)); + const char kGood1Host[] = "www.good1.com/"; + const char kGood1Url1[] = "www.good1.com/a/b.html"; + const char kGood1Url2[] = "www.good1.com/b/"; - EXPECT_TRUE(database_->ContainsDownloadUrl( - GURL(std::string("https://") + kEvil1Url2), &prefix_hit)); - EXPECT_EQ(prefix_hit, Sha256Prefix(kEvil1Url2)); + const char kGood2Host[] = "www.good2.com/"; + const char kGood2Url1[] = "www.good2.com/c"; // Should match '/c/bla'. - EXPECT_TRUE(database_->ContainsDownloadUrl( - GURL(std::string("ftp://") + kEvil1Url2), &prefix_hit)); - EXPECT_EQ(prefix_hit, Sha256Prefix(kEvil1Url2)); + SBChunkList chunks; + SBChunk chunk; + // Add two simple chunks to the csd whitelist. + InsertAddChunkHost2FullHashes(&chunk, 1, kGood1Host, + kGood1Url1, kGood1Url2); + chunks.push_back(chunk); + + chunk.hosts.clear(); + InsertAddChunkHostFullHashes(&chunk, 2, kGood2Host, kGood2Url1); + chunks.push_back(chunk); + + std::vector<SBListChunkRanges> lists; + EXPECT_TRUE(database_->UpdateStarted(&lists)); + database_->InsertChunks(safe_browsing_util::kCsdWhiteList, chunks); + database_->UpdateFinished(true); + + EXPECT_FALSE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://") + kGood1Host))); + + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://") + kGood1Url1))); + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://") + kGood1Url1 + "?a=b"))); + + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://") + kGood1Url2))); + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://") + kGood1Url2 + "/c.html"))); + + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("https://") + kGood1Url2 + "/c.html"))); + + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://") + kGood2Url1 + "/c"))); + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://") + kGood2Url1 + "/c?bla"))); + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://") + kGood2Url1 + "/c/bla"))); + + EXPECT_FALSE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://www.google.com/")))); + + // Test that the kill-switch works as intended. + chunks.clear(); + lists.clear(); + SBChunk chunk2; + InsertAddChunkHostFullHashes(&chunk2, 3, "sb-ssl.google.com/", + "sb-ssl.google.com/safebrowsing/csd/killswitch"); + chunks.push_back(chunk2); + + EXPECT_TRUE(database_->UpdateStarted(&lists)); + database_->InsertChunks(safe_browsing_util::kCsdWhiteList, chunks); + database_->UpdateFinished(true); + + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("https://") + kGood1Url2 + "/c.html"))); + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://www.google.com/")))); + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://www.phishing_url.com/")))); + + // Remove the kill-switch and verify that we can recover. + chunks.clear(); + lists.clear(); + SBChunk sub_chunk; + InsertSubChunkHostFullHash(&sub_chunk, 1, 3, + "sb-ssl.google.com/", + "sb-ssl.google.com/safebrowsing/csd/killswitch"); + chunks.push_back(sub_chunk); + + EXPECT_TRUE(database_->UpdateStarted(&lists)); + database_->InsertChunks(safe_browsing_util::kCsdWhiteList, chunks); + database_->UpdateFinished(true); + + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("https://") + kGood1Url2 + "/c.html"))); + EXPECT_TRUE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("https://") + kGood2Url1 + "/c/bla"))); + EXPECT_FALSE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://www.google.com/")))); + EXPECT_FALSE(database_->ContainsCsdWhitelistedUrl( + GURL(std::string("http://www.phishing_url.com/")))); - EXPECT_FALSE(database_->ContainsDownloadUrl(GURL("http://www.randomevil.com"), - &prefix_hit)); database_.reset(); } @@ -1262,7 +1457,9 @@ TEST_F(SafeBrowsingDatabaseTest, BinHashInsertLookup) { MessageLoop loop(MessageLoop::TYPE_DEFAULT); SafeBrowsingStoreFile* browse_store = new SafeBrowsingStoreFile(); SafeBrowsingStoreFile* download_store = new SafeBrowsingStoreFile(); - database_.reset(new SafeBrowsingDatabaseNew(browse_store, download_store)); + database_.reset(new SafeBrowsingDatabaseNew(browse_store, + download_store, + NULL)); database_->Init(database_filename_); SBChunkList chunks; @@ -1311,11 +1508,17 @@ TEST_F(SafeBrowsingDatabaseTest, EmptyUpdate) { database_->InsertChunks(safe_browsing_util::kMalwareList, chunks); database_->UpdateFinished(true); + // Get an older time to reset the lastmod time for detecting whether + // the file has been updated. + base::PlatformFileInfo before_info, after_info; + ASSERT_TRUE(file_util::GetFileInfo(filename, &before_info)); + const base::Time old_last_modified = + before_info.last_modified - base::TimeDelta::FromSeconds(10); + // Inserting another chunk updates the database file. The sleep is // needed because otherwise the entire test can finish w/in the // resolution of the lastmod time. - base::PlatformFileInfo before_info, after_info; - base::PlatformThread::Sleep(1500); + ASSERT_TRUE(file_util::SetLastModifiedTime(filename, old_last_modified)); ASSERT_TRUE(file_util::GetFileInfo(filename, &before_info)); EXPECT_TRUE(database_->UpdateStarted(&lists)); chunk.hosts.clear(); @@ -1329,7 +1532,7 @@ TEST_F(SafeBrowsingDatabaseTest, EmptyUpdate) { EXPECT_LT(before_info.last_modified, after_info.last_modified); // Deleting a chunk updates the database file. - base::PlatformThread::Sleep(1500); + ASSERT_TRUE(file_util::SetLastModifiedTime(filename, old_last_modified)); ASSERT_TRUE(file_util::GetFileInfo(filename, &before_info)); EXPECT_TRUE(database_->UpdateStarted(&lists)); AddDelChunk(safe_browsing_util::kMalwareList, chunk.chunk_number); @@ -1339,7 +1542,7 @@ TEST_F(SafeBrowsingDatabaseTest, EmptyUpdate) { // Simply calling |UpdateStarted()| then |UpdateFinished()| does not // update the database file. - base::PlatformThread::Sleep(1500); + ASSERT_TRUE(file_util::SetLastModifiedTime(filename, old_last_modified)); ASSERT_TRUE(file_util::GetFileInfo(filename, &before_info)); EXPECT_TRUE(database_->UpdateStarted(&lists)); database_->UpdateFinished(true); diff --git a/chrome/browser/safe_browsing/safe_browsing_service.cc b/chrome/browser/safe_browsing/safe_browsing_service.cc index 04ec974..9aceb11 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.cc +++ b/chrome/browser/safe_browsing/safe_browsing_service.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -23,12 +23,12 @@ #include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_paths.h" #include "chrome/common/chrome_switches.h" -#include "chrome/common/net/url_request_context_getter.h" #include "chrome/common/pref_names.h" #include "chrome/common/url_constants.h" #include "content/browser/browser_thread.h" #include "content/browser/tab_contents/tab_contents.h" #include "net/base/registry_controlled_domain.h" +#include "net/url_request/url_request_context_getter.h" #if defined(OS_WIN) #include "chrome/installer/util/browser_distribution.h" @@ -120,8 +120,7 @@ SafeBrowsingService::UnsafeResource::UnsafeResource() SafeBrowsingService::UnsafeResource::~UnsafeResource() {} SafeBrowsingService::SafeBrowsingCheck::SafeBrowsingCheck() - : url(NULL), - full_hash(NULL), + : full_hash(NULL), client(NULL), need_get_hash(false), result(SAFE), @@ -133,12 +132,19 @@ SafeBrowsingService::SafeBrowsingCheck::~SafeBrowsingCheck() {} void SafeBrowsingService::Client::OnSafeBrowsingResult( const SafeBrowsingCheck& check) { - if (check.url.get()) { + if (!check.urls.empty()) { + DCHECK(!check.full_hash.get()); - OnBrowseUrlCheckResult(*(check.url), check.result); - OnDownloadUrlCheckResult(*(check.url), check.result); + if (!check.is_download) { + DCHECK_EQ(1U, check.urls.size()); + OnBrowseUrlCheckResult(check.urls[0], check.result); + } else { + OnDownloadUrlCheckResult(check.urls, check.result); + } } else if (check.full_hash.get()) { - OnDownloadHashCheckResult(check.full_hash->full_hash, check.result); + OnDownloadHashCheckResult( + safe_browsing_util::SBFullHashToString(*check.full_hash), + check.result); } else { NOTREACHED(); } @@ -156,6 +162,7 @@ SafeBrowsingService::SafeBrowsingService() protocol_manager_(NULL), enabled_(false), enable_download_protection_(false), + enable_csd_whitelist_(false), update_in_progress_(false), database_update_in_progress_(false), closing_database_(false), @@ -196,7 +203,7 @@ bool SafeBrowsingService::DownloadBinHashNeeded() const { return enable_download_protection_ && CanReportStats(); } -bool SafeBrowsingService::CheckDownloadUrl(const GURL& url, +bool SafeBrowsingService::CheckDownloadUrl(const std::vector<GURL>& url_chain, Client* client) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); if (!enabled_ || !enable_download_protection_) @@ -205,7 +212,7 @@ bool SafeBrowsingService::CheckDownloadUrl(const GURL& url, // We need to check the database for url prefix, and later may fetch the url // from the safebrowsing backends. These need to be asynchronous. SafeBrowsingCheck* check = new SafeBrowsingCheck(); - check->url.reset(new GURL(url)); + check->urls = url_chain; StartDownloadCheck( check, client, @@ -238,6 +245,18 @@ bool SafeBrowsingService::CheckDownloadHash(const std::string& full_hash, return false; } +bool SafeBrowsingService::MatchCsdWhitelistUrl(const GURL& url) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); + if (!enabled_ || !enable_csd_whitelist_ || !MakeDatabaseAvailable()) { + // There is something funky going on here -- for example, perhaps the user + // has not restarted since enabling metrics reporting, so we haven't + // enabled the csd whitelist yet. Just to be safe we return true in this + // case. + return true; + } + return database_->ContainsCsdWhitelistedUrl(url); +} + bool SafeBrowsingService::CheckBrowseUrl(const GURL& url, Client* client) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); @@ -273,7 +292,7 @@ bool SafeBrowsingService::CheckBrowseUrl(const GURL& url, // Needs to be asynchronous, since we could be in the constructor of a // ResourceDispatcherHost event handler which can't pause there. SafeBrowsingCheck* check = new SafeBrowsingCheck(); - check->url.reset(new GURL(url)); + check->urls.push_back(url); check->client = client; check->result = SAFE; check->is_download = false; @@ -519,14 +538,10 @@ SafeBrowsingService::~SafeBrowsingService() { void SafeBrowsingService::OnIOInitialize( const std::string& client_key, const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter) { + net::URLRequestContextGetter* request_context_getter) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); enabled_ = true; - CommandLine* cmdline = CommandLine::ForCurrentProcess(); - enable_download_protection_ = - cmdline->HasSwitch(switches::kSbEnableDownloadProtection); - MakeDatabaseAvailable(); // On Windows, get the safe browsing client name from the browser @@ -542,6 +557,7 @@ void SafeBrowsingService::OnIOInitialize( std::string client_name("chromium"); #endif #endif + CommandLine* cmdline = CommandLine::ForCurrentProcess(); bool disable_auto_update = cmdline->HasSwitch(switches::kSbDisableAutoUpdate) || cmdline->HasSwitch(switches::kDisableBackgroundNetworking); @@ -585,7 +601,7 @@ void SafeBrowsingService::OnIOShutdown() { QueuedCheck queued = queued_checks_.front(); if (queued.client) { SafeBrowsingCheck sb_check; - sb_check.url.reset(new GURL(queued.url)); + sb_check.urls.push_back(queued.url); sb_check.client = queued.client; sb_check.result = SAFE; queued.client->OnSafeBrowsingResult(sb_check); @@ -658,7 +674,8 @@ SafeBrowsingDatabase* SafeBrowsingService::GetDatabase() { const base::TimeTicks before = base::TimeTicks::Now(); SafeBrowsingDatabase* database = - SafeBrowsingDatabase::Create(enable_download_protection_); + SafeBrowsingDatabase::Create(enable_download_protection_, + enable_csd_whitelist_); database->Init(path); { @@ -775,7 +792,7 @@ void SafeBrowsingService::DatabaseLoadComplete() { // the client). Since we're not the client, we have to convey this result. if (check.client && CheckBrowseUrl(check.url, check.client)) { SafeBrowsingCheck sb_check; - sb_check.url.reset(new GURL(check.url)); + sb_check.urls.push_back(check.url); sb_check.client = check.client; sb_check.result = SAFE; check.client->OnSafeBrowsingResult(sb_check); @@ -840,6 +857,7 @@ void SafeBrowsingService::DatabaseUpdateFinished(bool update_succeeded) { } void SafeBrowsingService::Start() { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); DCHECK(!safe_browsing_thread_.get()); safe_browsing_thread_.reset(new base::Thread("Chrome_SafeBrowsingThread")); if (!safe_browsing_thread_->Start()) @@ -847,6 +865,7 @@ void SafeBrowsingService::Start() { // Retrieve client MAC keys. PrefService* local_state = g_browser_process->local_state(); + DCHECK(local_state); std::string client_key, wrapped_key; if (local_state) { client_key = @@ -856,9 +875,27 @@ void SafeBrowsingService::Start() { } // We will issue network fetches using the default profile's request context. - scoped_refptr<URLRequestContextGetter> request_context_getter( + scoped_refptr<net::URLRequestContextGetter> request_context_getter( GetDefaultProfile()->GetRequestContext()); + CommandLine* cmdline = CommandLine::ForCurrentProcess(); + enable_download_protection_ = + !cmdline->HasSwitch(switches::kSbDisableDownloadProtection); + + // We only download the csd-whitelist if client-side phishing detection is + // enabled and if the user has opted in with stats collection. Note: we + // cannot check whether the metrics_service() object is created because it + // may be initialized after this method is called. +#ifdef OS_CHROMEOS + // Client-side detection is disabled on ChromeOS for now, so don't bother + // downloading the whitelist. + enable_csd_whitelist_ = false; +#else + enable_csd_whitelist_ = + (!cmdline->HasSwitch(switches::kDisableClientSidePhishingDetection) && + local_state && local_state->GetBoolean(prefs::kMetricsReportingEnabled)); +#endif + BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, NewRunnableMethod( @@ -929,10 +966,15 @@ bool SafeBrowsingService::HandleOneCheck( // Always calculate the index, for recording hits. int index = -1; - if (check->url.get() != NULL) - index = safe_browsing_util::GetUrlHashIndex(*(check->url), full_hashes); - else + if (!check->urls.empty()) { + for (size_t i = 0; i < check->urls.size(); ++i) { + index = safe_browsing_util::GetUrlHashIndex(check->urls[i], full_hashes); + if (index != -1) + break; + } + } else { index = safe_browsing_util::GetHashIndex(*(check->full_hash), full_hashes); + } // |client| is NULL if the request was cancelled. if (check->client) { @@ -984,7 +1026,7 @@ void SafeBrowsingService::DoDisplayBlockingPage( page_url = resource.original_url; } ReportSafeBrowsingHit(resource.url, page_url, referrer_url, is_subresource, - resource.threat_type); + resource.threat_type, std::string() /* post_data */); } SafeBrowsingBlockingPage::ShowBlockingPage(this, resource); @@ -997,7 +1039,8 @@ void SafeBrowsingService::ReportSafeBrowsingHit( const GURL& page_url, const GURL& referrer_url, bool is_subresource, - SafeBrowsingService::UrlCheckResult threat_type) { + SafeBrowsingService::UrlCheckResult threat_type, + const std::string& post_data) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); if (!CanReportStats()) return; @@ -1011,7 +1054,8 @@ void SafeBrowsingService::ReportSafeBrowsingHit( page_url, referrer_url, is_subresource, - threat_type)); + threat_type, + post_data)); } void SafeBrowsingService::ReportSafeBrowsingHitOnIOThread( @@ -1019,7 +1063,8 @@ void SafeBrowsingService::ReportSafeBrowsingHitOnIOThread( const GURL& page_url, const GURL& referrer_url, bool is_subresource, - SafeBrowsingService::UrlCheckResult threat_type) { + SafeBrowsingService::UrlCheckResult threat_type, + const std::string& post_data) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); if (!enabled_) return; @@ -1029,18 +1074,17 @@ void SafeBrowsingService::ReportSafeBrowsingHitOnIOThread( << threat_type; protocol_manager_->ReportSafeBrowsingHit(malicious_url, page_url, referrer_url, is_subresource, - threat_type); + threat_type, post_data); } // If the user had opted-in to send MalwareDetails, this gets called -// at the time that the blocking page is going away. -void SafeBrowsingService::ReportMalwareDetails( - scoped_refptr<MalwareDetails> details) { +// when the report is ready. +void SafeBrowsingService::SendSerializedMalwareDetails( + const std::string& serialized) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); - scoped_ptr<const std::string> serialized(details->GetSerializedReport()); - if (!serialized->empty()) { + if (!serialized.empty()) { DVLOG(1) << "Sending serialized malware details."; - protocol_manager_->ReportMalwareDetails(*serialized); + protocol_manager_->ReportMalwareDetails(serialized); } } @@ -1071,9 +1115,9 @@ void SafeBrowsingService::CheckDownloadUrlOnSBThread(SafeBrowsingCheck* check) { DCHECK_EQ(MessageLoop::current(), safe_browsing_thread_->message_loop()); DCHECK(enable_download_protection_); - SBPrefix prefix_hit; + std::vector<SBPrefix> prefix_hits; - if (!database_->ContainsDownloadUrl(*(check->url), &prefix_hit)) { + if (!database_->ContainsDownloadUrl(check->urls, &prefix_hits)) { // Good, we don't have hash for this url prefix. check->result = SAFE; BrowserThread::PostTask( @@ -1086,7 +1130,7 @@ void SafeBrowsingService::CheckDownloadUrlOnSBThread(SafeBrowsingCheck* check) { check->need_get_hash = true; check->prefix_hits.clear(); - check->prefix_hits.push_back(prefix_hit); + check->prefix_hits = prefix_hits; BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, NewRunnableMethod(this, &SafeBrowsingService::OnCheckDone, check)); diff --git a/chrome/browser/safe_browsing/safe_browsing_service.h b/chrome/browser/safe_browsing/safe_browsing_service.h index 2e4a596..0803a57 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service.h +++ b/chrome/browser/safe_browsing/safe_browsing_service.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -15,8 +15,8 @@ #include <vector> #include "base/hash_tables.h" -#include "base/ref_counted.h" -#include "base/scoped_ptr.h" +#include "base/memory/ref_counted.h" +#include "base/memory/scoped_ptr.h" #include "base/synchronization/lock.h" #include "base/task.h" #include "base/time.h" @@ -29,12 +29,15 @@ class PrefService; class SafeBrowsingDatabase; class SafeBrowsingProtocolManager; class SafeBrowsingServiceFactory; -class URLRequestContextGetter; namespace base { class Thread; } +namespace net { +class URLRequestContextGetter; +} + // Construction needs to happen on the main thread. class SafeBrowsingService : public base::RefCountedThreadSafe<SafeBrowsingService> { @@ -71,8 +74,8 @@ class SafeBrowsingService SafeBrowsingCheck(); ~SafeBrowsingCheck(); - // Either |url| or |prefix| is used to lookup database. - scoped_ptr<GURL> url; + // Either |urls| or |prefix| is used to lookup database. + std::vector<GURL> urls; scoped_ptr<SBFullHash> full_hash; Client* client; @@ -111,7 +114,7 @@ class SafeBrowsingService UrlCheckResult result) {} // Called when the result of checking a download URL is known. - virtual void OnDownloadUrlCheckResult(const GURL& url, + virtual void OnDownloadUrlCheckResult(const std::vector<GURL>& url_chain, UrlCheckResult result) {} // Called when the result of checking a download binary hash is known. @@ -154,12 +157,19 @@ class SafeBrowsingService // Check if the prefix for |url| is in safebrowsing download add lists. // Result will be passed to callback in |client|. - bool CheckDownloadUrl(const GURL& url, Client* client); + bool CheckDownloadUrl(const std::vector<GURL>& url_chain, Client* client); // Check if the prefix for |full_hash| is in safebrowsing binhash add lists. // Result will be passed to callback in |client|. virtual bool CheckDownloadHash(const std::string& full_hash, Client* client); + // Check if the |url| matches any of the full-length hashes from the + // client-side phishing detection whitelist. Returns true if there was a + // match and false otherwise. To make sure we are conservative we will return + // true if an error occurs. This method is expected to be called on the IO + // thread. + virtual bool MatchCsdWhitelistUrl(const GURL& url); + // Called on the IO thread to cancel a pending check if the result is no // longer needed. void CancelCheck(Client* client); @@ -209,6 +219,10 @@ class SafeBrowsingService bool enabled() const { return enabled_; } + bool download_protection_enabled() const { + return enabled_ && enable_download_protection_; + } + // Preference handling. static void RegisterPrefs(PrefService* prefs); @@ -229,17 +243,19 @@ class SafeBrowsingService // the current page is 'safe'. void LogPauseDelay(base::TimeDelta time); - // When a safebrowsing blocking page goes away, it calls this method - // so the service can serialize and send MalwareDetails. - virtual void ReportMalwareDetails(scoped_refptr<MalwareDetails> details); + // Called on the IO thread by the MalwareDetails with the serialized + // protocol buffer, so the service can send it over. + virtual void SendSerializedMalwareDetails(const std::string& serialized); // Report hits to the unsafe contents (malware, phishing, unsafe download URL) - // to the server. Can only be called on UI thread. + // to the server. Can only be called on UI thread. If |post_data| is + // non-empty, the request will be sent as a POST instead of a GET. void ReportSafeBrowsingHit(const GURL& malicious_url, const GURL& page_url, const GURL& referrer_url, bool is_subresource, - UrlCheckResult threat_type); + UrlCheckResult threat_type, + const std::string& post_data); protected: // Creates the safe browsing service. Need to initialize before using. @@ -270,7 +286,7 @@ class SafeBrowsingService // Called to initialize objects that are used on the io_thread. void OnIOInitialize(const std::string& client_key, const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter); + net::URLRequestContextGetter* request_context_getter); // Called to shutdown operations on the io_thread. void OnIOShutdown(); @@ -353,7 +369,8 @@ class SafeBrowsingService const GURL& page_url, const GURL& referrer_url, bool is_subresource, - UrlCheckResult threat_type); + UrlCheckResult threat_type, + const std::string& post_data); // Checks the download hash on safe_browsing_thread_. void CheckDownloadHashOnSBThread(SafeBrowsingCheck* check); @@ -413,6 +430,10 @@ class SafeBrowsingService // so we allow this feature to be exersized. bool enable_download_protection_; + // Indicate if client-side phishing detection whitelist should be enabled + // or not. + bool enable_csd_whitelist_; + // The SafeBrowsing thread that runs database operations. // // Note: Functions that run on this thread should run synchronously and return diff --git a/chrome/browser/safe_browsing/safe_browsing_service_browsertest.cc b/chrome/browser/safe_browsing/safe_browsing_service_browsertest.cc index 44c4be8..6fc8e84 100644 --- a/chrome/browser/safe_browsing/safe_browsing_service_browsertest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_service_browsertest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -7,9 +7,9 @@ // service. #include "base/command_line.h" +#include "base/memory/ref_counted.h" #include "base/metrics/histogram.h" -#include "base/ref_counted.h" -#include "base/sha2.h" +#include "crypto/sha2.h" #include "chrome/browser/browser_process.h" #include "chrome/browser/safe_browsing/safe_browsing_database.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" @@ -51,26 +51,28 @@ class TestSafeBrowsingDatabase : public SafeBrowsingDatabase { std::vector<SBPrefix>* prefix_hits, std::vector<SBFullHashResult>* full_hits, base::Time last_update) { + std::vector<GURL> urls(1, url); return ContainsUrl(safe_browsing_util::kMalwareList, safe_browsing_util::kPhishingList, - url, prefix_hits, full_hits); + urls, prefix_hits, full_hits); } - virtual bool ContainsDownloadUrl(const GURL& url, - SBPrefix* prefix_hit) { + virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, + std::vector<SBPrefix>* prefix_hits) { std::vector<SBFullHashResult> full_hits; - std::vector<SBPrefix> prefix_hits; bool found = ContainsUrl(safe_browsing_util::kBinUrlList, safe_browsing_util::kBinHashList, - url, &prefix_hits, &full_hits); + urls, prefix_hits, &full_hits); if (!found) return false; - DCHECK_EQ(1U, prefix_hits.size()); - *prefix_hit = prefix_hits[0]; + DCHECK_LE(1U, prefix_hits->size()); return true; } virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) { return download_digest_prefix_.count(prefix) > 0; } + virtual bool ContainsCsdWhitelistedUrl(const GURL& url) { + return true; + } virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) { ADD_FAILURE() << "Not implemented."; return false; @@ -115,23 +117,31 @@ class TestSafeBrowsingDatabase : public SafeBrowsingDatabase { bool ContainsUrl(const std::string& list_name0, const std::string& list_name1, - const GURL& url, + const std::vector<GURL>& urls, std::vector<SBPrefix>* prefix_hits, std::vector<SBFullHashResult>* full_hits) { - base::hash_map<std::string, Hits>::const_iterator - badurls_it = badurls_.find(url.spec()); - - if (badurls_it == badurls_.end()) - return false; + bool hit = false; + for (size_t i = 0; i < urls.size(); ++i) { + const GURL& url = urls[i]; + base::hash_map<std::string, Hits>::const_iterator + badurls_it = badurls_.find(url.spec()); + + if (badurls_it == badurls_.end()) + continue; + + if (badurls_it->second.list_name == list_name0 || + badurls_it->second.list_name == list_name1) { + prefix_hits->insert(prefix_hits->end(), + badurls_it->second.prefix_hits.begin(), + badurls_it->second.prefix_hits.end()); + full_hits->insert(full_hits->end(), + badurls_it->second.full_hits.begin(), + badurls_it->second.full_hits.end()); + hit = true; + } - if (badurls_it->second.list_name == list_name0 || - badurls_it->second.list_name == list_name1) { - *prefix_hits = badurls_it->second.prefix_hits; - *full_hits = badurls_it->second.full_hits; - return true; } - - return false; + return hit; } base::hash_map<std::string, Hits> badurls_; @@ -145,7 +155,8 @@ class TestSafeBrowsingDatabaseFactory : public SafeBrowsingDatabaseFactory { virtual ~TestSafeBrowsingDatabaseFactory() {} virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( - bool enable_download_protection) { + bool enable_download_protection, + bool enable_client_side_whitelist) { db_ = new TestSafeBrowsingDatabase(); return db_; } @@ -165,7 +176,7 @@ class TestProtocolManager : public SafeBrowsingProtocolManager { const std::string& client_name, const std::string& client_key, const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter, + net::URLRequestContextGetter* request_context_getter, const std::string& info_url_prefix, const std::string& mackey_url_prefix, bool disable_auto_update) @@ -221,7 +232,7 @@ class TestSBProtocolManagerFactory : public SBProtocolManagerFactory { const std::string& client_name, const std::string& client_key, const std::string& wrapped_key, - URLRequestContextGetter* request_context_getter, + net::URLRequestContextGetter* request_context_getter, const std::string& info_url_prefix, const std::string& mackey_url_prefix, bool disable_auto_update) { @@ -252,8 +263,8 @@ class SafeBrowsingServiceTest : public InProcessBrowserTest { std::string host; std::string path; safe_browsing_util::CanonicalizeUrl(url, &host, &path, NULL); - base::SHA256HashString(host + path, &full_hash->hash, - sizeof(SBFullHash)); + crypto::SHA256HashString(host + path, &full_hash->hash, + sizeof(SBFullHash)); full_hash->list_name = list_name; full_hash->add_chunk_id = add_chunk_id; } @@ -289,8 +300,6 @@ class SafeBrowsingServiceTest : public InProcessBrowserTest { // This test will fill up the database using testing prefixes // and urls. command_line->AppendSwitch(switches::kSbDisableAutoUpdate); - - command_line->AppendSwitch(switches::kSbEnableDownloadProtection); } virtual void SetUpInProcessBrowserTestFixture() { @@ -381,6 +390,45 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingServiceTest, Malware) { EXPECT_TRUE(ShowingInterstitialPage()); } +const char kPrefetchMalwarePage[] = "files/safe_browsing/prefetch_malware.html"; + +// This test confirms that prefetches don't themselves get the +// interstitial treatment. +IN_PROC_BROWSER_TEST_F(SafeBrowsingServiceTest, Prefetch) { + GURL url = test_server()->GetURL(kPrefetchMalwarePage); + GURL malware_url = test_server()->GetURL(kMalwarePage); + + class SetPrefetchForTest { + public: + explicit SetPrefetchForTest(bool prefetch) + : old_prefetch_state_(ResourceDispatcherHost::is_prefetch_enabled()) { + ResourceDispatcherHost::set_is_prefetch_enabled(prefetch); + } + + ~SetPrefetchForTest() { + ResourceDispatcherHost::set_is_prefetch_enabled(old_prefetch_state_); + } + private: + bool old_prefetch_state_; + } set_prefetch_for_test(true); + + // Even though we have added this uri to the safebrowsing database and + // getfullhash result, we should not see the interstitial page since the + // only malware was a prefetch target. + SBFullHashResult malware_full_hash; + int chunk_id = 0; + GenUrlFullhashResult(malware_url, safe_browsing_util::kMalwareList, + chunk_id, &malware_full_hash); + SetupResponseForUrl(malware_url, malware_full_hash); + ui_test_utils::NavigateToURL(browser(), url); + EXPECT_FALSE(ShowingInterstitialPage()); + + // However, when we navigate to the malware page, we should still get + // the interstitial. + ui_test_utils::NavigateToURL(browser(), malware_url); + EXPECT_TRUE(ShowingInterstitialPage()); +} + } // namespace class TestSBClient @@ -397,12 +445,12 @@ class TestSBClient return result_; } - void CheckDownloadUrl(const GURL& url) { + void CheckDownloadUrl(const std::vector<GURL>& url_chain) { BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, NewRunnableMethod(this, &TestSBClient::CheckDownloadUrlOnIOThread, - url)); + url_chain)); ui_test_utils::RunMessageLoop(); // Will stop in OnDownloadUrlCheckResult. } @@ -416,8 +464,8 @@ class TestSBClient } private: - void CheckDownloadUrlOnIOThread(const GURL& url) { - safe_browsing_service_->CheckDownloadUrl(url, this); + void CheckDownloadUrlOnIOThread(const std::vector<GURL>& url_chain) { + safe_browsing_service_->CheckDownloadUrl(url_chain, this); } void CheckDownloadHashOnIOThread(const std::string& full_hash) { @@ -425,7 +473,7 @@ class TestSBClient } // Called when the result of checking a download URL is known. - void OnDownloadUrlCheckResult(const GURL& url, + void OnDownloadUrlCheckResult(const std::vector<GURL>& url_chain, SafeBrowsingService::UrlCheckResult result) { result_ = result; BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, @@ -456,9 +504,37 @@ namespace { IN_PROC_BROWSER_TEST_F(SafeBrowsingServiceTest, CheckDownloadUrl) { GURL badbin_url = test_server()->GetURL(kMalwareFile); + std::vector<GURL> badbin_urls(1, badbin_url); + + scoped_refptr<TestSBClient> client(new TestSBClient); + client->CheckDownloadUrl(badbin_urls); + + // Since badbin_url is not in database, it is considered to be safe. + EXPECT_EQ(SafeBrowsingService::SAFE, client->GetResult()); + + SBFullHashResult full_hash_result; + int chunk_id = 0; + GenUrlFullhashResult(badbin_url, safe_browsing_util::kBinUrlList, + chunk_id, &full_hash_result); + SetupResponseForUrl(badbin_url, full_hash_result); + + client->CheckDownloadUrl(badbin_urls); + + // Now, the badbin_url is not safe since it is added to download database. + EXPECT_EQ(SafeBrowsingService::BINARY_MALWARE_URL, client->GetResult()); +} + +IN_PROC_BROWSER_TEST_F(SafeBrowsingServiceTest, CheckDownloadUrlRedirects) { + GURL original_url = test_server()->GetURL(kEmptyPage); + GURL badbin_url = test_server()->GetURL(kMalwareFile); + GURL final_url = test_server()->GetURL(kEmptyPage); + std::vector<GURL> badbin_urls; + badbin_urls.push_back(original_url); + badbin_urls.push_back(badbin_url); + badbin_urls.push_back(final_url); scoped_refptr<TestSBClient> client(new TestSBClient); - client->CheckDownloadUrl(badbin_url); + client->CheckDownloadUrl(badbin_urls); // Since badbin_url is not in database, it is considered to be safe. EXPECT_EQ(SafeBrowsingService::SAFE, client->GetResult()); @@ -469,7 +545,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingServiceTest, CheckDownloadUrl) { chunk_id, &full_hash_result); SetupResponseForUrl(badbin_url, full_hash_result); - client->CheckDownloadUrl(badbin_url); + client->CheckDownloadUrl(badbin_urls); // Now, the badbin_url is not safe since it is added to download database. EXPECT_EQ(SafeBrowsingService::BINARY_MALWARE_URL, client->GetResult()); @@ -498,6 +574,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingServiceTest, CheckDownloadHash) { IN_PROC_BROWSER_TEST_F(SafeBrowsingServiceTest, CheckDownloadUrlTimedOut) { GURL badbin_url = test_server()->GetURL(kMalwareFile); + std::vector<GURL> badbin_urls(1, badbin_url); scoped_refptr<TestSBClient> client(new TestSBClient); SBFullHashResult full_hash_result; @@ -505,7 +582,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingServiceTest, CheckDownloadUrlTimedOut) { GenUrlFullhashResult(badbin_url, safe_browsing_util::kBinUrlList, chunk_id, &full_hash_result); SetupResponseForUrl(badbin_url, full_hash_result); - client->CheckDownloadUrl(badbin_url); + client->CheckDownloadUrl(badbin_urls); // badbin_url is not safe since it is added to download database. EXPECT_EQ(SafeBrowsingService::BINARY_MALWARE_URL, client->GetResult()); @@ -520,7 +597,7 @@ IN_PROC_BROWSER_TEST_F(SafeBrowsingServiceTest, CheckDownloadUrlTimedOut) { int64 default_urlcheck_timeout = DownloadUrlCheckTimeout(sb_service); IntroduceGetHashDelay(kOneSec); SetDownloadUrlCheckTimeout(sb_service, kOneMs); - client->CheckDownloadUrl(badbin_url); + client->CheckDownloadUrl(badbin_urls); // There should be a timeout and the hash would be considered as safe. EXPECT_EQ(SafeBrowsingService::SAFE, client->GetResult()); diff --git a/chrome/browser/safe_browsing/safe_browsing_store.h b/chrome/browser/safe_browsing/safe_browsing_store.h index b099d9f..07b6128 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store.h +++ b/chrome/browser/safe_browsing/safe_browsing_store.h @@ -177,6 +177,10 @@ class SafeBrowsingStore { // Get all Add prefixes out from the store. virtual bool GetAddPrefixes(std::vector<SBAddPrefix>* add_prefixes) = 0; + // Get all add full-length hashes. + virtual bool GetAddFullHashes( + std::vector<SBAddFullHash>* add_full_hashes) = 0; + // Start an update. None of the following methods should be called // unless this returns true. If this returns true, the update // should be terminated by FinishUpdate() or CancelUpdate(). diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.cc b/chrome/browser/safe_browsing/safe_browsing_store_file.cc index fcdb4ce..bfb6062 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_file.cc +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.cc @@ -306,6 +306,31 @@ bool SafeBrowsingStoreFile::GetAddPrefixes( return true; } +bool SafeBrowsingStoreFile::GetAddFullHashes( + std::vector<SBAddFullHash>* add_full_hashes) { + add_full_hashes->clear(); + + file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb")); + if (file.get() == NULL) return false; + + FileHeader header; + if (!ReadAndVerifyHeader(filename_, file.get(), &header, NULL)) + return OnCorruptDatabase(); + + size_t offset = + header.add_chunk_count * sizeof(int32) + + header.sub_chunk_count * sizeof(int32) + + header.add_prefix_count * sizeof(SBAddPrefix) + + header.sub_prefix_count * sizeof(SBSubPrefix); + if (!FileSkip(offset, file.get())) + return false; + + return ReadToVector(add_full_hashes, + header.add_hash_count, + file.get(), + NULL); +} + bool SafeBrowsingStoreFile::WriteAddHash(int32 chunk_id, base::Time receive_time, const SBFullHash& full_hash) { diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.h b/chrome/browser/safe_browsing/safe_browsing_store_file.h index 88e1b2f..72a05f7 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_file.h +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.h @@ -115,8 +115,10 @@ class SafeBrowsingStoreFile : public SafeBrowsingStore { // Delete any on-disk files, including the permanent storage. virtual bool Delete(); - // Get all Add prefixes from the store. + // Get all add hash prefixes and full-length hashes, respectively, from + // the store. virtual bool GetAddPrefixes(std::vector<SBAddPrefix>* add_prefixes); + virtual bool GetAddFullHashes(std::vector<SBAddFullHash>* add_full_hashes); virtual bool BeginChunk(); @@ -161,6 +163,7 @@ class SafeBrowsingStoreFile : public SafeBrowsingStore { const std::set<SBPrefix>& prefix_misses, std::vector<SBAddPrefix>* add_prefixes_result, std::vector<SBAddFullHash>* add_full_hashes_result); + // Enumerate different format-change events for histogramming // purposes. DO NOT CHANGE THE ORDERING OF THESE VALUES. // TODO(shess): Remove this once the format change is complete. diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h index 5fa8ece..0868531 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h +++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -8,7 +8,7 @@ #include "chrome/browser/safe_browsing/safe_browsing_store.h" -#include "base/sha2.h" +#include "crypto/sha2.h" #include "testing/gtest/include/gtest/gtest.h" // Helper code for testing that a SafeBrowsingStore implementation @@ -17,7 +17,7 @@ // Helper to make it easy to initialize SBFullHash constants. inline const SBFullHash SBFullHashFromString(const char* str) { SBFullHash h; - base::SHA256HashString(str, &h.full_hash, sizeof(h.full_hash)); + crypto::SHA256HashString(str, &h.full_hash, sizeof(h.full_hash)); return h; } diff --git a/chrome/browser/safe_browsing/safe_browsing_test.cc b/chrome/browser/safe_browsing/safe_browsing_test.cc index e17cbd5..80ba01d 100644 --- a/chrome/browser/safe_browsing/safe_browsing_test.cc +++ b/chrome/browser/safe_browsing/safe_browsing_test.cc @@ -289,11 +289,16 @@ class SafeBrowsingServiceTest : public InProcessBrowserTest { // Makes sure the auto update is not triggered. This test will force the // update when needed. command_line->AppendSwitch(switches::kSbDisableAutoUpdate); + // This test uses loopback. No need to use IPv6 especially it makes // local requests slow on Windows trybot when ipv6 local address [::1] // is not setup. command_line->AppendSwitch(switches::kDisableIPv6); + // TODO(lzheng): The test server does not understand download related + // requests. We need to fix the server. + command_line->AppendSwitch(switches::kSbDisableDownloadProtection); + // In this test, we fetch SafeBrowsing data and Mac key from the same // server. Although in real production, they are served from different // servers. @@ -356,7 +361,8 @@ class SafeBrowsingServiceTestHelper &SafeBrowsingServiceTestHelper::OnCheckUrlDone)); } virtual void OnDownloadUrlCheckResult( - const GURL& url, SafeBrowsingService::UrlCheckResult result) { + const std::vector<GURL>& url_chain, + SafeBrowsingService::UrlCheckResult result) { // TODO(lzheng): Add test for DownloadUrl. } diff --git a/chrome/browser/safe_browsing/safe_browsing_util.cc b/chrome/browser/safe_browsing/safe_browsing_util.cc index 06a18ca..7b8aa9a 100644 --- a/chrome/browser/safe_browsing/safe_browsing_util.cc +++ b/chrome/browser/safe_browsing/safe_browsing_util.cc @@ -1,13 +1,13 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/safe_browsing/safe_browsing_util.h" #include "base/base64.h" -#include "base/hmac.h" -#include "base/sha2.h" #include "base/string_util.h" +#include "crypto/hmac.h" +#include "crypto/sha2.h" #include "chrome/browser/google/google_util.h" #include "googleurl/src/gurl.h" #include "googleurl/src/url_util.h" @@ -166,14 +166,8 @@ namespace safe_browsing_util { const char kMalwareList[] = "goog-malware-shavar"; const char kPhishingList[] = "goog-phish-shavar"; const char kBinUrlList[] = "goog-badbinurl-shavar"; -const char kBinHashList[] = "goog-badbinhash-shavar"; - -// Keywords to identify a list type from listname. -// TODO(lzheng): check if this can be replaced by full listnames. -const char kMalwareListKey[] = "-malware-"; -const char kPhishingListKey[] = "-phish-"; -const char kBinUrlListKey[] = "-badbinurl-"; -const char kBinHashListKey[] = "-badbinhash-"; +const char kBinHashList[] = "goog-badbin-digestvar"; +const char kCsdWhiteList[] = "goog-csdwhite-sha256"; int GetListId(const std::string& name) { int id; @@ -183,8 +177,10 @@ int GetListId(const std::string& name) { id = PHISH; } else if (name == safe_browsing_util::kBinUrlList) { id = BINURL; - } else if (name == safe_browsing_util::kBinHashList) { + } else if (name == safe_browsing_util::kBinHashList) { id = BINHASH; + } else if (name == safe_browsing_util::kCsdWhiteList) { + id = CSDWHITELIST; } else { id = INVALID; } @@ -205,6 +201,9 @@ bool GetListName(int list_id, std::string* list) { case BINHASH: *list = safe_browsing_util::kBinHashList; break; + case CSDWHITELIST: + *list = safe_browsing_util::kCsdWhiteList; + break; default: return false; } @@ -437,9 +436,9 @@ int GetUrlHashIndex(const GURL& url, for (size_t h = 0; h < hosts.size(); ++h) { for (size_t p = 0; p < paths.size(); ++p) { SBFullHash key; - base::SHA256HashString(hosts[h] + paths[p], - key.full_hash, - sizeof(SBFullHash)); + crypto::SHA256HashString(hosts[h] + paths[p], + key.full_hash, + sizeof(SBFullHash)); int index = GetHashIndex(key, full_hashes); if (index != -1) return index; } @@ -449,19 +448,19 @@ int GetUrlHashIndex(const GURL& url, } bool IsPhishingList(const std::string& list_name) { - return list_name.find(kPhishingListKey) != std::string::npos; + return list_name.compare(kPhishingList) == 0; } bool IsMalwareList(const std::string& list_name) { - return list_name.find(kMalwareListKey) != std::string::npos; + return list_name.compare(kMalwareList) == 0; } bool IsBadbinurlList(const std::string& list_name) { - return list_name.find(kBinUrlListKey) != std::string::npos; + return list_name.compare(kBinUrlList) == 0; } bool IsBadbinhashList(const std::string& list_name) { - return list_name.find(kBinHashListKey) != std::string::npos; + return list_name.compare(kBinHashList) == 0; } static void DecodeWebSafe(std::string* decoded) { @@ -486,7 +485,7 @@ bool VerifyMAC(const std::string& key, const std::string& mac, std::string decoded_mac; base::Base64Decode(mac_copy, &decoded_mac); - base::HMAC hmac(base::HMAC::SHA1); + crypto::HMAC hmac(crypto::HMAC::SHA1); if (!hmac.Init(decoded_key)) return false; const std::string data_str(data, data_length); @@ -521,8 +520,12 @@ GURL GeneratePhishingReportUrl(const std::string& report_page, } void StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out) { - DCHECK_EQ(static_cast<size_t>(base::SHA256_LENGTH), hash_in.size()); - memcpy(hash_out->full_hash, hash_in.data(), base::SHA256_LENGTH); + DCHECK_EQ(static_cast<size_t>(crypto::SHA256_LENGTH), hash_in.size()); + memcpy(hash_out->full_hash, hash_in.data(), crypto::SHA256_LENGTH); } +std::string SBFullHashToString(const SBFullHash& hash) { + DCHECK_EQ(static_cast<size_t>(crypto::SHA256_LENGTH), sizeof(hash.full_hash)); + return std::string(hash.full_hash, sizeof(hash.full_hash)); +} } // namespace safe_browsing_util diff --git a/chrome/browser/safe_browsing/safe_browsing_util.h b/chrome/browser/safe_browsing/safe_browsing_util.h index 5b01b44..d2e9204 100644 --- a/chrome/browser/safe_browsing/safe_browsing_util.h +++ b/chrome/browser/safe_browsing/safe_browsing_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -21,7 +21,7 @@ class GURL; class SBEntry; // A truncated hash's type. -typedef int SBPrefix; +typedef int32 SBPrefix; // Container for holding a chunk URL and the MAC of the contents of the URL. struct ChunkUrl { @@ -36,8 +36,12 @@ union SBFullHash { SBPrefix prefix; }; -inline bool operator==(const SBFullHash& rhash, const SBFullHash& lhash) { - return memcmp(rhash.full_hash, lhash.full_hash, sizeof(SBFullHash)) == 0; +inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) { + return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0; +} + +inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) { + return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0; } // Container for information about a specific host in an add/sub chunk. @@ -260,6 +264,8 @@ extern const char kPhishingList[]; // Binary Download list names. extern const char kBinUrlList[]; extern const char kBinHashList[]; +// SafeBrowsing client-side detection whitelist list name. +extern const char kCsdWhiteList[]; enum ListType { INVALID = -1, @@ -267,6 +273,7 @@ enum ListType { PHISH = 1, BINURL = 2, BINHASH = 3, + CSDWHITELIST = 4, }; // Maps a list name to ListType. @@ -313,7 +320,7 @@ GURL GeneratePhishingReportUrl(const std::string& report_page, const std::string& url_to_report); void StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out); - +std::string SBFullHashToString(const SBFullHash& hash_out); } // namespace safe_browsing_util #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ diff --git a/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc index a12c3ff..be20d31 100644 --- a/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_util_unittest.cc @@ -1,11 +1,11 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include <algorithm> -#include "base/sha2.h" #include "base/string_util.h" +#include "crypto/sha2.h" #include "chrome/browser/safe_browsing/safe_browsing_util.h" #include "googleurl/src/gurl.h" #include "testing/gtest/include/gtest/gtest.h" @@ -283,7 +283,7 @@ TEST(SafeBrowsingUtilTest, CanonicalizeUrl) { TEST(SafeBrowsingUtilTest, GetUrlHashIndex) { GURL url("http://www.evil.com/phish.html"); SBFullHashResult full_hash; - base::SHA256HashString(url.host() + url.path(), + crypto::SHA256HashString(url.host() + url.path(), &full_hash.hash, sizeof(SBFullHash)); std::vector<SBFullHashResult> full_hashes; @@ -335,11 +335,14 @@ TEST(SafeBrowsingUtilTest, ListIdVerification) { EXPECT_EQ(1, safe_browsing_util::BINHASH % 2); } -TEST(SafeBrowsingUtilTest, StringToSBFullHash) { +TEST(SafeBrowsingUtilTest, StringToSBFullHashAndSBFullHashToString) { // 31 chars plus the last \0 as full_hash. const std::string hash_in = "12345678902234567890323456789012"; SBFullHash hash_out; safe_browsing_util::StringToSBFullHash(hash_in, &hash_out); EXPECT_EQ(0x34333231, hash_out.prefix); EXPECT_EQ(0, memcmp(hash_in.data(), hash_out.full_hash, sizeof(SBFullHash))); + + std::string hash_final = safe_browsing_util::SBFullHashToString(hash_out); + EXPECT_EQ(hash_in, hash_final); } |