diff options
author | noelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-06-15 13:32:23 +0000 |
---|---|---|
committer | noelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-06-15 13:32:23 +0000 |
commit | a6ac3206694be8051a06e86855bc95e866a4baef (patch) | |
tree | e30b5d07452bc561f0bb249353e243ad70006f39 /chrome/browser/safe_browsing | |
parent | e121faef105636fff81929dc57fbc098d43c9030 (diff) | |
download | chromium_src-a6ac3206694be8051a06e86855bc95e866a4baef.zip chromium_src-a6ac3206694be8051a06e86855bc95e866a4baef.tar.gz chromium_src-a6ac3206694be8051a06e86855bc95e866a4baef.tar.bz2 |
Create a browser feature extractor that runs after the renderer has
decided that a page is phishing according to the extracted features
in the renderer. For now we don't use these features in the classification.
We only use them on the server side.
Currently, the browser feature extractor extracts a bunch of features based
on the history service (e.g., has the user seen this URL before).
BUG=None
TEST=BrowserFeatureExtractorTest
Review URL: http://codereview.chromium.org/7119003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@89178 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing')
6 files changed, 672 insertions, 14 deletions
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc new file mode 100644 index 0000000..70a9bf6 --- /dev/null +++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc @@ -0,0 +1,296 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/browser_feature_extractor.h" + +#include <map> +#include <utility> + +#include "base/stl_util-inl.h" +#include "base/task.h" +#include "base/time.h" +#include "chrome/common/safe_browsing/csd.pb.h" +#include "chrome/browser/history/history.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/browser/profiles/profile.h" +#include "content/common/page_transition_types.h" +#include "content/browser/browser_thread.h" +#include "content/browser/cancelable_request.h" +#include "content/browser/tab_contents/tab_contents.h" +#include "googleurl/src/gurl.h" + +namespace safe_browsing { +namespace features { +const char kUrlHistoryVisitCount[] = "UrlHistoryVisitCount"; +const char kUrlHistoryTypedCount[] = "UrlHistoryTypedCount"; +const char kUrlHistoryLinkCount[] = "UrlHistoryLinkCount"; +const char kUrlHistoryVisitCountMoreThan24hAgo[] = + "UrlHistoryVisitCountMoreThan24hAgo"; +const char kHttpHostVisitCount[] = "HttpHostVisitCount"; +const char kHttpsHostVisitCount[] = "HttpsHostVisitCount"; +const char kFirstHttpHostVisitMoreThan24hAgo[] = + "FirstHttpHostVisitMoreThan24hAgo"; +const char kFirstHttpsHostVisitMoreThan24hAgo[] = + "FirstHttpsHostVisitMoreThan24hAgo"; +} // namespace features + +static void AddFeature(const std::string& feature_name, + double feature_value, + ClientPhishingRequest* request) { + DCHECK(request); + ClientPhishingRequest::Feature* feature = + request->add_non_model_feature_map(); + feature->set_name(feature_name); + feature->set_value(feature_value); + VLOG(2) << "Browser feature: " << feature->name() << " " << feature->value(); +} + +BrowserFeatureExtractor::BrowserFeatureExtractor(TabContents* tab) + : tab_(tab), + ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { + DCHECK(tab); +} + +BrowserFeatureExtractor::~BrowserFeatureExtractor() { + method_factory_.RevokeAll(); + // Delete all the pending extractions (delete callback and request objects). + STLDeleteContainerPairPointers(pending_extractions_.begin(), + pending_extractions_.end()); + // Also cancel all the pending history service queries. + HistoryService* history; + bool success = GetHistoryService(&history); + DCHECK(success || pending_queries_.size() == 0); + // Cancel all the pending history lookups and cleanup the memory. + for (PendingQueriesMap::iterator it = pending_queries_.begin(); + it != pending_queries_.end(); ++it) { + if (history) { + history->CancelRequest(it->first); + } + ExtractionData& extraction = it->second; + delete extraction.first; // delete request + delete extraction.second; // delete callback + } + pending_queries_.clear(); +} + +void BrowserFeatureExtractor::ExtractFeatures(ClientPhishingRequest* request, + DoneCallback* callback) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); + DCHECK(request); + DCHECK_EQ(0U, request->url().find("http:")); + DCHECK(callback); + if (!callback) { + DLOG(ERROR) << "ExtractFeatures called without a callback object"; + return; + } + pending_extractions_.insert(std::make_pair(request, callback)); + MessageLoop::current()->PostTask( + FROM_HERE, + method_factory_.NewRunnableMethod( + &BrowserFeatureExtractor::StartExtractFeatures, + request, callback)); +} + +void BrowserFeatureExtractor::StartExtractFeatures( + ClientPhishingRequest* request, + DoneCallback* callback) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); + ExtractionData extraction = std::make_pair(request, callback); + size_t removed = pending_extractions_.erase(extraction); + DCHECK_EQ(1U, removed); + HistoryService* history; + if (!request || !request->IsInitialized() || !GetHistoryService(&history)) { + callback->Run(false, request); + return; + } + CancelableRequestProvider::Handle handle = history->QueryURL( + GURL(request->url()), + true /* wants_visits */, + &request_consumer_, + NewCallback(this, + &BrowserFeatureExtractor::QueryUrlHistoryDone)); + + StorePendingQuery(handle, request, callback); +} + +void BrowserFeatureExtractor::QueryUrlHistoryDone( + CancelableRequestProvider::Handle handle, + bool success, + const history::URLRow* row, + history::VisitVector* visits) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); + ClientPhishingRequest* request; + DoneCallback* callback; + if (!GetPendingQuery(handle, &request, &callback)) { + DLOG(FATAL) << "No pending history query found"; + return; + } + DCHECK(request); + DCHECK(callback); + if (!success) { + // URL is not found in the history. In practice this should not + // happen (unless there is a real error) because we just visited + // that URL. + callback->Run(false, request); + return; + } + AddFeature(features::kUrlHistoryVisitCount, + static_cast<double>(row->visit_count()), + request); + + base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1); + int num_visits_24h_ago = 0; + int num_visits_typed = 0; + int num_visits_link = 0; + for (history::VisitVector::const_iterator it = visits->begin(); + it != visits->end(); ++it) { + if (!PageTransition::IsMainFrame(it->transition)) { + continue; + } + if (it->visit_time < threshold) { + ++num_visits_24h_ago; + } + PageTransition::Type transition = PageTransition::StripQualifier( + it->transition); + if (transition == PageTransition::TYPED) { + ++num_visits_typed; + } else if (transition == PageTransition::LINK) { + ++num_visits_link; + } + } + AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo, + static_cast<double>(num_visits_24h_ago), + request); + AddFeature(features::kUrlHistoryTypedCount, + static_cast<double>(num_visits_typed), + request); + AddFeature(features::kUrlHistoryLinkCount, + static_cast<double>(num_visits_link), + request); + + // Issue next history lookup for host visits. + HistoryService* history; + if (!GetHistoryService(&history)) { + callback->Run(false, request); + return; + } + CancelableRequestProvider::Handle next_handle = + history->GetVisibleVisitCountToHost( + GURL(request->url()), + &request_consumer_, + NewCallback(this, &BrowserFeatureExtractor::QueryHttpHostVisitsDone)); + StorePendingQuery(next_handle, request, callback); +} + +void BrowserFeatureExtractor::QueryHttpHostVisitsDone( + CancelableRequestProvider::Handle handle, + bool success, + int num_visits, + base::Time first_visit) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); + ClientPhishingRequest* request; + DoneCallback* callback; + if (!GetPendingQuery(handle, &request, &callback)) { + DLOG(FATAL) << "No pending history query found"; + return; + } + DCHECK(request); + DCHECK(callback); + if (!success) { + callback->Run(false, request); + return; + } + SetHostVisitsFeatures(num_visits, first_visit, true, request); + + // Same lookup but for the HTTPS URL. + HistoryService* history; + if (!GetHistoryService(&history)) { + callback->Run(false, request); + return; + } + std::string https_url = request->url(); + CancelableRequestProvider::Handle next_handle = + history->GetVisibleVisitCountToHost( + GURL(https_url.replace(0, 5, "https:")), + &request_consumer_, + NewCallback(this, + &BrowserFeatureExtractor::QueryHttpsHostVisitsDone)); + StorePendingQuery(next_handle, request, callback); +} + +void BrowserFeatureExtractor::QueryHttpsHostVisitsDone( + CancelableRequestProvider::Handle handle, + bool success, + int num_visits, + base::Time first_visit) { + DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); + ClientPhishingRequest* request; + DoneCallback* callback; + if (!GetPendingQuery(handle, &request, &callback)) { + DLOG(FATAL) << "No pending history query found"; + return; + } + DCHECK(request); + DCHECK(callback); + if (!success) { + callback->Run(false, request); + return; + } + SetHostVisitsFeatures(num_visits, first_visit, false, request); + callback->Run(true, request); // We're done with all the history lookups. +} + +void BrowserFeatureExtractor::SetHostVisitsFeatures( + int num_visits, + base::Time first_visit, + bool is_http_query, + ClientPhishingRequest* request) { + DCHECK(request); + AddFeature(is_http_query ? + features::kHttpHostVisitCount : features::kHttpsHostVisitCount, + static_cast<double>(num_visits), + request); + AddFeature( + is_http_query ? + features::kFirstHttpHostVisitMoreThan24hAgo : + features::kFirstHttpsHostVisitMoreThan24hAgo, + (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ? + 1.0 : 0.0, + request); +} + +void BrowserFeatureExtractor::StorePendingQuery( + CancelableRequestProvider::Handle handle, + ClientPhishingRequest* request, + DoneCallback* callback) { + DCHECK_EQ(0U, pending_queries_.count(handle)); + pending_queries_[handle] = std::make_pair(request, callback); +} + +bool BrowserFeatureExtractor::GetPendingQuery( + CancelableRequestProvider::Handle handle, + ClientPhishingRequest** request, + DoneCallback** callback) { + PendingQueriesMap::iterator it = pending_queries_.find(handle); + DCHECK(it != pending_queries_.end()); + if (it != pending_queries_.end()) { + *request = it->second.first; + *callback = it->second.second; + pending_queries_.erase(it); + return true; + } + return false; +} +bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) { + *history = NULL; + if (tab_ && tab_->profile()) { + *history = tab_->profile()->GetHistoryService(Profile::EXPLICIT_ACCESS); + if (*history) { + return true; + } + } + VLOG(2) << "Unable to query history. No history service available."; + return false; +} +}; // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.h b/chrome/browser/safe_browsing/browser_feature_extractor.h new file mode 100644 index 0000000..8724334 --- /dev/null +++ b/chrome/browser/safe_browsing/browser_feature_extractor.h @@ -0,0 +1,161 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// BrowserFeatureExtractor computes various browser features for client-side +// phishing detection. For now it does a bunch of lookups in the history +// service to see whether a particular URL has been visited before by the +// user. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ +#define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ +#pragma once + +#include <map> +#include <set> +#include <utility> + +#include "base/basictypes.h" +#include "base/callback_old.h" +#include "base/task.h" +#include "base/time.h" +#include "chrome/browser/history/history_types.h" +#include "content/browser/cancelable_request.h" + +class HistoryService; +class TabContents; + +namespace safe_browsing { +class ClientPhishingRequest; + +namespace features { + +// TODO(noelutz): move renderer/safe_browsing/features.h to common. +//////////////////////////////////////////////////// +// History features. +//////////////////////////////////////////////////// + +// Number of visits to that URL stored in the browser history. +// Should always be an integer larger than 1 because by the time +// we lookup the history the current URL should already be stored there. +extern const char kUrlHistoryVisitCount[]; + +// Number of times the URL was typed in the Omnibox. +extern const char kUrlHistoryTypedCount[]; + +// Number of times the URL was reached by clicking a link. +extern const char kUrlHistoryLinkCount[]; + +// Number of times URL was visited more than 24h ago. +extern const char kUrlHistoryVisitCountMoreThan24hAgo[]; + +// Number of user-visible visits to all URLs on the same host/port as +// the URL for HTTP and HTTPs. +extern const char kHttpHostVisitCount[]; +extern const char kHttpsHostVisitCount[]; + +// Boolean feature which is true if the host was visited for the first +// time more than 24h ago (only considers user-visible visits like above). +extern const char kFirstHttpHostVisitMoreThan24hAgo[]; +extern const char kFirstHttpsHostVisitMoreThan24hAgo[]; +} // namespace features + +// All methods of this class must be called on the UI thread (including +// the constructor). +class BrowserFeatureExtractor { + public: + // Called when feature extraction is done. The first argument will be + // true iff feature extraction succeeded. The second argument is the + // phishing request which was modified by the feature extractor. The + // DoneCallback takes ownership of the request object. + typedef Callback2<bool, ClientPhishingRequest*>::Type DoneCallback; + + // The caller keeps ownership of the tab object and is responsible for + // ensuring that it stays valid for the entire lifetime of this object. + explicit BrowserFeatureExtractor(TabContents* tab); + + // The destructor will cancel any pending requests. + ~BrowserFeatureExtractor(); + + // Begins extraction of the browser features. We take ownership + // of the request object until |callback| is called (see DoneCallback above) + // and will write the extracted features to the feature map. Once the + // feature extraction is complete, |callback| is run on the UI thread. We + // take ownership of the |callback| object. This method must run on the UI + // thread. + void ExtractFeatures(ClientPhishingRequest* request, + DoneCallback* callback); + + private: + friend class DeleteTask<BrowserFeatureExtractor>; + typedef std::pair<ClientPhishingRequest*, DoneCallback*> ExtractionData; + typedef std::map<CancelableRequestProvider::Handle, + ExtractionData> PendingQueriesMap; + + // Actually starts feature extraction (does the real work). + void StartExtractFeatures(ClientPhishingRequest* request, + DoneCallback* callback); + + // HistoryService callback which is called when we're done querying URL visits + // in the history. + void QueryUrlHistoryDone(CancelableRequestProvider::Handle handle, + bool success, + const history::URLRow* row, + history::VisitVector* visits); + + // HistoryService callback which is called when we're done querying HTTP host + // visits in the history. + void QueryHttpHostVisitsDone(CancelableRequestProvider::Handle handle, + bool success, + int num_visits, + base::Time first_visit); + + // HistoryService callback which is called when we're done querying HTTPS host + // visits in the history. + void QueryHttpsHostVisitsDone(CancelableRequestProvider::Handle handle, + bool success, + int num_visits, + base::Time first_visit); + + // Helper function which sets the host history features given the + // number of host visits and the time of the fist host visit. Set + // |is_http_query| to true if the URL scheme is HTTP and to false if + // the scheme is HTTPS. + void SetHostVisitsFeatures(int num_visits, + base::Time first_visit, + bool is_http_query, + ClientPhishingRequest* request); + + // Helper function which stores the request and callback while the history + // query is being processed. + void StorePendingQuery(CancelableRequestProvider::Handle handle, + ClientPhishingRequest* request, + DoneCallback* callback); + + // Helper function which is the counterpart of StorePendingQuery. If there + // is a pending query for the given handle it will return false and set both + // the request and cb pointers. Otherwise, it will return false. + bool GetPendingQuery(CancelableRequestProvider::Handle handle, + ClientPhishingRequest** request, + DoneCallback** callback); + + // Helper function which gets the history server if possible. If the pointer + // is set it will return true and false otherwise. + bool GetHistoryService(HistoryService** history); + + TabContents* tab_; + CancelableRequestConsumer request_consumer_; + ScopedRunnableMethodFactory<BrowserFeatureExtractor> method_factory_; + + // Set of pending extractions (i.e. extractions for which ExtractFeatures was + // called but not StartExtractFeatures). + std::set<ExtractionData> pending_extractions_; + + // Set of pending queries (i.e., where history->Query...() was called but + // the history callback hasn't been invoked yet). + PendingQueriesMap pending_queries_; + + DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor); +}; +} // namespace safe_browsing +#endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_ diff --git a/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc new file mode 100644 index 0000000..e683fd7 --- /dev/null +++ b/chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/browser_feature_extractor.h" + +#include <map> +#include <string> + +#include "base/memory/scoped_ptr.h" +#include "base/message_loop.h" +#include "base/time.h" +#include "chrome/common/safe_browsing/csd.pb.h" +#include "chrome/browser/history/history.h" +#include "chrome/browser/history/history_backend.h" +#include "chrome/browser/profiles/profile.h" +#include "chrome/test/testing_profile.h" +#include "content/browser/browser_thread.h" +#include "content/browser/renderer_host/test_render_view_host.h" +#include "content/browser/tab_contents/tab_contents.h" +#include "content/browser/tab_contents/test_tab_contents.h" +#include "googleurl/src/gurl.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace safe_browsing { +class BrowserFeatureExtractorTest : public RenderViewHostTestHarness { + protected: + BrowserFeatureExtractorTest() + : ui_thread_(BrowserThread::UI, &message_loop_) { + } + + virtual void SetUp() { + RenderViewHostTestHarness::SetUp(); + profile_->CreateHistoryService(true /* delete_file */, false /* no_db */); + extractor_.reset(new BrowserFeatureExtractor(contents())); + num_pending_ = 0; + } + + virtual void TearDown() { + extractor_.reset(); + profile_->DestroyHistoryService(); + RenderViewHostTestHarness::TearDown(); + ASSERT_EQ(0, num_pending_); + } + + HistoryService* history_service() { + return profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); + } + + bool ExtractFeatures(ClientPhishingRequest* request) { + StartExtractFeatures(request); + MessageLoop::current()->Run(); + EXPECT_EQ(1U, success_.count(request)); + return success_.count(request) ? success_[request] : false; + } + + void StartExtractFeatures(ClientPhishingRequest* request) { + success_.erase(request); + ++num_pending_; + extractor_->ExtractFeatures( + request, + NewCallback(this, + &BrowserFeatureExtractorTest::ExtractFeaturesDone)); + } + + BrowserThread ui_thread_; + int num_pending_; + scoped_ptr<BrowserFeatureExtractor> extractor_; + std::map<ClientPhishingRequest*, bool> success_; + + private: + void ExtractFeaturesDone(bool success, ClientPhishingRequest* request) { + ASSERT_EQ(0U, success_.count(request)); + success_[request] = success; + if (--num_pending_ == 0) { + MessageLoop::current()->Quit(); + } + } +}; + +TEST_F(BrowserFeatureExtractorTest, UrlNotInHistory) { + ClientPhishingRequest request; + request.set_url("http://www.google.com/"); + request.set_client_score(0.5); + EXPECT_FALSE(ExtractFeatures(&request)); +} + +TEST_F(BrowserFeatureExtractorTest, RequestNotInitialized) { + ClientPhishingRequest request; + request.set_url("http://www.google.com/"); + // Request is missing the score value. + EXPECT_FALSE(ExtractFeatures(&request)); +} + +TEST_F(BrowserFeatureExtractorTest, UrlInHistory) { + history_service()->AddPage(GURL("http://www.foo.com/bar.html"), + history::SOURCE_BROWSED); + history_service()->AddPage(GURL("https://www.foo.com/gaa.html"), + history::SOURCE_BROWSED); // same host HTTPS. + history_service()->AddPage(GURL("http://www.foo.com/gaa.html"), + history::SOURCE_BROWSED); // same host HTTP. + history_service()->AddPage(GURL("http://bar.foo.com/gaa.html"), + history::SOURCE_BROWSED); // different host. + history_service()->AddPage(GURL("http://www.foo.com/bar.html?a=b"), + base::Time::Now() - base::TimeDelta::FromHours(23), + NULL, 0, GURL(), PageTransition::LINK, + history::RedirectList(), history::SOURCE_BROWSED, + false); + history_service()->AddPage(GURL("http://www.foo.com/bar.html"), + base::Time::Now() - base::TimeDelta::FromHours(25), + NULL, 0, GURL(), PageTransition::TYPED, + history::RedirectList(), history::SOURCE_BROWSED, + false); + history_service()->AddPage(GURL("https://www.foo.com/goo.html"), + base::Time::Now() - base::TimeDelta::FromDays(5), + NULL, 0, GURL(), PageTransition::TYPED, + history::RedirectList(), history::SOURCE_BROWSED, + false); + + ClientPhishingRequest request; + request.set_url("http://www.foo.com/bar.html"); + request.set_client_score(0.5); + EXPECT_TRUE(ExtractFeatures(&request)); + std::map<std::string, double> features; + for (int i = 0; i < request.non_model_feature_map_size(); ++i) { + const ClientPhishingRequest::Feature& feature = + request.non_model_feature_map(i); + EXPECT_EQ(0U, features.count(feature.name())); + features[feature.name()] = feature.value(); + } + EXPECT_EQ(8U, features.size()); + EXPECT_DOUBLE_EQ(2.0, features[features::kUrlHistoryVisitCount]); + EXPECT_DOUBLE_EQ(1.0, + features[features::kUrlHistoryVisitCountMoreThan24hAgo]); + EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryTypedCount]); + EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]); + EXPECT_DOUBLE_EQ(4.0, features[features::kHttpHostVisitCount]); + EXPECT_DOUBLE_EQ(2.0, features[features::kHttpsHostVisitCount]); + EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]); + EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpsHostVisitMoreThan24hAgo]); +} + +TEST_F(BrowserFeatureExtractorTest, MultipleRequestsAtOnce) { + history_service()->AddPage(GURL("http://www.foo.com/bar.html"), + history::SOURCE_BROWSED); + ClientPhishingRequest request; + request.set_url("http://www.foo.com/bar.html"); + request.set_client_score(0.5); + StartExtractFeatures(&request); + + ClientPhishingRequest request2; + request2.set_url("http://www.foo.com/goo.html"); + request2.set_client_score(1.0); + StartExtractFeatures(&request2); + + MessageLoop::current()->Run(); + EXPECT_TRUE(success_[&request]); + // Success is false because the second URL is not in the history and we are + // not able to distinguish between a missing URL in the history and an error. + EXPECT_FALSE(success_[&request2]); +} +} // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/client_side_detection_host.cc b/chrome/browser/safe_browsing/client_side_detection_host.cc index 4cb448c..03f97ae 100644 --- a/chrome/browser/safe_browsing/client_side_detection_host.cc +++ b/chrome/browser/safe_browsing/client_side_detection_host.cc @@ -13,6 +13,7 @@ #include "base/task.h" #include "chrome/browser/browser_process.h" #include "chrome/browser/profiles/profile.h" +#include "chrome/browser/safe_browsing/browser_feature_extractor.h" #include "chrome/browser/safe_browsing/client_side_detection_service.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "chrome/common/chrome_switches.h" @@ -264,6 +265,7 @@ ClientSideDetectionHost* ClientSideDetectionHost::Create( ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab) : TabContentsObserver(tab), csd_service_(g_browser_process->safe_browsing_detection_service()), + feature_extractor_(new BrowserFeatureExtractor(tab)), cb_factory_(ALLOW_THIS_IN_INITIALIZER_LIST(this)) { DCHECK(tab); // Note: csd_service_ and sb_service_ might be NULL. @@ -340,12 +342,12 @@ void ClientSideDetectionHost::OnDetectedPhishingSite( // There shouldn't be any pending requests because we revoke them everytime // we navigate away. DCHECK(!cb_factory_.HasPendingCallbacks()); - VLOG(2) << "Start sending client phishing request for URL: " - << verdict->url(); - csd_service_->SendClientReportPhishingRequest( - verdict.release(), // The service takes ownership of the verdict. - cb_factory_.NewCallback( - &ClientSideDetectionHost::MaybeShowPhishingWarning)); + + // Start browser-side feature extraction. Once we're done it will send + // the client verdict request. + feature_extractor_->ExtractFeatures( + verdict.release(), + NewCallback(this, &ClientSideDetectionHost::FeatureExtractionDone)); } } @@ -377,6 +379,22 @@ void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url, } } +void ClientSideDetectionHost::FeatureExtractionDone( + bool success, + ClientPhishingRequest* request) { + if (!request) { + DLOG(FATAL) << "Invalid request object in FeatureExtractionDone"; + return; + } + VLOG(2) << "Feature extraction done (success:" << success << ") for URL: " + << request->url() << ". Start sending client phishing request."; + // Send ping no matter what - even if the browser feature extraction failed. + csd_service_->SendClientReportPhishingRequest( + request, // The service takes ownership of the request object. + cb_factory_.NewCallback( + &ClientSideDetectionHost::MaybeShowPhishingWarning)); +} + void ClientSideDetectionHost::set_client_side_detection_service( ClientSideDetectionService* service) { csd_service_ = service; diff --git a/chrome/browser/safe_browsing/client_side_detection_host.h b/chrome/browser/safe_browsing/client_side_detection_host.h index d229310..36adf93 100644 --- a/chrome/browser/safe_browsing/client_side_detection_host.h +++ b/chrome/browser/safe_browsing/client_side_detection_host.h @@ -17,7 +17,8 @@ class TabContents; namespace safe_browsing { - +class BrowserFeatureExtractor; +class ClientPhishingRequest; class ClientSideDetectionService; // This class is used to receive the IPC from the renderer which @@ -57,6 +58,11 @@ class ClientSideDetectionHost : public TabContentsObserver { // Otherwise, we do nothing. Called in UI thread. void MaybeShowPhishingWarning(GURL phishing_url, bool is_phishing); + // Callback that is called when the browser feature extractor is done. + // This method is responsible for deleting the request object. Called on + // the UI thread. + void FeatureExtractionDone(bool success, ClientPhishingRequest* request); + // Used for testing. This function does not take ownership of the service // class. void set_client_side_detection_service(ClientSideDetectionService* service); @@ -72,6 +78,8 @@ class ClientSideDetectionHost : public TabContentsObserver { // Keep a handle to the latest classification request so that we can cancel // it if necessary. scoped_refptr<ShouldClassifyUrlRequest> classification_request_; + // Browser-side feature extractor. + scoped_ptr<BrowserFeatureExtractor> feature_extractor_; base::ScopedCallbackFactory<ClientSideDetectionHost> cb_factory_; diff --git a/chrome/browser/safe_browsing/client_side_detection_host_unittest.cc b/chrome/browser/safe_browsing/client_side_detection_host_unittest.cc index 5a19583..ac0508e 100644 --- a/chrome/browser/safe_browsing/client_side_detection_host_unittest.cc +++ b/chrome/browser/safe_browsing/client_side_detection_host_unittest.cc @@ -49,6 +49,11 @@ MATCHER_P(EqualsProto, other, "") { return other.SerializeAsString() == arg.SerializeAsString(); } +ACTION(QuitUIMessageLoop) { + EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::UI)); + MessageLoopForUI::current()->Quit(); +} + class MockClientSideDetectionService : public ClientSideDetectionService { public: explicit MockClientSideDetectionService(const FilePath& model_path) @@ -98,7 +103,7 @@ class MockTestingProfile : public TestingProfile { }; // Helper function which quits the UI message loop from the IO message loop. -void QuitUIMessageLoop() { +void QuitUIMessageLoopFromIO() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, @@ -150,7 +155,7 @@ class ClientSideDetectionHostTest : public TabContentsWrapperTestHarness { // we put the quit message there. BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, - NewRunnableFunction(&QuitUIMessageLoop)); + NewRunnableFunction(&QuitUIMessageLoopFromIO)); MessageLoop::current()->Run(); } @@ -227,8 +232,9 @@ TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteNotPhishing) { EXPECT_CALL(*csd_service_, SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) - .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb))); + .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb), QuitUIMessageLoop())); OnDetectedPhishingSite(verdict.SerializeAsString()); + MessageLoop::current()->Run(); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); ASSERT_TRUE(cb); @@ -251,8 +257,9 @@ TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteDisabled) { EXPECT_CALL(*csd_service_, SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) - .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb))); + .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb), QuitUIMessageLoop())); OnDetectedPhishingSite(verdict.SerializeAsString()); + MessageLoop::current()->Run(); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); ASSERT_TRUE(cb); @@ -276,8 +283,9 @@ TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteShowInterstitial) { EXPECT_CALL(*csd_service_, SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) - .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb))); + .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb), QuitUIMessageLoop())); OnDetectedPhishingSite(verdict.SerializeAsString()); + MessageLoop::current()->Run(); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); ASSERT_TRUE(cb); @@ -328,8 +336,9 @@ TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteMultiplePings) { EXPECT_CALL(*csd_service_, SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) - .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb))); + .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb), QuitUIMessageLoop())); OnDetectedPhishingSite(verdict.SerializeAsString()); + MessageLoop::current()->Run(); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); ASSERT_TRUE(cb); GURL other_phishing_url("http://other_phishing_url.com/bla"); @@ -345,8 +354,11 @@ TEST_F(ClientSideDetectionHostTest, OnDetectedPhishingSiteMultiplePings) { verdict.set_client_score(0.8f); EXPECT_CALL(*csd_service_, SendClientReportPhishingRequest(Pointee(EqualsProto(verdict)), _)) - .WillOnce(DoAll(DeleteArg<0>(), SaveArg<1>(&cb_other))); + .WillOnce(DoAll(DeleteArg<0>(), + SaveArg<1>(&cb_other), + QuitUIMessageLoop())); OnDetectedPhishingSite(verdict.SerializeAsString()); + MessageLoop::current()->Run(); EXPECT_TRUE(Mock::VerifyAndClear(csd_service_.get())); ASSERT_TRUE(cb_other); |