diff options
author | noelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-14 22:14:13 +0000 |
---|---|---|
committer | noelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-14 22:14:13 +0000 |
commit | 1331397732bfd1a2ed9d63d92bbc2a61185bd666 (patch) | |
tree | 8f1f3367488e6749911a4db78f8ee5e67be91b6e /chrome/renderer/safe_browsing | |
parent | 28b791a577a84f316d78b37d3bea97a119215a0f (diff) | |
download | chromium_src-1331397732bfd1a2ed9d63d92bbc2a61185bd666.zip chromium_src-1331397732bfd1a2ed9d63d92bbc2a61185bd666.tar.gz chromium_src-1331397732bfd1a2ed9d63d92bbc2a61185bd666.tar.bz2 |
Change the client-side phishing detection request format. Instead of
just sending the URL with the computed score we also send back all the
extracted features.
BUG=NONE
TEST=ClientSideDetectionHostTest,ClientSideDetectionServiceTest,PhishingClassifierTest,PhishingClassifierDelegateTest
Review URL: http://codereview.chromium.org/6810020
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81657 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/safe_browsing')
8 files changed, 128 insertions, 69 deletions
diff --git a/chrome/renderer/safe_browsing/malware_dom_details.cc b/chrome/renderer/safe_browsing/malware_dom_details.cc index 6a03295..80be708 100644 --- a/chrome/renderer/safe_browsing/malware_dom_details.cc +++ b/chrome/renderer/safe_browsing/malware_dom_details.cc @@ -6,7 +6,7 @@ #include "base/compiler_specific.h" #include "chrome/common/chrome_constants.h" -#include "chrome/common/safebrowsing_messages.h" +#include "chrome/common/safe_browsing/safebrowsing_messages.h" #include "content/renderer/render_view.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" diff --git a/chrome/renderer/safe_browsing/malware_dom_details_browsertest.cc b/chrome/renderer/safe_browsing/malware_dom_details_browsertest.cc index dc7d6ef..01e93f3 100644 --- a/chrome/renderer/safe_browsing/malware_dom_details_browsertest.cc +++ b/chrome/renderer/safe_browsing/malware_dom_details_browsertest.cc @@ -3,7 +3,7 @@ // found in the LICENSE file. #include "base/stringprintf.h" -#include "chrome/common/safebrowsing_messages.h" +#include "chrome/common/safe_browsing/safebrowsing_messages.h" #include "chrome/renderer/safe_browsing/malware_dom_details.h" #include "chrome/test/render_view_test.h" #include "net/base/escape.h" diff --git a/chrome/renderer/safe_browsing/phishing_classifier.cc b/chrome/renderer/safe_browsing/phishing_classifier.cc index ad34053..997d81a 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier.cc @@ -10,7 +10,7 @@ #include "base/compiler_specific.h" #include "base/logging.h" #include "base/string_util.h" -#include "crypto/sha2.h" +#include "chrome/common/safe_browsing/csd.pb.h" #include "chrome/common/url_constants.h" #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/features.h" @@ -19,6 +19,7 @@ #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" #include "chrome/renderer/safe_browsing/scorer.h" #include "content/renderer/render_view.h" +#include "crypto/sha2.h" #include "googleurl/src/gurl.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebDataSource.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" @@ -28,8 +29,8 @@ namespace safe_browsing { -const double PhishingClassifier::kInvalidScore = -1.0; -const double PhishingClassifier::kPhishyThreshold = 0.5; +const float PhishingClassifier::kInvalidScore = -1.0; +const float PhishingClassifier::kPhishyThreshold = 0.5; PhishingClassifier::PhishingClassifier(RenderView* render_view, FeatureExtractorClock* clock) @@ -155,6 +156,8 @@ void PhishingClassifier::TermExtractionFinished(bool success) { // Hash all of the features so that they match the model, then compute // the score. FeatureMap hashed_features; + ClientPhishingRequest verdict; + verdict.set_url(render_view_->webview()->mainFrame()->url().spec()); for (base::hash_map<std::string, double>::const_iterator it = features_->features().begin(); it != features_->features().end(); ++it) { @@ -162,10 +165,14 @@ void PhishingClassifier::TermExtractionFinished(bool success) { bool result = hashed_features.AddRealFeature( crypto::SHA256HashString(it->first), it->second); DCHECK(result); + ClientPhishingRequest::Feature* feature = verdict.add_feature_map(); + feature->set_name(it->first); + feature->set_value(it->second); } - - double score = scorer_->ComputeScore(hashed_features); - RunCallback(score >= kPhishyThreshold, score); + float score = static_cast<float>(scorer_->ComputeScore(hashed_features)); + verdict.set_client_score(score); + verdict.set_is_phishing(score >= kPhishyThreshold); + RunCallback(verdict); } else { RunFailureCallback(); } @@ -180,13 +187,19 @@ void PhishingClassifier::CheckNoPendingClassification() { } } -void PhishingClassifier::RunCallback(bool phishy, double phishy_score) { - done_callback_->Run(phishy, phishy_score); +void PhishingClassifier::RunCallback(const ClientPhishingRequest& verdict) { + done_callback_->Run(verdict); Clear(); } void PhishingClassifier::RunFailureCallback() { - RunCallback(false /* not phishy */, kInvalidScore); + ClientPhishingRequest verdict; + // In this case we're not guaranteed to have a valid URL. Just set it + // to the empty string to make sure we have a valid protocol buffer. + verdict.set_url(""); + verdict.set_client_score(kInvalidScore); + verdict.set_is_phishing(false); + RunCallback(verdict); } void PhishingClassifier::Clear() { diff --git a/chrome/renderer/safe_browsing/phishing_classifier.h b/chrome/renderer/safe_browsing/phishing_classifier.h index 32168de..8aeda98 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier.h +++ b/chrome/renderer/safe_browsing/phishing_classifier.h @@ -27,6 +27,7 @@ class RenderView; namespace safe_browsing { +class ClientPhishingRequest; class FeatureExtractorClock; class FeatureMap; class PhishingDOMFeatureExtractor; @@ -36,15 +37,16 @@ class Scorer; class PhishingClassifier { public: - // Callback to be run when phishing classification finishes. If the first - // argument is true, the page is considered phishy by the client-side model, - // and the browser should ping back to get a final verdict. The second - // argument gives the phishyness score which is used in the pingback, - // or kInvalidScore if classification failed. - typedef Callback2<bool /* phishy */, double /* phishy_score */>::Type - DoneCallback; + // Callback to be run when phishing classification finishes. The verdict + // is a ClientPhishingRequest which contains the verdict computed by the + // classifier as well as the extracted features. If the verdict.is_phishing() + // is true, the page is considered phishy by the client-side model, + // and the browser should ping back to get a final verdict. The + // verdict.client_score() is set to kInvalidScore if classification failed. + typedef Callback1<const ClientPhishingRequest& /* verdict */>::Type + DoneCallback; - static const double kInvalidScore; + static const float kInvalidScore; // Creates a new PhishingClassifier object that will operate on // |render_view|. |clock| is used to time feature extractor operations, and @@ -87,7 +89,7 @@ class PhishingClassifier { private: // Any score equal to or above this value is considered phishy. - static const double kPhishyThreshold; + static const float kPhishyThreshold; // Begins the feature extraction process, by extracting URL features and // beginning DOM feature extraction. @@ -110,7 +112,7 @@ class PhishingClassifier { void CheckNoPendingClassification(); // Helper method to run the DoneCallback and clear the state. - void RunCallback(bool phishy, double phishy_score); + void RunCallback(const ClientPhishingRequest& verdict); // Helper to run the DoneCallback when feature extraction has failed. // This always signals a non-phishy verdict for the page, with kInvalidScore. diff --git a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc index b3eb58e..9c6c60e 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc @@ -14,18 +14,30 @@ #include "base/memory/scoped_ptr.h" #include "base/string16.h" #include "base/utf_string_conversions.h" -#include "crypto/sha2.h" +#include "chrome/common/safe_browsing/csd.pb.h" #include "chrome/renderer/safe_browsing/client_model.pb.h" #include "chrome/renderer/safe_browsing/features.h" #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/render_view_fake_resources_test.h" #include "chrome/renderer/safe_browsing/scorer.h" +#include "crypto/sha2.h" #include "testing/gmock/include/gmock/gmock.h" +using ::testing::AllOf; +using ::testing::Contains; +using ::testing::Not; +using ::testing::Pair; + namespace safe_browsing { class PhishingClassifierTest : public RenderViewFakeResourcesTest { protected: + PhishingClassifierTest() + : url_tld_token_net_(features::kUrlTldToken + std::string("net")), + page_link_domain_phishing_(features::kPageLinkDomain + + std::string("phishing.com")), + page_term_login_(features::kPageTerm + std::string("login")) {} + virtual void SetUp() { // Set up WebKit and the RenderView. RenderViewFakeResourcesTest::SetUp(); @@ -33,6 +45,11 @@ class PhishingClassifierTest : public RenderViewFakeResourcesTest { // Construct a model to test with. We include one feature from each of // the feature extractors, which allows us to verify that they all ran. ClientSideModel model; + + model.add_hashes(crypto::SHA256HashString(url_tld_token_net_)); + model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_)); + model.add_hashes(crypto::SHA256HashString(page_term_login_)); + model.add_hashes(crypto::SHA256HashString("login")); model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken + std::string("net"))); model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain + @@ -69,25 +86,31 @@ class PhishingClassifierTest : public RenderViewFakeResourcesTest { } // Helper method to start phishing classification and wait for it to - // complete. Returns the success value from the PhishingClassifier's - // DoneCallback, and fills in phishy_score with the score. - bool RunPhishingClassifier(const string16* page_text, double* phishy_score) { - success_ = false; + // complete. Returns the true if the page is classified as phishy and + // false otherwise. + bool RunPhishingClassifier(const string16* page_text, + float* phishy_score, + FeatureMap* features) { + verdict_.Clear(); *phishy_score = PhishingClassifier::kInvalidScore; + features->Clear(); classifier_->BeginClassification( page_text, NewCallback(this, &PhishingClassifierTest::ClassificationFinished)); message_loop_.Run(); - *phishy_score = phishy_score_; - return success_; + *phishy_score = verdict_.client_score(); + for (int i = 0; i < verdict_.feature_map_size(); ++i) { + features->AddRealFeature(verdict_.feature_map(i).name(), + verdict_.feature_map(i).value()); + } + return verdict_.is_phishing(); } // Completion callback for classification. - void ClassificationFinished(bool success, double phishy_score) { - success_ = success; - phishy_score_ = phishy_score; + void ClassificationFinished(const ClientPhishingRequest& verdict) { + verdict_ = verdict; // copy the verdict. message_loop_.Quit(); } @@ -95,10 +118,14 @@ class PhishingClassifierTest : public RenderViewFakeResourcesTest { scoped_ptr<PhishingClassifier> classifier_; MockFeatureExtractorClock* clock_; // owned by classifier_ - // These members hold the status from the most recent call to the + // Features that are in the model. + const std::string url_tld_token_net_; + const std::string page_link_domain_phishing_; + const std::string page_term_login_; + + // This member holds the status from the most recent call to the // ClassificationFinished callback. - bool success_; - double phishy_score_; + ClientPhishingRequest verdict_; }; TEST_F(PhishingClassifierTest, TestClassification) { @@ -118,34 +145,49 @@ TEST_F(PhishingClassifierTest, TestClassification) { LoadURL("http://host.net/"); string16 page_text = ASCIIToUTF16("login"); - double phishy_score; - EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score)); - EXPECT_DOUBLE_EQ(0.5, phishy_score); + float phishy_score; + FeatureMap features; + EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features)); + // Note: features.features() might contain other features that simply aren't + // in the model. + EXPECT_THAT(features.features(), + AllOf(Contains(Pair(url_tld_token_net_, 1.0)), + Contains(Pair(page_link_domain_phishing_, 1.0)), + Contains(Pair(page_term_login_, 1.0)))); + EXPECT_FLOAT_EQ(0.5, phishy_score); // Change the link domain to something non-phishy. responses_["http://host.net/"] = "<html><body><a href=\"http://safe.com/\">login</a></body></html>"; LoadURL("http://host.net/"); - EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); + EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); + EXPECT_THAT(features.features(), + AllOf(Contains(Pair(url_tld_token_net_, 1.0)), + Contains(Pair(page_term_login_, 1.0)))); + EXPECT_THAT(features.features(), + Not(Contains(Pair(page_link_domain_phishing_, 1.0)))); EXPECT_GE(phishy_score, 0.0); EXPECT_LT(phishy_score, 0.5); // Extraction should fail for this case, since there is no TLD. responses_["http://localhost/"] = "<html><body>content</body></html>"; LoadURL("http://localhost/"); - EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); + EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); + EXPECT_EQ(0U, features.features().size()); EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); // Extraction should also fail for this case, because the URL is not http. responses_["https://host.net/"] = "<html><body>secure</body></html>"; LoadURL("https://host.net/"); - EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); + EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); + EXPECT_EQ(0U, features.features().size()); EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); // Extraction should fail for this case because the URL is a POST request. LoadURLWithPost("http://host.net/"); - EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); + EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); + EXPECT_EQ(0U, features.features().size()); EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); } diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc index 358507a..efd5d8a 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc @@ -11,7 +11,8 @@ #include "base/logging.h" #include "base/metrics/histogram.h" #include "base/memory/scoped_callback_factory.h" -#include "chrome/common/safebrowsing_messages.h" +#include "chrome/common/safe_browsing/csd.pb.h" +#include "chrome/common/safe_browsing/safebrowsing_messages.h" #include "chrome/renderer/render_thread.h" #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/phishing_classifier.h" @@ -177,19 +178,18 @@ bool PhishingClassifierDelegate::OnMessageReceived( return handled; } -void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, - double phishy_score) { +void PhishingClassifierDelegate::ClassificationDone( + const ClientPhishingRequest& verdict) { // We no longer need the page text. classifier_page_text_.clear(); - VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; - if (!is_phishy) { + VLOG(2) << "Phishy verdict = " << verdict.is_phishing() + << " score = " << verdict.client_score(); + if (!verdict.is_phishing()) { return; } - + DCHECK(last_url_sent_to_classifier_.spec() == verdict.url()); Send(new SafeBrowsingHostMsg_DetectedPhishingSite( - routing_id(), - last_url_sent_to_classifier_, - phishy_score)); + routing_id(), verdict.SerializeAsString())); } GURL PhishingClassifierDelegate::GetToplevelUrl() { diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h index a07cc8b..e6eb0f2 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h @@ -16,6 +16,7 @@ #include "ipc/ipc_platform_file.h" namespace safe_browsing { +class ClientPhishingRequest; class PhishingClassifier; class Scorer; @@ -72,7 +73,7 @@ class PhishingClassifierDelegate : public RenderViewObserver { void OnStartPhishingDetection(const GURL& url); // Called when classification for the current page finishes. - void ClassificationDone(bool is_phishy, double phishy_score); + void ClassificationDone(const ClientPhishingRequest& verdict); // Returns the RenderView's toplevel URL. GURL GetToplevelUrl(); diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc index 90578a4..f422309 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc @@ -9,7 +9,8 @@ #include "base/memory/scoped_ptr.h" #include "base/utf_string_conversions.h" -#include "chrome/common/safebrowsing_messages.h" +#include "chrome/common/safe_browsing/csd.pb.h" +#include "chrome/common/safe_browsing/safebrowsing_messages.h" #include "chrome/renderer/safe_browsing/features.h" #include "chrome/renderer/safe_browsing/phishing_classifier.h" #include "chrome/renderer/safe_browsing/render_view_fake_resources_test.h" @@ -69,23 +70,22 @@ class PhishingClassifierDelegateTest : public RenderViewFakeResourcesTest { return handled; } - void OnDetectedPhishingSite(GURL phishing_url, double phishing_score) { - detected_phishing_site_ = true; - detected_url_ = phishing_url; - detected_score_ = phishing_score; + void OnDetectedPhishingSite(const std::string& verdict_str) { + scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest); + if (verdict->ParseFromString(verdict_str) && + verdict->IsInitialized()) { + verdict_.swap(verdict); + } message_loop_.Quit(); } // Runs the ClassificationDone callback, then waits for the // DetectedPhishingSite IPC to arrive. void RunClassificationDone(PhishingClassifierDelegate* delegate, - bool is_phishy, double phishy_score) { + const ClientPhishingRequest& verdict) { // Clear out any previous state. - detected_phishing_site_ = false; - detected_url_ = GURL(); - detected_score_ = -1.0; - - delegate->ClassificationDone(is_phishy, phishy_score); + verdict_.reset(); + delegate->ClassificationDone(verdict); message_loop_.Run(); } @@ -94,9 +94,7 @@ class PhishingClassifierDelegateTest : public RenderViewFakeResourcesTest { delegate->OnStartPhishingDetection(url); } - bool detected_phishing_site_; - GURL detected_url_; - double detected_score_; + scoped_ptr<ClientPhishingRequest> verdict_; }; TEST_F(PhishingClassifierDelegateTest, Navigation) { @@ -340,10 +338,13 @@ TEST_F(PhishingClassifierDelegateTest, DetectedPhishingSite) { Mock::VerifyAndClearExpectations(classifier); // Now run the callback to simulate the classifier finishing. - RunClassificationDone(delegate, true, 0.8); - EXPECT_TRUE(detected_phishing_site_); - EXPECT_EQ(GURL("http://host.com/#a"), detected_url_); - EXPECT_EQ(0.8, detected_score_); + ClientPhishingRequest verdict; + verdict.set_url("http://host.com/#a"); + verdict.set_client_score(0.8f); + verdict.set_is_phishing(true); + RunClassificationDone(delegate, verdict); + ASSERT_TRUE(verdict_.get()); + EXPECT_EQ(verdict.SerializeAsString(), verdict_->SerializeAsString()); // The delegate will cancel pending classification on destruction. EXPECT_CALL(*classifier, CancelPendingClassification()); |