summaryrefslogtreecommitdiffstats
path: root/chrome/renderer/safe_browsing
diff options
context:
space:
mode:
authornoelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-14 22:14:13 +0000
committernoelutz@google.com <noelutz@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-14 22:14:13 +0000
commit1331397732bfd1a2ed9d63d92bbc2a61185bd666 (patch)
tree8f1f3367488e6749911a4db78f8ee5e67be91b6e /chrome/renderer/safe_browsing
parent28b791a577a84f316d78b37d3bea97a119215a0f (diff)
downloadchromium_src-1331397732bfd1a2ed9d63d92bbc2a61185bd666.zip
chromium_src-1331397732bfd1a2ed9d63d92bbc2a61185bd666.tar.gz
chromium_src-1331397732bfd1a2ed9d63d92bbc2a61185bd666.tar.bz2
Change the client-side phishing detection request format. Instead of
just sending the URL with the computed score we also send back all the extracted features. BUG=NONE TEST=ClientSideDetectionHostTest,ClientSideDetectionServiceTest,PhishingClassifierTest,PhishingClassifierDelegateTest Review URL: http://codereview.chromium.org/6810020 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81657 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/safe_browsing')
-rw-r--r--chrome/renderer/safe_browsing/malware_dom_details.cc2
-rw-r--r--chrome/renderer/safe_browsing/malware_dom_details_browsertest.cc2
-rw-r--r--chrome/renderer/safe_browsing/phishing_classifier.cc31
-rw-r--r--chrome/renderer/safe_browsing/phishing_classifier.h22
-rw-r--r--chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc82
-rw-r--r--chrome/renderer/safe_browsing/phishing_classifier_delegate.cc18
-rw-r--r--chrome/renderer/safe_browsing/phishing_classifier_delegate.h3
-rw-r--r--chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc37
8 files changed, 128 insertions, 69 deletions
diff --git a/chrome/renderer/safe_browsing/malware_dom_details.cc b/chrome/renderer/safe_browsing/malware_dom_details.cc
index 6a03295..80be708 100644
--- a/chrome/renderer/safe_browsing/malware_dom_details.cc
+++ b/chrome/renderer/safe_browsing/malware_dom_details.cc
@@ -6,7 +6,7 @@
#include "base/compiler_specific.h"
#include "chrome/common/chrome_constants.h"
-#include "chrome/common/safebrowsing_messages.h"
+#include "chrome/common/safe_browsing/safebrowsing_messages.h"
#include "content/renderer/render_view.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h"
diff --git a/chrome/renderer/safe_browsing/malware_dom_details_browsertest.cc b/chrome/renderer/safe_browsing/malware_dom_details_browsertest.cc
index dc7d6ef..01e93f3 100644
--- a/chrome/renderer/safe_browsing/malware_dom_details_browsertest.cc
+++ b/chrome/renderer/safe_browsing/malware_dom_details_browsertest.cc
@@ -3,7 +3,7 @@
// found in the LICENSE file.
#include "base/stringprintf.h"
-#include "chrome/common/safebrowsing_messages.h"
+#include "chrome/common/safe_browsing/safebrowsing_messages.h"
#include "chrome/renderer/safe_browsing/malware_dom_details.h"
#include "chrome/test/render_view_test.h"
#include "net/base/escape.h"
diff --git a/chrome/renderer/safe_browsing/phishing_classifier.cc b/chrome/renderer/safe_browsing/phishing_classifier.cc
index ad34053..997d81a 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier.cc
@@ -10,7 +10,7 @@
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/string_util.h"
-#include "crypto/sha2.h"
+#include "chrome/common/safe_browsing/csd.pb.h"
#include "chrome/common/url_constants.h"
#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/features.h"
@@ -19,6 +19,7 @@
#include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"
#include "chrome/renderer/safe_browsing/scorer.h"
#include "content/renderer/render_view.h"
+#include "crypto/sha2.h"
#include "googleurl/src/gurl.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebDataSource.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
@@ -28,8 +29,8 @@
namespace safe_browsing {
-const double PhishingClassifier::kInvalidScore = -1.0;
-const double PhishingClassifier::kPhishyThreshold = 0.5;
+const float PhishingClassifier::kInvalidScore = -1.0;
+const float PhishingClassifier::kPhishyThreshold = 0.5;
PhishingClassifier::PhishingClassifier(RenderView* render_view,
FeatureExtractorClock* clock)
@@ -155,6 +156,8 @@ void PhishingClassifier::TermExtractionFinished(bool success) {
// Hash all of the features so that they match the model, then compute
// the score.
FeatureMap hashed_features;
+ ClientPhishingRequest verdict;
+ verdict.set_url(render_view_->webview()->mainFrame()->url().spec());
for (base::hash_map<std::string, double>::const_iterator it =
features_->features().begin();
it != features_->features().end(); ++it) {
@@ -162,10 +165,14 @@ void PhishingClassifier::TermExtractionFinished(bool success) {
bool result = hashed_features.AddRealFeature(
crypto::SHA256HashString(it->first), it->second);
DCHECK(result);
+ ClientPhishingRequest::Feature* feature = verdict.add_feature_map();
+ feature->set_name(it->first);
+ feature->set_value(it->second);
}
-
- double score = scorer_->ComputeScore(hashed_features);
- RunCallback(score >= kPhishyThreshold, score);
+ float score = static_cast<float>(scorer_->ComputeScore(hashed_features));
+ verdict.set_client_score(score);
+ verdict.set_is_phishing(score >= kPhishyThreshold);
+ RunCallback(verdict);
} else {
RunFailureCallback();
}
@@ -180,13 +187,19 @@ void PhishingClassifier::CheckNoPendingClassification() {
}
}
-void PhishingClassifier::RunCallback(bool phishy, double phishy_score) {
- done_callback_->Run(phishy, phishy_score);
+void PhishingClassifier::RunCallback(const ClientPhishingRequest& verdict) {
+ done_callback_->Run(verdict);
Clear();
}
void PhishingClassifier::RunFailureCallback() {
- RunCallback(false /* not phishy */, kInvalidScore);
+ ClientPhishingRequest verdict;
+ // In this case we're not guaranteed to have a valid URL. Just set it
+ // to the empty string to make sure we have a valid protocol buffer.
+ verdict.set_url("");
+ verdict.set_client_score(kInvalidScore);
+ verdict.set_is_phishing(false);
+ RunCallback(verdict);
}
void PhishingClassifier::Clear() {
diff --git a/chrome/renderer/safe_browsing/phishing_classifier.h b/chrome/renderer/safe_browsing/phishing_classifier.h
index 32168de..8aeda98 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier.h
+++ b/chrome/renderer/safe_browsing/phishing_classifier.h
@@ -27,6 +27,7 @@
class RenderView;
namespace safe_browsing {
+class ClientPhishingRequest;
class FeatureExtractorClock;
class FeatureMap;
class PhishingDOMFeatureExtractor;
@@ -36,15 +37,16 @@ class Scorer;
class PhishingClassifier {
public:
- // Callback to be run when phishing classification finishes. If the first
- // argument is true, the page is considered phishy by the client-side model,
- // and the browser should ping back to get a final verdict. The second
- // argument gives the phishyness score which is used in the pingback,
- // or kInvalidScore if classification failed.
- typedef Callback2<bool /* phishy */, double /* phishy_score */>::Type
- DoneCallback;
+ // Callback to be run when phishing classification finishes. The verdict
+ // is a ClientPhishingRequest which contains the verdict computed by the
+ // classifier as well as the extracted features. If the verdict.is_phishing()
+ // is true, the page is considered phishy by the client-side model,
+ // and the browser should ping back to get a final verdict. The
+ // verdict.client_score() is set to kInvalidScore if classification failed.
+ typedef Callback1<const ClientPhishingRequest& /* verdict */>::Type
+ DoneCallback;
- static const double kInvalidScore;
+ static const float kInvalidScore;
// Creates a new PhishingClassifier object that will operate on
// |render_view|. |clock| is used to time feature extractor operations, and
@@ -87,7 +89,7 @@ class PhishingClassifier {
private:
// Any score equal to or above this value is considered phishy.
- static const double kPhishyThreshold;
+ static const float kPhishyThreshold;
// Begins the feature extraction process, by extracting URL features and
// beginning DOM feature extraction.
@@ -110,7 +112,7 @@ class PhishingClassifier {
void CheckNoPendingClassification();
// Helper method to run the DoneCallback and clear the state.
- void RunCallback(bool phishy, double phishy_score);
+ void RunCallback(const ClientPhishingRequest& verdict);
// Helper to run the DoneCallback when feature extraction has failed.
// This always signals a non-phishy verdict for the page, with kInvalidScore.
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
index b3eb58e..9c6c60e 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
@@ -14,18 +14,30 @@
#include "base/memory/scoped_ptr.h"
#include "base/string16.h"
#include "base/utf_string_conversions.h"
-#include "crypto/sha2.h"
+#include "chrome/common/safe_browsing/csd.pb.h"
#include "chrome/renderer/safe_browsing/client_model.pb.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/render_view_fake_resources_test.h"
#include "chrome/renderer/safe_browsing/scorer.h"
+#include "crypto/sha2.h"
#include "testing/gmock/include/gmock/gmock.h"
+using ::testing::AllOf;
+using ::testing::Contains;
+using ::testing::Not;
+using ::testing::Pair;
+
namespace safe_browsing {
class PhishingClassifierTest : public RenderViewFakeResourcesTest {
protected:
+ PhishingClassifierTest()
+ : url_tld_token_net_(features::kUrlTldToken + std::string("net")),
+ page_link_domain_phishing_(features::kPageLinkDomain +
+ std::string("phishing.com")),
+ page_term_login_(features::kPageTerm + std::string("login")) {}
+
virtual void SetUp() {
// Set up WebKit and the RenderView.
RenderViewFakeResourcesTest::SetUp();
@@ -33,6 +45,11 @@ class PhishingClassifierTest : public RenderViewFakeResourcesTest {
// Construct a model to test with. We include one feature from each of
// the feature extractors, which allows us to verify that they all ran.
ClientSideModel model;
+
+ model.add_hashes(crypto::SHA256HashString(url_tld_token_net_));
+ model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_));
+ model.add_hashes(crypto::SHA256HashString(page_term_login_));
+ model.add_hashes(crypto::SHA256HashString("login"));
model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken +
std::string("net")));
model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain +
@@ -69,25 +86,31 @@ class PhishingClassifierTest : public RenderViewFakeResourcesTest {
}
// Helper method to start phishing classification and wait for it to
- // complete. Returns the success value from the PhishingClassifier's
- // DoneCallback, and fills in phishy_score with the score.
- bool RunPhishingClassifier(const string16* page_text, double* phishy_score) {
- success_ = false;
+ // complete. Returns the true if the page is classified as phishy and
+ // false otherwise.
+ bool RunPhishingClassifier(const string16* page_text,
+ float* phishy_score,
+ FeatureMap* features) {
+ verdict_.Clear();
*phishy_score = PhishingClassifier::kInvalidScore;
+ features->Clear();
classifier_->BeginClassification(
page_text,
NewCallback(this, &PhishingClassifierTest::ClassificationFinished));
message_loop_.Run();
- *phishy_score = phishy_score_;
- return success_;
+ *phishy_score = verdict_.client_score();
+ for (int i = 0; i < verdict_.feature_map_size(); ++i) {
+ features->AddRealFeature(verdict_.feature_map(i).name(),
+ verdict_.feature_map(i).value());
+ }
+ return verdict_.is_phishing();
}
// Completion callback for classification.
- void ClassificationFinished(bool success, double phishy_score) {
- success_ = success;
- phishy_score_ = phishy_score;
+ void ClassificationFinished(const ClientPhishingRequest& verdict) {
+ verdict_ = verdict; // copy the verdict.
message_loop_.Quit();
}
@@ -95,10 +118,14 @@ class PhishingClassifierTest : public RenderViewFakeResourcesTest {
scoped_ptr<PhishingClassifier> classifier_;
MockFeatureExtractorClock* clock_; // owned by classifier_
- // These members hold the status from the most recent call to the
+ // Features that are in the model.
+ const std::string url_tld_token_net_;
+ const std::string page_link_domain_phishing_;
+ const std::string page_term_login_;
+
+ // This member holds the status from the most recent call to the
// ClassificationFinished callback.
- bool success_;
- double phishy_score_;
+ ClientPhishingRequest verdict_;
};
TEST_F(PhishingClassifierTest, TestClassification) {
@@ -118,34 +145,49 @@ TEST_F(PhishingClassifierTest, TestClassification) {
LoadURL("http://host.net/");
string16 page_text = ASCIIToUTF16("login");
- double phishy_score;
- EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score));
- EXPECT_DOUBLE_EQ(0.5, phishy_score);
+ float phishy_score;
+ FeatureMap features;
+ EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features));
+ // Note: features.features() might contain other features that simply aren't
+ // in the model.
+ EXPECT_THAT(features.features(),
+ AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
+ Contains(Pair(page_link_domain_phishing_, 1.0)),
+ Contains(Pair(page_term_login_, 1.0))));
+ EXPECT_FLOAT_EQ(0.5, phishy_score);
// Change the link domain to something non-phishy.
responses_["http://host.net/"] =
"<html><body><a href=\"http://safe.com/\">login</a></body></html>";
LoadURL("http://host.net/");
- EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score));
+ EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
+ EXPECT_THAT(features.features(),
+ AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
+ Contains(Pair(page_term_login_, 1.0))));
+ EXPECT_THAT(features.features(),
+ Not(Contains(Pair(page_link_domain_phishing_, 1.0))));
EXPECT_GE(phishy_score, 0.0);
EXPECT_LT(phishy_score, 0.5);
// Extraction should fail for this case, since there is no TLD.
responses_["http://localhost/"] = "<html><body>content</body></html>";
LoadURL("http://localhost/");
- EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score));
+ EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
+ EXPECT_EQ(0U, features.features().size());
EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
// Extraction should also fail for this case, because the URL is not http.
responses_["https://host.net/"] = "<html><body>secure</body></html>";
LoadURL("https://host.net/");
- EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score));
+ EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
+ EXPECT_EQ(0U, features.features().size());
EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
// Extraction should fail for this case because the URL is a POST request.
LoadURLWithPost("http://host.net/");
- EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score));
+ EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
+ EXPECT_EQ(0U, features.features().size());
EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
}
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
index 358507a..efd5d8a 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
@@ -11,7 +11,8 @@
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/memory/scoped_callback_factory.h"
-#include "chrome/common/safebrowsing_messages.h"
+#include "chrome/common/safe_browsing/csd.pb.h"
+#include "chrome/common/safe_browsing/safebrowsing_messages.h"
#include "chrome/renderer/render_thread.h"
#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/phishing_classifier.h"
@@ -177,19 +178,18 @@ bool PhishingClassifierDelegate::OnMessageReceived(
return handled;
}
-void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,
- double phishy_score) {
+void PhishingClassifierDelegate::ClassificationDone(
+ const ClientPhishingRequest& verdict) {
// We no longer need the page text.
classifier_page_text_.clear();
- VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;
- if (!is_phishy) {
+ VLOG(2) << "Phishy verdict = " << verdict.is_phishing()
+ << " score = " << verdict.client_score();
+ if (!verdict.is_phishing()) {
return;
}
-
+ DCHECK(last_url_sent_to_classifier_.spec() == verdict.url());
Send(new SafeBrowsingHostMsg_DetectedPhishingSite(
- routing_id(),
- last_url_sent_to_classifier_,
- phishy_score));
+ routing_id(), verdict.SerializeAsString()));
}
GURL PhishingClassifierDelegate::GetToplevelUrl() {
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
index a07cc8b..e6eb0f2 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
@@ -16,6 +16,7 @@
#include "ipc/ipc_platform_file.h"
namespace safe_browsing {
+class ClientPhishingRequest;
class PhishingClassifier;
class Scorer;
@@ -72,7 +73,7 @@ class PhishingClassifierDelegate : public RenderViewObserver {
void OnStartPhishingDetection(const GURL& url);
// Called when classification for the current page finishes.
- void ClassificationDone(bool is_phishy, double phishy_score);
+ void ClassificationDone(const ClientPhishingRequest& verdict);
// Returns the RenderView's toplevel URL.
GURL GetToplevelUrl();
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
index 90578a4..f422309 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
@@ -9,7 +9,8 @@
#include "base/memory/scoped_ptr.h"
#include "base/utf_string_conversions.h"
-#include "chrome/common/safebrowsing_messages.h"
+#include "chrome/common/safe_browsing/csd.pb.h"
+#include "chrome/common/safe_browsing/safebrowsing_messages.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/phishing_classifier.h"
#include "chrome/renderer/safe_browsing/render_view_fake_resources_test.h"
@@ -69,23 +70,22 @@ class PhishingClassifierDelegateTest : public RenderViewFakeResourcesTest {
return handled;
}
- void OnDetectedPhishingSite(GURL phishing_url, double phishing_score) {
- detected_phishing_site_ = true;
- detected_url_ = phishing_url;
- detected_score_ = phishing_score;
+ void OnDetectedPhishingSite(const std::string& verdict_str) {
+ scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
+ if (verdict->ParseFromString(verdict_str) &&
+ verdict->IsInitialized()) {
+ verdict_.swap(verdict);
+ }
message_loop_.Quit();
}
// Runs the ClassificationDone callback, then waits for the
// DetectedPhishingSite IPC to arrive.
void RunClassificationDone(PhishingClassifierDelegate* delegate,
- bool is_phishy, double phishy_score) {
+ const ClientPhishingRequest& verdict) {
// Clear out any previous state.
- detected_phishing_site_ = false;
- detected_url_ = GURL();
- detected_score_ = -1.0;
-
- delegate->ClassificationDone(is_phishy, phishy_score);
+ verdict_.reset();
+ delegate->ClassificationDone(verdict);
message_loop_.Run();
}
@@ -94,9 +94,7 @@ class PhishingClassifierDelegateTest : public RenderViewFakeResourcesTest {
delegate->OnStartPhishingDetection(url);
}
- bool detected_phishing_site_;
- GURL detected_url_;
- double detected_score_;
+ scoped_ptr<ClientPhishingRequest> verdict_;
};
TEST_F(PhishingClassifierDelegateTest, Navigation) {
@@ -340,10 +338,13 @@ TEST_F(PhishingClassifierDelegateTest, DetectedPhishingSite) {
Mock::VerifyAndClearExpectations(classifier);
// Now run the callback to simulate the classifier finishing.
- RunClassificationDone(delegate, true, 0.8);
- EXPECT_TRUE(detected_phishing_site_);
- EXPECT_EQ(GURL("http://host.com/#a"), detected_url_);
- EXPECT_EQ(0.8, detected_score_);
+ ClientPhishingRequest verdict;
+ verdict.set_url("http://host.com/#a");
+ verdict.set_client_score(0.8f);
+ verdict.set_is_phishing(true);
+ RunClassificationDone(delegate, verdict);
+ ASSERT_TRUE(verdict_.get());
+ EXPECT_EQ(verdict.SerializeAsString(), verdict_->SerializeAsString());
// The delegate will cancel pending classification on destruction.
EXPECT_CALL(*classifier, CancelPendingClassification());