diff options
author | bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-07 23:46:43 +0000 |
---|---|---|
committer | bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-07 23:46:43 +0000 |
commit | c90aeec875b69a034e64240b3d2892302a4fdc59 (patch) | |
tree | 5476f7a7cc230df41b32a2b5bd5a61324f6e0598 | |
parent | 70e982ad39c6aae31a71d02bf5c6e848d538c576 (diff) | |
download | chromium_src-c90aeec875b69a034e64240b3d2892302a4fdc59.zip chromium_src-c90aeec875b69a034e64240b3d2892302a4fdc59.tar.gz chromium_src-c90aeec875b69a034e64240b3d2892302a4fdc59.tar.bz2 |
Send the ref as part of the client-side phishing detection pingback.
This will help determine whether differing scores on the same URL are related
to data that is passed through the ref (as in Google Instant queries). Also
add a test for the url feature extractor to verify that the ref is not used to
form any token features.
BUG=none
TEST=PhishingClassifierDelegateTest,PhishingUrlFeatureExtractorTest
Review URL: http://codereview.chromium.org/6812009
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@80869 0039d316-1c4b-4281-b951-d872f2087c98
4 files changed, 28 insertions, 13 deletions
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc index 39fa50c..d9c3174 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc @@ -144,7 +144,7 @@ void PhishingClassifierDelegate::PageCaptured(const string16& page_text, return; } last_finished_load_id_ = render_view()->page_id(); - last_finished_load_url_ = StripToplevelUrl(); + last_finished_load_url_ = GetToplevelUrl(); classifier_page_text_ = page_text; MaybeStartClassification(); } @@ -182,8 +182,8 @@ void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, phishy_score)); } -GURL PhishingClassifierDelegate::StripToplevelUrl() { - return StripRef(render_view()->webview()->mainFrame()->url()); +GURL PhishingClassifierDelegate::GetToplevelUrl() { + return render_view()->webview()->mainFrame()->url(); } void PhishingClassifierDelegate::MaybeStartClassification() { @@ -220,9 +220,10 @@ void PhishingClassifierDelegate::MaybeStartClassification() { return; } // If the page id is unchanged, the toplevel URL should also be unchanged. - DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_); + GURL stripped_last_load_url(StripRef(last_finished_load_url_)); + DCHECK_EQ(StripRef(GetToplevelUrl()), stripped_last_load_url); - if (last_finished_load_url_ == last_url_sent_to_classifier_) { + if (stripped_last_load_url == StripRef(last_url_sent_to_classifier_)) { // We've already classified this toplevel URL, so this was likely an // in-page navigation or a subframe navigation. The browser should not // send a StartPhishingDetection IPC in this case. @@ -231,9 +232,11 @@ void PhishingClassifierDelegate::MaybeStartClassification() { return; } - if (last_url_received_from_browser_ != last_finished_load_url_) { + if (last_url_received_from_browser_ != stripped_last_load_url) { // The browser has not yet confirmed that this URL should be classified, - // so defer classification for now. + // so defer classification for now. Note: the ref does not affect + // any of the browser's preclassification checks, so we don't require it + // to match. VLOG(2) << "Not starting classification, last url from browser is " << last_url_received_from_browser_ << ", last finished load is " << last_finished_load_url_; diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h index 8907bee..04309ba 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h @@ -70,8 +70,8 @@ class PhishingClassifierDelegate : public RenderViewObserver { // Called when classification for the current page finishes. void ClassificationDone(bool is_phishy, double phishy_score); - // Returns the RenderView's toplevel URL, with the ref stripped. - GURL StripToplevelUrl(); + // Returns the RenderView's toplevel URL. + GURL GetToplevelUrl(); // Shared code to begin classification if all conditions are met. void MaybeStartClassification(); @@ -80,7 +80,8 @@ class PhishingClassifierDelegate : public RenderViewObserver { // a scorer is made available via SetPhishingScorer(). scoped_ptr<PhishingClassifier> classifier_; - // The last URL that the browser instructed us to classify. + // The last URL that the browser instructed us to classify, + // with the ref stripped. GURL last_url_received_from_browser_; // The last URL and page id that have finished loading in the RenderView. diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc index 5af67bd..305e907 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc @@ -321,19 +321,19 @@ TEST_F(PhishingClassifierDelegateTest, DetectedPhishingSite) { // Start by loading a page to populate the delegate's state. responses_["http://host.com/"] = "<html><body>phish</body></html>"; EXPECT_CALL(*classifier, CancelPendingClassification()); - LoadURL("http://host.com/"); + LoadURL("http://host.com/#a"); Mock::VerifyAndClearExpectations(classifier); string16 page_text = ASCIIToUTF16("phish"); EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)). WillOnce(DeleteArg<1>()); - OnStartPhishingDetection(delegate, GURL("http://host.com/")); + OnStartPhishingDetection(delegate, GURL("http://host.com/#a")); delegate->PageCaptured(page_text, false); Mock::VerifyAndClearExpectations(classifier); // Now run the callback to simulate the classifier finishing. RunClassificationDone(delegate, true, 0.8); EXPECT_TRUE(detected_phishing_site_); - EXPECT_EQ(GURL("http://host.com/"), detected_url_); + EXPECT_EQ(GURL("http://host.com/#a"), detected_url_); EXPECT_EQ(0.8, detected_score_); // The delegate will cancel pending classification on destruction. diff --git a/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc b/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc index 48e324f..a07f3ca 100644 --- a/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc +++ b/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc @@ -73,6 +73,17 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) { ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); + url = "http://witharef.com/#abc"; + expected_features.Clear(); + expected_features.AddBooleanFeature(features::kUrlTldToken + + std::string("com")); + expected_features.AddBooleanFeature(features::kUrlDomainToken + + std::string("witharef")); + + features.Clear(); + ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); + EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); + url = "http://...www..lotsodots....com./"; expected_features.Clear(); expected_features.AddBooleanFeature(features::kUrlTldToken + |