diff options
author | bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-10-01 18:35:57 +0000 |
---|---|---|
committer | bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-10-01 18:35:57 +0000 |
commit | 451e9810b5a7086d37454cfd4214d0413ef2af9e (patch) | |
tree | e35926a6596876d8c17d51924f672c533381b07b /chrome/renderer/safe_browsing | |
parent | b9d3751ccc5db1a4560fb2bdcadcc8bccbaa2f2e (diff) | |
download | chromium_src-451e9810b5a7086d37454cfd4214d0413ef2af9e.zip chromium_src-451e9810b5a7086d37454cfd4214d0413ef2af9e.tar.gz chromium_src-451e9810b5a7086d37454cfd4214d0413ef2af9e.tar.bz2 |
Only run the phishing classifier for http: URLs.
Also, dial back the error logging a bit for cases that could come up in
practice (for example, unqualified hostnames on a local network).
BUG=none
TEST=PhishingClassifierTest
Review URL: http://codereview.chromium.org/3461031
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@61205 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/safe_browsing')
4 files changed, 22 insertions, 7 deletions
diff --git a/chrome/renderer/safe_browsing/phishing_classifier.cc b/chrome/renderer/safe_browsing/phishing_classifier.cc index f685612..3be76ed 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier.cc @@ -10,6 +10,7 @@ #include "base/compiler_specific.h" #include "base/logging.h" #include "base/sha2.h" +#include "chrome/common/url_constants.h" #include "chrome/renderer/render_view.h" #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/features.h" @@ -87,8 +88,16 @@ void PhishingClassifier::BeginFeatureExtraction() { return; } + // Check whether the URL is one that we should classify. + // Currently, we only classify http: URLs. + GURL url(frame->url()); + if (!url.SchemeIs(chrome::kHttpScheme)) { + RunFailureCallback(); + return; + } + features_.reset(new FeatureMap); - if (!url_extractor_->ExtractFeatures(GURL(frame->url()), features_.get())) { + if (!url_extractor_->ExtractFeatures(url, features_.get())) { RunFailureCallback(); return; } diff --git a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc index e201dae..0ef78c9 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc @@ -15,7 +15,6 @@ #include "base/sha2.h" #include "base/string16.h" #include "base/utf_string_conversions.h" -#include "chrome/common/url_constants.h" #include "chrome/renderer/safe_browsing/client_model.pb.h" #include "chrome/renderer/safe_browsing/features.h" #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" @@ -125,8 +124,15 @@ TEST_F(PhishingClassifierTest, TestClassification) { EXPECT_GE(phishy_score, 0.0); EXPECT_LT(phishy_score, 0.5); - // Extraction should fail for this case, since there is no host. 
- LoadURL(chrome::kAboutBlankURL); + // Extraction should fail for this case, since there is no TLD. + responses_["http://localhost/"] = "<html><body>content</body></html>"; + LoadURL("http://localhost/"); + EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); + EXPECT_EQ(phishy_score, PhishingClassifier::kInvalidScore); + + // Extraction should also fail for this case, because the URL is not http. + responses_["https://host.net/"] = "<html><body>secure</body></html>"; + LoadURL("https://host.net/"); EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); EXPECT_EQ(phishy_score, PhishingClassifier::kInvalidScore); } diff --git a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc index a709f7c..e1db151 100644 --- a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc +++ b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc @@ -174,7 +174,7 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() { // and advance to the first element. if (!ResetFrameData()) { // Nothing in this frame, move on to the next one. 
- LOG(WARNING) << "No content in frame, skipping"; + DLOG(WARNING) << "No content in frame, skipping"; continue; } cur_node = cur_frame_data_->elements.firstItem(); diff --git a/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc b/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc index 4686a41..54f924a 100644 --- a/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc +++ b/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc @@ -42,7 +42,7 @@ bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url, true /* allow_unknown_registries */); if (registry_length == 0 || registry_length == std::string::npos) { - LOG(ERROR) << "Could not find TLD for host: " << host; + DLOG(INFO) << "Could not find TLD for host: " << host; return false; } DCHECK_LT(registry_length, host.size()) @@ -62,7 +62,7 @@ bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url, std::remove(host_tokens.begin(), host_tokens.end(), ""); host_tokens.erase(new_end, host_tokens.end()); if (host_tokens.empty()) { - LOG(ERROR) << "Could not find domain for host: " << host; + DLOG(INFO) << "Could not find domain for host: " << host; return false; } if (!features->AddBooleanFeature(features::kUrlDomainToken + |