summary | refs | log | tree | commit | diff | stats
path: root/chrome/renderer/safe_browsing
diff options
context:
space:
mode:
author: bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-10-01 18:35:57 +0000
committer: bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-10-01 18:35:57 +0000
commit: 451e9810b5a7086d37454cfd4214d0413ef2af9e (patch)
tree: e35926a6596876d8c17d51924f672c533381b07b /chrome/renderer/safe_browsing
parent: b9d3751ccc5db1a4560fb2bdcadcc8bccbaa2f2e (diff)
download: chromium_src-451e9810b5a7086d37454cfd4214d0413ef2af9e.zip
chromium_src-451e9810b5a7086d37454cfd4214d0413ef2af9e.tar.gz
chromium_src-451e9810b5a7086d37454cfd4214d0413ef2af9e.tar.bz2
Only run the phishing classifier for http: urls.
Also, dial back the error logging a bit for cases that could come up in practice (for example, unqualified hostnames on a local network).

BUG=none
TEST=PhishingClassifierTest
Review URL: http://codereview.chromium.org/3461031
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@61205 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/safe_browsing')
-rw-r--r-- chrome/renderer/safe_browsing/phishing_classifier.cc | 11
-rw-r--r-- chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc | 12
-rw-r--r-- chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc | 2
-rw-r--r-- chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc | 4
4 files changed, 22 insertions, 7 deletions
diff --git a/chrome/renderer/safe_browsing/phishing_classifier.cc b/chrome/renderer/safe_browsing/phishing_classifier.cc
index f685612..3be76ed 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier.cc
@@ -10,6 +10,7 @@
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/sha2.h"
+#include "chrome/common/url_constants.h"
#include "chrome/renderer/render_view.h"
#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/features.h"
@@ -87,8 +88,16 @@ void PhishingClassifier::BeginFeatureExtraction() {
return;
}
+ // Check whether the URL is one that we should classify.
+ // Currently, we only classify http: URLs.
+ GURL url(frame->url());
+ if (!url.SchemeIs(chrome::kHttpScheme)) {
+ RunFailureCallback();
+ return;
+ }
+
features_.reset(new FeatureMap);
- if (!url_extractor_->ExtractFeatures(GURL(frame->url()), features_.get())) {
+ if (!url_extractor_->ExtractFeatures(url, features_.get())) {
RunFailureCallback();
return;
}
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
index e201dae..0ef78c9 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
@@ -15,7 +15,6 @@
#include "base/sha2.h"
#include "base/string16.h"
#include "base/utf_string_conversions.h"
-#include "chrome/common/url_constants.h"
#include "chrome/renderer/safe_browsing/client_model.pb.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
@@ -125,8 +124,15 @@ TEST_F(PhishingClassifierTest, TestClassification) {
EXPECT_GE(phishy_score, 0.0);
EXPECT_LT(phishy_score, 0.5);
- // Extraction should fail for this case, since there is no host.
- LoadURL(chrome::kAboutBlankURL);
+ // Extraction should fail for this case, since there is no TLD.
+ responses_["http://localhost/"] = "<html><body>content</body></html>";
+ LoadURL("http://localhost/");
+ EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score));
+ EXPECT_EQ(phishy_score, PhishingClassifier::kInvalidScore);
+
+ // Extraction should also fail for this case, because the URL is not http.
+ responses_["https://host.net/"] = "<html><body>secure</body></html>";
+ LoadURL("https://host.net/");
EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score));
EXPECT_EQ(phishy_score, PhishingClassifier::kInvalidScore);
}
diff --git a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
index a709f7c..e1db151 100644
--- a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
+++ b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
@@ -174,7 +174,7 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
// and advance to the first element.
if (!ResetFrameData()) {
// Nothing in this frame, move on to the next one.
- LOG(WARNING) << "No content in frame, skipping";
+ DLOG(WARNING) << "No content in frame, skipping";
continue;
}
cur_node = cur_frame_data_->elements.firstItem();
diff --git a/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc b/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc
index 4686a41..54f924a 100644
--- a/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc
+++ b/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc
@@ -42,7 +42,7 @@ bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url,
true /* allow_unknown_registries */);
if (registry_length == 0 || registry_length == std::string::npos) {
- LOG(ERROR) << "Could not find TLD for host: " << host;
+ DLOG(INFO) << "Could not find TLD for host: " << host;
return false;
}
DCHECK_LT(registry_length, host.size())
@@ -62,7 +62,7 @@ bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url,
std::remove(host_tokens.begin(), host_tokens.end(), "");
host_tokens.erase(new_end, host_tokens.end());
if (host_tokens.empty()) {
- LOG(ERROR) << "Could not find domain for host: " << host;
+ DLOG(INFO) << "Could not find domain for host: " << host;
return false;
}
if (!features->AddBooleanFeature(features::kUrlDomainToken +