Have the RenderThread initialize a PhishingClassifier when it receives a ViewMsg_SetPhishingModel message from the browser, and run it after pages load in the renderer.

The phishing classifier will only run on new navigations where the toplevel URL has changed. Since we need to keep the page text around in memory in the case where the model is not yet set when a page loads, this is also conditional on a command-line flag (--enable-client-side-phishing-detection).

The next steps will be to send the SetPhishingModel IPC from the browser, finish implementing the DetectedPhishingSite IPC, and ping to confirm the phishy verdict.

BUG=none
TEST=none

Review URL: http://codereview.chromium.org/3615003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@66798 0039d316-1c4b-4281-b951-d872f2087c98
author: bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-11-19 20:01:00 +0000
committer: bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-11-19 20:01:00 +0000
commit: 3ead1329dfad20de066dc5882df63b4ccc851608 (patch)
tree: b6d961150ff52d1b2afe888ad42e524209693cb0
parent: d843cd31b00d652198d49b099074db6db940833d (diff)
download: chromium_src-3ead1329dfad20de066dc5882df63b4ccc851608.zip
chromium_src-3ead1329dfad20de066dc5882df63b4ccc851608.tar.gz
chromium_src-3ead1329dfad20de066dc5882df63b4ccc851608.tar.bz2
16 files changed, 559 insertions, 25 deletions
diff --git a/chrome/browser/renderer_host/browser_render_process_host.cc b/chrome/browser/renderer_host/browser_render_process_host.cc
index 2900957..961b3ad 100644
--- a/chrome/browser/renderer_host/browser_render_process_host.cc
+++ b/chrome/browser/renderer_host/browser_render_process_host.cc
@@ -628,6 +628,7 @@ void BrowserRenderProcessHost::PropagateBrowserCommandLineToRenderer(
     switches::kDisableFileSystem,
     switches::kPpapiOutOfProcess,
     switches::kEnablePrintPreview,
+    switches::kEnableClientSidePhishingDetection
   };
   renderer_cmd->CopySwitchesFrom(browser_cmd, kSwitchNames,
                                  arraysize(kSwitchNames));
diff --git a/chrome/chrome_renderer.gypi b/chrome/chrome_renderer.gypi
index 8645f5d..8a6f73c 100644
--- a/chrome/chrome_renderer.gypi
+++ b/chrome/chrome_renderer.gypi
@@ -204,6 +204,8 @@
         'renderer/safe_browsing/features.h',
         'renderer/safe_browsing/phishing_classifier.cc',
         'renderer/safe_browsing/phishing_classifier.h',
+        'renderer/safe_browsing/phishing_classifier_delegate.cc',
+        'renderer/safe_browsing/phishing_classifier_delegate.h',
         'renderer/safe_browsing/phishing_dom_feature_extractor.cc',
         'renderer/safe_browsing/phishing_dom_feature_extractor.h',
         'renderer/safe_browsing/phishing_term_feature_extractor.cc',
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi
index 9249b66..9a1dd4c 100644
--- a/chrome/chrome_tests.gypi
+++ b/chrome/chrome_tests.gypi
@@ -2072,6 +2072,7 @@
         'renderer/render_widget_browsertest.h',
         'renderer/safe_browsing/mock_feature_extractor_clock.h',
         'renderer/safe_browsing/phishing_classifier_browsertest.cc',
+        'renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc',
         'renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc',
         'renderer/safe_browsing/phishing_thumbnailer_browsertest.cc',
         'renderer/safe_browsing/render_view_fake_resources_test.cc',
diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc
index 6ba6de8..34dbc28 100644
--- a/chrome/common/chrome_switches.cc
+++ b/chrome/common/chrome_switches.cc
@@ -396,6 +396,10 @@ const char kEnableBackgroundMode[] = "enable-background-mode";
 // Enables the benchmarking extensions.
 const char kEnableBenchmarking[]            = "enable-benchmarking";
 
+// Enable experimental client-side detection of phishing pages.
+const char kEnableClientSidePhishingDetection[] =
+    "enable-client-side-phishing-detection";
+
 // This flag enables UI for clearing server data.  Temporarily in place
 // until there's a server endpoint deployed.
 const char kEnableClearServerData[]         = "enable-clear-server-data";
diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h
index 2f30352..422cb40 100644
--- a/chrome/common/chrome_switches.h
+++ b/chrome/common/chrome_switches.h
@@ -122,6 +122,7 @@ extern const char kEnableAppLauncher[];
 extern const char kEnableAuthNegotiatePort[];
 extern const char kEnableBackgroundMode[];
 extern const char kEnableBenchmarking[];
+extern const char kEnableClientSidePhishingDetection[];
 extern const char kEnableClearServerData[];
 extern const char kEnableClickToPlay[];
 extern const char kEnableCloudPrintProxy[];
diff --git a/chrome/renderer/render_thread.cc b/chrome/renderer/render_thread.cc
index e387ee1..ccd82df 100644
--- a/chrome/renderer/render_thread.cc
+++ b/chrome/renderer/render_thread.cc
@@ -17,6 +17,7 @@
 #include "base/metrics/stats_table.h"
 #include "base/nullable_string16.h"
 #include "base/process_util.h"
+#include "base/scoped_callback_factory.h"
 #include "base/shared_memory.h"
 #include "base/string_util.h"
 #include "base/task.h"
@@ -65,6 +66,8 @@
 #include "chrome/renderer/renderer_histogram_snapshots.h"
 #include "chrome/renderer/renderer_webidbfactory_impl.h"
 #include "chrome/renderer/renderer_webkitclient_impl.h"
+#include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
+#include "chrome/renderer/safe_browsing/scorer.h"
 #include "chrome/renderer/search_extension.h"
 #include "chrome/renderer/searchbox_extension.h"
 #include "chrome/renderer/spellchecker/spellcheck.h"
@@ -215,6 +218,26 @@ class RenderViewZoomer : public RenderViewVisitor {
   DISALLOW_COPY_AND_ASSIGN(RenderViewZoomer);
 };
 
+class RenderViewPhishingScorerSetter : public RenderViewVisitor {
+ public:
+  explicit RenderViewPhishingScorerSetter(const safe_browsing::Scorer* scorer)
+      : scorer_(scorer) {
+  }
+
+  virtual bool Visit(RenderView* render_view) {
+    safe_browsing::PhishingClassifierDelegate* delegate =
+        render_view->phishing_classifier_delegate();
+    if (delegate)
+      delegate->SetPhishingScorer(scorer_);
+    return true;
+  }
+
+ private:
+  const safe_browsing::Scorer* scorer_;
+
+  DISALLOW_COPY_AND_ASSIGN(RenderViewPhishingScorerSetter);
+};
+
 }  // namespace
 
 // When we run plugins in process, we actually run them on the render thread,
@@ -255,6 +278,7 @@ void RenderThread::Init() {
   idle_notification_delay_in_s_ = is_extension_process_ ?
       kInitialExtensionIdleHandlerDelayS : kInitialIdleHandlerDelayS;
   task_factory_.reset(new ScopedRunnableMethodFactory<RenderThread>(this));
+  callback_factory_.reset(new base::ScopedCallbackFactory<RenderThread>(this));
 
   visited_link_slave_.reset(new VisitedLinkSlave());
   user_script_slave_.reset(new UserScriptSlave());
@@ -1072,8 +1096,20 @@ void RenderThread::OnGpuChannelEstablished(
 }
 
 void RenderThread::OnSetPhishingModel(IPC::PlatformFileForTransit model_file) {
-  // TODO(bryner): create a Scorer from the model file, and propagate it to the
-  // RenderViews so that they can create PhishingClassifiers.
+  safe_browsing::Scorer::CreateFromFile(
+      IPC::PlatformFileForTransitToPlatformFile(model_file),
+      GetFileThreadMessageLoopProxy(),
+      callback_factory_->NewCallback(&RenderThread::PhishingScorerCreated));
+}
+
+void RenderThread::PhishingScorerCreated(safe_browsing::Scorer* scorer) {
+  if (!scorer) {
+    DLOG(ERROR) << "Unable to create a PhishingScorer - corrupt model?";
+    return;
+  }
+  phishing_scorer_.reset(scorer);
+  RenderViewPhishingScorerSetter setter(phishing_scorer_.get());
+  RenderView::ForEach(&setter);
 }
 
 scoped_refptr<base::MessageLoopProxy>
diff --git a/chrome/renderer/render_thread.h b/chrome/renderer/render_thread.h
index 86fab60..bdba97e 100644
--- a/chrome/renderer/render_thread.h
+++ b/chrome/renderer/render_thread.h
@@ -50,6 +50,7 @@ struct WebPreferences;
 
 namespace base {
 class MessageLoopProxy;
+template<class T> class ScopedCallbackFactory;
 class Thread;
 }
 
@@ -57,6 +58,10 @@ namespace IPC {
 struct ChannelHandle;
 }
 
+namespace safe_browsing {
+class Scorer;
+}
+
 namespace WebKit {
 class WebStorageEventDispatcher;
 }
@@ -189,6 +194,12 @@ class RenderThread : public RenderThreadBase,
     return spellchecker_.get();
   }
 
+  // Returns the phishing Scorer object, or NULL if a model has not been passed
+  // in from the browser yet.
+  const safe_browsing::Scorer* phishing_scorer() const {
+    return phishing_scorer_.get();
+  }
+
   bool plugin_refresh_allowed() const { return plugin_refresh_allowed_; }
 
   // Do DNS prefetch resolution of a hostname.
@@ -326,8 +337,12 @@ class RenderThread : public RenderThreadBase,
   // it is allowed to run on.
   void RegisterExtension(v8::Extension* extension, bool restrict_to_extensions);
 
+  // Callback to be run once the phishing Scorer has been created.
+  void PhishingScorerCreated(safe_browsing::Scorer* scorer);
+
   // These objects live solely on the render thread.
   scoped_ptr<ScopedRunnableMethodFactory<RenderThread> > task_factory_;
+  scoped_ptr<base::ScopedCallbackFactory<RenderThread> > callback_factory_;
   scoped_ptr<VisitedLinkSlave> visited_link_slave_;
   scoped_ptr<UserScriptSlave> user_script_slave_;
   scoped_ptr<RendererNetPredictor> renderer_net_predictor_;
@@ -339,6 +354,7 @@ class RenderThread : public RenderThreadBase,
   scoped_ptr<WebKit::WebStorageEventDispatcher> dom_storage_event_dispatcher_;
   scoped_ptr<WebDatabaseObserverImpl> web_database_observer_impl_;
   scoped_ptr<SpellCheck> spellchecker_;
+  scoped_ptr<const safe_browsing::Scorer> phishing_scorer_;
 
   // Used on the renderer and IPC threads.
   scoped_refptr<DBMessageFilter> db_message_filter_;
diff --git a/chrome/renderer/render_view.cc b/chrome/renderer/render_view.cc
index e3e39dc..14e74ef 100644
--- a/chrome/renderer/render_view.cc
+++ b/chrome/renderer/render_view.cc
@@ -80,6 +80,7 @@
 #include "chrome/renderer/render_widget_fullscreen_pepper.h"
 #include "chrome/renderer/renderer_webapplicationcachehost_impl.h"
 #include "chrome/renderer/renderer_webstoragenamespace_impl.h"
+#include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
 #include "chrome/renderer/searchbox_extension.h"
 #include "chrome/renderer/speech_input_dispatcher.h"
 #include "chrome/renderer/spellchecker/spellcheck.h"
@@ -528,6 +529,15 @@ RenderView::RenderView(RenderThreadBase* render_thread,
   page_click_tracker_->AddListener(password_autocomplete_manager_.get());
   page_click_tracker_->AddListener(autofill_helper_.get());
   ClearBlockedContentSettings();
+  if (CommandLine::ForCurrentProcess()->HasSwitch(
+          switches::kEnableClientSidePhishingDetection)) {
+    phishing_delegate_.reset(
+        new safe_browsing::PhishingClassifierDelegate(this, NULL));
+    RenderThread* thread = RenderThread::current();
+    if (thread && thread->phishing_scorer()) {
+      phishing_delegate_->SetPhishingScorer(thread->phishing_scorer());
+    }
+  }
 }
 
 RenderView::~RenderView() {
@@ -565,6 +575,10 @@ RenderView::~RenderView() {
 
   render_thread_->RemoveFilter(audio_message_filter_);
 
+  // Tell the PhishingClassifierDelegate that the view is going away.
+  if (phishing_delegate_.get())
+    phishing_delegate_->CancelPendingClassification();
+
 #ifndef NDEBUG
   // Make sure we are no longer referenced by the ViewMap.
   ViewMap* views = Singleton<ViewMap>::get();
@@ -1027,6 +1041,9 @@ void RenderView::CapturePageInfo(int load_id, bool preliminary_capture) {
   }
 
   OnCaptureThumbnail();
+
+  if (phishing_delegate_.get())
+    phishing_delegate_->FinishedLoad(&contents);
 }
 
 void RenderView::CaptureText(WebFrame* frame, string16* contents) {
@@ -2494,6 +2511,10 @@ void RenderView::closeWidgetSoon() {
   // to access the WebView.
   translate_helper_.CancelPendingTranslation();
 
+  // Same for the phishing classifier.
+  if (phishing_delegate_.get())
+    phishing_delegate_->CancelPendingClassification();
+
   if (script_can_close_)
     RenderWidget::closeWidgetSoon();
 }
@@ -3197,6 +3218,10 @@ void RenderView::didCommitProvisionalLoad(WebFrame* frame,
     // Any pending translation is now obsolete.
     translate_helper_.CancelPendingTranslation();
 
+    // Let the phishing classifier decide whether to cancel classification.
+    if (phishing_delegate_.get())
+      phishing_delegate_->CommittedLoadInFrame(frame);
+
     // Advance our offset in session history, applying the length limit.  There
     // is now no forward history.
     history_list_offset_++;
diff --git a/chrome/renderer/render_view.h b/chrome/renderer/render_view.h
index c51e948..8b4d143 100644
--- a/chrome/renderer/render_view.h
+++ b/chrome/renderer/render_view.h
@@ -98,6 +98,10 @@ class PluginInstance;
 class FullscreenContainer;
 }
 
+namespace safe_browsing {
+class PhishingClassifierDelegate;
+}
+
 namespace webkit_glue {
 class ImageResourceFetcher;
 struct FileUploadData;
@@ -223,6 +227,12 @@ class RenderView : public RenderWidget,
     return page_click_tracker_.get();
   }
 
+  // May be NULL if client-side phishing detection is disabled.
+  safe_browsing::PhishingClassifierDelegate*
+      phishing_classifier_delegate() const {
+    return phishing_delegate_.get();
+  }
+
   // Returns true if we should display scrollbars for the given view size and
   // false if the scrollbars should be hidden.
   bool should_display_scrollbars(int width, int height) const {
@@ -1350,6 +1360,10 @@ class RenderView : public RenderWidget,
   // Responsible for sending page load related histograms.
   PageLoadHistograms page_load_histograms_;
 
+  // Handles the interaction between the RenderView and the phishing
+  // classifier.
+  scoped_ptr<safe_browsing::PhishingClassifierDelegate> phishing_delegate_;
+
   // Misc ----------------------------------------------------------------------
 
   // The current and pending file chooser completion objects. If the queue is
diff --git a/chrome/renderer/safe_browsing/phishing_classifier.cc b/chrome/renderer/safe_browsing/phishing_classifier.cc
index d9a304d..33c995d 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier.cc
@@ -31,12 +31,23 @@ const double PhishingClassifier::kInvalidScore = -1.0;
 const double PhishingClassifier::kPhishyThreshold = 0.5;
 
 PhishingClassifier::PhishingClassifier(RenderView* render_view,
-                                       const Scorer* scorer,
                                        FeatureExtractorClock* clock)
     : render_view_(render_view),
-      scorer_(scorer),
+      scorer_(NULL),
       clock_(clock),
       ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
+  Clear();
+}
+
+PhishingClassifier::~PhishingClassifier() {
+  // The RenderView should have called CancelPendingClassification() before
+  // we are destroyed.
+  CheckNoPendingClassification();
+}
+
+void PhishingClassifier::set_phishing_scorer(const Scorer* scorer) {
+  DCHECK(!scorer_);
+  scorer_ = scorer;
   url_extractor_.reset(new PhishingUrlFeatureExtractor);
   dom_extractor_.reset(
       new PhishingDOMFeatureExtractor(render_view_, clock_.get()));
@@ -45,18 +56,16 @@ PhishingClassifier::PhishingClassifier(RenderView* render_view,
       &scorer_->page_words(),
       scorer_->max_words_per_term(),
       clock_.get()));
-
-  Clear();
 }
 
-PhishingClassifier::~PhishingClassifier() {
-  // The RenderView should have called CancelPendingClassification() before
-  // we are destroyed.
-  CheckNoPendingClassification();
+bool PhishingClassifier::is_ready() const {
+  return scorer_ != NULL;
 }
 
 void PhishingClassifier::BeginClassification(const string16* page_text,
                                              DoneCallback* done_callback) {
+  DCHECK(is_ready());
+
   // The RenderView should have called CancelPendingClassification() before
   // starting a new classification, so DCHECK this.
   CheckNoPendingClassification();
@@ -120,6 +129,7 @@ void PhishingClassifier::BeginFeatureExtraction() {
 void PhishingClassifier::CancelPendingClassification() {
   // Note that cancelling the feature extractors is simply a no-op if they
   // were not running.
+  DCHECK(is_ready());
   dom_extractor_->CancelPendingExtraction();
   term_extractor_->CancelPendingExtraction();
   method_factory_.RevokeAll();
diff --git a/chrome/renderer/safe_browsing/phishing_classifier.h b/chrome/renderer/safe_browsing/phishing_classifier.h
index b460c9a..9c19816 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier.h
+++ b/chrome/renderer/safe_browsing/phishing_classifier.h
@@ -38,14 +38,19 @@ class PhishingClassifier {
   static const double kInvalidScore;
 
   // Creates a new PhishingClassifier object that will operate on
-  // |render_view|.  |scorer| will be used for computing the final score, and
-  // must live at least as long as the PhishingClassifier.  |clock| is used to
-  // time feature extractor operations, and the PhishingClassifier takes
-  // ownership of this object.
-  PhishingClassifier(RenderView* render_view,
-                     const Scorer* scorer,
-                     FeatureExtractorClock* clock);
-  ~PhishingClassifier();
+  // |render_view|.  |clock| is used to time feature extractor operations, and
+  // the PhishingClassifier takes ownership of this object.  Note that the
+  // classifier will not be 'ready' until set_phishing_scorer() is called.
+  PhishingClassifier(RenderView* render_view, FeatureExtractorClock* clock);
+  virtual ~PhishingClassifier();
+
+  // Sets a scorer for the classifier to use in computing the phishiness score.
+  // This must live at least as long as the PhishingClassifier.
+  void set_phishing_scorer(const Scorer* scorer);
+
+  // Returns true if the classifier is ready to classify pages, i.e. it
+  // has had a scorer set via set_phishing_scorer().
+  bool is_ready() const;
 
   // Called by the RenderView when a page has finished loading.  This begins
   // the feature extraction and scoring process. |page_text| should contain
@@ -59,12 +64,17 @@ class PhishingClassifier {
   // MessageLoop to continue processing.  Once the scoring process is complete,
   // |done_callback| is run on the current thread.  PhishingClassifier takes
   // ownership of the callback.
-  void BeginClassification(const string16* page_text, DoneCallback* callback);
+  //
+  // It is an error to call BeginClassification if the classifier is not yet
+  // ready.
+  virtual void BeginClassification(const string16* page_text,
+                                   DoneCallback* callback);
 
   // Called by the RenderView (on the render thread) when a page is unloading
   // or the RenderView is being destroyed.  This cancels any extraction that
-  // is in progress.
-  void CancelPendingClassification();
+  // is in progress.  It is an error to call CancelPendingClassification if
+  // the classifier is not yet ready.
+  virtual void CancelPendingClassification();
 
  private:
   // Any score equal to or above this value is considered phishy.
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
index 324dd73..6dc9192 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
@@ -61,7 +61,7 @@ class PhishingClassifierTest : public RenderViewFakeResourcesTest {
     clock_ = new MockFeatureExtractorClock;
     scorer_.reset(Scorer::Create(model.SerializeAsString()));
     ASSERT_TRUE(scorer_.get());
-    classifier_.reset(new PhishingClassifier(view_, scorer_.get(), clock_));
+    classifier_.reset(new PhishingClassifier(view_, clock_));
   }
 
   virtual void TearDown() {
@@ -102,6 +102,13 @@ class PhishingClassifierTest : public RenderViewFakeResourcesTest {
 };
 
 TEST_F(PhishingClassifierTest, TestClassification) {
+  // No scorer yet, so the classifier is not ready.
+  EXPECT_FALSE(classifier_->is_ready());
+
+  // Now set the scorer.
+  classifier_->set_phishing_scorer(scorer_.get());
+  EXPECT_TRUE(classifier_->is_ready());
+
   // This test doesn't exercise the extraction timing.
   EXPECT_CALL(*clock_, Now())
       .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
new file mode 100644
index 0000000..612eb4d
--- /dev/null
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
@@ -0,0 +1,124 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
+
+#include "base/callback.h"
+#include "base/logging.h"
+#include "chrome/renderer/navigation_state.h"
+#include "chrome/renderer/render_view.h"
+#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
+#include "chrome/renderer/safe_browsing/phishing_classifier.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebFrame.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebURL.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebView.h"
+
+namespace safe_browsing {
+
+PhishingClassifierDelegate::PhishingClassifierDelegate(
+    RenderView* render_view,
+    PhishingClassifier* classifier)
+    : render_view_(render_view),
+      last_page_id_sent_to_classifier_(-1),
+      pending_classification_(false) {
+  if (!classifier) {
+    classifier = new PhishingClassifier(render_view_,
+                                        new FeatureExtractorClock());
+  }
+  classifier_.reset(classifier);
+}
+
+PhishingClassifierDelegate::~PhishingClassifierDelegate() {
+  CancelPendingClassification();
+}
+
+void PhishingClassifierDelegate::SetPhishingScorer(
+    const safe_browsing::Scorer* scorer) {
+  classifier_->set_phishing_scorer(scorer);
+
+  if (pending_classification_) {
+    pending_classification_ = false;
+    // If we have a pending classification, it should always be true that the
+    // main frame URL and page id have not changed since we queued the
+    // classification.  This is because we stop any pending classification on
+    // main frame loads in RenderView::didCommitProvisionalLoad().
+    DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);
+    DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_);
+    classifier_->BeginClassification(
+        &classifier_page_text_,
+        NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
+  }
+}
+
+void PhishingClassifierDelegate::CommittedLoadInFrame(
+    WebKit::WebFrame* frame) {
+  // A new page is starting to load.  Unless the load is a navigation within
+  // the same page, we need to cancel classification since the content will
+  // now be inconsistent with the phishing model.
+  NavigationState* state = NavigationState::FromDataSource(
+      frame->dataSource());
+  if (!state->was_within_same_page()) {
+    CancelPendingClassification();
+  }
+}
+
+void PhishingClassifierDelegate::FinishedLoad(string16* page_text) {
+  // We check that the page id has incremented so that we don't reclassify
+  // pages as the user moves back and forward in session history.  Note: we
+  // don't send every page id to the classifier, only those where the toplevel
+  // URL changed.
+  int load_id = render_view_->page_id();
+  if (load_id <= last_page_id_sent_to_classifier_) {
+    return;
+  }
+
+  GURL url_without_ref = StripToplevelUrl();
+  if (url_without_ref == last_url_sent_to_classifier_) {
+    // The toplevel URL is the same, except for the ref.
+    // Update the last page id we sent, but don't trigger a new classification.
+    last_page_id_sent_to_classifier_ = load_id;
+    return;
+  }
+
+  last_url_sent_to_classifier_ = url_without_ref;
+  last_page_id_sent_to_classifier_ = load_id;
+  classifier_page_text_.swap(*page_text);
+
+  if (classifier_->is_ready()) {
+    classifier_->BeginClassification(
+        &classifier_page_text_,
+        NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
+  } else {
+    // The classifier has no scorer yet, so we'll begin classification once
+    // SetPhishingScorer() is called.
+    pending_classification_ = true;
+  }
+}
+
+void PhishingClassifierDelegate::CancelPendingClassification() {
+  if (classifier_->is_ready()) {
+    classifier_->CancelPendingClassification();
+  }
+  classifier_page_text_.clear();
+  pending_classification_ = false;
+}
+
+void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,
+                                                    double phishy_score) {
+  // We no longer need the page text.
+  classifier_page_text_.clear();
+  VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;
+
+  // TODO(bryner): Grab a snapshot and send a DetectedPhishingSite message
+  // to the browser.
+}
+
+GURL PhishingClassifierDelegate::StripToplevelUrl() {
+  GURL toplevel_url = render_view_->webview()->mainFrame()->url();
+  GURL::Replacements replacements;
+  replacements.ClearRef();
+  return toplevel_url.ReplaceComponents(replacements);
+}
+
+}  // namespace safe_browsing
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
new file mode 100644
index 0000000..9d17127
--- /dev/null
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
@@ -0,0 +1,93 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// This class is used by the RenderView to interact with a PhishingClassifier.
+
+#ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
+#define CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
+#pragma once
+
+#include "base/scoped_ptr.h"
+#include "base/string16.h"
+#include "googleurl/src/gurl.h"
+
+class RenderView;
+
+namespace WebKit {
+class WebFrame;
+}
+
+namespace safe_browsing {
+class PhishingClassifier;
+class Scorer;
+
+class PhishingClassifierDelegate {
+ public:
+  // The RenderView owns us.  This object takes ownership of the classifier.
+  // Note that if classifier is null, a default instance of PhishingClassifier
+  // will be used.
+  PhishingClassifierDelegate(RenderView* render_view,
+                             PhishingClassifier* classifier);
+  ~PhishingClassifierDelegate();
+
+  // Called by the RenderView once there is a phishing scorer available.
+  // The scorer is passed on to the classifier.
+  void SetPhishingScorer(const safe_browsing::Scorer* scorer);
+
+  // Called by the RenderView when a page has started loading in the given
+  // WebFrame.  Typically, this will cause any pending classification to be
+  // cancelled.  However, if the load is for the main frame, and the toplevel
+  // URL has not changed, we continue running the current classification.
+  void CommittedLoadInFrame(WebKit::WebFrame* frame);
+
+  // Called by the RenderView once a page has finished loading.  Determines
+  // whether a new toplevel load has taken place, and if so, begins
+  // classification.  May modify page_text.  Note that it is an error to
+  // call FinishedLoad if there is a pending classification.
+  void FinishedLoad(string16* page_text);
+
+  // Cancels any pending classification and frees the page text.  Called by
+  // the RenderView when the RenderView is going away.
+  void CancelPendingClassification();
+
+ private:
+  friend class PhishingClassifierDelegateTest;
+
+  // Called when classification for the current page finishes.
+  void ClassificationDone(bool is_phishy, double phishy_score);
+
+  // Returns the RenderView's toplevel URL, with the ref stripped.
+  GURL StripToplevelUrl();
+
+  // The RenderView that owns this object.
+  RenderView* render_view_;
+
+  // The PhishingClassifier to use for the RenderView.  It is not ready to
+  // classify until a scorer is made available via SetPhishingScorer().
+  scoped_ptr<PhishingClassifier> classifier_;
+
+  // The last URL that was sent to the phishing classifier.
+  GURL last_url_sent_to_classifier_;
+
+  // The page id of the last load that was sent to the phishing classifier.
+  // This is used to suppress phishing classification on back and forward
+  // navigations in history.
+  int32 last_page_id_sent_to_classifier_;
+
+  // The page text that will be analyzed by the phishing classifier.  This is
+  // set by FinishedLoad and cleared when the classifier finishes.  Note that
+  // if the classifier is not yet ready when FinishedLoad is called, the page
+  // text will be cached until the scorer is set via SetPhishingScorer().
+  string16 classifier_page_text_;
+
+  // Set to true if we should run the phishing classifier on the current page
+  // as soon as SetPhishingScorer() is called.
+  bool pending_classification_;
+
+  DISALLOW_COPY_AND_ASSIGN(PhishingClassifierDelegate);
+};
+
+}  // namespace safe_browsing
+
+#endif  // CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
new file mode 100644
index 0000000..d281f25
--- /dev/null
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
@@ -0,0 +1,188 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// Note: this test uses RenderViewFakeResourcesTest in order to set up a
+// real RenderThread to hold the phishing Scorer object.
+
+#include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
+
+#include "base/scoped_ptr.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/renderer/render_view.h"
+#include "chrome/renderer/safe_browsing/features.h"
+#include "chrome/renderer/safe_browsing/phishing_classifier.h"
+#include "chrome/renderer/safe_browsing/render_view_fake_resources_test.h"
+#include "chrome/renderer/safe_browsing/scorer.h"
+#include "googleurl/src/gurl.h"
+#include "testing/gmock/include/gmock/gmock.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebFrame.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebURL.h"
+#include "third_party/WebKit/WebKit/chromium/public/WebURLRequest.h"
+
+using ::testing::_;
+using ::testing::DeleteArg;
+using ::testing::Mock;
+using ::testing::Pointee;
+using ::testing::StrictMock;
+
+namespace safe_browsing {
+
+namespace {
+class MockPhishingClassifier : public PhishingClassifier {
+ public:
+  explicit MockPhishingClassifier(RenderView* render_view)
+      : PhishingClassifier(render_view, NULL /* clock */) {}
+
+  virtual ~MockPhishingClassifier() {}
+
+  MOCK_METHOD2(BeginClassification, void(const string16*, DoneCallback*));
+  MOCK_METHOD0(CancelPendingClassification, void());
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MockPhishingClassifier);
+};
+
+class MockScorer : public Scorer {
+ public:
+  MockScorer() : Scorer() {}
+  virtual ~MockScorer() {}
+
+  MOCK_METHOD1(ComputeScore, double(const FeatureMap&));
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MockScorer);
+};
+}  // namespace
+
+class PhishingClassifierDelegateTest : public RenderViewFakeResourcesTest {
+};
+
+TEST_F(PhishingClassifierDelegateTest, Navigation) {
+  MockPhishingClassifier* classifier =
+      new StrictMock<MockPhishingClassifier>(view_);
+  PhishingClassifierDelegate delegate(view_, classifier);
+  MockScorer scorer;
+  delegate.SetPhishingScorer(&scorer);
+  ASSERT_TRUE(classifier->is_ready());
+
+  // Test an initial load.  We expect classification to happen normally.
+  responses_["http://host.com/"] =
+      "<html><body><iframe src=\"http://sub1.com/\"></iframe></body></html>";
+  LoadURL("http://host.com/");
+  WebKit::WebFrame* child_frame = GetMainFrame()->firstChild();
+  string16 page_text = ASCIIToUTF16("dummy");
+  EXPECT_CALL(*classifier, CancelPendingClassification()).Times(2);
+  delegate.CommittedLoadInFrame(GetMainFrame());
+  delegate.CommittedLoadInFrame(child_frame);
+  Mock::VerifyAndClearExpectations(classifier);
+  EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
+      WillOnce(DeleteArg<1>());
+  delegate.FinishedLoad(&page_text);
+  Mock::VerifyAndClearExpectations(classifier);
+
+  // Reloading the same page should not trigger a reclassification.
+  // However, it will cancel any pending classification since the
+  // content is being replaced.
+  EXPECT_CALL(*classifier, CancelPendingClassification());
+  delegate.CommittedLoadInFrame(GetMainFrame());
+  Mock::VerifyAndClearExpectations(classifier);
+  delegate.FinishedLoad(&page_text);
+
+  // Navigating in a subframe will increment the page id, but not change
+  // the toplevel URL.  This should cancel pending classification since the
+  // page content is changing, and not begin a new classification.
+  child_frame->loadRequest(WebKit::WebURLRequest(GURL("http://sub2.com/")));
+  message_loop_.Run();
+  EXPECT_CALL(*classifier, CancelPendingClassification());
+  delegate.CommittedLoadInFrame(child_frame);
+  Mock::VerifyAndClearExpectations(classifier);
+  delegate.FinishedLoad(&page_text);
+
+  // Scrolling to an anchor will increment the page id, but should not
+  // trigger a reclassification.  A pending classification should not
+  // be cancelled, since the content is not changing.
+  LoadURL("http://host.com/#foo");
+  delegate.CommittedLoadInFrame(GetMainFrame());
+  delegate.FinishedLoad(&page_text);
+
+  // Now load a new toplevel page, which should trigger another classification.
+  LoadURL("http://host2.com/");
+  page_text = ASCIIToUTF16("dummy2");
+  EXPECT_CALL(*classifier, CancelPendingClassification());
+  delegate.CommittedLoadInFrame(GetMainFrame());
+  Mock::VerifyAndClearExpectations(classifier);
+  EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
+      WillOnce(DeleteArg<1>());
+  delegate.FinishedLoad(&page_text);
+  Mock::VerifyAndClearExpectations(classifier);
+
+  // The delegate will cancel pending classification on destruction.
+  EXPECT_CALL(*classifier, CancelPendingClassification());
+}
+
+TEST_F(PhishingClassifierDelegateTest, PendingClassification) {
+  // For this test, we'll create the delegate with no scorer available yet.
+  MockPhishingClassifier* classifier =
+      new StrictMock<MockPhishingClassifier>(view_);
+  PhishingClassifierDelegate delegate(view_, classifier);
+  ASSERT_FALSE(classifier->is_ready());
+
+  // Queue up a pending classification, cancel it, then queue up another one.
+  LoadURL("http://host.com/");
+  string16 page_text = ASCIIToUTF16("dummy");
+  delegate.CommittedLoadInFrame(GetMainFrame());
+  delegate.FinishedLoad(&page_text);
+
+  LoadURL("http://host2.com/");
+  delegate.CommittedLoadInFrame(GetMainFrame());
+  page_text = ASCIIToUTF16("dummy2");
+  delegate.FinishedLoad(&page_text);
+
+  // Now set a scorer, which should make the classifier ready and allow
+  // the classification to proceed.  Note that we need to reset |page_text|
+  // since it is modified by the call to FinishedLoad().
+  page_text = ASCIIToUTF16("dummy2");
+  EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
+      WillOnce(DeleteArg<1>());
+  MockScorer scorer;
+  delegate.SetPhishingScorer(&scorer);
+  Mock::VerifyAndClearExpectations(classifier);
+
+  // The delegate will cancel pending classification on destruction.
+  EXPECT_CALL(*classifier, CancelPendingClassification());
+}
+
+TEST_F(PhishingClassifierDelegateTest, PendingClassification_Ref) {
+  // Similar to the last test, but navigates within the page before
+  // setting the scorer.
+  MockPhishingClassifier* classifier =
+      new StrictMock<MockPhishingClassifier>(view_);
+  PhishingClassifierDelegate delegate(view_, classifier);
+  ASSERT_FALSE(classifier->is_ready());
+
+  // Queue up a pending classification, then scroll to an anchor in the page.
+  LoadURL("http://host.com/");
+  delegate.CommittedLoadInFrame(GetMainFrame());
+  string16 orig_page_text = ASCIIToUTF16("dummy");
+  string16 page_text = orig_page_text;
+  delegate.FinishedLoad(&page_text);
+
+  LoadURL("http://host.com/#foo");
+  page_text = orig_page_text;
+  delegate.CommittedLoadInFrame(GetMainFrame());
+  delegate.FinishedLoad(&page_text);
+
+  // Now set a scorer, which should make the classifier ready and allow
+  // the pending classification to proceed.
+  EXPECT_CALL(*classifier, BeginClassification(Pointee(orig_page_text), _)).
+      WillOnce(DeleteArg<1>());
+  MockScorer scorer;
+  delegate.SetPhishingScorer(&scorer);
+  Mock::VerifyAndClearExpectations(classifier);
+
+  // The delegate will cancel pending classification on destruction.
+  EXPECT_CALL(*classifier, CancelPendingClassification());
+}
+
+}  // namespace safe_browsing
diff --git a/chrome/renderer/safe_browsing/scorer.h b/chrome/renderer/safe_browsing/scorer.h
index 5cd8cac..9e55afb 100644
--- a/chrome/renderer/safe_browsing/scorer.h
+++ b/chrome/renderer/safe_browsing/scorer.h
@@ -73,12 +73,14 @@ class Scorer {
   // expect to load.
   static const int kMaxPhishingModelSizeBytes;
 
+ protected:
+  // Most clients should use the factory method.  This constructor is
+  // protected to allow for mock implementations.
+  Scorer();
+
  private:
   friend class PhishingScorerTest;
 
-  // Use factory method.
-  Scorer();
-
   // Computes the score for a given rule and feature map.  The score is computed
   // by multiplying the rule weight with the product of feature weights for the
   // given rule.  The feature weights are stored in the feature map.  If a
author	bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-11-19 20:01:00 +0000
committer	bryner@chromium.org <bryner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-11-19 20:01:00 +0000
commit	3ead1329dfad20de066dc5882df63b4ccc851608 (patch)
tree	b6d961150ff52d1b2afe888ad42e524209693cb0
parent	d843cd31b00d652198d49b099074db6db940833d (diff)
download	chromium_src-3ead1329dfad20de066dc5882df63b4ccc851608.zip chromium_src-3ead1329dfad20de066dc5882df63b4ccc851608.tar.gz chromium_src-3ead1329dfad20de066dc5882df63b4ccc851608.tar.bz2