// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" #include #include "base/bind.h" #include "base/callback.h" #include "base/lazy_instance.h" #include "base/logging.h" #include "base/metrics/histogram.h" #include "chrome/common/safe_browsing/csd.pb.h" #include "chrome/common/safe_browsing/safebrowsing_messages.h" #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/phishing_classifier.h" #include "chrome/renderer/safe_browsing/scorer.h" #include "content/public/renderer/document_state.h" #include "content/public/renderer/navigation_state.h" #include "content/public/renderer/render_thread.h" #include "content/public/renderer/render_view.h" #include "third_party/WebKit/public/platform/WebURL.h" #include "third_party/WebKit/public/web/WebDocument.h" #include "third_party/WebKit/public/web/WebFrame.h" #include "third_party/WebKit/public/web/WebView.h" using content::DocumentState; using content::NavigationState; using content::RenderThread; namespace safe_browsing { static GURL StripRef(const GURL& url) { GURL::Replacements replacements; replacements.ClearRef(); return url.ReplaceComponents(replacements); } typedef std::set PhishingClassifierDelegates; static base::LazyInstance g_delegates = LAZY_INSTANCE_INITIALIZER; static base::LazyInstance > g_phishing_scorer = LAZY_INSTANCE_INITIALIZER; // static PhishingClassifierFilter* PhishingClassifierFilter::Create() { // Private constructor and public static Create() method to facilitate // stubbing out this class for binary-size reduction purposes. return new PhishingClassifierFilter(); } PhishingClassifierFilter::PhishingClassifierFilter() : RenderProcessObserver() {} PhishingClassifierFilter::~PhishingClassifierFilter() {} bool PhishingClassifierFilter::OnControlMessageReceived( const IPC::Message& message) { bool handled = true; IPC_BEGIN_MESSAGE_MAP(PhishingClassifierFilter, message) IPC_MESSAGE_HANDLER(SafeBrowsingMsg_SetPhishingModel, OnSetPhishingModel) IPC_MESSAGE_UNHANDLED(handled = false) IPC_END_MESSAGE_MAP() return handled; } void PhishingClassifierFilter::OnSetPhishingModel(const std::string& model) { safe_browsing::Scorer* scorer = NULL; // An empty model string means we should disable client-side phishing // detection. if (!model.empty()) { scorer = safe_browsing::Scorer::Create(model); if (!scorer) { DLOG(ERROR) << "Unable to create a PhishingScorer - corrupt model?"; return; } } PhishingClassifierDelegates::iterator i; for (i = g_delegates.Get().begin(); i != g_delegates.Get().end(); ++i) { (*i)->SetPhishingScorer(scorer); } g_phishing_scorer.Get().reset(scorer); } // static PhishingClassifierDelegate* PhishingClassifierDelegate::Create( content::RenderView* render_view, PhishingClassifier* classifier) { // Private constructor and public static Create() method to facilitate // stubbing out this class for binary-size reduction purposes. return new PhishingClassifierDelegate(render_view, classifier); } PhishingClassifierDelegate::PhishingClassifierDelegate( content::RenderView* render_view, PhishingClassifier* classifier) : content::RenderViewObserver(render_view), last_main_frame_transition_(content::PAGE_TRANSITION_LINK), have_page_text_(false), is_classifying_(false) { g_delegates.Get().insert(this); if (!classifier) { classifier = new PhishingClassifier(render_view, new FeatureExtractorClock()); } classifier_.reset(classifier); if (g_phishing_scorer.Get().get()) SetPhishingScorer(g_phishing_scorer.Get().get()); } PhishingClassifierDelegate::~PhishingClassifierDelegate() { CancelPendingClassification(SHUTDOWN); g_delegates.Get().erase(this); } void PhishingClassifierDelegate::SetPhishingScorer( const safe_browsing::Scorer* scorer) { if (!render_view()->GetWebView()) return; // RenderView is tearing down. if (is_classifying_) { // If there is a classification going on right now it means we're // actually replacing an existing scorer with a new model. In // this case we simply cancel the current classification. // TODO(noelutz): if this happens too frequently we could also // replace the old scorer with the new one once classification is done // but this would complicate the code somewhat. CancelPendingClassification(NEW_PHISHING_SCORER); } classifier_->set_phishing_scorer(scorer); // Start classifying the current page if all conditions are met. // See MaybeStartClassification() for details. MaybeStartClassification(); } void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { last_url_received_from_browser_ = StripRef(url); // Start classifying the current page if all conditions are met. // See MaybeStartClassification() for details. MaybeStartClassification(); } void PhishingClassifierDelegate::DidCommitProvisionalLoad( WebKit::WebFrame* frame, bool is_new_navigation) { // A new page is starting to load, so cancel classificaiton. // // TODO(bryner): We shouldn't need to cancel classification if the navigation // is within the same page. However, if we let classification continue in // this case, we need to properly deal with the fact that PageCaptured will // be called again for the in-page navigation. We need to be sure not to // swap out the page text while the term feature extractor is still running. DocumentState* document_state = DocumentState::FromDataSource( frame->dataSource()); NavigationState* navigation_state = document_state->navigation_state(); CancelPendingClassification(navigation_state->was_within_same_page() ? NAVIGATE_WITHIN_PAGE : NAVIGATE_AWAY); if (frame == render_view()->GetWebView()->mainFrame()) { last_main_frame_transition_ = navigation_state->transition_type(); } } void PhishingClassifierDelegate::PageCaptured(string16* page_text, bool preliminary_capture) { if (preliminary_capture) { return; } // Make sure there's no classification in progress. We don't want to swap // out the page text string from underneath the term feature extractor. // // Note: Currently, if the url hasn't changed, we won't restart // classification in this case. We may want to adjust this. CancelPendingClassification(PAGE_RECAPTURED); last_finished_load_url_ = GetToplevelUrl(); classifier_page_text_.swap(*page_text); have_page_text_ = true; MaybeStartClassification(); } void PhishingClassifierDelegate::CancelPendingClassification( CancelClassificationReason reason) { if (is_classifying_) { UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.CancelClassificationReason", reason, CANCEL_CLASSIFICATION_MAX); is_classifying_ = false; } if (classifier_->is_ready()) { classifier_->CancelPendingClassification(); } classifier_page_text_.clear(); have_page_text_ = false; } bool PhishingClassifierDelegate::OnMessageReceived( const IPC::Message& message) { bool handled = true; IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message) IPC_MESSAGE_HANDLER(SafeBrowsingMsg_StartPhishingDetection, OnStartPhishingDetection) IPC_MESSAGE_UNHANDLED(handled = false) IPC_END_MESSAGE_MAP() return handled; } void PhishingClassifierDelegate::ClassificationDone( const ClientPhishingRequest& verdict) { // We no longer need the page text. classifier_page_text_.clear(); VLOG(2) << "Phishy verdict = " << verdict.is_phishing() << " score = " << verdict.client_score(); if (verdict.client_score() != PhishingClassifier::kInvalidScore) { DCHECK_EQ(last_url_sent_to_classifier_.spec(), verdict.url()); RenderThread::Get()->Send(new SafeBrowsingHostMsg_PhishingDetectionDone( routing_id(), verdict.SerializeAsString())); } } GURL PhishingClassifierDelegate::GetToplevelUrl() { return render_view()->GetWebView()->mainFrame()->document().url(); } void PhishingClassifierDelegate::MaybeStartClassification() { // We can begin phishing classification when the following conditions are // met: // 1. A Scorer has been created // 2. The browser has sent a StartPhishingDetection message for the current // toplevel URL. // 3. The page has finished loading and the page text has been extracted. // 4. The load is a new navigation (not a session history navigation). // 5. The toplevel URL has not already been classified. // // Note that if we determine that this particular navigation should not be // classified at all (as opposed to deferring it until we get an IPC or the // load completes), we discard the page text since it won't be needed. if (!classifier_->is_ready()) { VLOG(2) << "Not starting classification, no Scorer created."; // Keep classifier_page_text_, in case a Scorer is set later. return; } if (last_main_frame_transition_ & content::PAGE_TRANSITION_FORWARD_BACK) { // Skip loads from session history navigation. However, update the // last URL sent to the classifier, so that we'll properly detect // in-page navigations. VLOG(2) << "Not starting classification for back/forward navigation"; last_url_sent_to_classifier_ = last_finished_load_url_; classifier_page_text_.clear(); // we won't need this. have_page_text_ = false; return; } GURL stripped_last_load_url(StripRef(last_finished_load_url_)); if (stripped_last_load_url == StripRef(last_url_sent_to_classifier_)) { // We've already classified this toplevel URL, so this was likely an // in-page navigation or a subframe navigation. The browser should not // send a StartPhishingDetection IPC in this case. VLOG(2) << "Toplevel URL is unchanged, not starting classification."; classifier_page_text_.clear(); // we won't need this. have_page_text_ = false; return; } if (!have_page_text_) { VLOG(2) << "Not starting classification, there is no page text ready."; return; } if (last_url_received_from_browser_ != stripped_last_load_url) { // The browser has not yet confirmed that this URL should be classified, // so defer classification for now. Note: the ref does not affect // any of the browser's preclassification checks, so we don't require it // to match. VLOG(2) << "Not starting classification, last url from browser is " << last_url_received_from_browser_ << ", last finished load is " << last_finished_load_url_; // Keep classifier_page_text_, in case the browser notifies us later that // we should classify the URL. return; } VLOG(2) << "Starting classification for " << last_finished_load_url_; last_url_sent_to_classifier_ = last_finished_load_url_; is_classifying_ = true; classifier_->BeginClassification( &classifier_page_text_, base::Bind(&PhishingClassifierDelegate::ClassificationDone, base::Unretained(this))); } } // namespace safe_browsing