diff options
author | jam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-13 00:44:31 +0000 |
---|---|---|
committer | jam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-04-13 00:44:31 +0000 |
commit | 93b9d6948d212b630a115dc4b77bbe6bf2b3b671 (patch) | |
tree | 92e02d514f2b44cdd2b7f82063ed3e91af3773bf /chrome/renderer | |
parent | 71504b42e176b37b9324897a53908df5ba6c723e (diff) | |
download | chromium_src-93b9d6948d212b630a115dc4b77bbe6bf2b3b671.zip chromium_src-93b9d6948d212b630a115dc4b77bbe6bf2b3b671.tar.gz chromium_src-93b9d6948d212b630a115dc4b77bbe6bf2b3b671.tar.bz2 |
Move the code that talks to spelling and translate out of content. This change creates ChromeRenderObserver, which is a RenderViewObserver for the Chrome layer. It also adds a TranslateTabHelper to hold the per-tab language data, and moves LanguageState back to chrome since it is not used by content.
Review URL: http://codereview.chromium.org/6824068
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81341 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer')
-rw-r--r-- | chrome/renderer/autofill/autofill_agent.h | 5 | ||||
-rw-r--r-- | chrome/renderer/chrome_content_renderer_client.cc | 62 | ||||
-rw-r--r-- | chrome/renderer/chrome_content_renderer_client.h | 1 | ||||
-rw-r--r-- | chrome/renderer/chrome_render_observer.cc | 344 | ||||
-rw-r--r-- | chrome/renderer/chrome_render_observer.h | 79 | ||||
-rw-r--r-- | chrome/renderer/safe_browsing/phishing_classifier_delegate.cc | 4 | ||||
-rw-r--r-- | chrome/renderer/safe_browsing/phishing_classifier_delegate.h | 7 | ||||
-rw-r--r-- | chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc | 39 | ||||
-rw-r--r-- | chrome/renderer/translate_helper.cc | 53 | ||||
-rw-r--r-- | chrome/renderer/translate_helper.h | 38 | ||||
-rw-r--r-- | chrome/renderer/translate_helper_browsertest.cc | 2 |
11 files changed, 549 insertions, 85 deletions
diff --git a/chrome/renderer/autofill/autofill_agent.h b/chrome/renderer/autofill/autofill_agent.h index c3255fb..ba7dba9 100644 --- a/chrome/renderer/autofill/autofill_agent.h +++ b/chrome/renderer/autofill/autofill_agent.h @@ -38,6 +38,10 @@ class AutofillAgent : public RenderViewObserver, PasswordAutofillManager* password_autofill_manager); virtual ~AutofillAgent(); + // Called when the translate helper has finished translating the page. We + // use this signal to re-scan the page for forms. + void FrameTranslated(WebKit::WebFrame* frame); + // WebKit::WebAutoFillClient implementation. Public for tests. virtual void didAcceptAutoFillSuggestion(const WebKit::WebNode& node, const WebKit::WebString& value, @@ -71,7 +75,6 @@ class AutofillAgent : public RenderViewObserver, virtual void FrameWillClose(WebKit::WebFrame* frame); virtual void WillSubmitForm(WebKit::WebFrame* frame, const WebKit::WebFormElement& form); - virtual void FrameTranslated(WebKit::WebFrame* frame); // PageClickListener implementation: virtual bool InputElementClicked(const WebKit::WebInputElement& element, diff --git a/chrome/renderer/chrome_content_renderer_client.cc b/chrome/renderer/chrome_content_renderer_client.cc index 0703b7b..42cdbfc 100644 --- a/chrome/renderer/chrome_content_renderer_client.cc +++ b/chrome/renderer/chrome_content_renderer_client.cc @@ -9,7 +9,6 @@ #include "base/command_line.h" #include "base/metrics/histogram.h" #include "base/values.h" -#include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_switches.h" #include "chrome/common/extensions/extension.h" #include "chrome/common/extensions/extension_constants.h" @@ -22,6 +21,7 @@ #include "chrome/renderer/autofill/password_autofill_manager.h" #include "chrome/renderer/automation/automation_renderer_helper.h" #include "chrome/renderer/blocked_plugin.h" +#include "chrome/renderer/chrome_render_observer.h" #include "chrome/renderer/devtools_agent.h" #include "chrome/renderer/extensions/bindings_utils.h" 
#include "chrome/renderer/extensions/event_bindings.h" @@ -48,7 +48,6 @@ #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLError.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLRequest.h" -#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" #include "ui/base/l10n/l10n_util.h" #include "ui/base/resource/resource_bundle.h" #include "webkit/plugins/npapi/plugin_list.h" @@ -93,7 +92,20 @@ static bool CrossesExtensionExtents(WebFrame* frame, const GURL& new_url) { namespace chrome { void ChromeContentRendererClient::RenderViewCreated(RenderView* render_view) { + safe_browsing::PhishingClassifierDelegate* phishing_classifier = NULL; +#ifndef OS_CHROMEOS + if (!CommandLine::ForCurrentProcess()->HasSwitch( + switches::kDisableClientSidePhishingDetection)) { + phishing_classifier = + new safe_browsing::PhishingClassifierDelegate(render_view, NULL); + } +#endif + new DevToolsAgent(render_view); + new ExtensionHelper(render_view); + new PrintWebViewHelper(render_view); + new SearchBox(render_view); + new safe_browsing::MalwareDOMDetails(render_view); PasswordAutofillManager* password_autofill_manager = new PasswordAutofillManager(render_view); @@ -106,23 +118,8 @@ void ChromeContentRendererClient::RenderViewCreated(RenderView* render_view) { page_click_tracker->AddListener(password_autofill_manager); page_click_tracker->AddListener(autofill_agent); - new TranslateHelper(render_view); - -#ifndef OS_CHROMEOS - if (!CommandLine::ForCurrentProcess()->HasSwitch( - switches::kDisableClientSidePhishingDetection)) { - new safe_browsing::PhishingClassifierDelegate(render_view, NULL); - } -#endif - - // Observer for Malware DOM details messages. 
- new safe_browsing::MalwareDOMDetails(render_view); - - new ExtensionHelper(render_view); - - new PrintWebViewHelper(render_view); - - new SearchBox(render_view); + TranslateHelper* translate = new TranslateHelper(render_view, autofill_agent); + new ChromeRenderObserver(render_view, translate, phishing_classifier); // Used only for testing/automation. if (CommandLine::ForCurrentProcess()->HasSwitch( @@ -312,33 +309,6 @@ std::string ChromeContentRendererClient::GetNavigationErrorHtml( return html; } -// Returns the ISO 639_1 language code of the specified |text|, or 'unknown' -// if it failed. -std::string ChromeContentRendererClient::DetermineTextLanguage( - const string16& text) { - std::string language = chrome::kUnknownLanguageCode; - int num_languages = 0; - int text_bytes = 0; - bool is_reliable = false; - Language cld_language = - DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, - &num_languages, NULL, &text_bytes); - // We don't trust the result if the CLD reports that the detection is not - // reliable, or if the actual text used to detect the language was less than - // 100 bytes (short texts can often lead to wrong results). - if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && - cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { - // We should not use LanguageCode_ISO_639_1 because it does not cover all - // the languages CLD can detect. As a result, it'll return the invalid - // language code for tradtional Chinese among others. - // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and - // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN - // for Simplified Chinese. 
- language = LanguageCodeWithDialects(cld_language); - } - return language; -} - bool ChromeContentRendererClient::RunIdleHandlerWhenWidgetsHidden() { return !ExtensionDispatcher::Get()->is_extension_process(); } diff --git a/chrome/renderer/chrome_content_renderer_client.h b/chrome/renderer/chrome_content_renderer_client.h index d69d638..ce6bd04 100644 --- a/chrome/renderer/chrome_content_renderer_client.h +++ b/chrome/renderer/chrome_content_renderer_client.h @@ -28,7 +28,6 @@ class ChromeContentRendererClient : public content::ContentRendererClient { virtual std::string GetNavigationErrorHtml( const WebKit::WebURLRequest& failed_request, const WebKit::WebURLError& error); - virtual std::string DetermineTextLanguage(const string16& text); virtual bool RunIdleHandlerWhenWidgetsHidden(); virtual bool AllowPopup(const GURL& creator); virtual bool ShouldFork(WebKit::WebFrame* frame, diff --git a/chrome/renderer/chrome_render_observer.cc b/chrome/renderer/chrome_render_observer.cc new file mode 100644 index 0000000..ef89792 --- /dev/null +++ b/chrome/renderer/chrome_render_observer.cc @@ -0,0 +1,344 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "chrome/renderer/chrome_render_observer.h" + +#include "base/command_line.h" +#include "base/metrics/histogram.h" +#include "chrome/common/chrome_switches.h" +#include "chrome/common/render_messages.h" +#include "chrome/common/thumbnail_score.h" +#include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" +#include "chrome/renderer/translate_helper.h" +#include "content/renderer/content_renderer_client.h" +#include "content/renderer/render_view.h" +#include "skia/ext/bitmap_platform_device.h" +#include "skia/ext/image_operations.h" +#include "third_party/WebKit/Source/WebKit/chromium/public/WebDataSource.h" +#include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" +#include "third_party/WebKit/Source/WebKit/chromium/public/WebRect.h" +#include "third_party/WebKit/Source/WebKit/chromium/public/WebSize.h" +#include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" +#include "ui/gfx/color_utils.h" +#include "ui/gfx/skbitmap_operations.h" +#include "webkit/glue/webkit_glue.h" + +using WebKit::WebDataSource; +using WebKit::WebFrame; +using WebKit::WebRect; +using WebKit::WebSize; +using WebKit::WebView; + +// Delay in milliseconds that we'll wait before capturing the page contents +// and thumbnail. +static const int kDelayForCaptureMs = 500; + +// Typically, we capture the page data once the page is loaded. +// Sometimes, the page never finishes to load, preventing the page capture +// To workaround this problem, we always perform a capture after the following +// delay. 
+static const int kDelayForForcedCaptureMs = 6000; + +// define to write the time necessary for thumbnail/DOM text retrieval, +// respectively, into the system debug log +// #define TIME_TEXT_RETRIEVAL + +// maximum number of characters in the document to index, any text beyond this +// point will be clipped +static const size_t kMaxIndexChars = 65535; + +// Size of the thumbnails that we'll generate +static const int kThumbnailWidth = 212; +static const int kThumbnailHeight = 132; + +static bool PaintViewIntoCanvas(WebView* view, + skia::PlatformCanvas& canvas) { + view->layout(); + const WebSize& size = view->size(); + + if (!canvas.initialize(size.width, size.height, true)) + return false; + + view->paint(webkit_glue::ToWebCanvas(&canvas), + WebRect(0, 0, size.width, size.height)); + // TODO: Add a way to snapshot the whole page, not just the currently + // visible part. + + return true; +} + +// Calculates how "boring" a thumbnail is. The boring score is the +// 0,1 ranged percentage of pixels that are the most common +// luma. Higher boring scores indicate that a higher percentage of a +// bitmap are all the same brightness. 
+static double CalculateBoringScore(SkBitmap* bitmap) { + int histogram[256] = {0}; + color_utils::BuildLumaHistogram(bitmap, histogram); + + int color_count = *std::max_element(histogram, histogram + 256); + int pixel_count = bitmap->width() * bitmap->height(); + return static_cast<double>(color_count) / pixel_count; +} + +ChromeRenderObserver::ChromeRenderObserver( + RenderView* render_view, + TranslateHelper* translate_helper, + safe_browsing::PhishingClassifierDelegate* phishing_classifier) + : RenderViewObserver(render_view), + translate_helper_(translate_helper), + phishing_classifier_(phishing_classifier), + last_indexed_page_id_(-1), + ALLOW_THIS_IN_INITIALIZER_LIST(page_info_method_factory_(this)) { +} + +ChromeRenderObserver::~ChromeRenderObserver() { +} + +bool ChromeRenderObserver::OnMessageReceived(const IPC::Message& message) { + bool handled = true; + IPC_BEGIN_MESSAGE_MAP(ChromeRenderObserver, message) + IPC_MESSAGE_HANDLER(ViewMsg_CaptureSnapshot, OnCaptureSnapshot) + IPC_MESSAGE_UNHANDLED(handled = false) + IPC_END_MESSAGE_MAP() + return handled; +} + +void ChromeRenderObserver::OnCaptureSnapshot() { + SkBitmap snapshot; + bool error = false; + + WebFrame* main_frame = render_view()->webview()->mainFrame(); + if (!main_frame) + error = true; + + if (!error && !CaptureSnapshot(render_view()->webview(), &snapshot)) + error = true; + + DCHECK(error == snapshot.empty()) << + "Snapshot should be empty on error, non-empty otherwise."; + + // Send the snapshot to the browser process. + Send(new ViewHostMsg_Snapshot(routing_id(), snapshot)); +} + +void ChromeRenderObserver::DidStopLoading() { + MessageLoop::current()->PostDelayedTask( + FROM_HERE, + page_info_method_factory_.NewRunnableMethod( + &ChromeRenderObserver::CapturePageInfo, render_view()->page_id(), + false), + render_view()->content_state_immediately() ? 
0 : kDelayForCaptureMs); +} + +void ChromeRenderObserver::DidCommitProvisionalLoad(WebFrame* frame, + bool is_new_navigation) { + if (!is_new_navigation) + return; + + MessageLoop::current()->PostDelayedTask( + FROM_HERE, + page_info_method_factory_.NewRunnableMethod( + &ChromeRenderObserver::CapturePageInfo, render_view()->page_id(), + true), + kDelayForForcedCaptureMs); +} + +void ChromeRenderObserver::CapturePageInfo(int load_id, + bool preliminary_capture) { + if (load_id != render_view()->page_id()) + return; // This capture call is no longer relevant due to navigation. + + if (load_id == last_indexed_page_id_) + return; // we already indexed this page + + if (!render_view()->webview()) + return; + + WebFrame* main_frame = render_view()->webview()->mainFrame(); + if (!main_frame) + return; + + // Don't index/capture pages that are in view source mode. + if (main_frame->isViewSourceModeEnabled()) + return; + + // Don't index/capture pages that failed to load. This only checks the top + // level frame so the thumbnail may contain a frame that failed to load. + WebDataSource* ds = main_frame->dataSource(); + if (ds && ds->hasUnreachableURL()) + return; + + if (!preliminary_capture) + last_indexed_page_id_ = load_id; + + // Get the URL for this page. + GURL url(main_frame->url()); + if (url.is_empty()) + return; + + // Retrieve the frame's full text. + string16 contents; + CaptureText(main_frame, &contents); + if (contents.size()) { + if (translate_helper_) + translate_helper_->PageCaptured(contents); + // Send the text to the browser for indexing (the browser might decide not + // to index, if the URL is HTTPS for instance) and language discovery. + Send(new ViewHostMsg_PageContents(routing_id(), url, load_id, contents)); + } + + // Generate the thumbnail here if the in-browser thumbnailing isn't + // enabled. TODO(satorux): Remove this and related code once + // crbug.com/65936 is complete. 
+ if (!CommandLine::ForCurrentProcess()->HasSwitch( + switches::kEnableInBrowserThumbnailing)) { + CaptureThumbnail(); + } + + // Will swap out the string. + if (phishing_classifier_) + phishing_classifier_->PageCaptured(&contents, preliminary_capture); +} + +void ChromeRenderObserver::CaptureText(WebFrame* frame, string16* contents) { + contents->clear(); + if (!frame) + return; + +#ifdef TIME_TEXT_RETRIEVAL + double begin = time_util::GetHighResolutionTimeNow(); +#endif + + // get the contents of the frame + *contents = frame->contentAsText(kMaxIndexChars); + +#ifdef TIME_TEXT_RETRIEVAL + double end = time_util::GetHighResolutionTimeNow(); + char buf[128]; + sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", + contents.size(), (end - begin)*1000); + OutputDebugStringA(buf); +#endif + + // When the contents are clipped to the maximum, we don't want to have a + // partial word indexed at the end that might have been clipped. Therefore, + // terminate the string at the last space to ensure no words are clipped. + if (contents->size() == kMaxIndexChars) { + size_t last_space_index = contents->find_last_of(kWhitespaceUTF16); + if (last_space_index == std::wstring::npos) + return; // don't index if we got a huge block of text with no spaces + contents->resize(last_space_index); + } +} + +void ChromeRenderObserver::CaptureThumbnail() { + WebFrame* main_frame = render_view()->webview()->mainFrame(); + if (!main_frame) + return; + + // get the URL for this page + GURL url(main_frame->url()); + if (url.is_empty()) + return; + + if (render_view()->size().IsEmpty()) + return; // Don't create an empty thumbnail! 
+ + ThumbnailScore score; + SkBitmap thumbnail; + if (!CaptureFrameThumbnail(render_view()->webview(), kThumbnailWidth, + kThumbnailHeight, &thumbnail, &score)) + return; + + // send the thumbnail message to the browser process + Send(new ViewHostMsg_Thumbnail(routing_id(), url, score, thumbnail)); +} + +bool ChromeRenderObserver::CaptureFrameThumbnail(WebView* view, + int w, + int h, + SkBitmap* thumbnail, + ThumbnailScore* score) { + base::TimeTicks beginning_time = base::TimeTicks::Now(); + + skia::PlatformCanvas canvas; + + // Paint |view| into |canvas|. + if (!PaintViewIntoCanvas(view, canvas)) + return false; + + skia::BitmapPlatformDevice& device = + static_cast<skia::BitmapPlatformDevice&>(canvas.getTopPlatformDevice()); + + const SkBitmap& src_bmp = device.accessBitmap(false); + + SkRect dest_rect = { 0, 0, SkIntToScalar(w), SkIntToScalar(h) }; + float dest_aspect = dest_rect.width() / dest_rect.height(); + + // Get the src rect so that we can preserve the aspect ratio while filling + // the destination. + SkIRect src_rect; + if (src_bmp.width() < dest_rect.width() || + src_bmp.height() < dest_rect.height()) { + // Source image is smaller: we clip the part of source image within the + // dest rect, and then stretch it to fill the dest rect. We don't respect + // the aspect ratio in this case. + src_rect.set(0, 0, static_cast<S16CPU>(dest_rect.width()), + static_cast<S16CPU>(dest_rect.height())); + score->good_clipping = false; + } else { + float src_aspect = static_cast<float>(src_bmp.width()) / src_bmp.height(); + if (src_aspect > dest_aspect) { + // Wider than tall, clip horizontally: we center the smaller thumbnail in + // the wider screen. 
+ S16CPU new_width = static_cast<S16CPU>(src_bmp.height() * dest_aspect); + S16CPU x_offset = (src_bmp.width() - new_width) / 2; + src_rect.set(x_offset, 0, new_width + x_offset, src_bmp.height()); + score->good_clipping = false; + } else { + src_rect.set(0, 0, src_bmp.width(), + static_cast<S16CPU>(src_bmp.width() / dest_aspect)); + score->good_clipping = true; + } + } + + score->at_top = (view->mainFrame()->scrollOffset().height == 0); + + SkBitmap subset; + device.accessBitmap(false).extractSubset(&subset, src_rect); + + // First do a fast downsample by powers of two to get close to the final size. + SkBitmap downsampled_subset = + SkBitmapOperations::DownsampleByTwoUntilSize(subset, w, h); + + // Do a high-quality resize from the downscaled size to the final size. + *thumbnail = skia::ImageOperations::Resize( + downsampled_subset, skia::ImageOperations::RESIZE_LANCZOS3, w, h); + + score->boring_score = CalculateBoringScore(thumbnail); + + HISTOGRAM_TIMES("Renderer4.Thumbnail", + base::TimeTicks::Now() - beginning_time); + + return true; +} + +bool ChromeRenderObserver::CaptureSnapshot(WebView* view, SkBitmap* snapshot) { + base::TimeTicks beginning_time = base::TimeTicks::Now(); + + skia::PlatformCanvas canvas; + if (!PaintViewIntoCanvas(view, canvas)) + return false; + + skia::BitmapPlatformDevice& device = + static_cast<skia::BitmapPlatformDevice&>(canvas.getTopPlatformDevice()); + + const SkBitmap& bitmap = device.accessBitmap(false); + if (!bitmap.copyTo(snapshot, SkBitmap::kARGB_8888_Config)) + return false; + + HISTOGRAM_TIMES("Renderer4.Snapshot", + base::TimeTicks::Now() - beginning_time); + return true; +} diff --git a/chrome/renderer/chrome_render_observer.h b/chrome/renderer/chrome_render_observer.h new file mode 100644 index 0000000..41290f2 --- /dev/null +++ b/chrome/renderer/chrome_render_observer.h @@ -0,0 +1,79 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_RENDERER_CHROME_RENDER_OBSERVER_H_ +#define CHROME_RENDERER_CHROME_RENDER_OBSERVER_H_ +#pragma once + +#include "base/task.h" +#include "content/renderer/render_view_observer.h" + +class SkBitmap; +class TranslateHelper; +struct ThumbnailScore; + +namespace WebKit { +class WebView; +} + +namespace safe_browsing { +class PhishingClassifierDelegate; +} + +// This class holds the Chrome specific parts of RenderView, and has the same +// lifetime. +class ChromeRenderObserver : public RenderViewObserver { + public: + // translate_helper and/or phishing_classifier can be NULL. + ChromeRenderObserver( + RenderView* render_view, + TranslateHelper* translate_helper, + safe_browsing::PhishingClassifierDelegate* phishing_classifier); + virtual ~ChromeRenderObserver(); + + private: + // RenderViewObserver implementation. + virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE; + virtual void DidStopLoading() OVERRIDE; + virtual void DidCommitProvisionalLoad(WebKit::WebFrame* frame, + bool is_new_navigation) OVERRIDE; + + void OnCaptureSnapshot(); + + // Captures the thumbnail and text contents for indexing for the given load + // ID. If the view's load ID is different than the parameter, this call is + // a NOP. Typically called on a timer, so the load ID may have changed in the + // meantime. + void CapturePageInfo(int load_id, bool preliminary_capture); + + // Retrieves the text from the given frame contents, the page text up to the + // maximum amount kMaxIndexChars will be placed into the given buffer. + void CaptureText(WebKit::WebFrame* frame, string16* contents); + + void CaptureThumbnail(); + + // Creates a thumbnail of |frame|'s contents resized to (|w|, |h|) + // and puts that in |thumbnail|. Thumbnail metadata goes in |score|. 
+ bool CaptureFrameThumbnail(WebKit::WebView* view, int w, int h, + SkBitmap* thumbnail, + ThumbnailScore* score); + + // Capture a snapshot of a view. This is used to allow an extension + // to get a snapshot of a tab using chrome.tabs.captureVisibleTab(). + bool CaptureSnapshot(WebKit::WebView* view, SkBitmap* snapshot); + + // Has the same lifetime as us. + TranslateHelper* translate_helper_; + safe_browsing::PhishingClassifierDelegate* phishing_classifier_; + + // Page_id from the last page we indexed. This prevents us from indexing the + // same page twice in a row. + int32 last_indexed_page_id_; + + ScopedRunnableMethodFactory<ChromeRenderObserver> page_info_method_factory_; + + DISALLOW_COPY_AND_ASSIGN(ChromeRenderObserver); +}; + +#endif // CHROME_RENDERER_CHROME_OBSERVER_H_ diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc index d9c3174..9789920e 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc @@ -138,14 +138,14 @@ void PhishingClassifierDelegate::DidCommitProvisionalLoad( CancelPendingClassification(); } -void PhishingClassifierDelegate::PageCaptured(const string16& page_text, +void PhishingClassifierDelegate::PageCaptured(string16* page_text, bool preliminary_capture) { if (preliminary_capture) { return; } last_finished_load_id_ = render_view()->page_id(); last_finished_load_url_ = GetToplevelUrl(); - classifier_page_text_ = page_text; + classifier_page_text_.swap(*page_text); MaybeStartClassification(); } diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h index 04309ba..3e3a09e 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h @@ -33,15 +33,14 @@ class PhishingClassifierDelegate : public 
RenderViewObserver { // The scorer is passed on to the classifier. void SetPhishingScorer(const safe_browsing::Scorer* scorer); - // RenderViewObserver implementation, public for testing. - // Called by the RenderView once a page has finished loading. Updates the // last-loaded URL and page id, then starts classification if all other // conditions are met (see MaybeStartClassification for details). // We ignore preliminary captures, since these happen before the page has // finished loading. - virtual void PageCaptured(const string16& page_text, - bool preliminary_capture); + void PageCaptured(string16* page_text, bool preliminary_capture); + + // RenderViewObserver implementation, public for testing. // Called by the RenderView when a page has started loading in the given // WebFrame. Typically, this will cause any pending classification to be diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc index 305e907..90578a4 100644 --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc @@ -118,7 +118,7 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) { string16 page_text = ASCIIToUTF16("dummy"); EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)). WillOnce(DeleteArg<1>()); - delegate->PageCaptured(page_text, false); + delegate->PageCaptured(&page_text, false); Mock::VerifyAndClearExpectations(classifier); // Reloading the same page should not trigger a reclassification. 
@@ -129,7 +129,8 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) { message_loop_.Run(); Mock::VerifyAndClearExpectations(classifier); OnStartPhishingDetection(delegate, GURL("http://host.com/")); - delegate->PageCaptured(page_text, false); + page_text = ASCIIToUTF16("dummy"); + delegate->PageCaptured(&page_text, false); // Navigating in a subframe will increment the page id, but not change // the toplevel URL. This should cancel pending classification since the @@ -140,7 +141,8 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) { message_loop_.Run(); Mock::VerifyAndClearExpectations(classifier); OnStartPhishingDetection(delegate, GURL("http://host.com/")); - delegate->PageCaptured(page_text, false); + page_text = ASCIIToUTF16("dummy"); + delegate->PageCaptured(&page_text, false); // Scrolling to an anchor will increment the page id, but should not // not trigger a reclassification. Currently, a pending classification will @@ -148,7 +150,8 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) { EXPECT_CALL(*classifier, CancelPendingClassification()); LoadURL("http://host.com/#foo"); OnStartPhishingDetection(delegate, GURL("http://host.com/#foo")); - delegate->PageCaptured(page_text, false); + page_text = ASCIIToUTF16("dummy"); + delegate->PageCaptured(&page_text, false); // Now load a new toplevel page, which should trigger another classification. EXPECT_CALL(*classifier, CancelPendingClassification()); @@ -158,7 +161,7 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) { EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)). WillOnce(DeleteArg<1>()); OnStartPhishingDetection(delegate, GURL("http://host2.com/")); - delegate->PageCaptured(page_text, false); + delegate->PageCaptured(&page_text, false); Mock::VerifyAndClearExpectations(classifier); // No classification should happen on back/forward navigation. 
@@ -170,7 +173,7 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) { Mock::VerifyAndClearExpectations(classifier); page_text = ASCIIToUTF16("dummy"); OnStartPhishingDetection(delegate, GURL("http://host.com/#foo")); - delegate->PageCaptured(page_text, false); + delegate->PageCaptured(&page_text, false); // The delegate will cancel pending classification on destruction. EXPECT_CALL(*classifier, CancelPendingClassification()); @@ -188,15 +191,16 @@ TEST_F(PhishingClassifierDelegateTest, NoScorer) { LoadURL("http://host.com/"); string16 page_text = ASCIIToUTF16("dummy"); OnStartPhishingDetection(delegate, GURL("http://host.com/")); - delegate->PageCaptured(page_text, false); + delegate->PageCaptured(&page_text, false); LoadURL("http://host2.com/"); page_text = ASCIIToUTF16("dummy2"); OnStartPhishingDetection(delegate, GURL("http://host2.com/")); - delegate->PageCaptured(page_text, false); + delegate->PageCaptured(&page_text, false); // Now set a scorer, which should cause a classifier to be created and // the classification to proceed. + page_text = ASCIIToUTF16("dummy2"); EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)). WillOnce(DeleteArg<1>()); MockScorer scorer; @@ -220,14 +224,16 @@ TEST_F(PhishingClassifierDelegateTest, NoScorer_Ref) { LoadURL("http://host.com/"); string16 page_text = ASCIIToUTF16("dummy"); OnStartPhishingDetection(delegate, GURL("http://host.com/")); - delegate->PageCaptured(page_text, false); + delegate->PageCaptured(&page_text, false); LoadURL("http://host.com/#foo"); OnStartPhishingDetection(delegate, GURL("http://host.com/#foo")); - delegate->PageCaptured(page_text, false); + page_text = ASCIIToUTF16("dummy"); + delegate->PageCaptured(&page_text, false); // Now set a scorer, which should cause a classifier to be created and // the classification to proceed. + page_text = ASCIIToUTF16("dummy"); EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)). 
WillOnce(DeleteArg<1>()); MockScorer scorer; @@ -254,9 +260,10 @@ TEST_F(PhishingClassifierDelegateTest, NoStartPhishingDetection) { LoadURL("http://host.com/"); Mock::VerifyAndClearExpectations(classifier); string16 page_text = ASCIIToUTF16("phish"); - delegate->PageCaptured(page_text, false); + delegate->PageCaptured(&page_text, false); // Now simulate the StartPhishingDetection IPC. We expect classification // to begin. + page_text = ASCIIToUTF16("phish"); EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)). WillOnce(DeleteArg<1>()); OnStartPhishingDetection(delegate, GURL("http://host.com/")); @@ -267,7 +274,8 @@ TEST_F(PhishingClassifierDelegateTest, NoStartPhishingDetection) { EXPECT_CALL(*classifier, CancelPendingClassification()); responses_["http://host2.com/"] = "<html><body>phish</body></html>"; LoadURL("http://host2.com/"); - delegate->PageCaptured(page_text, false); + page_text = ASCIIToUTF16("phish"); + delegate->PageCaptured(&page_text, false); EXPECT_CALL(*classifier, CancelPendingClassification()); responses_["http://host3.com/"] = "<html><body>phish</body></html>"; @@ -295,12 +303,13 @@ TEST_F(PhishingClassifierDelegateTest, IgnorePreliminaryCapture) { Mock::VerifyAndClearExpectations(classifier); OnStartPhishingDetection(delegate, GURL("http://host.com/")); string16 page_text = ASCIIToUTF16("phish"); - delegate->PageCaptured(page_text, true); + delegate->PageCaptured(&page_text, true); // Once the non-preliminary capture happens, classification should begin. + page_text = ASCIIToUTF16("phish"); EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)). WillOnce(DeleteArg<1>()); - delegate->PageCaptured(page_text, false); + delegate->PageCaptured(&page_text, false); Mock::VerifyAndClearExpectations(classifier); // The delegate will cancel pending classification on destruction. 
@@ -327,7 +336,7 @@ TEST_F(PhishingClassifierDelegateTest, DetectedPhishingSite) { EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)). WillOnce(DeleteArg<1>()); OnStartPhishingDetection(delegate, GURL("http://host.com/#a")); - delegate->PageCaptured(page_text, false); + delegate->PageCaptured(&page_text, false); Mock::VerifyAndClearExpectations(classifier); // Now run the callback to simulate the classifier finishing. diff --git a/chrome/renderer/translate_helper.cc b/chrome/renderer/translate_helper.cc index 31833b6..4d4fed1 100644 --- a/chrome/renderer/translate_helper.cc +++ b/chrome/renderer/translate_helper.cc @@ -5,14 +5,18 @@ #include "chrome/renderer/translate_helper.h" #include "base/compiler_specific.h" +#include "base/metrics/histogram.h" #include "base/utf_string_conversions.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/render_messages.h" +#include "chrome/renderer/autofill/autofill_agent.h" #include "content/renderer/render_view.h" +#include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" +#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" #include "v8/include/v8.h" #include "webkit/glue/dom_operations.h" @@ -41,16 +45,34 @@ static const char* const kAutoDetectionLanguage = "auto"; //////////////////////////////////////////////////////////////////////////////// // TranslateHelper, public: // -TranslateHelper::TranslateHelper(RenderView* render_view) +TranslateHelper::TranslateHelper(RenderView* render_view, + autofill::AutofillAgent* autofill) : RenderViewObserver(render_view), translation_pending_(false), page_id_(-1), + autofill_(autofill), 
ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { } TranslateHelper::~TranslateHelper() { } +void TranslateHelper::PageCaptured(const string16& contents) { + WebDocument document = render_view()->webview()->mainFrame()->document(); + // If the page explicitly specifies a language, use it, otherwise we'll + // determine it based on the text content using the CLD. + std::string language = GetPageLanguageFromMetaTag(&document); + if (language.empty()) { + base::TimeTicks begin_time = base::TimeTicks::Now(); + language = DetermineTextLanguage(contents); + UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", + base::TimeTicks::Now() - begin_time); + } + + Send(new ViewHostMsg_TranslateLanguageDetermined( + routing_id(), language, IsPageTranslatable(&document))); +} + void TranslateHelper::CancelPendingTranslation() { method_factory_.RevokeAll(); translation_pending_ = false; @@ -113,6 +135,31 @@ std::string TranslateHelper::GetPageLanguageFromMetaTag(WebDocument* document) { return language; } +// static +std::string TranslateHelper::DetermineTextLanguage(const string16& text) { + std::string language = chrome::kUnknownLanguageCode; + int num_languages = 0; + int text_bytes = 0; + bool is_reliable = false; + Language cld_language = + DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, + &num_languages, NULL, &text_bytes); + // We don't trust the result if the CLD reports that the detection is not + // reliable, or if the actual text used to detect the language was less than + // 100 bytes (short texts can often lead to wrong results). + if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && + cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { + // We should not use LanguageCode_ISO_639_1 because it does not cover all + // the languages CLD can detect. As a result, it'll return the invalid + // language code for traditional Chinese among others.
+ // |LanguageCodeWithDialects| will go through ISO 639-1, ISO-639-2 and + // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN + // for Simplified Chinese. + language = LanguageCodeWithDialects(cld_language); + } + return language; +} + //////////////////////////////////////////////////////////////////////////////// // TranslateHelper, protected: // @@ -284,8 +331,8 @@ void TranslateHelper::CheckTranslateStatus() { translation_pending_ = false; - // Notify the renderer we are done. - render_view()->OnPageTranslated(); + if (autofill_) + autofill_->FrameTranslated(render_view()->webview()->mainFrame()); // Notify the browser we are done. render_view()->Send(new ViewHostMsg_PageTranslated( diff --git a/chrome/renderer/translate_helper.h b/chrome/renderer/translate_helper.h index 54a4706..902f0bc 100644 --- a/chrome/renderer/translate_helper.h +++ b/chrome/renderer/translate_helper.h @@ -17,25 +17,21 @@ class WebDocument; class WebFrame; } +namespace autofill { +class AutofillAgent; +} + // This class deals with page translation. // There is one TranslateHelper per RenderView. class TranslateHelper : public RenderViewObserver { public: - explicit TranslateHelper(RenderView* render_view); + // autofill can be NULL. + TranslateHelper(RenderView* render_view, autofill::AutofillAgent* autofill); virtual ~TranslateHelper(); - // Returns whether the page associated with |document| is a candidate for - // translation. Some pages can explicitly specify (via a meta-tag) that they - // should not be translated. - static bool IsPageTranslatable(WebKit::WebDocument* document); - - // Returns the language specified in the language meta tag of |document|, or - // an empty string if no such tag was found. - // The tag may specify several languages, the first one is returned.
- // Example of such meta-tag: - // <meta http-equiv="content-language" content="en, fr"> - static std::string GetPageLanguageFromMetaTag(WebKit::WebDocument* document); + // Informs us that the page's text has been extracted. + void PageCaptured(const string16& contents); protected: // The following methods are protected so they can be overridden in @@ -75,6 +71,22 @@ class TranslateHelper : public RenderViewObserver { virtual bool DontDelayTasks(); private: + // Returns whether the page associated with |document| is a candidate for + // translation. Some pages can explicitly specify (via a meta-tag) that they + // should not be translated. + static bool IsPageTranslatable(WebKit::WebDocument* document); + + // Returns the language specified in the language meta tag of |document|, or + // an empty string if no such tag was found. + // The tag may specify several languages, the first one is returned. + // Example of such meta-tag: + // <meta http-equiv="content-language" content="en, fr"> + static std::string GetPageLanguageFromMetaTag(WebKit::WebDocument* document); + + // Returns the ISO 639_1 language code of the specified |text|, or 'unknown' + // if it failed. + static std::string DetermineTextLanguage(const string16& text); + // RenderViewObserver implementation. virtual bool OnMessageReceived(const IPC::Message& message); @@ -123,6 +135,8 @@ class TranslateHelper : public RenderViewObserver { std::string source_lang_; std::string target_lang_; + autofill::AutofillAgent* autofill_; + // Method factory used to make calls to TranslatePageImpl.
ScopedRunnableMethodFactory<TranslateHelper> method_factory_; diff --git a/chrome/renderer/translate_helper_browsertest.cc b/chrome/renderer/translate_helper_browsertest.cc index 067d939..9298305 100644 --- a/chrome/renderer/translate_helper_browsertest.cc +++ b/chrome/renderer/translate_helper_browsertest.cc @@ -15,7 +15,7 @@ using testing::Return; class TestTranslateHelper : public TranslateHelper { public: explicit TestTranslateHelper(RenderView* render_view) - : TranslateHelper(render_view) { + : TranslateHelper(render_view, NULL) { } virtual bool DontDelayTasks() { return true; } |