summaryrefslogtreecommitdiffstats
path: root/chrome/renderer
diff options
context:
space:
mode:
authorjam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-13 00:44:31 +0000
committerjam@chromium.org <jam@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-13 00:44:31 +0000
commit93b9d6948d212b630a115dc4b77bbe6bf2b3b671 (patch)
tree92e02d514f2b44cdd2b7f82063ed3e91af3773bf /chrome/renderer
parent71504b42e176b37b9324897a53908df5ba6c723e (diff)
downloadchromium_src-93b9d6948d212b630a115dc4b77bbe6bf2b3b671.zip
chromium_src-93b9d6948d212b630a115dc4b77bbe6bf2b3b671.tar.gz
chromium_src-93b9d6948d212b630a115dc4b77bbe6bf2b3b671.tar.bz2
Move code that talks to spelling and translate out of content. This change creates ChromeRenderObserver, which is a RenderViewObserver for the Chrome layer. It also adds a TranslateTabHelper to hold the per-tab language data and moves LanguageState back to chrome since it's not used by content.
Review URL: http://codereview.chromium.org/6824068 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81341 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer')
-rw-r--r--chrome/renderer/autofill/autofill_agent.h5
-rw-r--r--chrome/renderer/chrome_content_renderer_client.cc62
-rw-r--r--chrome/renderer/chrome_content_renderer_client.h1
-rw-r--r--chrome/renderer/chrome_render_observer.cc344
-rw-r--r--chrome/renderer/chrome_render_observer.h79
-rw-r--r--chrome/renderer/safe_browsing/phishing_classifier_delegate.cc4
-rw-r--r--chrome/renderer/safe_browsing/phishing_classifier_delegate.h7
-rw-r--r--chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc39
-rw-r--r--chrome/renderer/translate_helper.cc53
-rw-r--r--chrome/renderer/translate_helper.h38
-rw-r--r--chrome/renderer/translate_helper_browsertest.cc2
11 files changed, 549 insertions, 85 deletions
diff --git a/chrome/renderer/autofill/autofill_agent.h b/chrome/renderer/autofill/autofill_agent.h
index c3255fb..ba7dba9 100644
--- a/chrome/renderer/autofill/autofill_agent.h
+++ b/chrome/renderer/autofill/autofill_agent.h
@@ -38,6 +38,10 @@ class AutofillAgent : public RenderViewObserver,
PasswordAutofillManager* password_autofill_manager);
virtual ~AutofillAgent();
+ // Called when the translate helper has finished translating the page. We
+ // use this signal to re-scan the page for forms.
+ void FrameTranslated(WebKit::WebFrame* frame);
+
// WebKit::WebAutoFillClient implementation. Public for tests.
virtual void didAcceptAutoFillSuggestion(const WebKit::WebNode& node,
const WebKit::WebString& value,
@@ -71,7 +75,6 @@ class AutofillAgent : public RenderViewObserver,
virtual void FrameWillClose(WebKit::WebFrame* frame);
virtual void WillSubmitForm(WebKit::WebFrame* frame,
const WebKit::WebFormElement& form);
- virtual void FrameTranslated(WebKit::WebFrame* frame);
// PageClickListener implementation:
virtual bool InputElementClicked(const WebKit::WebInputElement& element,
diff --git a/chrome/renderer/chrome_content_renderer_client.cc b/chrome/renderer/chrome_content_renderer_client.cc
index 0703b7b..42cdbfc 100644
--- a/chrome/renderer/chrome_content_renderer_client.cc
+++ b/chrome/renderer/chrome_content_renderer_client.cc
@@ -9,7 +9,6 @@
#include "base/command_line.h"
#include "base/metrics/histogram.h"
#include "base/values.h"
-#include "chrome/common/chrome_constants.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/common/extensions/extension.h"
#include "chrome/common/extensions/extension_constants.h"
@@ -22,6 +21,7 @@
#include "chrome/renderer/autofill/password_autofill_manager.h"
#include "chrome/renderer/automation/automation_renderer_helper.h"
#include "chrome/renderer/blocked_plugin.h"
+#include "chrome/renderer/chrome_render_observer.h"
#include "chrome/renderer/devtools_agent.h"
#include "chrome/renderer/extensions/bindings_utils.h"
#include "chrome/renderer/extensions/event_bindings.h"
@@ -48,7 +48,6 @@
#include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebURLError.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebURLRequest.h"
-#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
#include "ui/base/l10n/l10n_util.h"
#include "ui/base/resource/resource_bundle.h"
#include "webkit/plugins/npapi/plugin_list.h"
@@ -93,7 +92,20 @@ static bool CrossesExtensionExtents(WebFrame* frame, const GURL& new_url) {
namespace chrome {
void ChromeContentRendererClient::RenderViewCreated(RenderView* render_view) {
+ safe_browsing::PhishingClassifierDelegate* phishing_classifier = NULL;
+#ifndef OS_CHROMEOS
+ if (!CommandLine::ForCurrentProcess()->HasSwitch(
+ switches::kDisableClientSidePhishingDetection)) {
+ phishing_classifier =
+ new safe_browsing::PhishingClassifierDelegate(render_view, NULL);
+ }
+#endif
+
new DevToolsAgent(render_view);
+ new ExtensionHelper(render_view);
+ new PrintWebViewHelper(render_view);
+ new SearchBox(render_view);
+ new safe_browsing::MalwareDOMDetails(render_view);
PasswordAutofillManager* password_autofill_manager =
new PasswordAutofillManager(render_view);
@@ -106,23 +118,8 @@ void ChromeContentRendererClient::RenderViewCreated(RenderView* render_view) {
page_click_tracker->AddListener(password_autofill_manager);
page_click_tracker->AddListener(autofill_agent);
- new TranslateHelper(render_view);
-
-#ifndef OS_CHROMEOS
- if (!CommandLine::ForCurrentProcess()->HasSwitch(
- switches::kDisableClientSidePhishingDetection)) {
- new safe_browsing::PhishingClassifierDelegate(render_view, NULL);
- }
-#endif
-
- // Observer for Malware DOM details messages.
- new safe_browsing::MalwareDOMDetails(render_view);
-
- new ExtensionHelper(render_view);
-
- new PrintWebViewHelper(render_view);
-
- new SearchBox(render_view);
+ TranslateHelper* translate = new TranslateHelper(render_view, autofill_agent);
+ new ChromeRenderObserver(render_view, translate, phishing_classifier);
// Used only for testing/automation.
if (CommandLine::ForCurrentProcess()->HasSwitch(
@@ -312,33 +309,6 @@ std::string ChromeContentRendererClient::GetNavigationErrorHtml(
return html;
}
-// Returns the ISO 639_1 language code of the specified |text|, or 'unknown'
-// if it failed.
-std::string ChromeContentRendererClient::DetermineTextLanguage(
- const string16& text) {
- std::string language = chrome::kUnknownLanguageCode;
- int num_languages = 0;
- int text_bytes = 0;
- bool is_reliable = false;
- Language cld_language =
- DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
- &num_languages, NULL, &text_bytes);
- // We don't trust the result if the CLD reports that the detection is not
- // reliable, or if the actual text used to detect the language was less than
- // 100 bytes (short texts can often lead to wrong results).
- if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES &&
- cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
- // We should not use LanguageCode_ISO_639_1 because it does not cover all
- // the languages CLD can detect. As a result, it'll return the invalid
- // language code for traditional Chinese among others.
- // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and
- // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
- // for Simplified Chinese.
- language = LanguageCodeWithDialects(cld_language);
- }
- return language;
-}
-
bool ChromeContentRendererClient::RunIdleHandlerWhenWidgetsHidden() {
return !ExtensionDispatcher::Get()->is_extension_process();
}
diff --git a/chrome/renderer/chrome_content_renderer_client.h b/chrome/renderer/chrome_content_renderer_client.h
index d69d638..ce6bd04 100644
--- a/chrome/renderer/chrome_content_renderer_client.h
+++ b/chrome/renderer/chrome_content_renderer_client.h
@@ -28,7 +28,6 @@ class ChromeContentRendererClient : public content::ContentRendererClient {
virtual std::string GetNavigationErrorHtml(
const WebKit::WebURLRequest& failed_request,
const WebKit::WebURLError& error);
- virtual std::string DetermineTextLanguage(const string16& text);
virtual bool RunIdleHandlerWhenWidgetsHidden();
virtual bool AllowPopup(const GURL& creator);
virtual bool ShouldFork(WebKit::WebFrame* frame,
diff --git a/chrome/renderer/chrome_render_observer.cc b/chrome/renderer/chrome_render_observer.cc
new file mode 100644
index 0000000..ef89792
--- /dev/null
+++ b/chrome/renderer/chrome_render_observer.cc
@@ -0,0 +1,344 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/renderer/chrome_render_observer.h"
+
+#include "base/command_line.h"
+#include "base/metrics/histogram.h"
+#include "chrome/common/chrome_switches.h"
+#include "chrome/common/render_messages.h"
+#include "chrome/common/thumbnail_score.h"
+#include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
+#include "chrome/renderer/translate_helper.h"
+#include "content/renderer/content_renderer_client.h"
+#include "content/renderer/render_view.h"
+#include "skia/ext/bitmap_platform_device.h"
+#include "skia/ext/image_operations.h"
+#include "third_party/WebKit/Source/WebKit/chromium/public/WebDataSource.h"
+#include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
+#include "third_party/WebKit/Source/WebKit/chromium/public/WebRect.h"
+#include "third_party/WebKit/Source/WebKit/chromium/public/WebSize.h"
+#include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"
+#include "ui/gfx/color_utils.h"
+#include "ui/gfx/skbitmap_operations.h"
+#include "webkit/glue/webkit_glue.h"
+
+using WebKit::WebDataSource;
+using WebKit::WebFrame;
+using WebKit::WebRect;
+using WebKit::WebSize;
+using WebKit::WebView;
+
+// Delay in milliseconds that we'll wait before capturing the page contents
+// and thumbnail.
+static const int kDelayForCaptureMs = 500;
+
+// Typically, we capture the page data once the page is loaded.
+// Sometimes the page never finishes loading, which prevents the page capture.
+// To work around this problem, we always perform a capture after the following
+// delay.
+static const int kDelayForForcedCaptureMs = 6000;
+
+// Define TIME_TEXT_RETRIEVAL to write the time necessary for thumbnail/DOM
+// text retrieval, respectively, into the system debug log.
+// #define TIME_TEXT_RETRIEVAL
+
+// Maximum number of characters in the document to index; any text beyond this
+// point will be clipped.
+static const size_t kMaxIndexChars = 65535;
+
+// Size of the thumbnails that we'll generate
+static const int kThumbnailWidth = 212;
+static const int kThumbnailHeight = 132;
+
+static bool PaintViewIntoCanvas(WebView* view,
+ skia::PlatformCanvas& canvas) {
+ view->layout();
+ const WebSize& size = view->size();
+
+ if (!canvas.initialize(size.width, size.height, true))
+ return false;
+
+ view->paint(webkit_glue::ToWebCanvas(&canvas),
+ WebRect(0, 0, size.width, size.height));
+ // TODO: Add a way to snapshot the whole page, not just the currently
+ // visible part.
+
+ return true;
+}
+
+// Calculates how "boring" a thumbnail is. The boring score is the fraction,
+// in the range [0, 1], of pixels that have the most common luma value. Higher
+// boring scores indicate that a higher percentage of a bitmap is all the same
+// brightness.
+static double CalculateBoringScore(SkBitmap* bitmap) {
+ int histogram[256] = {0};
+ color_utils::BuildLumaHistogram(bitmap, histogram);
+
+ int color_count = *std::max_element(histogram, histogram + 256);
+ int pixel_count = bitmap->width() * bitmap->height();
+ return static_cast<double>(color_count) / pixel_count;
+}
+
+ChromeRenderObserver::ChromeRenderObserver(
+ RenderView* render_view,
+ TranslateHelper* translate_helper,
+ safe_browsing::PhishingClassifierDelegate* phishing_classifier)
+ : RenderViewObserver(render_view),
+ translate_helper_(translate_helper),
+ phishing_classifier_(phishing_classifier),
+ last_indexed_page_id_(-1),
+ ALLOW_THIS_IN_INITIALIZER_LIST(page_info_method_factory_(this)) {
+}
+
+ChromeRenderObserver::~ChromeRenderObserver() {
+}
+
+bool ChromeRenderObserver::OnMessageReceived(const IPC::Message& message) {
+ bool handled = true;
+ IPC_BEGIN_MESSAGE_MAP(ChromeRenderObserver, message)
+ IPC_MESSAGE_HANDLER(ViewMsg_CaptureSnapshot, OnCaptureSnapshot)
+ IPC_MESSAGE_UNHANDLED(handled = false)
+ IPC_END_MESSAGE_MAP()
+ return handled;
+}
+
+void ChromeRenderObserver::OnCaptureSnapshot() {
+ SkBitmap snapshot;
+ bool error = false;
+
+ WebFrame* main_frame = render_view()->webview()->mainFrame();
+ if (!main_frame)
+ error = true;
+
+ if (!error && !CaptureSnapshot(render_view()->webview(), &snapshot))
+ error = true;
+
+ DCHECK(error == snapshot.empty()) <<
+ "Snapshot should be empty on error, non-empty otherwise.";
+
+ // Send the snapshot to the browser process.
+ Send(new ViewHostMsg_Snapshot(routing_id(), snapshot));
+}
+
+void ChromeRenderObserver::DidStopLoading() {
+ MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ page_info_method_factory_.NewRunnableMethod(
+ &ChromeRenderObserver::CapturePageInfo, render_view()->page_id(),
+ false),
+ render_view()->content_state_immediately() ? 0 : kDelayForCaptureMs);
+}
+
+void ChromeRenderObserver::DidCommitProvisionalLoad(WebFrame* frame,
+ bool is_new_navigation) {
+ if (!is_new_navigation)
+ return;
+
+ MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ page_info_method_factory_.NewRunnableMethod(
+ &ChromeRenderObserver::CapturePageInfo, render_view()->page_id(),
+ true),
+ kDelayForForcedCaptureMs);
+}
+
+void ChromeRenderObserver::CapturePageInfo(int load_id,
+ bool preliminary_capture) {
+ if (load_id != render_view()->page_id())
+ return; // This capture call is no longer relevant due to navigation.
+
+ if (load_id == last_indexed_page_id_)
+ return; // we already indexed this page
+
+ if (!render_view()->webview())
+ return;
+
+ WebFrame* main_frame = render_view()->webview()->mainFrame();
+ if (!main_frame)
+ return;
+
+ // Don't index/capture pages that are in view source mode.
+ if (main_frame->isViewSourceModeEnabled())
+ return;
+
+ // Don't index/capture pages that failed to load. This only checks the top
+ // level frame so the thumbnail may contain a frame that failed to load.
+ WebDataSource* ds = main_frame->dataSource();
+ if (ds && ds->hasUnreachableURL())
+ return;
+
+ if (!preliminary_capture)
+ last_indexed_page_id_ = load_id;
+
+ // Get the URL for this page.
+ GURL url(main_frame->url());
+ if (url.is_empty())
+ return;
+
+ // Retrieve the frame's full text.
+ string16 contents;
+ CaptureText(main_frame, &contents);
+ if (contents.size()) {
+ if (translate_helper_)
+ translate_helper_->PageCaptured(contents);
+ // Send the text to the browser for indexing (the browser might decide not
+ // to index, if the URL is HTTPS for instance) and language discovery.
+ Send(new ViewHostMsg_PageContents(routing_id(), url, load_id, contents));
+ }
+
+ // Generate the thumbnail here if the in-browser thumbnailing isn't
+ // enabled. TODO(satorux): Remove this and related code once
+ // crbug.com/65936 is complete.
+ if (!CommandLine::ForCurrentProcess()->HasSwitch(
+ switches::kEnableInBrowserThumbnailing)) {
+ CaptureThumbnail();
+ }
+
+ // Will swap out the string.
+ if (phishing_classifier_)
+ phishing_classifier_->PageCaptured(&contents, preliminary_capture);
+}
+
+void ChromeRenderObserver::CaptureText(WebFrame* frame, string16* contents) {
+ contents->clear();
+ if (!frame)
+ return;
+
+#ifdef TIME_TEXT_RETRIEVAL
+ double begin = time_util::GetHighResolutionTimeNow();
+#endif
+
+ // get the contents of the frame
+ *contents = frame->contentAsText(kMaxIndexChars);
+
+#ifdef TIME_TEXT_RETRIEVAL
+ double end = time_util::GetHighResolutionTimeNow();
+ char buf[128];
+ sprintf_s(buf, "%d chars retrieved for indexing in %gms\n",
+ contents.size(), (end - begin)*1000);
+ OutputDebugStringA(buf);
+#endif
+
+ // When the contents are clipped to the maximum, we don't want to have a
+ // partial word indexed at the end that might have been clipped. Therefore,
+ // terminate the string at the last space to ensure no words are clipped.
+ if (contents->size() == kMaxIndexChars) {
+ size_t last_space_index = contents->find_last_of(kWhitespaceUTF16);
+ if (last_space_index == std::wstring::npos)
+ return; // don't index if we got a huge block of text with no spaces
+ contents->resize(last_space_index);
+ }
+}
+
+void ChromeRenderObserver::CaptureThumbnail() {
+ WebFrame* main_frame = render_view()->webview()->mainFrame();
+ if (!main_frame)
+ return;
+
+ // get the URL for this page
+ GURL url(main_frame->url());
+ if (url.is_empty())
+ return;
+
+ if (render_view()->size().IsEmpty())
+ return; // Don't create an empty thumbnail!
+
+ ThumbnailScore score;
+ SkBitmap thumbnail;
+ if (!CaptureFrameThumbnail(render_view()->webview(), kThumbnailWidth,
+ kThumbnailHeight, &thumbnail, &score))
+ return;
+
+ // send the thumbnail message to the browser process
+ Send(new ViewHostMsg_Thumbnail(routing_id(), url, score, thumbnail));
+}
+
+bool ChromeRenderObserver::CaptureFrameThumbnail(WebView* view,
+ int w,
+ int h,
+ SkBitmap* thumbnail,
+ ThumbnailScore* score) {
+ base::TimeTicks beginning_time = base::TimeTicks::Now();
+
+ skia::PlatformCanvas canvas;
+
+ // Paint |view| into |canvas|.
+ if (!PaintViewIntoCanvas(view, canvas))
+ return false;
+
+ skia::BitmapPlatformDevice& device =
+ static_cast<skia::BitmapPlatformDevice&>(canvas.getTopPlatformDevice());
+
+ const SkBitmap& src_bmp = device.accessBitmap(false);
+
+ SkRect dest_rect = { 0, 0, SkIntToScalar(w), SkIntToScalar(h) };
+ float dest_aspect = dest_rect.width() / dest_rect.height();
+
+ // Get the src rect so that we can preserve the aspect ratio while filling
+ // the destination.
+ SkIRect src_rect;
+ if (src_bmp.width() < dest_rect.width() ||
+ src_bmp.height() < dest_rect.height()) {
+ // Source image is smaller: we clip the part of source image within the
+ // dest rect, and then stretch it to fill the dest rect. We don't respect
+ // the aspect ratio in this case.
+ src_rect.set(0, 0, static_cast<S16CPU>(dest_rect.width()),
+ static_cast<S16CPU>(dest_rect.height()));
+ score->good_clipping = false;
+ } else {
+ float src_aspect = static_cast<float>(src_bmp.width()) / src_bmp.height();
+ if (src_aspect > dest_aspect) {
+ // Wider than tall, clip horizontally: we center the smaller thumbnail in
+ // the wider screen.
+ S16CPU new_width = static_cast<S16CPU>(src_bmp.height() * dest_aspect);
+ S16CPU x_offset = (src_bmp.width() - new_width) / 2;
+ src_rect.set(x_offset, 0, new_width + x_offset, src_bmp.height());
+ score->good_clipping = false;
+ } else {
+ src_rect.set(0, 0, src_bmp.width(),
+ static_cast<S16CPU>(src_bmp.width() / dest_aspect));
+ score->good_clipping = true;
+ }
+ }
+
+ score->at_top = (view->mainFrame()->scrollOffset().height == 0);
+
+ SkBitmap subset;
+ device.accessBitmap(false).extractSubset(&subset, src_rect);
+
+ // First do a fast downsample by powers of two to get close to the final size.
+ SkBitmap downsampled_subset =
+ SkBitmapOperations::DownsampleByTwoUntilSize(subset, w, h);
+
+ // Do a high-quality resize from the downscaled size to the final size.
+ *thumbnail = skia::ImageOperations::Resize(
+ downsampled_subset, skia::ImageOperations::RESIZE_LANCZOS3, w, h);
+
+ score->boring_score = CalculateBoringScore(thumbnail);
+
+ HISTOGRAM_TIMES("Renderer4.Thumbnail",
+ base::TimeTicks::Now() - beginning_time);
+
+ return true;
+}
+
+bool ChromeRenderObserver::CaptureSnapshot(WebView* view, SkBitmap* snapshot) {
+ base::TimeTicks beginning_time = base::TimeTicks::Now();
+
+ skia::PlatformCanvas canvas;
+ if (!PaintViewIntoCanvas(view, canvas))
+ return false;
+
+ skia::BitmapPlatformDevice& device =
+ static_cast<skia::BitmapPlatformDevice&>(canvas.getTopPlatformDevice());
+
+ const SkBitmap& bitmap = device.accessBitmap(false);
+ if (!bitmap.copyTo(snapshot, SkBitmap::kARGB_8888_Config))
+ return false;
+
+ HISTOGRAM_TIMES("Renderer4.Snapshot",
+ base::TimeTicks::Now() - beginning_time);
+ return true;
+}
diff --git a/chrome/renderer/chrome_render_observer.h b/chrome/renderer/chrome_render_observer.h
new file mode 100644
index 0000000..41290f2
--- /dev/null
+++ b/chrome/renderer/chrome_render_observer.h
@@ -0,0 +1,79 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_RENDERER_CHROME_RENDER_OBSERVER_H_
+#define CHROME_RENDERER_CHROME_RENDER_OBSERVER_H_
+#pragma once
+
+#include "base/task.h"
+#include "content/renderer/render_view_observer.h"
+
+class SkBitmap;
+class TranslateHelper;
+struct ThumbnailScore;
+
+namespace WebKit {
+class WebView;
+}
+
+namespace safe_browsing {
+class PhishingClassifierDelegate;
+}
+
+// This class holds the Chrome-specific parts of RenderView, and has the same
+// lifetime.
+class ChromeRenderObserver : public RenderViewObserver {
+ public:
+ // translate_helper and/or phishing_classifier can be NULL.
+ ChromeRenderObserver(
+ RenderView* render_view,
+ TranslateHelper* translate_helper,
+ safe_browsing::PhishingClassifierDelegate* phishing_classifier);
+ virtual ~ChromeRenderObserver();
+
+ private:
+ // RenderViewObserver implementation.
+ virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
+ virtual void DidStopLoading() OVERRIDE;
+ virtual void DidCommitProvisionalLoad(WebKit::WebFrame* frame,
+ bool is_new_navigation) OVERRIDE;
+
+ void OnCaptureSnapshot();
+
+ // Captures the thumbnail and text contents for indexing for the given load
+ // ID. If the view's load ID is different than the parameter, this call is
+ // a NOP. Typically called on a timer, so the load ID may have changed in the
+ // meantime.
+ void CapturePageInfo(int load_id, bool preliminary_capture);
+
+ // Retrieves the text from the given frame's contents. The page text, up to a
+ // maximum of kMaxIndexChars characters, is placed into the given buffer.
+ void CaptureText(WebKit::WebFrame* frame, string16* contents);
+
+ void CaptureThumbnail();
+
+ // Creates a thumbnail of |view|'s contents resized to (|w|, |h|)
+ // and puts that in |thumbnail|. Thumbnail metadata goes in |score|.
+ bool CaptureFrameThumbnail(WebKit::WebView* view, int w, int h,
+ SkBitmap* thumbnail,
+ ThumbnailScore* score);
+
+ // Capture a snapshot of a view. This is used to allow an extension
+ // to get a snapshot of a tab using chrome.tabs.captureVisibleTab().
+ bool CaptureSnapshot(WebKit::WebView* view, SkBitmap* snapshot);
+
+ // Has the same lifetime as us.
+ TranslateHelper* translate_helper_;
+ safe_browsing::PhishingClassifierDelegate* phishing_classifier_;
+
+ // Page_id from the last page we indexed. This prevents us from indexing the
+ // same page twice in a row.
+ int32 last_indexed_page_id_;
+
+ ScopedRunnableMethodFactory<ChromeRenderObserver> page_info_method_factory_;
+
+ DISALLOW_COPY_AND_ASSIGN(ChromeRenderObserver);
+};
+
+#endif // CHROME_RENDERER_CHROME_RENDER_OBSERVER_H_
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
index d9c3174..9789920e 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
@@ -138,14 +138,14 @@ void PhishingClassifierDelegate::DidCommitProvisionalLoad(
CancelPendingClassification();
}
-void PhishingClassifierDelegate::PageCaptured(const string16& page_text,
+void PhishingClassifierDelegate::PageCaptured(string16* page_text,
bool preliminary_capture) {
if (preliminary_capture) {
return;
}
last_finished_load_id_ = render_view()->page_id();
last_finished_load_url_ = GetToplevelUrl();
- classifier_page_text_ = page_text;
+ classifier_page_text_.swap(*page_text);
MaybeStartClassification();
}
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
index 04309ba..3e3a09e 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
@@ -33,15 +33,14 @@ class PhishingClassifierDelegate : public RenderViewObserver {
// The scorer is passed on to the classifier.
void SetPhishingScorer(const safe_browsing::Scorer* scorer);
- // RenderViewObserver implementation, public for testing.
-
// Called by the RenderView once a page has finished loading. Updates the
// last-loaded URL and page id, then starts classification if all other
// conditions are met (see MaybeStartClassification for details).
// We ignore preliminary captures, since these happen before the page has
// finished loading.
- virtual void PageCaptured(const string16& page_text,
- bool preliminary_capture);
+ void PageCaptured(string16* page_text, bool preliminary_capture);
+
+ // RenderViewObserver implementation, public for testing.
// Called by the RenderView when a page has started loading in the given
// WebFrame. Typically, this will cause any pending classification to be
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
index 305e907..90578a4 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
@@ -118,7 +118,7 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) {
string16 page_text = ASCIIToUTF16("dummy");
EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
WillOnce(DeleteArg<1>());
- delegate->PageCaptured(page_text, false);
+ delegate->PageCaptured(&page_text, false);
Mock::VerifyAndClearExpectations(classifier);
// Reloading the same page should not trigger a reclassification.
@@ -129,7 +129,8 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) {
message_loop_.Run();
Mock::VerifyAndClearExpectations(classifier);
OnStartPhishingDetection(delegate, GURL("http://host.com/"));
- delegate->PageCaptured(page_text, false);
+ page_text = ASCIIToUTF16("dummy");
+ delegate->PageCaptured(&page_text, false);
// Navigating in a subframe will increment the page id, but not change
// the toplevel URL. This should cancel pending classification since the
@@ -140,7 +141,8 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) {
message_loop_.Run();
Mock::VerifyAndClearExpectations(classifier);
OnStartPhishingDetection(delegate, GURL("http://host.com/"));
- delegate->PageCaptured(page_text, false);
+ page_text = ASCIIToUTF16("dummy");
+ delegate->PageCaptured(&page_text, false);
// Scrolling to an anchor will increment the page id, but should not
// trigger a reclassification. Currently, a pending classification will
@@ -148,7 +150,8 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) {
EXPECT_CALL(*classifier, CancelPendingClassification());
LoadURL("http://host.com/#foo");
OnStartPhishingDetection(delegate, GURL("http://host.com/#foo"));
- delegate->PageCaptured(page_text, false);
+ page_text = ASCIIToUTF16("dummy");
+ delegate->PageCaptured(&page_text, false);
// Now load a new toplevel page, which should trigger another classification.
EXPECT_CALL(*classifier, CancelPendingClassification());
@@ -158,7 +161,7 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) {
EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
WillOnce(DeleteArg<1>());
OnStartPhishingDetection(delegate, GURL("http://host2.com/"));
- delegate->PageCaptured(page_text, false);
+ delegate->PageCaptured(&page_text, false);
Mock::VerifyAndClearExpectations(classifier);
// No classification should happen on back/forward navigation.
@@ -170,7 +173,7 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) {
Mock::VerifyAndClearExpectations(classifier);
page_text = ASCIIToUTF16("dummy");
OnStartPhishingDetection(delegate, GURL("http://host.com/#foo"));
- delegate->PageCaptured(page_text, false);
+ delegate->PageCaptured(&page_text, false);
// The delegate will cancel pending classification on destruction.
EXPECT_CALL(*classifier, CancelPendingClassification());
@@ -188,15 +191,16 @@ TEST_F(PhishingClassifierDelegateTest, NoScorer) {
LoadURL("http://host.com/");
string16 page_text = ASCIIToUTF16("dummy");
OnStartPhishingDetection(delegate, GURL("http://host.com/"));
- delegate->PageCaptured(page_text, false);
+ delegate->PageCaptured(&page_text, false);
LoadURL("http://host2.com/");
page_text = ASCIIToUTF16("dummy2");
OnStartPhishingDetection(delegate, GURL("http://host2.com/"));
- delegate->PageCaptured(page_text, false);
+ delegate->PageCaptured(&page_text, false);
// Now set a scorer, which should cause a classifier to be created and
// the classification to proceed.
+ page_text = ASCIIToUTF16("dummy2");
EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
WillOnce(DeleteArg<1>());
MockScorer scorer;
@@ -220,14 +224,16 @@ TEST_F(PhishingClassifierDelegateTest, NoScorer_Ref) {
LoadURL("http://host.com/");
string16 page_text = ASCIIToUTF16("dummy");
OnStartPhishingDetection(delegate, GURL("http://host.com/"));
- delegate->PageCaptured(page_text, false);
+ delegate->PageCaptured(&page_text, false);
LoadURL("http://host.com/#foo");
OnStartPhishingDetection(delegate, GURL("http://host.com/#foo"));
- delegate->PageCaptured(page_text, false);
+ page_text = ASCIIToUTF16("dummy");
+ delegate->PageCaptured(&page_text, false);
// Now set a scorer, which should cause a classifier to be created and
// the classification to proceed.
+ page_text = ASCIIToUTF16("dummy");
EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
WillOnce(DeleteArg<1>());
MockScorer scorer;
@@ -254,9 +260,10 @@ TEST_F(PhishingClassifierDelegateTest, NoStartPhishingDetection) {
LoadURL("http://host.com/");
Mock::VerifyAndClearExpectations(classifier);
string16 page_text = ASCIIToUTF16("phish");
- delegate->PageCaptured(page_text, false);
+ delegate->PageCaptured(&page_text, false);
// Now simulate the StartPhishingDetection IPC. We expect classification
// to begin.
+ page_text = ASCIIToUTF16("phish");
EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
WillOnce(DeleteArg<1>());
OnStartPhishingDetection(delegate, GURL("http://host.com/"));
@@ -267,7 +274,8 @@ TEST_F(PhishingClassifierDelegateTest, NoStartPhishingDetection) {
EXPECT_CALL(*classifier, CancelPendingClassification());
responses_["http://host2.com/"] = "<html><body>phish</body></html>";
LoadURL("http://host2.com/");
- delegate->PageCaptured(page_text, false);
+ page_text = ASCIIToUTF16("phish");
+ delegate->PageCaptured(&page_text, false);
EXPECT_CALL(*classifier, CancelPendingClassification());
responses_["http://host3.com/"] = "<html><body>phish</body></html>";
@@ -295,12 +303,13 @@ TEST_F(PhishingClassifierDelegateTest, IgnorePreliminaryCapture) {
Mock::VerifyAndClearExpectations(classifier);
OnStartPhishingDetection(delegate, GURL("http://host.com/"));
string16 page_text = ASCIIToUTF16("phish");
- delegate->PageCaptured(page_text, true);
+ delegate->PageCaptured(&page_text, true);
// Once the non-preliminary capture happens, classification should begin.
+ page_text = ASCIIToUTF16("phish");
EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
WillOnce(DeleteArg<1>());
- delegate->PageCaptured(page_text, false);
+ delegate->PageCaptured(&page_text, false);
Mock::VerifyAndClearExpectations(classifier);
// The delegate will cancel pending classification on destruction.
@@ -327,7 +336,7 @@ TEST_F(PhishingClassifierDelegateTest, DetectedPhishingSite) {
EXPECT_CALL(*classifier, BeginClassification(Pointee(page_text), _)).
WillOnce(DeleteArg<1>());
OnStartPhishingDetection(delegate, GURL("http://host.com/#a"));
- delegate->PageCaptured(page_text, false);
+ delegate->PageCaptured(&page_text, false);
Mock::VerifyAndClearExpectations(classifier);
// Now run the callback to simulate the classifier finishing.
diff --git a/chrome/renderer/translate_helper.cc b/chrome/renderer/translate_helper.cc
index 31833b6..4d4fed1 100644
--- a/chrome/renderer/translate_helper.cc
+++ b/chrome/renderer/translate_helper.cc
@@ -5,14 +5,18 @@
#include "chrome/renderer/translate_helper.h"
#include "base/compiler_specific.h"
+#include "base/metrics/histogram.h"
#include "base/utf_string_conversions.h"
#include "chrome/common/chrome_constants.h"
#include "chrome/common/render_messages.h"
+#include "chrome/renderer/autofill/autofill_agent.h"
#include "content/renderer/render_view.h"
+#include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"
+#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
#include "v8/include/v8.h"
#include "webkit/glue/dom_operations.h"
@@ -41,16 +45,34 @@ static const char* const kAutoDetectionLanguage = "auto";
////////////////////////////////////////////////////////////////////////////////
// TranslateHelper, public:
//
-TranslateHelper::TranslateHelper(RenderView* render_view)
+TranslateHelper::TranslateHelper(RenderView* render_view,
+ autofill::AutofillAgent* autofill)
: RenderViewObserver(render_view),
translation_pending_(false),
page_id_(-1),
+ autofill_(autofill),
ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
}
TranslateHelper::~TranslateHelper() {
}
+void TranslateHelper::PageCaptured(const string16& contents) {
+ WebDocument document = render_view()->webview()->mainFrame()->document();
+ // If the page explicitly specifies a language, use it, otherwise we'll
+ // determine it based on the text content using the CLD.
+ std::string language = GetPageLanguageFromMetaTag(&document);
+ if (language.empty()) {
+ base::TimeTicks begin_time = base::TimeTicks::Now();
+ language = DetermineTextLanguage(contents);
+ UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection",
+ base::TimeTicks::Now() - begin_time);
+ }
+
+ Send(new ViewHostMsg_TranslateLanguageDetermined(
+ routing_id(), language, IsPageTranslatable(&document)));
+}
+
void TranslateHelper::CancelPendingTranslation() {
method_factory_.RevokeAll();
translation_pending_ = false;
@@ -113,6 +135,31 @@ std::string TranslateHelper::GetPageLanguageFromMetaTag(WebDocument* document) {
return language;
}
+// static
+std::string TranslateHelper::DetermineTextLanguage(const string16& text) {
+ std::string language = chrome::kUnknownLanguageCode;
+ int num_languages = 0;
+ int text_bytes = 0;
+ bool is_reliable = false;
+ Language cld_language =
+ DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
+ &num_languages, NULL, &text_bytes);
+ // We don't trust the result if the CLD reports that the detection is not
+ // reliable, or if the actual text used to detect the language was less than
+ // 100 bytes (short texts can often lead to wrong results).
+ if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES &&
+ cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
+ // We should not use LanguageCode_ISO_639_1 because it does not cover all
+ // the languages CLD can detect. As a result, it'll return the invalid
+ // language code for traditional Chinese among others.
+ // |LanguageCodeWithDialects| will go through ISO 639-1, ISO 639-2 and
+ // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
+ // for Simplified Chinese.
+ language = LanguageCodeWithDialects(cld_language);
+ }
+ return language;
+}
+
////////////////////////////////////////////////////////////////////////////////
// TranslateHelper, protected:
//
@@ -284,8 +331,8 @@ void TranslateHelper::CheckTranslateStatus() {
translation_pending_ = false;
- // Notify the renderer we are done.
- render_view()->OnPageTranslated();
+ if (autofill_)
+ autofill_->FrameTranslated(render_view()->webview()->mainFrame());
// Notify the browser we are done.
render_view()->Send(new ViewHostMsg_PageTranslated(
diff --git a/chrome/renderer/translate_helper.h b/chrome/renderer/translate_helper.h
index 54a4706..902f0bc 100644
--- a/chrome/renderer/translate_helper.h
+++ b/chrome/renderer/translate_helper.h
@@ -17,25 +17,21 @@ class WebDocument;
class WebFrame;
}
+namespace autofill {
+class AutofillAgent;
+}
+
// This class deals with page translation.
// There is one TranslateHelper per RenderView.
class TranslateHelper : public RenderViewObserver {
public:
- explicit TranslateHelper(RenderView* render_view);
+ // |autofill| may be NULL, in which case it is not notified of translations.
+ TranslateHelper(RenderView* render_view, autofill::AutofillAgent* autofill);
virtual ~TranslateHelper();
- // Returns whether the page associated with |document| is a candidate for
- // translation. Some pages can explictly specify (via a meta-tag) that they
- // should not be translated.
- static bool IsPageTranslatable(WebKit::WebDocument* document);
-
- // Returns the language specified in the language meta tag of |document|, or
- // an empty string if no such tag was found.
- // The tag may specify several languages, the first one is returned.
- // Example of such meta-tag:
- // <meta http-equiv="content-language" content="en, fr">
- static std::string GetPageLanguageFromMetaTag(WebKit::WebDocument* document);
+ // Informs us that the page's text has been extracted.
+ void PageCaptured(const string16& contents);
protected:
// The following methods are protected so they can be overridden in
@@ -75,6 +71,22 @@ class TranslateHelper : public RenderViewObserver {
virtual bool DontDelayTasks();
private:
+ // Returns whether the page associated with |document| is a candidate for
+ // translation. Some pages can explicitly specify (via a meta-tag) that they
+ // should not be translated.
+ static bool IsPageTranslatable(WebKit::WebDocument* document);
+
+ // Returns the language specified in the language meta tag of |document|, or
+ // an empty string if no such tag was found.
+ // The tag may specify several languages, the first one is returned.
+ // Example of such meta-tag:
+ // <meta http-equiv="content-language" content="en, fr">
+ static std::string GetPageLanguageFromMetaTag(WebKit::WebDocument* document);
+
+ // Returns the language code CLD detects for |text| (may include a dialect,
+ // e.g. zh-CN), or chrome::kUnknownLanguageCode if detection is unreliable.
+ static std::string DetermineTextLanguage(const string16& text);
+
// RenderViewObserver implementation.
virtual bool OnMessageReceived(const IPC::Message& message);
@@ -123,6 +135,8 @@ class TranslateHelper : public RenderViewObserver {
std::string source_lang_;
std::string target_lang_;
+ autofill::AutofillAgent* autofill_;
+
// Method factory used to make calls to TranslatePageImpl.
ScopedRunnableMethodFactory<TranslateHelper> method_factory_;
diff --git a/chrome/renderer/translate_helper_browsertest.cc b/chrome/renderer/translate_helper_browsertest.cc
index 067d939..9298305 100644
--- a/chrome/renderer/translate_helper_browsertest.cc
+++ b/chrome/renderer/translate_helper_browsertest.cc
@@ -15,7 +15,7 @@ using testing::Return;
class TestTranslateHelper : public TranslateHelper {
public:
explicit TestTranslateHelper(RenderView* render_view)
- : TranslateHelper(render_view) {
+ : TranslateHelper(render_view, NULL) {
}
virtual bool DontDelayTasks() { return true; }