diff options
-rw-r--r-- | chrome/renderer/render_view.cc | 38 | ||||
-rw-r--r-- | chrome/renderer/render_view.h | 6 | ||||
-rw-r--r-- | chrome/renderer/render_view_browsertest.cc | 69 | ||||
-rw-r--r-- | chrome/renderer/translate_helper.cc | 56 | ||||
-rw-r--r-- | chrome/renderer/translate_helper.h | 13 | ||||
-rw-r--r-- | webkit/glue/dom_operations.cc | 11 | ||||
-rw-r--r-- | webkit/glue/dom_operations.h | 11 |
7 files changed, 167 insertions, 37 deletions
diff --git a/chrome/renderer/render_view.cc b/chrome/renderer/render_view.cc index 861722a..287b44c 100644 --- a/chrome/renderer/render_view.cc +++ b/chrome/renderer/render_view.cc @@ -889,15 +889,22 @@ void RenderView::CapturePageInfo(int load_id, bool preliminary_capture) { string16 contents; CaptureText(main_frame, &contents); if (contents.size()) { - base::TimeTicks begin_time = base::TimeTicks::Now(); - std::string language = DetermineTextLanguage(contents); - UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", - base::TimeTicks::Now() - begin_time); WebKit::WebDocument document = main_frame->document(); + // If the page explicitly specifies a language, use it, otherwise we'll + // determine it based on the text content using the CLD. + std::string language = + TranslateHelper::GetPageLanguageFromMetaTag(&document); + if (language.empty()) { + base::TimeTicks begin_time = base::TimeTicks::Now(); + language = DetermineTextLanguage(contents); + UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", + base::TimeTicks::Now() - begin_time); + } // Send the text to the browser for indexing (the browser might decide not // to index, if the URL is HTTPS for instance) and language discovery. - Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents, - language, IsPageTranslatable(&document))); + Send(new ViewHostMsg_PageContents( + routing_id_, url, load_id, contents, language, + TranslateHelper::IsPageTranslatable(&document))); } OnCaptureThumbnail(); @@ -1799,7 +1806,7 @@ void RenderView::didStopLoading() { FROM_HERE, page_info_method_factory_.NewRunnableMethod( &RenderView::CapturePageInfo, page_id_, false), - kDelayForCaptureMs); + send_content_state_immediately_ ? 
0 : kDelayForCaptureMs); } bool RenderView::isSmartInsertDeleteEnabled() { @@ -2089,23 +2096,6 @@ void RenderView::UpdateTargetURL(const GURL& url, const GURL& fallback_url) { } } -bool RenderView::IsPageTranslatable(WebKit::WebDocument* document) { - std::vector<WebKit::WebElement> meta_elements; - webkit_glue::GetMetaElementsWithName(document, ASCIIToUTF16("google"), - &meta_elements); - std::vector<WebKit::WebElement>::const_iterator iter; - for (iter = meta_elements.begin(); iter != meta_elements.end(); ++iter) { - WebString attribute = iter->getAttribute("value"); - if (attribute.isNull()) // We support both 'value' and 'content'. - attribute = iter->getAttribute("content"); - if (attribute.isNull()) - continue; - if (LowerCaseEqualsASCII(attribute, "notranslate")) - return false; - } - return true; -} - void RenderView::StartNavStateSyncTimerIfNecessary() { int delay; if (send_content_state_immediately_) diff --git a/chrome/renderer/render_view.h b/chrome/renderer/render_view.h index 8e4d807..b318548 100644 --- a/chrome/renderer/render_view.h +++ b/chrome/renderer/render_view.h @@ -1005,9 +1005,9 @@ class RenderView : public RenderWidget, HostContentSettings host_content_settings_; HostZoomLevels host_zoom_levels_; - // Whether content state (such as form state and scroll position) should be - // sent to the browser immediately. This is normally false, but set to true - // by some tests. + // Whether content state (such as form state, scroll position and page + // contents) should be sent to the browser immediately. This is normally + // false, but set to true by some tests. bool send_content_state_immediately_; // Stores if loading of images, scripts, and plugins is allowed. 
diff --git a/chrome/renderer/render_view_browsertest.cc b/chrome/renderer/render_view_browsertest.cc index f5539b4..35cdb6d 100644 --- a/chrome/renderer/render_view_browsertest.cc +++ b/chrome/renderer/render_view_browsertest.cc @@ -1089,3 +1089,72 @@ TEST_F(RenderViewTest, FillFormElement) { ASSERT_EQ(static_cast<IPC::Message*>(NULL), message2); EXPECT_EQ(firstname.value(), WebKit::WebString::fromUTF8("David")); } + +// Tests that we send the right translatable for a page and that we respect the +// "no translate" meta-tag. +TEST_F(RenderViewTest, TranslatablePage) { + // Suppress the normal delay that occurs when the page is loaded before which + // the renderer sends the page contents to the browser. + view_->set_send_content_state_immediately(true); + + LoadHTML("<html><body>A random page with random content.</body></html>"); + ProcessPendingMessages(); + const IPC::Message* message = render_thread_.sink().GetUniqueMessageMatching( + ViewHostMsg_PageContents::ID); + ASSERT_NE(static_cast<IPC::Message*>(NULL), message); + ViewHostMsg_PageContents::Param params; + ViewHostMsg_PageContents::Read(message, &params); + EXPECT_TRUE(params.e); // Translatable should be true. + render_thread_.sink().ClearMessages(); + + // Now the page specifies the META tag to prevent translation. + LoadHTML("<html><head><meta name=\"google\" value=\"notranslate\"></head>" + "<body>A random page with random content.</body></html>"); + ProcessPendingMessages(); + message = render_thread_.sink().GetUniqueMessageMatching( + ViewHostMsg_PageContents::ID); + ASSERT_NE(static_cast<IPC::Message*>(NULL), message); + ViewHostMsg_PageContents::Read(message, &params); + EXPECT_FALSE(params.e); // Translatable should be false. + render_thread_.sink().ClearMessages(); + + // Try the alternate version of the META tag (content instead of value). 
+ LoadHTML("<html><head><meta name=\"google\" content=\"notranslate\"></head>" + "<body>A random page with random content.</body></html>"); + ProcessPendingMessages(); + message = render_thread_.sink().GetUniqueMessageMatching( + ViewHostMsg_PageContents::ID); + ASSERT_NE(static_cast<IPC::Message*>(NULL), message); + ViewHostMsg_PageContents::Read(message, &params); + EXPECT_FALSE(params.e); // Translatable should be false. +} + +// Tests that the language meta tag takes precedence over the CLD when reporting +// the page's language. +TEST_F(RenderViewTest, LanguageMetaTag) { + // Suppress the normal delay that occurs when the page is loaded before which + // the renderer sends the page contents to the browser. + view_->set_send_content_state_immediately(true); + + LoadHTML("<html><head><meta http-equiv=\"content-language\" content=\"es\">" + "</head><body>A random page with random content.</body></html>"); + ProcessPendingMessages(); + const IPC::Message* message = render_thread_.sink().GetUniqueMessageMatching( + ViewHostMsg_PageContents::ID); + ASSERT_NE(static_cast<IPC::Message*>(NULL), message); + ViewHostMsg_PageContents::Param params; + ViewHostMsg_PageContents::Read(message, &params); + EXPECT_EQ("es", params.d); + render_thread_.sink().ClearMessages(); + + // Makes sure we support multiple languages specified. 
+ LoadHTML("<html><head><meta http-equiv=\"content-language\" " + "content=\" fr , es,en \">" + "</head><body>A random page with random content.</body></html>"); + ProcessPendingMessages(); + message = render_thread_.sink().GetUniqueMessageMatching( + ViewHostMsg_PageContents::ID); + ASSERT_NE(static_cast<IPC::Message*>(NULL), message); + ViewHostMsg_PageContents::Read(message, &params); + EXPECT_EQ("fr", params.d); +} diff --git a/chrome/renderer/translate_helper.cc b/chrome/renderer/translate_helper.cc index d27f107..fafd52d 100644 --- a/chrome/renderer/translate_helper.cc +++ b/chrome/renderer/translate_helper.cc @@ -12,6 +12,7 @@ #include "third_party/WebKit/WebKit/chromium/public/WebScriptSource.h" #include "third_party/WebKit/WebKit/chromium/public/WebView.h" #include "v8/include/v8.h" +#include "webkit/glue/dom_operations.h" using WebKit::WebFrame; using WebKit::WebScriptSource; @@ -103,6 +104,61 @@ void TranslateHelper::CancelPendingTranslation() { target_lang_.clear(); } +// static +bool TranslateHelper::IsPageTranslatable(WebKit::WebDocument* document) { + std::vector<WebKit::WebElement> meta_elements; + webkit_glue::GetMetaElementsWithAttribute(document, + ASCIIToUTF16("name"), + ASCIIToUTF16("google"), + &meta_elements); + std::vector<WebKit::WebElement>::const_iterator iter; + for (iter = meta_elements.begin(); iter != meta_elements.end(); ++iter) { + WebKit::WebString attribute = iter->getAttribute("value"); + if (attribute.isNull()) // We support both 'value' and 'content'. 
+ attribute = iter->getAttribute("content"); + if (attribute.isNull()) + continue; + if (LowerCaseEqualsASCII(attribute, "notranslate")) + return false; + } + return true; +} + +// static +std::string TranslateHelper::GetPageLanguageFromMetaTag( + WebKit::WebDocument* document) { + // The META language tag looks like: + // <meta http-equiv="content-language" content="en"> + // It can contain more than one language: + // <meta http-equiv="content-language" content="en, fr"> + std::vector<WebKit::WebElement> meta_elements; + webkit_glue::GetMetaElementsWithAttribute(document, + ASCIIToUTF16("http-equiv"), + ASCIIToUTF16("content-language"), + &meta_elements); + if (meta_elements.empty()) + return std::string(); + + // We don't expect more than one such tag. If there are several, just use the + // first one. + WebKit::WebString attribute = meta_elements[0].getAttribute("content"); + if (attribute.isEmpty()) + return std::string(); + + // The value is supposed to be ASCII. + if (!IsStringASCII(attribute)) + return std::string(); + + std::string language = StringToLowerASCII(UTF16ToASCII(attribute)); + size_t coma_index = language.find(','); + if (coma_index != std::string::npos) { + // There are more than 1 language specified, just keep the first one. + language = language.substr(0, coma_index); + } + TrimWhitespaceASCII(language, TRIM_ALL, &language); + return language; +} + //////////////////////////////////////////////////////////////////////////////// // TranslateHelper, protected: // diff --git a/chrome/renderer/translate_helper.h b/chrome/renderer/translate_helper.h index bec9e02..0e3a989 100644 --- a/chrome/renderer/translate_helper.h +++ b/chrome/renderer/translate_helper.h @@ -13,6 +13,7 @@ class RenderView; namespace WebKit { +class WebDocument; class WebFrame; } @@ -39,6 +40,18 @@ class TranslateHelper { // revert existing translations. 
void CancelPendingTranslation(); + // Returns whether the page associated with |document| is a candidate for + // translation. Some pages can explictly specify (via a meta-tag) that they + // should not be translated. + static bool IsPageTranslatable(WebKit::WebDocument* document); + + // Returns the language specified in the language meta tag of |document|, or + // an empty string if no such tag was found. + // The tag may specify several languages, the first one is returned. + // Example of such meta-tag: + // <meta http-equiv="content-language" content="en, fr"> + static std::string GetPageLanguageFromMetaTag(WebKit::WebDocument* document); + protected: // The following methods are protected so they can be overridden in // unit-tests. diff --git a/webkit/glue/dom_operations.cc b/webkit/glue/dom_operations.cc index b39943f..724e200 100644 --- a/webkit/glue/dom_operations.cc +++ b/webkit/glue/dom_operations.cc @@ -592,9 +592,10 @@ int NumberOfActiveAnimations(WebView* view) { return controller->numberOfActiveAnimations(); } -void GetMetaElementsWithName(WebDocument* document, - const string16& name, - std::vector<WebElement>* meta_elements) { +void GetMetaElementsWithAttribute(WebDocument* document, + const string16& attribute_name, + const string16& attribute_value, + std::vector<WebElement>* meta_elements) { DCHECK(document); DCHECK(meta_elements); meta_elements->clear(); @@ -610,8 +611,8 @@ void GetMetaElementsWithName(WebDocument* document, WebElement element = node.to<WebElement>(); if (!element.hasTagName("meta")) continue; - WebString meta_name = element.getAttribute("name"); - if (meta_name.isNull() || meta_name != name) + WebString value = element.getAttribute(attribute_name); + if (value.isNull() || value != attribute_value) continue; meta_elements->push_back(element); } diff --git a/webkit/glue/dom_operations.h b/webkit/glue/dom_operations.h index a2dac8c..20084f7 100644 --- a/webkit/glue/dom_operations.h +++ b/webkit/glue/dom_operations.h @@ -132,11 
+132,12 @@ int NumberOfActiveAnimations(WebKit::WebView* view); WebKit::WebString GetSubResourceLinkFromElement( const WebKit::WebElement& element); -// Puts the meta-elements of |document| that have the specified |name| in -// |meta_elements|. -void GetMetaElementsWithName(WebKit::WebDocument* document, - const string16& name, - std::vector<WebKit::WebElement>* meta_elements); +// Puts the meta-elements of |document| that have the attribute |attribute_name| +// with a value of |attribute_value| in |meta_elements|. +void GetMetaElementsWithAttribute(WebKit::WebDocument* document, + const string16& attribute_name, + const string16& atribute_value, + std::vector<WebKit::WebElement>* meta_elements); } // namespace webkit_glue |