diff options
author | jcampan@chromium.org <jcampan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-07 21:41:50 +0000 |
---|---|---|
committer | jcampan@chromium.org <jcampan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-07 21:41:50 +0000 |
commit | 807cb1036f557b8e24306c66a6bac9bfb0c804af (patch) | |
tree | b10af2181ae1e2ce1640f868a217a868332ecc3e /chrome | |
parent | 462474b9444dcda47b5c140f5d31b790610b6176 (diff) | |
download | chromium_src-807cb1036f557b8e24306c66a6bac9bfb0c804af.zip chromium_src-807cb1036f557b8e24306c66a6bac9bfb0c804af.tar.gz chromium_src-807cb1036f557b8e24306c66a6bac9bfb0c804af.tar.bz2 |
Relanding the language detection code.
The code would crash if multiple PageContents notifications
were received rapidly. The CLDHelper now notifies the
TabContents directly and the TabContents ensures only one
language detection can be performed at a time.
Added unit-tests to validate these cases in web_contents_unittest.cc.
Note that patch set 1 is the original patch, patch set 2 contains the new changes.
Original review:
http://codereview.chromium.org/492024/show
BUG=30662
TEST=Run the unit-tests.
Review URL: http://codereview.chromium.org/504051
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@35735 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
24 files changed, 473 insertions, 129 deletions
diff --git a/chrome/browser/browser.cc b/chrome/browser/browser.cc index 79c2308..f9879fc 100644 --- a/chrome/browser/browser.cc +++ b/chrome/browser/browser.cc @@ -1210,8 +1210,8 @@ void Browser::OpenCreateShortcutsDialog() { DCHECK(pending_web_app_action_ == NONE); pending_web_app_action_ = CREATE_SHORTCUT; - // Start fetching web app info for CreateApplicatoinShortcut dialog and - // show the dialog when the data is available in OnDidGetApplicationInfo. + // Start fetching web app info for CreateApplicationShortcut dialog and show + // the dialog when the data is available in OnDidGetApplicationInfo. current_tab->render_view_host()->GetApplicationInfo(entry->page_id()); #else NOTIMPLEMENTED(); diff --git a/chrome/browser/browser_browsertest.cc b/chrome/browser/browser_browsertest.cc index 756b7dc..a948947 100644 --- a/chrome/browser/browser_browsertest.cc +++ b/chrome/browser/browser_browsertest.cc @@ -288,3 +288,36 @@ IN_PROC_BROWSER_TEST_F(BrowserTest, FaviconOfOnloadRedirectToAnchorPage) { controller().GetActiveEntry(); EXPECT_EQ(expected_favicon_url.spec(), entry->favicon().url().spec()); } + +// The CLD library only works on Windows at this point. +#if defined(OS_WIN) +// Tests that the CLD (Compact Language Detection) works properly. +IN_PROC_BROWSER_TEST_F(BrowserTest, PageLanguageDetection) { + static const wchar_t kDocRoot[] = L"chrome/test/data"; + scoped_refptr<HTTPTestServer> server( + HTTPTestServer::CreateServer(kDocRoot, NULL)); + ASSERT_TRUE(NULL != server.get()); + + TabContents* current_tab = browser()->GetSelectedTabContents(); + + // Navigate to a page in English. + ui_test_utils::NavigateToURL( + browser(), GURL(server->TestServerPage("files/english_page.html"))); + NavigationEntry* entry = current_tab->controller().GetActiveEntry(); + ASSERT_TRUE(NULL != entry); + EXPECT_TRUE(entry->language().empty()); + std::string lang = ui_test_utils::WaitForLanguageDetection(current_tab); + EXPECT_EQ("en", lang); + EXPECT_EQ("en", entry->language()); + + // Now navigate to a page in French. + ui_test_utils::NavigateToURL( + browser(), GURL(server->TestServerPage("files/french_page.html"))); + entry = current_tab->controller().GetActiveEntry(); + ASSERT_TRUE(NULL != entry); + EXPECT_TRUE(entry->language().empty()); + lang = ui_test_utils::WaitForLanguageDetection(current_tab); + EXPECT_EQ("fr", lang); + EXPECT_EQ("fr", entry->language()); +} +#endif diff --git a/chrome/browser/cld_helper.cc b/chrome/browser/cld_helper.cc new file mode 100644 index 0000000..856b3aa --- /dev/null +++ b/chrome/browser/cld_helper.cc @@ -0,0 +1,70 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/cld_helper.h" + +#include "chrome/browser/chrome_thread.h" +#include "chrome/browser/tab_contents/tab_contents.h" +#include "chrome/common/notification_service.h" + +#if defined(OS_WIN) +// TODO(port): The compact language detection library works only for Windows. +#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h" +#endif + +CLDHelper::CLDHelper(TabContents* tab_contents, int renderer_process_id, + int page_id, const std::wstring& page_content) + : tab_contents_(tab_contents), + renderer_process_id_(renderer_process_id), + page_id_(page_id), + page_content_(page_content) { +} + +void CLDHelper::DetectLanguage() { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::UI)); + + // Balanced in DetectionDone. + AddRef(); + + // Do the actual work on the file thread. + ChromeThread::PostTask(ChromeThread::FILE, FROM_HERE, + NewRunnableMethod(this, &CLDHelper::DoDetectLanguage)); +} + +void CLDHelper::CancelLanguageDetection() { + tab_contents_ = NULL; +} + +void CLDHelper::DoDetectLanguage() { + if (!tab_contents_) { + // If we have already been canceled, no need to perform the detection. + Release(); // Balance AddRef from DetectLanguage(). + return; + } + +#if defined(OS_WIN) // Only for windows. + int num_languages = 0; + bool is_reliable = false; + + const char* language_iso_code = LanguageCodeISO639_1( + DetectLanguageOfUnicodeText(NULL, page_content_.c_str(), true, + &is_reliable, &num_languages, NULL)); + language_ = language_iso_code; +#else + // TODO(jcampan): port the compact language detection library. + NOTIMPLEMENTED(); +#endif + + // Work is done, notify the RenderViewHost on the UI thread. + ChromeThread::PostTask(ChromeThread::UI, FROM_HERE, + NewRunnableMethod(this, &CLDHelper::DetectionDone)); +} + +void CLDHelper::DetectionDone() { + if (tab_contents_) + tab_contents_->PageLanguageDetected(); + + // Remove the ref we added to ourself in DetectLanguage(). + Release(); +} diff --git a/chrome/browser/cld_helper.h b/chrome/browser/cld_helper.h new file mode 100644 index 0000000..d72b20c --- /dev/null +++ b/chrome/browser/cld_helper.h @@ -0,0 +1,72 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_CLD_HELPER_H_ +#define CHROME_BROWSER_CLD_HELPER_H_ + +#include <string> + +#include "base/ref_counted.h" + +class TabContents; + +// The CLDHelper class detects the language of a page, using the toolbar CLD +// library. (CLD stands for Compact Language Detection.) +// It should be created and the detection process started on the UI thread. +// The detection happens on the file thread and +// TabContents::PageLanguageDetected() is then called on the associated +// TabContents when the language has been detected. +class CLDHelper : public base::RefCountedThreadSafe<CLDHelper> { + public: + // |tab_contents| should reference the TabContents the page is related to. + // |renderer_process_id|, |page_id| and |page_contents| are the id of the + // render process, the id and contents of the page the language is queried. + CLDHelper(TabContents* tab_contents, + int renderer_process_id, + int page_id, + const std::wstring& page_content); + + // Starts the process of detecting the language of the page. + void DetectLanguage(); + + // Cancel any pending language detection, meaning that the TabContents will + // not be notified. + // This should be on the same thread as DetectLanguage was called. + void CancelLanguageDetection(); + + int renderer_process_id() const { return renderer_process_id_; } + + int page_id() const { return page_id_; } + + std::string language() const { return language_; } + + private: + // Private because the class is ref counted. + friend class base::RefCountedThreadSafe<CLDHelper>; + ~CLDHelper() {} + + // Performs the actual work of detecting the language. + void DoDetectLanguage(); + + // Invoked on the UI thread once the language has been detected. + void DetectionDone(); + + // The tab contents for which the language is being detected. + TabContents* tab_contents_; + + // The id for the render process the page belongs to. + int renderer_process_id_; + + // The id and content of the page for which the language is being detected. + int page_id_; + std::wstring page_content_; + + // The language that was detected. + std::string language_; + + DISALLOW_COPY_AND_ASSIGN(CLDHelper); +}; + +#endif // CHROME_BROWSER_CLD_HELPER_H_ + diff --git a/chrome/browser/extensions/extension_tabs_module.cc b/chrome/browser/extensions/extension_tabs_module.cc index 64ee348..a97e098 100644 --- a/chrome/browser/extensions/extension_tabs_module.cc +++ b/chrome/browser/extensions/extension_tabs_module.cc @@ -839,15 +839,24 @@ bool DetectTabLanguageFunction::RunImpl() { return false; } - // Figure out what language |contents| contains. This sends an async call via - // the browser to the renderer to determine the language of the tab the - // renderer has. The renderer sends back the language of the tab after the - // tab loads (it may be delayed) to the browser, which in turn notifies this - // object that the language has been received. - contents->GetPageLanguage(); + AddRef(); // Balanced in GotLanguage() + + NavigationEntry* entry = contents->controller().GetActiveEntry(); + if (entry) { + std::string language = entry->language(); + if (!language.empty()) { + // Delay the callback invocation until after the current JS call has + // returned. + MessageLoop::current()->PostTask(FROM_HERE, NewRunnableMethod( + this, &DetectTabLanguageFunction::GotLanguage, language)); + return true; + } + } + // The tab contents does not know its language yet. Let's wait until it + // receives it. registrar_.Add(this, NotificationType::TAB_LANGUAGE_DETERMINED, - NotificationService::AllSources()); - AddRef(); // balanced in Observe() + Source<RenderViewHost>(contents->render_view_host())); + return true; } @@ -856,9 +865,16 @@ void DetectTabLanguageFunction::Observe(NotificationType type, const NotificationDetails& details) { DCHECK(type == NotificationType::TAB_LANGUAGE_DETERMINED); std::string language(*Details<std::string>(details).ptr()); + registrar_.Remove(this, NotificationType::TAB_LANGUAGE_DETERMINED, source); + + GotLanguage(language); +} + +void DetectTabLanguageFunction::GotLanguage(const std::string& language) { result_.reset(Value::CreateStringValue(language.c_str())); SendResponse(true); - Release(); // balanced in Run() + + Release(); // Balanced in Run() } // static helpers diff --git a/chrome/browser/extensions/extension_tabs_module.h b/chrome/browser/extensions/extension_tabs_module.h index 1fa7f21..4a0ec45 100644 --- a/chrome/browser/extensions/extension_tabs_module.h +++ b/chrome/browser/extensions/extension_tabs_module.h @@ -131,6 +131,7 @@ class DetectTabLanguageFunction : public AsyncExtensionFunction, virtual void Observe(NotificationType type, const NotificationSource& source, const NotificationDetails& details); + void GotLanguage(const std::string& language); NotificationRegistrar registrar_; DECLARE_EXTENSION_FUNCTION_NAME("tabs.detectLanguage") }; diff --git a/chrome/browser/renderer_host/browser_render_process_host.cc b/chrome/browser/renderer_host/browser_render_process_host.cc index eac566b..ffa61f5 100644 --- a/chrome/browser/renderer_host/browser_render_process_host.cc +++ b/chrome/browser/renderer_host/browser_render_process_host.cc @@ -735,10 +735,9 @@ bool BrowserRenderProcessHost::Send(IPC::Message* msg) { void BrowserRenderProcessHost::OnMessageReceived(const IPC::Message& msg) { mark_child_process_activity_time(); if (msg.routing_id() == MSG_ROUTING_CONTROL) { - // dispatch control messages + // Dispatch control messages. bool msg_is_ok = true; IPC_BEGIN_MESSAGE_MAP_EX(BrowserRenderProcessHost, msg, msg_is_ok) - IPC_MESSAGE_HANDLER(ViewHostMsg_PageContents, OnPageContents) IPC_MESSAGE_HANDLER(ViewHostMsg_UpdatedCacheStats, OnUpdatedCacheStats) IPC_MESSAGE_HANDLER(ViewHostMsg_SuddenTerminationChanged, @@ -762,7 +761,7 @@ void BrowserRenderProcessHost::OnMessageReceived(const IPC::Message& msg) { return; } - // dispatch incoming messages to the appropriate TabContents + // Dispatch incoming messages to the appropriate TabContents. IPC::Channel::Listener* listener = GetListenerByID(msg.routing_id()); if (!listener) { if (msg.is_sync()) { @@ -836,18 +835,6 @@ void BrowserRenderProcessHost::OnChannelError() { // TODO(darin): clean this up } -void BrowserRenderProcessHost::OnPageContents(const GURL& url, - int32 page_id, - const std::wstring& contents) { - Profile* p = profile(); - if (!p || p->IsOffTheRecord()) - return; - - HistoryService* hs = p->GetHistoryService(Profile::IMPLICIT_ACCESS); - if (hs) - hs->SetPageContents(url, contents); -} - void BrowserRenderProcessHost::OnUpdatedCacheStats( const WebCache::UsageStats& stats) { WebCacheManager::GetInstance()->ObserveStats(id(), stats); diff --git a/chrome/browser/renderer_host/browser_render_process_host.h b/chrome/browser/renderer_host/browser_render_process_host.h index 3a6e83b..0fd51ea 100644 --- a/chrome/browser/renderer_host/browser_render_process_host.h +++ b/chrome/browser/renderer_host/browser_render_process_host.h @@ -104,8 +104,6 @@ class BrowserRenderProcessHost : public RenderProcessHost, friend class VisitRelayingRenderProcessHost; // Control message handlers. - void OnPageContents(const GURL& url, int32 page_id, - const std::wstring& contents); void OnUpdatedCacheStats(const WebKit::WebCache::UsageStats& stats); void SuddenTerminationChanged(bool enabled); void OnExtensionAddListener(const std::string& event_name); diff --git a/chrome/browser/renderer_host/render_view_host.cc b/chrome/browser/renderer_host/render_view_host.cc index 1363afd..ad55180 100644 --- a/chrome/browser/renderer_host/render_view_host.cc +++ b/chrome/browser/renderer_host/render_view_host.cc @@ -46,8 +46,6 @@ #if defined(OS_WIN) // TODO(port): accessibility not yet implemented. See http://crbug.com/8288. #include "chrome/browser/browser_accessibility_manager.h" -// TODO(port): The compact language detection library works only for Windows. -#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h" #endif using base::TimeDelta; @@ -423,10 +421,6 @@ void RenderViewHost::StopFinding(bool clear_selection) { Send(new ViewMsg_StopFinding(routing_id(), clear_selection)); } -void RenderViewHost::GetPageLanguage() { - Send(new ViewMsg_DeterminePageText(routing_id())); -} - void RenderViewHost::Zoom(PageZoom::Function function) { Send(new ViewMsg_Zoom(routing_id(), function)); } @@ -759,8 +753,6 @@ void RenderViewHost::OnMessageReceived(const IPC::Message& msg) { IPC_MESSAGE_HANDLER(ViewHostMsg_DidFailProvisionalLoadWithError, OnMsgDidFailProvisionalLoadWithError) IPC_MESSAGE_HANDLER(ViewHostMsg_Find_Reply, OnMsgFindReply) - IPC_MESSAGE_HANDLER(ViewMsg_DeterminePageText_Reply, - OnDeterminePageTextReply) IPC_MESSAGE_HANDLER(ViewMsg_ExecuteCodeFinished, OnExecuteCodeFinished) IPC_MESSAGE_HANDLER(ViewHostMsg_UpdateFavIconURL, OnMsgUpdateFavIconURL) @@ -844,6 +836,7 @@ void RenderViewHost::OnMessageReceived(const IPC::Message& msg) { IPC_MESSAGE_HANDLER(ViewHostMsg_AccessibilityFocusChange, OnAccessibilityFocusChange) IPC_MESSAGE_HANDLER(ViewHostMsg_OnCSSInserted, OnCSSInserted) + IPC_MESSAGE_HANDLER(ViewHostMsg_PageContents, OnPageContents) // Have the super handle all other messages. IPC_MESSAGE_UNHANDLED(RenderWidgetHost::OnMessageReceived(msg)) IPC_END_MESSAGE_MAP_EX() @@ -1147,22 +1140,6 @@ void RenderViewHost::OnMsgFindReply(int request_id, Send(new ViewMsg_FindReplyACK(routing_id())); } -void RenderViewHost::OnDeterminePageTextReply( - const std::wstring& page_text) { -#if defined(OS_WIN) // Only for windows. - int num_languages = 0; - bool is_reliable = false; - const char* language_iso_code = LanguageCodeISO639_1( - DetectLanguageOfUnicodeText(NULL, page_text.c_str(), true, &is_reliable, - &num_languages, NULL)); - std::string language(language_iso_code); - NotificationService::current()->Notify( - NotificationType::TAB_LANGUAGE_DETERMINED, - Source<RenderViewHost>(this), - Details<std::string>(&language)); -#endif -} - void RenderViewHost::OnExecuteCodeFinished(int request_id, bool success) { std::pair<int, bool> result_details(request_id, success); NotificationService::current()->Notify( @@ -1783,3 +1760,13 @@ void RenderViewHost::OnAccessibilityFocusChange(int acc_obj_id) { void RenderViewHost::OnCSSInserted() { delegate_->DidInsertCSS(); } + +void RenderViewHost::OnPageContents(const GURL& url, + int32 page_id, + const std::wstring& contents) { + RenderViewHostDelegate::BrowserIntegration* integration_delegate = + delegate_->GetBrowserIntegrationDelegate(); + if (!integration_delegate) + return; + integration_delegate->OnPageContents(url, process()->id(), page_id, contents); +} diff --git a/chrome/browser/renderer_host/render_view_host.h b/chrome/browser/renderer_host/render_view_host.h index 0d923ca..f3b1a32 100644 --- a/chrome/browser/renderer_host/render_view_host.h +++ b/chrome/browser/renderer_host/render_view_host.h @@ -212,12 +212,6 @@ class RenderViewHost : public RenderWidgetHost, // clear the selection on the focused frame. void StopFinding(bool clear_selection); - // Get the most probable language of the text content in the tab. This sends - // a message to the render view to get the content of the page as text. The - // caller gets the language via the NotificationService by registering to the - // NotificationType TAB_LANGUAGE_DETERMINED. - void GetPageLanguage(); - // Change the zoom level of a page. void Zoom(PageZoom::Function function); @@ -423,22 +417,22 @@ class RenderViewHost : public RenderWidgetHost, // Creates a new RenderWidget with the given route id. void CreateNewWidget(int route_id, bool activatable); - // Send the response to an extension api call. + // Sends the response to an extension api call. void SendExtensionResponse(int request_id, bool success, const std::string& response, const std::string& error); - // Send a response to an extension api call that it was blocked for lack of + // Sends a response to an extension api call that it was blocked for lack of // permission. void BlockExtensionRequest(int request_id); - // Notify the renderer that its view type has changed. + // Notifies the renderer that its view type has changed. void ViewTypeChanged(ViewType::Type type); - // Tell renderer which browser window it is being attached to. + // Tells the renderer which browser window it is being attached to. void UpdateBrowserWindowId(int window_id); - // Tell render view that custom context action has been selected. + // Tells the render view that a custom context action has been selected. void PerformCustomContextMenuAction(unsigned action); protected: @@ -495,7 +489,6 @@ class RenderViewHost : public RenderWidgetHost, const gfx::Rect& selection_rect, int active_match_ordinal, bool final_update); - void OnDeterminePageTextReply(const std::wstring& tab_text); void OnExecuteCodeFinished(int request_id, bool success); void OnMsgUpdateFavIconURL(int32 page_id, const GURL& icon_url); void OnMsgDidDownloadFavIcon(int id, @@ -593,6 +586,9 @@ class RenderViewHost : public RenderWidgetHost, void OnExtensionPostMessage(int port_id, const std::string& message); void OnAccessibilityFocusChange(int acc_obj_id); void OnCSSInserted(); + void OnPageContents(const GURL& url, + int32 page_id, + const std::wstring& contents); private: friend class TestRenderViewHost; diff --git a/chrome/browser/renderer_host/render_view_host_delegate.h b/chrome/browser/renderer_host/render_view_host_delegate.h index c3df62d..de3ef0b 100644 --- a/chrome/browser/renderer_host/render_view_host_delegate.h +++ b/chrome/browser/renderer_host/render_view_host_delegate.h @@ -217,6 +217,12 @@ class RenderViewHostDelegate { virtual void OnDidGetApplicationInfo( int32 page_id, const webkit_glue::WebApplicationInfo& app_info) = 0; + + // Notification that the contents of the page has been loaded. + virtual void OnPageContents(const GURL& url, + int renderer_process_id, + int32 page_id, + const std::wstring& contents) = 0; }; // Resource ------------------------------------------------------------------ diff --git a/chrome/browser/tab_contents/navigation_entry.h b/chrome/browser/tab_contents/navigation_entry.h index e679c10..df6ad06 100644 --- a/chrome/browser/tab_contents/navigation_entry.h +++ b/chrome/browser/tab_contents/navigation_entry.h @@ -389,6 +389,15 @@ class NavigationEntry { return restore_type_; } + // The ISO 639-1 language code (ex: en, fr, zh...) for the page. + // Can be empty if the language was not detected yet or is unknown. + void set_language(const std::string& language) { + language_ = language; + } + std::string language() const { + return language_; + } + private: // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING // Session/Tab restore save portions of this class so that it can be recreated @@ -413,6 +422,7 @@ class NavigationEntry { GURL user_typed_url_; bool has_post_data_; RestoreType restore_type_; + std::string language_; // ISO 639-1 language code. // This is a cached version of the result of GetTitleForDisplay. It prevents // us from having to do URL formatting on the URL evey time the title is diff --git a/chrome/browser/tab_contents/tab_contents.cc b/chrome/browser/tab_contents/tab_contents.cc index 10efa22..e8275ce 100644 --- a/chrome/browser/tab_contents/tab_contents.cc +++ b/chrome/browser/tab_contents/tab_contents.cc @@ -369,6 +369,9 @@ TabContents::~TabContents() { UMA_HISTOGRAM_TIMES("Tab.Close", base::TimeTicks::Now() - tab_close_start_time_); } + + if (cld_helper_.get()) + cld_helper_->CancelLanguageDetection(); } // static @@ -637,6 +640,24 @@ void TabContents::Activate() { delegate_->ActivateContents(this); } +void TabContents::PageLanguageDetected() { + DCHECK(cld_helper_.get()); + + NavigationEntry* entry = controller_.GetActiveEntry(); + if (process()->id() == cld_helper_->renderer_process_id() && + entry && entry->page_id() == cld_helper_->page_id()) { + entry->set_language(cld_helper_->language()); + } + + std::string lang = cld_helper_->language(); + NotificationService::current()->Notify( + NotificationType::TAB_LANGUAGE_DETERMINED, + Source<RenderViewHost>(render_view_host()), + Details<std::string>(&lang)); + + cld_helper_ = NULL; // Release the CLD helper. +} + void TabContents::ShowContents() { if (render_widget_host_view()) render_widget_host_view()->DidBecomeSelected(); @@ -1076,10 +1097,6 @@ void TabContents::StopFinding(bool clear_selection) { render_view_host()->StopFinding(clear_selection); } -void TabContents::GetPageLanguage() { - render_view_host()->GetPageLanguage(); -} - void TabContents::OnSavePage() { // If we can not save the page, try to download it. if (!SavePackage::IsSavableContents(contents_mime_type())) { @@ -1735,6 +1752,38 @@ void TabContents::OnDidGetApplicationInfo( delegate()->OnDidGetApplicationInfo(this, page_id); } +void TabContents::OnPageContents(const GURL& url, + int renderer_process_id, + int32 page_id, + const std::wstring& contents) { + // Don't index any https pages. People generally don't want their bank + // accounts, etc. indexed on their computer, especially since some of these + // things are not marked cachable. + // TODO(brettw) we may want to consider more elaborate heuristics such as + // the cachability of the page. We may also want to consider subframes (this + // test will still index subframes if the subframe is SSL). + if (!url.SchemeIsSecure()) { + Profile* p = profile(); + if (p && !p->IsOffTheRecord()) { + HistoryService* hs = p->GetHistoryService(Profile::IMPLICIT_ACCESS); + if (hs) + hs->SetPageContents(url, contents); + } + } + + // Detect the page language. The detection happens on the file thread. + // PageLanguageDetected() is called when the language has been detected. + if (cld_helper_.get()) { + // There is already a language detection in flight, cancel it to avoid + // having multiple PageLanguageDetected() notifications on this tab. (They + // would cause a crasher as cld_helper_ would be NULLed on the 1st + // notification). + cld_helper_->CancelLanguageDetection(); + } + cld_helper_ = new CLDHelper(this, renderer_process_id, page_id, contents); + cld_helper_->DetectLanguage(); +} + void TabContents::DidStartProvisionalLoadForFrame( RenderViewHost* render_view_host, bool is_main_frame, diff --git a/chrome/browser/tab_contents/tab_contents.h b/chrome/browser/tab_contents/tab_contents.h index 112675d..4b921ce 100644 --- a/chrome/browser/tab_contents/tab_contents.h +++ b/chrome/browser/tab_contents/tab_contents.h @@ -18,6 +18,7 @@ #include "base/scoped_ptr.h" #include "chrome/browser/autocomplete/autocomplete_edit.h" #include "chrome/browser/cancelable_request.h" +#include "chrome/browser/cld_helper.h" #include "chrome/browser/dom_ui/dom_ui_factory.h" #include "chrome/browser/download/save_package.h" #include "chrome/browser/fav_icon_helper.h" @@ -288,6 +289,9 @@ class TabContents : public PageNavigator, // to the foreground if necessary. void Activate(); + // Called by CLDHelper to notify the language of a page has been detected. + void PageLanguageDetected(); + // TODO(brettw) document these. virtual void ShowContents(); virtual void HideContents(); @@ -538,9 +542,6 @@ class TabContents : public PageNavigator, return last_search_result_; } - // Get the most probable language of the text content in the tab. - void GetPageLanguage(); - // Misc state & callbacks ---------------------------------------------------- // Set whether the contents should block javascript message boxes or not. @@ -814,6 +815,10 @@ class TabContents : public PageNavigator, virtual void OnDidGetApplicationInfo( int32 page_id, const webkit_glue::WebApplicationInfo& info); + virtual void OnPageContents(const GURL& url, + int renderer_process_id, + int32 page_id, + const std::wstring& contents); // RenderViewHostDelegate::Resource implementation. virtual void DidStartProvisionalLoadForFrame(RenderViewHost* render_view_host, @@ -1178,6 +1183,9 @@ class TabContents : public PageNavigator, // profile scoped_refptr<URLRequestContextGetter> request_context_; + // Used to retrieve the language of the current page. + scoped_refptr<CLDHelper> cld_helper_; + // --------------------------------------------------------------------------- DISALLOW_COPY_AND_ASSIGN(TabContents); diff --git a/chrome/browser/tab_contents/web_contents_unittest.cc b/chrome/browser/tab_contents/web_contents_unittest.cc index 9b59406..8254001 100644 --- a/chrome/browser/tab_contents/web_contents_unittest.cc +++ b/chrome/browser/tab_contents/web_contents_unittest.cc @@ -17,6 +17,7 @@ #include "chrome/common/render_messages.h" #include "chrome/common/url_constants.h" #include "chrome/test/testing_profile.h" +#include "chrome/test/ui_test_utils.h" #include "ipc/ipc_channel.h" #include "testing/gtest/include/gtest/gtest.h" @@ -194,7 +195,9 @@ class TabContentsTest : public RenderViewHostTestHarness { public: TabContentsTest() : RenderViewHostTestHarness(), - ui_thread_(ChromeThread::UI, &message_loop_) { + ui_thread_(ChromeThread::UI, &message_loop_), + file_thread_(ChromeThread::FILE) { + file_thread_.Start(); } private: @@ -206,6 +209,7 @@ class TabContentsTest : public RenderViewHostTestHarness { } ChromeThread ui_thread_; + ChromeThread file_thread_; }; // Test to make sure that title updates get stripped of whitespace. @@ -1427,3 +1431,101 @@ TEST_F(TabContentsTest, NoJSMessageOnInterstitials) { &did_suppress_message); EXPECT_TRUE(did_suppress_message); } + +// TODO(port): The compact language detection library works only for Windows. +#if defined(OS_WIN) +// Tests the normal case of language detection when a page loads. +TEST_F(TabContentsTest, NormalLanguageDetection) { + const GURL kURL("http://www.google.com"); + const std::wstring kPageContent(L"C'est le contenu de la page. Fantastique!"); + + // Navigate to a page. + NavigateAndCommit(kURL); + + NavigationEntry* entry = contents()->controller().GetActiveEntry(); + ASSERT_TRUE(entry); + + // Simulate the PageContents message that triggers the language detection. + rvh()->TestOnMessageReceived(ViewHostMsg_PageContents(0, + kURL, entry->page_id(), + kPageContent)); + std::string language = ui_test_utils::WaitForLanguageDetection(contents()); + EXPECT_EQ("fr", language); +} + +// Tests the case of language detection when several page loads happen for the +// same page. +TEST_F(TabContentsTest, LanguageDetectionMultipleLoads) { + const GURL kURL("http://www.google.com"); + const std::wstring kPageContent(L"The page contents. It is fantastic!"); + + // Navigate to a page. + NavigateAndCommit(kURL); + + NavigationEntry* entry = contents()->controller().GetActiveEntry(); + ASSERT_TRUE(entry); + + // Simulate 2 PageContents messages that triggers the language detection. + rvh()->TestOnMessageReceived(ViewHostMsg_PageContents(0, + kURL, entry->page_id(), + kPageContent)); + rvh()->TestOnMessageReceived(ViewHostMsg_PageContents(0, + kURL, entry->page_id(), + kPageContent)); + std::string language = ui_test_utils::WaitForLanguageDetection(contents()); + EXPECT_EQ("en", language); +} + +// Tests the case of language detection when several page loads happen. +TEST_F(TabContentsTest, LanguageDetectionMultipleNavigations) { + const GURL kURL1("http://www.google.com"); + const GURL kURL2("http://www.yahoo.com"); + const std::wstring kPageContent1(L"The page contents. It is fantastic!"); + const std::wstring kPageContent2(L"C'est le contenu de la page.Fantastique!"); + // Navigate to the first page. + NavigateAndCommit(kURL1); + NavigationEntry* entry = contents()->controller().GetActiveEntry(); + ASSERT_TRUE(entry); + + // Simulate the PageContents message that triggers the language detection. + rvh()->TestOnMessageReceived(ViewHostMsg_PageContents(0, + kURL1, entry->page_id(), + kPageContent1)); + + // Before the language notification is back to the UI thread, navigate to a + // new page. + NavigateAndCommit(kURL2); + entry = contents()->controller().GetActiveEntry(); + ASSERT_TRUE(entry); + + // Simulate the PageContents message that triggers the language detection. + rvh()->TestOnMessageReceived(ViewHostMsg_PageContents(0, + kURL2, entry->page_id(), + kPageContent2)); + + std::string language = ui_test_utils::WaitForLanguageDetection(contents()); + EXPECT_EQ("fr", language); +} +#endif + +// Tests the case of language detection when the tab is closed before the +// detection has been performed. +TEST_F(TabContentsTest, LanguageDetectionClosedTab) { + const GURL kURL("http://www.google.com"); + const std::wstring kPageContent(L"The page contents. It is fantastic!"); + // Navigate to the page. + NavigateAndCommit(kURL); + NavigationEntry* entry = contents()->controller().GetActiveEntry(); + ASSERT_TRUE(entry); + + // Simulate the PageContents message that triggers the language detection. + rvh()->TestOnMessageReceived(ViewHostMsg_PageContents(0, + kURL, entry->page_id(), + kPageContent)); + + // Now close the tab. + DeleteContents(); + + // Run all pending message to make sure nothing crashes. + MessageLoop::current()->RunAllPending(); +} diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 2c12531..f8f684e 100755 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -306,6 +306,8 @@ 'browser/chromeos/usb_mount_observer.h', 'browser/chromeos/version_loader.cc', 'browser/chromeos/version_loader.h', + 'browser/cld_helper.cc', + 'browser/cld_helper.h', 'browser/cocoa/about_ipc_bridge.h', 'browser/cocoa/about_ipc_bridge.mm', 'browser/cocoa/about_ipc_controller.h', diff --git a/chrome/common/notification_type.h b/chrome/common/notification_type.h index 97a28f3..dfd87f3 100644 --- a/chrome/common/notification_type.h +++ b/chrome/common/notification_type.h @@ -223,8 +223,9 @@ class NotificationType { // is the InfoBubble. INFO_BUBBLE_CREATED, - // Sent after a call to RenderViewHost::DeterminePageLanguage. The details - // are Details<std::string> and the source is Source<RenderViewHost>. + // Sent when the language (English, French...) for a page has been detected. + // The details Details<std::string> contain the ISO 639-1 language code and + // the source is Source<RenderViewHost>. TAB_LANGUAGE_DETERMINED, // Send after the code is run in specified tab. diff --git a/chrome/common/render_messages_internal.h b/chrome/common/render_messages_internal.h index 453cdf3..6b1d519 100644 --- a/chrome/common/render_messages_internal.h +++ b/chrome/common/render_messages_internal.h @@ -253,14 +253,6 @@ IPC_BEGIN_MESSAGES(View) string16 /* search_text */, WebKit::WebFindOptions) - // Send from the browser to the rendered to get the text content of the page. - IPC_MESSAGE_ROUTED0(ViewMsg_DeterminePageText) - - // Send from the renderer to the browser to return the text content of the - // page. - IPC_MESSAGE_ROUTED1(ViewMsg_DeterminePageText_Reply, - std::wstring /* the language */) - // Send from the renderer to the browser to return the script running result. IPC_MESSAGE_ROUTED2(ViewMsg_ExecuteCodeFinished, int, /* request id */ @@ -1105,7 +1097,7 @@ IPC_BEGIN_MESSAGES(ViewHost) IPC_MESSAGE_ROUTED1(ViewHostMsg_UpdateSpellingPanelWithMisspelledWord, string16 /* the word to update the panel with */) - // Initiate a download based on user actions like 'ALT+click'. + // Initiates a download based on user actions like 'ALT+click'. IPC_MESSAGE_ROUTED2(ViewHostMsg_DownloadUrl, GURL /* url */, GURL /* referrer */) @@ -1123,9 +1115,11 @@ IPC_BEGIN_MESSAGES(ViewHost) bool /* out - success */, std::wstring /* out - prompt field */) - // Sets the contents for the given page (URL and page ID are the first two - // arguments) given the contents that is the 3rd. - IPC_MESSAGE_CONTROL3(ViewHostMsg_PageContents, GURL, int32, std::wstring) + // Provides the contents for the given page that was loaded recently. + IPC_MESSAGE_ROUTED3(ViewHostMsg_PageContents, + GURL /* URL of the page */, + int32 /* page id */, + std::wstring /*page contents */) // Used to get the extension message bundle. IPC_SYNC_MESSAGE_CONTROL1_1(ViewHostMsg_GetExtensionMessageBundle, @@ -1150,13 +1144,13 @@ IPC_BEGIN_MESSAGES(ViewHost) // user right clicked. IPC_MESSAGE_ROUTED1(ViewHostMsg_ContextMenu, ContextMenuParams) - // Request that the given URL be opened in the specified manner. + // Requests that the given URL be opened in the specified manner. IPC_MESSAGE_ROUTED3(ViewHostMsg_OpenURL, GURL /* url */, GURL /* referrer */, WindowOpenDisposition /* disposition */) - // Notify that the preferred size of the content changed. + // Notifies that the preferred size of the content changed. IPC_MESSAGE_ROUTED1(ViewHostMsg_DidContentsPreferredSizeChange, gfx::Size /* pref_size */) diff --git a/chrome/renderer/render_view.cc b/chrome/renderer/render_view.cc index 418bf4c..d46f5e2 100644 --- a/chrome/renderer/render_view.cc +++ b/chrome/renderer/render_view.cc @@ -261,7 +261,6 @@ RenderView::RenderView(RenderThreadBase* render_thread, send_preferred_size_changes_(false), ALLOW_THIS_IN_INITIALIZER_LIST( notification_provider_(new NotificationProvider(this))), - determine_page_text_after_loading_stops_(false), view_type_(ViewType::INVALID), browser_window_id_(-1), last_top_level_navigation_page_id_(-1), @@ -446,7 +445,6 @@ void RenderView::OnMessageReceived(const IPC::Message& message) { IPC_MESSAGE_HANDLER(ViewMsg_CopyImageAt, OnCopyImageAt) IPC_MESSAGE_HANDLER(ViewMsg_ExecuteEditCommand, OnExecuteEditCommand) IPC_MESSAGE_HANDLER(ViewMsg_Find, OnFind) - IPC_MESSAGE_HANDLER(ViewMsg_DeterminePageText, OnDeterminePageText) IPC_MESSAGE_HANDLER(ViewMsg_Zoom, OnZoom) IPC_MESSAGE_HANDLER(ViewMsg_SetZoomLevelForLoadingHost, OnSetZoomLevelForLoadingHost) @@ -598,23 +596,18 @@ void RenderView::CapturePageInfo(int load_id, bool preliminary_capture) { if (!preliminary_capture) last_indexed_page_id_ = load_id; - // get the URL for this page + // Get the URL for this page. GURL url(main_frame->url()); if (url.is_empty()) return; - // full text + // Retrieve the frame's full text. std::wstring contents; CaptureText(main_frame, &contents); if (contents.size()) { - // Send the text to the browser for indexing. - Send(new ViewHostMsg_PageContents(url, load_id, contents)); - } - - // Send over text content of this page to the browser. - if (determine_page_text_after_loading_stops_) { - determine_page_text_after_loading_stops_ = false; - Send(new ViewMsg_DeterminePageText_Reply(routing_id_, contents)); + // Send the text to the browser for indexing (the browser might decide not + // to index, if the URL is HTTPS for instance) and language discovery. + Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents)); } // thumbnail @@ -626,15 +619,6 @@ void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) { if (!frame) return; - // Don't index any https pages. People generally don't want their bank - // accounts, etc. indexed on their computer, especially since some of these - // things are not marked cachable. - // TODO(brettw) we may want to consider more elaborate heuristics such as - // the cachability of the page. We may also want to consider subframes (this - // test will still index subframes if the subframe is SSL). - if (GURL(frame->url()).SchemeIsSecure()) - return; - #ifdef TIME_TEXT_RETRIEVAL double begin = time_util::GetHighResolutionTimeNow(); #endif @@ -2982,23 +2966,6 @@ void RenderView::OnFind(int request_id, const string16& search_text, } } -void RenderView::OnDeterminePageText() { - if (!is_loading_) { - if (!webview()) - return; - WebFrame* main_frame = webview()->mainFrame(); - std::wstring contents; - CaptureText(main_frame, &contents); - Send(new ViewMsg_DeterminePageText_Reply(routing_id_, contents)); - determine_page_text_after_loading_stops_ = false; - return; - } - - // We set |determine_page_text_after_loading_stops_| true here so that, - // after page has been loaded completely, the text in the page is captured. - determine_page_text_after_loading_stops_ = true; -} - void RenderView::DnsPrefetch(const std::vector<std::string>& host_names) { Send(new ViewHostMsg_DnsPrefetch(host_names)); } diff --git a/chrome/renderer/render_view.h b/chrome/renderer/render_view.h index 32ea8c6..706f5f0 100644 --- a/chrome/renderer/render_view.h +++ b/chrome/renderer/render_view.h @@ -934,9 +934,6 @@ class RenderView : public RenderWidget, // Hopds a reference to the service which provides desktop notifications. scoped_ptr<NotificationProvider> notification_provider_; - // Set to true if request for capturing page text has been made. - bool determine_page_text_after_loading_stops_; - // Holds state pertaining to a navigation that we initiated. This is held by // the WebDataSource::ExtraData attribute. We use pending_navigation_state_ // as a temporary holder for the state until the WebDataSource corresponding diff --git a/chrome/test/data/english_page.html b/chrome/test/data/english_page.html new file mode 100644 index 0000000..539920d --- /dev/null +++ b/chrome/test/data/english_page.html @@ -0,0 +1,6 @@ +<html> +<head><title>This page is in English</title></head> +<body> +No joke! This is a page written in English. Awesome don't you think? +</body> +</html> diff --git a/chrome/test/data/french_page.html b/chrome/test/data/french_page.html new file mode 100644 index 0000000..7fcf684 --- /dev/null +++ b/chrome/test/data/french_page.html @@ -0,0 +1,6 @@ +<html> +<head><title>Cette page est en Français</title></head> +<body> +Cette page a été rédigée en français. Saviez-vous que le Français est la langue officielle des jeux olympiques? Ça vous en bouche un coin, pas vrai? +</body> +</html> diff --git a/chrome/test/ui_test_utils.cc b/chrome/test/ui_test_utils.cc index 6723084..e4b5818 100644 --- a/chrome/test/ui_test_utils.cc +++ b/chrome/test/ui_test_utils.cc @@ -254,6 +254,33 @@ class SimpleNotificationObserver : public NotificationObserver { DISALLOW_COPY_AND_ASSIGN(SimpleNotificationObserver); }; +class LanguageDetectionNotificationObserver : public NotificationObserver { + public: + explicit LanguageDetectionNotificationObserver( + RenderViewHost* render_view_host) { + registrar_.Add(this, NotificationType::TAB_LANGUAGE_DETERMINED, + Source<RenderViewHost>(render_view_host)); + ui_test_utils::RunMessageLoop(); + } + + virtual void Observe(NotificationType type, + const NotificationSource& source, + const NotificationDetails& details) { + language_ = *(Details<std::string>(details).ptr()); + MessageLoopForUI::current()->Quit(); + } + + std::string language() const { + return language_; + } + + private: + NotificationRegistrar registrar_; + std::string language_; + + DISALLOW_COPY_AND_ASSIGN(LanguageDetectionNotificationObserver); +}; + class FindInPageNotificationObserver : public NotificationObserver { public: explicit FindInPageNotificationObserver(TabContents* parent_tab) @@ -484,6 +511,11 @@ void WaitForFocusInBrowser(Browser* browser) { browser); } +std::string WaitForLanguageDetection(TabContents* tab) { + LanguageDetectionNotificationObserver observer(tab->render_view_host()); + return observer.language(); +} + int FindInPage(TabContents* tab_contents, const string16& search_string, bool forward, bool match_case, int* ordinal) { tab_contents->StartFinding(search_string, forward, match_case); diff --git a/chrome/test/ui_test_utils.h b/chrome/test/ui_test_utils.h index 89dc8fc..2c4cd95 100644 --- a/chrome/test/ui_test_utils.h +++ b/chrome/test/ui_test_utils.h @@ -115,6 +115,10 @@ void WaitForFocusChange(RenderViewHost* rvh); // traversal). void WaitForFocusInBrowser(Browser* browser); +// Waits for the language of the page to have been detected and returns it. +// This should be called right after a navigation notification was received. +std::string WaitForLanguageDetection(TabContents* tab_contents); + // Performs a find in the page of the specified tab. Returns the number of // matches found. |ordinal| is an optional parameter which is set to the index // of the current match. |