summaryrefslogtreecommitdiffstats
path: root/chrome/renderer/translate
diff options
context:
space:
mode:
authorandrewhayden@chromium.org <andrewhayden@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-03-27 16:16:03 +0000
committerandrewhayden@chromium.org <andrewhayden@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-03-27 16:16:03 +0000
commite5a7a2ab88c9ccb23662f4df213cd7c7a24800f2 (patch)
treebb70857390e267f53bdda5ab8286bbc5f0a39c64 /chrome/renderer/translate
parent644616d9f93f76905f37f00fa2317d9a21032008 (diff)
downloadchromium_src-e5a7a2ab88c9ccb23662f4df213cd7c7a24800f2.zip
chromium_src-e5a7a2ab88c9ccb23662f4df213cd7c7a24800f2.tar.gz
chromium_src-e5a7a2ab88c9ccb23662f4df213cd7c7a24800f2.tar.bz2
Make it possible to read CLD data from a file.
Note that this change DOES NOT "opt-in" any platform at this time, it merely makes it possible to read the CLD data in from a file if cld_version=2 and cld_dynamic=1. Subsequent changes will be required for platforms to "opt-in" to using this approach; e.g., each platform will have to generate their data file and package it appropriately before turning this feature on in compile flags. IMPORTANT: Can't be enabled on on Android until the SIGBUS in CLD is fixed: https://code.google.com/p/cld2/issues/detail?id=11 BUG=326023 Review URL: https://codereview.chromium.org/187393005 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@259900 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/translate')
-rw-r--r--chrome/renderer/translate/translate_helper.cc187
-rw-r--r--chrome/renderer/translate/translate_helper.h82
2 files changed, 268 insertions, 1 deletions
diff --git a/chrome/renderer/translate/translate_helper.cc b/chrome/renderer/translate/translate_helper.cc
index ccd2279..fb95cbf 100644
--- a/chrome/renderer/translate/translate_helper.cc
+++ b/chrome/renderer/translate/translate_helper.cc
@@ -4,8 +4,15 @@
#include "chrome/renderer/translate/translate_helper.h"
+#if defined(CLD2_DYNAMIC_MODE)
+#include <stdint.h>
+#endif
+
#include "base/bind.h"
#include "base/compiler_specific.h"
+#if defined(CLD2_DYNAMIC_MODE)
+#include "base/files/memory_mapped_file.h"
+#endif
#include "base/logging.h"
#include "base/message_loop/message_loop.h"
#include "base/strings/string16.h"
@@ -19,6 +26,12 @@
#include "components/translate/core/common/translate_util.h"
#include "components/translate/language_detection/language_detection_util.h"
#include "content/public/renderer/render_view.h"
+#include "extensions/common/constants.h"
+#include "ipc/ipc_platform_file.h"
+#if defined(CLD2_DYNAMIC_MODE)
+#include "content/public/common/url_constants.h"
+#include "third_party/cld_2/src/public/compact_lang_det.h"
+#endif
#include "third_party/WebKit/public/web/WebDocument.h"
#include "third_party/WebKit/public/web/WebElement.h"
#include "third_party/WebKit/public/web/WebFrame.h"
@@ -64,6 +77,13 @@ const char kContentSecurityPolicy[] = "script-src 'self' 'unsafe-eval'";
} // namespace
+#if defined(CLD2_DYNAMIC_MODE)
+// The mmap for the CLD2 data must be held forever once it is available in the
+// process. This is declared static in the translate_helper.h.
+base::LazyInstance<TranslateHelper::CLDMmapWrapper>::Leaky
+ TranslateHelper::s_cld_mmap_ = LAZY_INSTANCE_INITIALIZER;
+#endif
+
////////////////////////////////////////////////////////////////////////////////
// TranslateHelper, public:
//
@@ -71,12 +91,63 @@ TranslateHelper::TranslateHelper(content::RenderView* render_view)
: content::RenderViewObserver(render_view),
page_id_(-1),
translation_pending_(false),
- weak_method_factory_(this) {
+ weak_method_factory_(this)
+#if defined(CLD2_DYNAMIC_MODE)
+ ,cld2_data_file_polling_started_(false),
+ cld2_data_file_polling_canceled_(false),
+ deferred_page_capture_(false),
+ deferred_page_id_(-1),
+ deferred_contents_(ASCIIToUTF16(""))
+#endif
+ {
}
TranslateHelper::~TranslateHelper() {
CancelPendingTranslation();
+#if defined(CLD2_DYNAMIC_MODE)
+ CancelCLD2DataFilePolling();
+#endif
+}
+
+void TranslateHelper::PrepareForUrl(const GURL& url) {
+#if defined(CLD2_DYNAMIC_MODE)
+ deferred_page_capture_ = false;
+ deferred_contents_.clear();
+ if (cld2_data_file_polling_started_)
+ return;
+
+ // TODO(andrewhayden): Refactor translate_manager.cc's IsTranslatableURL to
+ // components/translate/core/common/translate_util.cc, and ignore any URL
+ // that fails that check. This will require moving unit tests and rewiring
+ // other function calls as well, so for now replicate the logic here.
+ if (url.is_empty())
+ return;
+ if (url.SchemeIs(content::kChromeUIScheme))
+ return;
+ if (url.SchemeIs(content::kChromeDevToolsScheme))
+ return;
+ if (url.SchemeIs(content::kFtpScheme))
+ return;
+#if defined(OS_CHROMEOS)
+ if (url.SchemeIs(extensions::kExtensionScheme) &&
+ url.DomainIs(file_manager::kFileManagerAppId))
+ return;
+#endif
+
+ // Start polling for CLD data.
+ cld2_data_file_polling_started_ = true;
+ TranslateHelper::SendCLD2DataFileRequest(0, 1000);
+#endif
+}
+
+#if defined(CLD2_DYNAMIC_MODE)
+void TranslateHelper::DeferPageCaptured(const int page_id,
+ const base::string16& contents) {
+ deferred_page_capture_ = true;
+ deferred_page_id_ = page_id;
+ deferred_contents_ = contents;
}
+#endif
void TranslateHelper::PageCaptured(int page_id,
const base::string16& contents) {
@@ -92,6 +163,17 @@ void TranslateHelper::PageCaptured(int page_id,
WebFrame* main_frame = GetMainFrame();
if (!main_frame || render_view()->GetPageId() != page_id)
return;
+
+ // TODO(andrewhayden): UMA insertion point here: Track if data is available.
+ // TODO(andrewhayden): Retry insertion point here, retry till data available.
+#if defined(CLD2_DYNAMIC_MODE)
+ if (!CLD2::isDataLoaded()) {
+ // We're in dynamic mode and CLD data isn't loaded. Retry when CLD data
+ // is loaded, if ever.
+ TranslateHelper::DeferPageCaptured(page_id, contents);
+ return;
+ }
+#endif
page_id_ = page_id;
WebDocument document = main_frame->document();
std::string content_language = document.contentLanguage().utf8();
@@ -136,6 +218,9 @@ void TranslateHelper::CancelPendingTranslation() {
translation_pending_ = false;
source_lang_.clear();
target_lang_.clear();
+#if defined(CLD2_DYNAMIC_MODE)
+ CancelCLD2DataFilePolling();
+#endif
}
////////////////////////////////////////////////////////////////////////////////
@@ -310,6 +395,9 @@ bool TranslateHelper::OnMessageReceived(const IPC::Message& message) {
IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message)
IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage)
IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation)
+#if defined(CLD2_DYNAMIC_MODE)
+ IPC_MESSAGE_HANDLER(ChromeViewMsg_CLDDataAvailable, OnCLDDataAvailable);
+#endif
IPC_MESSAGE_UNHANDLED(handled = false)
IPC_END_MESSAGE_MAP()
return handled;
@@ -499,3 +587,100 @@ WebFrame* TranslateHelper::GetMainFrame() {
return web_view->mainFrame();
}
+
+#if defined(CLD2_DYNAMIC_MODE)
+void TranslateHelper::CancelCLD2DataFilePolling() {
+ cld2_data_file_polling_canceled_ = true;
+}
+
+void TranslateHelper::SendCLD2DataFileRequest(const int delay_millis,
+ const int next_delay_millis) {
+ // Terminate immediately if told to stop polling.
+ if (cld2_data_file_polling_canceled_)
+ return;
+
+ // Terminate immediately if data is already loaded.
+ if (CLD2::isDataLoaded())
+ return;
+
+ // Else, send the IPC message to the browser process requesting the data...
+ Send(new ChromeViewHostMsg_NeedCLDData(routing_id()));
+
+ // ... and enqueue another delayed task to call again. This will start a
+ // chain of polling that will last until the pointer stops being NULL,
+ // which is the right thing to do.
+ // NB: In the great majority of cases, the data file will be available and
+ // the very first delayed task will be a no-op that terminates the chain.
+ // It's only while downloading the file that this will chain for a
+ // nontrivial amount of time.
+ // Use a weak pointer to avoid keeping this helper object around forever.
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&TranslateHelper::SendCLD2DataFileRequest,
+ weak_method_factory_.GetWeakPtr(),
+ next_delay_millis, next_delay_millis),
+ base::TimeDelta::FromMilliseconds(delay_millis));
+}
+
+void TranslateHelper::OnCLDDataAvailable(
+ const IPC::PlatformFileForTransit ipc_file_handle,
+ const uint64 data_offset,
+ const uint64 data_length) {
+ LoadCLDDData(ipc_file_handle, data_offset, data_length);
+ if (deferred_page_capture_ && CLD2::isDataLoaded()) {
+ deferred_page_capture_ = false; // Don't do this a second time.
+ PageCaptured(deferred_page_id_, deferred_contents_);
+ deferred_page_id_ = -1; // Clean up for sanity
+ deferred_contents_.clear(); // Clean up for sanity
+ }
+}
+
+void TranslateHelper::LoadCLDDData(
+ const IPC::PlatformFileForTransit ipc_file_handle,
+ const uint64 data_offset,
+ const uint64 data_length) {
+ // Terminate immediately if told to stop polling.
+ if (cld2_data_file_polling_canceled_)
+ return;
+
+ // Terminate immediately if data is already loaded.
+ if (CLD2::isDataLoaded())
+ return;
+
+ // Grab the file handle
+ base::PlatformFile platform_file =
+ IPC::PlatformFileForTransitToPlatformFile(ipc_file_handle);
+ if (platform_file == base::kInvalidPlatformFileValue) {
+ LOG(ERROR) << "Can't find the CLD data file.";
+ return;
+ }
+ base::File basic_file(platform_file);
+
+ // mmap the file
+ s_cld_mmap_.Get().value = new base::MemoryMappedFile();
+ bool initialized = s_cld_mmap_.Get().value->Initialize(basic_file.Pass());
+ if (!initialized) {
+ LOG(ERROR) << "mmap initialization failed";
+ delete s_cld_mmap_.Get().value;
+ s_cld_mmap_.Get().value = NULL;
+ return;
+ }
+
+ // Sanity checks
+ uint64 max_int32 = std::numeric_limits<int32>::max();
+ if (data_length + data_offset > s_cld_mmap_.Get().value->length()
+ || data_length > max_int32) { // max signed 32 bit integer
+ LOG(ERROR) << "Illegal mmap config: data_offset="
+ << data_offset << ", data_length=" << data_length
+ << ", mmap->length()=" << s_cld_mmap_.Get().value->length();
+ delete s_cld_mmap_.Get().value;
+ s_cld_mmap_.Get().value = NULL;
+ return;
+ }
+
+ // Initialize the CLD subsystem... and it's all done!
+ const uint8* data_ptr = s_cld_mmap_.Get().value->data() + data_offset;
+ CLD2::loadDataFromRawAddress(data_ptr, data_length);
+ DCHECK(CLD2::isDataLoaded()) << "Failed to load CLD data from mmap";
+}
+#endif
diff --git a/chrome/renderer/translate/translate_helper.h b/chrome/renderer/translate/translate_helper.h
index 88dd518..b66e399 100644
--- a/chrome/renderer/translate/translate_helper.h
+++ b/chrome/renderer/translate/translate_helper.h
@@ -7,11 +7,21 @@
#include <string>
+#if defined(CLD2_DYNAMIC_MODE)
+#include "base/files/memory_mapped_file.h"
+#endif
#include "base/gtest_prod_util.h"
+#if defined(CLD2_DYNAMIC_MODE)
+#include "base/lazy_instance.h"
+#endif
#include "base/memory/weak_ptr.h"
#include "base/time/time.h"
#include "components/translate/core/common/translate_errors.h"
#include "content/public/renderer/render_view_observer.h"
+#if defined(CLD2_DYNAMIC_MODE)
+#include "ipc/ipc_platform_file.h"
+#include "url/gurl.h"
+#endif
namespace blink {
class WebDocument;
@@ -29,6 +39,11 @@ class TranslateHelper : public content::RenderViewObserver {
// Informs us that the page's text has been extracted.
void PageCaptured(int page_id, const base::string16& contents);
+ // Lets the translation system know that we are preparing to navigate to
+ // the specified URL. If there is anything that can or should be done before
+ // this URL loads, this is the time to prepare for it.
+ void PrepareForUrl(const GURL& url);
+
protected:
// The following methods are protected so they can be overridden in
// unit-tests.
@@ -148,6 +163,73 @@ class TranslateHelper : public content::RenderViewObserver {
// Method factory used to make calls to TranslatePageImpl.
base::WeakPtrFactory<TranslateHelper> weak_method_factory_;
+#if defined(CLD2_DYNAMIC_MODE)
+ // Do not ask for CLD data any more.
+ void CancelCLD2DataFilePolling();
+
+ // Invoked when PageCaptured is called prior to obtaining CLD data. This
+ // method stores the page ID into deferred_page_id_ and COPIES the contents
+ // of the page, then sets deferred_page_capture_ to true. When CLD data is
+ // eventually received (in OnCLDDataAvailable), any deferred request will be
+ // "resurrected" and allowed to proceed automatically, assuming that the
+ // page ID has not changed.
+ void DeferPageCaptured(const int page_id, const base::string16& contents);
+
+ // Immediately send an IPC request to the browser process to get the CLD
+ // data file. In most cases, the file will already exist and we will only
+ // poll once; but since the file might need to be downloaded first, poll
+ // indefinitely until a ChromeViewMsg_CLDDataAvailable message is received
+ // from the browser process.
+ // Polling will automatically halt as soon as the renderer obtains a
+ // reference to the data file.
+ void SendCLD2DataFileRequest(const int delay_millis,
+ const int next_delay_millis);
+
+ // Invoked when a ChromeViewMsg_CLDDataAvailable message is received from
+ // the browser process, providing a file handle for the CLD data file. If a
+ // PageCaptured request was previously deferred with DeferPageCaptured and
+ // the page ID has not yet changed, the PageCaptured is reinvoked to
+ // "resurrect" the language detection pathway.
+ void OnCLDDataAvailable(const IPC::PlatformFileForTransit ipc_file_handle,
+ const uint64 data_offset,
+ const uint64 data_length);
+
+ // After receiving data in OnCLDDataAvailable, loads the data into CLD2.
+ void LoadCLDDData(const IPC::PlatformFileForTransit ipc_file_handle,
+ const uint64 data_offset,
+ const uint64 data_length);
+
+ // A struct that contains the pointer to the CLD mmap. Used so that we can
+ // leverage LazyInstance:Leaky to properly scope the lifetime of the mmap.
+ struct CLDMmapWrapper {
+ CLDMmapWrapper() {
+ value = NULL;
+ }
+ base::MemoryMappedFile* value;
+ };
+ static base::LazyInstance<CLDMmapWrapper>::Leaky s_cld_mmap_;
+
+ // Whether or not polling for CLD2 data has started.
+ bool cld2_data_file_polling_started_;
+
+ // Whether or not CancelCLD2DataFilePolling has been called.
+ bool cld2_data_file_polling_canceled_;
+
+ // Whether or not a PageCaptured event arrived prior to CLD data becoming
+ // available. If true, deferred_page_id_ contains the most recent page ID
+ // and deferred_contents_ contains the most recent contents.
+ bool deferred_page_capture_;
+
+ // The ID of the page most recently reported to PageCaptured if
+ // deferred_page_capture_ is true.
+ int deferred_page_id_;
+
+ // The contents of the page most recently reported to PageCaptured if
+ // deferred_page_capture_ is true.
+ base::string16 deferred_contents_;
+
+#endif
+
DISALLOW_COPY_AND_ASSIGN(TranslateHelper);
};