summary refs log tree commit diff stats
path: root/chrome/browser/renderer_host/translation_service.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chrome/browser/renderer_host/translation_service.cc')
-rw-r--r-- chrome/browser/renderer_host/translation_service.cc 548
1 files changed, 533 insertions, 15 deletions
diff --git a/chrome/browser/renderer_host/translation_service.cc b/chrome/browser/renderer_host/translation_service.cc
index af5e7a9..db4441e 100644
--- a/chrome/browser/renderer_host/translation_service.cc
+++ b/chrome/browser/renderer_host/translation_service.cc
@@ -4,26 +4,544 @@
#include "chrome/browser/renderer_host/translation_service.h"
-#include "base/string_util.h"
-#include "chrome/browser/renderer_host/resource_message_filter.h"
+#include "base/json/json_reader.h"
+#include "base/stl_util-inl.h"
+#include "chrome/browser/profile.h"
#include "chrome/common/render_messages.h"
+#include "net/base/escape.h"
-TranslationService::TranslationService(ResourceMessageFilter* filter)
- : resource_message_filter_(filter) {
+#if defined(GOOGLE_CHROME_BUILD)
+#include "chrome/browser/renderer_host/translate/translate_internal.h"
+#else
+// Defining dummy URLs for unit-tests to pass.
+#define TRANSLATE_SERVER_URL "http://disabled"
+#define TRANSLATE_SERVER_SECURE_URL "https://disabled"
+#endif
+
+namespace {
+
+// The URLs we send translation requests to.
+const char kServiceURL[] = TRANSLATE_SERVER_URL;  // Used when the request is not secure.
+const char kSecureServiceURL[] = TRANSLATE_SERVER_SECURE_URL;  // Used when the request is secure.
+
+// The different params used when sending requests to the translate server.
+const char kVersionParam[] = "v";  // Sent as "v=1.0" by AddTextToRequestString().
+const char kLangPairParam[] = "langpair";  // Value is "<source>%7C<target>".
+const char kTextParam[] = "q";  // Appended once per text added to the request.
+const char kClientParam[] = "client";  // Sent as "client=cr" (cr = Chrome).
+const char kFormatParam[] = "format";  // Sent as "format=html".
+const char kSSLParam[] = "ssl";  // Sent as "ssl=1", only for secure requests.
+const char kTranslationCountParam[] = "tc";  // NOTE(review): not referenced in the visible part of this file.
+
+// Describes languages deemed equivalent from a translation point of view.
+// This is used to detect unnecessary translations.
+struct LocaleToCLDLanguage {
+  const char* locale_language;  // Language Chrome locale is in.
+  const char* cld_language;     // Language the CLD reports.
+};
+
+// Lookup table consulted by TranslationService::ShouldTranslatePage(). It is
+// only ever read, so it is declared const to rule out accidental modification
+// of this file-level global.
+const LocaleToCLDLanguage kLocaleToCLDLanguages[] = {
+  { "en-GB", "en" },
+  { "en-US", "en" },
+  { "es-419", "es" },
+};
+
+// The maximum size in bytes after which the server will refuse the request.
+const size_t kTextRequestMaxSize = 1024 * 30;  // 30 KB.
+
+// Delay to wait before sending a request to the translation server.
+const int kSendRequestDelay = 100;  // Presumably milliseconds (handed to PostDelayedTask) - TODO confirm.
+
+// Delayed task that sends the pending translation request of one renderer
+// once some time has elapsed with no new request from that renderer.
+// The TranslationRequest holding this task cancels it in its destructor and in
+// Clear(); a canceled task does nothing when run. That is why it is OK for the
+// task to keep a raw pointer to the TranslationService.
+class SendTranslationRequestTask : public CancelableTask {
+ public:
+ SendTranslationRequestTask(TranslationService* translation_service,
+ int renderer_id,
+ bool secure);
+ virtual void Run();  // Sends the renderer's pending request unless canceled.
+ virtual void Cancel();  // Turns a later Run() into a no-op.
+
+ private:
+ TranslationService* translation_service_;  // Service asked to send the request.
+ int renderer_id_;  // Identifies the renderer whose pending request is sent.
+ bool secure_;  // Selects the secure or the non-secure pending request.
+ bool canceled_;  // Set by Cancel(), checked by Run().
+
+ DISALLOW_COPY_AND_ASSIGN(SendTranslationRequestTask);
+};
+
+} // namespace
+
+// Accumulates everything needed to issue one request to the translation
+// server. Several renderer queries are grouped in a single request so as to
+// limit the load sent to the translation server.
+struct TranslationService::TranslationRequest {
+  TranslationRequest(int routing_id,
+                     int page_id,
+                     const std::string& source_lang,
+                     const std::string& target_lang,
+                     bool secure)
+      : routing_id(routing_id),
+        page_id(page_id),
+        source_lang(source_lang),
+        target_lang(target_lang),
+        secure(secure),
+        renderer_request_info(new RendererRequestInfoList()),
+        send_query_task(NULL) {
+  }
+
+  // Cancels the delayed send task, if any, so that it does nothing should it
+  // still run after this request is gone.
+  ~TranslationRequest() {
+    if (send_query_task != NULL)
+      send_query_task->Cancel();
+  }
+
+  // Drops everything accumulated so far and cancels the delayed send task.
+  // routing_id and secure are left untouched.
+  void Clear() {
+    if (send_query_task != NULL) {
+      send_query_task->Cancel();
+      send_query_task = NULL;
+    }
+    renderer_request_info->clear();
+    query.clear();
+    target_lang.clear();
+    source_lang.clear();
+    page_id = 0;
+  }
+
+  int routing_id;
+  int page_id;
+  std::string source_lang;
+  std::string target_lang;
+  bool secure;
+  // The request string built up by AddTextToRequestString().
+  std::string query;
+  // Held through a scoped_ptr so that the list is not copied when the request
+  // is sent: at that point its ownership is simply transferred to
+  // renderer_request_infos_.
+  scoped_ptr<RendererRequestInfoList> renderer_request_info;
+  // The delayed task that will send this request, or NULL if none is pending.
+  CancelableTask* send_query_task;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// SendTranslationRequestTask
+
+SendTranslationRequestTask::SendTranslationRequestTask(
+    TranslationService* translation_service,
+    int renderer_id,
+    bool secure)
+    : translation_service_(translation_service),
+      renderer_id_(renderer_id),
+      secure_(secure),
+      canceled_(false) {
+}
+
+// Asks the TranslationService to send the renderer's pending request, unless
+// Cancel() has been called in the meantime.
+void SendTranslationRequestTask::Run() {
+  if (!canceled_) {
+    translation_service_->SendTranslationRequestForRenderer(renderer_id_,
+                                                            secure_);
+  }
+}
+
+// Only records the cancellation; Run() checks the flag.
+void SendTranslationRequestTask::Cancel() {
+  canceled_ = true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// TranslationService, public:
+
+// |message_sender| is what translation responses are sent through.
+TranslationService::TranslationService(IPC::Message::Sender* message_sender)
+    : message_sender_(message_sender) {
+}
+
+TranslationService::~TranslationService() {
+  // Requests that were never sent. Deleting a TranslationRequest cancels its
+  // delayed send task.
+  STLDeleteContainerPairSecondPointers(
+      pending_translation_requests_.begin(),
+      pending_translation_requests_.end());
+  STLDeleteContainerPairSecondPointers(
+      pending_secure_translation_requests_.begin(),
+      pending_secure_translation_requests_.end());
+
+  // Requests still in flight: both the URLFetcher keys and the request info
+  // lists they map to are deleted.
+  STLDeleteContainerPairPointers(
+      renderer_request_infos_.begin(),
+      renderer_request_infos_.end());
}
// Queues |text_chunks| for translation on behalf of the renderer |routing_id|.
// Queries coming from the same renderer and page are accumulated in a single
// pending request. That request is sent either when adding more text would
// reach kTextRequestMaxSize, or when the delayed task posted at the end of this
// function runs. A single query that is too large on its own is answered
// immediately with error code 1 and no text.
void TranslationService::Translate(int routing_id,
+ int page_id,
int work_id,
- const std::vector<string16>& text_chunks,
- std::string from_language,
- std::string to_language,
+ const TextChunks& text_chunks,
+ const std::string& source_lang,
+ const std::string& target_lang,
bool secure) {
- std::vector<string16> translated_text;
- for (std::vector<string16>::const_iterator iter = text_chunks.begin();
- iter != text_chunks.end(); ++iter) {
- translated_text.push_back(StringToUpperASCII(*iter));
- }
- resource_message_filter_->Send(
- new ViewMsg_TranslateTextReponse(routing_id, work_id,
- 0, translated_text));
+ TranslationRequestMap& request_map =
+ secure ? pending_secure_translation_requests_ :
+ pending_translation_requests_;
+ TranslationRequestMap::iterator iter = request_map.find(routing_id);
+ TranslationRequest* translation_request = NULL;
+
+ string16 utf16_text = MergeTextChunks(text_chunks);
+ std::string text = EscapeUrlEncodedData(UTF16ToUTF8(utf16_text));  // Appended verbatim to the query below.
+
+ if (iter != request_map.end()) {
+ translation_request = iter->second;
+ if (page_id != translation_request->page_id) {
+ // We are getting a request from a renderer for a different page id.
+ // This indicates we navigated away from the page that was being
+ // translated. We should drop the current pending translations.
+ translation_request->Clear();
+ // Set the new state.
+ translation_request->page_id = page_id;
+ translation_request->source_lang = source_lang;
+ translation_request->target_lang = target_lang;
+ } else {
+ DCHECK(translation_request->source_lang == source_lang);
+ DCHECK(translation_request->target_lang == target_lang);
+ // Cancel the pending task to send the query. We'll be posting a new one
+ // after we have updated the request.
+ translation_request->send_query_task->Cancel();
+ translation_request->send_query_task = NULL;
+ if (translation_request->query.size() + text.size() >=
+ kTextRequestMaxSize) {
+ // The request would be too big with that last addition of text, send
+ // the request now. (Single requests too big to be sent in 1 translation
+ // request are dealt with below.)
+ if (!translation_request->query.empty()) { // Empty means |text| alone is too big; handled below.
+ SendRequestToTranslationServer(translation_request);
+ // The translation request has been deleted.
+ translation_request = NULL;
+ iter = request_map.end();
+ }
+ }
+ }
+ }
+
+ if (translation_request == NULL) {
+ translation_request = new TranslationRequest(routing_id, page_id,
+ source_lang, target_lang,
+ secure);
+ request_map[routing_id] = translation_request;
+ }
+
+ AddTextToRequestString(&(translation_request->query), text,
+ source_lang, target_lang, secure);
+
+ translation_request->renderer_request_info->push_back(
+ RendererRequestInfo(routing_id, work_id));  // One entry per query added to the request.
+
+ if (translation_request->query.size() > kTextRequestMaxSize) {
+ DCHECK(translation_request->renderer_request_info->size() == 1U);
+ // This one request is too large for the translation service.
+ // TODO(jcampan): we should support such requests by splitting them.
+ iter = request_map.find(routing_id);
+ DCHECK(iter != request_map.end());
+ request_map.erase(iter);
+ message_sender_->Send(
+ new ViewMsg_TranslateTextReponse(routing_id, work_id, 1, TextChunks()));  // 1 = error, no text.
+ delete translation_request;
+ return;
+ }
+
+ // Now post the new task that will ensure we'll send the request to the
+ // translation server if no renderer requests are received within a
+ // reasonable amount of time.
+ DCHECK(!translation_request->send_query_task);
+ translation_request->send_query_task =
+ new SendTranslationRequestTask(this, routing_id, secure);
+ MessageLoop::current()->PostDelayedTask(FROM_HERE,
+ translation_request->send_query_task, GetSendRequestDelay());
+}
+
+// Sends the pending translation request of the renderer |renderer_id|. The
+// request is looked up among the secure pending requests when |secure| is
+// true, among the non-secure ones otherwise. Does nothing if that renderer has
+// no pending request.
+void TranslationService::SendTranslationRequestForRenderer(int renderer_id,
+                                                           bool secure) {
+  TranslationRequestMap& request_map =
+      secure ? pending_secure_translation_requests_ :
+               pending_translation_requests_;
+  TranslationRequestMap::const_iterator iter = request_map.find(renderer_id);
+  if (iter == request_map.end()) {
+    // Not expected: a send task is only live while its request is in the map.
+    // Bail out explicitly rather than relying on a debug-only check, since
+    // dereferencing end() is undefined behavior.
+    NOTREACHED();
+    return;
+  }
+  SendRequestToTranslationServer(iter->second);
+}
+
+// Called when the fetch |source| of a translation request is done. Parses the
+// JSON in |data| into one list of text chunks per translated string and
+// forwards the result to the renderers; any failure along the way is reported
+// through TranslationFailed().
+void TranslationService::OnURLFetchComplete(const URLFetcher* source,
+                                            const GURL& url,
+                                            const URLRequestStatus& status,
+                                            int response_code,
+                                            const ResponseCookies& cookies,
+                                            const std::string& data) {
+  const bool fetch_succeeded =
+      status.is_success() && response_code == 200 && !data.empty();
+  if (!fetch_succeeded) {
+    TranslationFailed(source);
+    return;
+  }
+
+  // The JSONReader requires an array or map at the root, so a response that
+  // is a simple string is wrapped in an array before being parsed.
+  const bool is_simple_string = data.size() > 1U && data[0] == '"';
+  const std::string json = is_simple_string ? "[" + data + "]" : data;
+  scoped_ptr<Value> value(base::JSONReader::Read(json, true));
+  if (value.get() == NULL) {
+    NOTREACHED() << "Translation server returned invalid JSON response.";
+    TranslationFailed(source);
+    return;
+  }
+
+  TextChunksList translated_chunks_list;
+  if (value->IsType(Value::TYPE_STRING)) {
+    // The request was for a single string: the response is the translated
+    // string itself.
+    string16 translated_string;
+    if (!value->GetAsUTF16(&translated_string)) {
+      NOTREACHED();
+      TranslationFailed(source);
+      return;
+    }
+    TextChunks single_chunk;
+    single_chunk.push_back(translated_string);
+    translated_chunks_list.push_back(single_chunk);
+  } else if (value->IsType(Value::TYPE_LIST)) {
+    ListValue* translations = static_cast<ListValue*>(value.get());
+    for (size_t index = 0; index < translations->GetSize(); ++index) {
+      string16 escaped_translation;
+      if (!translations->GetStringAsUTF16(index, &escaped_translation)) {
+        NOTREACHED() << "Translation server returned unexpected JSON response "
+                        " (unexpected type in list).";
+        TranslationFailed(source);
+        return;
+      }
+      TextChunks chunks;
+      SplitTextChunks(UnescapeForHTML(escaped_translation), &chunks);
+      translated_chunks_list.push_back(chunks);
+    }
+  } else {
+    NOTREACHED() << "Translation server returned unexpected JSON response "
+                    " (not a list).";
+    TranslationFailed(source);
+    return;
+  }
+
+  // All the translated text chunks were extracted successfully; send them to
+  // the renderer.
+  SendResponseToRenderer(source, 0, translated_chunks_list);
+}
+
+// static
+// Returns false when |page_language| and |chrome_language| are listed as
+// equivalent in kLocaleToCLDLanguages, true otherwise.
+bool TranslationService::ShouldTranslatePage(
+    const std::string& page_language, const std::string& chrome_language) {
+  // Most locale names are the actual ISO 639 codes that the Google translate
+  // API uses; the table only covers the locale names longer than 2 chars.
+  // See l10n_util.cc for the list.
+  for (size_t index = 0; index != arraysize(kLocaleToCLDLanguages); ++index) {
+    const LocaleToCLDLanguage& entry = kLocaleToCLDLanguages[index];
+    const bool equivalent = chrome_language == entry.locale_language &&
+                            page_language == entry.cld_language;
+    if (equivalent)
+      return false;
+  }
+  return true;
+}
+
+// static
+// Translation is enabled unless the service URL is the dummy one (host
+// "disabled") that is defined when translate_internal.h is not included.
+bool TranslationService::IsTranslationEnabled() {
+  const GURL service_url(kServiceURL);
+  return service_url.host() != "disabled";
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// TranslationService, protected:
+
+int TranslationService::GetSendRequestDelay() const {
+ return kSendRequestDelay;  // Translate() passes this value to PostDelayedTask as the delay.
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// TranslationService, private:
+
+// Starts the fetch that POSTs the query of |request| to the translation
+// server, records which renderer requests that fetch answers, then removes
+// |request| from its pending map and deletes it.
+void TranslationService::SendRequestToTranslationServer(
+    TranslationRequest* request) {
+  DCHECK(!request->query.empty());
+
+  const int routing_id = request->routing_id;
+  const bool is_secure = request->secure;
+
+  const GURL server_url(is_secure ? kSecureServiceURL : kServiceURL);
+  URLFetcher* fetcher = URLFetcher::Create(routing_id /* used in tests */,
+                                           server_url, URLFetcher::POST, this);
+  fetcher->set_upload_data("application/x-www-form-urlencoded",
+                           request->query);
+  fetcher->set_request_context(Profile::GetDefaultRequestContext());
+  fetcher->Start();
+
+  // From now on renderer_request_infos_ owns the RendererRequestInfoList.
+  renderer_request_infos_[fetcher] = request->renderer_request_info.release();
+
+  // The request is no longer pending: take it out of its map and free it.
+  TranslationRequestMap& pending_requests =
+      is_secure ? pending_secure_translation_requests_ :
+                  pending_translation_requests_;
+  TranslationRequestMap::iterator entry = pending_requests.find(routing_id);
+  DCHECK(entry != pending_requests.end());
+  pending_requests.erase(entry);
+  delete request;
+}
+
+// Sends the outcome of the fetch |const_url_fetcher| to every renderer request
+// that was grouped in it, and deletes the fetcher together with its entry in
+// renderer_request_infos_.
+// |error_code| is 0 on success, in which case |text_chunks_list| is expected to
+// hold one entry per grouped renderer request. If it does not, the whole
+// response is reported as failed (error code 1, the value TranslationFailed()
+// uses) instead of indexing |text_chunks_list| out of bounds.
+void TranslationService::SendResponseToRenderer(
+    const URLFetcher* const_url_fetcher, int error_code,
+    const TextChunksList& text_chunks_list) {
+  // Takes ownership of the fetcher: it is deleted when this function returns.
+  scoped_ptr<const URLFetcher> url_fetcher(const_url_fetcher);
+  RendererRequestInfoMap::iterator iter =
+      renderer_request_infos_.find(url_fetcher.get());
+  if (iter == renderer_request_infos_.end()) {
+    // Unknown fetcher: there is nobody to notify. Return explicitly, as
+    // dereferencing end() is undefined behavior.
+    NOTREACHED();
+    return;
+  }
+  scoped_ptr<RendererRequestInfoList> request_info_list(iter->second);
+  // The list is now owned locally; drop the map entry right away so that the
+  // map never refers to a deleted fetcher or list.
+  renderer_request_infos_.erase(iter);
+
+  // The number of translations comes from the server response. Do not trust
+  // it to match the number of requests that were sent.
+  if (error_code == 0 &&
+      request_info_list->size() != text_chunks_list.size()) {
+    error_code = 1;
+  }
+
+  for (size_t i = 0; i < request_info_list->size(); ++i) {
+    RendererRequestInfo& request_info = request_info_list->at(i);
+    message_sender_->Send(
+        new ViewMsg_TranslateTextReponse(request_info.routing_id,
+                                         request_info.work_id,
+                                         error_code,
+                                         error_code ? TextChunks() :
+                                                      text_chunks_list[i]));
+  }
+}
+
+void TranslationService::TranslationFailed(const URLFetcher* url_fetcher) {
+ SendResponseToRenderer(url_fetcher, 1, TranslationService::TextChunksList());
+}
+
+// static
+// Concatenates |text_chunks| into one string, each chunk wrapped in an anchor
+// tag carrying its index so that the order of the chunks can be preserved.
+string16 TranslationService::MergeTextChunks(const TextChunks& text_chunks) {
+  // A lone chunk has no order to preserve, so it needs no anchor tag.
+  if (text_chunks.size() == 1U)
+    return text_chunks[0];
+
+  const string16 kAnchorStart = ASCIIToUTF16("<a _CR_TR_ id='");
+  const string16 kAnchorStartEnd = ASCIIToUTF16("'>");
+  const string16 kAnchorEnd = ASCIIToUTF16("</a>");
+
+  string16 merged;
+  for (size_t index = 0; index != text_chunks.size(); ++index) {
+    merged += kAnchorStart;
+    merged += IntToString16(index);
+    merged += kAnchorStartEnd;
+    merged += text_chunks[index];
+    merged += kAnchorEnd;
+  }
+  return merged;
+}
+
+// static
+// Splits |translated_text| at the magic anchor tags and appends the resulting
+// chunks, stripped of anchor tags and HTML-unescaped, to |text_chunks|.
+void TranslationService::SplitTextChunks(const string16& translated_text,
+                                         TextChunks* text_chunks) {
+  const string16 kOpenTag = ASCIIToUTF16("<a _CR_TR_ ");
+  const string16 kCloseTag = ASCIIToUTF16("</a>");
+
+  size_t chunk_start = translated_text.find(kOpenTag);
+  if (chunk_start == std::string::npos) {
+    // No magic anchor tag, it was a single chunk.
+    text_chunks->push_back(translated_text);
+    return;
+  }
+
+  // The server might send us some HTML with duplicated and unbalanced tags.
+  // A chunk runs from an open tag up to the next open tag located after at
+  // least one close tag.
+  while (chunk_start != std::string::npos) {
+    const size_t close_pos =
+        translated_text.find(kCloseTag, chunk_start + kOpenTag.size());
+    // Where the following chunk starts; npos ends the loop.
+    size_t next_start = std::string::npos;
+    string16 raw_chunk;
+    if (close_pos == std::string::npos) {
+      // No close tag. Just report the whole text as one chunk.
+      raw_chunk = translated_text;
+    } else {
+      // Look for the next open tag after this close tag.
+      next_start = translated_text.find(kOpenTag, close_pos);
+      if (next_start != std::string::npos) {
+        raw_chunk =
+            translated_text.substr(chunk_start, next_start - chunk_start);
+      } else {
+        raw_chunk = translated_text.substr(chunk_start);
+      }
+    }
+    chunk_start = next_start;
+
+    // The translation server leaves some ampersand character in the
+    // translation, hence the unescaping between the two tag removals.
+    const string16 unescaped_chunk = UnescapeForHTML(RemoveTag(raw_chunk));
+    text_chunks->push_back(RemoveTag(unescaped_chunk));
+  }
+}
+
+// static
+// Returns |text| with its anchor tags removed, knowing they could be
+// extra/unbalanced tags: well-formed "<a ...>" opening tags are dropped, an
+// unclosed "<a " is kept as is along with the text that follows it, and every
+// "</a>" is removed.
+string16 TranslationService::RemoveTag(const string16& text) {
+  const string16 kStartTag(ASCIIToUTF16("<a "));
+  const string16 kEndTag(ASCIIToUTF16("</a>"));
+  const string16 kGreaterThan(ASCIIToUTF16(">"));
+  const string16 kLessThan(ASCIIToUTF16("<"));
+
+  string16 result;
+  size_t start_index = text.find(kStartTag);
+  if (start_index == std::string::npos) {
+    result = text;
+  } else {
+    // Keep the text located before the first anchor tag. This is done before
+    // the loop so that it is also kept when that first tag turns out to be
+    // unclosed (the loop then exits early).
+    result = text.substr(0, start_index);
+    while (true) {
+      size_t stop_index = text.find(kGreaterThan, start_index);
+      size_t next_tag_index = text.find(kLessThan, start_index + 1);
+      // Ignore unclosed <a tag: keep it and the rest of the text untouched.
+      // (Subsequent closing tags are removed after the loop.)
+      if (stop_index == std::string::npos ||
+          (next_tag_index != std::string::npos &&
+           stop_index > next_tag_index)) {
+        result.append(text.substr(start_index));
+        break;
+      }
+      // Skip the tag itself and copy what lies between it and the next one.
+      start_index = text.find(kStartTag, start_index + 1);
+      if (start_index == std::string::npos) {
+        result += text.substr(stop_index + 1);
+        break;
+      }
+      result += text.substr(stop_index + 1, start_index - stop_index - 1);
+    }
+  }
+
+  // Now remove </a> tags.
+  ReplaceSubstringsAfterOffset(&result, 0, kEndTag, ASCIIToUTF16(""));
+  return result;
+}
+
+// static
+// Appends |text| as one more text parameter of the request string |request|.
+// When |request| is still empty, the parameters every request must carry
+// (version, client, format, language pair and, if |secure|, the SSL flag) are
+// written first. |text| is appended as is.
+void TranslationService::AddTextToRequestString(std::string* request,
+                                                const std::string& text,
+                                                const std::string& source_lang,
+                                                const std::string& target_lang,
+                                                bool secure) {
+  std::string& query = *request;
+  if (query.empty()) {
+    // First text of the request, add the required parameters.
+    query += kVersionParam;
+    query += "=1.0&";
+    query += kClientParam;
+    query += "=cr&";  // cr = Chrome.
+    query += kFormatParam;
+    query += "=html&";
+    query += kLangPairParam;
+    query += "=";
+    query += source_lang;
+    query += "%7C";  // | URL encoded.
+    query += target_lang;
+    if (secure) {
+      query += "&";
+      query += kSSLParam;
+      query += "=1";
+    }
+  }
+  query += "&";
+  query += kTextParam;
+  query += "=";
+  query += text;
}