// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

/**
 * Namespace for the language detection helpers. The global |__gCrWeb| object
 * is not created here; it must already exist when this script runs.
 */
__gCrWeb['languageDetection'] = {};

(function() {
  /**
   * The cache of the text content that was extracted from the page.
   * @type {?string}
   */
  __gCrWeb.languageDetection.bufferedTextContent = null;

  /**
   * The number of active requests that have populated the cache. This is
   * incremented every time a call to
   * |__gCrWeb.languageDetection.detectLanguage| populates the buffer. This is
   * decremented every time there is a call to retrieve the buffer. The buffer
   * is purged when this goes down to 0.
   * @type {number}
   */
  __gCrWeb.languageDetection.activeRequests = 0;

  /**
   * Returns true if translation of the page is allowed.
   * Translation is not allowed when a "notranslate" meta tag is defined, i.e.
   * a <meta name="google"> element whose "content" or "value" attribute is
   * exactly "notranslate".
   * @return {boolean} true if translation of the page is allowed.
   */
  __gCrWeb.languageDetection['translationAllowed'] = function() {
    var metaTags = document.getElementsByTagName('meta');
    for (var i = 0; i < metaTags.length; ++i) {
      if (metaTags[i].name === 'google') {
        if (metaTags[i].content === 'notranslate' ||
            metaTags[i].getAttribute('value') === 'notranslate') {
          return false;
        }
      }
    }
    return true;
  };

  /**
   * Gets the content of the first meta tag matching the given httpEquiv.
   * The "http-equiv" value found in the page is lower-cased before the
   * comparison; the argument itself is compared as-is.
   * @param {string} httpEquiv Value of the "httpEquiv" attribute, has to be
   *     lower case.
   * @return {string} Value of the "content" attribute of the meta tag, or an
   *     empty string when no meta tag matches.
   */
  __gCrWeb.languageDetection['getMetaContentByHttpEquiv'] =
      function(httpEquiv) {
    var metaTags = document.getElementsByTagName('meta');
    for (var i = 0; i < metaTags.length; ++i) {
      if (metaTags[i].httpEquiv.toLowerCase() === httpEquiv) {
        return metaTags[i].content;
      }
    }
    return '';
  };

  // Used by the |getTextContent| function below. Keys are upper-case element
  // names whose subtrees are skipped when extracting text.
  __gCrWeb.languageDetection['nonTextNodeNames'] = {
    'SCRIPT': 1,
    'NOSCRIPT': 1,
    'STYLE': 1,
    'EMBED': 1,
    'OBJECT': 1
  };

  /**
   * Walks a DOM tree to extract the text content.
   * Does not walk into a node when its name is in |nonTextNodeNames|, nor into
   * elements whose computed style is "display: none" or
   * "visibility: hidden". A line break is emitted for every BR element and
   * before every element that is not displayed inline (except BODY).
   * @param {HTMLElement} node The DOM tree
   * @param {number} maxLen Output will be truncated to |maxLen|
   * @return {string} The text content
   */
  __gCrWeb.languageDetection['getTextContent'] = function(node, maxLen) {
    if (!node || maxLen <= 0) {
      return '';
    }

    var txt = '';
    // Formatting and filtering.
    if (node.nodeType === Node.ELEMENT_NODE) {
      // Reject non-text nodes such as scripts.
      if (__gCrWeb.languageDetection.nonTextNodeNames[node.nodeName]) {
        return '';
      }
      if (node.nodeName === 'BR') {
        return '\n';
      }
      var style = window.getComputedStyle(node);
      // Only proceed if the element is visible.
      if (style.display === 'none' || style.visibility === 'hidden') {
        return '';
      }
      // No need to add a line break before |body| as it is the first element.
      if (node.nodeName !== 'BODY' && style.display !== 'inline') {
        txt = '\n';
      }
    }

    if (node.hasChildNodes()) {
      // Each child only gets the budget that is still left, so the recursion
      // stops as soon as |maxLen| characters have been collected.
      for (var childIdx = 0;
           childIdx < node.childNodes.length && txt.length < maxLen;
           childIdx++) {
        txt += __gCrWeb.languageDetection.getTextContent(
            node.childNodes[childIdx], maxLen - txt.length);
      }
    } else if (node.nodeType === Node.TEXT_NODE && node.textContent) {
      txt += node.textContent.substring(0, maxLen - txt.length);
    }

    return txt;
  };

  /**
   * Detects if a page has content that needs translation and informs the
   * native side. The text content of a page is cached in
   * |__gCrWeb.languageDetection.bufferedTextContent| and retrieved at a later
   * time directly from the Obj-C side. This is to avoid sending the text
   * itself through |invokeOnHost|; only the metadata is sent that way.
   */
  __gCrWeb.languageDetection['detectLanguage'] = function() {
    if (!__gCrWeb.languageDetection.translationAllowed()) {
      __gCrWeb.message.invokeOnHost({
          'command': 'languageDetection.textCaptured',
          'translationAllowed': false});
    } else {
      // Constant for the maximum length of the extracted text returned by
      // |-detectLanguage| to the native side.
      // Matches desktop implementation.
      // Note: This should stay in sync with the constant in
      // js_language_detection_manager.mm .
      var kMaxIndexChars = 65535;
      // Use epoch timestamps to measure the elapsed time. Subtracting
      // |getMilliseconds()| values only compares the 0-999 millisecond
      // component of each date and yields wrong (possibly negative) durations
      // whenever the capture crosses a second boundary.
      var captureBeginTime = Date.now();
      __gCrWeb.languageDetection.activeRequests += 1;
      __gCrWeb.languageDetection.bufferedTextContent =
          __gCrWeb.languageDetection.getTextContent(document.body,
                                                    kMaxIndexChars);
      var captureTextTime = Date.now() - captureBeginTime;
      var httpContentLanguage =
          __gCrWeb.languageDetection.getMetaContentByHttpEquiv(
              'content-language');
      __gCrWeb.message.invokeOnHost({
          'command': 'languageDetection.textCaptured',
          'translationAllowed': true,
          'captureTextTime': captureTextTime,
          'htmlLang': document.documentElement.lang,
          'httpContentLanguage': httpContentLanguage});
    }
  };

  /**
   * Retrieves the cached text content of a page. Returns it and then purges
   * the cache once no active request is left.
   * @return {?string} The cached text content, or null when nothing is cached.
   */
  __gCrWeb.languageDetection['retrieveBufferedTextContent'] = function() {
    var textContent = __gCrWeb.languageDetection.bufferedTextContent;
    // Never let the counter drop below zero: an unmatched retrieval would
    // otherwise leave it negative, and the buffer populated by the next
    // detection would never be purged.
    if (__gCrWeb.languageDetection.activeRequests > 0) {
      __gCrWeb.languageDetection.activeRequests -= 1;
    }
    if (__gCrWeb.languageDetection.activeRequests === 0) {
      __gCrWeb.languageDetection.bufferedTextContent = null;
    }
    return textContent;
  };
}());  // End of anonymous function.