// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Namespace object that holds every language detection helper defined below.
// Assumes the |__gCrWeb| global was created by an earlier script — TODO confirm.
__gCrWeb['languageDetection'] = {};


(function() {
/**
 * The cache of the text content that was extracted from the page.
 * It is written by |detectLanguage|, read by |retrieveBufferedTextContent|,
 * and is null whenever nothing is cached.
 */
__gCrWeb.languageDetection.bufferedTextContent = null;

/**
 * The number of active requests that have populated the cache. This is
 * incremented every time a call to |__gCrWeb.languageDetection.detectLanguage|
 * populates the buffer. This is decremented every time there is a call to
 * |__gCrWeb.languageDetection.retrieveBufferedTextContent|. The buffer is
 * purged when this goes down to 0.
 */
__gCrWeb.languageDetection.activeRequests = 0;

/**
 * Tells whether the page permits translation.
 * A page opts out by declaring a meta tag named "google" whose "content"
 * property or "value" attribute equals "notranslate".
 * @return {boolean} false if such a meta tag exists, true otherwise.
 */
__gCrWeb.languageDetection['translationAllowed'] = function() {
  var kOptOutValue = 'notranslate';
  var candidates = document.getElementsByTagName('meta');
  for (var index = 0; index < candidates.length; index += 1) {
    var tag = candidates[index];
    // Only meta tags addressed to "google" can carry the opt-out.
    if (tag.name !== 'google') {
      continue;
    }
    if (tag.content === kOptOutValue) {
      return false;
    }
    if (tag.getAttribute('value') === kOptOutValue) {
      return false;
    }
  }
  return true;
};

/**
 * Looks up the first meta tag whose "http-equiv" matches |httpEquiv| and
 * returns its content. The tag's value is lower-cased before comparison, so
 * the match is case insensitive as long as the argument is lower case.
 * @param {string} httpEquiv Value of the "httpEquiv" attribute to look for;
 *     has to be lower case.
 * @return {string} Value of the "content" attribute of the matching meta tag,
 *     or the empty string when no tag matches.
 */
__gCrWeb.languageDetection['getMetaContentByHttpEquiv'] = function(httpEquiv) {
  var candidates = document.getElementsByTagName('meta');
  var index = 0;
  while (index < candidates.length) {
    var tag = candidates[index];
    var normalized = tag.httpEquiv.toLowerCase();
    if (normalized === httpEquiv) {
      return tag.content;
    }
    index += 1;
  }
  return '';
};

// Used by the |getTextContent| function below.
// Names of the elements whose subtrees are skipped during text extraction.
// Keys are looked up by |node.nodeName|; only the truthiness of the value
// matters.
__gCrWeb.languageDetection['nonTextNodeNames'] = {
  'SCRIPT': 1,
  'NOSCRIPT': 1,
  'STYLE': 1,
  'EMBED': 1,
  'OBJECT': 1
};

/**
 * Recursively collects the text found under a DOM node.
 * Elements listed in |nonTextNodeNames| and elements whose computed style
 * hides them contribute nothing. A BR element yields a line break, and every
 * non-inline element other than BODY is prefixed with one.
 * @param {Node} node Root of the subtree to read; a falsy value yields ''.
 * @param {number} maxLen Output will be truncated to |maxLen|.
 * @return {string} The text content.
 */
__gCrWeb.languageDetection['getTextContent'] = function(node, maxLen) {
  if (!node || maxLen <= 0) {
    return '';
  }

  var collected = '';

  // Element-specific filtering and formatting.
  if (node.nodeType === Node.ELEMENT_NODE) {
    var tagName = node.nodeName;
    // Scripts, styles and similar elements never hold readable text.
    if (__gCrWeb.languageDetection.nonTextNodeNames[tagName]) {
      return '';
    }
    if (tagName === 'BR') {
      return '\n';
    }
    var computed = window.getComputedStyle(node);
    var isHidden =
        computed.display === 'none' || computed.visibility === 'hidden';
    if (isHidden) {
      return '';
    }
    // |body| is the first element, so it needs no leading line break.
    var startsNewLine = tagName !== 'BODY' && computed.display !== 'inline';
    if (startsNewLine) {
      collected = '\n';
    }
  }

  // Leaf node: only text nodes with content add anything.
  if (!node.hasChildNodes()) {
    if (node.nodeType === Node.TEXT_NODE && node.textContent) {
      collected += node.textContent.substring(0, maxLen - collected.length);
    }
    return collected;
  }

  // Inner node: append each child's text until the budget is used up.
  var position = 0;
  while (position < node.childNodes.length && collected.length < maxLen) {
    var remaining = maxLen - collected.length;
    collected += __gCrWeb.languageDetection.getTextContent(
        node.childNodes[position], remaining);
    position += 1;
  }
  return collected;
};

/**
 * Detects if a page has content that needs translation and informs the native
 * side. The text content of the page is cached in
 * |__gCrWeb.languageDetection.bufferedTextContent| and retrieved at a later
 * time directly from the Obj-C side, so the text itself is not part of the
 * |invokeOnHost| message.
 *
 * When translation is not allowed, only a 'languageDetection.textCaptured'
 * message with 'translationAllowed' set to false is sent and nothing is
 * cached.
 */
__gCrWeb.languageDetection['detectLanguage'] = function() {
  if (!__gCrWeb.languageDetection.translationAllowed()) {
    __gCrWeb.message.invokeOnHost({
        'command': 'languageDetection.textCaptured',
        'translationAllowed': false});
    return;
  }

  // Constant for the maximum length of the extracted text returned by
  // |-detectLanguage| to the native side.
  // Matches desktop implementation.
  // Note: This should stay in sync with the constant in
  // js_language_detection_manager.mm .
  var kMaxIndexChars = 65535;
  var captureBeginTime = new Date();
  __gCrWeb.languageDetection.activeRequests += 1;
  __gCrWeb.languageDetection.bufferedTextContent =
      __gCrWeb.languageDetection.getTextContent(document.body,
          kMaxIndexChars);
  // Elapsed time in milliseconds. |getTime| (milliseconds since the epoch) is
  // required here: |getMilliseconds| only returns the 0-999 component of the
  // timestamp, which gives wrong or negative durations whenever the capture
  // crosses a second boundary.
  var captureTextTime =
      (new Date()).getTime() - captureBeginTime.getTime();
  var httpContentLanguage =
      __gCrWeb.languageDetection.getMetaContentByHttpEquiv(
          'content-language');
  __gCrWeb.message.invokeOnHost({
      'command': 'languageDetection.textCaptured',
      'translationAllowed': true,
      'captureTextTime': captureTextTime,
      'htmlLang': document.documentElement.lang,
      'httpContentLanguage': httpContentLanguage});
};

/**
 * Retrieves the cached text content of a page. Each call consumes one active
 * request; the cache is purged once no active requests remain.
 * @return {?string} The cached text content, or null when nothing is cached.
 */
__gCrWeb.languageDetection['retrieveBufferedTextContent'] = function() {
  var textContent = __gCrWeb.languageDetection.bufferedTextContent;
  var remainingRequests = __gCrWeb.languageDetection.activeRequests - 1;
  // Clamp at zero. If a call without a matching |detectLanguage| were allowed
  // to push the counter negative, later matched calls would step over zero
  // and the cache would never be purged.
  if (remainingRequests <= 0) {
    remainingRequests = 0;
    __gCrWeb.languageDetection.bufferedTextContent = null;
  }
  __gCrWeb.languageDetection.activeRequests = remainingRequests;
  return textContent;
};

}())  // End of anonymous function.