summary refs log tree commit diff stats
path: root/chrome/test
diff options
context:
space:
mode:
author jcivelli@google.com <jcivelli@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2010-04-06 22:21:02 +0000
committer jcivelli@google.com <jcivelli@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2010-04-06 22:21:02 +0000
commit 85d252e22b6bec161873a0b5656d59c8ebe04e30 (patch)
tree 937f8add8eac344f93d6a8ec6604796f371c761c /chrome/test
parent d41661b5e24c8d774acea07c05ef2e5896587e56 (diff)
download chromium_src-85d252e22b6bec161873a0b5656d59c8ebe04e30.zip
chromium_src-85d252e22b6bec161873a0b5656d59c8ebe04e30.tar.gz
chromium_src-85d252e22b6bec161873a0b5656d59c8ebe04e30.tar.bz2
Changing the translate back-end to use the Google Translate element.
When the user indicates that a page should be translated, the browser first fetches the Google Translate Element JS code. It then sends it to the renderer, which injects the script in the page, waits for the Translate element to be initialized and then calls the translate method on it.

The TranslationService class previously used to translate text chunks is now unused and has been removed. Some of its static methods that are still used have been moved to the TranslateManager class.

This CL also implements the "revert" translation behavior.

BUG=35474,37778,35553,39375
TEST=Test the translation feature extensively.
Review URL: http://codereview.chromium.org/1599016

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@43768 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/test')
-rw-r--r-- chrome/test/data/translate/basic_ORIGINAL.html 32
-rw-r--r-- chrome/test/data/translate/basic_TRANSLATED.html 33
-rw-r--r-- chrome/test/data/translate/reverse_text.py 223
3 files changed, 0 insertions, 288 deletions
diff --git a/chrome/test/data/translate/basic_ORIGINAL.html b/chrome/test/data/translate/basic_ORIGINAL.html
deleted file mode 100644
index 420ca73..0000000
--- a/chrome/test/data/translate/basic_ORIGINAL.html
+++ /dev/null
@@ -1,32 +0,0 @@
-<html>
-
-<script>
-function buttonClicked() {
- div = document.getElementById("lastDiv");
- bold = document.getElementById("bold");
-
- text_node = document.createTextNode("Hello!");
- // div.childNodes[0].appendChild(text_node);
- p_node = document.createElement("p");
- p_node.appendChild(text_node);
- bold.appendChild(p_node);
-}
-</script>
-
-<body>
-
- <p>A simple paragraph. Nothing to see here, move along!</p>
-
- <p>This is a paragraph with a <a href="">link</a> and some <b id="bold">bold text</b> in it!</p>
- <p>This on uses a <SPAN>span</SPAN> &nbsp;</p>
-
-
- <dIV>
- <div>This is a first div <div>with an inner div</div> and that's it</div>
- <div id="lastDiv">OK, last div 0.69%</div>
- </div>
-
- <button onclick="buttonClicked()">Click me&nbsp;</button>
-</body>
-
-</html>
diff --git a/chrome/test/data/translate/basic_TRANSLATED.html b/chrome/test/data/translate/basic_TRANSLATED.html
deleted file mode 100644
index b95f28b..0000000
--- a/chrome/test/data/translate/basic_TRANSLATED.html
+++ /dev/null
@@ -1,33 +0,0 @@
-<html>
-
-<script>
-function buttonClicked() {
- div = document.getElementById("lastDiv");
- bold = document.getElementById("bold");
-
- text_node = document.createTextNode("Hello!");
- // div.childNodes[0].appendChild(text_node);
- p_node = document.createElement("p");
- p_node.appendChild(text_node);
- bold.appendChild(p_node);
-}
-</script>
-
-<body>
-
- <p>!gnola evom ,ereh ees ot gnihtoN .hpargarap elpmis A</p>
-
- <p> a htiw hpargarap a si sihT<a href="">knil</a> emos dna <b id="bold">txet dlob</b>!ti ni </p>
- <p> a sesu no sihT<SPAN>naps</SPAN> &nbsp;</p>
-
-
- <dIV>
- <div> vid tsrif a si sihT<div>vid renni na htiw</div>ti s'taht dna </div>
- <div id="lastDiv">&#37;96.0 vid tsal ,KO</div>
- </div>
-
- <button onclick="buttonClicked()">&nbsp;em kcilC</button>
-</body>
-
-</html>
-
diff --git a/chrome/test/data/translate/reverse_text.py b/chrome/test/data/translate/reverse_text.py
deleted file mode 100644
index baed9d1..0000000
--- a/chrome/test/data/translate/reverse_text.py
+++ /dev/null
@@ -1,223 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2009 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-""" Reverses the text of an HTML file.
-
-This classes poorly parses an HTML file and reverse the text strings (and only
-the text, not the tags).
-It is used to generates the _TRANSLATED.html files that the translator unittest
-uses.
-Note it is very hacky and buggy.
-"""
-
-import codecs
-import re
-import sys
-
-def Error(msg):
- print msg;
- sys.exit(1);
-
-class HTMLParser:
- # IGNORED_[PAIRED|SINGLE]_TAGS should be kept in sync with kSkippedTags (see
- # chrome/renderer/translator.cc).
- # Paired tags are tags that are expected to have an opening and closing tag,
- # the entire zone they contain is ignored.
- # Single tags are not closed and are ignored.
- IGNORED_PAIRED_TAGS = [ "APPLET", "AREA", "BASE", "FRAME", "FRAMESET", "HR",
- "IFRAME", "MAP", "OBJECT", "PARAM", "SCRIPT", "STYLE",
- "TEXTAREA" ];
- IGNORED_SINGLE_TAGS = [ "META", "LINK", "IMG", "INPUT" ];
-
- def __init__(self, input_path, output_path):
- try:
- input_file = codecs.open(input_path, 'r', 'utf-8');
- except IOError:
- Error("Failed to open '" + input_path + "' for reading.");
-
- self.html_contents = input_file.read();
- # Python does not have a find method case-insensitive, so we keep a lower
- # case copy of the contents.
- self.html_contents_lower = self.html_contents.lower();
-
- input_file.close();
-
- self.read_index = 0
- self.write_index = 0
- try:
- self.output_file = codecs.open(output_path, 'w', 'utf-8');
- except IOError:
- Error("Failed to open '" + output_path + "' for writting.");
-
- def printDebug(self, msg):
- print u"** %s" % msg.encode('ascii', 'replace')
-
- def removeBlanks(self, str):
- p = re.compile('\s');
- return p.sub('', str);
-
- def extractTagName(self, str):
- closing_tag = False;
- str = str.strip();
- if str[0] != "<":
- Error("Interal error: attempting to extract tag name from invalid tag: " +
- str);
- if str[1] == "/":
- closing_tag = True;
-
- p = re.compile('</?\s*(\w*).*');
- m = p.match(str);
- if m == None:
- Error("Interal error: failed to extract tag name from tag: " + str);
- return (m.group(1).lower(), closing_tag);
-
- def shouldIgnoreTag(self, tag):
- """Returns a tuple (tag should be ignored, pared tags)
- """
- tag = tag.upper();
- for tag_to_ignore in self.IGNORED_PAIRED_TAGS:
- if tag_to_ignore == tag:
- return True, True;
- for tag_to_ignore in self.IGNORED_SINGLE_TAGS:
- if tag_to_ignore == tag:
- return True, False;
- return False, False;
-
- def skipToEndTag(self, tag):
- """ Move the read_index to the position after the closing tag matching
- |tag| and copies all the skipped data to the output file."""
- index = self.html_contents_lower.find("</" + tag, self.read_index);
- if index == -1:
- Error("Failed to find tag end for tag " + tag + " at index " +
- str(self.read_index));
- self.writeToOutputFile(self.html_contents[self.read_index:]);
- else:
- self.writeToOutputFile(self.html_contents[self.read_index:index]);
- self.read_index = index;
-
- def writeToOutputFile(self, text):
- try:
- self.output_file.write(text)
- except IOError:
- Error("Failed to write to output file.");
- # DEBUG
- if len(text) > 100000:
- Error("Writting too much text: " + text);
-# self.printDebug("Writting: " + text);
-# self.write_index += len(text);
-# self.printDebug("Wrote " + str(len(text)) + " bytes, write len=" + str(self.write_index));
-
- def getNextTag(self):
- """Moves the read_index to the end of the next tag and writes the tag to the
- output file.
- Returns a tuple end of file reached, tag name, if closing tag.
- """
-
- start_index = self.html_contents.find("<", self.read_index);
- if start_index == -1:
- self.writeToOutputFile(self.html_contents[self.read_index:]);
- return (True, "", False);
- stop_index = self.html_contents.find(">", start_index);
- if stop_index == -1:
- print "Unclosed tag found.";
- self.writeToOutputFile(self.html_contents[self.read_index:]);
- return (True, "", False);
-
- # Write to the file the current text reverted.
- # No need to do it if the string is only blanks, that would break the
- # indentation.
- text = self.html_contents[self.read_index:start_index]
- text = self.processText(text);
- self.writeToOutputFile(text);
-
- tag = self.html_contents[start_index:stop_index + 1];
- self.writeToOutputFile(tag);
- self.read_index = stop_index + 1;
- tag_name, closing_tag = self.extractTagName(tag);
-# self.printDebug("Raw tag=" + tag);
-# self.printDebug("tag=" + tag_name + " closing=" + str(closing_tag));
-# self.printDebug("read_index=" + str(self.read_index));
-
- return (False, tag_name, closing_tag);
-
- def processText(self, text):
- if text.isspace():
- return text;
-
- # Special case of lonely &nbsp; with spaces. It should not be reversed as
- # the renderer does not "translate" it as it is seen as empty string.
- if text.strip().lower() == '&nbsp;':
- return text;
-
- # We reverse the string manually so to preserve &nbsp; and friends.
- p = re.compile(r'&#\d{1,5};|&\w{2,6};');
- # We create a dictionary where the key is the index at which the ASCII code
- # starts and the value the index at which it ends.
- entityNameIndexes = dict();
- for match in p.finditer(text):
- entityNameIndexes[match.start()] = match.end();
- result = ""
- i = 0;
- while i < len(text):
- if entityNameIndexes.has_key(i):
- end_index = entityNameIndexes[i];
- result = text[i:end_index] + result;
- i = end_index;
- elif text[i] == "%": # Replace percent to avoid percent encoding.
- result = "&#37;" + result;
- i = i + 1;
- else:
- result = text[i] + result;
- i = i + 1;
-
- return result;
-
- def processTagContent(self):
- """Reads the text from the current index to the next tag and writes the text
- in reverse to the output file.
- """
- stop_index = self.html_contents.find("<", self.read_index);
- if stop_index == -1:
- text = self.html_contents[self.read_index:];
- self.read_index += len(text);
- else:
- text = self.html_contents[self.read_index:stop_index];
- self.read_index = stop_index;
- text = self.processText(text);
- self.writeToOutputFile(text);
-
- def start(self):
- while True:
- end_of_file, tag, closing_tag = self.getNextTag();
- # if closing_tag:
- # self.printDebug("Read tag: /" + tag);
- # else:
- # self.printDebug("Read tag: " + tag);
-
- if end_of_file: # We reached the end of the file.
- self.writeToOutputFile(self.html_contents[self.read_index:]);
- print "Done.";
- sys.exit(0);
-
- if closing_tag:
- continue;
-
- ignore_tag, paired_tag = self.shouldIgnoreTag(tag);
- if ignore_tag and paired_tag:
- self.skipToEndTag(tag);
-
- # Read and reverse the text in the tab.
- self.processTagContent();
-
-def main():
- if len(sys.argv) != 3:
- Error("Reverse the text in HTML pages\n"
- "Usage reversetext.py <original_file.html> <dest_file.html>");
-
- html_parser = HTMLParser(sys.argv[1], sys.argv[2]);
- html_parser.start();
-
-if __name__ == "__main__":
- main()