summaryrefslogtreecommitdiffstats
path: root/chrome/test
diff options
context:
space:
mode:
Diffstat (limited to 'chrome/test')
-rw-r--r--chrome/test/data/translate/basic_ORIGINAL.html32
-rw-r--r--chrome/test/data/translate/basic_TRANSLATED.html33
-rw-r--r--chrome/test/data/translate/reverse_text.py223
3 files changed, 0 insertions, 288 deletions
diff --git a/chrome/test/data/translate/basic_ORIGINAL.html b/chrome/test/data/translate/basic_ORIGINAL.html
deleted file mode 100644
index 420ca73..0000000
--- a/chrome/test/data/translate/basic_ORIGINAL.html
+++ /dev/null
@@ -1,32 +0,0 @@
-<html>
-
-<script>
-function buttonClicked() {
- div = document.getElementById("lastDiv");
- bold = document.getElementById("bold");
-
- text_node = document.createTextNode("Hello!");
- // div.childNodes[0].appendChild(text_node);
- p_node = document.createElement("p");
- p_node.appendChild(text_node);
- bold.appendChild(p_node);
-}
-</script>
-
-<body>
-
- <p>A simple paragraph. Nothing to see here, move along!</p>
-
- <p>This is a paragraph with a <a href="">link</a> and some <b id="bold">bold text</b> in it!</p>
- <p>This on uses a <SPAN>span</SPAN> &nbsp;</p>
-
-
- <dIV>
- <div>This is a first div <div>with an inner div</div> and that's it</div>
- <div id="lastDiv">OK, last div 0.69%</div>
- </div>
-
- <button onclick="buttonClicked()">Click me&nbsp;</button>
-</body>
-
-</html>
diff --git a/chrome/test/data/translate/basic_TRANSLATED.html b/chrome/test/data/translate/basic_TRANSLATED.html
deleted file mode 100644
index b95f28b..0000000
--- a/chrome/test/data/translate/basic_TRANSLATED.html
+++ /dev/null
@@ -1,33 +0,0 @@
-<html>
-
-<script>
-function buttonClicked() {
- div = document.getElementById("lastDiv");
- bold = document.getElementById("bold");
-
- text_node = document.createTextNode("Hello!");
- // div.childNodes[0].appendChild(text_node);
- p_node = document.createElement("p");
- p_node.appendChild(text_node);
- bold.appendChild(p_node);
-}
-</script>
-
-<body>
-
- <p>!gnola evom ,ereh ees ot gnihtoN .hpargarap elpmis A</p>
-
- <p> a htiw hpargarap a si sihT<a href="">knil</a> emos dna <b id="bold">txet dlob</b>!ti ni </p>
- <p> a sesu no sihT<SPAN>naps</SPAN> &nbsp;</p>
-
-
- <dIV>
- <div> vid tsrif a si sihT<div>vid renni na htiw</div>ti s'taht dna </div>
- <div id="lastDiv">&#37;96.0 vid tsal ,KO</div>
- </div>
-
- <button onclick="buttonClicked()">&nbsp;em kcilC</button>
-</body>
-
-</html>
-
diff --git a/chrome/test/data/translate/reverse_text.py b/chrome/test/data/translate/reverse_text.py
deleted file mode 100644
index baed9d1..0000000
--- a/chrome/test/data/translate/reverse_text.py
+++ /dev/null
@@ -1,223 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2009 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-""" Reverses the text of an HTML file.
-
-This classes poorly parses an HTML file and reverse the text strings (and only
-the text, not the tags).
-It is used to generates the _TRANSLATED.html files that the translator unittest
-uses.
-Note it is very hacky and buggy.
-"""
-
-import codecs
-import re
-import sys
-
-def Error(msg):
- print msg;
- sys.exit(1);
-
-class HTMLParser:
- # IGNORED_[PAIRED|SINGLE]_TAGS should be kept in sync with kSkippedTags (see
- # chrome/renderer/translator.cc).
- # Paired tags are tags that are expected to have an opening and closing tag,
- # the entire zone they contain is ignored.
- # Single tags are not closed and are ignored.
- IGNORED_PAIRED_TAGS = [ "APPLET", "AREA", "BASE", "FRAME", "FRAMESET", "HR",
- "IFRAME", "MAP", "OBJECT", "PARAM", "SCRIPT", "STYLE",
- "TEXTAREA" ];
- IGNORED_SINGLE_TAGS = [ "META", "LINK", "IMG", "INPUT" ];
-
- def __init__(self, input_path, output_path):
- try:
- input_file = codecs.open(input_path, 'r', 'utf-8');
- except IOError:
- Error("Failed to open '" + input_path + "' for reading.");
-
- self.html_contents = input_file.read();
- # Python does not have a find method case-insensitive, so we keep a lower
- # case copy of the contents.
- self.html_contents_lower = self.html_contents.lower();
-
- input_file.close();
-
- self.read_index = 0
- self.write_index = 0
- try:
- self.output_file = codecs.open(output_path, 'w', 'utf-8');
- except IOError:
- Error("Failed to open '" + output_path + "' for writting.");
-
- def printDebug(self, msg):
- print u"** %s" % msg.encode('ascii', 'replace')
-
- def removeBlanks(self, str):
- p = re.compile('\s');
- return p.sub('', str);
-
- def extractTagName(self, str):
- closing_tag = False;
- str = str.strip();
- if str[0] != "<":
- Error("Interal error: attempting to extract tag name from invalid tag: " +
- str);
- if str[1] == "/":
- closing_tag = True;
-
- p = re.compile('</?\s*(\w*).*');
- m = p.match(str);
- if m == None:
- Error("Interal error: failed to extract tag name from tag: " + str);
- return (m.group(1).lower(), closing_tag);
-
- def shouldIgnoreTag(self, tag):
- """Returns a tuple (tag should be ignored, pared tags)
- """
- tag = tag.upper();
- for tag_to_ignore in self.IGNORED_PAIRED_TAGS:
- if tag_to_ignore == tag:
- return True, True;
- for tag_to_ignore in self.IGNORED_SINGLE_TAGS:
- if tag_to_ignore == tag:
- return True, False;
- return False, False;
-
- def skipToEndTag(self, tag):
- """ Move the read_index to the position after the closing tag matching
- |tag| and copies all the skipped data to the output file."""
- index = self.html_contents_lower.find("</" + tag, self.read_index);
- if index == -1:
- Error("Failed to find tag end for tag " + tag + " at index " +
- str(self.read_index));
- self.writeToOutputFile(self.html_contents[self.read_index:]);
- else:
- self.writeToOutputFile(self.html_contents[self.read_index:index]);
- self.read_index = index;
-
- def writeToOutputFile(self, text):
- try:
- self.output_file.write(text)
- except IOError:
- Error("Failed to write to output file.");
- # DEBUG
- if len(text) > 100000:
- Error("Writting too much text: " + text);
-# self.printDebug("Writting: " + text);
-# self.write_index += len(text);
-# self.printDebug("Wrote " + str(len(text)) + " bytes, write len=" + str(self.write_index));
-
- def getNextTag(self):
- """Moves the read_index to the end of the next tag and writes the tag to the
- output file.
- Returns a tuple end of file reached, tag name, if closing tag.
- """
-
- start_index = self.html_contents.find("<", self.read_index);
- if start_index == -1:
- self.writeToOutputFile(self.html_contents[self.read_index:]);
- return (True, "", False);
- stop_index = self.html_contents.find(">", start_index);
- if stop_index == -1:
- print "Unclosed tag found.";
- self.writeToOutputFile(self.html_contents[self.read_index:]);
- return (True, "", False);
-
- # Write to the file the current text reverted.
- # No need to do it if the string is only blanks, that would break the
- # indentation.
- text = self.html_contents[self.read_index:start_index]
- text = self.processText(text);
- self.writeToOutputFile(text);
-
- tag = self.html_contents[start_index:stop_index + 1];
- self.writeToOutputFile(tag);
- self.read_index = stop_index + 1;
- tag_name, closing_tag = self.extractTagName(tag);
-# self.printDebug("Raw tag=" + tag);
-# self.printDebug("tag=" + tag_name + " closing=" + str(closing_tag));
-# self.printDebug("read_index=" + str(self.read_index));
-
- return (False, tag_name, closing_tag);
-
- def processText(self, text):
- if text.isspace():
- return text;
-
- # Special case of lonely &nbsp; with spaces. It should not be reversed as
- # the renderer does not "translate" it as it is seen as empty string.
- if text.strip().lower() == '&nbsp;':
- return text;
-
- # We reverse the string manually so to preserve &nbsp; and friends.
- p = re.compile(r'&#\d{1,5};|&\w{2,6};');
- # We create a dictionary where the key is the index at which the ASCII code
- # starts and the value the index at which it ends.
- entityNameIndexes = dict();
- for match in p.finditer(text):
- entityNameIndexes[match.start()] = match.end();
- result = ""
- i = 0;
- while i < len(text):
- if entityNameIndexes.has_key(i):
- end_index = entityNameIndexes[i];
- result = text[i:end_index] + result;
- i = end_index;
- elif text[i] == "%": # Replace percent to avoid percent encoding.
- result = "&#37;" + result;
- i = i + 1;
- else:
- result = text[i] + result;
- i = i + 1;
-
- return result;
-
- def processTagContent(self):
- """Reads the text from the current index to the next tag and writes the text
- in reverse to the output file.
- """
- stop_index = self.html_contents.find("<", self.read_index);
- if stop_index == -1:
- text = self.html_contents[self.read_index:];
- self.read_index += len(text);
- else:
- text = self.html_contents[self.read_index:stop_index];
- self.read_index = stop_index;
- text = self.processText(text);
- self.writeToOutputFile(text);
-
- def start(self):
- while True:
- end_of_file, tag, closing_tag = self.getNextTag();
- # if closing_tag:
- # self.printDebug("Read tag: /" + tag);
- # else:
- # self.printDebug("Read tag: " + tag);
-
- if end_of_file: # We reached the end of the file.
- self.writeToOutputFile(self.html_contents[self.read_index:]);
- print "Done.";
- sys.exit(0);
-
- if closing_tag:
- continue;
-
- ignore_tag, paired_tag = self.shouldIgnoreTag(tag);
- if ignore_tag and paired_tag:
- self.skipToEndTag(tag);
-
- # Read and reverse the text in the tab.
- self.processTagContent();
-
-def main():
- if len(sys.argv) != 3:
- Error("Reverse the text in HTML pages\n"
- "Usage reversetext.py <original_file.html> <dest_file.html>");
-
- html_parser = HTMLParser(sys.argv[1], sys.argv[2]);
- html_parser.start();
-
-if __name__ == "__main__":
- main()