summaryrefslogtreecommitdiffstats
path: root/chrome/renderer/translate
diff options
context:
space:
mode:
authorjcampan@chromium.org <jcampan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-02-10 18:26:11 +0000
committerjcampan@chromium.org <jcampan@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-02-10 18:26:11 +0000
commitef122393f32e86fb5312d373136d99de62274bf8 (patch)
tree645665e7b050dc4810032d7d13ff03a8c45fb53c /chrome/renderer/translate
parenta6c898d9732a133867229811c43e88274898d93f (diff)
downloadchromium_src-ef122393f32e86fb5312d373136d99de62274bf8.zip
chromium_src-ef122393f32e86fb5312d373136d99de62274bf8.tar.gz
chromium_src-ef122393f32e86fb5312d373136d99de62274bf8.tar.bz2
This CL addresses 2 issues:
- we were not computing the max request text size correctly in some cases (we forgot to include the size of the &q= string), which caused assertion failures. - the translate server might send inconsistent responses in some cases; this CL makes sure we deal with them. A common case is sending separator-only strings, which we no longer do. Other, more complicated cases are less clear. BUG=35055 TEST=Visit www.spiegel.de and translate the page. The page should be translated correctly. Review URL: http://codereview.chromium.org/594023 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@38637 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/translate')
-rw-r--r--chrome/renderer/translate/page_translator.cc20
-rw-r--r--chrome/renderer/translate/page_translator.h3
2 files changed, 19 insertions, 4 deletions
diff --git a/chrome/renderer/translate/page_translator.cc b/chrome/renderer/translate/page_translator.cc
index 61f7281..e2c5692 100644
--- a/chrome/renderer/translate/page_translator.cc
+++ b/chrome/renderer/translate/page_translator.cc
@@ -31,6 +31,10 @@ const char* const kInlineTags[] = { "A", "ABBR", "ACRONYM", "B", "BIG", "DEL",
"EM", "I", "INS", "S", "SPAN", "STRIKE", "STRONG", "SUB", "SUP", "U" };
}
+// A text node containing only characters in kIgnoredCharacters is not
+// translated.
+const char* const kIgnoredCharacters = ":,.[|]0123456789";
+
// Returns true when s1 < s2.
bool PageTranslator::WebStringCompare::operator()(
const WebKit::WebString& s1, const WebKit::WebString& s2) const {
@@ -56,6 +60,8 @@ PageTranslator::PageTranslator(TextTranslator* text_translator,
ignored_tags_.insert(WebKit::WebString(ASCIIToUTF16(kSkippedTags[i])));
for (size_t i = 0; i < arraysize(kInlineTags); ++i)
inline_tags_.insert(WebKit::WebString(ASCIIToUTF16(kInlineTags[i])));
+ ignore_characters_ = ASCIIToUTF16(kIgnoredCharacters);
+ ignore_characters_.append(kWhitespaceUTF16);
}
PageTranslator::~PageTranslator() {
@@ -211,10 +217,14 @@ void PageTranslator::TextTranslated(
NodeList* nodes = iter->second;
// Check the integrity of the response.
if (translated_text_chunks.size() != nodes->size()) {
- // TODO(jcampan) reenable when we figured out why the server messed up the
- // anchor tags.
+ // The server might merge or split chunks in some cases.
+ // TODO(jcampan): once the issue is resolved on the server, reenable that
+ // NOTREACHED().
// NOTREACHED() << "Translation results received are inconsistent with the "
// "request";
+ LOG(ERROR) << "translation response for work id " << work_id <<
+ " length is " << translated_text_chunks.size() << " expected " <<
+ nodes->size();
ClearNodeZone(work_id);
return;
}
@@ -232,8 +242,10 @@ void PageTranslator::TraverseNode(WebKit::WebNode node,
std::stack<NodeList*>* element_stack,
std::vector<NodeList*>* text_nodes_list) {
if (node.isTextNode()) {
- if (ContainsOnlyWhitespace(static_cast<string16>(node.nodeValue())))
- return; // Ignore text nodes with only white-spaces.
+ string16 text = static_cast<string16>(node.nodeValue());
+ if (ContainsOnlyChars(text, ignore_characters_))
+ return; // Ignore text nodes which contains only white-spaces or
+ // separators.
DCHECK(!element_stack->empty());
NodeList* text_nodes = element_stack->top();
diff --git a/chrome/renderer/translate/page_translator.h b/chrome/renderer/translate/page_translator.h
index ed7836e..1d8bbc7 100644
--- a/chrome/renderer/translate/page_translator.h
+++ b/chrome/renderer/translate/page_translator.h
@@ -144,6 +144,9 @@ class PageTranslator : public TextTranslator::Delegate {
// The original text of the text nodes in |text_nodes_|.
std::vector<TextChunks*> text_chunks_;
+ // A text node containing only the characters in this list is not translated.
+ string16 ignore_characters_;
+
DISALLOW_COPY_AND_ASSIGN(PageTranslator);
};