summaryrefslogtreecommitdiffstats
path: root/chrome
diff options
context:
space:
mode:
authordroger@chromium.org <droger@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-07-11 13:54:22 +0000
committerdroger@chromium.org <droger@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-07-11 13:54:22 +0000
commitd7575c2d6ed0a3459a539ba9c63c26e39e335f5b (patch)
treef405169b772c4a73e3081b385bbfdd583c324e82 /chrome
parent2dda4b49e79423b7d9ba22e7b6b5a300adb11857 (diff)
downloadchromium_src-d7575c2d6ed0a3459a539ba9c63c26e39e335f5b.zip
chromium_src-d7575c2d6ed0a3459a539ba9c63c26e39e335f5b.tar.gz
chromium_src-d7575c2d6ed0a3459a539ba9c63c26e39e335f5b.tar.bz2
Move language detection to chrome/common/.
This CL moves the language detection code from chrome/renderer/translate to chrome/common/translate, in order to be able to use it on iOS. This CL also enables the related unittests on iOS. BUG= Review URL: https://chromiumcodereview.appspot.com/18911002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@211108 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r--chrome/chrome_common.gypi10
-rw-r--r--chrome/chrome_renderer.gypi10
-rw-r--r--chrome/chrome_tests_unit.gypi6
-rw-r--r--chrome/common/DEPS1
-rw-r--r--chrome/common/translate/language_detection_util.cc304
-rw-r--r--chrome/common/translate/language_detection_util.h44
-rw-r--r--chrome/common/translate/language_detection_util_unittest.cc158
-rw-r--r--chrome/common/translate/translate_common_metrics.cc (renamed from chrome/renderer/translate/translate_helper_metrics.cc)42
-rw-r--r--chrome/common/translate/translate_common_metrics.h (renamed from chrome/renderer/translate/translate_helper_metrics.h)10
-rw-r--r--chrome/common/translate/translate_common_metrics_unittest.cc (renamed from chrome/renderer/translate/translate_helper_metrics_unittest.cc)180
-rw-r--r--chrome/renderer/DEPS1
-rw-r--r--chrome/renderer/translate/translate_helper.cc301
-rw-r--r--chrome/renderer/translate/translate_helper.h42
-rw-r--r--chrome/renderer/translate/translate_helper_unittest.cc157
14 files changed, 645 insertions, 621 deletions
diff --git a/chrome/chrome_common.gypi b/chrome/chrome_common.gypi
index b2d6a60..71a5eb5 100644
--- a/chrome/chrome_common.gypi
+++ b/chrome/chrome_common.gypi
@@ -441,6 +441,10 @@
'common/time_format.h',
'common/translate/language_detection_details.cc',
'common/translate/language_detection_details.h',
+ 'common/translate/language_detection_util.cc',
+ 'common/translate/language_detection_util.h',
+ 'common/translate/translate_common_metrics.cc',
+ 'common/translate/translate_common_metrics.h',
'common/translate/translate_errors.h',
'common/translate/translate_util.cc',
'common/translate/translate_util.h',
@@ -537,6 +541,7 @@
['include', '_ios\\.(cc|mm)$'],
['include', '(^|/)ios/'],
['include', '^common/chrome_version_info\\.cc$'],
+ ['include', '^common/translate'],
['include', '^common/zip'],
],
'include_dirs': [
@@ -621,6 +626,11 @@
'common/media/webrtc_logging_messages.h',
]
}],
+ ['enable_language_detection==1', {
+ 'dependencies': [
+ '../third_party/cld/cld.gyp:cld',
+ ],
+ }],
],
'target_conditions': [
['OS == "ios"', {
diff --git a/chrome/chrome_renderer.gypi b/chrome/chrome_renderer.gypi
index 652cb94..d82c3ef 100644
--- a/chrome/chrome_renderer.gypi
+++ b/chrome/chrome_renderer.gypi
@@ -328,8 +328,6 @@
'renderer/tts_dispatcher.h',
'renderer/translate/translate_helper.cc',
'renderer/translate/translate_helper.h',
- 'renderer/translate/translate_helper_metrics.cc',
- 'renderer/translate/translate_helper_metrics.h',
'renderer/validation_message_agent.cc',
'renderer/validation_message_agent.h',
'renderer/web_apps.cc',
@@ -381,14 +379,6 @@
'../third_party/mach_override/mach_override.gyp:mach_override',
],
}],
- ['enable_language_detection==1', {
- 'dependencies': [
- '../third_party/cld/cld.gyp:cld',
- ],
- 'include_dirs': [
- '../third_party/cld',
- ],
- }],
['toolkit_uses_gtk == 1', {
'dependencies': [
'../build/linux/system.gyp:gtk',
diff --git a/chrome/chrome_tests_unit.gypi b/chrome/chrome_tests_unit.gypi
index 57f757c..673764f 100644
--- a/chrome/chrome_tests_unit.gypi
+++ b/chrome/chrome_tests_unit.gypi
@@ -1731,6 +1731,8 @@
'common/switch_utils_unittest.cc',
'common/thumbnail_score_unittest.cc',
'common/time_format_unittest.cc',
+ 'common/translate/language_detection_util_unittest.cc',
+ 'common/translate/translate_common_metrics_unittest.cc',
'common/translate/translate_util_unittest.cc',
'common/worker_thread_ticker_unittest.cc',
'nacl/nacl_ipc_adapter_unittest.cc',
@@ -1763,8 +1765,6 @@
'renderer/spellchecker/spellcheck_provider_test.h',
'renderer/spellchecker/spellcheck_unittest.cc',
'renderer/spellchecker/spellcheck_worditerator_unittest.cc',
- 'renderer/translate/translate_helper_unittest.cc',
- 'renderer/translate/translate_helper_metrics_unittest.cc',
'renderer/web_apps_unittest.cc',
'service/cloud_print/cloud_print_helpers_unittest.cc',
'service/cloud_print/cloud_print_token_store_unittest.cc',
@@ -1940,6 +1940,8 @@
['include', '_ios\\.(cc|mm)$'],
['include', '(^|/)ios/'],
# TODO(ios): Add files here as they are updated to compile on iOS.
+ ['include', '^common/translate/language_detection_util_unittest\\.cc$'],
+ ['include', '^common/translate/translate_util_unittest\\.cc$'],
['include', '^common/zip_'],
],
'conditions': [
diff --git a/chrome/common/DEPS b/chrome/common/DEPS
index a066e70..52aad28 100644
--- a/chrome/common/DEPS
+++ b/chrome/common/DEPS
@@ -30,6 +30,7 @@ include_rules = [
# Other libraries.
"+chrome/third_party/xdg_user_dirs",
"+third_party/bzip2",
+ "+third_party/cld/encodings/compact_lang_det/win",
"+third_party/mt19937ar",
"+third_party/npapi",
"+third_party/re2",
diff --git a/chrome/common/translate/language_detection_util.cc b/chrome/common/translate/language_detection_util.cc
new file mode 100644
index 0000000..9710614
--- /dev/null
+++ b/chrome/common/translate/language_detection_util.cc
@@ -0,0 +1,304 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/common/translate/language_detection_util.h"
+
+#include "base/logging.h"
+#include "base/strings/string_split.h"
+#include "base/strings/string_util.h"
+#include "base/time/time.h"
+#include "chrome/common/chrome_constants.h"
+#include "chrome/common/translate/translate_common_metrics.h"
+#include "chrome/common/translate/translate_util.h"
+
+#if defined(ENABLE_LANGUAGE_DETECTION)
+#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
+#endif
+
+namespace {
+
+// Similar language code list. Some languages are very similar and difficult
+// for CLD to distinguish.
+struct SimilarLanguageCode {
+ const char* const code;
+ int group;
+};
+
+const SimilarLanguageCode kSimilarLanguageCodes[] = {
+ {"bs", 1},
+ {"hr", 1},
+ {"hi", 2},
+ {"ne", 2},
+};
+
+// Checks |kSimilarLanguageCodes| and returns group code.
+int GetSimilarLanguageGroupCode(const std::string& language) {
+ for (size_t i = 0; i < arraysize(kSimilarLanguageCodes); ++i) {
+ if (language.find(kSimilarLanguageCodes[i].code) != 0)
+ continue;
+ return kSimilarLanguageCodes[i].group;
+ }
+ return 0;
+}
+
+// Well-known languages which often have wrong server configuration of
+// Content-Language: en.
+// TODO(toyoshim): Remove these static tables and caller functions to
+// chrome/common/translate, and implement them as std::set<>.
+const char* kWellKnownCodesOnWrongConfiguration[] = {
+ "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th"
+};
+
+// Applies a series of language code modifications in proper order.
+void ApplyLanguageCodeCorrection(std::string* code) {
+ // Correct well-known format errors.
+ LanguageDetectionUtil::CorrectLanguageCodeTypo(code);
+
+ if (!LanguageDetectionUtil::IsValidLanguageCode(*code)) {
+ *code = std::string();
+ return;
+ }
+
+ TranslateUtil::ToTranslateLanguageSynonym(code);
+}
+
+#if defined(ENABLE_LANGUAGE_DETECTION)
+// Returns the ISO 639 language code of the specified |text|, or 'unknown' if it
+// failed.
+// |is_cld_reliable| will be set as true if CLD says the detection is reliable.
+std::string DetermineTextLanguage(const base::string16& text,
+ bool* is_cld_reliable) {
+ std::string language = chrome::kUnknownLanguageCode;
+ int num_languages = 0;
+ int text_bytes = 0;
+ bool is_reliable = false;
+ Language cld_language =
+ DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
+ &num_languages, NULL, &text_bytes);
+ if (is_cld_reliable != NULL)
+ *is_cld_reliable = is_reliable;
+
+ // We don't trust the result if the CLD reports that the detection is not
+ // reliable, or if the actual text used to detect the language was less than
+ // 100 bytes (short texts can often lead to wrong results).
+ // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that
+ // the determined language code is correct with 50% confidence. Chrome should
+ // handle the real confidence value to judge.
+ if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES &&
+ cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
+ // We should not use LanguageCode_ISO_639_1 because it does not cover all
+ // the languages CLD can detect. As a result, it'll return the invalid
+ // language code for traditional Chinese among others.
+ // |LanguageCodeWithDialects| will go through ISO 639-1, ISO-639-2 and
+ // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
+ // for Simplified Chinese.
+ language = LanguageCodeWithDialects(cld_language);
+ }
+ VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text
+ << "\n*************************************\n";
+ return language;
+}
+#endif // defined(ENABLE_LANGUAGE_DETECTION)
+
+// Checks if CLD can complement a sub code when the page language doesn't know
+// the sub code.
+bool CanCLDComplementSubCode(
+ const std::string& page_language, const std::string& cld_language) {
+ // Translate server cannot treat general Chinese. If Content-Language and
+ // CLD agree that the language is Chinese and Content-Language doesn't know
+ // which dialect is used, CLD language has priority.
+ // TODO(hajimehoshi): How about the other dialects like zh-MO?
+ return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false);
+}
+
+} // namespace
+
+namespace LanguageDetectionUtil {
+
+std::string DeterminePageLanguage(const std::string& code,
+ const std::string& html_lang,
+ const base::string16& contents,
+ std::string* cld_language_p,
+ bool* is_cld_reliable_p) {
+#if defined(ENABLE_LANGUAGE_DETECTION)
+ base::TimeTicks begin_time = base::TimeTicks::Now();
+ bool is_cld_reliable;
+ std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable);
+ TranslateCommonMetrics::ReportLanguageDetectionTime(begin_time,
+ base::TimeTicks::Now());
+
+ if (cld_language_p != NULL)
+ *cld_language_p = cld_language;
+ if (is_cld_reliable_p != NULL)
+ *is_cld_reliable_p = is_cld_reliable;
+ TranslateUtil::ToTranslateLanguageSynonym(&cld_language);
+#endif // defined(ENABLE_LANGUAGE_DETECTION)
+
+ // Check if html lang attribute is valid.
+ std::string modified_html_lang;
+ if (!html_lang.empty()) {
+ modified_html_lang = html_lang;
+ ApplyLanguageCodeCorrection(&modified_html_lang);
+ TranslateCommonMetrics::ReportHtmlLang(html_lang, modified_html_lang);
+ VLOG(9) << "html lang based language code: " << modified_html_lang;
+ }
+
+ // Check if Content-Language is valid.
+ std::string modified_code;
+ if (!code.empty()) {
+ modified_code = code;
+ ApplyLanguageCodeCorrection(&modified_code);
+ TranslateCommonMetrics::ReportContentLanguage(code, modified_code);
+ }
+
+ // Adopt |modified_html_lang| if it is valid. Otherwise, adopt
+ // |modified_code|.
+ std::string language = modified_html_lang.empty() ? modified_code :
+ modified_html_lang;
+
+#if defined(ENABLE_LANGUAGE_DETECTION)
+ // If |language| is empty, just use CLD result even though it might be
+ // chrome::kUnknownLanguageCode.
+ if (language.empty()) {
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_ONLY);
+ return cld_language;
+ }
+
+ if (cld_language == chrome::kUnknownLanguageCode) {
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_UNKNOWN);
+ return language;
+ } else if (IsSameOrSimilarLanguages(language, cld_language)) {
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_AGREE);
+ return language;
+ } else if (MaybeServerWrongConfiguration(language, cld_language)) {
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_TRUST_CLD);
+ return cld_language;
+ } else if (CanCLDComplementSubCode(language, cld_language)) {
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE);
+ return cld_language;
+ } else {
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE);
+ // Content-Language value might be wrong because CLD says that this page
+ // is written in another language with confidence.
+ // In this case, Chrome doesn't rely on any of the language codes, and
+ // gives up suggesting a translation.
+ return std::string(chrome::kUnknownLanguageCode);
+ }
+#else // defined(ENABLE_LANGUAGE_DETECTION)
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED);
+#endif // defined(ENABLE_LANGUAGE_DETECTION)
+
+ return language;
+}
+
+void CorrectLanguageCodeTypo(std::string* code) {
+ DCHECK(code);
+
+ size_t coma_index = code->find(',');
+ if (coma_index != std::string::npos) {
+ // There are more than 1 language specified, just keep the first one.
+ *code = code->substr(0, coma_index);
+ }
+ TrimWhitespaceASCII(*code, TRIM_ALL, code);
+
+ // An underscore instead of a dash is a frequent mistake.
+ size_t underscore_index = code->find('_');
+ if (underscore_index != std::string::npos)
+ (*code)[underscore_index] = '-';
+
+ // Change everything up to a dash to lower-case and everything after to upper.
+ size_t dash_index = code->find('-');
+ if (dash_index != std::string::npos) {
+ *code = StringToLowerASCII(code->substr(0, dash_index)) +
+ StringToUpperASCII(code->substr(dash_index));
+ } else {
+ *code = StringToLowerASCII(*code);
+ }
+}
+
+bool IsValidLanguageCode(const std::string& code) {
+ // Roughly check if the language code follows /[a-zA-Z]{1,3}(-[a-zA-Z]{2})?/.
+ // TODO(hajimehoshi): How about es-419, which is used as an Accept language?
+ std::vector<std::string> chunks;
+ base::SplitString(code, '-', &chunks);
+
+ if (chunks.size() < 1 || 2 < chunks.size())
+ return false;
+
+ const std::string& main_code = chunks[0];
+
+ if (main_code.size() < 1 || 3 < main_code.size())
+ return false;
+
+ for (std::string::const_iterator it = main_code.begin();
+ it != main_code.end(); ++it) {
+ if (!IsAsciiAlpha(*it))
+ return false;
+ }
+
+ if (chunks.size() == 1)
+ return true;
+
+ const std::string& sub_code = chunks[1];
+
+ if (sub_code.size() != 2)
+ return false;
+
+ for (std::string::const_iterator it = sub_code.begin();
+ it != sub_code.end(); ++it) {
+ if (!IsAsciiAlpha(*it))
+ return false;
+ }
+
+ return true;
+}
+
+bool IsSameOrSimilarLanguages(const std::string& page_language,
+ const std::string& cld_language) {
+ // Language code part of |page_language| is matched to one of |cld_language|.
+ // Country code is ignored here.
+ if (page_language.size() >= 2 &&
+ cld_language.find(page_language.c_str(), 0, 2) == 0) {
+ // Languages are matched strictly. Reports false to metrics, but returns
+ // true.
+ TranslateCommonMetrics::ReportSimilarLanguageMatch(false);
+ return true;
+ }
+
+ // Check if |page_language| and |cld_language| are in the similar language
+ // list and belong to the same language group.
+ int page_code = GetSimilarLanguageGroupCode(page_language);
+ bool match = page_code != 0 &&
+ page_code == GetSimilarLanguageGroupCode(cld_language);
+
+ TranslateCommonMetrics::ReportSimilarLanguageMatch(match);
+ return match;
+}
+
+bool MaybeServerWrongConfiguration(const std::string& page_language,
+ const std::string& cld_language) {
+ // If |page_language| is not "en-*", respect it and just return false here.
+ if (!StartsWithASCII(page_language, "en", false))
+ return false;
+
+ // A server provides a language meta information representing "en-*". But it
+ // might be just a default value due to missing user configuration.
+ // Let's trust |cld_language| if the determined language is not difficult to
+ // distinguish from English, and the language is one of well-known languages
+ // which often provide "en-*" meta information mistakenly.
+ for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) {
+ if (cld_language == kWellKnownCodesOnWrongConfiguration[i])
+ return true;
+ }
+ return false;
+}
+
+} // namespace LanguageDetectionUtil
diff --git a/chrome/common/translate/language_detection_util.h b/chrome/common/translate/language_detection_util.h
new file mode 100644
index 0000000..787c0781
--- /dev/null
+++ b/chrome/common/translate/language_detection_util.h
@@ -0,0 +1,44 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_COMMON_TRANSLATE_LANGUAGE_DETECTION_UTIL_H_
+#define CHROME_COMMON_TRANSLATE_LANGUAGE_DETECTION_UTIL_H_
+
+#include <string>
+
+#include "base/strings/string16.h"
+
+namespace LanguageDetectionUtil {
+
+// Determines content page language from Content-Language code and contents.
+std::string DeterminePageLanguage(const std::string& code,
+ const std::string& html_lang,
+ const base::string16& contents,
+ std::string* cld_language,
+ bool* is_cld_reliable);
+
+// Corrects language code if it contains well-known mistakes.
+// Called only by tests.
+void CorrectLanguageCodeTypo(std::string* code);
+
+// Checks if the language code's format is valid.
+// Called only by tests.
+bool IsValidLanguageCode(const std::string& code);
+
+// Checks if languages are matched, or similar. This function returns true
+// against a language pair containing a language which is difficult for CLD to
+// distinguish.
+// Called only by tests.
+bool IsSameOrSimilarLanguages(const std::string& page_language,
+ const std::string& cld_language);
+
+// Checks if the language pair is one of the well-known pairs of wrong server
+// configuration.
+// Called only by tests.
+bool MaybeServerWrongConfiguration(const std::string& page_language,
+ const std::string& cld_language);
+
+} // namespace LanguageDetectionUtil
+
+#endif // CHROME_COMMON_TRANSLATE_LANGUAGE_DETECTION_UTIL_H_
diff --git a/chrome/common/translate/language_detection_util_unittest.cc b/chrome/common/translate/language_detection_util_unittest.cc
new file mode 100644
index 0000000..4f8dbf3
--- /dev/null
+++ b/chrome/common/translate/language_detection_util_unittest.cc
@@ -0,0 +1,158 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/common/translate/language_detection_util.h"
+
+#include "base/strings/string16.h"
+#include "base/strings/utf_string_conversions.h"
+#include "chrome/common/chrome_constants.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+typedef testing::Test LanguageDetectionUtilTest;
+
+// Tests that well-known language code typos are fixed.
+TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) {
+ std::string language;
+
+ // Strip the second and later codes.
+ language = std::string("ja,en");
+ LanguageDetectionUtil::CorrectLanguageCodeTypo(&language);
+ EXPECT_EQ("ja", language);
+
+ // Replace underscore with hyphen.
+ language = std::string("ja_JP");
+ LanguageDetectionUtil::CorrectLanguageCodeTypo(&language);
+ EXPECT_EQ("ja-JP", language);
+
+ // Correct wrong cases.
+ language = std::string("JA-jp");
+ LanguageDetectionUtil::CorrectLanguageCodeTypo(&language);
+ EXPECT_EQ("ja-JP", language);
+}
+
+// Tests that the language code format validity check works.
+TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) {
+ std::string language;
+
+ language = std::string("ja");
+ EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language));
+
+ language = std::string("ja-JP");
+ EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language));
+
+ language = std::string("ceb");
+ EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language));
+
+ language = std::string("ceb-XX");
+ EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language));
+
+ // Invalid because the sub code consists of a number.
+ language = std::string("utf-8");
+ EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language));
+
+ // Invalid because of six characters after hyphen.
+ language = std::string("ja-YUKARI");
+ EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language));
+
+ // Invalid because of four characters.
+ language = std::string("DHMO");
+ EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language));
+}
+
+// Tests that similar language table works.
+TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) {
+ EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "en"));
+ EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "ja"));
+ EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hr"));
+ EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("sr-ME", "sr"));
+ EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("ne", "hi"));
+ EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hi"));
+}
+
+// Tests that well-known languages which often have wrong server configuration
+// are handled.
+TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) {
+ EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", "ja"));
+ EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en-US",
+ "ja"));
+ EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en",
+ "zh-CN"));
+ EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("ja",
+ "en"));
+ EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en",
+ "he"));
+}
+
+// Tests that the language meta tag providing wrong information is ignored by
+// LanguageDetectionUtil due to disagreement between meta tag and CLD.
+TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) {
+ base::string16 contents = ASCIIToUTF16(
+ "<html><head><meta http-equiv='Content-Language' content='ja'></head>"
+ "<body>This is a page apparently written in English. Even though "
+ "content-language is provided, the value will be ignored if the value "
+ "is suspicious.</body></html>");
+ std::string cld_language;
+ bool is_cld_reliable;
+ std::string language = LanguageDetectionUtil::DeterminePageLanguage(
+ std::string("ja"), std::string(), contents, &cld_language,
+ &is_cld_reliable);
+ EXPECT_EQ(chrome::kUnknownLanguageCode, language);
+ EXPECT_EQ("en", cld_language);
+ EXPECT_TRUE(is_cld_reliable);
+}
+
+// Tests that the language meta tag providing "en-US" style information is
+// agreed by CLD.
+TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) {
+ base::string16 contents = ASCIIToUTF16(
+ "<html><head><meta http-equiv='Content-Language' content='en-US'></head>"
+ "<body>This is a page apparently written in English. Even though "
+ "content-language is provided, the value will be ignored if the value "
+ "is suspicious.</body></html>");
+ std::string cld_language;
+ bool is_cld_reliable;
+ std::string language = LanguageDetectionUtil::DeterminePageLanguage(
+ std::string("en-US"), std::string(), contents, &cld_language,
+ &is_cld_reliable);
+ EXPECT_EQ("en-US", language);
+ EXPECT_EQ("en", cld_language);
+ EXPECT_TRUE(is_cld_reliable);
+}
+
+// Tests that the language meta tag providing wrong information is ignored and
+// CLD's language will be adopted by LanguageDetectionUtil due to an invalid
+// meta tag.
+TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) {
+ base::string16 contents = ASCIIToUTF16(
+ "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>"
+ "<body>This is a page apparently written in English. Even though "
+ "content-language is provided, the value will be ignored and CLD's"
+ " language will be adopted if the value is invalid.</body></html>");
+ std::string cld_language;
+ bool is_cld_reliable;
+ std::string language = LanguageDetectionUtil::DeterminePageLanguage(
+ std::string("utf-8"), std::string(), contents, &cld_language,
+ &is_cld_reliable);
+ EXPECT_EQ("en", language);
+ EXPECT_EQ("en", cld_language);
+ EXPECT_TRUE(is_cld_reliable);
+}
+
+// Tests that the language meta tag providing wrong information is ignored
+// because of valid html lang attribute.
+TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) {
+ base::string16 contents = ASCIIToUTF16(
+ "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>"
+ "</head><body>This is a page apparently written in English. Even though "
+ "content-language is provided, the value will be ignored if the value "
+ "is suspicious.</body></html>");
+ std::string cld_language;
+ bool is_cld_reliable;
+ std::string language = LanguageDetectionUtil::DeterminePageLanguage(
+ std::string("ja"), std::string("en"), contents, &cld_language,
+ &is_cld_reliable);
+ EXPECT_EQ("en", language);
+ EXPECT_EQ("en", cld_language);
+ EXPECT_TRUE(is_cld_reliable);
+}
diff --git a/chrome/renderer/translate/translate_helper_metrics.cc b/chrome/common/translate/translate_common_metrics.cc
index 93ce5b0..028be6d 100644
--- a/chrome/renderer/translate/translate_helper_metrics.cc
+++ b/chrome/common/translate/translate_common_metrics.cc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "chrome/renderer/translate/translate_helper_metrics.h"
+#include "chrome/common/translate/translate_common_metrics.h"
#include "base/basictypes.h"
#include "base/metrics/histogram.h"
@@ -26,63 +26,63 @@ const char kSchemeHttp[] = "http";
const char kSchemeHttps[] = "https";
struct MetricsEntry {
- TranslateHelperMetrics::MetricsNameIndex index;
+ TranslateCommonMetrics::MetricsNameIndex index;
const char* const name;
};
// This entry table should be updated when new UMA items are added.
const MetricsEntry kMetricsEntries[] = {
- { TranslateHelperMetrics::UMA_LANGUAGE_DETECTION,
+ { TranslateCommonMetrics::UMA_LANGUAGE_DETECTION,
kRenderer4LanguageDetection },
- { TranslateHelperMetrics::UMA_CONTENT_LANGUAGE,
+ { TranslateCommonMetrics::UMA_CONTENT_LANGUAGE,
kTranslateContentLanguage },
- { TranslateHelperMetrics::UMA_HTML_LANG,
+ { TranslateCommonMetrics::UMA_HTML_LANG,
kTranslateHtmlLang },
- { TranslateHelperMetrics::UMA_LANGUAGE_VERIFICATION,
+ { TranslateCommonMetrics::UMA_LANGUAGE_VERIFICATION,
kTranslateLanguageVerification },
- { TranslateHelperMetrics::UMA_TIME_TO_BE_READY,
+ { TranslateCommonMetrics::UMA_TIME_TO_BE_READY,
kTranslateTimeToBeReady },
- { TranslateHelperMetrics::UMA_TIME_TO_LOAD,
+ { TranslateCommonMetrics::UMA_TIME_TO_LOAD,
kTranslateTimeToLoad },
- { TranslateHelperMetrics::UMA_TIME_TO_TRANSLATE,
+ { TranslateCommonMetrics::UMA_TIME_TO_TRANSLATE,
kTranslateTimeToTranslate },
- { TranslateHelperMetrics::UMA_USER_ACTION_DURATION,
+ { TranslateCommonMetrics::UMA_USER_ACTION_DURATION,
kTranslateUserActionDuration },
- { TranslateHelperMetrics::UMA_PAGE_SCHEME,
+ { TranslateCommonMetrics::UMA_PAGE_SCHEME,
kTranslatePageScheme },
- { TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH,
+ { TranslateCommonMetrics::UMA_SIMILAR_LANGUAGE_MATCH,
kTranslateSimilarLanguageMatch },
};
-COMPILE_ASSERT(arraysize(kMetricsEntries) == TranslateHelperMetrics::UMA_MAX,
+COMPILE_ASSERT(arraysize(kMetricsEntries) == TranslateCommonMetrics::UMA_MAX,
arraysize_of_kMetricsEntries_should_be_UMA_MAX);
-TranslateHelperMetrics::LanguageCheckType GetLanguageCheckMetric(
+TranslateCommonMetrics::LanguageCheckType GetLanguageCheckMetric(
const std::string& provided_code,
const std::string& revised_code) {
if (provided_code.empty())
- return TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED;
+ return TranslateCommonMetrics::LANGUAGE_NOT_PROVIDED;
else if (provided_code == revised_code)
- return TranslateHelperMetrics::LANGUAGE_VALID;
- return TranslateHelperMetrics::LANGUAGE_INVALID;
+ return TranslateCommonMetrics::LANGUAGE_VALID;
+ return TranslateCommonMetrics::LANGUAGE_INVALID;
}
} // namespace
-namespace TranslateHelperMetrics {
+namespace TranslateCommonMetrics {
void ReportContentLanguage(const std::string& provided_code,
const std::string& revised_code) {
UMA_HISTOGRAM_ENUMERATION(kTranslateContentLanguage,
GetLanguageCheckMetric(provided_code, revised_code),
- TranslateHelperMetrics::LANGUAGE_MAX);
+ TranslateCommonMetrics::LANGUAGE_MAX);
}
void ReportHtmlLang(const std::string& provided_code,
const std::string& revised_code) {
UMA_HISTOGRAM_ENUMERATION(kTranslateHtmlLang,
GetLanguageCheckMetric(provided_code, revised_code),
- TranslateHelperMetrics::LANGUAGE_MAX);
+ TranslateCommonMetrics::LANGUAGE_MAX);
}
void ReportLanguageVerification(LanguageVerificationType type) {
@@ -143,4 +143,4 @@ const char* GetMetricsName(MetricsNameIndex index) {
return NULL;
}
-} // namespace TranslateHelperMetrics
+} // namespace TranslateCommonMetrics
diff --git a/chrome/renderer/translate/translate_helper_metrics.h b/chrome/common/translate/translate_common_metrics.h
index cd0050f..cfd6b14 100644
--- a/chrome/renderer/translate/translate_helper_metrics.h
+++ b/chrome/common/translate/translate_common_metrics.h
@@ -2,14 +2,14 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#ifndef CHROME_RENDERER_TRANSLATE_TRANSLATE_HELPER_METRICS_H_
-#define CHROME_RENDERER_TRANSLATE_TRANSLATE_HELPER_METRICS_H_
+#ifndef CHROME_COMMON_TRANSLATE_TRANSLATE_COMMON_METRICS_H_
+#define CHROME_COMMON_TRANSLATE_TRANSLATE_COMMON_METRICS_H_
#include <string>
#include "base/time/time.h"
-namespace TranslateHelperMetrics {
+namespace TranslateCommonMetrics {
// An indexing type to query each UMA entry name via GetMetricsName() function.
// Note: |kMetricsEntries| should be updated when a new entry is added here.
@@ -103,6 +103,6 @@ void ReportSimilarLanguageMatch(bool match);
// Gets UMA name for an entry specified by |index|.
const char* GetMetricsName(MetricsNameIndex index);
-} // namespace TranslateHelperMetrics
+} // namespace TranslateCommonMetrics
-#endif // CHROME_RENDERER_TRANSLATE_TRANSLATE_HELPER_METRICS_H_
+#endif // CHROME_COMMON_TRANSLATE_TRANSLATE_COMMON_METRICS_H_
diff --git a/chrome/renderer/translate/translate_helper_metrics_unittest.cc b/chrome/common/translate/translate_common_metrics_unittest.cc
index b0d4585..4815cb1 100644
--- a/chrome/renderer/translate/translate_helper_metrics_unittest.cc
+++ b/chrome/common/translate/translate_common_metrics_unittest.cc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "chrome/renderer/translate/translate_helper_metrics.h"
+#include "chrome/common/translate/translate_common_metrics.h"
#include "base/basictypes.h"
#include "base/memory/scoped_ptr.h"
@@ -33,23 +33,23 @@ class MetricsRecorder {
base_samples_ = histogram->SnapshotSamples();
}
- void CheckLanguage(TranslateHelperMetrics::MetricsNameIndex index,
+ void CheckLanguage(TranslateCommonMetrics::MetricsNameIndex index,
int expected_not_provided,
int expected_valid,
int expected_invalid) {
- ASSERT_EQ(TranslateHelperMetrics::GetMetricsName(index), key_);
+ ASSERT_EQ(TranslateCommonMetrics::GetMetricsName(index), key_);
Snapshot();
EXPECT_EQ(expected_not_provided,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED));
+ TranslateCommonMetrics::LANGUAGE_NOT_PROVIDED));
EXPECT_EQ(expected_valid,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::LANGUAGE_VALID));
+ TranslateCommonMetrics::LANGUAGE_VALID));
EXPECT_EQ(expected_invalid,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::LANGUAGE_INVALID));
+ TranslateCommonMetrics::LANGUAGE_INVALID));
}
void CheckLanguageVerification(int expected_cld_disabled,
@@ -59,54 +59,54 @@ class MetricsRecorder {
int expected_cld_disagree,
int expected_trust_cld,
int expected_cld_complement_sub_code) {
- ASSERT_EQ(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_LANGUAGE_VERIFICATION), key_);
+ ASSERT_EQ(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_LANGUAGE_VERIFICATION), key_);
Snapshot();
EXPECT_EQ(
expected_cld_disabled,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED));
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED));
EXPECT_EQ(
expected_cld_only,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY));
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_ONLY));
EXPECT_EQ(
expected_unknown,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN));
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_UNKNOWN));
EXPECT_EQ(
expected_cld_agree,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE));
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_AGREE));
EXPECT_EQ(
expected_cld_disagree,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE));
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE));
EXPECT_EQ(
expected_trust_cld,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_TRUST_CLD));
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_TRUST_CLD));
EXPECT_EQ(
expected_cld_complement_sub_code,
GetCountWithoutSnapshot(
- TranslateHelperMetrics::
+ TranslateCommonMetrics::
LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE));
}
void CheckScheme(int expected_http, int expected_https, int expected_others) {
- ASSERT_EQ(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_PAGE_SCHEME), key_);
+ ASSERT_EQ(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_PAGE_SCHEME), key_);
Snapshot();
EXPECT_EQ(expected_http,
- GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTP));
+ GetCountWithoutSnapshot(TranslateCommonMetrics::SCHEME_HTTP));
EXPECT_EQ(expected_https,
- GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTPS));
+ GetCountWithoutSnapshot(TranslateCommonMetrics::SCHEME_HTTPS));
EXPECT_EQ(expected_others,
- GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_OTHERS));
+ GetCountWithoutSnapshot(TranslateCommonMetrics::SCHEME_OTHERS));
}
void CheckTotalCount(int count) {
@@ -170,133 +170,133 @@ class MetricsRecorder {
} // namespace
-TEST(TranslateHelperMetricsTest, ReportContentLanguage) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_CONTENT_LANGUAGE));
-
- recorder.CheckLanguage(TranslateHelperMetrics::UMA_CONTENT_LANGUAGE, 0, 0, 0);
- TranslateHelperMetrics::ReportContentLanguage(std::string(), std::string());
- recorder.CheckLanguage(TranslateHelperMetrics::UMA_CONTENT_LANGUAGE, 1, 0, 0);
- TranslateHelperMetrics::ReportContentLanguage("ja_JP", "ja-JP");
- recorder.CheckLanguage(TranslateHelperMetrics::UMA_CONTENT_LANGUAGE, 1, 0, 1);
- TranslateHelperMetrics::ReportContentLanguage("en", "en");
- recorder.CheckLanguage(TranslateHelperMetrics::UMA_CONTENT_LANGUAGE, 1, 1, 1);
+TEST(TranslateCommonMetricsTest, ReportContentLanguage) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_CONTENT_LANGUAGE));
+
+ recorder.CheckLanguage(TranslateCommonMetrics::UMA_CONTENT_LANGUAGE, 0, 0, 0);
+ TranslateCommonMetrics::ReportContentLanguage(std::string(), std::string());
+ recorder.CheckLanguage(TranslateCommonMetrics::UMA_CONTENT_LANGUAGE, 1, 0, 0);
+ TranslateCommonMetrics::ReportContentLanguage("ja_JP", "ja-JP");
+ recorder.CheckLanguage(TranslateCommonMetrics::UMA_CONTENT_LANGUAGE, 1, 0, 1);
+ TranslateCommonMetrics::ReportContentLanguage("en", "en");
+ recorder.CheckLanguage(TranslateCommonMetrics::UMA_CONTENT_LANGUAGE, 1, 1, 1);
}
-TEST(TranslateHelperMetricsTest, ReportHtmlLang) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_HTML_LANG));
-
- recorder.CheckLanguage(TranslateHelperMetrics::UMA_HTML_LANG, 0, 0, 0);
- TranslateHelperMetrics::ReportHtmlLang(std::string(), std::string());
- recorder.CheckLanguage(TranslateHelperMetrics::UMA_HTML_LANG, 1, 0, 0);
- TranslateHelperMetrics::ReportHtmlLang("ja_JP", "ja-JP");
- recorder.CheckLanguage(TranslateHelperMetrics::UMA_HTML_LANG, 1, 0, 1);
- TranslateHelperMetrics::ReportHtmlLang("en", "en");
- recorder.CheckLanguage(TranslateHelperMetrics::UMA_HTML_LANG, 1, 1, 1);
+TEST(TranslateCommonMetricsTest, ReportHtmlLang) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_HTML_LANG));
+
+ recorder.CheckLanguage(TranslateCommonMetrics::UMA_HTML_LANG, 0, 0, 0);
+ TranslateCommonMetrics::ReportHtmlLang(std::string(), std::string());
+ recorder.CheckLanguage(TranslateCommonMetrics::UMA_HTML_LANG, 1, 0, 0);
+ TranslateCommonMetrics::ReportHtmlLang("ja_JP", "ja-JP");
+ recorder.CheckLanguage(TranslateCommonMetrics::UMA_HTML_LANG, 1, 0, 1);
+ TranslateCommonMetrics::ReportHtmlLang("en", "en");
+ recorder.CheckLanguage(TranslateCommonMetrics::UMA_HTML_LANG, 1, 1, 1);
}
-TEST(TranslateHelperMetricsTest, ReportLanguageVerification) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_LANGUAGE_VERIFICATION));
+TEST(TranslateCommonMetricsTest, ReportLanguageVerification) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_LANGUAGE_VERIFICATION));
recorder.CheckLanguageVerification(0, 0, 0, 0, 0, 0, 0);
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED);
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED);
recorder.CheckLanguageVerification(1, 0, 0, 0, 0, 0, 0);
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY);
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_ONLY);
recorder.CheckLanguageVerification(1, 1, 0, 0, 0, 0, 0);
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN);
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_UNKNOWN);
recorder.CheckLanguageVerification(1, 1, 1, 0, 0, 0, 0);
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE);
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_AGREE);
recorder.CheckLanguageVerification(1, 1, 1, 1, 0, 0, 0);
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE);
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE);
recorder.CheckLanguageVerification(1, 1, 1, 1, 1, 0, 0);
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_TRUST_CLD);
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_TRUST_CLD);
recorder.CheckLanguageVerification(1, 1, 1, 1, 1, 1, 0);
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE);
+ TranslateCommonMetrics::ReportLanguageVerification(
+ TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE);
recorder.CheckLanguageVerification(1, 1, 1, 1, 1, 1, 1);
}
-TEST(TranslateHelperMetricsTest, ReportTimeToBeReady) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_TIME_TO_BE_READY));
+TEST(TranslateCommonMetricsTest, ReportTimeToBeReady) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_TIME_TO_BE_READY));
recorder.CheckTotalCount(0);
- TranslateHelperMetrics::ReportTimeToBeReady(3.14);
+ TranslateCommonMetrics::ReportTimeToBeReady(3.14);
recorder.CheckValueInLogs(3.14);
recorder.CheckTotalCount(1);
}
-TEST(TranslateHelperMetricsTest, ReportTimeToLoad) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_TIME_TO_LOAD));
+TEST(TranslateCommonMetricsTest, ReportTimeToLoad) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_TIME_TO_LOAD));
recorder.CheckTotalCount(0);
- TranslateHelperMetrics::ReportTimeToLoad(573.0);
+ TranslateCommonMetrics::ReportTimeToLoad(573.0);
recorder.CheckValueInLogs(573.0);
recorder.CheckTotalCount(1);
}
-TEST(TranslateHelperMetricsTest, ReportTimeToTranslate) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_TIME_TO_TRANSLATE));
+TEST(TranslateCommonMetricsTest, ReportTimeToTranslate) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_TIME_TO_TRANSLATE));
recorder.CheckTotalCount(0);
- TranslateHelperMetrics::ReportTimeToTranslate(4649.0);
+ TranslateCommonMetrics::ReportTimeToTranslate(4649.0);
recorder.CheckValueInLogs(4649.0);
recorder.CheckTotalCount(1);
}
-TEST(TranslateHelperMetricsTest, ReportUserActionDuration) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_USER_ACTION_DURATION));
+TEST(TranslateCommonMetricsTest, ReportUserActionDuration) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_USER_ACTION_DURATION));
recorder.CheckTotalCount(0);
TimeTicks begin = TimeTicks::Now();
TimeTicks end = begin + base::TimeDelta::FromSeconds(3776);
- TranslateHelperMetrics::ReportUserActionDuration(begin, end);
+ TranslateCommonMetrics::ReportUserActionDuration(begin, end);
recorder.CheckValueInLogs(3776000.0);
recorder.CheckTotalCount(1);
}
-TEST(TranslateHelperMetricsTest, ReportPageScheme) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_PAGE_SCHEME));
+TEST(TranslateCommonMetricsTest, ReportPageScheme) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_PAGE_SCHEME));
recorder.CheckScheme(0, 0, 0);
- TranslateHelperMetrics::ReportPageScheme("http");
+ TranslateCommonMetrics::ReportPageScheme("http");
recorder.CheckScheme(1, 0, 0);
- TranslateHelperMetrics::ReportPageScheme("https");
+ TranslateCommonMetrics::ReportPageScheme("https");
recorder.CheckScheme(1, 1, 0);
- TranslateHelperMetrics::ReportPageScheme("ftp");
+ TranslateCommonMetrics::ReportPageScheme("ftp");
recorder.CheckScheme(1, 1, 1);
}
-TEST(TranslateHelperMetricsTest, ReportSimilarLanguageMatch) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH));
+TEST(TranslateCommonMetricsTest, ReportSimilarLanguageMatch) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_SIMILAR_LANGUAGE_MATCH));
recorder.CheckTotalCount(0);
EXPECT_EQ(0, recorder.GetCount(kTrue));
EXPECT_EQ(0, recorder.GetCount(kFalse));
- TranslateHelperMetrics::ReportSimilarLanguageMatch(true);
+ TranslateCommonMetrics::ReportSimilarLanguageMatch(true);
EXPECT_EQ(1, recorder.GetCount(kTrue));
EXPECT_EQ(0, recorder.GetCount(kFalse));
- TranslateHelperMetrics::ReportSimilarLanguageMatch(false);
+ TranslateCommonMetrics::ReportSimilarLanguageMatch(false);
EXPECT_EQ(1, recorder.GetCount(kTrue));
EXPECT_EQ(1, recorder.GetCount(kFalse));
}
#if defined(ENABLE_LANGUAGE_DETECTION)
-TEST(TranslateHelperMetricsTest, ReportLanguageDetectionTime) {
- MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
- TranslateHelperMetrics::UMA_LANGUAGE_DETECTION));
+TEST(TranslateCommonMetricsTest, ReportLanguageDetectionTime) {
+ MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName(
+ TranslateCommonMetrics::UMA_LANGUAGE_DETECTION));
recorder.CheckTotalCount(0);
TimeTicks begin = TimeTicks::Now();
TimeTicks end = begin + base::TimeDelta::FromMicroseconds(9009);
- TranslateHelperMetrics::ReportLanguageDetectionTime(begin, end);
+ TranslateCommonMetrics::ReportLanguageDetectionTime(begin, end);
recorder.CheckValueInLogs(9.009);
recorder.CheckTotalCount(1);
}
diff --git a/chrome/renderer/DEPS b/chrome/renderer/DEPS
index 619dd3b..c8ec078 100644
--- a/chrome/renderer/DEPS
+++ b/chrome/renderer/DEPS
@@ -19,7 +19,6 @@ include_rules = [
"+webkit/plugins",
"+webkit/renderer",
- "+third_party/cld/encodings/compact_lang_det/win",
"+third_party/npapi/bindings",
"+third_party/re2",
"+third_party/smhasher",
diff --git a/chrome/renderer/translate/translate_helper.cc b/chrome/renderer/translate/translate_helper.cc
index 0741757..8d3c662 100644
--- a/chrome/renderer/translate/translate_helper.cc
+++ b/chrome/renderer/translate/translate_helper.cc
@@ -9,13 +9,12 @@
#include "base/logging.h"
#include "base/message_loop.h"
#include "base/strings/string16.h"
-#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/common/chrome_constants.h"
#include "chrome/common/render_messages.h"
-#include "chrome/common/translate/translate_util.h"
-#include "chrome/renderer/translate/translate_helper_metrics.h"
+#include "chrome/common/translate/language_detection_util.h"
+#include "chrome/common/translate/translate_common_metrics.h"
#include "content/public/renderer/render_view.h"
#include "third_party/WebKit/public/web/WebDocument.h"
#include "third_party/WebKit/public/web/WebElement.h"
@@ -26,10 +25,6 @@
#include "third_party/WebKit/public/web/WebView.h"
#include "v8/include/v8.h"
-#if defined(ENABLE_LANGUAGE_DETECTION)
-#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
-#endif
-
using WebKit::WebDocument;
using WebKit::WebElement;
using WebKit::WebFrame;
@@ -56,38 +51,6 @@ const int kTranslateStatusCheckDelayMs = 400;
// Language name passed to the Translate element for it to detect the language.
const char kAutoDetectionLanguage[] = "auto";
-// Similar language code list. Some languages are very similar and difficult
-// for CLD to distinguish.
-struct SimilarLanguageCode {
- const char* const code;
- int group;
-};
-
-const SimilarLanguageCode kSimilarLanguageCodes[] = {
- {"bs", 1},
- {"hr", 1},
- {"hi", 2},
- {"ne", 2},
-};
-
-// Checks |kSimilarLanguageCodes| and returns group code.
-int GetSimilarLanguageGroupCode(const std::string& language) {
- for (size_t i = 0; i < arraysize(kSimilarLanguageCodes); ++i) {
- if (language.find(kSimilarLanguageCodes[i].code) != 0)
- continue;
- return kSimilarLanguageCodes[i].group;
- }
- return 0;
-}
-
-// Well-known languages which often have wrong server configuration of
-// Content-Language: en.
-// TODO(toyoshim): Remove these static tables and caller functions to
-// chrome/common/translate, and implement them as std::set<>.
-const char* kWellKnownCodesOnWrongConfiguration[] = {
- "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th"
-};
-
} // namespace
////////////////////////////////////////////////////////////////////////////////
@@ -128,7 +91,7 @@ void TranslateHelper::PageCaptured(int page_id, const string16& contents) {
html_lang = html_element.getAttribute("lang").utf8();
std::string cld_language;
bool is_cld_reliable;
- std::string language = DeterminePageLanguage(
+ std::string language = LanguageDetectionUtil::DeterminePageLanguage(
content_language, html_lang, contents, &cld_language, &is_cld_reliable);
if (language.empty())
@@ -163,42 +126,6 @@ void TranslateHelper::CancelPendingTranslation() {
target_lang_.clear();
}
-#if defined(ENABLE_LANGUAGE_DETECTION)
-// static
-std::string TranslateHelper::DetermineTextLanguage(const string16& text,
- bool* is_cld_reliable) {
- std::string language = chrome::kUnknownLanguageCode;
- int num_languages = 0;
- int text_bytes = 0;
- bool is_reliable = false;
- Language cld_language =
- DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
- &num_languages, NULL, &text_bytes);
- if (is_cld_reliable != NULL)
- *is_cld_reliable = is_reliable;
-
- // We don't trust the result if the CLD reports that the detection is not
- // reliable, or if the actual text used to detect the language was less than
- // 100 bytes (short texts can often lead to wrong results).
- // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that
- // the determined language code is correct with 50% confidence. Chrome should
- // handle the real confidence value to judge.
- if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES &&
- cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
- // We should not use LanguageCode_ISO_639_1 because it does not cover all
- // the languages CLD can detect. As a result, it'll return the invalid
- // language code for tradtional Chinese among others.
- // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and
- // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
- // for Simplified Chinese.
- language = LanguageCodeWithDialects(cld_language);
- }
- VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text
- << "\n*************************************\n";
- return language;
-}
-#endif // defined(ENABLE_LANGUAGE_DETECTION)
-
////////////////////////////////////////////////////////////////////////////////
// TranslateHelper, protected:
//
@@ -303,218 +230,6 @@ double TranslateHelper::ExecuteScriptAndGetDoubleResult(
////////////////////////////////////////////////////////////////////////////////
// TranslateHelper, private:
//
-// static
-void TranslateHelper::CorrectLanguageCodeTypo(std::string* code) {
- DCHECK(code);
-
- size_t coma_index = code->find(',');
- if (coma_index != std::string::npos) {
- // There are more than 1 language specified, just keep the first one.
- *code = code->substr(0, coma_index);
- }
- TrimWhitespaceASCII(*code, TRIM_ALL, code);
-
- // An underscore instead of a dash is a frequent mistake.
- size_t underscore_index = code->find('_');
- if (underscore_index != std::string::npos)
- (*code)[underscore_index] = '-';
-
- // Change everything up to a dash to lower-case and everything after to upper.
- size_t dash_index = code->find('-');
- if (dash_index != std::string::npos) {
- *code = StringToLowerASCII(code->substr(0, dash_index)) +
- StringToUpperASCII(code->substr(dash_index));
- } else {
- *code = StringToLowerASCII(*code);
- }
-}
-
-// static
-bool TranslateHelper::IsValidLanguageCode(const std::string& code) {
- // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/.
- // TODO(hajimehoshi): How about es-419, which is used as an Accept language?
- std::vector<std::string> chunks;
- base::SplitString(code, '-', &chunks);
-
- if (chunks.size() < 1 || 2 < chunks.size())
- return false;
-
- const std::string& main_code = chunks[0];
-
- if (main_code.size() < 1 || 3 < main_code.size())
- return false;
-
- for (std::string::const_iterator it = main_code.begin();
- it != main_code.end(); ++it) {
- if (!IsAsciiAlpha(*it))
- return false;
- }
-
- if (chunks.size() == 1)
- return true;
-
- const std::string& sub_code = chunks[1];
-
- if (sub_code.size() != 2)
- return false;
-
- for (std::string::const_iterator it = sub_code.begin();
- it != sub_code.end(); ++it) {
- if (!IsAsciiAlpha(*it))
- return false;
- }
-
- return true;
-}
-
-// static
-void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) {
- // Correct well-known format errors.
- CorrectLanguageCodeTypo(code);
-
- if (!IsValidLanguageCode(*code)) {
- *code = std::string();
- return;
- }
-
- TranslateUtil::ToTranslateLanguageSynonym(code);
-}
-
-// static
-bool TranslateHelper::IsSameOrSimilarLanguages(
- const std::string& page_language, const std::string& cld_language) {
- // Language code part of |page_language| is matched to one of |cld_language|.
- // Country code is ignored here.
- if (page_language.size() >= 2 &&
- cld_language.find(page_language.c_str(), 0, 2) == 0) {
- // Languages are matched strictly. Reports false to metrics, but returns
- // true.
- TranslateHelperMetrics::ReportSimilarLanguageMatch(false);
- return true;
- }
-
- // Check if |page_language| and |cld_language| are in the similar language
- // list and belong to the same language group.
- int page_code = GetSimilarLanguageGroupCode(page_language);
- bool match = page_code != 0 &&
- page_code == GetSimilarLanguageGroupCode(cld_language);
-
- TranslateHelperMetrics::ReportSimilarLanguageMatch(match);
- return match;
-}
-
-// static
-bool TranslateHelper::MaybeServerWrongConfiguration(
- const std::string& page_language, const std::string& cld_language) {
- // If |page_language| is not "en-*", respect it and just return false here.
- if (!StartsWithASCII(page_language, "en", false))
- return false;
-
- // A server provides a language meta information representing "en-*". But it
- // might be just a default value due to missing user configuration.
- // Let's trust |cld_language| if the determined language is not difficult to
- // distinguish from English, and the language is one of well-known languages
- // which often provide "en-*" meta information mistakenly.
- for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) {
- if (cld_language == kWellKnownCodesOnWrongConfiguration[i])
- return true;
- }
- return false;
-}
-
-// static
-bool TranslateHelper::CanCLDComplementSubCode(
- const std::string& page_language, const std::string& cld_language) {
- // Translate server cannot treat general Chinese. If Content-Language and
- // CLD agree that the language is Chinese and Content-Language doesn't know
- // which dialect is used, CLD language has priority.
- // TODO(hajimehoshi): How about the other dialects like zh-MO?
- return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false);
-}
-
-// static
-std::string TranslateHelper::DeterminePageLanguage(const std::string& code,
- const std::string& html_lang,
- const string16& contents,
- std::string* cld_language_p,
- bool* is_cld_reliable_p) {
-#if defined(ENABLE_LANGUAGE_DETECTION)
- base::TimeTicks begin_time = base::TimeTicks::Now();
- bool is_cld_reliable;
- std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable);
- TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time,
- base::TimeTicks::Now());
-
- if (cld_language_p != NULL)
- *cld_language_p = cld_language;
- if (is_cld_reliable_p != NULL)
- *is_cld_reliable_p = is_cld_reliable;
- TranslateUtil::ToTranslateLanguageSynonym(&cld_language);
-#endif // defined(ENABLE_LANGUAGE_DETECTION)
-
- // Check if html lang attribute is valid.
- std::string modified_html_lang;
- if (!html_lang.empty()) {
- modified_html_lang = html_lang;
- ApplyLanguageCodeCorrection(&modified_html_lang);
- TranslateHelperMetrics::ReportHtmlLang(html_lang, modified_html_lang);
- VLOG(9) << "html lang based language code: " << modified_html_lang;
- }
-
- // Check if Content-Language is valid.
- std::string modified_code;
- if (!code.empty()) {
- modified_code = code;
- ApplyLanguageCodeCorrection(&modified_code);
- TranslateHelperMetrics::ReportContentLanguage(code, modified_code);
- }
-
- // Adopt |modified_html_lang| if it is valid. Otherwise, adopt
- // |modified_code|.
- std::string language = modified_html_lang.empty() ? modified_code :
- modified_html_lang;
-
-#if defined(ENABLE_LANGUAGE_DETECTION)
- // If |language| is empty, just use CLD result even though it might be
- // chrome::kUnknownLanguageCode.
- if (language.empty()) {
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY);
- return cld_language;
- }
-
- if (cld_language == chrome::kUnknownLanguageCode) {
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN);
- return language;
- } else if (IsSameOrSimilarLanguages(language, cld_language)) {
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE);
- return language;
- } else if (MaybeServerWrongConfiguration(language, cld_language)) {
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_TRUST_CLD);
- return cld_language;
- } else if (CanCLDComplementSubCode(language, cld_language)) {
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE);
- return cld_language;
- } else {
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE);
- // Content-Language value might be wrong because CLD says that this page
- // is written in another language with confidence.
- // In this case, Chrome doesn't rely on any of the language codes, and
- // gives up suggesting a translation.
- return std::string(chrome::kUnknownLanguageCode);
- }
-#else // defined(ENABLE_LANGUAGE_DETECTION)
- TranslateHelperMetrics::ReportLanguageVerification(
- TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED);
-#endif // defined(ENABLE_LANGUAGE_DETECTION)
-
- return language;
-}
// static
bool TranslateHelper::IsTranslationAllowed(WebDocument* document) {
@@ -589,11 +304,11 @@ void TranslateHelper::OnTranslatePage(int page_id,
source_lang : kAutoDetectionLanguage;
target_lang_ = target_lang;
- TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_,
+ TranslateCommonMetrics::ReportUserActionDuration(language_determined_time_,
base::TimeTicks::Now());
GURL url(main_frame->document().url());
- TranslateHelperMetrics::ReportPageScheme(url.scheme());
+ TranslateCommonMetrics::ReportPageScheme(url.scheme());
if (!IsTranslateLibAvailable()) {
// Evaluate the script to add the translation related method to the global
@@ -656,7 +371,7 @@ void TranslateHelper::CheckTranslateStatus() {
translation_pending_ = false;
// Check JavaScript performance counters for UMA reports.
- TranslateHelperMetrics::ReportTimeToTranslate(
+ TranslateCommonMetrics::ReportTimeToTranslate(
ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
// Notify the browser we are done.
@@ -697,9 +412,9 @@ void TranslateHelper::TranslatePageImpl(int count) {
// The library is loaded, and ready for translation now.
// Check JavaScript performance counters for UMA reports.
- TranslateHelperMetrics::ReportTimeToBeReady(
+ TranslateCommonMetrics::ReportTimeToBeReady(
ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
- TranslateHelperMetrics::ReportTimeToLoad(
+ TranslateCommonMetrics::ReportTimeToLoad(
ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
if (!StartTranslation()) {
diff --git a/chrome/renderer/translate/translate_helper.h b/chrome/renderer/translate/translate_helper.h
index 21fbcaa..3b7273f 100644
--- a/chrome/renderer/translate/translate_helper.h
+++ b/chrome/renderer/translate/translate_helper.h
@@ -87,7 +87,6 @@ class TranslateHelper : public content::RenderViewObserver {
virtual double ExecuteScriptAndGetDoubleResult(const std::string& script);
private:
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, IsValidLanguageCode);
FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, AdoptHtmlLang);
FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
CLDAgreeWithLanguageCodeHavingCountryCode);
@@ -101,55 +100,14 @@ class TranslateHelper : public content::RenderViewObserver {
FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, SimilarLanguageCode);
FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, WellKnownWrongConfiguration);
- // Corrects language code if it contains well-known mistakes.
- static void CorrectLanguageCodeTypo(std::string* code);
-
// Converts language code to the one used in server supporting list.
static void ConvertLanguageCodeSynonym(std::string* code);
- // Checks if the language code's format is valid.
- static bool IsValidLanguageCode(const std::string& code);
-
- // Applies a series of language code modification in proper order.
- static void ApplyLanguageCodeCorrection(std::string* code);
-
- // Checks if languages are matched, or similar. This function returns true
- // against a language pair containing a language which is difficult for CLD
- // to distinguish.
- static bool IsSameOrSimilarLanguages(const std::string& page_language,
- const std::string& cld_language);
-
- // Checks if languages pair is one of well-known pairs of wrong server
- // configuration.
- static bool MaybeServerWrongConfiguration(const std::string& page_language,
- const std::string& cld_language);
-
- // Checks if CLD can complement a sub code when the page language doesn't
- // know the sub code.
- static bool CanCLDComplementSubCode(const std::string& page_language,
- const std::string& cld_language);
-
- // Determines content page language from Content-Language code and contents.
- static std::string DeterminePageLanguage(const std::string& code,
- const std::string& html_lang,
- const string16& contents,
- std::string* cld_language,
- bool* is_cld_reliable);
-
// Returns whether the page associated with |document| is a candidate for
// translation. Some pages can explictly specify (via a meta-tag) that they
// should not be translated.
static bool IsTranslationAllowed(WebKit::WebDocument* document);
-#if defined(ENABLE_LANGUAGE_DETECTION)
- // Returns the ISO 639_1 language code of the specified |text|, or 'unknown'
- // if it failed.
- // |is_cld_reliable| will be set as true if CLD says the detection is
- // reliable.
- static std::string DetermineTextLanguage(const string16& text,
- bool* is_cld_reliable);
-#endif
-
// RenderViewObserver implementation.
virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
diff --git a/chrome/renderer/translate/translate_helper_unittest.cc b/chrome/renderer/translate/translate_helper_unittest.cc
deleted file mode 100644
index f1d2161..0000000
--- a/chrome/renderer/translate/translate_helper_unittest.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright (c) 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/renderer/translate/translate_helper.h"
-
-#include "base/strings/utf_string_conversions.h"
-#include "chrome/common/chrome_constants.h"
-#include "testing/gtest/include/gtest/gtest.h"
-
-typedef testing::Test TranslateHelperTest;
-
-// Tests that well-known language code typos are fixed.
-TEST_F(TranslateHelperTest, LanguageCodeTypoCorrection) {
- std::string language;
-
- // Strip the second and later codes.
- language = std::string("ja,en");
- TranslateHelper::CorrectLanguageCodeTypo(&language);
- EXPECT_EQ("ja", language);
-
- // Replace underscore with hyphen.
- language = std::string("ja_JP");
- TranslateHelper::CorrectLanguageCodeTypo(&language);
- EXPECT_EQ("ja-JP", language);
-
- // Correct wrong cases.
- language = std::string("JA-jp");
- TranslateHelper::CorrectLanguageCodeTypo(&language);
- EXPECT_EQ("ja-JP", language);
-}
-
-// Tests that the language code format validation works.
-TEST_F(TranslateHelperTest, IsValidLanguageCode) {
- std::string language;
-
- language = std::string("ja");
- EXPECT_TRUE(TranslateHelper::IsValidLanguageCode(language));
-
- language = std::string("ja-JP");
- EXPECT_TRUE(TranslateHelper::IsValidLanguageCode(language));
-
- language = std::string("ceb");
- EXPECT_TRUE(TranslateHelper::IsValidLanguageCode(language));
-
- language = std::string("ceb-XX");
- EXPECT_TRUE(TranslateHelper::IsValidLanguageCode(language));
-
- // Invalid because the sub code consists of a number.
- language = std::string("utf-8");
- EXPECT_FALSE(TranslateHelper::IsValidLanguageCode(language));
-
- // Invalid because of six characters after hyphen.
- language = std::string("ja-YUKARI");
- EXPECT_FALSE(TranslateHelper::IsValidLanguageCode(language));
-
- // Invalid because of four characters.
- language = std::string("DHMO");
- EXPECT_FALSE(TranslateHelper::IsValidLanguageCode(language));
-}
-
-// Tests that similar language table works.
-TEST_F(TranslateHelperTest, SimilarLanguageCode) {
- EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("en", "en"));
- EXPECT_FALSE(TranslateHelper::IsSameOrSimilarLanguages("en", "ja"));
- EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("bs", "hr"));
- EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("sr-ME", "sr"));
- EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("ne", "hi"));
- EXPECT_FALSE(TranslateHelper::IsSameOrSimilarLanguages("bs", "hi"));
-}
-
-// Tests that well-known languages which often have wrong server configuration
-// are handled.
-TEST_F(TranslateHelperTest, WellKnownWrongConfiguration) {
- EXPECT_TRUE(TranslateHelper::MaybeServerWrongConfiguration("en", "ja"));
- EXPECT_TRUE(TranslateHelper::MaybeServerWrongConfiguration("en-US", "ja"));
- EXPECT_TRUE(TranslateHelper::MaybeServerWrongConfiguration("en", "zh-CN"));
- EXPECT_FALSE(TranslateHelper::MaybeServerWrongConfiguration("ja", "en"));
- EXPECT_FALSE(TranslateHelper::MaybeServerWrongConfiguration("en", "he"));
-}
-
-// Tests that the language meta tag providing wrong information is ignored by
-// TranslateHelper due to disagreement between meta tag and CLD.
-TEST_F(TranslateHelperTest, CLDDisagreeWithWrongLanguageCode) {
- string16 contents = ASCIIToUTF16(
- "<html><head><meta http-equiv='Content-Language' content='ja'></head>"
- "<body>This is a page apparently written in English. Even though "
- "content-language is provided, the value will be ignored if the value "
- "is suspicious.</body></html>");
- std::string cld_language;
- bool is_cld_reliable;
- std::string language =
- TranslateHelper::DeterminePageLanguage(std::string("ja"), std::string(),
- contents, &cld_language,
- &is_cld_reliable);
- EXPECT_EQ(chrome::kUnknownLanguageCode, language);
- EXPECT_EQ("en", cld_language);
- EXPECT_TRUE(is_cld_reliable);
-}
-
-// Tests that CLD agrees with a language meta tag providing "en-US" style
-// information.
-TEST_F(TranslateHelperTest, CLDAgreeWithLanguageCodeHavingCountryCode) {
- string16 contents = ASCIIToUTF16(
- "<html><head><meta http-equiv='Content-Language' content='en-US'></head>"
- "<body>This is a page apparently written in English. Even though "
- "content-language is provided, the value will be ignored if the value "
- "is suspicious.</body></html>");
- std::string cld_language;
- bool is_cld_reliable;
- std::string language =
- TranslateHelper::DeterminePageLanguage(std::string("en-US"),
- std::string(), contents,
- &cld_language, &is_cld_reliable);
- EXPECT_EQ("en-US", language);
- EXPECT_EQ("en", cld_language);
- EXPECT_TRUE(is_cld_reliable);
-}
-
-// Tests that the language meta tag providing wrong information is ignored and
-// CLD's language will be adopted by TranslateHelper due to an invalid meta tag.
-TEST_F(TranslateHelperTest, InvalidLanguageMetaTagProviding) {
- string16 contents = ASCIIToUTF16(
- "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>"
- "<body>This is a page apparently written in English. Even though "
- "content-language is provided, the value will be ignored and CLD's"
- " language will be adopted if the value is invalid.</body></html>");
- std::string cld_language;
- bool is_cld_reliable;
- std::string language =
- TranslateHelper::DeterminePageLanguage(std::string("utf-8"),
- std::string(), contents,
- &cld_language, &is_cld_reliable);
- EXPECT_EQ("en", language);
- EXPECT_EQ("en", cld_language);
- EXPECT_TRUE(is_cld_reliable);
-}
-
-// Tests that the language meta tag providing wrong information is ignored
-// because of valid html lang attribute.
-TEST_F(TranslateHelperTest, AdoptHtmlLang) {
- string16 contents = ASCIIToUTF16(
- "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>"
- "</head><body>This is a page apparently written in English. Even though "
- "content-language is provided, the value will be ignored if the value "
- "is suspicious.</body></html>");
- std::string cld_language;
- bool is_cld_reliable;
- std::string language =
- TranslateHelper::DeterminePageLanguage(std::string("ja"),
- std::string("en"),
- contents, &cld_language,
- &is_cld_reliable);
- EXPECT_EQ("en", language);
- EXPECT_EQ("en", cld_language);
- EXPECT_TRUE(is_cld_reliable);
-}