diff options
author | droger@chromium.org <droger@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-07-11 13:54:22 +0000 |
---|---|---|
committer | droger@chromium.org <droger@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-07-11 13:54:22 +0000 |
commit | d7575c2d6ed0a3459a539ba9c63c26e39e335f5b (patch) | |
tree | f405169b772c4a73e3081b385bbfdd583c324e82 /chrome | |
parent | 2dda4b49e79423b7d9ba22e7b6b5a300adb11857 (diff) | |
download | chromium_src-d7575c2d6ed0a3459a539ba9c63c26e39e335f5b.zip chromium_src-d7575c2d6ed0a3459a539ba9c63c26e39e335f5b.tar.gz chromium_src-d7575c2d6ed0a3459a539ba9c63c26e39e335f5b.tar.bz2 |
Move language detection to chrome/common/.
This CL moves the language detection code from chrome/renderer/translate
to chrome/common/translate, in order to be able to use it on iOS.
This CL also enables the related unittests on iOS.
BUG=
Review URL: https://chromiumcodereview.appspot.com/18911002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@211108 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r-- | chrome/chrome_common.gypi | 10 | ||||
-rw-r--r-- | chrome/chrome_renderer.gypi | 10 | ||||
-rw-r--r-- | chrome/chrome_tests_unit.gypi | 6 | ||||
-rw-r--r-- | chrome/common/DEPS | 1 | ||||
-rw-r--r-- | chrome/common/translate/language_detection_util.cc | 304 | ||||
-rw-r--r-- | chrome/common/translate/language_detection_util.h | 44 | ||||
-rw-r--r-- | chrome/common/translate/language_detection_util_unittest.cc | 158 | ||||
-rw-r--r-- | chrome/common/translate/translate_common_metrics.cc (renamed from chrome/renderer/translate/translate_helper_metrics.cc) | 42 | ||||
-rw-r--r-- | chrome/common/translate/translate_common_metrics.h (renamed from chrome/renderer/translate/translate_helper_metrics.h) | 10 | ||||
-rw-r--r-- | chrome/common/translate/translate_common_metrics_unittest.cc (renamed from chrome/renderer/translate/translate_helper_metrics_unittest.cc) | 180 | ||||
-rw-r--r-- | chrome/renderer/DEPS | 1 | ||||
-rw-r--r-- | chrome/renderer/translate/translate_helper.cc | 301 | ||||
-rw-r--r-- | chrome/renderer/translate/translate_helper.h | 42 | ||||
-rw-r--r-- | chrome/renderer/translate/translate_helper_unittest.cc | 157 |
14 files changed, 645 insertions, 621 deletions
diff --git a/chrome/chrome_common.gypi b/chrome/chrome_common.gypi index b2d6a60..71a5eb5 100644 --- a/chrome/chrome_common.gypi +++ b/chrome/chrome_common.gypi @@ -441,6 +441,10 @@ 'common/time_format.h', 'common/translate/language_detection_details.cc', 'common/translate/language_detection_details.h', + 'common/translate/language_detection_util.cc', + 'common/translate/language_detection_util.h', + 'common/translate/translate_common_metrics.cc', + 'common/translate/translate_common_metrics.h', 'common/translate/translate_errors.h', 'common/translate/translate_util.cc', 'common/translate/translate_util.h', @@ -537,6 +541,7 @@ ['include', '_ios\\.(cc|mm)$'], ['include', '(^|/)ios/'], ['include', '^common/chrome_version_info\\.cc$'], + ['include', '^common/translate'], ['include', '^common/zip'], ], 'include_dirs': [ @@ -621,6 +626,11 @@ 'common/media/webrtc_logging_messages.h', ] }], + ['enable_language_detection==1', { + 'dependencies': [ + '../third_party/cld/cld.gyp:cld', + ], + }], ], 'target_conditions': [ ['OS == "ios"', { diff --git a/chrome/chrome_renderer.gypi b/chrome/chrome_renderer.gypi index 652cb94..d82c3ef 100644 --- a/chrome/chrome_renderer.gypi +++ b/chrome/chrome_renderer.gypi @@ -328,8 +328,6 @@ 'renderer/tts_dispatcher.h', 'renderer/translate/translate_helper.cc', 'renderer/translate/translate_helper.h', - 'renderer/translate/translate_helper_metrics.cc', - 'renderer/translate/translate_helper_metrics.h', 'renderer/validation_message_agent.cc', 'renderer/validation_message_agent.h', 'renderer/web_apps.cc', @@ -381,14 +379,6 @@ '../third_party/mach_override/mach_override.gyp:mach_override', ], }], - ['enable_language_detection==1', { - 'dependencies': [ - '../third_party/cld/cld.gyp:cld', - ], - 'include_dirs': [ - '../third_party/cld', - ], - }], ['toolkit_uses_gtk == 1', { 'dependencies': [ '../build/linux/system.gyp:gtk', diff --git a/chrome/chrome_tests_unit.gypi b/chrome/chrome_tests_unit.gypi index 57f757c..673764f 100644 --- 
a/chrome/chrome_tests_unit.gypi +++ b/chrome/chrome_tests_unit.gypi @@ -1731,6 +1731,8 @@ 'common/switch_utils_unittest.cc', 'common/thumbnail_score_unittest.cc', 'common/time_format_unittest.cc', + 'common/translate/language_detection_util_unittest.cc', + 'common/translate/translate_common_metrics_unittest.cc', 'common/translate/translate_util_unittest.cc', 'common/worker_thread_ticker_unittest.cc', 'nacl/nacl_ipc_adapter_unittest.cc', @@ -1763,8 +1765,6 @@ 'renderer/spellchecker/spellcheck_provider_test.h', 'renderer/spellchecker/spellcheck_unittest.cc', 'renderer/spellchecker/spellcheck_worditerator_unittest.cc', - 'renderer/translate/translate_helper_unittest.cc', - 'renderer/translate/translate_helper_metrics_unittest.cc', 'renderer/web_apps_unittest.cc', 'service/cloud_print/cloud_print_helpers_unittest.cc', 'service/cloud_print/cloud_print_token_store_unittest.cc', @@ -1940,6 +1940,8 @@ ['include', '_ios\\.(cc|mm)$'], ['include', '(^|/)ios/'], # TODO(ios): Add files here as they are updated to compile on iOS. + ['include', '^common/translate/language_detection_util_unittest\\.cc$'], + ['include', '^common/translate/translate_util_unittest\\.cc$'], ['include', '^common/zip_'], ], 'conditions': [ diff --git a/chrome/common/DEPS b/chrome/common/DEPS index a066e70..52aad28 100644 --- a/chrome/common/DEPS +++ b/chrome/common/DEPS @@ -30,6 +30,7 @@ include_rules = [ # Other libraries. "+chrome/third_party/xdg_user_dirs", "+third_party/bzip2", + "+third_party/cld/encodings/compact_lang_det/win", "+third_party/mt19937ar", "+third_party/npapi", "+third_party/re2", diff --git a/chrome/common/translate/language_detection_util.cc b/chrome/common/translate/language_detection_util.cc new file mode 100644 index 0000000..9710614 --- /dev/null +++ b/chrome/common/translate/language_detection_util.cc @@ -0,0 +1,304 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/common/translate/language_detection_util.h" + +#include "base/logging.h" +#include "base/strings/string_split.h" +#include "base/strings/string_util.h" +#include "base/time/time.h" +#include "chrome/common/chrome_constants.h" +#include "chrome/common/translate/translate_common_metrics.h" +#include "chrome/common/translate/translate_util.h" + +#if defined(ENABLE_LANGUAGE_DETECTION) +#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" +#endif + +namespace { + +// Similar language code list. Some languages are very similar and difficult +// for CLD to distinguish. +struct SimilarLanguageCode { + const char* const code; + int group; +}; + +const SimilarLanguageCode kSimilarLanguageCodes[] = { + {"bs", 1}, + {"hr", 1}, + {"hi", 2}, + {"ne", 2}, +}; + +// Checks |kSimilarLanguageCodes| and returns group code. +int GetSimilarLanguageGroupCode(const std::string& language) { + for (size_t i = 0; i < arraysize(kSimilarLanguageCodes); ++i) { + if (language.find(kSimilarLanguageCodes[i].code) != 0) + continue; + return kSimilarLanguageCodes[i].group; + } + return 0; +} + +// Well-known languages which often have wrong server configuration of +// Content-Language: en. +// TODO(toyoshim): Remove these static tables and caller functions to +// chrome/common/translate, and implement them as std::set<>. +const char* kWellKnownCodesOnWrongConfiguration[] = { + "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th" +}; + +// Applies a series of language code modification in proper order. +void ApplyLanguageCodeCorrection(std::string* code) { + // Correct well-known format errors. 
+ LanguageDetectionUtil::CorrectLanguageCodeTypo(code); + + if (!LanguageDetectionUtil::IsValidLanguageCode(*code)) { + *code = std::string(); + return; + } + + TranslateUtil::ToTranslateLanguageSynonym(code); +} + +#if defined(ENABLE_LANGUAGE_DETECTION) +// Returns the ISO 639 language code of the specified |text|, or 'unknown' if it +// failed. +// |is_cld_reliable| will be set as true if CLD says the detection is reliable. +std::string DetermineTextLanguage(const base::string16& text, + bool* is_cld_reliable) { + std::string language = chrome::kUnknownLanguageCode; + int num_languages = 0; + int text_bytes = 0; + bool is_reliable = false; + Language cld_language = + DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, + &num_languages, NULL, &text_bytes); + if (is_cld_reliable != NULL) + *is_cld_reliable = is_reliable; + + // We don't trust the result if the CLD reports that the detection is not + // reliable, or if the actual text used to detect the language was less than + // 100 bytes (short texts can often lead to wrong results). + // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that + // the determined language code is correct with 50% confidence. Chrome should + // handle the real confidence value to judge. + if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && + cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { + // We should not use LanguageCode_ISO_639_1 because it does not cover all + // the languages CLD can detect. As a result, it'll return the invalid + // language code for traditional Chinese among others. + // |LanguageCodeWithDialects| will go through ISO 639-1, ISO-639-2 and + // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN + // for Simplified Chinese. 
+ language = LanguageCodeWithDialects(cld_language); + } + VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text + << "\n*************************************\n"; + return language; +} +#endif // defined(ENABLE_LANGUAGE_DETECTION) + +// Checks if CLD can complement a sub code when the page language doesn't know +// the sub code. +bool CanCLDComplementSubCode( + const std::string& page_language, const std::string& cld_language) { + // Translate server cannot treat general Chinese. If Content-Language and + // CLD agree that the language is Chinese and Content-Language doesn't know + // which dialect is used, CLD language has priority. + // TODO(hajimehoshi): How about the other dialects like zh-MO? + return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false); +} + +} // namespace + +namespace LanguageDetectionUtil { + +std::string DeterminePageLanguage(const std::string& code, + const std::string& html_lang, + const base::string16& contents, + std::string* cld_language_p, + bool* is_cld_reliable_p) { +#if defined(ENABLE_LANGUAGE_DETECTION) + base::TimeTicks begin_time = base::TimeTicks::Now(); + bool is_cld_reliable; + std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); + TranslateCommonMetrics::ReportLanguageDetectionTime(begin_time, + base::TimeTicks::Now()); + + if (cld_language_p != NULL) + *cld_language_p = cld_language; + if (is_cld_reliable_p != NULL) + *is_cld_reliable_p = is_cld_reliable; + TranslateUtil::ToTranslateLanguageSynonym(&cld_language); +#endif // defined(ENABLE_LANGUAGE_DETECTION) + + // Check if html lang attribute is valid. + std::string modified_html_lang; + if (!html_lang.empty()) { + modified_html_lang = html_lang; + ApplyLanguageCodeCorrection(&modified_html_lang); + TranslateCommonMetrics::ReportHtmlLang(html_lang, modified_html_lang); + VLOG(9) << "html lang based language code: " << modified_html_lang; + } + + // Check if Content-Language is valid. 
+ std::string modified_code; + if (!code.empty()) { + modified_code = code; + ApplyLanguageCodeCorrection(&modified_code); + TranslateCommonMetrics::ReportContentLanguage(code, modified_code); + } + + // Adopt |modified_html_lang| if it is valid. Otherwise, adopt + // |modified_code|. + std::string language = modified_html_lang.empty() ? modified_code : + modified_html_lang; + +#if defined(ENABLE_LANGUAGE_DETECTION) + // If |language| is empty, just use CLD result even though it might be + // chrome::kUnknownLanguageCode. + if (language.empty()) { + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); + return cld_language; + } + + if (cld_language == chrome::kUnknownLanguageCode) { + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_UNKNOWN); + return language; + } else if (IsSameOrSimilarLanguages(language, cld_language)) { + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_AGREE); + return language; + } else if (MaybeServerWrongConfiguration(language, cld_language)) { + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_TRUST_CLD); + return cld_language; + } else if (CanCLDComplementSubCode(language, cld_language)) { + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE); + return cld_language; + } else { + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE); + // Content-Language value might be wrong because CLD says that this page + // is written in another language with confidence. + // In this case, Chrome doesn't rely on any of the language codes, and + // gives up suggesting a translation. 
+ return std::string(chrome::kUnknownLanguageCode); + } +#else // defined(ENABLE_LANGUAGE_DETECTION) + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED); +#endif // defined(ENABLE_LANGUAGE_DETECTION) + + return language; +} + +void CorrectLanguageCodeTypo(std::string* code) { + DCHECK(code); + + size_t coma_index = code->find(','); + if (coma_index != std::string::npos) { + // There are more than 1 language specified, just keep the first one. + *code = code->substr(0, coma_index); + } + TrimWhitespaceASCII(*code, TRIM_ALL, code); + + // An underscore instead of a dash is a frequent mistake. + size_t underscore_index = code->find('_'); + if (underscore_index != std::string::npos) + (*code)[underscore_index] = '-'; + + // Change everything up to a dash to lower-case and everything after to upper. + size_t dash_index = code->find('-'); + if (dash_index != std::string::npos) { + *code = StringToLowerASCII(code->substr(0, dash_index)) + + StringToUpperASCII(code->substr(dash_index)); + } else { + *code = StringToLowerASCII(*code); + } +} + +bool IsValidLanguageCode(const std::string& code) { + // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. + // TODO(hajimehoshi): How about es-419, which is used as an Accept language? 
+ std::vector<std::string> chunks; + base::SplitString(code, '-', &chunks); + + if (chunks.size() < 1 || 2 < chunks.size()) + return false; + + const std::string& main_code = chunks[0]; + + if (main_code.size() < 1 || 3 < main_code.size()) + return false; + + for (std::string::const_iterator it = main_code.begin(); + it != main_code.end(); ++it) { + if (!IsAsciiAlpha(*it)) + return false; + } + + if (chunks.size() == 1) + return true; + + const std::string& sub_code = chunks[1]; + + if (sub_code.size() != 2) + return false; + + for (std::string::const_iterator it = sub_code.begin(); + it != sub_code.end(); ++it) { + if (!IsAsciiAlpha(*it)) + return false; + } + + return true; +} + +bool IsSameOrSimilarLanguages(const std::string& page_language, + const std::string& cld_language) { + // Language code part of |page_language| is matched to one of |cld_language|. + // Country code is ignored here. + if (page_language.size() >= 2 && + cld_language.find(page_language.c_str(), 0, 2) == 0) { + // Languages are matched strictly. Reports false to metrics, but returns + // true. + TranslateCommonMetrics::ReportSimilarLanguageMatch(false); + return true; + } + + // Check if |page_language| and |cld_language| are in the similar language + // list and belong to the same language group. + int page_code = GetSimilarLanguageGroupCode(page_language); + bool match = page_code != 0 && + page_code == GetSimilarLanguageGroupCode(cld_language); + + TranslateCommonMetrics::ReportSimilarLanguageMatch(match); + return match; +} + +bool MaybeServerWrongConfiguration(const std::string& page_language, + const std::string& cld_language) { + // If |page_language| is not "en-*", respect it and just return false here. + if (!StartsWithASCII(page_language, "en", false)) + return false; + + // A server provides a language meta information representing "en-*". But it + // might be just a default value due to missing user configuration. 
+ // Let's trust |cld_language| if the determined language is not difficult to + // distinguish from English, and the language is one of well-known languages + // which often provide "en-*" meta information mistakenly. + for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { + if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) + return true; + } + return false; +} + +} // namespace LanguageDetectionUtil diff --git a/chrome/common/translate/language_detection_util.h b/chrome/common/translate/language_detection_util.h new file mode 100644 index 0000000..787c0781 --- /dev/null +++ b/chrome/common/translate/language_detection_util.h @@ -0,0 +1,44 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_COMMON_TRANSLATE_LANGUAGE_DETECTION_UTIL_H_ +#define CHROME_COMMON_TRANSLATE_LANGUAGE_DETECTION_UTIL_H_ + +#include <string> + +#include "base/strings/string16.h" + +namespace LanguageDetectionUtil { + +// Determines content page language from Content-Language code and contents. +std::string DeterminePageLanguage(const std::string& code, + const std::string& html_lang, + const base::string16& contents, + std::string* cld_language, + bool* is_cld_reliable); + +// Corrects language code if it contains well-known mistakes. +// Called only by tests. +void CorrectLanguageCodeTypo(std::string* code); + +// Checks if the language code's format is valid. +// Called only by tests. +bool IsValidLanguageCode(const std::string& code); + +// Checks if languages are matched, or similar. This function returns true +// against a language pair containing a language which is difficult for CLD to +// distinguish. +// Called only by tests. +bool IsSameOrSimilarLanguages(const std::string& page_language, + const std::string& cld_language); + +// Checks if languages pair is one of well-known pairs of wrong server +// configuration. 
+// Called only by tests. +bool MaybeServerWrongConfiguration(const std::string& page_language, + const std::string& cld_language); + +} // namespace LanguageDetectionUtil + +#endif // CHROME_COMMON_TRANSLATE_LANGUAGE_DETECTION_UTIL_H_ diff --git a/chrome/common/translate/language_detection_util_unittest.cc b/chrome/common/translate/language_detection_util_unittest.cc new file mode 100644 index 0000000..4f8dbf3 --- /dev/null +++ b/chrome/common/translate/language_detection_util_unittest.cc @@ -0,0 +1,158 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/common/translate/language_detection_util.h" + +#include "base/strings/string16.h" +#include "base/strings/utf_string_conversions.h" +#include "chrome/common/chrome_constants.h" +#include "testing/gtest/include/gtest/gtest.h" + +typedef testing::Test LanguageDetectionUtilTest; + +// Tests that well-known language code typos are fixed. +TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) { + std::string language; + + // Strip the second and later codes. + language = std::string("ja,en"); + LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); + EXPECT_EQ("ja", language); + + // Replace underscore with hyphen. + language = std::string("ja_JP"); + LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); + EXPECT_EQ("ja-JP", language); + + // Correct wrong cases. + language = std::string("JA-jp"); + LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); + EXPECT_EQ("ja-JP", language); +} + +// Tests that language code format validation works. 
+TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) { + std::string language; + + language = std::string("ja"); + EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); + + language = std::string("ja-JP"); + EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); + + language = std::string("ceb"); + EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); + + language = std::string("ceb-XX"); + EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); + + // Invalid because the sub code consists of a number. + language = std::string("utf-8"); + EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); + + // Invalid because of six characters after hyphen. + language = std::string("ja-YUKARI"); + EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); + + // Invalid because of four characters. + language = std::string("DHMO"); + EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); +} + +// Tests that similar language table works. +TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) { + EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "en")); + EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "ja")); + EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hr")); + EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("sr-ME", "sr")); + EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("ne", "hi")); + EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hi")); +} + +// Tests that well-known languages which often have wrong server configuration +// are handled. 
+TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) { + EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", "ja")); + EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en-US", + "ja")); + EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", + "zh-CN")); + EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("ja", + "en")); + EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", + "he")); +} + +// Tests that the language meta tag providing wrong information is ignored by +// LanguageDetectionUtil due to disagreement between meta tag and CLD. +TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) { + base::string16 contents = ASCIIToUTF16( + "<html><head><meta http-equiv='Content-Language' content='ja'></head>" + "<body>This is a page apparently written in English. Even though " + "content-language is provided, the value will be ignored if the value " + "is suspicious.</body></html>"); + std::string cld_language; + bool is_cld_reliable; + std::string language = LanguageDetectionUtil::DeterminePageLanguage( + std::string("ja"), std::string(), contents, &cld_language, + &is_cld_reliable); + EXPECT_EQ(chrome::kUnknownLanguageCode, language); + EXPECT_EQ("en", cld_language); + EXPECT_TRUE(is_cld_reliable); +} + +// Tests that the language meta tag providing "en-US" style information is +// agreed by CLD. +TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) { + base::string16 contents = ASCIIToUTF16( + "<html><head><meta http-equiv='Content-Language' content='en-US'></head>" + "<body>This is a page apparently written in English. 
Even though " + "content-language is provided, the value will be ignored if the value " + "is suspicious.</body></html>"); + std::string cld_language; + bool is_cld_reliable; + std::string language = LanguageDetectionUtil::DeterminePageLanguage( + std::string("en-US"), std::string(), contents, &cld_language, + &is_cld_reliable); + EXPECT_EQ("en-US", language); + EXPECT_EQ("en", cld_language); + EXPECT_TRUE(is_cld_reliable); +} + +// Tests that the language meta tag providing wrong information is ignored and +// CLD's language will be adopted by LanguageDetectionUtil due to an invalid +// meta tag. +TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) { + base::string16 contents = ASCIIToUTF16( + "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>" + "<body>This is a page apparently written in English. Even though " + "content-language is provided, the value will be ignored and CLD's" + " language will be adopted if the value is invalid.</body></html>"); + std::string cld_language; + bool is_cld_reliable; + std::string language = LanguageDetectionUtil::DeterminePageLanguage( + std::string("utf-8"), std::string(), contents, &cld_language, + &is_cld_reliable); + EXPECT_EQ("en", language); + EXPECT_EQ("en", cld_language); + EXPECT_TRUE(is_cld_reliable); +} + +// Tests that the language meta tag providing wrong information is ignored +// because of valid html lang attribute. +TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) { + base::string16 contents = ASCIIToUTF16( + "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>" + "</head><body>This is a page apparently written in English. 
Even though " + "content-language is provided, the value will be ignored if the value " + "is suspicious.</body></html>"); + std::string cld_language; + bool is_cld_reliable; + std::string language = LanguageDetectionUtil::DeterminePageLanguage( + std::string("ja"), std::string("en"), contents, &cld_language, + &is_cld_reliable); + EXPECT_EQ("en", language); + EXPECT_EQ("en", cld_language); + EXPECT_TRUE(is_cld_reliable); +} diff --git a/chrome/renderer/translate/translate_helper_metrics.cc b/chrome/common/translate/translate_common_metrics.cc index 93ce5b0..028be6d 100644 --- a/chrome/renderer/translate/translate_helper_metrics.cc +++ b/chrome/common/translate/translate_common_metrics.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "chrome/renderer/translate/translate_helper_metrics.h" +#include "chrome/common/translate/translate_common_metrics.h" #include "base/basictypes.h" #include "base/metrics/histogram.h" @@ -26,63 +26,63 @@ const char kSchemeHttp[] = "http"; const char kSchemeHttps[] = "https"; struct MetricsEntry { - TranslateHelperMetrics::MetricsNameIndex index; + TranslateCommonMetrics::MetricsNameIndex index; const char* const name; }; // This entry table should be updated when new UMA items are added. 
const MetricsEntry kMetricsEntries[] = { - { TranslateHelperMetrics::UMA_LANGUAGE_DETECTION, + { TranslateCommonMetrics::UMA_LANGUAGE_DETECTION, kRenderer4LanguageDetection }, - { TranslateHelperMetrics::UMA_CONTENT_LANGUAGE, + { TranslateCommonMetrics::UMA_CONTENT_LANGUAGE, kTranslateContentLanguage }, - { TranslateHelperMetrics::UMA_HTML_LANG, + { TranslateCommonMetrics::UMA_HTML_LANG, kTranslateHtmlLang }, - { TranslateHelperMetrics::UMA_LANGUAGE_VERIFICATION, + { TranslateCommonMetrics::UMA_LANGUAGE_VERIFICATION, kTranslateLanguageVerification }, - { TranslateHelperMetrics::UMA_TIME_TO_BE_READY, + { TranslateCommonMetrics::UMA_TIME_TO_BE_READY, kTranslateTimeToBeReady }, - { TranslateHelperMetrics::UMA_TIME_TO_LOAD, + { TranslateCommonMetrics::UMA_TIME_TO_LOAD, kTranslateTimeToLoad }, - { TranslateHelperMetrics::UMA_TIME_TO_TRANSLATE, + { TranslateCommonMetrics::UMA_TIME_TO_TRANSLATE, kTranslateTimeToTranslate }, - { TranslateHelperMetrics::UMA_USER_ACTION_DURATION, + { TranslateCommonMetrics::UMA_USER_ACTION_DURATION, kTranslateUserActionDuration }, - { TranslateHelperMetrics::UMA_PAGE_SCHEME, + { TranslateCommonMetrics::UMA_PAGE_SCHEME, kTranslatePageScheme }, - { TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH, + { TranslateCommonMetrics::UMA_SIMILAR_LANGUAGE_MATCH, kTranslateSimilarLanguageMatch }, }; -COMPILE_ASSERT(arraysize(kMetricsEntries) == TranslateHelperMetrics::UMA_MAX, +COMPILE_ASSERT(arraysize(kMetricsEntries) == TranslateCommonMetrics::UMA_MAX, arraysize_of_kMetricsEntries_should_be_UMA_MAX); -TranslateHelperMetrics::LanguageCheckType GetLanguageCheckMetric( +TranslateCommonMetrics::LanguageCheckType GetLanguageCheckMetric( const std::string& provided_code, const std::string& revised_code) { if (provided_code.empty()) - return TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED; + return TranslateCommonMetrics::LANGUAGE_NOT_PROVIDED; else if (provided_code == revised_code) - return TranslateHelperMetrics::LANGUAGE_VALID; - return 
TranslateHelperMetrics::LANGUAGE_INVALID; + return TranslateCommonMetrics::LANGUAGE_VALID; + return TranslateCommonMetrics::LANGUAGE_INVALID; } } // namespace -namespace TranslateHelperMetrics { +namespace TranslateCommonMetrics { void ReportContentLanguage(const std::string& provided_code, const std::string& revised_code) { UMA_HISTOGRAM_ENUMERATION(kTranslateContentLanguage, GetLanguageCheckMetric(provided_code, revised_code), - TranslateHelperMetrics::LANGUAGE_MAX); + TranslateCommonMetrics::LANGUAGE_MAX); } void ReportHtmlLang(const std::string& provided_code, const std::string& revised_code) { UMA_HISTOGRAM_ENUMERATION(kTranslateHtmlLang, GetLanguageCheckMetric(provided_code, revised_code), - TranslateHelperMetrics::LANGUAGE_MAX); + TranslateCommonMetrics::LANGUAGE_MAX); } void ReportLanguageVerification(LanguageVerificationType type) { @@ -143,4 +143,4 @@ const char* GetMetricsName(MetricsNameIndex index) { return NULL; } -} // namespace TranslateHelperMetrics +} // namespace TranslateCommonMetrics diff --git a/chrome/renderer/translate/translate_helper_metrics.h b/chrome/common/translate/translate_common_metrics.h index cd0050f..cfd6b14 100644 --- a/chrome/renderer/translate/translate_helper_metrics.h +++ b/chrome/common/translate/translate_common_metrics.h @@ -2,14 +2,14 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef CHROME_RENDERER_TRANSLATE_TRANSLATE_HELPER_METRICS_H_ -#define CHROME_RENDERER_TRANSLATE_TRANSLATE_HELPER_METRICS_H_ +#ifndef CHROME_COMMON_TRANSLATE_TRANSLATE_COMMON_METRICS_H_ +#define CHROME_COMMON_TRANSLATE_TRANSLATE_COMMON_METRICS_H_ #include <string> #include "base/time/time.h" -namespace TranslateHelperMetrics { +namespace TranslateCommonMetrics { // An indexing type to query each UMA entry name via GetMetricsName() function. // Note: |kMetricsEntries| should be updated when a new entry is added here. 
@@ -103,6 +103,6 @@ void ReportSimilarLanguageMatch(bool match); // Gets UMA name for an entry specified by |index|. const char* GetMetricsName(MetricsNameIndex index); -} // namespace TranslateHelperMetrics +} // namespace TranslateCommonMetrics -#endif // CHROME_RENDERER_TRANSLATE_TRANSLATE_HELPER_METRICS_H_ +#endif // CHROME_COMMON_TRANSLATE_TRANSLATE_COMMON_METRICS_H_ diff --git a/chrome/renderer/translate/translate_helper_metrics_unittest.cc b/chrome/common/translate/translate_common_metrics_unittest.cc index b0d4585..4815cb1 100644 --- a/chrome/renderer/translate/translate_helper_metrics_unittest.cc +++ b/chrome/common/translate/translate_common_metrics_unittest.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "chrome/renderer/translate/translate_helper_metrics.h" +#include "chrome/common/translate/translate_common_metrics.h" #include "base/basictypes.h" #include "base/memory/scoped_ptr.h" @@ -33,23 +33,23 @@ class MetricsRecorder { base_samples_ = histogram->SnapshotSamples(); } - void CheckLanguage(TranslateHelperMetrics::MetricsNameIndex index, + void CheckLanguage(TranslateCommonMetrics::MetricsNameIndex index, int expected_not_provided, int expected_valid, int expected_invalid) { - ASSERT_EQ(TranslateHelperMetrics::GetMetricsName(index), key_); + ASSERT_EQ(TranslateCommonMetrics::GetMetricsName(index), key_); Snapshot(); EXPECT_EQ(expected_not_provided, GetCountWithoutSnapshot( - TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED)); + TranslateCommonMetrics::LANGUAGE_NOT_PROVIDED)); EXPECT_EQ(expected_valid, GetCountWithoutSnapshot( - TranslateHelperMetrics::LANGUAGE_VALID)); + TranslateCommonMetrics::LANGUAGE_VALID)); EXPECT_EQ(expected_invalid, GetCountWithoutSnapshot( - TranslateHelperMetrics::LANGUAGE_INVALID)); + TranslateCommonMetrics::LANGUAGE_INVALID)); } void CheckLanguageVerification(int expected_cld_disabled, @@ -59,54 +59,54 @@ class MetricsRecorder { int 
expected_cld_disagree, int expected_trust_cld, int expected_cld_complement_sub_code) { - ASSERT_EQ(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_LANGUAGE_VERIFICATION), key_); + ASSERT_EQ(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_LANGUAGE_VERIFICATION), key_); Snapshot(); EXPECT_EQ( expected_cld_disabled, GetCountWithoutSnapshot( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED)); + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED)); EXPECT_EQ( expected_cld_only, GetCountWithoutSnapshot( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY)); + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_ONLY)); EXPECT_EQ( expected_unknown, GetCountWithoutSnapshot( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN)); + TranslateCommonMetrics::LANGUAGE_VERIFICATION_UNKNOWN)); EXPECT_EQ( expected_cld_agree, GetCountWithoutSnapshot( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE)); + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_AGREE)); EXPECT_EQ( expected_cld_disagree, GetCountWithoutSnapshot( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE)); + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE)); EXPECT_EQ( expected_trust_cld, GetCountWithoutSnapshot( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_TRUST_CLD)); + TranslateCommonMetrics::LANGUAGE_VERIFICATION_TRUST_CLD)); EXPECT_EQ( expected_cld_complement_sub_code, GetCountWithoutSnapshot( - TranslateHelperMetrics:: + TranslateCommonMetrics:: LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE)); } void CheckScheme(int expected_http, int expected_https, int expected_others) { - ASSERT_EQ(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_PAGE_SCHEME), key_); + ASSERT_EQ(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_PAGE_SCHEME), key_); Snapshot(); EXPECT_EQ(expected_http, - GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTP)); + 
GetCountWithoutSnapshot(TranslateCommonMetrics::SCHEME_HTTP)); EXPECT_EQ(expected_https, - GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTPS)); + GetCountWithoutSnapshot(TranslateCommonMetrics::SCHEME_HTTPS)); EXPECT_EQ(expected_others, - GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_OTHERS)); + GetCountWithoutSnapshot(TranslateCommonMetrics::SCHEME_OTHERS)); } void CheckTotalCount(int count) { @@ -170,133 +170,133 @@ class MetricsRecorder { } // namespace -TEST(TranslateHelperMetricsTest, ReportContentLanguage) { - MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_CONTENT_LANGUAGE)); - - recorder.CheckLanguage(TranslateHelperMetrics::UMA_CONTENT_LANGUAGE, 0, 0, 0); - TranslateHelperMetrics::ReportContentLanguage(std::string(), std::string()); - recorder.CheckLanguage(TranslateHelperMetrics::UMA_CONTENT_LANGUAGE, 1, 0, 0); - TranslateHelperMetrics::ReportContentLanguage("ja_JP", "ja-JP"); - recorder.CheckLanguage(TranslateHelperMetrics::UMA_CONTENT_LANGUAGE, 1, 0, 1); - TranslateHelperMetrics::ReportContentLanguage("en", "en"); - recorder.CheckLanguage(TranslateHelperMetrics::UMA_CONTENT_LANGUAGE, 1, 1, 1); +TEST(TranslateCommonMetricsTest, ReportContentLanguage) { + MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_CONTENT_LANGUAGE)); + + recorder.CheckLanguage(TranslateCommonMetrics::UMA_CONTENT_LANGUAGE, 0, 0, 0); + TranslateCommonMetrics::ReportContentLanguage(std::string(), std::string()); + recorder.CheckLanguage(TranslateCommonMetrics::UMA_CONTENT_LANGUAGE, 1, 0, 0); + TranslateCommonMetrics::ReportContentLanguage("ja_JP", "ja-JP"); + recorder.CheckLanguage(TranslateCommonMetrics::UMA_CONTENT_LANGUAGE, 1, 0, 1); + TranslateCommonMetrics::ReportContentLanguage("en", "en"); + recorder.CheckLanguage(TranslateCommonMetrics::UMA_CONTENT_LANGUAGE, 1, 1, 1); } -TEST(TranslateHelperMetricsTest, ReportHtmlLang) { - MetricsRecorder 
recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_HTML_LANG)); - - recorder.CheckLanguage(TranslateHelperMetrics::UMA_HTML_LANG, 0, 0, 0); - TranslateHelperMetrics::ReportHtmlLang(std::string(), std::string()); - recorder.CheckLanguage(TranslateHelperMetrics::UMA_HTML_LANG, 1, 0, 0); - TranslateHelperMetrics::ReportHtmlLang("ja_JP", "ja-JP"); - recorder.CheckLanguage(TranslateHelperMetrics::UMA_HTML_LANG, 1, 0, 1); - TranslateHelperMetrics::ReportHtmlLang("en", "en"); - recorder.CheckLanguage(TranslateHelperMetrics::UMA_HTML_LANG, 1, 1, 1); +TEST(TranslateCommonMetricsTest, ReportHtmlLang) { + MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_HTML_LANG)); + + recorder.CheckLanguage(TranslateCommonMetrics::UMA_HTML_LANG, 0, 0, 0); + TranslateCommonMetrics::ReportHtmlLang(std::string(), std::string()); + recorder.CheckLanguage(TranslateCommonMetrics::UMA_HTML_LANG, 1, 0, 0); + TranslateCommonMetrics::ReportHtmlLang("ja_JP", "ja-JP"); + recorder.CheckLanguage(TranslateCommonMetrics::UMA_HTML_LANG, 1, 0, 1); + TranslateCommonMetrics::ReportHtmlLang("en", "en"); + recorder.CheckLanguage(TranslateCommonMetrics::UMA_HTML_LANG, 1, 1, 1); } -TEST(TranslateHelperMetricsTest, ReportLanguageVerification) { - MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_LANGUAGE_VERIFICATION)); +TEST(TranslateCommonMetricsTest, ReportLanguageVerification) { + MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_LANGUAGE_VERIFICATION)); recorder.CheckLanguageVerification(0, 0, 0, 0, 0, 0, 0); - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED); + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED); recorder.CheckLanguageVerification(1, 0, 0, 0, 0, 0, 0); - TranslateHelperMetrics::ReportLanguageVerification( - 
TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); recorder.CheckLanguageVerification(1, 1, 0, 0, 0, 0, 0); - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN); + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_UNKNOWN); recorder.CheckLanguageVerification(1, 1, 1, 0, 0, 0, 0); - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE); + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_AGREE); recorder.CheckLanguageVerification(1, 1, 1, 1, 0, 0, 0); - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE); + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE); recorder.CheckLanguageVerification(1, 1, 1, 1, 1, 0, 0); - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_TRUST_CLD); + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_TRUST_CLD); recorder.CheckLanguageVerification(1, 1, 1, 1, 1, 1, 0); - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE); + TranslateCommonMetrics::ReportLanguageVerification( + TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE); recorder.CheckLanguageVerification(1, 1, 1, 1, 1, 1, 1); } -TEST(TranslateHelperMetricsTest, ReportTimeToBeReady) { - MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_TIME_TO_BE_READY)); +TEST(TranslateCommonMetricsTest, ReportTimeToBeReady) { + MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName( + 
TranslateCommonMetrics::UMA_TIME_TO_BE_READY)); recorder.CheckTotalCount(0); - TranslateHelperMetrics::ReportTimeToBeReady(3.14); + TranslateCommonMetrics::ReportTimeToBeReady(3.14); recorder.CheckValueInLogs(3.14); recorder.CheckTotalCount(1); } -TEST(TranslateHelperMetricsTest, ReportTimeToLoad) { - MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_TIME_TO_LOAD)); +TEST(TranslateCommonMetricsTest, ReportTimeToLoad) { + MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_TIME_TO_LOAD)); recorder.CheckTotalCount(0); - TranslateHelperMetrics::ReportTimeToLoad(573.0); + TranslateCommonMetrics::ReportTimeToLoad(573.0); recorder.CheckValueInLogs(573.0); recorder.CheckTotalCount(1); } -TEST(TranslateHelperMetricsTest, ReportTimeToTranslate) { - MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_TIME_TO_TRANSLATE)); +TEST(TranslateCommonMetricsTest, ReportTimeToTranslate) { + MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_TIME_TO_TRANSLATE)); recorder.CheckTotalCount(0); - TranslateHelperMetrics::ReportTimeToTranslate(4649.0); + TranslateCommonMetrics::ReportTimeToTranslate(4649.0); recorder.CheckValueInLogs(4649.0); recorder.CheckTotalCount(1); } -TEST(TranslateHelperMetricsTest, ReportUserActionDuration) { - MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_USER_ACTION_DURATION)); +TEST(TranslateCommonMetricsTest, ReportUserActionDuration) { + MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_USER_ACTION_DURATION)); recorder.CheckTotalCount(0); TimeTicks begin = TimeTicks::Now(); TimeTicks end = begin + base::TimeDelta::FromSeconds(3776); - TranslateHelperMetrics::ReportUserActionDuration(begin, end); + TranslateCommonMetrics::ReportUserActionDuration(begin, end); recorder.CheckValueInLogs(3776000.0); 
recorder.CheckTotalCount(1); } -TEST(TranslateHelperMetricsTest, ReportPageScheme) { - MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_PAGE_SCHEME)); +TEST(TranslateCommonMetricsTest, ReportPageScheme) { + MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_PAGE_SCHEME)); recorder.CheckScheme(0, 0, 0); - TranslateHelperMetrics::ReportPageScheme("http"); + TranslateCommonMetrics::ReportPageScheme("http"); recorder.CheckScheme(1, 0, 0); - TranslateHelperMetrics::ReportPageScheme("https"); + TranslateCommonMetrics::ReportPageScheme("https"); recorder.CheckScheme(1, 1, 0); - TranslateHelperMetrics::ReportPageScheme("ftp"); + TranslateCommonMetrics::ReportPageScheme("ftp"); recorder.CheckScheme(1, 1, 1); } -TEST(TranslateHelperMetricsTest, ReportSimilarLanguageMatch) { - MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH)); +TEST(TranslateCommonMetricsTest, ReportSimilarLanguageMatch) { + MetricsRecorder recorder(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_SIMILAR_LANGUAGE_MATCH)); recorder.CheckTotalCount(0); EXPECT_EQ(0, recorder.GetCount(kTrue)); EXPECT_EQ(0, recorder.GetCount(kFalse)); - TranslateHelperMetrics::ReportSimilarLanguageMatch(true); + TranslateCommonMetrics::ReportSimilarLanguageMatch(true); EXPECT_EQ(1, recorder.GetCount(kTrue)); EXPECT_EQ(0, recorder.GetCount(kFalse)); - TranslateHelperMetrics::ReportSimilarLanguageMatch(false); + TranslateCommonMetrics::ReportSimilarLanguageMatch(false); EXPECT_EQ(1, recorder.GetCount(kTrue)); EXPECT_EQ(1, recorder.GetCount(kFalse)); } #if defined(ENABLE_LANGUAGE_DETECTION) -TEST(TranslateHelperMetricsTest, ReportLanguageDetectionTime) { - MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( - TranslateHelperMetrics::UMA_LANGUAGE_DETECTION)); +TEST(TranslateCommonMetricsTest, ReportLanguageDetectionTime) { + MetricsRecorder 
recorder(TranslateCommonMetrics::GetMetricsName( + TranslateCommonMetrics::UMA_LANGUAGE_DETECTION)); recorder.CheckTotalCount(0); TimeTicks begin = TimeTicks::Now(); TimeTicks end = begin + base::TimeDelta::FromMicroseconds(9009); - TranslateHelperMetrics::ReportLanguageDetectionTime(begin, end); + TranslateCommonMetrics::ReportLanguageDetectionTime(begin, end); recorder.CheckValueInLogs(9.009); recorder.CheckTotalCount(1); } diff --git a/chrome/renderer/DEPS b/chrome/renderer/DEPS index 619dd3b..c8ec078 100644 --- a/chrome/renderer/DEPS +++ b/chrome/renderer/DEPS @@ -19,7 +19,6 @@ include_rules = [ "+webkit/plugins", "+webkit/renderer", - "+third_party/cld/encodings/compact_lang_det/win", "+third_party/npapi/bindings", "+third_party/re2", "+third_party/smhasher", diff --git a/chrome/renderer/translate/translate_helper.cc b/chrome/renderer/translate/translate_helper.cc index 0741757..8d3c662 100644 --- a/chrome/renderer/translate/translate_helper.cc +++ b/chrome/renderer/translate/translate_helper.cc @@ -9,13 +9,12 @@ #include "base/logging.h" #include "base/message_loop.h" #include "base/strings/string16.h" -#include "base/strings/string_split.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/render_messages.h" -#include "chrome/common/translate/translate_util.h" -#include "chrome/renderer/translate/translate_helper_metrics.h" +#include "chrome/common/translate/language_detection_util.h" +#include "chrome/common/translate/translate_common_metrics.h" #include "content/public/renderer/render_view.h" #include "third_party/WebKit/public/web/WebDocument.h" #include "third_party/WebKit/public/web/WebElement.h" @@ -26,10 +25,6 @@ #include "third_party/WebKit/public/web/WebView.h" #include "v8/include/v8.h" -#if defined(ENABLE_LANGUAGE_DETECTION) -#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" -#endif - using WebKit::WebDocument; using 
WebKit::WebElement; using WebKit::WebFrame; @@ -56,38 +51,6 @@ const int kTranslateStatusCheckDelayMs = 400; // Language name passed to the Translate element for it to detect the language. const char kAutoDetectionLanguage[] = "auto"; -// Similar language code list. Some languages are very similar and difficult -// for CLD to distinguish. -struct SimilarLanguageCode { - const char* const code; - int group; -}; - -const SimilarLanguageCode kSimilarLanguageCodes[] = { - {"bs", 1}, - {"hr", 1}, - {"hi", 2}, - {"ne", 2}, -}; - -// Checks |kSimilarLanguageCodes| and returns group code. -int GetSimilarLanguageGroupCode(const std::string& language) { - for (size_t i = 0; i < arraysize(kSimilarLanguageCodes); ++i) { - if (language.find(kSimilarLanguageCodes[i].code) != 0) - continue; - return kSimilarLanguageCodes[i].group; - } - return 0; -} - -// Well-known languages which often have wrong server configuration of -// Content-Language: en. -// TODO(toyoshim): Remove these static tables and caller functions to -// chrome/common/translate, and implement them as std::set<>. 
-const char* kWellKnownCodesOnWrongConfiguration[] = { - "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th" -}; - } // namespace //////////////////////////////////////////////////////////////////////////////// @@ -128,7 +91,7 @@ void TranslateHelper::PageCaptured(int page_id, const string16& contents) { html_lang = html_element.getAttribute("lang").utf8(); std::string cld_language; bool is_cld_reliable; - std::string language = DeterminePageLanguage( + std::string language = LanguageDetectionUtil::DeterminePageLanguage( content_language, html_lang, contents, &cld_language, &is_cld_reliable); if (language.empty()) @@ -163,42 +126,6 @@ void TranslateHelper::CancelPendingTranslation() { target_lang_.clear(); } -#if defined(ENABLE_LANGUAGE_DETECTION) -// static -std::string TranslateHelper::DetermineTextLanguage(const string16& text, - bool* is_cld_reliable) { - std::string language = chrome::kUnknownLanguageCode; - int num_languages = 0; - int text_bytes = 0; - bool is_reliable = false; - Language cld_language = - DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, - &num_languages, NULL, &text_bytes); - if (is_cld_reliable != NULL) - *is_cld_reliable = is_reliable; - - // We don't trust the result if the CLD reports that the detection is not - // reliable, or if the actual text used to detect the language was less than - // 100 bytes (short texts can often lead to wrong results). - // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that - // the determined language code is correct with 50% confidence. Chrome should - // handle the real confidence value to judge. - if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && - cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { - // We should not use LanguageCode_ISO_639_1 because it does not cover all - // the languages CLD can detect. 
As a result, it'll return the invalid - // language code for tradtional Chinese among others. - // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and - // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN - // for Simplified Chinese. - language = LanguageCodeWithDialects(cld_language); - } - VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text - << "\n*************************************\n"; - return language; -} -#endif // defined(ENABLE_LANGUAGE_DETECTION) - //////////////////////////////////////////////////////////////////////////////// // TranslateHelper, protected: // @@ -303,218 +230,6 @@ double TranslateHelper::ExecuteScriptAndGetDoubleResult( //////////////////////////////////////////////////////////////////////////////// // TranslateHelper, private: // -// static -void TranslateHelper::CorrectLanguageCodeTypo(std::string* code) { - DCHECK(code); - - size_t coma_index = code->find(','); - if (coma_index != std::string::npos) { - // There are more than 1 language specified, just keep the first one. - *code = code->substr(0, coma_index); - } - TrimWhitespaceASCII(*code, TRIM_ALL, code); - - // An underscore instead of a dash is a frequent mistake. - size_t underscore_index = code->find('_'); - if (underscore_index != std::string::npos) - (*code)[underscore_index] = '-'; - - // Change everything up to a dash to lower-case and everything after to upper. - size_t dash_index = code->find('-'); - if (dash_index != std::string::npos) { - *code = StringToLowerASCII(code->substr(0, dash_index)) + - StringToUpperASCII(code->substr(dash_index)); - } else { - *code = StringToLowerASCII(*code); - } -} - -// static -bool TranslateHelper::IsValidLanguageCode(const std::string& code) { - // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. - // TODO(hajimehoshi): How about es-419, which is used as an Accept language? 
- std::vector<std::string> chunks; - base::SplitString(code, '-', &chunks); - - if (chunks.size() < 1 || 2 < chunks.size()) - return false; - - const std::string& main_code = chunks[0]; - - if (main_code.size() < 1 || 3 < main_code.size()) - return false; - - for (std::string::const_iterator it = main_code.begin(); - it != main_code.end(); ++it) { - if (!IsAsciiAlpha(*it)) - return false; - } - - if (chunks.size() == 1) - return true; - - const std::string& sub_code = chunks[1]; - - if (sub_code.size() != 2) - return false; - - for (std::string::const_iterator it = sub_code.begin(); - it != sub_code.end(); ++it) { - if (!IsAsciiAlpha(*it)) - return false; - } - - return true; -} - -// static -void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) { - // Correct well-known format errors. - CorrectLanguageCodeTypo(code); - - if (!IsValidLanguageCode(*code)) { - *code = std::string(); - return; - } - - TranslateUtil::ToTranslateLanguageSynonym(code); -} - -// static -bool TranslateHelper::IsSameOrSimilarLanguages( - const std::string& page_language, const std::string& cld_language) { - // Language code part of |page_language| is matched to one of |cld_language|. - // Country code is ignored here. - if (page_language.size() >= 2 && - cld_language.find(page_language.c_str(), 0, 2) == 0) { - // Languages are matched strictly. Reports false to metrics, but returns - // true. - TranslateHelperMetrics::ReportSimilarLanguageMatch(false); - return true; - } - - // Check if |page_language| and |cld_language| are in the similar language - // list and belong to the same language group. 
- int page_code = GetSimilarLanguageGroupCode(page_language); - bool match = page_code != 0 && - page_code == GetSimilarLanguageGroupCode(cld_language); - - TranslateHelperMetrics::ReportSimilarLanguageMatch(match); - return match; -} - -// static -bool TranslateHelper::MaybeServerWrongConfiguration( - const std::string& page_language, const std::string& cld_language) { - // If |page_language| is not "en-*", respect it and just return false here. - if (!StartsWithASCII(page_language, "en", false)) - return false; - - // A server provides a language meta information representing "en-*". But it - // might be just a default value due to missing user configuration. - // Let's trust |cld_language| if the determined language is not difficult to - // distinguish from English, and the language is one of well-known languages - // which often provide "en-*" meta information mistakenly. - for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { - if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) - return true; - } - return false; -} - -// static -bool TranslateHelper::CanCLDComplementSubCode( - const std::string& page_language, const std::string& cld_language) { - // Translate server cannot treat general Chinese. If Content-Language and - // CLD agree that the language is Chinese and Content-Language doesn't know - // which dialect is used, CLD language has priority. - // TODO(hajimehoshi): How about the other dialects like zh-MO? 
- return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false); -} - -// static -std::string TranslateHelper::DeterminePageLanguage(const std::string& code, - const std::string& html_lang, - const string16& contents, - std::string* cld_language_p, - bool* is_cld_reliable_p) { -#if defined(ENABLE_LANGUAGE_DETECTION) - base::TimeTicks begin_time = base::TimeTicks::Now(); - bool is_cld_reliable; - std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); - TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, - base::TimeTicks::Now()); - - if (cld_language_p != NULL) - *cld_language_p = cld_language; - if (is_cld_reliable_p != NULL) - *is_cld_reliable_p = is_cld_reliable; - TranslateUtil::ToTranslateLanguageSynonym(&cld_language); -#endif // defined(ENABLE_LANGUAGE_DETECTION) - - // Check if html lang attribute is valid. - std::string modified_html_lang; - if (!html_lang.empty()) { - modified_html_lang = html_lang; - ApplyLanguageCodeCorrection(&modified_html_lang); - TranslateHelperMetrics::ReportHtmlLang(html_lang, modified_html_lang); - VLOG(9) << "html lang based language code: " << modified_html_lang; - } - - // Check if Content-Language is valid. - std::string modified_code; - if (!code.empty()) { - modified_code = code; - ApplyLanguageCodeCorrection(&modified_code); - TranslateHelperMetrics::ReportContentLanguage(code, modified_code); - } - - // Adopt |modified_html_lang| if it is valid. Otherwise, adopt - // |modified_code|. - std::string language = modified_html_lang.empty() ? modified_code : - modified_html_lang; - -#if defined(ENABLE_LANGUAGE_DETECTION) - // If |language| is empty, just use CLD result even though it might be - // chrome::kUnknownLanguageCode. 
- if (language.empty()) { - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); - return cld_language; - } - - if (cld_language == chrome::kUnknownLanguageCode) { - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN); - return language; - } else if (IsSameOrSimilarLanguages(language, cld_language)) { - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE); - return language; - } else if (MaybeServerWrongConfiguration(language, cld_language)) { - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_TRUST_CLD); - return cld_language; - } else if (CanCLDComplementSubCode(language, cld_language)) { - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE); - return cld_language; - } else { - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE); - // Content-Language value might be wrong because CLD says that this page - // is written in another language with confidence. - // In this case, Chrome doesn't rely on any of the language codes, and - // gives up suggesting a translation. 
- return std::string(chrome::kUnknownLanguageCode); - } -#else // defined(ENABLE_LANGUAGE_DETECTION) - TranslateHelperMetrics::ReportLanguageVerification( - TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED); -#endif // defined(ENABLE_LANGUAGE_DETECTION) - - return language; -} // static bool TranslateHelper::IsTranslationAllowed(WebDocument* document) { @@ -589,11 +304,11 @@ void TranslateHelper::OnTranslatePage(int page_id, source_lang : kAutoDetectionLanguage; target_lang_ = target_lang; - TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_, + TranslateCommonMetrics::ReportUserActionDuration(language_determined_time_, base::TimeTicks::Now()); GURL url(main_frame->document().url()); - TranslateHelperMetrics::ReportPageScheme(url.scheme()); + TranslateCommonMetrics::ReportPageScheme(url.scheme()); if (!IsTranslateLibAvailable()) { // Evaluate the script to add the translation related method to the global @@ -656,7 +371,7 @@ void TranslateHelper::CheckTranslateStatus() { translation_pending_ = false; // Check JavaScript performance counters for UMA reports. - TranslateHelperMetrics::ReportTimeToTranslate( + TranslateCommonMetrics::ReportTimeToTranslate( ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime")); // Notify the browser we are done. @@ -697,9 +412,9 @@ void TranslateHelper::TranslatePageImpl(int count) { // The library is loaded, and ready for translation now. // Check JavaScript performance counters for UMA reports. 
- TranslateHelperMetrics::ReportTimeToBeReady( + TranslateCommonMetrics::ReportTimeToBeReady( ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime")); - TranslateHelperMetrics::ReportTimeToLoad( + TranslateCommonMetrics::ReportTimeToLoad( ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime")); if (!StartTranslation()) { diff --git a/chrome/renderer/translate/translate_helper.h b/chrome/renderer/translate/translate_helper.h index 21fbcaa..3b7273f 100644 --- a/chrome/renderer/translate/translate_helper.h +++ b/chrome/renderer/translate/translate_helper.h @@ -87,7 +87,6 @@ class TranslateHelper : public content::RenderViewObserver { virtual double ExecuteScriptAndGetDoubleResult(const std::string& script); private: - FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, IsValidLanguageCode); FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, AdoptHtmlLang); FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, CLDAgreeWithLanguageCodeHavingCountryCode); @@ -101,55 +100,14 @@ class TranslateHelper : public content::RenderViewObserver { FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, SimilarLanguageCode); FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, WellKnownWrongConfiguration); - // Corrects language code if it contains well-known mistakes. - static void CorrectLanguageCodeTypo(std::string* code); - // Converts language code to the one used in server supporting list. static void ConvertLanguageCodeSynonym(std::string* code); - // Checks if the language code's format is valid. - static bool IsValidLanguageCode(const std::string& code); - - // Applies a series of language code modification in proper order. - static void ApplyLanguageCodeCorrection(std::string* code); - - // Checks if languages are matched, or similar. This function returns true - // against a language pair containing a language which is difficult for CLD - // to distinguish. 
- static bool IsSameOrSimilarLanguages(const std::string& page_language, - const std::string& cld_language); - - // Checks if languages pair is one of well-known pairs of wrong server - // configuration. - static bool MaybeServerWrongConfiguration(const std::string& page_language, - const std::string& cld_language); - - // Checks if CLD can complement a sub code when the page language doesn't - // know the sub code. - static bool CanCLDComplementSubCode(const std::string& page_language, - const std::string& cld_language); - - // Determines content page language from Content-Language code and contents. - static std::string DeterminePageLanguage(const std::string& code, - const std::string& html_lang, - const string16& contents, - std::string* cld_language, - bool* is_cld_reliable); - // Returns whether the page associated with |document| is a candidate for // translation. Some pages can explictly specify (via a meta-tag) that they // should not be translated. static bool IsTranslationAllowed(WebKit::WebDocument* document); -#if defined(ENABLE_LANGUAGE_DETECTION) - // Returns the ISO 639_1 language code of the specified |text|, or 'unknown' - // if it failed. - // |is_cld_reliable| will be set as true if CLD says the detection is - // reliable. - static std::string DetermineTextLanguage(const string16& text, - bool* is_cld_reliable); -#endif - // RenderViewObserver implementation. virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE; diff --git a/chrome/renderer/translate/translate_helper_unittest.cc b/chrome/renderer/translate/translate_helper_unittest.cc deleted file mode 100644 index f1d2161..0000000 --- a/chrome/renderer/translate/translate_helper_unittest.cc +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright (c) 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "chrome/renderer/translate/translate_helper.h" - -#include "base/strings/utf_string_conversions.h" -#include "chrome/common/chrome_constants.h" -#include "testing/gtest/include/gtest/gtest.h" - -typedef testing::Test TranslateHelperTest; - -// Tests that well-known language code typos are fixed. -TEST_F(TranslateHelperTest, LanguageCodeTypoCorrection) { - std::string language; - - // Strip the second and later codes. - language = std::string("ja,en"); - TranslateHelper::CorrectLanguageCodeTypo(&language); - EXPECT_EQ("ja", language); - - // Replace dash with hyphen. - language = std::string("ja_JP"); - TranslateHelper::CorrectLanguageCodeTypo(&language); - EXPECT_EQ("ja-JP", language); - - // Correct wrong cases. - language = std::string("JA-jp"); - TranslateHelper::CorrectLanguageCodeTypo(&language); - EXPECT_EQ("ja-JP", language); -} - -// Tests if the language codes' format is invalid. -TEST_F(TranslateHelperTest, IsValidLanguageCode) { - std::string language; - - language = std::string("ja"); - EXPECT_TRUE(TranslateHelper::IsValidLanguageCode(language)); - - language = std::string("ja-JP"); - EXPECT_TRUE(TranslateHelper::IsValidLanguageCode(language)); - - language = std::string("ceb"); - EXPECT_TRUE(TranslateHelper::IsValidLanguageCode(language)); - - language = std::string("ceb-XX"); - EXPECT_TRUE(TranslateHelper::IsValidLanguageCode(language)); - - // Invalid because the sub code consists of a number. - language = std::string("utf-8"); - EXPECT_FALSE(TranslateHelper::IsValidLanguageCode(language)); - - // Invalid because of six characters after hyphen. - language = std::string("ja-YUKARI"); - EXPECT_FALSE(TranslateHelper::IsValidLanguageCode(language)); - - // Invalid because of four characters. - language = std::string("DHMO"); - EXPECT_FALSE(TranslateHelper::IsValidLanguageCode(language)); -} - -// Tests that similar language table works. 
-TEST_F(TranslateHelperTest, SimilarLanguageCode) { - EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("en", "en")); - EXPECT_FALSE(TranslateHelper::IsSameOrSimilarLanguages("en", "ja")); - EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("bs", "hr")); - EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("sr-ME", "sr")); - EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("ne", "hi")); - EXPECT_FALSE(TranslateHelper::IsSameOrSimilarLanguages("bs", "hi")); -} - -// Tests that well-known languages which often have wrong server configuration -// are handles. -TEST_F(TranslateHelperTest, WellKnownWrongConfiguration) { - EXPECT_TRUE(TranslateHelper::MaybeServerWrongConfiguration("en", "ja")); - EXPECT_TRUE(TranslateHelper::MaybeServerWrongConfiguration("en-US", "ja")); - EXPECT_TRUE(TranslateHelper::MaybeServerWrongConfiguration("en", "zh-CN")); - EXPECT_FALSE(TranslateHelper::MaybeServerWrongConfiguration("ja", "en")); - EXPECT_FALSE(TranslateHelper::MaybeServerWrongConfiguration("en", "he")); -} - -// Tests that the language meta tag providing wrong information is ignored by -// TranslateHelper due to disagreement between meta tag and CLD. -TEST_F(TranslateHelperTest, CLDDisagreeWithWrongLanguageCode) { - string16 contents = ASCIIToUTF16( - "<html><head><meta http-equiv='Content-Language' content='ja'></head>" - "<body>This is a page apparently written in English. Even though " - "content-language is provided, the value will be ignored if the value " - "is suspicious.</body></html>"); - std::string cld_language; - bool is_cld_reliable; - std::string language = - TranslateHelper::DeterminePageLanguage(std::string("ja"), std::string(), - contents, &cld_language, - &is_cld_reliable); - EXPECT_EQ(chrome::kUnknownLanguageCode, language); - EXPECT_EQ("en", cld_language); - EXPECT_TRUE(is_cld_reliable); -} - -// Tests that the language meta tag providing "en-US" style information is -// agreed by CLD. 
-TEST_F(TranslateHelperTest, CLDAgreeWithLanguageCodeHavingCountryCode) { - string16 contents = ASCIIToUTF16( - "<html><head><meta http-equiv='Content-Language' content='en-US'></head>" - "<body>This is a page apparently written in English. Even though " - "content-language is provided, the value will be ignored if the value " - "is suspicious.</body></html>"); - std::string cld_language; - bool is_cld_reliable; - std::string language = - TranslateHelper::DeterminePageLanguage(std::string("en-US"), - std::string(), contents, - &cld_language, &is_cld_reliable); - EXPECT_EQ("en-US", language); - EXPECT_EQ("en", cld_language); - EXPECT_TRUE(is_cld_reliable); -} - -// Tests that the language meta tag providing wrong information is ignored and -// CLD's language will be adopted by TranslateHelper due to an invalid meta tag. -TEST_F(TranslateHelperTest, InvalidLanguageMetaTagProviding) { - string16 contents = ASCIIToUTF16( - "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>" - "<body>This is a page apparently written in English. Even though " - "content-language is provided, the value will be ignored and CLD's" - " language will be adopted if the value is invalid.</body></html>"); - std::string cld_language; - bool is_cld_reliable; - std::string language = - TranslateHelper::DeterminePageLanguage(std::string("utf-8"), - std::string(), contents, - &cld_language, &is_cld_reliable); - EXPECT_EQ("en", language); - EXPECT_EQ("en", cld_language); - EXPECT_TRUE(is_cld_reliable); -} - -// Tests that the language meta tag providing wrong information is ignored -// because of valid html lang attribute. -TEST_F(TranslateHelperTest, AdoptHtmlLang) { - string16 contents = ASCIIToUTF16( - "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>" - "</head><body>This is a page apparently written in English. 
Even though " - "content-language is provided, the value will be ignored if the value " - "is suspicious.</body></html>"); - std::string cld_language; - bool is_cld_reliable; - std::string language = - TranslateHelper::DeterminePageLanguage(std::string("ja"), - std::string("en"), - contents, &cld_language, - &is_cld_reliable); - EXPECT_EQ("en", language); - EXPECT_EQ("en", cld_language); - EXPECT_TRUE(is_cld_reliable); -} |