diff options
author | toyoshim@chromium.org <toyoshim@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-06-05 17:02:41 +0000 |
---|---|---|
committer | toyoshim@chromium.org <toyoshim@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-06-05 17:02:41 +0000 |
commit | f87d61f948a92935a0a636b61011a171c22bb38e (patch) | |
tree | a234db94d30df28605e47bb3393eb50afe1b2e16 /chrome/renderer | |
parent | a7116a0a8fc31c3d4c297fb1587879565096a0a6 (diff) | |
download | chromium_src-f87d61f948a92935a0a636b61011a171c22bb38e.zip chromium_src-f87d61f948a92935a0a636b61011a171c22bb38e.tar.gz chromium_src-f87d61f948a92935a0a636b61011a171c22bb38e.tar.bz2 |
Translate: CLD should not disagree on similar languages
Currently, the infobar doesn't appear if the language a page provides in its
meta information is different from the language determined by CLD.
However, some language pairs are difficult for CLD to distinguish correctly.
This change makes CLD not disagree on these similar languages.
BUG=243763
TEST=unit_tests --gtest_filter='TranslateHelper*Test.*'
Review URL: https://chromiumcodereview.appspot.com/16107005
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@204280 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer')
6 files changed, 124 insertions, 20 deletions
diff --git a/chrome/renderer/translate/translate_helper.cc b/chrome/renderer/translate/translate_helper.cc index d6eaf5d..d66afe7 100644 --- a/chrome/renderer/translate/translate_helper.cc +++ b/chrome/renderer/translate/translate_helper.cc @@ -67,6 +67,28 @@ const LanguageCodeSynonym kLanguageCodeSynonyms[] = { {"tl", "fil"}, }; +// Similar language code list. Some languages are very similar and difficult +// for CLD to distinguish. +struct SimilarLanguageCode { + const char* const code; + int group; +}; + +const SimilarLanguageCode kSimilarLanguageCodes[] = { + {"bs", 1}, + {"hr", 1}, +}; + +// Checks |kSimilarLanguageCodes| and returns group code. +int GetSimilarLanguageGroupCode(const std::string& language) { + for (size_t i = 0; i < arraysize(kSimilarLanguageCodes); ++i) { + if (language.find(kSimilarLanguageCodes[i].code) != 0) + continue; + return kSimilarLanguageCodes[i].group; + } + return 0; +} + } // namespace //////////////////////////////////////////////////////////////////////////////// @@ -345,6 +367,29 @@ void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) { } // static +bool TranslateHelper::IsSameOrSimilarLanguages( + const std::string& page_language, const std::string& cld_language) { + // Language code part of |page_language| is matched to one of |cld_language|. + // Country code is ignored here. + if (page_language.size() >= 2 && + cld_language.find(page_language.c_str(), 0, 2) == 0) { + // Languages are matched strictly. Reports false to metrics, but returns + // true. + TranslateHelperMetrics::ReportSimilarLanguageMatch(false); + return true; + } + + // Check if |page_language| and |cld_language| are in the similar language + // list and belong to the same language group. 
+ int page_code = GetSimilarLanguageGroupCode(page_language); + bool match = page_code != 0 && + page_code == GetSimilarLanguageGroupCode(cld_language); + + TranslateHelperMetrics::ReportSimilarLanguageMatch(match); + return match; +} + +// static std::string TranslateHelper::DeterminePageLanguage(const std::string& code, const std::string& html_lang, const string16& contents, @@ -398,8 +443,7 @@ std::string TranslateHelper::DeterminePageLanguage(const std::string& code, if (cld_language == chrome::kUnknownLanguageCode) { TranslateHelperMetrics::ReportLanguageVerification( TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN); - } else if (language.size() >= 2 && - cld_language.find(language.c_str(), 0, 2) != 0) { + } else if (!IsSameOrSimilarLanguages(language, cld_language)) { TranslateHelperMetrics::ReportLanguageVerification( TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE); // Content-Language value might be wrong because CLD says that this page diff --git a/chrome/renderer/translate/translate_helper.h b/chrome/renderer/translate/translate_helper.h index 4e2a16b..e983061 100644 --- a/chrome/renderer/translate/translate_helper.h +++ b/chrome/renderer/translate/translate_helper.h @@ -87,16 +87,17 @@ class TranslateHelper : public content::RenderViewObserver { virtual double ExecuteScriptAndGetDoubleResult(const std::string& script); private: - FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeTypoCorrection); - FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeSynonyms); - FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, ResetInvalidLanguageCode); - FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, - CLDDisagreeWithWrongLanguageCode); + FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, AdoptHtmlLang); FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, CLDAgreeWithLanguageCodeHavingCountryCode); FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, + CLDDisagreeWithWrongLanguageCode); + FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, 
InvalidLanguageMetaTagProviding); - FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, AdoptHtmlLang); + FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeTypoCorrection); + FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeSynonyms); + FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, ResetInvalidLanguageCode); + FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, SimilarLanguageCode); // Corrects language code if it contains well-known mistakes. static void CorrectLanguageCodeTypo(std::string* code); @@ -110,6 +111,12 @@ class TranslateHelper : public content::RenderViewObserver { // Applies a series of language code modification in proper order. static void ApplyLanguageCodeCorrection(std::string* code); + // Checks if languages are matched, or similar. This function returns true + // against a language pair containing a language which is difficult for CLD + // to distinguish. + static bool IsSameOrSimilarLanguages(const std::string& page_language, + const std::string& cld_language); + // Determines content page language from Content-Language code and contents. 
static std::string DeterminePageLanguage(const std::string& code, const std::string& html_lang, diff --git a/chrome/renderer/translate/translate_helper_metrics.cc b/chrome/renderer/translate/translate_helper_metrics.cc index cf28339..93ce5b0 100644 --- a/chrome/renderer/translate/translate_helper_metrics.cc +++ b/chrome/renderer/translate/translate_helper_metrics.cc @@ -20,6 +20,7 @@ const char kTranslateTimeToLoad[] = "Translate.TimeToLoad"; const char kTranslateTimeToTranslate[] = "Translate.TimeToTranslate"; const char kTranslateUserActionDuration[] = "Translate.UserActionDuration"; const char kTranslatePageScheme[] = "Translate.PageScheme"; +const char kTranslateSimilarLanguageMatch[] = "Translate.SimilarLanguageMatch"; const char kSchemeHttp[] = "http"; const char kSchemeHttps[] = "https"; @@ -49,6 +50,8 @@ const MetricsEntry kMetricsEntries[] = { kTranslateUserActionDuration }, { TranslateHelperMetrics::UMA_PAGE_SCHEME, kTranslatePageScheme }, + { TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH, + kTranslateSimilarLanguageMatch }, }; COMPILE_ASSERT(arraysize(kMetricsEntries) == TranslateHelperMetrics::UMA_MAX, @@ -127,6 +130,10 @@ void ReportLanguageDetectionTime(base::TimeTicks begin, base::TimeTicks end) { #endif // defined(ENABLE_LANGUAGE_DETECTION) +void ReportSimilarLanguageMatch(bool match) { + UMA_HISTOGRAM_BOOLEAN(kTranslateSimilarLanguageMatch, match); +} + const char* GetMetricsName(MetricsNameIndex index) { for (size_t i = 0; i < arraysize(kMetricsEntries); ++i) { if (kMetricsEntries[i].index == index) diff --git a/chrome/renderer/translate/translate_helper_metrics.h b/chrome/renderer/translate/translate_helper_metrics.h index 74caf38..892f67a 100644 --- a/chrome/renderer/translate/translate_helper_metrics.h +++ b/chrome/renderer/translate/translate_helper_metrics.h @@ -23,6 +23,7 @@ enum MetricsNameIndex { UMA_TIME_TO_TRANSLATE, UMA_USER_ACTION_DURATION, UMA_PAGE_SCHEME, + UMA_SIMILAR_LANGUAGE_MATCH, UMA_MAX, }; @@ -93,6 +94,10 @@ void 
ReportLanguageDetectionTime(base::TimeTicks begin, base::TimeTicks end); #endif // defined(ENABLE_LANGUAGE_DETECTION) +// Called when CLD agreed on a language which is different, but in the similar +// language list. +void ReportSimilarLanguageMatch(bool match); + // Gets UMA name for an entry specified by |index|. const char* GetMetricsName(MetricsNameIndex index); diff --git a/chrome/renderer/translate/translate_helper_metrics_unittest.cc b/chrome/renderer/translate/translate_helper_metrics_unittest.cc index 5a503ecd..4434e24 100644 --- a/chrome/renderer/translate/translate_helper_metrics_unittest.cc +++ b/chrome/renderer/translate/translate_helper_metrics_unittest.cc @@ -20,6 +20,9 @@ using base::TimeTicks; namespace { +const int kTrue = 1; +const int kFalse = 0; + class MetricsRecorder { public: explicit MetricsRecorder(const char* key) @@ -42,11 +45,14 @@ class MetricsRecorder { Snapshot(); EXPECT_EQ(expected_not_provided, - GetCount(TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED)); + GetCountWithoutSnapshot( + TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED)); EXPECT_EQ(expected_valid, - GetCount(TranslateHelperMetrics::LANGUAGE_VALID)); + GetCountWithoutSnapshot( + TranslateHelperMetrics::LANGUAGE_VALID)); EXPECT_EQ(expected_invalid, - GetCount(TranslateHelperMetrics::LANGUAGE_INVALID)); + GetCountWithoutSnapshot( + TranslateHelperMetrics::LANGUAGE_INVALID)); } void CheckLanguageVerification(int expected_cld_disabled, @@ -61,19 +67,24 @@ class MetricsRecorder { EXPECT_EQ( expected_cld_disabled, - GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED)); + GetCountWithoutSnapshot( + TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED)); EXPECT_EQ( expected_cld_only, - GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY)); + GetCountWithoutSnapshot( + TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY)); EXPECT_EQ( expected_unknown, - GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN)); + GetCountWithoutSnapshot( + 
TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN)); EXPECT_EQ( expected_cld_agree, - GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE)); + GetCountWithoutSnapshot( + TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE)); EXPECT_EQ( expected_cld_disagree, - GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE)); + GetCountWithoutSnapshot( + TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE)); } void CheckScheme(int expected_http, int expected_https, int expected_others) { @@ -82,9 +93,12 @@ class MetricsRecorder { Snapshot(); - EXPECT_EQ(expected_http, GetCount(TranslateHelperMetrics::SCHEME_HTTP)); - EXPECT_EQ(expected_https, GetCount(TranslateHelperMetrics::SCHEME_HTTPS)); - EXPECT_EQ(expected_others, GetCount(TranslateHelperMetrics::SCHEME_OTHERS)); + EXPECT_EQ(expected_http, + GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTP)); + EXPECT_EQ(expected_https, + GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTPS)); + EXPECT_EQ(expected_others, + GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_OTHERS)); } void CheckTotalCount(int count) { @@ -108,6 +122,11 @@ class MetricsRecorder { EXPECT_FALSE(true); } + HistogramBase::Count GetCount(HistogramBase::Sample value) { + Snapshot(); + return GetCountWithoutSnapshot(value); + } + private: void Snapshot() { HistogramBase* histogram = StatisticsRecorder::FindHistogram(key_); @@ -116,7 +135,7 @@ class MetricsRecorder { samples_ = histogram->SnapshotSamples(); } - HistogramBase::Count GetCount(HistogramBase::Sample value) { + HistogramBase::Count GetCountWithoutSnapshot(HistogramBase::Sample value) { if (!samples_.get()) return 0; HistogramBase::Count count = samples_->GetCount(value); @@ -241,6 +260,20 @@ TEST(TranslateHelperMetricsTest, ReportPageScheme) { recorder.CheckScheme(1, 1, 1); } +TEST(TranslateHelperMetricsTest, ReportSimilarLanguageMatch) { + MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName( + 
TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH)); + recorder.CheckTotalCount(0); + EXPECT_EQ(0, recorder.GetCount(kTrue)); + EXPECT_EQ(0, recorder.GetCount(kFalse)); + TranslateHelperMetrics::ReportSimilarLanguageMatch(true); + EXPECT_EQ(1, recorder.GetCount(kTrue)); + EXPECT_EQ(0, recorder.GetCount(kFalse)); + TranslateHelperMetrics::ReportSimilarLanguageMatch(false); + EXPECT_EQ(1, recorder.GetCount(kTrue)); + EXPECT_EQ(1, recorder.GetCount(kFalse)); +} + #if defined(ENABLE_LANGUAGE_DETECTION) TEST(TranslateHelperMetricsTest, ReportLanguageDetectionTime) { diff --git a/chrome/renderer/translate/translate_helper_unittest.cc b/chrome/renderer/translate/translate_helper_unittest.cc index f92a815..25fdf15 100644 --- a/chrome/renderer/translate/translate_helper_unittest.cc +++ b/chrome/renderer/translate/translate_helper_unittest.cc @@ -79,6 +79,14 @@ TEST_F(TranslateHelperTest, ResetInvalidLanguageCode) { EXPECT_TRUE(language.empty()); } +// Tests that similar language table works. +TEST_F(TranslateHelperTest, SimilarLanguageCode) { + EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("en", "en")); + EXPECT_FALSE(TranslateHelper::IsSameOrSimilarLanguages("en", "ja")); + EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("bs", "hr")); + EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("sr-ME", "sr")); +} + // Tests that the language meta tag providing wrong information is ignored by // TranslateHelper due to disagreement between meta tag and CLD. TEST_F(TranslateHelperTest, CLDDisagreeWithWrongLanguageCode) { |