summaryrefslogtreecommitdiffstats
path: root/chrome/renderer
diff options
context:
space:
mode:
authortoyoshim@chromium.org <toyoshim@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-06-05 17:02:41 +0000
committertoyoshim@chromium.org <toyoshim@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-06-05 17:02:41 +0000
commitf87d61f948a92935a0a636b61011a171c22bb38e (patch)
treea234db94d30df28605e47bb3393eb50afe1b2e16 /chrome/renderer
parenta7116a0a8fc31c3d4c297fb1587879565096a0a6 (diff)
downloadchromium_src-f87d61f948a92935a0a636b61011a171c22bb38e.zip
chromium_src-f87d61f948a92935a0a636b61011a171c22bb38e.tar.gz
chromium_src-f87d61f948a92935a0a636b61011a171c22bb38e.tar.bz2
Translate: CLD should not disagree on similar languages
Currently, the infobar doesn't appear if the language declared in a page's meta information differs from the language determined by CLD. However, some language pairs are difficult for CLD to distinguish correctly. This change makes CLD not disagree on these similar languages. BUG=243763 TEST=unit_tests --gtest_filter='TranslateHelper*Test.*' Review URL: https://chromiumcodereview.appspot.com/16107005 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@204280 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer')
-rw-r--r--chrome/renderer/translate/translate_helper.cc48
-rw-r--r--chrome/renderer/translate/translate_helper.h19
-rw-r--r--chrome/renderer/translate/translate_helper_metrics.cc7
-rw-r--r--chrome/renderer/translate/translate_helper_metrics.h5
-rw-r--r--chrome/renderer/translate/translate_helper_metrics_unittest.cc57
-rw-r--r--chrome/renderer/translate/translate_helper_unittest.cc8
6 files changed, 124 insertions, 20 deletions
diff --git a/chrome/renderer/translate/translate_helper.cc b/chrome/renderer/translate/translate_helper.cc
index d6eaf5d..d66afe7 100644
--- a/chrome/renderer/translate/translate_helper.cc
+++ b/chrome/renderer/translate/translate_helper.cc
@@ -67,6 +67,28 @@ const LanguageCodeSynonym kLanguageCodeSynonyms[] = {
{"tl", "fil"},
};
+// Similar language code list. Some languages are very similar and difficult
+// for CLD to distinguish.
+struct SimilarLanguageCode {
+ const char* const code;
+ int group;
+};
+
+const SimilarLanguageCode kSimilarLanguageCodes[] = {
+ {"bs", 1},
+ {"hr", 1},
+};
+
+// Checks |kSimilarLanguageCodes| and returns group code.
+int GetSimilarLanguageGroupCode(const std::string& language) {
+ for (size_t i = 0; i < arraysize(kSimilarLanguageCodes); ++i) {
+ if (language.find(kSimilarLanguageCodes[i].code) != 0)
+ continue;
+ return kSimilarLanguageCodes[i].group;
+ }
+ return 0;
+}
+
} // namespace
////////////////////////////////////////////////////////////////////////////////
@@ -345,6 +367,29 @@ void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) {
}
// static
+bool TranslateHelper::IsSameOrSimilarLanguages(
+ const std::string& page_language, const std::string& cld_language) {
+ // Language code part of |page_language| is matched to one of |cld_language|.
+ // Country code is ignored here.
+ if (page_language.size() >= 2 &&
+ cld_language.find(page_language.c_str(), 0, 2) == 0) {
+ // Languages are matched strictly. Reports false to metrics, but returns
+ // true.
+ TranslateHelperMetrics::ReportSimilarLanguageMatch(false);
+ return true;
+ }
+
+ // Check if |page_language| and |cld_language| are in the similar language
+ // list and belong to the same language group.
+ int page_code = GetSimilarLanguageGroupCode(page_language);
+ bool match = page_code != 0 &&
+ page_code == GetSimilarLanguageGroupCode(cld_language);
+
+ TranslateHelperMetrics::ReportSimilarLanguageMatch(match);
+ return match;
+}
+
+// static
std::string TranslateHelper::DeterminePageLanguage(const std::string& code,
const std::string& html_lang,
const string16& contents,
@@ -398,8 +443,7 @@ std::string TranslateHelper::DeterminePageLanguage(const std::string& code,
if (cld_language == chrome::kUnknownLanguageCode) {
TranslateHelperMetrics::ReportLanguageVerification(
TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN);
- } else if (language.size() >= 2 &&
- cld_language.find(language.c_str(), 0, 2) != 0) {
+ } else if (!IsSameOrSimilarLanguages(language, cld_language)) {
TranslateHelperMetrics::ReportLanguageVerification(
TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE);
// Content-Language value might be wrong because CLD says that this page
diff --git a/chrome/renderer/translate/translate_helper.h b/chrome/renderer/translate/translate_helper.h
index 4e2a16b..e983061 100644
--- a/chrome/renderer/translate/translate_helper.h
+++ b/chrome/renderer/translate/translate_helper.h
@@ -87,16 +87,17 @@ class TranslateHelper : public content::RenderViewObserver {
virtual double ExecuteScriptAndGetDoubleResult(const std::string& script);
private:
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeTypoCorrection);
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeSynonyms);
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, ResetInvalidLanguageCode);
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
- CLDDisagreeWithWrongLanguageCode);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, AdoptHtmlLang);
FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
CLDAgreeWithLanguageCodeHavingCountryCode);
FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
+ CLDDisagreeWithWrongLanguageCode);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
InvalidLanguageMetaTagProviding);
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, AdoptHtmlLang);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeTypoCorrection);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeSynonyms);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, ResetInvalidLanguageCode);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, SimilarLanguageCode);
// Corrects language code if it contains well-known mistakes.
static void CorrectLanguageCodeTypo(std::string* code);
@@ -110,6 +111,12 @@ class TranslateHelper : public content::RenderViewObserver {
// Applies a series of language code modification in proper order.
static void ApplyLanguageCodeCorrection(std::string* code);
+ // Checks whether the languages match or are similar. Returns true when the
+ // language codes match, or when both languages belong to the same group of
+ // languages that are difficult for CLD to distinguish.
+ static bool IsSameOrSimilarLanguages(const std::string& page_language,
+ const std::string& cld_language);
+
// Determines content page language from Content-Language code and contents.
static std::string DeterminePageLanguage(const std::string& code,
const std::string& html_lang,
diff --git a/chrome/renderer/translate/translate_helper_metrics.cc b/chrome/renderer/translate/translate_helper_metrics.cc
index cf28339..93ce5b0 100644
--- a/chrome/renderer/translate/translate_helper_metrics.cc
+++ b/chrome/renderer/translate/translate_helper_metrics.cc
@@ -20,6 +20,7 @@ const char kTranslateTimeToLoad[] = "Translate.TimeToLoad";
const char kTranslateTimeToTranslate[] = "Translate.TimeToTranslate";
const char kTranslateUserActionDuration[] = "Translate.UserActionDuration";
const char kTranslatePageScheme[] = "Translate.PageScheme";
+const char kTranslateSimilarLanguageMatch[] = "Translate.SimilarLanguageMatch";
const char kSchemeHttp[] = "http";
const char kSchemeHttps[] = "https";
@@ -49,6 +50,8 @@ const MetricsEntry kMetricsEntries[] = {
kTranslateUserActionDuration },
{ TranslateHelperMetrics::UMA_PAGE_SCHEME,
kTranslatePageScheme },
+ { TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH,
+ kTranslateSimilarLanguageMatch },
};
COMPILE_ASSERT(arraysize(kMetricsEntries) == TranslateHelperMetrics::UMA_MAX,
@@ -127,6 +130,10 @@ void ReportLanguageDetectionTime(base::TimeTicks begin, base::TimeTicks end) {
#endif // defined(ENABLE_LANGUAGE_DETECTION)
+void ReportSimilarLanguageMatch(bool match) {
+ UMA_HISTOGRAM_BOOLEAN(kTranslateSimilarLanguageMatch, match);
+}
+
const char* GetMetricsName(MetricsNameIndex index) {
for (size_t i = 0; i < arraysize(kMetricsEntries); ++i) {
if (kMetricsEntries[i].index == index)
diff --git a/chrome/renderer/translate/translate_helper_metrics.h b/chrome/renderer/translate/translate_helper_metrics.h
index 74caf38..892f67a 100644
--- a/chrome/renderer/translate/translate_helper_metrics.h
+++ b/chrome/renderer/translate/translate_helper_metrics.h
@@ -23,6 +23,7 @@ enum MetricsNameIndex {
UMA_TIME_TO_TRANSLATE,
UMA_USER_ACTION_DURATION,
UMA_PAGE_SCHEME,
+ UMA_SIMILAR_LANGUAGE_MATCH,
UMA_MAX,
};
@@ -93,6 +94,10 @@ void ReportLanguageDetectionTime(base::TimeTicks begin, base::TimeTicks end);
#endif // defined(ENABLE_LANGUAGE_DETECTION)
+// Reports whether the page and CLD languages differ but belong to the same
+// similar language group; |match| is false for exact matches and mismatches.
+void ReportSimilarLanguageMatch(bool match);
+
// Gets UMA name for an entry specified by |index|.
const char* GetMetricsName(MetricsNameIndex index);
diff --git a/chrome/renderer/translate/translate_helper_metrics_unittest.cc b/chrome/renderer/translate/translate_helper_metrics_unittest.cc
index 5a503ecd..4434e24 100644
--- a/chrome/renderer/translate/translate_helper_metrics_unittest.cc
+++ b/chrome/renderer/translate/translate_helper_metrics_unittest.cc
@@ -20,6 +20,9 @@ using base::TimeTicks;
namespace {
+const int kTrue = 1;
+const int kFalse = 0;
+
class MetricsRecorder {
public:
explicit MetricsRecorder(const char* key)
@@ -42,11 +45,14 @@ class MetricsRecorder {
Snapshot();
EXPECT_EQ(expected_not_provided,
- GetCount(TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED));
EXPECT_EQ(expected_valid,
- GetCount(TranslateHelperMetrics::LANGUAGE_VALID));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VALID));
EXPECT_EQ(expected_invalid,
- GetCount(TranslateHelperMetrics::LANGUAGE_INVALID));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_INVALID));
}
void CheckLanguageVerification(int expected_cld_disabled,
@@ -61,19 +67,24 @@ class MetricsRecorder {
EXPECT_EQ(
expected_cld_disabled,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED));
EXPECT_EQ(
expected_cld_only,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY));
EXPECT_EQ(
expected_unknown,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN));
EXPECT_EQ(
expected_cld_agree,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE));
EXPECT_EQ(
expected_cld_disagree,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE));
}
void CheckScheme(int expected_http, int expected_https, int expected_others) {
@@ -82,9 +93,12 @@ class MetricsRecorder {
Snapshot();
- EXPECT_EQ(expected_http, GetCount(TranslateHelperMetrics::SCHEME_HTTP));
- EXPECT_EQ(expected_https, GetCount(TranslateHelperMetrics::SCHEME_HTTPS));
- EXPECT_EQ(expected_others, GetCount(TranslateHelperMetrics::SCHEME_OTHERS));
+ EXPECT_EQ(expected_http,
+ GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTP));
+ EXPECT_EQ(expected_https,
+ GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTPS));
+ EXPECT_EQ(expected_others,
+ GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_OTHERS));
}
void CheckTotalCount(int count) {
@@ -108,6 +122,11 @@ class MetricsRecorder {
EXPECT_FALSE(true);
}
+ HistogramBase::Count GetCount(HistogramBase::Sample value) {
+ Snapshot();
+ return GetCountWithoutSnapshot(value);
+ }
+
private:
void Snapshot() {
HistogramBase* histogram = StatisticsRecorder::FindHistogram(key_);
@@ -116,7 +135,7 @@ class MetricsRecorder {
samples_ = histogram->SnapshotSamples();
}
- HistogramBase::Count GetCount(HistogramBase::Sample value) {
+ HistogramBase::Count GetCountWithoutSnapshot(HistogramBase::Sample value) {
if (!samples_.get())
return 0;
HistogramBase::Count count = samples_->GetCount(value);
@@ -241,6 +260,20 @@ TEST(TranslateHelperMetricsTest, ReportPageScheme) {
recorder.CheckScheme(1, 1, 1);
}
+TEST(TranslateHelperMetricsTest, ReportSimilarLanguageMatch) {
+ MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
+ TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH));
+ recorder.CheckTotalCount(0);
+ EXPECT_EQ(0, recorder.GetCount(kTrue));
+ EXPECT_EQ(0, recorder.GetCount(kFalse));
+ TranslateHelperMetrics::ReportSimilarLanguageMatch(true);
+ EXPECT_EQ(1, recorder.GetCount(kTrue));
+ EXPECT_EQ(0, recorder.GetCount(kFalse));
+ TranslateHelperMetrics::ReportSimilarLanguageMatch(false);
+ EXPECT_EQ(1, recorder.GetCount(kTrue));
+ EXPECT_EQ(1, recorder.GetCount(kFalse));
+}
+
#if defined(ENABLE_LANGUAGE_DETECTION)
TEST(TranslateHelperMetricsTest, ReportLanguageDetectionTime) {
diff --git a/chrome/renderer/translate/translate_helper_unittest.cc b/chrome/renderer/translate/translate_helper_unittest.cc
index f92a815..25fdf15 100644
--- a/chrome/renderer/translate/translate_helper_unittest.cc
+++ b/chrome/renderer/translate/translate_helper_unittest.cc
@@ -79,6 +79,14 @@ TEST_F(TranslateHelperTest, ResetInvalidLanguageCode) {
EXPECT_TRUE(language.empty());
}
+// Tests that similar language table works.
+TEST_F(TranslateHelperTest, SimilarLanguageCode) {
+ EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("en", "en"));
+ EXPECT_FALSE(TranslateHelper::IsSameOrSimilarLanguages("en", "ja"));
+ EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("bs", "hr"));
+ EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("sr-ME", "sr"));
+}
+
// Tests that the language meta tag providing wrong information is ignored by
// TranslateHelper due to disagreement between meta tag and CLD.
TEST_F(TranslateHelperTest, CLDDisagreeWithWrongLanguageCode) {