summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--chrome/renderer/translate/translate_helper.cc48
-rw-r--r--chrome/renderer/translate/translate_helper.h19
-rw-r--r--chrome/renderer/translate/translate_helper_metrics.cc7
-rw-r--r--chrome/renderer/translate/translate_helper_metrics.h5
-rw-r--r--chrome/renderer/translate/translate_helper_metrics_unittest.cc57
-rw-r--r--chrome/renderer/translate/translate_helper_unittest.cc8
-rw-r--r--tools/metrics/histograms/histograms.xml16
7 files changed, 140 insertions, 20 deletions
diff --git a/chrome/renderer/translate/translate_helper.cc b/chrome/renderer/translate/translate_helper.cc
index d6eaf5d..d66afe7 100644
--- a/chrome/renderer/translate/translate_helper.cc
+++ b/chrome/renderer/translate/translate_helper.cc
@@ -67,6 +67,28 @@ const LanguageCodeSynonym kLanguageCodeSynonyms[] = {
{"tl", "fil"},
};
+// Similar language code list. Some languages are very similar and difficult
+// for CLD to distinguish.
+struct SimilarLanguageCode {
+ const char* const code;
+ int group;
+};
+
+const SimilarLanguageCode kSimilarLanguageCodes[] = {
+ {"bs", 1},
+ {"hr", 1},
+};
+
+// Returns the group of the first |kSimilarLanguageCodes| entry whose code is a
+// prefix of |language|, or 0 if there is no such entry.
+int GetSimilarLanguageGroupCode(const std::string& language) {
+ for (size_t i = 0; i < arraysize(kSimilarLanguageCodes); ++i) {
+ if (language.find(kSimilarLanguageCodes[i].code) != 0)
+ continue;
+ return kSimilarLanguageCodes[i].group;
+ }
+ return 0;
+}
+
} // namespace
////////////////////////////////////////////////////////////////////////////////
@@ -345,6 +367,29 @@ void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) {
}
// static
+bool TranslateHelper::IsSameOrSimilarLanguages(
+ const std::string& page_language, const std::string& cld_language) {
+ // The first two characters (the language code part) of |page_language| are
+ // compared with the start of |cld_language|. Country code is ignored here.
+ if (page_language.size() >= 2 &&
+ cld_language.find(page_language.c_str(), 0, 2) == 0) {
+ // Languages are matched strictly. Reports false to metrics, but returns
+ // true.
+ TranslateHelperMetrics::ReportSimilarLanguageMatch(false);
+ return true;
+ }
+
+ // Check if |page_language| and |cld_language| are in the similar language
+ // list and belong to the same language group.
+ int page_code = GetSimilarLanguageGroupCode(page_language);
+ bool match = page_code != 0 &&
+ page_code == GetSimilarLanguageGroupCode(cld_language);
+
+ TranslateHelperMetrics::ReportSimilarLanguageMatch(match);
+ return match;
+}
+
+// static
std::string TranslateHelper::DeterminePageLanguage(const std::string& code,
const std::string& html_lang,
const string16& contents,
@@ -398,8 +443,7 @@ std::string TranslateHelper::DeterminePageLanguage(const std::string& code,
if (cld_language == chrome::kUnknownLanguageCode) {
TranslateHelperMetrics::ReportLanguageVerification(
TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN);
- } else if (language.size() >= 2 &&
- cld_language.find(language.c_str(), 0, 2) != 0) {
+ } else if (!IsSameOrSimilarLanguages(language, cld_language)) {
TranslateHelperMetrics::ReportLanguageVerification(
TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE);
// Content-Language value might be wrong because CLD says that this page
diff --git a/chrome/renderer/translate/translate_helper.h b/chrome/renderer/translate/translate_helper.h
index 4e2a16b..e983061 100644
--- a/chrome/renderer/translate/translate_helper.h
+++ b/chrome/renderer/translate/translate_helper.h
@@ -87,16 +87,17 @@ class TranslateHelper : public content::RenderViewObserver {
virtual double ExecuteScriptAndGetDoubleResult(const std::string& script);
private:
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeTypoCorrection);
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeSynonyms);
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, ResetInvalidLanguageCode);
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
- CLDDisagreeWithWrongLanguageCode);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, AdoptHtmlLang);
FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
CLDAgreeWithLanguageCodeHavingCountryCode);
FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
+ CLDDisagreeWithWrongLanguageCode);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
InvalidLanguageMetaTagProviding);
- FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, AdoptHtmlLang);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeTypoCorrection);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeSynonyms);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, ResetInvalidLanguageCode);
+ FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, SimilarLanguageCode);
// Corrects language code if it contains well-known mistakes.
static void CorrectLanguageCodeTypo(std::string* code);
@@ -110,6 +111,12 @@ class TranslateHelper : public content::RenderViewObserver {
// Applies a series of language code modification in proper order.
static void ApplyLanguageCodeCorrection(std::string* code);
+ // Checks if the two languages are the same, or similar. This function also
+ // returns true for a pair of languages that are difficult for CLD to
+ // distinguish from each other.
+ static bool IsSameOrSimilarLanguages(const std::string& page_language,
+ const std::string& cld_language);
+
// Determines content page language from Content-Language code and contents.
static std::string DeterminePageLanguage(const std::string& code,
const std::string& html_lang,
diff --git a/chrome/renderer/translate/translate_helper_metrics.cc b/chrome/renderer/translate/translate_helper_metrics.cc
index cf28339..93ce5b0 100644
--- a/chrome/renderer/translate/translate_helper_metrics.cc
+++ b/chrome/renderer/translate/translate_helper_metrics.cc
@@ -20,6 +20,7 @@ const char kTranslateTimeToLoad[] = "Translate.TimeToLoad";
const char kTranslateTimeToTranslate[] = "Translate.TimeToTranslate";
const char kTranslateUserActionDuration[] = "Translate.UserActionDuration";
const char kTranslatePageScheme[] = "Translate.PageScheme";
+const char kTranslateSimilarLanguageMatch[] = "Translate.SimilarLanguageMatch";
const char kSchemeHttp[] = "http";
const char kSchemeHttps[] = "https";
@@ -49,6 +50,8 @@ const MetricsEntry kMetricsEntries[] = {
kTranslateUserActionDuration },
{ TranslateHelperMetrics::UMA_PAGE_SCHEME,
kTranslatePageScheme },
+ { TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH,
+ kTranslateSimilarLanguageMatch },
};
COMPILE_ASSERT(arraysize(kMetricsEntries) == TranslateHelperMetrics::UMA_MAX,
@@ -127,6 +130,10 @@ void ReportLanguageDetectionTime(base::TimeTicks begin, base::TimeTicks end) {
#endif // defined(ENABLE_LANGUAGE_DETECTION)
+void ReportSimilarLanguageMatch(bool match) {
+ UMA_HISTOGRAM_BOOLEAN(kTranslateSimilarLanguageMatch, match);
+}
+
const char* GetMetricsName(MetricsNameIndex index) {
for (size_t i = 0; i < arraysize(kMetricsEntries); ++i) {
if (kMetricsEntries[i].index == index)
diff --git a/chrome/renderer/translate/translate_helper_metrics.h b/chrome/renderer/translate/translate_helper_metrics.h
index 74caf38..892f67a 100644
--- a/chrome/renderer/translate/translate_helper_metrics.h
+++ b/chrome/renderer/translate/translate_helper_metrics.h
@@ -23,6 +23,7 @@ enum MetricsNameIndex {
UMA_TIME_TO_TRANSLATE,
UMA_USER_ACTION_DURATION,
UMA_PAGE_SCHEME,
+ UMA_SIMILAR_LANGUAGE_MATCH,
UMA_MAX,
};
@@ -93,6 +94,10 @@ void ReportLanguageDetectionTime(base::TimeTicks begin, base::TimeTicks end);
#endif // defined(ENABLE_LANGUAGE_DETECTION)
+// Called each time the page language is compared with the CLD language.
+// |match| is true only when they differ but are in the same similar group.
+void ReportSimilarLanguageMatch(bool match);
+
// Gets UMA name for an entry specified by |index|.
const char* GetMetricsName(MetricsNameIndex index);
diff --git a/chrome/renderer/translate/translate_helper_metrics_unittest.cc b/chrome/renderer/translate/translate_helper_metrics_unittest.cc
index 5a503ecd..4434e24 100644
--- a/chrome/renderer/translate/translate_helper_metrics_unittest.cc
+++ b/chrome/renderer/translate/translate_helper_metrics_unittest.cc
@@ -20,6 +20,9 @@ using base::TimeTicks;
namespace {
+const int kTrue = 1;
+const int kFalse = 0;
+
class MetricsRecorder {
public:
explicit MetricsRecorder(const char* key)
@@ -42,11 +45,14 @@ class MetricsRecorder {
Snapshot();
EXPECT_EQ(expected_not_provided,
- GetCount(TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_NOT_PROVIDED));
EXPECT_EQ(expected_valid,
- GetCount(TranslateHelperMetrics::LANGUAGE_VALID));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VALID));
EXPECT_EQ(expected_invalid,
- GetCount(TranslateHelperMetrics::LANGUAGE_INVALID));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_INVALID));
}
void CheckLanguageVerification(int expected_cld_disabled,
@@ -61,19 +67,24 @@ class MetricsRecorder {
EXPECT_EQ(
expected_cld_disabled,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED));
EXPECT_EQ(
expected_cld_only,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY));
EXPECT_EQ(
expected_unknown,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN));
EXPECT_EQ(
expected_cld_agree,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE));
EXPECT_EQ(
expected_cld_disagree,
- GetCount(TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE));
+ GetCountWithoutSnapshot(
+ TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE));
}
void CheckScheme(int expected_http, int expected_https, int expected_others) {
@@ -82,9 +93,12 @@ class MetricsRecorder {
Snapshot();
- EXPECT_EQ(expected_http, GetCount(TranslateHelperMetrics::SCHEME_HTTP));
- EXPECT_EQ(expected_https, GetCount(TranslateHelperMetrics::SCHEME_HTTPS));
- EXPECT_EQ(expected_others, GetCount(TranslateHelperMetrics::SCHEME_OTHERS));
+ EXPECT_EQ(expected_http,
+ GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTP));
+ EXPECT_EQ(expected_https,
+ GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_HTTPS));
+ EXPECT_EQ(expected_others,
+ GetCountWithoutSnapshot(TranslateHelperMetrics::SCHEME_OTHERS));
}
void CheckTotalCount(int count) {
@@ -108,6 +122,11 @@ class MetricsRecorder {
EXPECT_FALSE(true);
}
+ HistogramBase::Count GetCount(HistogramBase::Sample value) {
+ Snapshot();
+ return GetCountWithoutSnapshot(value);
+ }
+
private:
void Snapshot() {
HistogramBase* histogram = StatisticsRecorder::FindHistogram(key_);
@@ -116,7 +135,7 @@ class MetricsRecorder {
samples_ = histogram->SnapshotSamples();
}
- HistogramBase::Count GetCount(HistogramBase::Sample value) {
+ HistogramBase::Count GetCountWithoutSnapshot(HistogramBase::Sample value) {
if (!samples_.get())
return 0;
HistogramBase::Count count = samples_->GetCount(value);
@@ -241,6 +260,20 @@ TEST(TranslateHelperMetricsTest, ReportPageScheme) {
recorder.CheckScheme(1, 1, 1);
}
+TEST(TranslateHelperMetricsTest, ReportSimilarLanguageMatch) {
+ MetricsRecorder recorder(TranslateHelperMetrics::GetMetricsName(
+ TranslateHelperMetrics::UMA_SIMILAR_LANGUAGE_MATCH));
+ recorder.CheckTotalCount(0);
+ EXPECT_EQ(0, recorder.GetCount(kTrue));
+ EXPECT_EQ(0, recorder.GetCount(kFalse));
+ TranslateHelperMetrics::ReportSimilarLanguageMatch(true);
+ EXPECT_EQ(1, recorder.GetCount(kTrue));
+ EXPECT_EQ(0, recorder.GetCount(kFalse));
+ TranslateHelperMetrics::ReportSimilarLanguageMatch(false);
+ EXPECT_EQ(1, recorder.GetCount(kTrue));
+ EXPECT_EQ(1, recorder.GetCount(kFalse));
+}
+
#if defined(ENABLE_LANGUAGE_DETECTION)
TEST(TranslateHelperMetricsTest, ReportLanguageDetectionTime) {
diff --git a/chrome/renderer/translate/translate_helper_unittest.cc b/chrome/renderer/translate/translate_helper_unittest.cc
index f92a815..25fdf15 100644
--- a/chrome/renderer/translate/translate_helper_unittest.cc
+++ b/chrome/renderer/translate/translate_helper_unittest.cc
@@ -79,6 +79,14 @@ TEST_F(TranslateHelperTest, ResetInvalidLanguageCode) {
EXPECT_TRUE(language.empty());
}
+// Tests that similar language table works.
+TEST_F(TranslateHelperTest, SimilarLanguageCode) {
+ EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("en", "en"));
+ EXPECT_FALSE(TranslateHelper::IsSameOrSimilarLanguages("en", "ja"));
+ EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("bs", "hr"));
+ EXPECT_TRUE(TranslateHelper::IsSameOrSimilarLanguages("sr-ME", "sr"));
+}
+
// Tests that the language meta tag providing wrong information is ignored by
// TranslateHelper due to disagreement between meta tag and CLD.
TEST_F(TranslateHelperTest, CLDDisagreeWithWrongLanguageCode) {
diff --git a/tools/metrics/histograms/histograms.xml b/tools/metrics/histograms/histograms.xml
index 65479db..af84661 100644
--- a/tools/metrics/histograms/histograms.xml
+++ b/tools/metrics/histograms/histograms.xml
@@ -9748,6 +9748,17 @@ other types of suffix sets.
</summary>
</histogram>
+<histogram name="Translate.SimilarLanguageMatch" enum="BooleanMatched">
+ <summary>
+ This metric is logged whenever a page is loaded. The logged value is
+ &quot;Matched&quot; when the CLD-detected language differs from the page
+ language code, and the two languages are known to be similar. In that
+ case, Chrome ignores the CLD-determined language and instead uses the page
+ language code. The page language code is decided by Content-Language and
+ HTML lang attribute.
+ </summary>
+</histogram>
+
<histogram name="Translate.TimeToBeReady" units="milliseconds">
<summary>
The time from injecting scripts for Chrome Translate to being ready to
@@ -10733,6 +10744,11 @@ other types of suffix sets.
<int value="1" label="HTTPS"/>
</enum>
+<enum name="BooleanMatched" type="int">
+ <int value="0" label="Not matched"/>
+ <int value="1" label="Matched"/>
+</enum>
+
<enum name="BooleanOrphan" type="int">
<int value="0" label="Non-orphan"/>
<int value="1" label="Orphan"/>