summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryuzo@chromium.org <yuzo@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-07-21 08:42:08 +0000
committeryuzo@chromium.org <yuzo@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-07-21 08:42:08 +0000
commit0426103d3c8a307b691bdf7752ce8bdf75c3e638 (patch)
tree643cfa100ad3d705575fd08c2178386df91a80ce
parent8b7ddc7fa369ba9bb74d784cc13f3eda98361246 (diff)
downloadchromium_src-0426103d3c8a307b691bdf7752ce8bdf75c3e638.zip
chromium_src-0426103d3c8a307b691bdf7752ce8bdf75c3e638.tar.gz
chromium_src-0426103d3c8a307b691bdf7752ce8bdf75c3e638.tar.bz2
Record language usage as UMA histograms.
Record accept languages and application language as UMA histograms on browser start-up. See also http://crosbug.com/17419 BUG=none TEST=run the unit tests Review URL: http://codereview.chromium.org/7348004 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@93360 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--chrome/browser/browser_main.cc5
-rwxr-xr-xchrome/browser/language_usage_metrics.cc67
-rwxr-xr-xchrome/browser/language_usage_metrics.h48
-rw-r--r--chrome/browser/language_usage_metrics_unittest.cc91
-rw-r--r--chrome/chrome_browser.gypi3
-rw-r--r--chrome/chrome_tests.gypi1
6 files changed, 215 insertions, 0 deletions
diff --git a/chrome/browser/browser_main.cc b/chrome/browser/browser_main.cc
index 3a17cb0..d33e4a3 100644
--- a/chrome/browser/browser_main.cc
+++ b/chrome/browser/browser_main.cc
@@ -45,6 +45,7 @@
#include "chrome/browser/first_run/first_run_browser_process.h"
#include "chrome/browser/first_run/upgrade_util.h"
#include "chrome/browser/jankometer.h"
+#include "chrome/browser/language_usage_metrics.h"
#include "chrome/browser/metrics/field_trial_synchronizer.h"
#include "chrome/browser/metrics/histogram_synchronizer.h"
#include "chrome/browser/metrics/metrics_log.h"
@@ -1883,6 +1884,10 @@ int BrowserMain(const MainFunctionParams& parameters) {
HandleTestParameters(parsed_command_line);
RecordBreakpadStatusUMA(metrics);
about_flags::RecordUMAStatistics(local_state);
+ LanguageUsageMetrics::RecordAcceptLanguages(
+ profile->GetPrefs()->GetString(prefs::kAcceptLanguages));
+ LanguageUsageMetrics::RecordApplicationLanguage(
+ g_browser_process->GetApplicationLocale());
#if defined(OS_CHROMEOS)
metrics->StartExternalMetrics();
diff --git a/chrome/browser/language_usage_metrics.cc b/chrome/browser/language_usage_metrics.cc
new file mode 100755
index 0000000..b53c25f
--- /dev/null
+++ b/chrome/browser/language_usage_metrics.cc
@@ -0,0 +1,67 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/language_usage_metrics.h"
+
+#include <algorithm>
+
+#include "base/metrics/histogram.h"
+#include "base/string_tokenizer.h"
+#include "base/string_util.h"
+
+namespace {
+void RecordAcceptLanguage(Language language) {
+ UMA_HISTOGRAM_ENUMERATION("LanguageUsageMetrics.AcceptLanguage",
+ language, NUM_LANGUAGES);
+}
+} // namespace
+
+// static
+void LanguageUsageMetrics::RecordAcceptLanguages(
+ const std::string& accept_languages) {
+ // Reconsider the histogram memory cost if the number of languages
+ // grows too large.
+ DCHECK_LE(NUM_LANGUAGES, 300);
+
+ std::set<Language> languages;
+ ParseAcceptLanguages(accept_languages, &languages);
+ std::for_each(languages.begin(), languages.end(), RecordAcceptLanguage);
+}
+
+// static
+void LanguageUsageMetrics::RecordApplicationLanguage(
+ const std::string& application_locale) {
+ const Language language = ToLanguage(application_locale);
+ if (language != UNKNOWN_LANGUAGE) {
+ UMA_HISTOGRAM_ENUMERATION("LanguageUsageMetrics.ApplicationLanguage",
+ language, NUM_LANGUAGES);
+ }
+}
+
+// static
+void LanguageUsageMetrics::ParseAcceptLanguages(
+ const std::string& accept_languages, std::set<Language>* languages) {
+ languages->clear();
+ StringTokenizer locales(accept_languages, ",");
+ while (locales.GetNext()) {
+ const Language language = ToLanguage(locales.token());
+ if (language != UNKNOWN_LANGUAGE) {
+ languages->insert(language);
+ }
+ }
+}
+
+// static
+Language LanguageUsageMetrics::ToLanguage(const std::string& locale) {
+ StringTokenizer parts(locale, "-_");
+ if (!parts.GetNext()) {
+ return UNKNOWN_LANGUAGE;
+ }
+ const std::string language_part = parts.token();
+ Language language;
+ if (!LanguageFromCode(language_part.c_str(), &language)) {
+ return UNKNOWN_LANGUAGE;
+ }
+ return language;
+}
diff --git a/chrome/browser/language_usage_metrics.h b/chrome/browser/language_usage_metrics.h
new file mode 100755
index 0000000..e5cc708
--- /dev/null
+++ b/chrome/browser/language_usage_metrics.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_LANGUAGE_USAGE_METRICS_H_
+#define CHROME_BROWSER_LANGUAGE_USAGE_METRICS_H_
+#pragma once
+
+#include <set>
+#include <string>
+
+#include "base/gtest_prod_util.h"
+#include "third_party/cld/languages/public/languages.h"
+
+// Methods to record language usage as UMA histograms.
+// Language codes are defined in third_party/cld/languages/proto/languages.pb.h
+class LanguageUsageMetrics {
+ public:
+ // Records accept languages as a UMA histogram. |accept_languages| is a
+ // case-insensitive comma-separated list of languages/locales of either xx,
+ // xx-YY, or xx_YY format where xx is iso-639 language code and YY is iso-3166
+ // country code. Country code is ignored. That is, xx and XX-YY are considered
+ // identical and recorded once.
+ static void RecordAcceptLanguages(const std::string& accept_languages);
+
+ // Records the application language as a UMA histogram. |application_locale|
+ // is a case-insensitive locale string of either xx, xx-YY, or xx_YY format.
+ // Only the language part (xx in the example) is considered.
+ static void RecordApplicationLanguage(const std::string& application_locale);
+
+ private:
+ // This class must not be instantiated.
+ LanguageUsageMetrics();
+
+ // Parses |accept_languages| and returns a set of language codes in
+ // |languages|.
+ static void ParseAcceptLanguages(const std::string& accept_languages,
+ std::set<Language>* languages);
+
+ // Parses |locale| and returns the language code. Returns UNKNOWN_LANGUAGE in
+ // case of errors.
+ static Language ToLanguage(const std::string& locale);
+
+ FRIEND_TEST_ALL_PREFIXES(LanguageUsageMetricsTest, ParseAcceptLanguages);
+ FRIEND_TEST_ALL_PREFIXES(LanguageUsageMetricsTest, ToLanguage);
+};
+
+#endif // CHROME_BROWSER_LANGUAGE_USAGE_METRICS_H_
diff --git a/chrome/browser/language_usage_metrics_unittest.cc b/chrome/browser/language_usage_metrics_unittest.cc
new file mode 100644
index 0000000..db6e987
--- /dev/null
+++ b/chrome/browser/language_usage_metrics_unittest.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/language_usage_metrics.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(LanguageUsageMetricsTest, ParseAcceptLanguages) {
+ std::set<Language> language_set;
+ std::set<Language>::const_iterator it;
+
+ // Basic single language case.
+ LanguageUsageMetrics::ParseAcceptLanguages("ja", &language_set);
+ EXPECT_EQ(1U, language_set.size());
+ EXPECT_EQ(JAPANESE, *language_set.begin());
+
+ // Empty language.
+ LanguageUsageMetrics::ParseAcceptLanguages("", &language_set);
+ EXPECT_EQ(0U, language_set.size());
+
+ // Country code is ignored.
+ LanguageUsageMetrics::ParseAcceptLanguages("ja-JP", &language_set);
+ EXPECT_EQ(1U, language_set.size());
+ EXPECT_EQ(JAPANESE, *language_set.begin());
+
+ // Case is ignored.
+ LanguageUsageMetrics::ParseAcceptLanguages("Ja-jP", &language_set);
+ EXPECT_EQ(1U, language_set.size());
+ EXPECT_EQ(JAPANESE, *language_set.begin());
+
+ // Underscore as the separator.
+ LanguageUsageMetrics::ParseAcceptLanguages("ja_JP", &language_set);
+ EXPECT_EQ(1U, language_set.size());
+ EXPECT_EQ(JAPANESE, *language_set.begin());
+
+ // The result contains the same language code only once.
+ LanguageUsageMetrics::ParseAcceptLanguages("ja-JP,ja", &language_set);
+ EXPECT_EQ(1U, language_set.size());
+ EXPECT_EQ(JAPANESE, *language_set.begin());
+
+ // Basic two languages case.
+ LanguageUsageMetrics::ParseAcceptLanguages("en,ja", &language_set);
+ EXPECT_EQ(2U, language_set.size());
+ it = language_set.begin();
+ EXPECT_EQ(ENGLISH, *it);
+ EXPECT_EQ(JAPANESE, *++it);
+
+ // Multiple languages.
+ LanguageUsageMetrics::ParseAcceptLanguages("ja-JP,en,es,ja,en-US",
+ &language_set);
+ EXPECT_EQ(3U, language_set.size());
+ it = language_set.begin();
+ EXPECT_EQ(ENGLISH, *it);
+ EXPECT_EQ(JAPANESE, *++it);
+ EXPECT_EQ(SPANISH, *++it);
+
+ // Two empty languages.
+ LanguageUsageMetrics::ParseAcceptLanguages(",", &language_set);
+ EXPECT_EQ(0U, language_set.size());
+
+ // Trailing comma.
+ LanguageUsageMetrics::ParseAcceptLanguages("ja,", &language_set);
+ EXPECT_EQ(1U, language_set.size());
+ EXPECT_EQ(JAPANESE, *language_set.begin());
+
+ // Leading comma.
+ LanguageUsageMetrics::ParseAcceptLanguages(",es", &language_set);
+ EXPECT_EQ(1U, language_set.size());
+ EXPECT_EQ(SPANISH, *language_set.begin());
+
+ // Combination of invalid and valid.
+ LanguageUsageMetrics::ParseAcceptLanguages("zz,en", &language_set);
+ EXPECT_EQ(1U, language_set.size());
+ EXPECT_EQ(ENGLISH, *language_set.begin());
+}
+
+TEST(LanguageUsageMetricsTest, ToLanguage) {
+ // Basic case.
+ EXPECT_EQ(JAPANESE, LanguageUsageMetrics::ToLanguage("ja"));
+
+ // Case is ignored.
+ EXPECT_EQ(SPANISH, LanguageUsageMetrics::ToLanguage("Es"));
+
+ // Country code is ignored.
+ EXPECT_EQ(JAPANESE, LanguageUsageMetrics::ToLanguage("ja-JP"));
+
+ // Invalid locales are considered as unknown language.
+ EXPECT_EQ(UNKNOWN_LANGUAGE, LanguageUsageMetrics::ToLanguage(""));
+ EXPECT_EQ(UNKNOWN_LANGUAGE, LanguageUsageMetrics::ToLanguage("xx"));
+}
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi
index 9a9c605..6227d54 100644
--- a/chrome/chrome_browser.gypi
+++ b/chrome/chrome_browser.gypi
@@ -37,6 +37,7 @@
'../printing/printing.gyp:printing',
'../skia/skia.gyp:skia',
'../third_party/bzip2/bzip2.gyp:bzip2',
+ '../third_party/cld/cld.gyp:cld',
'../third_party/expat/expat.gyp:expat',
'../third_party/hunspell/hunspell.gyp:hunspell',
'../third_party/icu/icu.gyp:icui18n',
@@ -1348,6 +1349,8 @@
'browser/keychain_mac.h',
'browser/language_combobox_model.cc',
'browser/language_combobox_model.h',
+ 'browser/language_usage_metrics.cc',
+ 'browser/language_usage_metrics.h',
'browser/language_order_table_model.cc',
'browser/language_order_table_model.h',
'browser/mac/authorization_util.h',
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi
index 114a2f7..b9e7539 100644
--- a/chrome/chrome_tests.gypi
+++ b/chrome/chrome_tests.gypi
@@ -1467,6 +1467,7 @@
'browser/instant/instant_loader_manager_unittest.cc',
'browser/instant/promo_counter_unittest.cc',
'browser/internal_auth_unittest.cc',
+ 'browser/language_usage_metrics_unittest.cc',
'browser/mac/keystone_glue_unittest.mm',
'browser/media/media_internals_unittest.cc',
'browser/metrics/display_utils_unittest.cc',