summaryrefslogtreecommitdiffstats
path: root/app
diff options
context:
space:
mode:
authordmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-12-13 16:19:21 +0000
committerdmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-12-13 16:19:21 +0000
commit4db67943718c58094fc7e1a9e6ab8182d74badd6 (patch)
treeef8112377604af2c5b7cb3f3252b101b555704b9 /app
parent9d0bc41806fd2534b87b61cc09fb80f9b03ca99f (diff)
downloadchromium_src-4db67943718c58094fc7e1a9e6ab8182d74badd6.zip
chromium_src-4db67943718c58094fc7e1a9e6ab8182d74badd6.tar.gz
chromium_src-4db67943718c58094fc7e1a9e6ab8182d74badd6.tar.bz2
Add a utility function to determine whether a locale string is syntactically
valid; this will be used by the TTS extension API. Move some locale utility functions from extension_l10n_util to l10n_util. BUG=none TEST=Adds new unit test Review URL: http://codereview.chromium.org/5643002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@69004 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'app')
-rw-r--r--app/l10n_util.cc92
-rw-r--r--app/l10n_util.h19
-rw-r--r--app/l10n_util_unittest.cc66
3 files changed, 177 insertions, 0 deletions
diff --git a/app/l10n_util.cc b/app/l10n_util.cc
index 148151d..4d6b8b9 100644
--- a/app/l10n_util.cc
+++ b/app/l10n_util.cc
@@ -29,6 +29,7 @@
#include "build/build_config.h"
#include "gfx/canvas.h"
#include "unicode/rbbi.h"
+#include "unicode/uloc.h"
#if defined(OS_MACOSX)
#include "app/l10n_util_mac.h"
@@ -475,6 +476,97 @@ string16 GetDisplayNameForLocale(const std::string& locale,
return display_name;
}
+// Returns a copy of |locale| in which every '-' has been turned into '_'.
+std::string NormalizeLocale(const std::string& locale) {
+  std::string result(locale);
+  for (std::string::size_type pos = 0; pos < result.size(); ++pos) {
+    if (result[pos] == '-')
+      result[pos] = '_';
+  }
+  return result;
+}
+
+// Appends the normalized form of |current_locale| to |parent_locales|,
+// followed by every parent that uloc_getParent() reports for it.
+void GetParentLocales(const std::string& current_locale,
+                      std::vector<std::string>* parent_locales) {
+  const int kNameCapacity = 256;
+  char name[kNameCapacity];
+
+  // Seed the buffer with the normalized locale; it is the first entry.
+  const std::string normalized(NormalizeLocale(current_locale));
+  base::strlcpy(name, normalized.c_str(), kNameCapacity);
+  parent_locales->push_back(name);
+
+  // Replace the buffer contents with its own parent on each round. Stop as
+  // soon as ICU returns a non-positive length or reports a failure.
+  UErrorCode status = U_ZERO_ERROR;
+  for (;;) {
+    if (uloc_getParent(name, name, kNameCapacity, &status) <= 0)
+      break;
+    if (U_FAILURE(status))
+      break;
+    parent_locales->push_back(name);
+  }
+}
+
+// Returns true if |locale| looks like a locale identifier: a 1 - 3 letter
+// language token, optionally followed by '-' or '_' separated tokens of
+// 1 - 8 alphanumeric characters, optionally followed by '@' and keywords.
+// This is a purely syntactic check; no locale data is consulted.
+bool IsValidLocaleSyntax(const std::string& locale) {
+  // Check that the length is plausible.
+  if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
+    return false;
+
+  // Strip off the part after an '@' sign, which might contain keywords,
+  // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
+  // We don't validate that part much, just check that there's at least one
+  // equals sign in a plausible place: neither the first nor the last
+  // character of the keyword section.
+  std::string prefix = locale;
+  const size_t split_point = locale.find('@');
+  if (split_point != std::string::npos) {
+    const std::string keywords = locale.substr(split_point + 1);
+    prefix = locale.substr(0, split_point);
+
+    // The upper bound is written as an addition so that nothing is ever
+    // subtracted from an unsigned size (|keywords| may be empty or hold a
+    // single character).
+    const size_t equals_loc = keywords.find('=');
+    if (equals_loc == std::string::npos ||
+        equals_loc < 1 || equals_loc + 1 >= keywords.size())
+      return false;
+  }
+
+  // Check that all characters before the at-sign are alphanumeric, hyphen,
+  // or underscore.
+  for (size_t i = 0; i < prefix.size(); i++) {
+    const char ch = prefix[i];
+    if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '-' && ch != '_')
+      return false;
+  }
+
+  // Check that the initial token (before the first hyphen/underscore)
+  // is 1 - 3 alphabetical characters (a language tag). If there is no
+  // separator at all, only the "alphabetical" part is enforced here; the
+  // length of a lone token is checked at the end of the function.
+  for (size_t i = 0; i < prefix.size(); i++) {
+    const char ch = prefix[i];
+    if (ch == '-' || ch == '_') {
+      if (i < 1 || i > 3)
+        return false;
+      break;
+    }
+    if (!IsAsciiAlpha(ch))
+      return false;
+  }
+
+  // Check that all tokens after the initial token are 1 - 8 characters.
+  // (Tokenize/StringTokenizer don't work here, they collapse multiple
+  // delimiters into one.)
+  int token_len = 0;
+  int token_index = 0;
+  for (size_t i = 0; i < prefix.size(); i++) {
+    const char ch = prefix[i];
+    if (ch == '-' || ch == '_') {
+      // The initial token (index 0) was already validated above.
+      if (token_index > 0 && (token_len < 1 || token_len > 8))
+        return false;
+      token_index++;
+      token_len = 0;
+    } else {
+      token_len++;
+    }
+  }
+
+  // The loop above never sees the end of the final token, so check it here.
+  // If no separator was found, that token is the language tag and is limited
+  // to 3 characters; otherwise it is a trailing token limited to 8.
+  const int max_last_token_len = (token_index == 0) ? 3 : 8;
+  if (token_len < 1 || token_len > max_last_token_len)
+    return false;
+
+  return true;
+}
+
std::wstring GetString(int message_id) {
return UTF16ToWide(GetStringUTF16(message_id));
}
diff --git a/app/l10n_util.h b/app/l10n_util.h
index a1519b5..a71c21c 100644
--- a/app/l10n_util.h
+++ b/app/l10n_util.h
@@ -56,6 +56,25 @@ string16 GetDisplayNameForLocale(const std::string& locale,
const std::string& display_locale,
bool is_for_ui);
+// Returns a copy of |locale| with every '-' converted into '_', to be
+// consistent with ICU and file system names. E.g. "en-US" becomes "en_US".
+std::string NormalizeLocale(const std::string& locale);
+
+// Produce a vector of parent locales for given locale.
+// It includes the current locale in the result, as the first entry.
+// Entries are appended to |parent_locales|; anything already stored in the
+// vector is kept. The locale is passed through NormalizeLocale() first, so
+// the results use '_' as the separator even if the input used '-'.
+// sr_Cyrl_RS generates sr_Cyrl_RS, sr_Cyrl and sr.
+void GetParentLocales(const std::string& current_locale,
+ std::vector<std::string>* parent_locales);
+
+// Checks if a string is plausibly a syntactically-valid locale string,
+// for cases where we want the valid input to be a locale string such as
+// 'en', 'pt-BR', 'fil', 'es-419', 'zh-Hans-CN', 'i-klingon' or
+// 'de_DE@collation=phonebook', but we don't want to limit it to
+// locales that Chrome actually knows about, so 'xx-YY' should be
+// accepted, but 'z', 'German', 'en-$1', or 'abcd-1234' should not.
+// Both '-' and '_' are accepted as token separators. Anything after an '@'
+// is treated as keywords and only loosely checked: it must contain an '='
+// that is neither its first nor its last character. Strings shorter than
+// 2 characters, or ULOC_FULLNAME_CAPACITY characters or longer, are rejected.
+// Case-insensitive. Modeled on the Unicode locale identifier syntax, which
+// is derived from BCP 47, see:
+// http://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers
+bool IsValidLocaleSyntax(const std::string& locale);
+
//
// Mac Note: See l10n_util_mac.h for some NSString versions and other support.
//
diff --git a/app/l10n_util_unittest.cc b/app/l10n_util_unittest.cc
index fa4cc24..786f1f6 100644
--- a/app/l10n_util_unittest.cc
+++ b/app/l10n_util_unittest.cc
@@ -325,3 +325,69 @@ TEST_F(L10nUtilTest, LocaleDisplayName) {
result = l10n_util::GetDisplayNameForLocale("es-419", "en", false);
EXPECT_EQ(result, ASCIIToUTF16("Spanish (Latin America and the Caribbean)"));
}
+
+// The result should start with the locale itself and then list each parent,
+// from most to least specific.
+TEST_F(L10nUtilTest, GetParentLocales) {
+  const std::string kStartLocale("sr_Cyrl_RS");
+  std::vector<std::string> chain;
+  l10n_util::GetParentLocales(kStartLocale, &chain);
+
+  // Stop here on a size mismatch so the indexing below stays in bounds.
+  ASSERT_EQ(3U, chain.size());
+  EXPECT_EQ("sr_Cyrl_RS", chain[0]);
+  EXPECT_EQ("sr_Cyrl", chain[1]);
+  EXPECT_EQ("sr", chain[2]);
+}
+
+// Exercises IsValidLocaleSyntax() with strings that must be accepted and
+// strings that must be rejected. Uses the boolean assertions rather than
+// EXPECT_EQ(true/false, ...), which is the form gtest provides for
+// conditions.
+TEST_F(L10nUtilTest, IsValidLocaleSyntax) {
+  // Test valid locales.
+  // Bare language codes of two and three letters.
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("en"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("fr"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("de"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("pt"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("zh"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("fil"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("haw"));
+  // Language plus further tokens, with either separator.
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("en-US"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("en_US"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("en_GB"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("pt-BR"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("zh_CN"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("zh_Hans"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("zh_Hans_CN"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("zh_Hant"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("zh_Hant_TW"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("fr_CA"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("i-klingon"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("es-419"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("en_IE_PREEURO"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("en_IE_u_cu_IEP"));
+  // Locales carrying an '@' keyword section.
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("en_IE@currency=IEP"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("fr@x=y"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax("zn_CN@foo=bar"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax(
+      "fr@collation=phonebook;calendar=islamic-civil"));
+  EXPECT_TRUE(l10n_util::IsValidLocaleSyntax(
+      "sr_Latn_RS_REVISED@currency=USD"));
+
+  // Test invalid locales.
+  // Too short, or a first token that is not purely alphabetical.
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax(""));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("x"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("12"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("456"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("a1"));
+  // Missing, unsupported, leading, trailing or doubled separators, and
+  // tokens of the wrong length.
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("enUS"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("zhcn"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("en.US"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("en#US"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("-en-US"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("en-US-"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("123-en-US"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("Latin"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("German"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("pt--BR"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("sl-macedonia"));
+  // Malformed keyword sections.
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("@"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("en-US@"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("en-US@x"));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("en-US@x="));
+  EXPECT_FALSE(l10n_util::IsValidLocaleSyntax("en-US@=y"));
+}