diff options
author | dmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-12-13 16:19:21 +0000 |
---|---|---|
committer | dmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-12-13 16:19:21 +0000 |
commit | 4db67943718c58094fc7e1a9e6ab8182d74badd6 (patch) | |
tree | ef8112377604af2c5b7cb3f3252b101b555704b9 /app | |
parent | 9d0bc41806fd2534b87b61cc09fb80f9b03ca99f (diff) | |
download | chromium_src-4db67943718c58094fc7e1a9e6ab8182d74badd6.zip chromium_src-4db67943718c58094fc7e1a9e6ab8182d74badd6.tar.gz chromium_src-4db67943718c58094fc7e1a9e6ab8182d74badd6.tar.bz2 |
Add a utility function to determine whether a locale string is syntactically valid;
this will be used by the TTS extension API. Also move some locale utility functions
from extension_l10n_util to l10n_util.
BUG=none
TEST=Adds new unit test
Review URL: http://codereview.chromium.org/5643002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@69004 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'app')
-rw-r--r-- | app/l10n_util.cc | 92 | ||||
-rw-r--r-- | app/l10n_util.h | 19 | ||||
-rw-r--r-- | app/l10n_util_unittest.cc | 66 |
3 files changed, 177 insertions, 0 deletions
diff --git a/app/l10n_util.cc b/app/l10n_util.cc
index 148151d..4d6b8b9 100644
--- a/app/l10n_util.cc
+++ b/app/l10n_util.cc
@@ -29,6 +29,7 @@
 #include "build/build_config.h"
 #include "gfx/canvas.h"
 #include "unicode/rbbi.h"
+#include "unicode/uloc.h"
 
 #if defined(OS_MACOSX)
 #include "app/l10n_util_mac.h"
@@ -475,6 +476,97 @@ string16 GetDisplayNameForLocale(const std::string& locale,
   return display_name;
 }
 
+std::string NormalizeLocale(const std::string& locale) {
+  std::string normalized_locale(locale);
+  std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_');
+
+  return normalized_locale;
+}
+
+void GetParentLocales(const std::string& current_locale,
+                      std::vector<std::string>* parent_locales) {
+  std::string locale(NormalizeLocale(current_locale));
+
+  const int kNameCapacity = 256;
+  char parent[kNameCapacity];
+  base::strlcpy(parent, locale.c_str(), kNameCapacity);
+  parent_locales->push_back(parent);
+  UErrorCode err = U_ZERO_ERROR;
+  while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
+    if (U_FAILURE(err))
+      break;
+    parent_locales->push_back(parent);
+  }
+}
+
+bool IsValidLocaleSyntax(const std::string& locale) {
+  // Check that the length is plausible (2 .. ULOC_FULLNAME_CAPACITY - 1).
+  if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
+    return false;
+
+  // Strip off the part after an '@' sign, which might contain keywords,
+  // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
+  // We don't validate that part much, just check that there's at least one
+  // equals sign in a plausible place.
+  std::string prefix = locale;
+  if (locale.find("@") != std::string::npos) {
+    size_t split_point = locale.find("@");
+    std::string keywords = locale.substr(split_point + 1);
+    prefix = locale.substr(0, split_point);
+
+    size_t equals_loc = keywords.find("=");
+    if (equals_loc == std::string::npos ||
+        equals_loc < 1 || equals_loc > keywords.size() - 2)
+      return false;
+  }
+
+  // Check that all characters before the at-sign are alphanumeric, hyphen,
+  // or underscore.
+  for (size_t i = 0; i < prefix.size(); i++) {
+    char ch = prefix[i];
+    if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '-' && ch != '_')
+      return false;
+  }
+
+  // Check that the initial token (before the first hyphen/underscore)
+  // is 1 - 3 alphabetical characters (a language tag).
+  for (size_t i = 0; i < prefix.size(); i++) {
+    char ch = prefix[i];
+    if (ch == '-' || ch == '_') {
+      if (i < 1 || i > 3)
+        return false;
+      break;
+    }
+    if (!IsAsciiAlpha(ch))
+      return false;
+  }
+
+  // Check that all tokens after the initial token are 1 - 8 characters.
+  // (Tokenize/StringTokenizer don't work here, they collapse multiple
+  // delimiters into one.)
+  int token_len = 0;
+  int token_index = 0;
+  for (size_t i = 0; i < prefix.size(); i++) {
+    char ch = prefix[i];
+    if (ch == '-' || ch == '_') {
+      if (token_index > 0 && (token_len < 1 || token_len > 8)) {
+        return false;
+      }
+      token_index++;
+      token_len = 0;
+    } else {
+      token_len++;
+    }
+  }
+  if (token_index == 0 && (token_len < 1 || token_len > 3)) {
+    return false;
+  } else if (token_len < 1 || token_len > 8) {
+    return false;
+  }
+
+  return true;
+}
+
 std::wstring GetString(int message_id) {
   return UTF16ToWide(GetStringUTF16(message_id));
 }
diff --git a/app/l10n_util.h b/app/l10n_util.h
index a1519b5..a71c21c 100644
--- a/app/l10n_util.h
+++ b/app/l10n_util.h
@@ -56,6 +56,25 @@ string16 GetDisplayNameForLocale(const std::string& locale,
                                  const std::string& display_locale,
                                  bool is_for_ui);
 
+// Converts all '-' into '_', to be consistent with ICU and file system names.
+std::string NormalizeLocale(const std::string& locale);
+
+// Produces a vector of parent locales for the given locale.
+// It includes the current locale in the result.
+// For example, sr_Cyrl_RS generates sr_Cyrl_RS, sr_Cyrl and sr.
+void GetParentLocales(const std::string& current_locale,
+                      std::vector<std::string>* parent_locales);
+
+// Checks if a string is plausibly a syntactically-valid locale string,
+// for cases where we want the valid input to be a locale string such as
+// 'en', 'pt-BR', 'fil', 'es-419', 'zh-Hans-CN', 'i-klingon' or
+// 'de_DE@collation=phonebook', but we don't want to limit it to
+// locales that Chrome actually knows about, so 'xx-YY' should be
+// accepted, but 'z', 'German', 'en-$1', or 'abcd-1234' should not.
+// Case-insensitive. Based on BCP 47, see:
+// http://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers
+bool IsValidLocaleSyntax(const std::string& locale);
+
 //
 // Mac Note: See l10n_util_mac.h for some NSString versions and other support.
 //
diff --git a/app/l10n_util_unittest.cc b/app/l10n_util_unittest.cc
index fa4cc24..786f1f6 100644
--- a/app/l10n_util_unittest.cc
+++ b/app/l10n_util_unittest.cc
@@ -325,3 +325,69 @@ TEST_F(L10nUtilTest, LocaleDisplayName) {
   result = l10n_util::GetDisplayNameForLocale("es-419", "en", false);
   EXPECT_EQ(result, ASCIIToUTF16("Spanish (Latin America and the Caribbean)"));
 }
+
+TEST_F(L10nUtilTest, GetParentLocales) {
+  std::vector<std::string> locales;
+  const std::string top_locale("sr_Cyrl_RS");
+  l10n_util::GetParentLocales(top_locale, &locales);
+
+  ASSERT_EQ(3U, locales.size());
+  EXPECT_EQ("sr_Cyrl_RS", locales[0]);
+  EXPECT_EQ("sr_Cyrl", locales[1]);
+  EXPECT_EQ("sr", locales[2]);
+}
+
+TEST_F(L10nUtilTest, IsValidLocaleSyntax) {
+  // Test valid locales.
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("fr"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("de"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("pt"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("fil"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("haw"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en-US"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en_US"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en_GB"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("pt-BR"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_CN"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_Hans"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_Hans_CN"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_Hant"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_Hant_TW"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("fr_CA"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("i-klingon"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("es-419"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en_IE_PREEURO"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en_IE_u_cu_IEP"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en_IE@currency=IEP"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("fr@x=y"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zn_CN@foo=bar"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax(
+      "fr@collation=phonebook;calendar=islamic-civil"));
+  EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax(
+      "sr_Latn_RS_REVISED@currency=USD"));
+
+  // Test invalid locales.
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax(""));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("x"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("12"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("456"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("a1"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("enUS"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("zhcn"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en.US"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en#US"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("-en-US"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US-"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("123-en-US"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("Latin"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("German"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("pt--BR"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("sl-macedonia"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("@"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US@"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US@x"));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US@x="));
+  EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US@=y"));
+}
|