diff options
author | dmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-12-13 16:19:21 +0000 |
---|---|---|
committer | dmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-12-13 16:19:21 +0000 |
commit | 4db67943718c58094fc7e1a9e6ab8182d74badd6 (patch) | |
tree | ef8112377604af2c5b7cb3f3252b101b555704b9 | |
parent | 9d0bc41806fd2534b87b61cc09fb80f9b03ca99f (diff) | |
download | chromium_src-4db67943718c58094fc7e1a9e6ab8182d74badd6.zip chromium_src-4db67943718c58094fc7e1a9e6ab8182d74badd6.tar.gz chromium_src-4db67943718c58094fc7e1a9e6ab8182d74badd6.tar.bz2 |
Add a utility function to determine whether a locale string is syntactically
valid; this will be used by the TTS extension API. Move some locale utility
functions from extension_l10n_util to l10n_util.
BUG=none
TEST=Adds new unit test
Review URL: http://codereview.chromium.org/5643002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@69004 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | app/l10n_util.cc | 92 | ||||
-rw-r--r-- | app/l10n_util.h | 19 | ||||
-rw-r--r-- | app/l10n_util_unittest.cc | 66 | ||||
-rw-r--r-- | chrome/common/extensions/extension_l10n_util.cc | 30 | ||||
-rw-r--r-- | chrome/common/extensions/extension_l10n_util.h | 9 | ||||
-rw-r--r-- | chrome/common/extensions/extension_l10n_util_unittest.cc | 11 | ||||
-rw-r--r-- | chrome/common/extensions/extension_resource_unittest.cc | 3 |
7 files changed, 182 insertions, 48 deletions
diff --git a/app/l10n_util.cc b/app/l10n_util.cc index 148151d..4d6b8b9 100644 --- a/app/l10n_util.cc +++ b/app/l10n_util.cc @@ -29,6 +29,7 @@ #include "build/build_config.h" #include "gfx/canvas.h" #include "unicode/rbbi.h" +#include "unicode/uloc.h" #if defined(OS_MACOSX) #include "app/l10n_util_mac.h" @@ -475,6 +476,97 @@ string16 GetDisplayNameForLocale(const std::string& locale, return display_name; } +std::string NormalizeLocale(const std::string& locale) { + std::string normalized_locale(locale); + std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_'); + + return normalized_locale; +} + +void GetParentLocales(const std::string& current_locale, + std::vector<std::string>* parent_locales) { + std::string locale(NormalizeLocale(current_locale)); + + const int kNameCapacity = 256; + char parent[kNameCapacity]; + base::strlcpy(parent, locale.c_str(), kNameCapacity); + parent_locales->push_back(parent); + UErrorCode err = U_ZERO_ERROR; + while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) { + if (U_FAILURE(err)) + break; + parent_locales->push_back(parent); + } +} + +bool IsValidLocaleSyntax(const std::string& locale) { + // Check that the length is plausible. + if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY) + return false; + + // Strip off the part after an '@' sign, which might contain keywords, + // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil. + // We don't validate that part much, just check that there's at least one + // equals sign in a plausible place. 
+ std::string prefix = locale; + if (locale.find("@") != std::string::npos) { + size_t split_point = locale.find("@"); + std::string keywords = locale.substr(split_point + 1); + prefix = locale.substr(0, split_point); + + size_t equals_loc = keywords.find("="); + if (equals_loc == std::string::npos || + equals_loc < 1 || equals_loc > keywords.size() - 2) + return false; + } + + // Check that all characters before the at-sign are alphanumeric, hyphen, + // or underscore. + for (size_t i = 0; i < prefix.size(); i++) { + char ch = prefix[i]; + if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '-' && ch != '_') + return false; + } + + // Check that the initial token (before the first hyphen/underscore) + // is 1 - 3 alphabetical characters (a language tag). + for (size_t i = 0; i < prefix.size(); i++) { + char ch = prefix[i]; + if (ch == '-' || ch == '_') { + if (i < 1 || i > 3) + return false; + break; + } + if (!IsAsciiAlpha(ch)) + return false; + } + + // Check that the all tokens after the initial token are 1 - 8 characters. + // (Tokenize/StringTokenizer don't work here, they collapse multiple + // delimiters into one.) 
+ int token_len = 0; + int token_index = 0; + for (size_t i = 0; i < prefix.size(); i++) { + char ch = prefix[i]; + if (ch == '-' || ch == '_') { + if (token_index > 0 && (token_len < 1 || token_len > 8)) { + return false; + } + token_index++; + token_len = 0; + } else { + token_len++; + } + } + if (token_index == 0 && (token_len < 1 || token_len > 3)) { + return false; + } else if (token_len < 1 || token_len > 8) { + return false; + } + + return true; +} + std::wstring GetString(int message_id) { return UTF16ToWide(GetStringUTF16(message_id)); } diff --git a/app/l10n_util.h b/app/l10n_util.h index a1519b5..a71c21c 100644 --- a/app/l10n_util.h +++ b/app/l10n_util.h @@ -56,6 +56,25 @@ string16 GetDisplayNameForLocale(const std::string& locale, const std::string& display_locale, bool is_for_ui); +// Converts all - into _, to be consistent with ICU and file system names. +std::string NormalizeLocale(const std::string& locale); + +// Produce a vector of parent locales for given locale. +// It includes the current locale in the result. +// sr_Cyrl_RS generates sr_Cyrl_RS, sr_Cyrl and sr. +void GetParentLocales(const std::string& current_locale, + std::vector<std::string>* parent_locales); + +// Checks if a string is plausibly a syntactically-valid locale string, +// for cases where we want the valid input to be a locale string such as +// 'en', 'pt-BR', 'fil', 'es-419', 'zh-Hans-CN', 'i-klingon' or +// 'de_DE@collation=phonebook', but we don't want to limit it to +// locales that Chrome actually knows about, so 'xx-YY' should be +// accepted, but 'z', 'German', 'en-$1', or 'abcd-1234' should not. +// Case-insensitive. Based on BCP 47, see: +// http://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers +bool IsValidLocaleSyntax(const std::string& locale); + // // Mac Note: See l10n_util_mac.h for some NSString versions and other support. 
// diff --git a/app/l10n_util_unittest.cc b/app/l10n_util_unittest.cc index fa4cc24..786f1f6 100644 --- a/app/l10n_util_unittest.cc +++ b/app/l10n_util_unittest.cc @@ -325,3 +325,69 @@ TEST_F(L10nUtilTest, LocaleDisplayName) { result = l10n_util::GetDisplayNameForLocale("es-419", "en", false); EXPECT_EQ(result, ASCIIToUTF16("Spanish (Latin America and the Caribbean)")); } + +TEST_F(L10nUtilTest, GetParentLocales) { + std::vector<std::string> locales; + const std::string top_locale("sr_Cyrl_RS"); + l10n_util::GetParentLocales(top_locale, &locales); + + ASSERT_EQ(3U, locales.size()); + EXPECT_EQ("sr_Cyrl_RS", locales[0]); + EXPECT_EQ("sr_Cyrl", locales[1]); + EXPECT_EQ("sr", locales[2]); +} + +TEST_F(L10nUtilTest, IsValidLocaleSyntax) { + // Test valid locales. + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("fr")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("de")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("pt")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("fil")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("haw")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en-US")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en_US")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en_GB")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("pt-BR")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_CN")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_Hans")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_Hans_CN")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_Hant")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zh_Hant_TW")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("fr_CA")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("i-klingon")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("es-419")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en_IE_PREEURO")); + EXPECT_EQ(true, 
l10n_util::IsValidLocaleSyntax("en_IE_u_cu_IEP")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("en_IE@currency=IEP")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("fr@x=y")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax("zn_CN@foo=bar")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax( + "fr@collation=phonebook;calendar=islamic-civil")); + EXPECT_EQ(true, l10n_util::IsValidLocaleSyntax( + "sr_Latn_RS_REVISED@currency=USD")); + + // Test invalid locales. + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("x")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("12")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("456")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("a1")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("enUS")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("zhcn")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en.US")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en#US")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("-en-US")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US-")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("123-en-US")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("Latin")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("German")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("pt--BR")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("sl-macedonia")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("@")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US@")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US@x")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US@x=")); + EXPECT_EQ(false, l10n_util::IsValidLocaleSyntax("en-US@=y")); +} diff --git a/chrome/common/extensions/extension_l10n_util.cc b/chrome/common/extensions/extension_l10n_util.cc index 34762fd..994ab83 100644 --- a/chrome/common/extensions/extension_l10n_util.cc +++ 
b/chrome/common/extensions/extension_l10n_util.cc @@ -167,37 +167,14 @@ bool AddLocale(const std::set<std::string>& chrome_locales, return true; } -std::string NormalizeLocale(const std::string& locale) { - std::string normalized_locale(locale); - std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_'); - - return normalized_locale; -} - std::string CurrentLocaleOrDefault() { - std::string current_locale = NormalizeLocale(*GetProcessLocale()); + std::string current_locale = l10n_util::NormalizeLocale(*GetProcessLocale()); if (current_locale.empty()) current_locale = "en"; return current_locale; } -void GetParentLocales(const std::string& current_locale, - std::vector<std::string>* parent_locales) { - std::string locale(NormalizeLocale(current_locale)); - - const int kNameCapacity = 256; - char parent[kNameCapacity]; - base::strlcpy(parent, locale.c_str(), kNameCapacity); - parent_locales->push_back(parent); - UErrorCode err = U_ZERO_ERROR; - while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) { - if (U_FAILURE(err)) - break; - parent_locales->push_back(parent); - } -} - void GetAllLocales(std::set<std::string>* all_locales) { const std::vector<std::string>& available_locales = l10n_util::GetAvailableLocales(); @@ -205,7 +182,7 @@ void GetAllLocales(std::set<std::string>* all_locales) { // I.e. for sr_Cyrl_RS we add sr_Cyrl_RS, sr_Cyrl and sr. for (size_t i = 0; i < available_locales.size(); ++i) { std::vector<std::string> result; - GetParentLocales(available_locales[i], &result); + l10n_util::GetParentLocales(available_locales[i], &result); all_locales->insert(result.begin(), result.end()); } } @@ -240,6 +217,7 @@ bool GetValidLocales(const FilePath& locale_path, return true; } + // Loads contents of the messages file for given locale. If file is not found, // or there was parsing error we return NULL and set |error|. // Caller owns the returned object. 
@@ -270,7 +248,7 @@ ExtensionMessageBundle* LoadMessageCatalogs( // Order locales to load as current_locale, first_parent, ..., default_locale. std::vector<std::string> all_fallback_locales; if (!application_locale.empty() && application_locale != default_locale) - GetParentLocales(application_locale, &all_fallback_locales); + l10n_util::GetParentLocales(application_locale, &all_fallback_locales); all_fallback_locales.push_back(default_locale); std::vector<linked_ptr<DictionaryValue> > catalogs; diff --git a/chrome/common/extensions/extension_l10n_util.h b/chrome/common/extensions/extension_l10n_util.h index e1fdfa0..57aa68f 100644 --- a/chrome/common/extensions/extension_l10n_util.h +++ b/chrome/common/extensions/extension_l10n_util.h @@ -59,18 +59,9 @@ bool AddLocale(const std::set<std::string>& chrome_locales, std::set<std::string>* valid_locales, std::string* error); -// Converts all - into _, to be consistent with ICU and file system names. -std::string NormalizeLocale(const std::string& locale); - // Returns normalized current locale, or default locale - en_US. std::string CurrentLocaleOrDefault(); -// Produce a vector of parent locales for given locale. -// It includes the current locale in the result. -// sr_Cyrl_RS generates sr_Cyrl_RS, sr_Cyrl and sr. -void GetParentLocales(const std::string& current_locale, - std::vector<std::string>* parent_locales); - // Extends list of Chrome locales to them and their parents, so we can do // proper fallback. 
void GetAllLocales(std::set<std::string>* all_locales); diff --git a/chrome/common/extensions/extension_l10n_util_unittest.cc b/chrome/common/extensions/extension_l10n_util_unittest.cc index 1487452..0ec7e97 100644 --- a/chrome/common/extensions/extension_l10n_util_unittest.cc +++ b/chrome/common/extensions/extension_l10n_util_unittest.cc @@ -214,17 +214,6 @@ TEST(ExtensionL10nUtil, LoadMessageCatalogsDuplicateKeys) { EXPECT_TRUE(error.empty()); } -TEST(ExtensionL10nUtil, GetParentLocales) { - std::vector<std::string> locales; - const std::string top_locale("sr_Cyrl_RS"); - extension_l10n_util::GetParentLocales(top_locale, &locales); - - ASSERT_EQ(3U, locales.size()); - EXPECT_EQ("sr_Cyrl_RS", locales[0]); - EXPECT_EQ("sr_Cyrl", locales[1]); - EXPECT_EQ("sr", locales[2]); -} - // Caller owns the returned object. ExtensionMessageBundle* CreateManifestBundle() { linked_ptr<DictionaryValue> catalog(new DictionaryValue); diff --git a/chrome/common/extensions/extension_resource_unittest.cc b/chrome/common/extensions/extension_resource_unittest.cc index 1590225e..e7b3ef0 100644 --- a/chrome/common/extensions/extension_resource_unittest.cc +++ b/chrome/common/extensions/extension_resource_unittest.cc @@ -58,8 +58,7 @@ TEST(ExtensionResourceTest, CreateWithAllResourcesOnDisk) { ASSERT_TRUE(file_util::CreateDirectory(l10n_path)); std::vector<std::string> locales; - extension_l10n_util::GetParentLocales(l10n_util::GetApplicationLocale(""), - &locales); + l10n_util::GetParentLocales(l10n_util::GetApplicationLocale(""), &locales); ASSERT_FALSE(locales.empty()); for (size_t i = 0; i < locales.size(); i++) { FilePath make_path; |