diff options
Diffstat (limited to 'app/l10n_util.cc')
-rw-r--r-- | app/l10n_util.cc | 876 |
1 files changed, 0 insertions, 876 deletions
diff --git a/app/l10n_util.cc b/app/l10n_util.cc deleted file mode 100644 index 64189e99..0000000 --- a/app/l10n_util.cc +++ /dev/null @@ -1,876 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "app/l10n_util.h" - -#if defined(TOOLKIT_USES_GTK) -#include <glib/gutils.h> -#endif - -#include <algorithm> -#include <cstdlib> -#include <iterator> - -#include "app/l10n_util_collator.h" -#include "base/command_line.h" -#include "base/file_util.h" -#include "base/i18n/file_util_icu.h" -#include "base/i18n/rtl.h" -#include "base/path_service.h" -#include "base/scoped_ptr.h" -#include "base/string16.h" -#include "base/string_number_conversions.h" -#include "base/string_split.h" -#include "base/sys_string_conversions.h" -#include "base/utf_string_conversions.h" -#include "build/build_config.h" -#include "gfx/canvas.h" -#include "ui/base/resource/resource_bundle.h" -#include "ui/base/ui_base_paths.h" -#include "unicode/rbbi.h" -#include "unicode/uloc.h" - -#if defined(OS_MACOSX) -#include "app/l10n_util_mac.h" -#elif defined(OS_WIN) -#include "app/l10n_util_win.h" -#endif - -namespace { - -#if defined(OS_WIN) -static const FilePath::CharType kLocaleFileExtension[] = L".dll"; -#elif defined(OS_POSIX) -static const FilePath::CharType kLocaleFileExtension[] = ".pak"; -#endif - -static const char* const kAcceptLanguageList[] = { - "af", // Afrikaans - "am", // Amharic - "ar", // Arabic - "az", // Azerbaijani - "be", // Belarusian - "bg", // Bulgarian - "bh", // Bihari - "bn", // Bengali - "br", // Breton - "bs", // Bosnian - "ca", // Catalan - "co", // Corsican - "cs", // Czech - "cy", // Welsh - "da", // Danish - "de", // German - "de-AT", // German (Austria) - "de-CH", // German (Switzerland) - "de-DE", // German (Germany) - "el", // Greek - "en", // English - "en-AU", // English (Austrailia) - "en-CA", // English (Canada) - "en-GB", // English (UK) - "en-NZ", // English (New Zealand) - "en-US", // English (US) - "en-ZA", // English (South Africa) - "eo", // Esperanto - // TODO(jungshik) : Do we want to list all es-Foo for Latin-American - // Spanish speaking countries? - "es", // Spanish - "et", // Estonian - "eu", // Basque - "fa", // Persian - "fi", // Finnish - "fil", // Filipino - "fo", // Faroese - "fr", // French - "fr-CA", // French (Canada) - "fr-CH", // French (Switzerland) - "fr-FR", // French (France) - "fy", // Frisian - "ga", // Irish - "gd", // Scots Gaelic - "gl", // Galician - "gn", // Guarani - "gu", // Gujarati - "ha", // Hausa - "haw", // Hawaiian - "he", // Hebrew - "hi", // Hindi - "hr", // Croatian - "hu", // Hungarian - "hy", // Armenian - "ia", // Interlingua - "id", // Indonesian - "is", // Icelandic - "it", // Italian - "it-CH", // Italian (Switzerland) - "it-IT", // Italian (Italy) - "ja", // Japanese - "jw", // Javanese - "ka", // Georgian - "kk", // Kazakh - "km", // Cambodian - "kn", // Kannada - "ko", // Korean - "ku", // Kurdish - "ky", // Kyrgyz - "la", // Latin - "ln", // Lingala - "lo", // Laothian - "lt", // Lithuanian - "lv", // Latvian - "mk", // Macedonian - "ml", // Malayalam - "mn", // Mongolian - "mo", // Moldavian - "mr", // Marathi - "ms", // Malay - "mt", // Maltese - "nb", // Norwegian (Bokmal) - "ne", // Nepali - "nl", // Dutch - "nn", // Norwegian (Nynorsk) - "no", // Norwegian - "oc", // Occitan - "om", // Oromo - "or", // Oriya - "pa", // Punjabi - "pl", // Polish - "ps", // Pashto - "pt", // Portuguese - "pt-BR", // Portuguese (Brazil) - "pt-PT", // Portuguese (Portugal) - "qu", // Quechua - "rm", // Romansh - "ro", // Romanian - "ru", // Russian - "sd", // Sindhi - "sh", // Serbo-Croatian - "si", // Sinhalese - "sk", // Slovak - "sl", // Slovenian - "sn", // Shona - "so", // Somali - "sq", // Albanian - "sr", // Serbian - "st", // Sesotho - "su", // Sundanese - "sv", // Swedish - "sw", // Swahili - "ta", // Tamil - "te", // Telugu - "tg", // Tajik - "th", // Thai - "ti", // Tigrinya - "tk", // Turkmen - "to", // Tonga - "tr", // Turkish - "tt", // Tatar - "tw", // Twi - "ug", // Uighur - "uk", // Ukrainian - "ur", // Urdu - "uz", // Uzbek - "vi", // Vietnamese - "xh", // Xhosa - "yi", // Yiddish - "yo", // Yoruba - "zh", // Chinese - "zh-CN", // Chinese (Simplified) - "zh-TW", // Chinese (Traditional) - "zu", // Zulu -}; - -// Returns true if |locale_name| has an alias in the ICU data file. -bool IsDuplicateName(const std::string& locale_name) { - static const char* const kDuplicateNames[] = { - "en", - "pt", - "zh", - "zh_hans_cn", - "zh_hant_tw" - }; - - // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain). - // 'es-419' (Spanish in Latin America) is not available in ICU so that it - // has to be added manually in GetAvailableLocales(). - if (LowerCaseEqualsASCII(locale_name.substr(0, 3), "es_")) - return true; - for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) { - if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0) - return true; - } - return false; -} - -bool IsLocaleNameTranslated(const char* locale, - const std::string& display_locale) { - string16 display_name = - l10n_util::GetDisplayNameForLocale(locale, display_locale, false); - // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not - // uloc_getDisplayName returns the actual translation or the default - // value (locale code), we have to rely on this hack to tell whether - // the translation is available or not. If ICU doesn't have a translated - // name for this locale, GetDisplayNameForLocale will just return the - // locale code. - return !IsStringASCII(display_name) || UTF16ToASCII(display_name) != locale; -} - -// We added 30+ minimally populated locales with only a few entries -// (exemplar character set, script, writing direction and its own -// lanaguage name). These locales have to be distinguished from the -// fully populated locales to which Chrome is localized. -bool IsLocalePartiallyPopulated(const std::string& locale_name) { - // For partially populated locales, even the translation for "English" - // is not available. A more robust/elegant way to check is to add a special - // field (say, 'isPartial' to our version of ICU locale files) and - // check its value, but this hack seems to work well. - return !IsLocaleNameTranslated("en", locale_name); -} - -#if !defined(OS_MACOSX) -bool IsLocaleAvailable(const std::string& locale, - const FilePath& locale_path) { - // If locale has any illegal characters in it, we don't want to try to - // load it because it may be pointing outside the locale data file directory. - if (!file_util::IsFilenameLegal(ASCIIToUTF16(locale))) - return false; - - // IsLocalePartiallyPopulated() can be called here for an early return w/o - // checking the resource availability below. It'd help when Chrome is run - // under a system locale Chrome is not localized to (e.g.Farsi on Linux), - // but it'd slow down the start up time a little bit for locales Chrome is - // localized to. So, we don't call it here. - if (!l10n_util::IsLocaleSupportedByOS(locale)) - return false; - - FilePath test_path = locale_path; - test_path = - test_path.AppendASCII(locale).ReplaceExtension(kLocaleFileExtension); - return file_util::PathExists(test_path); -} - -bool CheckAndResolveLocale(const std::string& locale, - const FilePath& locale_path, - std::string* resolved_locale) { - if (IsLocaleAvailable(locale, locale_path)) { - *resolved_locale = locale; - return true; - } - // If the locale matches language but not country, use that instead. - // TODO(jungshik) : Nothing is done about languages that Chrome - // does not support but available on Windows. We fall - // back to en-US in GetApplicationLocale so that it's a not critical, - // but we can do better. - std::string::size_type hyphen_pos = locale.find('-'); - if (hyphen_pos != std::string::npos && hyphen_pos > 0) { - std::string lang(locale, 0, hyphen_pos); - std::string region(locale, hyphen_pos + 1); - std::string tmp_locale(lang); - // Map es-RR other than es-ES to es-419 (Chrome's Latin American - // Spanish locale). - if (LowerCaseEqualsASCII(lang, "es") && !LowerCaseEqualsASCII(region, "es")) - tmp_locale.append("-419"); - else if (LowerCaseEqualsASCII(lang, "zh")) { - // Map zh-HK and zh-MK to zh-TW. Otherwise, zh-FOO is mapped to zh-CN. - if (LowerCaseEqualsASCII(region, "hk") || - LowerCaseEqualsASCII(region, "mk")) { - tmp_locale.append("-TW"); - } else { - tmp_locale.append("-CN"); - } - } - if (IsLocaleAvailable(tmp_locale, locale_path)) { - resolved_locale->swap(tmp_locale); - return true; - } - } - - // Google updater uses no, iw and en for our nb, he, and en-US. - // We need to map them to our codes. - struct { - const char* source; - const char* dest; - } alias_map[] = { - {"no", "nb"}, - {"tl", "fil"}, - {"iw", "he"}, - {"en", "en-US"}, - }; - - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) { - if (LowerCaseEqualsASCII(locale, alias_map[i].source)) { - std::string tmp_locale(alias_map[i].dest); - if (IsLocaleAvailable(tmp_locale, locale_path)) { - resolved_locale->swap(tmp_locale); - return true; - } - } - } - - return false; -} -#endif - -// On Linux, the text layout engine Pango determines paragraph directionality -// by looking at the first strongly-directional character in the text. This -// means text such as "Google Chrome foo bar..." will be layed out LTR even -// if "foo bar" is RTL. So this function prepends the necessary RLM in such -// cases. -void AdjustParagraphDirectionality(string16* paragraph) { -#if defined(OS_LINUX) - if (base::i18n::IsRTL() && - base::i18n::StringContainsStrongRTLChars(*paragraph)) { - paragraph->insert(0, 1, static_cast<char16>(base::i18n::kRightToLeftMark)); - } -#endif -} - -#if defined(OS_WIN) -std::string GetCanonicalLocale(const std::string& locale) { - return base::i18n::GetCanonicalLocale(locale.c_str()); -} -#endif - -} // namespace - -namespace l10n_util { - -std::string GetApplicationLocale(const std::string& pref_locale) { -#if defined(OS_MACOSX) - - // Use any override (Cocoa for the browser), otherwise use the preference - // passed to the function. - std::string app_locale = l10n_util::GetLocaleOverride(); - if (app_locale.empty()) - app_locale = pref_locale; - - // The above should handle all of the cases Chrome normally hits, but for some - // unit tests, we need something to fall back too. - if (app_locale.empty()) - app_locale = "en-US"; - - // Windows/Linux call SetICUDefaultLocale after determining the actual locale - // with CheckAndResolveLocal to make ICU APIs work in that locale. - // Mac doesn't use a locale directory tree of resources (it uses Mac style - // resources), so mirror the Windows/Linux behavior of calling - // SetICUDefaultLocale. - base::i18n::SetICUDefaultLocale(app_locale); - return app_locale; - -#else - - FilePath locale_path; - PathService::Get(ui::DIR_LOCALES, &locale_path); - std::string resolved_locale; - std::vector<std::string> candidates; - - // We only use --lang and the app pref on Windows. On Linux, we only - // look at the LC_*/LANG environment variables. We do, however, pass --lang - // to renderer and plugin processes so they know what language the parent - // process decided to use. - -#if defined(OS_WIN) - - // First, try the preference value. - if (!pref_locale.empty()) - candidates.push_back(pref_locale); - - // Next, try the overridden locale. - const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides(); - if (!languages.empty()) { - candidates.reserve(candidates.size() + languages.size()); - std::transform(languages.begin(), languages.end(), - std::back_inserter(candidates), &GetCanonicalLocale); - } else { - // If no override was set, defer to ICU - candidates.push_back(base::i18n::GetConfiguredLocale()); - } - -#elif defined(OS_CHROMEOS) - - // On ChromeOS, use the application locale preference. - if (!pref_locale.empty()) - candidates.push_back(pref_locale); - -#elif defined(OS_POSIX) && defined(TOOLKIT_USES_GTK) - - // GLib implements correct environment variable parsing with - // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG. - // We used to use our custom parsing code along with ICU for this purpose. - // If we have a port that does not depend on GTK, we have to - // restore our custom code for that port. - const char* const* languages = g_get_language_names(); - DCHECK(languages); // A valid pointer is guaranteed. - DCHECK(*languages); // At least one entry, "C", is guaranteed. - - for (; *languages != NULL; ++languages) { - candidates.push_back(base::i18n::GetCanonicalLocale(*languages)); - } - -#else -#error Unsupported platform, see build/build_config.h -#endif - - std::vector<std::string>::const_iterator i = candidates.begin(); - for (; i != candidates.end(); ++i) { - if (CheckAndResolveLocale(*i, locale_path, &resolved_locale)) { - base::i18n::SetICUDefaultLocale(resolved_locale); - return resolved_locale; - } - } - - // Fallback on en-US. - const std::string fallback_locale("en-US"); - if (IsLocaleAvailable(fallback_locale, locale_path)) { - base::i18n::SetICUDefaultLocale(fallback_locale); - return fallback_locale; - } - - // No locale data file was found; we shouldn't get here. - NOTREACHED(); - - return std::string(); - -#endif -} - -string16 GetDisplayNameForLocale(const std::string& locale, - const std::string& display_locale, - bool is_for_ui) { - std::string locale_code = locale; - // Internally, we use the language code of zh-CN and zh-TW, but we want the - // display names to be Chinese (Simplified) and Chinese (Traditional) instead - // of Chinese (China) and Chinese (Taiwan). To do that, we pass zh-Hans - // and zh-Hant to ICU. Even with this mapping, we'd get - // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and - // even longer results in other languages. Arguably, they're better than - // the current results : Chinese (China) / Chinese (Taiwan). - // TODO(jungshik): Do one of the following: - // 1. Special-case Chinese by getting the custom-translation for them - // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE. - // 3. Get translations for two directly from the ICU resouce bundle - // because they're not accessible with other any API. - // 4. Patch ICU to special-case zh-Hans/zh-Hant for us. - // #1 and #2 wouldn't work if display_locale != current UI locale although - // we can think of additional hack to work around the problem. - // #3 can be potentially expensive. - if (locale_code == "zh-CN") - locale_code = "zh-Hans"; - else if (locale_code == "zh-TW") - locale_code = "zh-Hant"; - - UErrorCode error = U_ZERO_ERROR; - const int buffer_size = 1024; - - string16 display_name; - int actual_size = uloc_getDisplayName(locale_code.c_str(), - display_locale.c_str(), - WriteInto(&display_name, buffer_size + 1), buffer_size, &error); - DCHECK(U_SUCCESS(error)); - display_name.resize(actual_size); - // Add an RTL mark so parentheses are properly placed. - if (is_for_ui && base::i18n::IsRTL()) - display_name.push_back(static_cast<char16>(base::i18n::kRightToLeftMark)); - return display_name; -} - -std::string NormalizeLocale(const std::string& locale) { - std::string normalized_locale(locale); - std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_'); - - return normalized_locale; -} - -void GetParentLocales(const std::string& current_locale, - std::vector<std::string>* parent_locales) { - std::string locale(NormalizeLocale(current_locale)); - - const int kNameCapacity = 256; - char parent[kNameCapacity]; - base::strlcpy(parent, locale.c_str(), kNameCapacity); - parent_locales->push_back(parent); - UErrorCode err = U_ZERO_ERROR; - while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) { - if (U_FAILURE(err)) - break; - parent_locales->push_back(parent); - } -} - -bool IsValidLocaleSyntax(const std::string& locale) { - // Check that the length is plausible. - if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY) - return false; - - // Strip off the part after an '@' sign, which might contain keywords, - // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil. - // We don't validate that part much, just check that there's at least one - // equals sign in a plausible place. Normalize the prefix so that hyphens - // are changed to underscores. - std::string prefix = NormalizeLocale(locale); - size_t split_point = locale.find("@"); - if (split_point != std::string::npos) { - std::string keywords = locale.substr(split_point + 1); - prefix = locale.substr(0, split_point); - - size_t equals_loc = keywords.find("="); - if (equals_loc == std::string::npos || - equals_loc < 1 || equals_loc > keywords.size() - 2) - return false; - } - - // Check that all characters before the at-sign are alphanumeric or - // underscore. - for (size_t i = 0; i < prefix.size(); i++) { - char ch = prefix[i]; - if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_') - return false; - } - - // Check that the initial token (before the first hyphen/underscore) - // is 1 - 3 alphabetical characters (a language tag). - for (size_t i = 0; i < prefix.size(); i++) { - char ch = prefix[i]; - if (ch == '_') { - if (i < 1 || i > 3) - return false; - break; - } - if (!IsAsciiAlpha(ch)) - return false; - } - - // Check that the all tokens after the initial token are 1 - 8 characters. - // (Tokenize/StringTokenizer don't work here, they collapse multiple - // delimiters into one.) - int token_len = 0; - int token_index = 0; - for (size_t i = 0; i < prefix.size(); i++) { - if (prefix[i] != '_') { - token_len++; - continue; - } - - if (token_index > 0 && (token_len < 1 || token_len > 8)) { - return false; - } - token_index++; - token_len = 0; - } - if (token_index == 0 && (token_len < 1 || token_len > 3)) { - return false; - } else if (token_len < 1 || token_len > 8) { - return false; - } - - return true; -} - -std::string GetStringUTF8(int message_id) { - return UTF16ToUTF8(GetStringUTF16(message_id)); -} - -string16 GetStringUTF16(int message_id) { - ResourceBundle& rb = ResourceBundle::GetSharedInstance(); - string16 str = rb.GetLocalizedString(message_id); - AdjustParagraphDirectionality(&str); - - return str; -} - -static string16 GetStringF(int message_id, - const std::vector<string16>& replacements, - std::vector<size_t>* offsets) { - // TODO(tc): We could save a string copy if we got the raw string as - // a StringPiece and were able to call ReplaceStringPlaceholders with - // a StringPiece format string and string16 substitution strings. In - // practice, the strings should be relatively short. - ResourceBundle& rb = ResourceBundle::GetSharedInstance(); - const string16& format_string = rb.GetLocalizedString(message_id); - -#ifndef NDEBUG - // Make sure every replacement string is being used, so we don't just - // silently fail to insert one. If |offsets| is non-NULL, then don't do this - // check as the code may simply want to find the placeholders rather than - // actually replacing them. - if (!offsets) { - std::string utf8_string = UTF16ToUTF8(format_string); - - // $9 is the highest allowed placeholder. - for (size_t i = 0; i < 9; ++i) { - bool placeholder_should_exist = replacements.size() > i; - - std::string placeholder = StringPrintf("$%d", static_cast<int>(i + 1)); - size_t pos = utf8_string.find(placeholder.c_str()); - if (placeholder_should_exist) { - DCHECK_NE(std::string::npos, pos) << - " Didn't find a " << placeholder << " placeholder in " << - utf8_string; - } else { - DCHECK_EQ(std::string::npos, pos) << - " Unexpectedly found a " << placeholder << " placeholder in " << - utf8_string; - } - } - } -#endif - - string16 formatted = ReplaceStringPlaceholders(format_string, replacements, - offsets); - AdjustParagraphDirectionality(&formatted); - - return formatted; -} - -std::string GetStringFUTF8(int message_id, - const string16& a) { - return UTF16ToUTF8(GetStringFUTF16(message_id, a)); -} - -std::string GetStringFUTF8(int message_id, - const string16& a, - const string16& b) { - return UTF16ToUTF8(GetStringFUTF16(message_id, a, b)); -} - -std::string GetStringFUTF8(int message_id, - const string16& a, - const string16& b, - const string16& c) { - return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c)); -} - -std::string GetStringFUTF8(int message_id, - const string16& a, - const string16& b, - const string16& c, - const string16& d) { - return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d)); -} - -string16 GetStringFUTF16(int message_id, - const string16& a) { - std::vector<string16> replacements; - replacements.push_back(a); - return GetStringF(message_id, replacements, NULL); -} - -string16 GetStringFUTF16(int message_id, - const string16& a, - const string16& b) { - return GetStringFUTF16(message_id, a, b, NULL); -} - -string16 GetStringFUTF16(int message_id, - const string16& a, - const string16& b, - const string16& c) { - std::vector<string16> replacements; - replacements.push_back(a); - replacements.push_back(b); - replacements.push_back(c); - return GetStringF(message_id, replacements, NULL); -} - -string16 GetStringFUTF16(int message_id, - const string16& a, - const string16& b, - const string16& c, - const string16& d) { - std::vector<string16> replacements; - replacements.push_back(a); - replacements.push_back(b); - replacements.push_back(c); - replacements.push_back(d); - return GetStringF(message_id, replacements, NULL); -} - -string16 GetStringFUTF16(int message_id, const string16& a, size_t* offset) { - DCHECK(offset); - std::vector<size_t> offsets; - std::vector<string16> replacements; - replacements.push_back(a); - string16 result = GetStringF(message_id, replacements, &offsets); - DCHECK(offsets.size() == 1); - *offset = offsets[0]; - return result; -} - -string16 GetStringFUTF16(int message_id, - const string16& a, - const string16& b, - std::vector<size_t>* offsets) { - std::vector<string16> replacements; - replacements.push_back(a); - replacements.push_back(b); - return GetStringF(message_id, replacements, offsets); -} - -string16 GetStringFUTF16Int(int message_id, int a) { - return GetStringFUTF16(message_id, UTF8ToUTF16(base::IntToString(a))); -} - -string16 GetStringFUTF16Int(int message_id, int64 a) { - return GetStringFUTF16(message_id, UTF8ToUTF16(base::Int64ToString(a))); -} - -string16 TruncateString(const string16& string, size_t length) { - if (string.size() <= length) - // String fits, return it. - return string; - - if (length == 0) { - // No room for the elide string, return an empty string. - return string16(); - } - size_t max = length - 1; - - // Added to the end of strings that are too big. - static const char16 kElideString[] = { 0x2026, 0 }; - - if (max == 0) { - // Just enough room for the elide string. - return kElideString; - } - - // Use a line iterator to find the first boundary. - UErrorCode status = U_ZERO_ERROR; - scoped_ptr<icu::RuleBasedBreakIterator> bi( - static_cast<icu::RuleBasedBreakIterator*>( - icu::RuleBasedBreakIterator::createLineInstance( - icu::Locale::getDefault(), status))); - if (U_FAILURE(status)) - return string.substr(0, max) + kElideString; - bi->setText(string.c_str()); - int32_t index = bi->preceding(static_cast<int32_t>(max)); - if (index == icu::BreakIterator::DONE) { - index = static_cast<int32_t>(max); - } else { - // Found a valid break (may be the beginning of the string). Now use - // a character iterator to find the previous non-whitespace character. - icu::StringCharacterIterator char_iterator(string.c_str()); - if (index == 0) { - // No valid line breaks. Start at the end again. This ensures we break - // on a valid character boundary. - index = static_cast<int32_t>(max); - } - char_iterator.setIndex(index); - while (char_iterator.hasPrevious()) { - char_iterator.previous(); - if (!(u_isspace(char_iterator.current()) || - u_charType(char_iterator.current()) == U_CONTROL_CHAR || - u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) { - // Not a whitespace character. Advance the iterator so that we - // include the current character in the truncated string. - char_iterator.next(); - break; - } - } - if (char_iterator.hasPrevious()) { - // Found a valid break point. - index = char_iterator.getIndex(); - } else { - // String has leading whitespace, return the elide string. - return kElideString; - } - } - return string.substr(0, index) + kElideString; -} - -string16 ToLower(const string16& string) { - icu::UnicodeString lower_u_str( - icu::UnicodeString(string.c_str()).toLower(icu::Locale::getDefault())); - string16 result; - lower_u_str.extract(0, lower_u_str.length(), - WriteInto(&result, lower_u_str.length() + 1)); - return result; -} - -string16 ToUpper(const string16& string) { - icu::UnicodeString upper_u_str( - icu::UnicodeString(string.c_str()).toUpper(icu::Locale::getDefault())); - string16 result; - upper_u_str.extract(0, upper_u_str.length(), - WriteInto(&result, upper_u_str.length() + 1)); - return result; -} - -// Compares the character data stored in two different string16 strings by -// specified Collator instance. -UCollationResult CompareString16WithCollator(const icu::Collator* collator, - const string16& lhs, - const string16& rhs) { - DCHECK(collator); - UErrorCode error = U_ZERO_ERROR; - UCollationResult result = collator->compare( - static_cast<const UChar*>(lhs.c_str()), static_cast<int>(lhs.length()), - static_cast<const UChar*>(rhs.c_str()), static_cast<int>(rhs.length()), - error); - DCHECK(U_SUCCESS(error)); - return result; -} - -// Specialization of operator() method for string16 version. -template <> -bool StringComparator<string16>::operator()(const string16& lhs, - const string16& rhs) { - // If we can not get collator instance for specified locale, just do simple - // string compare. - if (!collator_) - return lhs < rhs; - return CompareString16WithCollator(collator_, lhs, rhs) == UCOL_LESS; -}; - -void SortStrings16(const std::string& locale, - std::vector<string16>* strings) { - SortVectorWithStringKey(locale, strings, false); -} - -const std::vector<std::string>& GetAvailableLocales() { - static std::vector<std::string> locales; - if (locales.empty()) { - int num_locales = uloc_countAvailable(); - for (int i = 0; i < num_locales; ++i) { - std::string locale_name = uloc_getAvailable(i); - // Filter out the names that have aliases. - if (IsDuplicateName(locale_name)) - continue; - // Filter out locales for which we have only partially populated data - // and to which Chrome is not localized. - if (IsLocalePartiallyPopulated(locale_name)) - continue; - if (!IsLocaleSupportedByOS(locale_name)) - continue; - // Normalize underscores to hyphens because that's what our locale files - // use. - std::replace(locale_name.begin(), locale_name.end(), '_', '-'); - - // Map the Chinese locale names over to zh-CN and zh-TW. - if (LowerCaseEqualsASCII(locale_name, "zh-hans")) { - locale_name = "zh-CN"; - } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) { - locale_name = "zh-TW"; - } - locales.push_back(locale_name); - } - - // Manually add 'es-419' to the list. See the comment in IsDuplicateName(). - locales.push_back("es-419"); - } - return locales; -} - -void GetAcceptLanguagesForLocale(const std::string& display_locale, - std::vector<std::string>* locale_codes) { - for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) { - if (!IsLocaleNameTranslated(kAcceptLanguageList[i], display_locale)) - // TODO(jungshik) : Put them at the of the list with language codes - // enclosed by brackets instead of skipping. - continue; - locale_codes->push_back(kAcceptLanguageList[i]); - } -} - -} // namespace l10n_util |