diff options
Diffstat (limited to 'ui/base/l10n/l10n_util.cc')
-rw-r--r-- | ui/base/l10n/l10n_util.cc | 876 |
1 files changed, 876 insertions, 0 deletions
diff --git a/ui/base/l10n/l10n_util.cc b/ui/base/l10n/l10n_util.cc new file mode 100644 index 0000000..26e12c85 --- /dev/null +++ b/ui/base/l10n/l10n_util.cc @@ -0,0 +1,876 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "ui/base/l10n/l10n_util.h" + +#if defined(TOOLKIT_USES_GTK) +#include <glib/gutils.h> +#endif + +#include <algorithm> +#include <cstdlib> +#include <iterator> + +#include "base/command_line.h" +#include "base/file_util.h" +#include "base/i18n/file_util_icu.h" +#include "base/i18n/rtl.h" +#include "base/path_service.h" +#include "base/scoped_ptr.h" +#include "base/string16.h" +#include "base/string_number_conversions.h" +#include "base/string_split.h" +#include "base/sys_string_conversions.h" +#include "base/utf_string_conversions.h" +#include "build/build_config.h" +#include "gfx/canvas.h" +#include "ui/base/l10n/l10n_util_collator.h" +#include "ui/base/resource/resource_bundle.h" +#include "ui/base/ui_base_paths.h" +#include "unicode/rbbi.h" +#include "unicode/uloc.h" + +#if defined(OS_MACOSX) +#include "ui/base/l10n/l10n_util_mac.h" +#elif defined(OS_WIN) +#include "ui/base/l10n/l10n_util_win.h" +#endif + +namespace { + +#if defined(OS_WIN) +static const FilePath::CharType kLocaleFileExtension[] = L".dll"; +#elif defined(OS_POSIX) +static const FilePath::CharType kLocaleFileExtension[] = ".pak"; +#endif + +static const char* const kAcceptLanguageList[] = { + "af", // Afrikaans + "am", // Amharic + "ar", // Arabic + "az", // Azerbaijani + "be", // Belarusian + "bg", // Bulgarian + "bh", // Bihari + "bn", // Bengali + "br", // Breton + "bs", // Bosnian + "ca", // Catalan + "co", // Corsican + "cs", // Czech + "cy", // Welsh + "da", // Danish + "de", // German + "de-AT", // German (Austria) + "de-CH", // German (Switzerland) + "de-DE", // German (Germany) + "el", // Greek + "en", // English + "en-AU", // English (Austrailia) + "en-CA", // English (Canada) + "en-GB", // English (UK) + "en-NZ", // English (New Zealand) + "en-US", // English (US) + "en-ZA", // English (South Africa) + "eo", // Esperanto + // TODO(jungshik) : Do we want to list all es-Foo for Latin-American + // Spanish speaking countries? + "es", // Spanish + "et", // Estonian + "eu", // Basque + "fa", // Persian + "fi", // Finnish + "fil", // Filipino + "fo", // Faroese + "fr", // French + "fr-CA", // French (Canada) + "fr-CH", // French (Switzerland) + "fr-FR", // French (France) + "fy", // Frisian + "ga", // Irish + "gd", // Scots Gaelic + "gl", // Galician + "gn", // Guarani + "gu", // Gujarati + "ha", // Hausa + "haw", // Hawaiian + "he", // Hebrew + "hi", // Hindi + "hr", // Croatian + "hu", // Hungarian + "hy", // Armenian + "ia", // Interlingua + "id", // Indonesian + "is", // Icelandic + "it", // Italian + "it-CH", // Italian (Switzerland) + "it-IT", // Italian (Italy) + "ja", // Japanese + "jw", // Javanese + "ka", // Georgian + "kk", // Kazakh + "km", // Cambodian + "kn", // Kannada + "ko", // Korean + "ku", // Kurdish + "ky", // Kyrgyz + "la", // Latin + "ln", // Lingala + "lo", // Laothian + "lt", // Lithuanian + "lv", // Latvian + "mk", // Macedonian + "ml", // Malayalam + "mn", // Mongolian + "mo", // Moldavian + "mr", // Marathi + "ms", // Malay + "mt", // Maltese + "nb", // Norwegian (Bokmal) + "ne", // Nepali + "nl", // Dutch + "nn", // Norwegian (Nynorsk) + "no", // Norwegian + "oc", // Occitan + "om", // Oromo + "or", // Oriya + "pa", // Punjabi + "pl", // Polish + "ps", // Pashto + "pt", // Portuguese + "pt-BR", // Portuguese (Brazil) + "pt-PT", // Portuguese (Portugal) + "qu", // Quechua + "rm", // Romansh + "ro", // Romanian + "ru", // Russian + "sd", // Sindhi + "sh", // Serbo-Croatian + "si", // Sinhalese + "sk", // Slovak + "sl", // Slovenian + "sn", // Shona + "so", // Somali + "sq", // Albanian + "sr", // Serbian + "st", // Sesotho + "su", // Sundanese + "sv", // Swedish + "sw", // Swahili + "ta", // Tamil + "te", // Telugu + "tg", // Tajik + "th", // Thai + "ti", // Tigrinya + "tk", // Turkmen + "to", // Tonga + "tr", // Turkish + "tt", // Tatar + "tw", // Twi + "ug", // Uighur + "uk", // Ukrainian + "ur", // Urdu + "uz", // Uzbek + "vi", // Vietnamese + "xh", // Xhosa + "yi", // Yiddish + "yo", // Yoruba + "zh", // Chinese + "zh-CN", // Chinese (Simplified) + "zh-TW", // Chinese (Traditional) + "zu", // Zulu +}; + +// Returns true if |locale_name| has an alias in the ICU data file. +bool IsDuplicateName(const std::string& locale_name) { + static const char* const kDuplicateNames[] = { + "en", + "pt", + "zh", + "zh_hans_cn", + "zh_hant_tw" + }; + + // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain). + // 'es-419' (Spanish in Latin America) is not available in ICU so that it + // has to be added manually in GetAvailableLocales(). + if (LowerCaseEqualsASCII(locale_name.substr(0, 3), "es_")) + return true; + for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) { + if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0) + return true; + } + return false; +} + +bool IsLocaleNameTranslated(const char* locale, + const std::string& display_locale) { + string16 display_name = + l10n_util::GetDisplayNameForLocale(locale, display_locale, false); + // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not + // uloc_getDisplayName returns the actual translation or the default + // value (locale code), we have to rely on this hack to tell whether + // the translation is available or not. If ICU doesn't have a translated + // name for this locale, GetDisplayNameForLocale will just return the + // locale code. + return !IsStringASCII(display_name) || UTF16ToASCII(display_name) != locale; +} + +// We added 30+ minimally populated locales with only a few entries +// (exemplar character set, script, writing direction and its own +// lanaguage name). These locales have to be distinguished from the +// fully populated locales to which Chrome is localized. +bool IsLocalePartiallyPopulated(const std::string& locale_name) { + // For partially populated locales, even the translation for "English" + // is not available. A more robust/elegant way to check is to add a special + // field (say, 'isPartial' to our version of ICU locale files) and + // check its value, but this hack seems to work well. + return !IsLocaleNameTranslated("en", locale_name); +} + +#if !defined(OS_MACOSX) +bool IsLocaleAvailable(const std::string& locale, + const FilePath& locale_path) { + // If locale has any illegal characters in it, we don't want to try to + // load it because it may be pointing outside the locale data file directory. + if (!file_util::IsFilenameLegal(ASCIIToUTF16(locale))) + return false; + + // IsLocalePartiallyPopulated() can be called here for an early return w/o + // checking the resource availability below. It'd help when Chrome is run + // under a system locale Chrome is not localized to (e.g.Farsi on Linux), + // but it'd slow down the start up time a little bit for locales Chrome is + // localized to. So, we don't call it here. + if (!l10n_util::IsLocaleSupportedByOS(locale)) + return false; + + FilePath test_path = locale_path; + test_path = + test_path.AppendASCII(locale).ReplaceExtension(kLocaleFileExtension); + return file_util::PathExists(test_path); +} + +bool CheckAndResolveLocale(const std::string& locale, + const FilePath& locale_path, + std::string* resolved_locale) { + if (IsLocaleAvailable(locale, locale_path)) { + *resolved_locale = locale; + return true; + } + // If the locale matches language but not country, use that instead. + // TODO(jungshik) : Nothing is done about languages that Chrome + // does not support but available on Windows. We fall + // back to en-US in GetApplicationLocale so that it's a not critical, + // but we can do better. + std::string::size_type hyphen_pos = locale.find('-'); + if (hyphen_pos != std::string::npos && hyphen_pos > 0) { + std::string lang(locale, 0, hyphen_pos); + std::string region(locale, hyphen_pos + 1); + std::string tmp_locale(lang); + // Map es-RR other than es-ES to es-419 (Chrome's Latin American + // Spanish locale). + if (LowerCaseEqualsASCII(lang, "es") && !LowerCaseEqualsASCII(region, "es")) + tmp_locale.append("-419"); + else if (LowerCaseEqualsASCII(lang, "zh")) { + // Map zh-HK and zh-MK to zh-TW. Otherwise, zh-FOO is mapped to zh-CN. + if (LowerCaseEqualsASCII(region, "hk") || + LowerCaseEqualsASCII(region, "mk")) { + tmp_locale.append("-TW"); + } else { + tmp_locale.append("-CN"); + } + } + if (IsLocaleAvailable(tmp_locale, locale_path)) { + resolved_locale->swap(tmp_locale); + return true; + } + } + + // Google updater uses no, iw and en for our nb, he, and en-US. + // We need to map them to our codes. + struct { + const char* source; + const char* dest; + } alias_map[] = { + {"no", "nb"}, + {"tl", "fil"}, + {"iw", "he"}, + {"en", "en-US"}, + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) { + if (LowerCaseEqualsASCII(locale, alias_map[i].source)) { + std::string tmp_locale(alias_map[i].dest); + if (IsLocaleAvailable(tmp_locale, locale_path)) { + resolved_locale->swap(tmp_locale); + return true; + } + } + } + + return false; +} +#endif + +// On Linux, the text layout engine Pango determines paragraph directionality +// by looking at the first strongly-directional character in the text. This +// means text such as "Google Chrome foo bar..." will be layed out LTR even +// if "foo bar" is RTL. So this function prepends the necessary RLM in such +// cases. +void AdjustParagraphDirectionality(string16* paragraph) { +#if defined(OS_LINUX) + if (base::i18n::IsRTL() && + base::i18n::StringContainsStrongRTLChars(*paragraph)) { + paragraph->insert(0, 1, static_cast<char16>(base::i18n::kRightToLeftMark)); + } +#endif +} + +#if defined(OS_WIN) +std::string GetCanonicalLocale(const std::string& locale) { + return base::i18n::GetCanonicalLocale(locale.c_str()); +} +#endif + +} // namespace + +namespace l10n_util { + +std::string GetApplicationLocale(const std::string& pref_locale) { +#if defined(OS_MACOSX) + + // Use any override (Cocoa for the browser), otherwise use the preference + // passed to the function. + std::string app_locale = l10n_util::GetLocaleOverride(); + if (app_locale.empty()) + app_locale = pref_locale; + + // The above should handle all of the cases Chrome normally hits, but for some + // unit tests, we need something to fall back too. + if (app_locale.empty()) + app_locale = "en-US"; + + // Windows/Linux call SetICUDefaultLocale after determining the actual locale + // with CheckAndResolveLocal to make ICU APIs work in that locale. + // Mac doesn't use a locale directory tree of resources (it uses Mac style + // resources), so mirror the Windows/Linux behavior of calling + // SetICUDefaultLocale. + base::i18n::SetICUDefaultLocale(app_locale); + return app_locale; + +#else + + FilePath locale_path; + PathService::Get(ui::DIR_LOCALES, &locale_path); + std::string resolved_locale; + std::vector<std::string> candidates; + + // We only use --lang and the app pref on Windows. On Linux, we only + // look at the LC_*/LANG environment variables. We do, however, pass --lang + // to renderer and plugin processes so they know what language the parent + // process decided to use. + +#if defined(OS_WIN) + + // First, try the preference value. + if (!pref_locale.empty()) + candidates.push_back(pref_locale); + + // Next, try the overridden locale. + const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides(); + if (!languages.empty()) { + candidates.reserve(candidates.size() + languages.size()); + std::transform(languages.begin(), languages.end(), + std::back_inserter(candidates), &GetCanonicalLocale); + } else { + // If no override was set, defer to ICU + candidates.push_back(base::i18n::GetConfiguredLocale()); + } + +#elif defined(OS_CHROMEOS) + + // On ChromeOS, use the application locale preference. + if (!pref_locale.empty()) + candidates.push_back(pref_locale); + +#elif defined(OS_POSIX) && defined(TOOLKIT_USES_GTK) + + // GLib implements correct environment variable parsing with + // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG. + // We used to use our custom parsing code along with ICU for this purpose. + // If we have a port that does not depend on GTK, we have to + // restore our custom code for that port. + const char* const* languages = g_get_language_names(); + DCHECK(languages); // A valid pointer is guaranteed. + DCHECK(*languages); // At least one entry, "C", is guaranteed. + + for (; *languages != NULL; ++languages) { + candidates.push_back(base::i18n::GetCanonicalLocale(*languages)); + } + +#else +#error Unsupported platform, see build/build_config.h +#endif + + std::vector<std::string>::const_iterator i = candidates.begin(); + for (; i != candidates.end(); ++i) { + if (CheckAndResolveLocale(*i, locale_path, &resolved_locale)) { + base::i18n::SetICUDefaultLocale(resolved_locale); + return resolved_locale; + } + } + + // Fallback on en-US. + const std::string fallback_locale("en-US"); + if (IsLocaleAvailable(fallback_locale, locale_path)) { + base::i18n::SetICUDefaultLocale(fallback_locale); + return fallback_locale; + } + + // No locale data file was found; we shouldn't get here. + NOTREACHED(); + + return std::string(); + +#endif +} + +string16 GetDisplayNameForLocale(const std::string& locale, + const std::string& display_locale, + bool is_for_ui) { + std::string locale_code = locale; + // Internally, we use the language code of zh-CN and zh-TW, but we want the + // display names to be Chinese (Simplified) and Chinese (Traditional) instead + // of Chinese (China) and Chinese (Taiwan). To do that, we pass zh-Hans + // and zh-Hant to ICU. Even with this mapping, we'd get + // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and + // even longer results in other languages. Arguably, they're better than + // the current results : Chinese (China) / Chinese (Taiwan). + // TODO(jungshik): Do one of the following: + // 1. Special-case Chinese by getting the custom-translation for them + // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE. + // 3. Get translations for two directly from the ICU resouce bundle + // because they're not accessible with other any API. + // 4. Patch ICU to special-case zh-Hans/zh-Hant for us. + // #1 and #2 wouldn't work if display_locale != current UI locale although + // we can think of additional hack to work around the problem. + // #3 can be potentially expensive. + if (locale_code == "zh-CN") + locale_code = "zh-Hans"; + else if (locale_code == "zh-TW") + locale_code = "zh-Hant"; + + UErrorCode error = U_ZERO_ERROR; + const int buffer_size = 1024; + + string16 display_name; + int actual_size = uloc_getDisplayName(locale_code.c_str(), + display_locale.c_str(), + WriteInto(&display_name, buffer_size + 1), buffer_size, &error); + DCHECK(U_SUCCESS(error)); + display_name.resize(actual_size); + // Add an RTL mark so parentheses are properly placed. + if (is_for_ui && base::i18n::IsRTL()) + display_name.push_back(static_cast<char16>(base::i18n::kRightToLeftMark)); + return display_name; +} + +std::string NormalizeLocale(const std::string& locale) { + std::string normalized_locale(locale); + std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_'); + + return normalized_locale; +} + +void GetParentLocales(const std::string& current_locale, + std::vector<std::string>* parent_locales) { + std::string locale(NormalizeLocale(current_locale)); + + const int kNameCapacity = 256; + char parent[kNameCapacity]; + base::strlcpy(parent, locale.c_str(), kNameCapacity); + parent_locales->push_back(parent); + UErrorCode err = U_ZERO_ERROR; + while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) { + if (U_FAILURE(err)) + break; + parent_locales->push_back(parent); + } +} + +bool IsValidLocaleSyntax(const std::string& locale) { + // Check that the length is plausible. + if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY) + return false; + + // Strip off the part after an '@' sign, which might contain keywords, + // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil. + // We don't validate that part much, just check that there's at least one + // equals sign in a plausible place. Normalize the prefix so that hyphens + // are changed to underscores. + std::string prefix = NormalizeLocale(locale); + size_t split_point = locale.find("@"); + if (split_point != std::string::npos) { + std::string keywords = locale.substr(split_point + 1); + prefix = locale.substr(0, split_point); + + size_t equals_loc = keywords.find("="); + if (equals_loc == std::string::npos || + equals_loc < 1 || equals_loc > keywords.size() - 2) + return false; + } + + // Check that all characters before the at-sign are alphanumeric or + // underscore. + for (size_t i = 0; i < prefix.size(); i++) { + char ch = prefix[i]; + if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_') + return false; + } + + // Check that the initial token (before the first hyphen/underscore) + // is 1 - 3 alphabetical characters (a language tag). + for (size_t i = 0; i < prefix.size(); i++) { + char ch = prefix[i]; + if (ch == '_') { + if (i < 1 || i > 3) + return false; + break; + } + if (!IsAsciiAlpha(ch)) + return false; + } + + // Check that the all tokens after the initial token are 1 - 8 characters. + // (Tokenize/StringTokenizer don't work here, they collapse multiple + // delimiters into one.) + int token_len = 0; + int token_index = 0; + for (size_t i = 0; i < prefix.size(); i++) { + if (prefix[i] != '_') { + token_len++; + continue; + } + + if (token_index > 0 && (token_len < 1 || token_len > 8)) { + return false; + } + token_index++; + token_len = 0; + } + if (token_index == 0 && (token_len < 1 || token_len > 3)) { + return false; + } else if (token_len < 1 || token_len > 8) { + return false; + } + + return true; +} + +std::string GetStringUTF8(int message_id) { + return UTF16ToUTF8(GetStringUTF16(message_id)); +} + +string16 GetStringUTF16(int message_id) { + ResourceBundle& rb = ResourceBundle::GetSharedInstance(); + string16 str = rb.GetLocalizedString(message_id); + AdjustParagraphDirectionality(&str); + + return str; +} + +static string16 GetStringF(int message_id, + const std::vector<string16>& replacements, + std::vector<size_t>* offsets) { + // TODO(tc): We could save a string copy if we got the raw string as + // a StringPiece and were able to call ReplaceStringPlaceholders with + // a StringPiece format string and string16 substitution strings. In + // practice, the strings should be relatively short. + ResourceBundle& rb = ResourceBundle::GetSharedInstance(); + const string16& format_string = rb.GetLocalizedString(message_id); + +#ifndef NDEBUG + // Make sure every replacement string is being used, so we don't just + // silently fail to insert one. If |offsets| is non-NULL, then don't do this + // check as the code may simply want to find the placeholders rather than + // actually replacing them. + if (!offsets) { + std::string utf8_string = UTF16ToUTF8(format_string); + + // $9 is the highest allowed placeholder. + for (size_t i = 0; i < 9; ++i) { + bool placeholder_should_exist = replacements.size() > i; + + std::string placeholder = StringPrintf("$%d", static_cast<int>(i + 1)); + size_t pos = utf8_string.find(placeholder.c_str()); + if (placeholder_should_exist) { + DCHECK_NE(std::string::npos, pos) << + " Didn't find a " << placeholder << " placeholder in " << + utf8_string; + } else { + DCHECK_EQ(std::string::npos, pos) << + " Unexpectedly found a " << placeholder << " placeholder in " << + utf8_string; + } + } + } +#endif + + string16 formatted = ReplaceStringPlaceholders(format_string, replacements, + offsets); + AdjustParagraphDirectionality(&formatted); + + return formatted; +} + +std::string GetStringFUTF8(int message_id, + const string16& a) { + return UTF16ToUTF8(GetStringFUTF16(message_id, a)); +} + +std::string GetStringFUTF8(int message_id, + const string16& a, + const string16& b) { + return UTF16ToUTF8(GetStringFUTF16(message_id, a, b)); +} + +std::string GetStringFUTF8(int message_id, + const string16& a, + const string16& b, + const string16& c) { + return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c)); +} + +std::string GetStringFUTF8(int message_id, + const string16& a, + const string16& b, + const string16& c, + const string16& d) { + return UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d)); +} + +string16 GetStringFUTF16(int message_id, + const string16& a) { + std::vector<string16> replacements; + replacements.push_back(a); + return GetStringF(message_id, replacements, NULL); +} + +string16 GetStringFUTF16(int message_id, + const string16& a, + const string16& b) { + return GetStringFUTF16(message_id, a, b, NULL); +} + +string16 GetStringFUTF16(int message_id, + const string16& a, + const string16& b, + const string16& c) { + std::vector<string16> replacements; + replacements.push_back(a); + replacements.push_back(b); + replacements.push_back(c); + return GetStringF(message_id, replacements, NULL); +} + +string16 GetStringFUTF16(int message_id, + const string16& a, + const string16& b, + const string16& c, + const string16& d) { + std::vector<string16> replacements; + replacements.push_back(a); + replacements.push_back(b); + replacements.push_back(c); + replacements.push_back(d); + return GetStringF(message_id, replacements, NULL); +} + +string16 GetStringFUTF16(int message_id, const string16& a, size_t* offset) { + DCHECK(offset); + std::vector<size_t> offsets; + std::vector<string16> replacements; + replacements.push_back(a); + string16 result = GetStringF(message_id, replacements, &offsets); + DCHECK(offsets.size() == 1); + *offset = offsets[0]; + return result; +} + +string16 GetStringFUTF16(int message_id, + const string16& a, + const string16& b, + std::vector<size_t>* offsets) { + std::vector<string16> replacements; + replacements.push_back(a); + replacements.push_back(b); + return GetStringF(message_id, replacements, offsets); +} + +string16 GetStringFUTF16Int(int message_id, int a) { + return GetStringFUTF16(message_id, UTF8ToUTF16(base::IntToString(a))); +} + +string16 GetStringFUTF16Int(int message_id, int64 a) { + return GetStringFUTF16(message_id, UTF8ToUTF16(base::Int64ToString(a))); +} + +string16 TruncateString(const string16& string, size_t length) { + if (string.size() <= length) + // String fits, return it. + return string; + + if (length == 0) { + // No room for the elide string, return an empty string. + return string16(); + } + size_t max = length - 1; + + // Added to the end of strings that are too big. + static const char16 kElideString[] = { 0x2026, 0 }; + + if (max == 0) { + // Just enough room for the elide string. + return kElideString; + } + + // Use a line iterator to find the first boundary. + UErrorCode status = U_ZERO_ERROR; + scoped_ptr<icu::RuleBasedBreakIterator> bi( + static_cast<icu::RuleBasedBreakIterator*>( + icu::RuleBasedBreakIterator::createLineInstance( + icu::Locale::getDefault(), status))); + if (U_FAILURE(status)) + return string.substr(0, max) + kElideString; + bi->setText(string.c_str()); + int32_t index = bi->preceding(static_cast<int32_t>(max)); + if (index == icu::BreakIterator::DONE) { + index = static_cast<int32_t>(max); + } else { + // Found a valid break (may be the beginning of the string). Now use + // a character iterator to find the previous non-whitespace character. + icu::StringCharacterIterator char_iterator(string.c_str()); + if (index == 0) { + // No valid line breaks. Start at the end again. This ensures we break + // on a valid character boundary. + index = static_cast<int32_t>(max); + } + char_iterator.setIndex(index); + while (char_iterator.hasPrevious()) { + char_iterator.previous(); + if (!(u_isspace(char_iterator.current()) || + u_charType(char_iterator.current()) == U_CONTROL_CHAR || + u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) { + // Not a whitespace character. Advance the iterator so that we + // include the current character in the truncated string. + char_iterator.next(); + break; + } + } + if (char_iterator.hasPrevious()) { + // Found a valid break point. + index = char_iterator.getIndex(); + } else { + // String has leading whitespace, return the elide string. + return kElideString; + } + } + return string.substr(0, index) + kElideString; +} + +string16 ToLower(const string16& string) { + icu::UnicodeString lower_u_str( + icu::UnicodeString(string.c_str()).toLower(icu::Locale::getDefault())); + string16 result; + lower_u_str.extract(0, lower_u_str.length(), + WriteInto(&result, lower_u_str.length() + 1)); + return result; +} + +string16 ToUpper(const string16& string) { + icu::UnicodeString upper_u_str( + icu::UnicodeString(string.c_str()).toUpper(icu::Locale::getDefault())); + string16 result; + upper_u_str.extract(0, upper_u_str.length(), + WriteInto(&result, upper_u_str.length() + 1)); + return result; +} + +// Compares the character data stored in two different string16 strings by +// specified Collator instance. +UCollationResult CompareString16WithCollator(const icu::Collator* collator, + const string16& lhs, + const string16& rhs) { + DCHECK(collator); + UErrorCode error = U_ZERO_ERROR; + UCollationResult result = collator->compare( + static_cast<const UChar*>(lhs.c_str()), static_cast<int>(lhs.length()), + static_cast<const UChar*>(rhs.c_str()), static_cast<int>(rhs.length()), + error); + DCHECK(U_SUCCESS(error)); + return result; +} + +// Specialization of operator() method for string16 version. +template <> +bool StringComparator<string16>::operator()(const string16& lhs, + const string16& rhs) { + // If we can not get collator instance for specified locale, just do simple + // string compare. + if (!collator_) + return lhs < rhs; + return CompareString16WithCollator(collator_, lhs, rhs) == UCOL_LESS; +}; + +void SortStrings16(const std::string& locale, + std::vector<string16>* strings) { + SortVectorWithStringKey(locale, strings, false); +} + +const std::vector<std::string>& GetAvailableLocales() { + static std::vector<std::string> locales; + if (locales.empty()) { + int num_locales = uloc_countAvailable(); + for (int i = 0; i < num_locales; ++i) { + std::string locale_name = uloc_getAvailable(i); + // Filter out the names that have aliases. + if (IsDuplicateName(locale_name)) + continue; + // Filter out locales for which we have only partially populated data + // and to which Chrome is not localized. + if (IsLocalePartiallyPopulated(locale_name)) + continue; + if (!IsLocaleSupportedByOS(locale_name)) + continue; + // Normalize underscores to hyphens because that's what our locale files + // use. + std::replace(locale_name.begin(), locale_name.end(), '_', '-'); + + // Map the Chinese locale names over to zh-CN and zh-TW. + if (LowerCaseEqualsASCII(locale_name, "zh-hans")) { + locale_name = "zh-CN"; + } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) { + locale_name = "zh-TW"; + } + locales.push_back(locale_name); + } + + // Manually add 'es-419' to the list. See the comment in IsDuplicateName(). + locales.push_back("es-419"); + } + return locales; +} + +void GetAcceptLanguagesForLocale(const std::string& display_locale, + std::vector<std::string>* locale_codes) { + for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) { + if (!IsLocaleNameTranslated(kAcceptLanguageList[i], display_locale)) + // TODO(jungshik) : Put them at the of the list with language codes + // enclosed by brackets instead of skipping. + continue; + locale_codes->push_back(kAcceptLanguageList[i]); + } +} + +} // namespace l10n_util |