summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjungshik@google.com <jungshik@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-03-25 05:09:46 +0000
committerjungshik@google.com <jungshik@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2009-03-25 05:09:46 +0000
commit55211299fce265f04970c3643ed521887092f5a6 (patch)
tree216871e52d71a833175add65846c8e5419c7f9cf
parent07f95333a47323bfbd65c8443e0fcc470956cb27 (diff)
downloadchromium_src-55211299fce265f04970c3643ed521887092f5a6.zip
chromium_src-55211299fce265f04970c3643ed521887092f5a6.tar.gz
chromium_src-55211299fce265f04970c3643ed521887092f5a6.tar.bz2
Rewrite GetTextDirection() to call GetTextDirectionForLocale(locale_name), which
in turn calls uscript_getCode() to get the script code and compares it against Hebrew and Arabic scripts (as opposed to languages). TEST=L10n*.GetTextDirection Review URL: http://codereview.chromium.org/40125 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@12435 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--chrome/common/l10n_util.cc43
-rw-r--r--chrome/common/l10n_util.h9
-rw-r--r--chrome/common/l10n_util_unittest.cc32
3 files changed, 61 insertions, 23 deletions
diff --git a/chrome/common/l10n_util.cc b/chrome/common/l10n_util.cc
index c182c4e..de01f6b 100644
--- a/chrome/common/l10n_util.cc
+++ b/chrome/common/l10n_util.cc
@@ -19,6 +19,7 @@
#include "chrome/common/chrome_switches.h"
#include "chrome/common/gfx/chrome_canvas.h"
#include "chrome/common/resource_bundle.h"
+#include "unicode/uscript.h"
// TODO(playmobil): remove this undef once SkPostConfig.h is fixed.
// skia/include/corecg/SkPostConfig.h #defines strcasecmp() so we can't use
@@ -437,33 +438,35 @@ std::wstring ToLower(const std::wstring& string) {
}
#endif // defined(WCHAR_T_IS_UTF32)
-// Returns the text direction.
-// This function retrieves the language corresponding to the default ICU locale
-// (assuming that SetICUDefaultLocale is called) and determines the text
-// direction by comparing it with "ar" or "he".
-// Note that script is better than language here to get a wider coverage.
-// Unfortunately, getScript in ICU returns an empty string unless
-// the locale is created with an explicit script specified. For now,
-// it does not matter much because we only support Hebrew and Arabic.
-// (c.f. other languages written in RTL : Farsi, Urdu, Syriac, Azerbaijani
-// in Arabic, etc)
-// TODO(hbono): Need to find better identification methods than checking
-// if the language ID is Arabic or Hebrew. (http://b/issue?id=1054119)
-// Use an ICU API when added (see http://bugs.icu-project.org/trac/ticket/6228).
+// Returns the text direction for the default ICU locale, computing it on
+// first use and caching it in g_text_direction. It is assumed that
+// SetICUDefaultLocale has been called to set it to the UI locale of Chrome.
TextDirection GetTextDirection() {
if (g_text_direction == UNKNOWN_DIRECTION) {
const Locale& locale = Locale::getDefault();
- const char* lang = locale.getLanguage();
- // Check only for Arabic and Hebrew languages now.
- if (strcmp(lang, "ar") == 0 || strcmp(lang, "he") == 0) {
- g_text_direction = RIGHT_TO_LEFT;
- } else {
- g_text_direction = LEFT_TO_RIGHT;
- }
+ g_text_direction = GetTextDirectionForLocale(locale.getName());
}
return g_text_direction;
}
+// Returns the text direction implied by the first script ICU reports for
+// |locale_name|: RIGHT_TO_LEFT for Arabic or Hebrew, LEFT_TO_RIGHT for
+// everything else, including locales ICU cannot resolve to a script.
+TextDirection GetTextDirectionForLocale(const char* locale_name) {
+  // 10 scripts should be enough for any locale.
+  const int kMaxScripts = 10;
+  UScriptCode scripts[kMaxScripts];
+  UErrorCode error = U_ZERO_ERROR;
+  int n = uscript_getCode(locale_name, scripts, kMaxScripts, &error);
+  DCHECK(U_SUCCESS(error) && n > 0);
+  // The DCHECK above is a debug-only check. If the lookup failed or found
+  // no script, |scripts| may never have been written to, so bail out here
+  // rather than read an uninitialized scripts[0]. LEFT_TO_RIGHT is what any
+  // non-Arabic, non-Hebrew locale gets anyway.
+  if (U_FAILURE(error) || n <= 0)
+    return LEFT_TO_RIGHT;
+
+  // Checking the Arabic and Hebrew scripts covers Arabic, Hebrew, Farsi,
+  // Urdu and Azerbaijani written in Arabic. Syriac script
+  // (another RTL) is not a living script and we didn't yet localize
+  // to locales using other living RTL scripts such as Thaana and N'ko.
+  // TODO(jungshik): Use a new ICU API, uloc_getCharacterOrientation to avoid
+  // 'hardcoded-comparison' with Arabic and Hebrew scripts once we
+  // upgrade ICU to 4.0 or later or port it to our copy of ICU.
+  if (scripts[0] == USCRIPT_ARABIC || scripts[0] == USCRIPT_HEBREW)
+    return RIGHT_TO_LEFT;
+  return LEFT_TO_RIGHT;
+}
+
TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
#if defined(WCHAR_T_IS_UTF32)
string16 text_utf16 = WideToUTF16(text);
diff --git a/chrome/common/l10n_util.h b/chrome/common/l10n_util.h
index 97c3171..e836a40 100644
--- a/chrome/common/l10n_util.h
+++ b/chrome/common/l10n_util.h
@@ -107,14 +107,17 @@ enum TextDirection {
LEFT_TO_RIGHT,
};
-// Returns the locale-specific text direction.
-// This function retrieves the application locale and determines the text
-// direction. Its possible results are listed below:
+// Returns the text direction for the default ICU locale. It is assumed
+// that SetICUDefaultLocale has been called to set the default locale to
+// the UI locale of Chrome. The return value is one of the following three:
// * LEFT_TO_RIGHT: Left-To-Right (e.g. English, Chinese, etc.);
// * RIGHT_TO_LEFT: Right-To-Left (e.g. Arabic, Hebrew, etc.), and;
// * UNKNOWN_DIRECTION: unknown (or error).
TextDirection GetTextDirection();
+// Returns the text direction (RIGHT_TO_LEFT or LEFT_TO_RIGHT) for |locale_name|.
+TextDirection GetTextDirectionForLocale(const char* locale_name);
+
// Given the string in |text|, returns the directionality of the first
// character with strong directionality in the string. If no character in the
// text has strong directionality, LEFT_TO_RIGHT is returned. The Bidi
diff --git a/chrome/common/l10n_util_unittest.cc b/chrome/common/l10n_util_unittest.cc
index 5464108..eb0b028 100644
--- a/chrome/common/l10n_util_unittest.cc
+++ b/chrome/common/l10n_util_unittest.cc
@@ -31,6 +31,10 @@ class StringWrapper {
DISALLOW_COPY_AND_ASSIGN(StringWrapper);
};
+l10n_util::TextDirection GetTextDirection(const char* locale_name) {  // Test-local shorthand.
+ return l10n_util::GetTextDirectionForLocale(locale_name);
+}
+
} // namespace
class L10nUtilTest : public PlatformTest {
@@ -376,3 +380,31 @@ TEST_F(L10nUtilTest, WrapPathWithLTRFormatting) {
EXPECT_EQ(wrapped_path, test_data[i].wrapped_path);
}
}
+
+TEST_F(L10nUtilTest, GetTextDirection) {
+ EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("ar"));
+ EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("ar_EG"));
+ EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("he"));
+ EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("he_IL"));
+ // "iw" is an obsolete language code for Hebrew; it must still be RTL.
+ EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("iw"));
+#if 0
+ // Enable these when we localize to Farsi, Urdu, Azerbaijani written
+ // in Arabic script, and Dhivehi. At the moment, our copy of
+ // ICU data does not have entries for them.
+ EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("fa"));
+ EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("ur"));
+ EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("az_Arab"));
+ // Dhivehi, which uses the Thaana script.
+ EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("dv"));
+#endif
+ EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("en"));
+ // Chinese in China, with '-' instead of '_' as the separator.
+ EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("zh-CN"));
+ // Filipino: a 3-letter language code.
+ EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("fil"));
+ // Russian.
+ EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("ru"));
+ // Japanese, which uses multiple scripts.
+ EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("ja"));
+}