Rewrite GetTextDirection() to call GetTextDirectionForLocale(locale_name), which

in turn calls uscript_getCode() to get the script code and compare it against Hebrew and Arabic scripts (as opposed to languages). TEST=L10n*.GetTextDirection Review URL: http://codereview.chromium.org/40125 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@12435 0039d316-1c4b-4281-b951-d872f2087c98
author: jungshik@google.com <jungshik@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2009-03-25 05:09:46 +0000
committer: jungshik@google.com <jungshik@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2009-03-25 05:09:46 +0000
commit: 55211299fce265f04970c3643ed521887092f5a6 (patch)
tree: 216871e52d71a833175add65846c8e5419c7f9cf
parent: 07f95333a47323bfbd65c8443e0fcc470956cb27 (diff)
download: chromium_src-55211299fce265f04970c3643ed521887092f5a6.zip
chromium_src-55211299fce265f04970c3643ed521887092f5a6.tar.gz
chromium_src-55211299fce265f04970c3643ed521887092f5a6.tar.bz2
3 files changed, 61 insertions, 23 deletions
diff --git a/chrome/common/l10n_util.cc b/chrome/common/l10n_util.cc
index c182c4e..de01f6b 100644
--- a/chrome/common/l10n_util.cc
+++ b/chrome/common/l10n_util.cc
@@ -19,6 +19,7 @@
 #include "chrome/common/chrome_switches.h"
 #include "chrome/common/gfx/chrome_canvas.h"
 #include "chrome/common/resource_bundle.h"
+#include "unicode/uscript.h"
 
 // TODO(playmobil): remove this undef once SkPostConfig.h is fixed.
 // skia/include/corecg/SkPostConfig.h #defines strcasecmp() so we can't use
@@ -437,33 +438,35 @@ std::wstring ToLower(const std::wstring& string) {
 }
 #endif  // defined(WCHAR_T_IS_UTF32)
 
-// Returns the text direction.
-// This function retrieves the language corresponding to the default ICU locale
-// (assuming that SetICUDefaultLocale is called) and determines the text
-// direction by comparing it with "ar" or "he".
-// Note that script is better than language here to get a wider coverage.
-// Unfortunately, getScript in ICU returns an empty string unless
-// the locale is created with an explicit script specified. For now,
-// it does not matter much because we only support Hebrew and Arabic.
-// (c.f. other languages written in RTL : Farsi, Urdu, Syriac, Azerbaijani
-//  in Arabic, etc)
-// TODO(hbono): Need to find better identification methods than checking
-// if the language ID is Arabic or Hebrew. (http://b/issue?id=1054119)
-// Use an ICU API when added (see http://bugs.icu-project.org/trac/ticket/6228).
+// Returns the text direction for the default ICU locale. It is assumed
+// that SetICUDefaultLocale has been called to set the default locale to
+// the UI locale of Chrome.
 TextDirection GetTextDirection() {
   if (g_text_direction == UNKNOWN_DIRECTION) {
     const Locale& locale = Locale::getDefault();
-    const char* lang = locale.getLanguage();
-    // Check only for Arabic and Hebrew languages now.
-    if (strcmp(lang, "ar") == 0 || strcmp(lang, "he") == 0) {
-      g_text_direction = RIGHT_TO_LEFT;
-    } else {
-      g_text_direction = LEFT_TO_RIGHT;
-    }
+    g_text_direction = GetTextDirectionForLocale(locale.getName());
   }
   return g_text_direction;
 }
 
+// Returns the text direction for |locale_name|: RIGHT_TO_LEFT when the first
+// script ICU reports for the locale is Arabic or Hebrew, LEFT_TO_RIGHT
+// otherwise, including when the script lookup fails.
+TextDirection GetTextDirectionForLocale(const char* locale_name) {
+  const int kMaxScripts = 10;  // 10 scripts should be enough for any locale.
+  UScriptCode scripts[kMaxScripts];
+  UErrorCode error = U_ZERO_ERROR;
+  int n = uscript_getCode(locale_name, scripts, kMaxScripts, &error);
+  DCHECK(U_SUCCESS(error) && n > 0);
+  // DCHECK is compiled out of release builds, so handle failure explicitly
+  // instead of reading |scripts|, which may not have been filled in.
+  if (U_FAILURE(error) || n <= 0)
+    return LEFT_TO_RIGHT;
+
+  // Checking Arabic and Hebrew scripts covers Arabic, Hebrew, Farsi,
+  // Urdu and Azerbaijani written in Arabic. Syriac script
+  // (another RTL) is not a living script and we didn't yet localize
+  // to locales using other living RTL scripts such as Thaana and N'ko.
+  // TODO(jungshik): Use a new ICU API, uloc_getCharacterOrientation to avoid
+  // 'hardcoded comparison' with Arabic and Hebrew scripts once we
+  // upgrade ICU to 4.0 or later or port it to our copy of ICU.
+  if (scripts[0] == USCRIPT_ARABIC || scripts[0] == USCRIPT_HEBREW)
+    return RIGHT_TO_LEFT;
+  return LEFT_TO_RIGHT;
+}
+
 TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
 #if defined(WCHAR_T_IS_UTF32)
   string16 text_utf16 = WideToUTF16(text);
diff --git a/chrome/common/l10n_util.h b/chrome/common/l10n_util.h
index 97c3171..e836a40 100644
--- a/chrome/common/l10n_util.h
+++ b/chrome/common/l10n_util.h
@@ -107,14 +107,17 @@ enum TextDirection {
   LEFT_TO_RIGHT,
 };
 
-// Returns the locale-specific text direction.
-// This function retrieves the application locale and determines the text
-// direction. Its possible results are listed below:
+// Returns the text direction for the default ICU locale. It is assumed
+// that SetICUDefaultLocale has been called to set the default locale to
+// the UI locale of Chrome. Its return value is one of the following three:
 //  * LEFT_TO_RIGHT: Left-To-Right (e.g. English, Chinese, etc.);
 //  * RIGHT_TO_LEFT: Right-To-Left (e.g. Arabic, Hebrew, etc.), and;
 //  * UNKNOWN_DIRECTION: unknown (or error).
 TextDirection GetTextDirection();
 
+// Returns the text direction for |locale_name|, an ICU locale name such as
+// "ar_EG" or "zh-CN". The result is RIGHT_TO_LEFT when the first script ICU
+// reports for the locale is Arabic or Hebrew and LEFT_TO_RIGHT otherwise;
+// UNKNOWN_DIRECTION is never returned.
+TextDirection GetTextDirectionForLocale(const char* locale_name);
+
 // Given the string in |text|, returns the directionality of the first
 // character with strong directionality in the string. If no character in the
 // text has strong directionality, LEFT_TO_RIGHT is returned. The Bidi
diff --git a/chrome/common/l10n_util_unittest.cc b/chrome/common/l10n_util_unittest.cc
index 5464108..eb0b028 100644
--- a/chrome/common/l10n_util_unittest.cc
+++ b/chrome/common/l10n_util_unittest.cc
@@ -31,6 +31,10 @@ class StringWrapper {
   DISALLOW_COPY_AND_ASSIGN(StringWrapper);
 };
 
+// Test-local shorthand that forwards to
+// l10n_util::GetTextDirectionForLocale(); used by the GetTextDirection test.
+l10n_util::TextDirection GetTextDirection(const char* locale_name) {
+  return l10n_util::GetTextDirectionForLocale(locale_name);
+}
+
 }  // namespace
 
 class L10nUtilTest : public PlatformTest {
@@ -376,3 +380,31 @@ TEST_F(L10nUtilTest, WrapPathWithLTRFormatting) {
     EXPECT_EQ(wrapped_path, test_data[i].wrapped_path);
   }
 }
+
+// Checks l10n_util::GetTextDirectionForLocale() (through the GetTextDirection
+// helper defined earlier in this file) first for locales expected to be
+// right-to-left, then for locales expected to be left-to-right.
+TEST_F(L10nUtilTest, GetTextDirection) {
+  // Arabic and Hebrew, with and without a region subtag.
+  EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("ar"));
+  EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("ar_EG"));
+  EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("he"));
+  EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("he_IL"));
+  // iw is an obsolete code for Hebrew.
+  EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("iw"));
+#if 0
+  // Enable these when we localize to Farsi, Urdu, Azerbaijani
+  // written in Arabic and Dhivehi. At the moment, our copy of
+  // ICU data does not have entries for them.
+  EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("fa"));
+  EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("ur"));
+  EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("az_Arab"));
+  // Dhivehi that uses Thaana script.
+  EXPECT_EQ(l10n_util::RIGHT_TO_LEFT, GetTextDirection("dv"));
+#endif
+  // The remaining locales are expected to be left-to-right.
+  EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("en"));
+  // Chinese in China with '-'.
+  EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("zh-CN"));
+  // Filipino : 3-letter code
+  EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("fil"));
+  // Russian
+  EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("ru"));
+  // Japanese that uses multiple scripts
+  EXPECT_EQ(l10n_util::LEFT_TO_RIGHT, GetTextDirection("ja"));
+}
author	jungshik@google.com <jungshik@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2009-03-25 05:09:46 +0000
committer	jungshik@google.com <jungshik@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2009-03-25 05:09:46 +0000
commit	55211299fce265f04970c3643ed521887092f5a6 (patch)
tree	216871e52d71a833175add65846c8e5419c7f9cf
parent	07f95333a47323bfbd65c8443e0fcc470956cb27 (diff)
download	chromium_src-55211299fce265f04970c3643ed521887092f5a6.zip chromium_src-55211299fce265f04970c3643ed521887092f5a6.tar.gz chromium_src-55211299fce265f04970c3643ed521887092f5a6.tar.bz2