diff options
Diffstat (limited to 'base/i18n')
-rw-r--r-- | base/i18n/rtl.cc | 67 | ||||
-rw-r--r-- | base/i18n/rtl.h | 27 | ||||
-rw-r--r-- | base/i18n/word_iterator.cc | 39 | ||||
-rw-r--r-- | base/i18n/word_iterator.h | 9 | ||||
-rw-r--r-- | base/i18n/word_iterator_unittest.cc | 76 |
5 files changed, 128 insertions, 90 deletions
diff --git a/base/i18n/rtl.cc b/base/i18n/rtl.cc index 9fbf35e..65973bd 100644 --- a/base/i18n/rtl.cc +++ b/base/i18n/rtl.cc @@ -102,13 +102,8 @@ TextDirection GetTextDirectionForLocale(const char* locale_name) { return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; } -TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { -#if defined(WCHAR_T_IS_UTF32) - string16 text_utf16 = WideToUTF16(text); - const UChar* string = text_utf16.c_str(); -#else +TextDirection GetFirstStrongCharacterDirection(const string16& text) { const UChar* string = text.c_str(); -#endif size_t length = text.length(); size_t position = 0; while (position < length) { @@ -136,8 +131,14 @@ TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { return LEFT_TO_RIGHT; } -bool AdjustStringForLocaleDirection(const std::wstring& text, - std::wstring* localized_text) { +#if defined(WCHAR_T_IS_UTF32) +TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { + return GetFirstStrongCharacterDirection(WideToUTF16(text)); +} +#endif + +bool AdjustStringForLocaleDirection(const string16& text, + string16* localized_text) { if (!IsRTL() || text.empty()) return false; @@ -153,13 +154,21 @@ bool AdjustStringForLocaleDirection(const std::wstring& text, return true; } -bool StringContainsStrongRTLChars(const std::wstring& text) { #if defined(WCHAR_T_IS_UTF32) - string16 text_utf16 = WideToUTF16(text); - const UChar* string = text_utf16.c_str(); -#else - const UChar* string = text.c_str(); +bool AdjustStringForLocaleDirection(const std::wstring& text, + std::wstring* localized_text) { + string16 out; + if (AdjustStringForLocaleDirection(WideToUTF16(text), &out)) { + // We should only touch the output on success. + *localized_text = UTF16ToWide(out); + return true; + } + return false; +} #endif + +bool StringContainsStrongRTLChars(const string16& text) { + const UChar* string = text.c_str(); size_t length = text.length(); size_t position = 0; while (position < length) { @@ -179,6 +188,24 @@ bool StringContainsStrongRTLChars(const std::wstring& text) { return false; } +#if defined(WCHAR_T_IS_UTF32) +bool StringContainsStrongRTLChars(const std::wstring& text) { + return StringContainsStrongRTLChars(WideToUTF16(text)); +} +#endif + +void WrapStringWithLTRFormatting(string16* text) { + if (text->empty()) + return; + + // Inserting an LRE (Left-To-Right Embedding) mark as the first character. + text->insert(0, 1, kLeftToRightEmbeddingMark); + + // Inserting a PDF (Pop Directional Formatting) mark as the last character. + text->push_back(kPopDirectionalFormatting); +} + +#if defined(WCHAR_T_IS_UTF32) void WrapStringWithLTRFormatting(std::wstring* text) { if (text->empty()) return; @@ -189,7 +216,20 @@ void WrapStringWithLTRFormatting(std::wstring* text) { // Inserting a PDF (Pop Directional Formatting) mark as the last character. text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); } +#endif + +void WrapStringWithRTLFormatting(string16* text) { + if (text->empty()) + return; + + // Inserting an RLE (Right-To-Left Embedding) mark as the first character. + text->insert(0, 1, kRightToLeftEmbeddingMark); + // Inserting a PDF (Pop Directional Formatting) mark as the last character. + text->push_back(kPopDirectionalFormatting); +} + +#if defined(WCHAR_T_IS_UTF32) void WrapStringWithRTLFormatting(std::wstring* text) { if (text->empty()) return; @@ -200,6 +240,7 @@ void WrapStringWithRTLFormatting(std::wstring* text) { // Inserting a PDF (Pop Directional Formatting) mark as the last character. text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); } +#endif void WrapPathWithLTRFormatting(const FilePath& path, string16* rtl_safe_path) { diff --git a/base/i18n/rtl.h b/base/i18n/rtl.h index f708206..98a4e35 100644 --- a/base/i18n/rtl.h +++ b/base/i18n/rtl.h @@ -6,6 +6,7 @@ #define BASE_I18N_RTL_H_ #include "base/string16.h" +#include "build/build_config.h" class FilePath; @@ -56,7 +57,10 @@ TextDirection GetTextDirectionForLocale(const char* locale_name); // character types L, LRE, LRO, R, AL, RLE, and RLO are considered as strong // directionality characters. Please refer to http://unicode.org/reports/tr9/ // for more information. +TextDirection GetFirstStrongCharacterDirection(const string16& text); +#if defined(WCHAR_T_IS_UTF32) TextDirection GetFirstStrongCharacterDirection(const std::wstring& text); +#endif // Given the string in |text|, this function creates a copy of the string with // the appropriate Unicode formatting marks that mark the string direction @@ -72,6 +76,15 @@ TextDirection GetFirstStrongCharacterDirection(const std::wstring& text); // string is always treated as a right-to-left string. This is done by // inserting certain Unicode formatting marks into the returned string. // +// TODO(brettw) bug 47194: This funciton is confusing. If it does no adjustment +// becuase the current locale is not RTL, it will do nothing and return false. +// This means you have to check the return value in many cases which doesn't +// make sense. This should be cleaned up and probably just take a single +// argument that's a pointer to a string that it modifies as necessary. In the +// meantime, the recommended usage is to use the same arg as input & output, +// which will work without extra checks: +// AdjustStringForLocaleDirection(text, &text); +// // TODO(idana) bug# 1206120: this function adjusts the string in question only // if the current locale is right-to-left. The function does not take care of // the opposite case (an RTL string displayed in an LTR context) since @@ -80,23 +93,36 @@ TextDirection GetFirstStrongCharacterDirection(const std::wstring& text); // installed. Since the English version of Windows doesn't have right-to-left // language support installed by default, inserting the direction Unicode mark // results in Windows displaying squares. +bool AdjustStringForLocaleDirection(const string16& text, + string16* localized_text); +#if defined(WCHAR_T_IS_UTF32) bool AdjustStringForLocaleDirection(const std::wstring& text, std::wstring* localized_text); +#endif // Returns true if the string contains at least one character with strong right // to left directionality; that is, a character with either R or AL Unicode // BiDi character type. +bool StringContainsStrongRTLChars(const string16& text); +#if defined(WCHAR_T_IS_UTF32) bool StringContainsStrongRTLChars(const std::wstring& text); +#endif // Wraps a string with an LRE-PDF pair which essentialy marks the string as a // Left-To-Right string. Doing this is useful in order to make sure LTR // strings are rendered properly in an RTL context. +void WrapStringWithLTRFormatting(string16* text); +#if defined(WCHAR_T_IS_UTF32) void WrapStringWithLTRFormatting(std::wstring* text); +#endif // Wraps a string with an RLE-PDF pair which essentialy marks the string as a // Right-To-Left string. Doing this is useful in order to make sure RTL // strings are rendered properly in an LTR context. +void WrapStringWithRTLFormatting(string16* text); +#if defined(WCHAR_T_IS_UTF32) void WrapStringWithRTLFormatting(std::wstring* text); +#endif // Wraps file path to get it to display correctly in RTL UI. All filepaths // should be passed through this function before display in UI for RTL locales. @@ -116,6 +142,7 @@ std::wstring GetDisplayStringInLTRDirectionality(std::wstring* text); // semantic effect. They can be deleted so they might not always appear in a // pair. const string16 StripWrappingBidiControlCharacters(const string16& text); + } // namespace i18n } // namespace base diff --git a/base/i18n/word_iterator.cc b/base/i18n/word_iterator.cc index feb77eb..a9fa4af 100644 --- a/base/i18n/word_iterator.cc +++ b/base/i18n/word_iterator.cc @@ -10,7 +10,7 @@ const size_t npos = -1; -WordIterator::WordIterator(const std::wstring& str, BreakType break_type) +WordIterator::WordIterator(const string16* str, BreakType break_type) : iter_(NULL), string_(str), break_type_(break_type), @@ -37,26 +37,9 @@ bool WordIterator::Init() { NOTREACHED(); break_type = UBRK_LINE; } -#if defined(WCHAR_T_IS_UTF16) iter_ = ubrk_open(break_type, NULL, - string_.data(), static_cast<int32_t>(string_.size()), + string_->data(), static_cast<int32_t>(string_->size()), &status); -#else // WCHAR_T_IS_UTF16 - // When wchar_t is wider than UChar (16 bits), transform |string_| into a - // UChar* string. Size the UChar* buffer to be large enough to hold twice - // as many UTF-16 code points as there are UCS-4 characters, in case each - // character translates to a UTF-16 surrogate pair, and leave room for a NUL - // terminator. - // TODO(avi): avoid this alloc - chars_.resize(string_.length() * sizeof(UChar) + 1); - - UErrorCode error = U_ZERO_ERROR; - int32_t destLength; - u_strFromWCS(&chars_[0], chars_.size(), &destLength, string_.data(), - string_.length(), &error); - - iter_ = ubrk_open(break_type, NULL, &chars_[0], destLength, &status); -#endif if (U_FAILURE(status)) { NOTREACHED() << "ubrk_open failed"; return false; @@ -81,21 +64,7 @@ bool WordIterator::IsWord() const { return (ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE); } -std::wstring WordIterator::GetWord() const { +string16 WordIterator::GetWord() const { DCHECK(prev_ != npos && pos_ != npos); -#if defined(WCHAR_T_IS_UTF16) - return string_.substr(prev_, pos_ - prev_); -#else // WCHAR_T_IS_UTF16 - // See comment in Init(). If there are no surrogate pairs, - // |out_length| will be exactly |in_length|, if there are surrogate - // pairs it will be less than |in_length|. - int32_t out_length; - UErrorCode error = U_ZERO_ERROR; - const int32_t in_length = pos_ - prev_; - std::vector<std::wstring::value_type> out_buffer(in_length); - u_strToWCS(&out_buffer[0], in_length, &out_length, - &chars_[prev_], in_length, &error); - DCHECK_LE(out_length, in_length); - return std::wstring(&out_buffer[0], out_length); -#endif + return string_->substr(prev_, pos_ - prev_); } diff --git a/base/i18n/word_iterator.h b/base/i18n/word_iterator.h index c9648ca..aabafab1 100644 --- a/base/i18n/word_iterator.h +++ b/base/i18n/word_iterator.h @@ -11,6 +11,7 @@ #include "unicode/uchar.h" #include "base/basictypes.h" +#include "base/string16.h" // The WordIterator class iterates through the words and word breaks // in a string. (In the string " foo bar! ", the word breaks are at the @@ -18,7 +19,7 @@ // // To extract the words from a string, move a WordIterator through the // string and test whether IsWord() is true. E.g., -// WordIterator iter(str, WordIterator::BREAK_WORD); +// WordIterator iter(&str, WordIterator::BREAK_WORD); // if (!iter.Init()) return false; // while (iter.Advance()) { // if (iter.IsWord()) { @@ -36,7 +37,7 @@ class WordIterator { }; // Requires |str| to live as long as the WordIterator does. - WordIterator(const std::wstring& str, BreakType break_type); + WordIterator(const string16* str, BreakType break_type); ~WordIterator(); // Init() must be called before any of the iterators are valid. @@ -63,7 +64,7 @@ class WordIterator { // Return the word between prev() and pos(). // Advance() must have been called successfully at least once // for pos() to have advanced to somewhere useful. - std::wstring GetWord() const; + string16 GetWord() const; private: // ICU iterator. @@ -73,7 +74,7 @@ class WordIterator { #endif // The string we're iterating over. - const std::wstring& string_; + const string16* string_; // The breaking style (word/line). BreakType break_type_; diff --git a/base/i18n/word_iterator_unittest.cc b/base/i18n/word_iterator_unittest.cc index d653e1d4..92aff76 100644 --- a/base/i18n/word_iterator_unittest.cc +++ b/base/i18n/word_iterator_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -6,84 +6,87 @@ #include "base/string_piece.h" #include "base/string_util.h" -#include "base/sys_string_conversions.h" +#include "base/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" TEST(WordIteratorTest, BreakWord) { - std::wstring str(L" foo bar! \npouet boom"); - WordIterator iter(str, WordIterator::BREAK_WORD); + string16 space(UTF8ToUTF16(" ")); + + string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); + WordIterator iter(&str, WordIterator::BREAK_WORD); ASSERT_TRUE(iter.Init()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_EQ(space, iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(L"foo", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_EQ(space, iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(L"bar", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L"!", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("!"), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_EQ(space, iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L"\n", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(L"pouet", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_EQ(space, iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(L"boom", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord()); EXPECT_FALSE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); } TEST(WordIteratorTest, BreakLine) { - std::wstring str(L" foo bar! \npouet boom"); - WordIterator iter(str, WordIterator::BREAK_LINE); + string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); + WordIterator iter(&str, WordIterator::BREAK_LINE); ASSERT_TRUE(iter.Init()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L"foo ", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(L"bar! \n", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L"pouet ", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L"boom", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord()); EXPECT_FALSE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); } TEST(WordIteratorTest, BreakWide16) { // "Παγκόσμιος Ιστός" - const std::wstring str(L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" - L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"); - const std::wstring word1(str.substr(0, 10)); - const std::wstring word2(str.substr(11, 5)); - WordIterator iter(str, WordIterator::BREAK_WORD); + const string16 str(WideToUTF16( + L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" + L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2")); + const string16 word1(str.substr(0, 10)); + const string16 word2(str.substr(11, 5)); + WordIterator iter(&str, WordIterator::BREAK_WORD); ASSERT_TRUE(iter.Init()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); EXPECT_EQ(word1, iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); EXPECT_EQ(word2, iter.GetWord()); @@ -93,25 +96,22 @@ TEST(WordIteratorTest, BreakWide16) { TEST(WordIteratorTest, BreakWide32) { // U+1D49C MATHEMATICAL SCRIPT CAPITAL A - const char *very_wide_char = "\xF0\x9D\x92\x9C"; - const std::wstring str( - base::SysUTF8ToWide(StringPrintf("%s a", very_wide_char))); -#if defined(WCHAR_T_IS_UTF16) - const std::wstring very_wide_word(str.substr(0, 2)); -#elif defined(WCHAR_T_IS_UTF32) - const std::wstring very_wide_word(str.substr(0, 1)); -#endif - WordIterator iter(str, WordIterator::BREAK_WORD); + const char* very_wide_char = "\xF0\x9D\x92\x9C"; + const string16 str( + UTF8ToUTF16(StringPrintf("%s a", very_wide_char))); + const string16 very_wide_word(str.substr(0, 2)); + + WordIterator iter(&str, WordIterator::BREAK_WORD); ASSERT_TRUE(iter.Init()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); EXPECT_EQ(very_wide_word, iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(L"a", iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("a"), iter.GetWord()); EXPECT_FALSE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); } |