5 files changed, 128 insertions, 90 deletions
diff --git a/base/i18n/rtl.cc b/base/i18n/rtl.cc
index 9fbf35e..65973bd 100644
--- a/base/i18n/rtl.cc
+++ b/base/i18n/rtl.cc
@@ -102,13 +102,8 @@ TextDirection GetTextDirectionForLocale(const char* locale_name) {
   return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
 }
 
-TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
-#if defined(WCHAR_T_IS_UTF32)
-  string16 text_utf16 = WideToUTF16(text);
-  const UChar* string = text_utf16.c_str();
-#else
+TextDirection GetFirstStrongCharacterDirection(const string16& text) {
   const UChar* string = text.c_str();
-#endif
   size_t length = text.length();
   size_t position = 0;
   while (position < length) {
@@ -136,8 +131,14 @@ TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
   return LEFT_TO_RIGHT;
 }
 
-bool AdjustStringForLocaleDirection(const std::wstring& text,
-                                    std::wstring* localized_text) {
+#if defined(WCHAR_T_IS_UTF32)
+TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
+  return GetFirstStrongCharacterDirection(WideToUTF16(text));
+}
+#endif
+
+bool AdjustStringForLocaleDirection(const string16& text,
+                                    string16* localized_text) {
   if (!IsRTL() || text.empty())
     return false;
 
@@ -153,13 +154,21 @@ bool AdjustStringForLocaleDirection(const std::wstring& text,
   return true;
 }
 
-bool StringContainsStrongRTLChars(const std::wstring& text) {
 #if defined(WCHAR_T_IS_UTF32)
-  string16 text_utf16 = WideToUTF16(text);
-  const UChar* string = text_utf16.c_str();
-#else
-  const UChar* string = text.c_str();
+bool AdjustStringForLocaleDirection(const std::wstring& text,
+                                    std::wstring* localized_text) {
+  string16 out;
+  if (AdjustStringForLocaleDirection(WideToUTF16(text), &out)) {
+    // We should only touch the output on success.
+    *localized_text = UTF16ToWide(out);
+    return true;
+  }
+  return false;
+}
 #endif
+
+bool StringContainsStrongRTLChars(const string16& text) {
+  const UChar* string = text.c_str();
   size_t length = text.length();
   size_t position = 0;
   while (position < length) {
@@ -179,6 +188,24 @@ bool StringContainsStrongRTLChars(const std::wstring& text) {
   return false;
 }
 
+#if defined(WCHAR_T_IS_UTF32)
+bool StringContainsStrongRTLChars(const std::wstring& text) {
+  return StringContainsStrongRTLChars(WideToUTF16(text));
+}
+#endif
+
+void WrapStringWithLTRFormatting(string16* text) {
+  if (text->empty())
+    return;
+
+  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
+  text->insert(0, 1, kLeftToRightEmbeddingMark);
+
+  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
+  text->push_back(kPopDirectionalFormatting);
+}
+
+#if defined(WCHAR_T_IS_UTF32)
 void WrapStringWithLTRFormatting(std::wstring* text) {
   if (text->empty())
     return;
@@ -189,7 +216,20 @@ void WrapStringWithLTRFormatting(std::wstring* text) {
   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
   text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
 }
+#endif
+
+void WrapStringWithRTLFormatting(string16* text) {
+  if (text->empty())
+    return;
+
+  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
+  text->insert(0, 1, kRightToLeftEmbeddingMark);
 
+  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
+  text->push_back(kPopDirectionalFormatting);
+}
+
+#if defined(WCHAR_T_IS_UTF32)
 void WrapStringWithRTLFormatting(std::wstring* text) {
   if (text->empty())
     return;
@@ -200,6 +240,7 @@ void WrapStringWithRTLFormatting(std::wstring* text) {
   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
   text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
 }
+#endif
 
 void WrapPathWithLTRFormatting(const FilePath& path,
                                string16* rtl_safe_path) {
diff --git a/base/i18n/rtl.h b/base/i18n/rtl.h
index f708206..98a4e35 100644
--- a/base/i18n/rtl.h
+++ b/base/i18n/rtl.h
@@ -6,6 +6,7 @@
 #define BASE_I18N_RTL_H_
 
 #include "base/string16.h"
+#include "build/build_config.h"
 
 class FilePath;
 
@@ -56,7 +57,10 @@ TextDirection GetTextDirectionForLocale(const char* locale_name);
 // character types L, LRE, LRO, R, AL, RLE, and RLO are considered as strong
 // directionality characters. Please refer to http://unicode.org/reports/tr9/
 // for more information.
+TextDirection GetFirstStrongCharacterDirection(const string16& text);
+#if defined(WCHAR_T_IS_UTF32)
 TextDirection GetFirstStrongCharacterDirection(const std::wstring& text);
+#endif
 
 // Given the string in |text|, this function creates a copy of the string with
 // the appropriate Unicode formatting marks that mark the string direction
@@ -72,6 +76,15 @@ TextDirection GetFirstStrongCharacterDirection(const std::wstring& text);
 // string is always treated as a right-to-left string. This is done by
 // inserting certain Unicode formatting marks into the returned string.
 //
+// TODO(brettw) bug 47194: This function is confusing. If it does no adjustment
+// because the current locale is not RTL, it will do nothing and return false.
+// This means you have to check the return value in many cases which doesn't
+// make sense. This should be cleaned up and probably just take a single
+// argument that's a pointer to a string that it modifies as necessary. In the
+// meantime, the recommended usage is to use the same arg as input & output,
+// which will work without extra checks:
+//   AdjustStringForLocaleDirection(text, &text);
+//
 // TODO(idana) bug# 1206120: this function adjusts the string in question only
 // if the current locale is right-to-left. The function does not take care of
 // the opposite case (an RTL string displayed in an LTR context) since
@@ -80,23 +93,36 @@ TextDirection GetFirstStrongCharacterDirection(const std::wstring& text);
 // installed. Since the English version of Windows doesn't have right-to-left
 // language support installed by default, inserting the direction Unicode mark
 // results in Windows displaying squares.
+bool AdjustStringForLocaleDirection(const string16& text,
+                                    string16* localized_text);
+#if defined(WCHAR_T_IS_UTF32)
 bool AdjustStringForLocaleDirection(const std::wstring& text,
                                     std::wstring* localized_text);
+#endif
 
 // Returns true if the string contains at least one character with strong right
 // to left directionality; that is, a character with either R or AL Unicode
 // BiDi character type.
+bool StringContainsStrongRTLChars(const string16& text);
+#if defined(WCHAR_T_IS_UTF32)
 bool StringContainsStrongRTLChars(const std::wstring& text);
+#endif
 
 // Wraps a string with an LRE-PDF pair which essentialy marks the string as a
 // Left-To-Right string. Doing this is useful in order to make sure LTR
 // strings are rendered properly in an RTL context.
+void WrapStringWithLTRFormatting(string16* text);
+#if defined(WCHAR_T_IS_UTF32)
 void WrapStringWithLTRFormatting(std::wstring* text);
+#endif
 
 // Wraps a string with an RLE-PDF pair which essentialy marks the string as a
 // Right-To-Left string. Doing this is useful in order to make sure RTL
 // strings are rendered properly in an LTR context.
+void WrapStringWithRTLFormatting(string16* text);
+#if defined(WCHAR_T_IS_UTF32)
 void WrapStringWithRTLFormatting(std::wstring* text);
+#endif
 
 // Wraps file path to get it to display correctly in RTL UI. All filepaths
 // should be passed through this function before display in UI for RTL locales.
@@ -116,6 +142,7 @@ std::wstring GetDisplayStringInLTRDirectionality(std::wstring* text);
 // semantic effect. They can be deleted so they might not always appear in a
 // pair.
 const string16 StripWrappingBidiControlCharacters(const string16& text);
+
 }  // namespace i18n
 }  // namespace base
 
diff --git a/base/i18n/word_iterator.cc b/base/i18n/word_iterator.cc
index feb77eb..a9fa4af 100644
--- a/base/i18n/word_iterator.cc
+++ b/base/i18n/word_iterator.cc
@@ -10,7 +10,7 @@
 
 const size_t npos = -1;
 
-WordIterator::WordIterator(const std::wstring& str, BreakType break_type)
+WordIterator::WordIterator(const string16* str, BreakType break_type)
     : iter_(NULL),
       string_(str),
       break_type_(break_type),
@@ -37,26 +37,9 @@ bool WordIterator::Init() {
       NOTREACHED();
       break_type = UBRK_LINE;
   }
-#if defined(WCHAR_T_IS_UTF16)
   iter_ = ubrk_open(break_type, NULL,
-                    string_.data(), static_cast<int32_t>(string_.size()),
+                    string_->data(), static_cast<int32_t>(string_->size()),
                     &status);
-#else  // WCHAR_T_IS_UTF16
-  // When wchar_t is wider than UChar (16 bits), transform |string_| into a
-  // UChar* string.  Size the UChar* buffer to be large enough to hold twice
-  // as many UTF-16 code points as there are UCS-4 characters, in case each
-  // character translates to a UTF-16 surrogate pair, and leave room for a NUL
-  // terminator.
-  // TODO(avi): avoid this alloc
-  chars_.resize(string_.length() * sizeof(UChar) + 1);
-
-  UErrorCode error = U_ZERO_ERROR;
-  int32_t destLength;
-  u_strFromWCS(&chars_[0], chars_.size(), &destLength, string_.data(),
-               string_.length(), &error);
-
-  iter_ = ubrk_open(break_type, NULL, &chars_[0], destLength, &status);
-#endif
   if (U_FAILURE(status)) {
     NOTREACHED() << "ubrk_open failed";
     return false;
@@ -81,21 +64,7 @@ bool WordIterator::IsWord() const {
   return (ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE);
 }
 
-std::wstring WordIterator::GetWord() const {
+string16 WordIterator::GetWord() const {
   DCHECK(prev_ != npos && pos_ != npos);
-#if defined(WCHAR_T_IS_UTF16)
-  return string_.substr(prev_, pos_ - prev_);
-#else  // WCHAR_T_IS_UTF16
-  // See comment in Init().  If there are no surrogate pairs,
-  // |out_length| will be exactly |in_length|, if there are surrogate
-  // pairs it will be less than |in_length|.
-  int32_t out_length;
-  UErrorCode error = U_ZERO_ERROR;
-  const int32_t in_length = pos_ - prev_;
-  std::vector<std::wstring::value_type> out_buffer(in_length);
-  u_strToWCS(&out_buffer[0], in_length, &out_length,
-             &chars_[prev_], in_length, &error);
-  DCHECK_LE(out_length, in_length);
-  return std::wstring(&out_buffer[0], out_length);
-#endif
+  return string_->substr(prev_, pos_ - prev_);
 }
diff --git a/base/i18n/word_iterator.h b/base/i18n/word_iterator.h
index c9648ca..aabafab1 100644
--- a/base/i18n/word_iterator.h
+++ b/base/i18n/word_iterator.h
@@ -11,6 +11,7 @@
 #include "unicode/uchar.h"
 
 #include "base/basictypes.h"
+#include "base/string16.h"
 
 // The WordIterator class iterates through the words and word breaks
 // in a string.  (In the string " foo bar! ", the word breaks are at the
@@ -18,7 +19,7 @@
 //
 // To extract the words from a string, move a WordIterator through the
 // string and test whether IsWord() is true.  E.g.,
-//   WordIterator iter(str, WordIterator::BREAK_WORD);
+//   WordIterator iter(&str, WordIterator::BREAK_WORD);
 //   if (!iter.Init()) return false;
 //   while (iter.Advance()) {
 //     if (iter.IsWord()) {
@@ -36,7 +37,7 @@ class WordIterator {
   };
 
   // Requires |str| to live as long as the WordIterator does.
-  WordIterator(const std::wstring& str, BreakType break_type);
+  WordIterator(const string16* str, BreakType break_type);
   ~WordIterator();
 
   // Init() must be called before any of the iterators are valid.
@@ -63,7 +64,7 @@ class WordIterator {
   // Return the word between prev() and pos().
   // Advance() must have been called successfully at least once
   // for pos() to have advanced to somewhere useful.
-  std::wstring GetWord() const;
+  string16 GetWord() const;
 
  private:
   // ICU iterator.
@@ -73,7 +74,7 @@ class WordIterator {
 #endif
 
   // The string we're iterating over.
-  const std::wstring& string_;
+  const string16* string_;
 
   // The breaking style (word/line).
   BreakType break_type_;
diff --git a/base/i18n/word_iterator_unittest.cc b/base/i18n/word_iterator_unittest.cc
index d653e1d4..92aff76 100644
--- a/base/i18n/word_iterator_unittest.cc
+++ b/base/i18n/word_iterator_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -6,84 +6,87 @@
 
 #include "base/string_piece.h"
 #include "base/string_util.h"
-#include "base/sys_string_conversions.h"
+#include "base/utf_string_conversions.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
 TEST(WordIteratorTest, BreakWord) {
-  std::wstring str(L" foo bar! \npouet boom");
-  WordIterator iter(str, WordIterator::BREAK_WORD);
+  string16 space(UTF8ToUTF16(" "));
+
+  string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
+  WordIterator iter(&str, WordIterator::BREAK_WORD);
   ASSERT_TRUE(iter.Init());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_EQ(space, iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_TRUE(iter.IsWord());
-  EXPECT_EQ(L"foo", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_EQ(space, iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_TRUE(iter.IsWord());
-  EXPECT_EQ(L"bar", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L"!", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("!"), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_EQ(space, iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L"\n", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_TRUE(iter.IsWord());
-  EXPECT_EQ(L"pouet", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_EQ(space, iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_TRUE(iter.IsWord());
-  EXPECT_EQ(L"boom", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord());
   EXPECT_FALSE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
 }
 
 TEST(WordIteratorTest, BreakLine) {
-  std::wstring str(L" foo bar! \npouet boom");
-  WordIterator iter(str, WordIterator::BREAK_LINE);
+  string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
+  WordIterator iter(&str, WordIterator::BREAK_LINE);
   ASSERT_TRUE(iter.Init());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L"foo ", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_TRUE(iter.IsWord());
-  EXPECT_EQ(L"bar! \n", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L"pouet ", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L"boom", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord());
   EXPECT_FALSE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
 }
 
 TEST(WordIteratorTest, BreakWide16) {
   //  "Παγκόσμιος Ιστός"
-  const std::wstring str(L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
-                         L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2");
-  const std::wstring word1(str.substr(0, 10));
-  const std::wstring word2(str.substr(11, 5));
-  WordIterator iter(str, WordIterator::BREAK_WORD);
+  const string16 str(WideToUTF16(
+      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
+      L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));
+  const string16 word1(str.substr(0, 10));
+  const string16 word2(str.substr(11, 5));
+  WordIterator iter(&str, WordIterator::BREAK_WORD);
   ASSERT_TRUE(iter.Init());
   EXPECT_TRUE(iter.Advance());
   EXPECT_TRUE(iter.IsWord());
   EXPECT_EQ(word1, iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_TRUE(iter.IsWord());
   EXPECT_EQ(word2, iter.GetWord());
@@ -93,25 +96,22 @@ TEST(WordIteratorTest, BreakWide16) {
 
 TEST(WordIteratorTest, BreakWide32) {
   // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
-  const char *very_wide_char = "\xF0\x9D\x92\x9C";
-  const std::wstring str(
-      base::SysUTF8ToWide(StringPrintf("%s a", very_wide_char)));
-#if defined(WCHAR_T_IS_UTF16)
-  const std::wstring very_wide_word(str.substr(0, 2));
-#elif defined(WCHAR_T_IS_UTF32)
-  const std::wstring very_wide_word(str.substr(0, 1));
-#endif
-  WordIterator iter(str, WordIterator::BREAK_WORD);
+  const char* very_wide_char = "\xF0\x9D\x92\x9C";
+  const string16 str(
+      UTF8ToUTF16(StringPrintf("%s a", very_wide_char)));
+  const string16 very_wide_word(str.substr(0, 2));
+
+  WordIterator iter(&str, WordIterator::BREAK_WORD);
   ASSERT_TRUE(iter.Init());
   EXPECT_TRUE(iter.Advance());
   EXPECT_TRUE(iter.IsWord());
   EXPECT_EQ(very_wide_word, iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
-  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord());
   EXPECT_TRUE(iter.Advance());
   EXPECT_TRUE(iter.IsWord());
-  EXPECT_EQ(L"a", iter.GetWord());
+  EXPECT_EQ(UTF8ToUTF16("a"), iter.GetWord());
   EXPECT_FALSE(iter.Advance());
   EXPECT_FALSE(iter.IsWord());
 }