diff options
author | Kristian Monsen <kristianm@google.com> | 2011-05-11 20:53:37 +0100 |
---|---|---|
committer | Kristian Monsen <kristianm@google.com> | 2011-05-16 13:54:48 +0100 |
commit | 21d179b334e59e9a3bfcaed4c4430bef1bc5759d (patch) | |
tree | 64e2bb6da27af6a5c93ca34f6051584aafbfcb9e /base/i18n | |
parent | 0c63f00edd6ed0482fd5cbcea937ca088baf7858 (diff) | |
download | external_chromium-21d179b334e59e9a3bfcaed4c4430bef1bc5759d.zip external_chromium-21d179b334e59e9a3bfcaed4c4430bef1bc5759d.tar.gz external_chromium-21d179b334e59e9a3bfcaed4c4430bef1bc5759d.tar.bz2 |
Merge Chromium at 10.0.621.0: Initial merge by git.
Change-Id: I070cc91c608dfa4a968a5a54c173260765ac8097
Diffstat (limited to 'base/i18n')
-rw-r--r-- | base/i18n/break_iterator.cc | 101 | ||||
-rw-r--r-- | base/i18n/break_iterator.h | 108 | ||||
-rw-r--r-- | base/i18n/break_iterator_unittest.cc | 308 | ||||
-rw-r--r-- | base/i18n/file_util_icu.cc | 46 | ||||
-rw-r--r-- | base/i18n/number_formatting.cc | 26 | ||||
-rw-r--r-- | base/i18n/rtl.cc | 52 | ||||
-rw-r--r-- | base/i18n/rtl.h | 1 | ||||
-rw-r--r-- | base/i18n/time_formatting.cc | 23 | ||||
-rw-r--r-- | base/i18n/time_formatting.h | 14 | ||||
-rw-r--r-- | base/i18n/word_iterator.cc | 70 | ||||
-rw-r--r-- | base/i18n/word_iterator.h | 89 | ||||
-rw-r--r-- | base/i18n/word_iterator_unittest.cc | 117 |
12 files changed, 628 insertions, 327 deletions
diff --git a/base/i18n/break_iterator.cc b/base/i18n/break_iterator.cc new file mode 100644 index 0000000..e1b5e29 --- /dev/null +++ b/base/i18n/break_iterator.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/i18n/break_iterator.h" + +#include "base/logging.h" +#include "unicode/ubrk.h" +#include "unicode/uchar.h" +#include "unicode/ustring.h" + +namespace base { + +const size_t npos = -1; + +BreakIterator::BreakIterator(const string16* str, BreakType break_type) + : iter_(NULL), + string_(str), + break_type_(break_type), + prev_(npos), + pos_(0) { +} + +BreakIterator::~BreakIterator() { + if (iter_) + ubrk_close(static_cast<UBreakIterator*>(iter_)); +} + +bool BreakIterator::Init() { + UErrorCode status = U_ZERO_ERROR; + UBreakIteratorType break_type; + switch (break_type_) { + case BREAK_WORD: + break_type = UBRK_WORD; + break; + case BREAK_SPACE: + case BREAK_NEWLINE: + break_type = UBRK_LINE; + break; + default: + NOTREACHED() << "invalid break_type_"; + return false; + } + iter_ = ubrk_open(break_type, NULL, + string_->data(), static_cast<int32_t>(string_->size()), + &status); + if (U_FAILURE(status)) { + NOTREACHED() << "ubrk_open failed"; + return false; + } + // Move the iterator to the beginning of the string. 
+ ubrk_first(static_cast<UBreakIterator*>(iter_)); + return true; +} + +bool BreakIterator::Advance() { + int32_t pos; + int32_t status; + prev_ = pos_; + switch (break_type_) { + case BREAK_WORD: + case BREAK_SPACE: + pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); + if (pos == UBRK_DONE) { + pos_ = npos; + return false; + } + pos_ = static_cast<size_t>(pos); + return true; + case BREAK_NEWLINE: + do { + pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); + if (pos == UBRK_DONE) { + break; + } + pos_ = static_cast<size_t>(pos); + status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); + } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT); + if (pos == UBRK_DONE && prev_ == pos_) { + pos_ = npos; + return false; + } + return true; + default: + NOTREACHED() << "invalid break_type_"; + return false; + } +} + +bool BreakIterator::IsWord() const { + return (break_type_ == BREAK_WORD && + ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)) != + UBRK_WORD_NONE); +} + +string16 BreakIterator::GetString() const { + DCHECK(prev_ != npos && pos_ != npos); + return string_->substr(prev_, pos_ - prev_); +} + +} // namespace base diff --git a/base/i18n/break_iterator.h b/base/i18n/break_iterator.h new file mode 100644 index 0000000..9de7ac7 --- /dev/null +++ b/base/i18n/break_iterator.h @@ -0,0 +1,108 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_I18N_BREAK_ITERATOR_H_ +#define BASE_I18N_BREAK_ITERATOR_H_ +#pragma once + +#include "base/basictypes.h" +#include "base/string16.h" + +// The BreakIterator class iterates through the words, word breaks, and +// line breaks in a UTF-16 string. +// +// It provides several modes, BREAK_WORD, BREAK_SPACE, and BREAK_NEWLINE, +// which modify how characters are aggregated into the returned string. 
+// +// Under BREAK_WORD mode, once a word is encountered any non-word +// characters are not included in the returned string (e.g. in the +// UTF-16 equivalent of the string " foo bar! ", the word breaks are at +// the periods in ". .foo. .bar.!. ."). +// +// Under BREAK_SPACE mode, once a word is encountered, any non-word +// characters are included in the returned string, breaking only when a +// space-equivalent character is encountered (e.g. in the +// UTF-16 equivalent of the string " foo bar! ", the word breaks are at +// the periods in ". .foo .bar! ."). +// +// Under BREAK_NEWLINE mode, all characters are included in the returned +// string, breaking only when a newline-equivalent character is encountered +// (e.g. in the UTF-16 equivalent of the string "foo\nbar!\n\n", the line +// breaks are at the periods in ".foo\n.bar!\n.\n."). +// +// To extract the words from a string, move a BREAK_WORD BreakIterator +// through the string and test whether IsWord() is true. E.g., +// BreakIterator iter(&str, BreakIterator::BREAK_WORD); +// if (!iter.Init()) return false; +// while (iter.Advance()) { +// if (iter.IsWord()) { +// // region [iter.prev(),iter.pos()) contains a word. +// VLOG(1) << "word: " << iter.GetString(); +// } +// } + +namespace base { + +class BreakIterator { + public: + enum BreakType { + BREAK_WORD, + BREAK_SPACE, + BREAK_NEWLINE, + }; + + // Requires |str| to live as long as the BreakIterator does. + BreakIterator(const string16* str, BreakType break_type); + ~BreakIterator(); + + // Init() must be called before any of the iterators are valid. + // Returns false if ICU failed to initialize. + bool Init(); + + // Return the current break position within the string, + // or BreakIterator::npos when done. + size_t pos() const { return pos_; } + + // Return the value of pos() returned before Advance() was last called. + size_t prev() const { return prev_; } + + // Advance to the next break. Returns false if we've run past the end of + // the string. 
(Note that the very last "break" is after the final + // character in the string, and when we advance to that position it's the + // last time Advance() returns true.) + bool Advance(); + + // Under BREAK_WORD mode, returns true if the break we just hit is the + // end of a word. (Otherwise, the break iterator just skipped over e.g. + // whitespace or punctuation.) Under BREAK_SPACE and BREAK_NEWLINE modes, + // this distinction doesn't apply and it always returns false. + bool IsWord() const; + + // Return the string between prev() and pos(). + // Advance() must have been called successfully at least once + // for pos() to have advanced to somewhere useful. + string16 GetString() const; + + private: + // ICU iterator, avoiding ICU ubrk.h dependence. + // This is actually an ICU UBreakIterator* type, which turns out to be + // a typedef for a void* in the ICU headers. Using void* directly prevents + // callers from needing access to the ICU public headers directory. + void* iter_; + + // The string we're iterating over. + const string16* string_; + + // The breaking style (word/space/newline). + BreakType break_type_; + + // Previous and current iterator positions. + size_t prev_, pos_; + + DISALLOW_COPY_AND_ASSIGN(BreakIterator); +}; + +} // namespace base + +#endif // BASE_I18N_BREAK_ITERATOR_H_ diff --git a/base/i18n/break_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc new file mode 100644 index 0000000..bf4fdc1 --- /dev/null +++ b/base/i18n/break_iterator_unittest.cc @@ -0,0 +1,308 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/i18n/break_iterator.h" + +#include "base/string_piece.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(BreakIteratorTest, BreakWordEmpty) { + string16 empty; + base::BreakIterator iter(&empty, base::BreakIterator::BREAK_WORD); + ASSERT_TRUE(iter.Init()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. + EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakWord) { + string16 space(UTF8ToUTF16(" ")); + string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(space, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(space, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("!"), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(space, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(space, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 
+ EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakWide16) { + // Two greek words separated by space. + const string16 str(WideToUTF16( + L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" + L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2")); + const string16 word1(str.substr(0, 10)); + const string16 word2(str.substr(11, 5)); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(word1, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(word2, iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. + EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakWide32) { + // U+1D49C MATHEMATICAL SCRIPT CAPITAL A + const char* very_wide_char = "\xF0\x9D\x92\x9C"; + const string16 str( + UTF8ToUTF16(StringPrintf("%s a", very_wide_char))); + const string16 very_wide_word(str.substr(0, 2)); + + base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(very_wide_word, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 
+ EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakSpaceEmpty) { + string16 empty; + base::BreakIterator iter(&empty, base::BreakIterator::BREAK_SPACE); + ASSERT_TRUE(iter.Init()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. + EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakSpace) { + string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. + EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakSpaceSP) { + string16 str(UTF8ToUTF16(" foo bar! \npouet boom ")); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("bar! 
\n"), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("boom "), iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. + EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakSpacekWide16) { + // Two Greek words. + const string16 str(WideToUTF16( + L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" + L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2")); + const string16 word1(str.substr(0, 11)); + const string16 word2(str.substr(11, 5)); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(word1, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(word2, iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. + EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakSpaceWide32) { + // U+1D49C MATHEMATICAL SCRIPT CAPITAL A + const char* very_wide_char = "\xF0\x9D\x92\x9C"; + const string16 str( + UTF8ToUTF16(StringPrintf("%s a", very_wide_char))); + const string16 very_wide_word(str.substr(0, 3)); + + base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(very_wide_word, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 
+ EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakLineEmpty) { + string16 empty; + base::BreakIterator iter(&empty, base::BreakIterator::BREAK_NEWLINE); + ASSERT_TRUE(iter.Init()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. + EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakLine) { + string16 nl(UTF8ToUTF16("\n")); + string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom")); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(nl, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(nl, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("pouet boom"), iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. + EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakLineNL) { + string16 nl(UTF8ToUTF16("\n")); + string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom\n")); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(nl, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(nl, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("pouet boom\n"), iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 
+ EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakLineWide16) { + // Two Greek words separated by newline. + const string16 str(WideToUTF16( + L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" + L"\x03bf\x03c2\x000a\x0399\x03c3\x03c4\x03cc\x03c2")); + const string16 line1(str.substr(0, 11)); + const string16 line2(str.substr(11, 5)); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(line1, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(line2, iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. + EXPECT_FALSE(iter.IsWord()); +} + +TEST(BreakIteratorTest, BreakLineWide32) { + // U+1D49C MATHEMATICAL SCRIPT CAPITAL A + const char* very_wide_char = "\xF0\x9D\x92\x9C"; + const string16 str( + UTF8ToUTF16(StringPrintf("%s\na", very_wide_char))); + const string16 very_wide_line(str.substr(0, 3)); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(very_wide_line, iter.GetString()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 
+ EXPECT_FALSE(iter.IsWord()); +} diff --git a/base/i18n/file_util_icu.cc b/base/i18n/file_util_icu.cc index 0e9c2cd..34eefac 100644 --- a/base/i18n/file_util_icu.cc +++ b/base/i18n/file_util_icu.cc @@ -21,6 +21,10 @@ namespace { class IllegalCharacters { public: + static IllegalCharacters* GetInstance() { + return Singleton<IllegalCharacters>::get(); + } + bool contains(UChar32 ucs4) { return !!set->contains(ucs4); } @@ -76,19 +80,8 @@ IllegalCharacters::IllegalCharacters() { class LocaleAwareComparator { public: - LocaleAwareComparator() { - UErrorCode error_code = U_ZERO_ERROR; - // Use the default collator. The default locale should have been properly - // set by the time this constructor is called. - collator_.reset(icu::Collator::createInstance(error_code)); - DCHECK(U_SUCCESS(error_code)); - // Make it case-sensitive. - collator_->setStrength(icu::Collator::TERTIARY); - // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we - // do not pay performance penalty to guarantee sort order correctness for - // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a - // reasonable tradeoff because such file names should be rare and the sort - // order doesn't change much anyway. + static LocaleAwareComparator* GetInstance() { + return Singleton<LocaleAwareComparator>::get(); } // Note: A similar function is available in l10n_util. @@ -111,6 +104,21 @@ class LocaleAwareComparator { } private: + LocaleAwareComparator() { + UErrorCode error_code = U_ZERO_ERROR; + // Use the default collator. The default locale should have been properly + // set by the time this constructor is called. + collator_.reset(icu::Collator::createInstance(error_code)); + DCHECK(U_SUCCESS(error_code)); + // Make it case-sensitive. + collator_->setStrength(icu::Collator::TERTIARY); + // Note: We do not set UCOL_NORMALIZATION_MODE attribute. 
In other words, we + // do not pay performance penalty to guarantee sort order correctness for + // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a + // reasonable tradeoff because such file names should be rare and the sort + // order doesn't change much anyway. + } + scoped_ptr<icu::Collator> collator_; Lock lock_; friend struct DefaultSingletonTraits<LocaleAwareComparator>; @@ -123,19 +131,19 @@ class LocaleAwareComparator { namespace file_util { bool IsFilenameLegal(const string16& file_name) { - return Singleton<IllegalCharacters>()->containsNone(file_name); + return IllegalCharacters::GetInstance()->containsNone(file_name); } void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, char replace_char) { DCHECK(file_name); - DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char))); + DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char))); // Remove leading and trailing whitespace. TrimWhitespace(*file_name, TRIM_ALL, file_name); - IllegalCharacters* illegal = Singleton<IllegalCharacters>::get(); + IllegalCharacters* illegal = IllegalCharacters::GetInstance(); int cursor = 0; // The ICU macros expect an int. while (cursor < static_cast<int>(file_name->size())) { int char_begin = cursor; @@ -171,8 +179,8 @@ void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { #if defined(OS_WIN) - return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(), - b.value().c_str()) < 0; + return LocaleAwareComparator::GetInstance()->Compare(a.value().c_str(), + b.value().c_str()) < 0; #elif defined(OS_POSIX) // On linux, the file system encoding is not defined. We assume @@ -181,7 +189,7 @@ bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { // ICU's collator can take strings in OS native encoding. But we convert the // strings to UTF-16 ourselves to ensure conversion consistency. 
// TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? - return Singleton<LocaleAwareComparator>()->Compare( + return LocaleAwareComparator::GetInstance()->Compare( WideToUTF16(base::SysNativeMBToWide(a.value().c_str())), WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0; #else diff --git a/base/i18n/number_formatting.cc b/base/i18n/number_formatting.cc index 7a69294..df6af14 100644 --- a/base/i18n/number_formatting.cc +++ b/base/i18n/number_formatting.cc @@ -6,7 +6,8 @@ #include "base/format_macros.h" #include "base/logging.h" -#include "base/singleton.h" +#include "base/lazy_instance.h" +#include "base/scoped_ptr.h" #include "base/string_util.h" #include "base/utf_string_conversions.h" #include "unicode/numfmt.h" @@ -16,25 +17,26 @@ namespace base { namespace { -struct NumberFormatSingletonTraits - : public DefaultSingletonTraits<icu::NumberFormat> { - static icu::NumberFormat* New() { +struct NumberFormatWrapper { + NumberFormatWrapper() { + // There's no ICU call to destroy a NumberFormat object other than + // operator delete, so use the default Delete, which calls operator delete. + // This can cause problems if a different allocator is used by this file + // than by ICU. UErrorCode status = U_ZERO_ERROR; - icu::NumberFormat* formatter = icu::NumberFormat::createInstance(status); + number_format.reset(icu::NumberFormat::createInstance(status)); DCHECK(U_SUCCESS(status)); - return formatter; } - // There's no ICU call to destroy a NumberFormat object other than - // operator delete, so use the default Delete, which calls operator delete. - // This can cause problems if a different allocator is used by this file than - // by ICU. 
+ + scoped_ptr<icu::NumberFormat> number_format; }; } // namespace +static LazyInstance<NumberFormatWrapper> g_number_format(LINKER_INITIALIZED); + string16 FormatNumber(int64 number) { - icu::NumberFormat* number_format = - Singleton<icu::NumberFormat, NumberFormatSingletonTraits>::get(); + icu::NumberFormat* number_format = g_number_format.Get().number_format.get(); if (!number_format) { // As a fallback, just return the raw number in a string. diff --git a/base/i18n/rtl.cc b/base/i18n/rtl.cc index 6a5d293..12b376d 100644 --- a/base/i18n/rtl.cc +++ b/base/i18n/rtl.cc @@ -163,6 +163,7 @@ TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { } #endif +#if defined(OS_WIN) bool AdjustStringForLocaleDirection(string16* text) { if (!IsRTL() || text->empty()) return false; @@ -177,6 +178,57 @@ bool AdjustStringForLocaleDirection(string16* text) { return true; } +#else +bool AdjustStringForLocaleDirection(string16* text) { + // On OS X & GTK the directionality of a label is determined by the first + // strongly directional character. + // However, we want to make sure that in an LTR-language-UI all strings are + // left aligned and vice versa. + // A problem can arise if we display a string which starts with user input. + // User input may be of the opposite directionality to the UI. So the whole + // string will be displayed in the opposite directionality, e.g. if we want to + // display in an LTR UI [such as US English]: + // + // EMAN_NOISNETXE is now installed. + // + // Since EXTENSION_NAME begins with a strong RTL char, the label's + // directionality will be set to RTL and the string will be displayed visually + // as: + // + // .is now installed EMAN_NOISNETXE + // + // In order to solve this issue, we prepend an LRM to the string. An LRM is a + // strongly directional LTR char. + // We also append an LRM at the end, which ensures that we're in an LTR + // context. 
+ + // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the + // box so there is no issue with displaying zero-width bidi control characters + // on any system. Thus no need for the !IsRTL() check here. + if (text->empty()) + return false; + + bool ui_direction_is_rtl = IsRTL(); + + bool has_rtl_chars = StringContainsStrongRTLChars(*text); + if (!ui_direction_is_rtl && has_rtl_chars) { + WrapStringWithRTLFormatting(text); + text->insert(0, 1, kLeftToRightMark); + text->push_back(kLeftToRightMark); + } else if (ui_direction_is_rtl && has_rtl_chars) { + WrapStringWithRTLFormatting(text); + text->insert(0, 1, kRightToLeftMark); + text->push_back(kRightToLeftMark); + } else if (ui_direction_is_rtl) { + WrapStringWithLTRFormatting(text); + text->insert(0, 1, kRightToLeftMark); + text->push_back(kRightToLeftMark); + } + + return true; +} + +#endif // !OS_WIN #if defined(WCHAR_T_IS_UTF32) bool AdjustStringForLocaleDirection(std::wstring* text) { diff --git a/base/i18n/rtl.h b/base/i18n/rtl.h index 82ac576..a75ed4f 100644 --- a/base/i18n/rtl.h +++ b/base/i18n/rtl.h @@ -84,6 +84,7 @@ TextDirection GetFirstStrongCharacterDirection(const std::wstring& text); // string is always treated as a right-to-left string. This is done by // inserting certain Unicode formatting marks into the returned string. // +// ** Notes about the Windows version of this function: // TODO(idana) bug 6806: this function adjusts the string in question only // if the current locale is right-to-left. 
The function does not take care of // the opposite case (an RTL string displayed in an LTR context) since diff --git a/base/i18n/time_formatting.cc b/base/i18n/time_formatting.cc index 406145d..3fa984a 100644 --- a/base/i18n/time_formatting.cc +++ b/base/i18n/time_formatting.cc @@ -14,24 +14,21 @@ using base::Time; namespace { -std::wstring TimeFormat(const icu::DateFormat* formatter, - const Time& time) { +string16 TimeFormat(const icu::DateFormat* formatter, + const Time& time) { DCHECK(formatter); icu::UnicodeString date_string; formatter->format(static_cast<UDate>(time.ToDoubleT() * 1000), date_string); - std::wstring output; - bool success = UTF16ToWide(date_string.getBuffer(), date_string.length(), - &output); - DCHECK(success); - return output; + return string16(date_string.getBuffer(), + static_cast<size_t>(date_string.length())); } } // namespace namespace base { -std::wstring TimeFormatTimeOfDay(const Time& time) { +string16 TimeFormatTimeOfDay(const Time& time) { // We can omit the locale parameter because the default should match // Chrome's application locale. 
scoped_ptr<icu::DateFormat> formatter( @@ -39,31 +36,31 @@ std::wstring TimeFormatTimeOfDay(const Time& time) { return TimeFormat(formatter.get(), time); } -std::wstring TimeFormatShortDate(const Time& time) { +string16 TimeFormatShortDate(const Time& time) { scoped_ptr<icu::DateFormat> formatter( icu::DateFormat::createDateInstance(icu::DateFormat::kMedium)); return TimeFormat(formatter.get(), time); } -std::wstring TimeFormatShortDateNumeric(const Time& time) { +string16 TimeFormatShortDateNumeric(const Time& time) { scoped_ptr<icu::DateFormat> formatter( icu::DateFormat::createDateInstance(icu::DateFormat::kShort)); return TimeFormat(formatter.get(), time); } -std::wstring TimeFormatShortDateAndTime(const Time& time) { +string16 TimeFormatShortDateAndTime(const Time& time) { scoped_ptr<icu::DateFormat> formatter( icu::DateFormat::createDateTimeInstance(icu::DateFormat::kShort)); return TimeFormat(formatter.get(), time); } -std::wstring TimeFormatFriendlyDateAndTime(const Time& time) { +string16 TimeFormatFriendlyDateAndTime(const Time& time) { scoped_ptr<icu::DateFormat> formatter( icu::DateFormat::createDateTimeInstance(icu::DateFormat::kFull)); return TimeFormat(formatter.get(), time); } -std::wstring TimeFormatFriendlyDate(const Time& time) { +string16 TimeFormatFriendlyDate(const Time& time) { scoped_ptr<icu::DateFormat> formatter(icu::DateFormat::createDateInstance( icu::DateFormat::kFull)); return TimeFormat(formatter.get(), time); diff --git a/base/i18n/time_formatting.h b/base/i18n/time_formatting.h index d78ae9b..e70ad3d 100644 --- a/base/i18n/time_formatting.h +++ b/base/i18n/time_formatting.h @@ -9,32 +9,32 @@ #define BASE_I18N_TIME_FORMATTING_H_ #pragma once -#include <string> +#include "base/string16.h" namespace base { class Time; // Returns the time of day, e.g., "3:07 PM". -std::wstring TimeFormatTimeOfDay(const Time& time); +string16 TimeFormatTimeOfDay(const Time& time); // Returns a shortened date, e.g. 
"Nov 7, 2007" -std::wstring TimeFormatShortDate(const Time& time); +string16 TimeFormatShortDate(const Time& time); // Returns a numeric date such as 12/13/52. -std::wstring TimeFormatShortDateNumeric(const Time& time); +string16 TimeFormatShortDateNumeric(const Time& time); // Formats a time in a friendly sentence format, e.g. // "Monday, March 6, 2008 2:44:30 PM". -std::wstring TimeFormatShortDateAndTime(const Time& time); +string16 TimeFormatShortDateAndTime(const Time& time); // Formats a time in a friendly sentence format, e.g. // "Monday, March 6, 2008 2:44:30 PM". -std::wstring TimeFormatFriendlyDateAndTime(const Time& time); +string16 TimeFormatFriendlyDateAndTime(const Time& time); // Formats a time in a friendly sentence format, e.g. // "Monday, March 6, 2008". -std::wstring TimeFormatFriendlyDate(const Time& time); +string16 TimeFormatFriendlyDate(const Time& time); } // namespace base diff --git a/base/i18n/word_iterator.cc b/base/i18n/word_iterator.cc deleted file mode 100644 index a9fa4af..0000000 --- a/base/i18n/word_iterator.cc +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "base/i18n/word_iterator.h" - -#include "base/logging.h" -#include "unicode/ubrk.h" -#include "unicode/ustring.h" - -const size_t npos = -1; - -WordIterator::WordIterator(const string16* str, BreakType break_type) - : iter_(NULL), - string_(str), - break_type_(break_type), - prev_(npos), - pos_(0) { -} - -WordIterator::~WordIterator() { - if (iter_) - ubrk_close(iter_); -} - -bool WordIterator::Init() { - UErrorCode status = U_ZERO_ERROR; - UBreakIteratorType break_type; - switch (break_type_) { - case BREAK_WORD: - break_type = UBRK_WORD; - break; - case BREAK_LINE: - break_type = UBRK_LINE; - break; - default: - NOTREACHED(); - break_type = UBRK_LINE; - } - iter_ = ubrk_open(break_type, NULL, - string_->data(), static_cast<int32_t>(string_->size()), - &status); - if (U_FAILURE(status)) { - NOTREACHED() << "ubrk_open failed"; - return false; - } - ubrk_first(iter_); // Move the iterator to the beginning of the string. - return true; -} - -bool WordIterator::Advance() { - prev_ = pos_; - const int32_t pos = ubrk_next(iter_); - if (pos == UBRK_DONE) { - pos_ = npos; - return false; - } else { - pos_ = static_cast<size_t>(pos); - return true; - } -} - -bool WordIterator::IsWord() const { - return (ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE); -} - -string16 WordIterator::GetWord() const { - DCHECK(prev_ != npos && pos_ != npos); - return string_->substr(prev_, pos_ - prev_); -} diff --git a/base/i18n/word_iterator.h b/base/i18n/word_iterator.h deleted file mode 100644 index b097bc2..0000000 --- a/base/i18n/word_iterator.h +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef BASE_I18N_WORD_ITERATOR_H_ -#define BASE_I18N_WORD_ITERATOR_H_ -#pragma once - -#include <vector> - -#include "unicode/ubrk.h" -#include "unicode/uchar.h" - -#include "base/basictypes.h" -#include "base/string16.h" - -// The WordIterator class iterates through the words and word breaks -// in a string. (In the string " foo bar! ", the word breaks are at the -// periods in ". .foo. .bar.!. .".) -// -// To extract the words from a string, move a WordIterator through the -// string and test whether IsWord() is true. E.g., -// WordIterator iter(&str, WordIterator::BREAK_WORD); -// if (!iter.Init()) return false; -// while (iter.Advance()) { -// if (iter.IsWord()) { -// // region [iter.prev(),iter.pos()) contains a word. -// VLOG(1) << "word: " << iter.GetWord(); -// } -// } - - -class WordIterator { - public: - enum BreakType { - BREAK_WORD, - BREAK_LINE - }; - - // Requires |str| to live as long as the WordIterator does. - WordIterator(const string16* str, BreakType break_type); - ~WordIterator(); - - // Init() must be called before any of the iterators are valid. - // Returns false if ICU failed to initialize. - bool Init(); - - // Return the current break position within the string, - // or WordIterator::npos when done. - size_t pos() const { return pos_; } - // Return the value of pos() returned before Advance() was last called. - size_t prev() const { return prev_; } - - // Advance to the next break. Returns false if we've run past the end of - // the string. (Note that the very last "word break" is after the final - // character in the string, and when we advance to that position it's the - // last time Advance() returns true.) - bool Advance(); - - // Returns true if the break we just hit is the end of a word. - // (Otherwise, the break iterator just skipped over e.g. whitespace - // or punctuation.) - bool IsWord() const; - - // Return the word between prev() and pos(). 
- // Advance() must have been called successfully at least once - // for pos() to have advanced to somewhere useful. - string16 GetWord() const; - - private: - // ICU iterator. - UBreakIterator* iter_; -#if !defined(WCHAR_T_IS_UTF16) - std::vector<UChar> chars_; -#endif - - // The string we're iterating over. - const string16* string_; - - // The breaking style (word/line). - BreakType break_type_; - - // Previous and current iterator positions. - size_t prev_, pos_; - - DISALLOW_COPY_AND_ASSIGN(WordIterator); -}; - -#endif // BASE_I18N_WORD_ITERATOR_H__ diff --git a/base/i18n/word_iterator_unittest.cc b/base/i18n/word_iterator_unittest.cc deleted file mode 100644 index 92aff76..0000000 --- a/base/i18n/word_iterator_unittest.cc +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/word_iterator.h" - -#include "base/string_piece.h" -#include "base/string_util.h" -#include "base/utf_string_conversions.h" -#include "testing/gtest/include/gtest/gtest.h" - -TEST(WordIteratorTest, BreakWord) { - string16 space(UTF8ToUTF16(" ")); - - string16 str(UTF8ToUTF16(" foo bar! 
\npouet boom")); - WordIterator iter(&str, WordIterator::BREAK_WORD); - ASSERT_TRUE(iter.Init()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(space, iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(space, iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("!"), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(space, iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(space, iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord()); - EXPECT_FALSE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); -} - -TEST(WordIteratorTest, BreakLine) { - string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); - WordIterator iter(&str, WordIterator::BREAK_LINE); - ASSERT_TRUE(iter.Init()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("bar! 
\n"), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord()); - EXPECT_FALSE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); -} - -TEST(WordIteratorTest, BreakWide16) { - // "Παγκόσμιος Ιστός" - const string16 str(WideToUTF16( - L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" - L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2")); - const string16 word1(str.substr(0, 10)); - const string16 word2(str.substr(11, 5)); - WordIterator iter(&str, WordIterator::BREAK_WORD); - ASSERT_TRUE(iter.Init()); - EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(word1, iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(word2, iter.GetWord()); - EXPECT_FALSE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); -} - -TEST(WordIteratorTest, BreakWide32) { - // U+1D49C MATHEMATICAL SCRIPT CAPITAL A - const char* very_wide_char = "\xF0\x9D\x92\x9C"; - const string16 str( - UTF8ToUTF16(StringPrintf("%s a", very_wide_char))); - const string16 very_wide_word(str.substr(0, 2)); - - WordIterator iter(&str, WordIterator::BREAK_WORD); - ASSERT_TRUE(iter.Init()); - EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(very_wide_word, iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord()); - EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("a"), iter.GetWord()); - EXPECT_FALSE(iter.Advance()); - EXPECT_FALSE(iter.IsWord()); -} |