diff options
-rw-r--r-- | base/base.gyp | 11 | ||||
-rw-r--r-- | base/i18n/break_iterator.cc (renamed from base/i18n/word_iterator.cc) | 23 | ||||
-rw-r--r-- | base/i18n/break_iterator.h (renamed from base/i18n/word_iterator.h) | 48 | ||||
-rw-r--r-- | base/i18n/break_iterator_unittest.cc (renamed from base/i18n/word_iterator_unittest.cc) | 62 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_quick_provider.cc | 6 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index.cc | 6 | ||||
-rw-r--r-- | chrome/browser/history/query_parser.cc | 10 | ||||
-rw-r--r-- | views/view_text_utils.cc | 6 |
8 files changed, 87 insertions, 85 deletions
diff --git a/base/base.gyp b/base/base.gyp index f68359a..70cf465 100644 --- a/base/base.gyp +++ b/base/base.gyp @@ -31,6 +31,8 @@ 'base', ], 'sources': [ + 'i18n/break_iterator.cc', + 'i18n/break_iterator.h', 'i18n/char_iterator.cc', 'i18n/char_iterator.h', 'i18n/file_util_icu.cc', @@ -47,8 +49,6 @@ 'i18n/rtl.h', 'i18n/time_formatting.cc', 'i18n/time_formatting.h', - 'i18n/word_iterator.cc', - 'i18n/word_iterator.h', ], }, { @@ -87,11 +87,11 @@ 'gmock_unittest.cc', 'hmac_unittest.cc', 'id_map_unittest.cc', + 'i18n/break_iterator_unittest.cc', 'i18n/char_iterator_unittest.cc', 'i18n/file_util_icu_unittest.cc', 'i18n/icu_string_conversions_unittest.cc', 'i18n/rtl_unittest.cc', - 'i18n/word_iterator_unittest.cc', 'json/json_reader_unittest.cc', 'json/json_writer_unittest.cc', 'json/string_escape_unittest.cc', @@ -173,11 +173,6 @@ 'win/scoped_variant_unittest.cc', 'worker_pool_unittest.cc', ], - 'include_dirs': [ - # word_iterator.h (used by word_iterator_unittest.cc) leaks an ICU - # #include for unicode/uchar.h. This should probably be cleaned up. - '../third_party/icu/public/common', - ], 'dependencies': [ 'base', 'base_i18n', diff --git a/base/i18n/word_iterator.cc b/base/i18n/break_iterator.cc index 7ad9c84..f0f5240 100644 --- a/base/i18n/word_iterator.cc +++ b/base/i18n/break_iterator.cc @@ -2,16 +2,18 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
-#include "base/i18n/word_iterator.h" +#include "base/i18n/break_iterator.h" #include "base/logging.h" #include "unicode/ubrk.h" #include "unicode/uchar.h" #include "unicode/ustring.h" +namespace base { + const size_t npos = -1; -WordIterator::WordIterator(const string16* str, BreakType break_type) +BreakIterator::BreakIterator(const string16* str, BreakType break_type) : iter_(NULL), string_(str), break_type_(break_type), @@ -19,19 +21,19 @@ WordIterator::WordIterator(const string16* str, BreakType break_type) pos_(0) { } -WordIterator::~WordIterator() { +BreakIterator::~BreakIterator() { if (iter_) ubrk_close(iter_); } -bool WordIterator::Init() { +bool BreakIterator::Init() { UErrorCode status = U_ZERO_ERROR; UBreakIteratorType break_type; switch (break_type_) { case BREAK_WORD: break_type = UBRK_WORD; break; - case BREAK_LINE: + case BREAK_SPACE: break_type = UBRK_LINE; break; default: @@ -49,7 +51,7 @@ bool WordIterator::Init() { return true; } -bool WordIterator::Advance() { +bool BreakIterator::Advance() { prev_ = pos_; const int32_t pos = ubrk_next(iter_); if (pos == UBRK_DONE) { @@ -61,11 +63,14 @@ bool WordIterator::Advance() { } } -bool WordIterator::IsWord() const { - return (ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE); +bool BreakIterator::IsWord() const { + return (break_type_ == BREAK_WORD && + ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE); } -string16 WordIterator::GetWord() const { +string16 BreakIterator::GetString() const { DCHECK(prev_ != npos && pos_ != npos); return string_->substr(prev_, pos_ - prev_); } + +} // namespace base diff --git a/base/i18n/word_iterator.h b/base/i18n/break_iterator.h index ada86b9..0e89060 100644 --- a/base/i18n/word_iterator.h +++ b/base/i18n/break_iterator.h @@ -2,19 +2,17 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
-#ifndef BASE_I18N_WORD_ITERATOR_H_ -#define BASE_I18N_WORD_ITERATOR_H_ +#ifndef BASE_I18N_BREAK_ITERATOR_H_ +#define BASE_I18N_BREAK_ITERATOR_H_ #pragma once -#include <vector> - #include "base/basictypes.h" #include "base/string16.h" -// The WordIterator class iterates through the words and word breaks +// The BreakIterator class iterates through the words and word breaks // in a UTF-16 string. // -// It provides two modes, BREAK_WORD and BREAK_LINE, which modify how +// It provides two modes, BREAK_WORD and BREAK_SPACE, which modify how // trailing non-word characters are aggregated into the returned word. // // Under BREAK_WORD mode (more common), the non-word characters are @@ -22,40 +20,41 @@ // the string " foo bar! ", the word breaks are at the periods in // ". .foo. .bar.!. ."). // -// Under BREAK_LINE mode (less common), the non-word characters are +// Under BREAK_SPACE mode (less common), the non-word characters are // included in the word, breaking only when a space-equivalent character // is encountered (e.g. in the UTF16-equivalent of the string " foo bar! ", // the word breaks are at the periods in ". .foo .bar! ."). // -// To extract the words from a string, move a BREAK_WORD WordIterator +// To extract the words from a string, move a BREAK_WORD BreakIterator // through the string and test whether IsWord() is true. E.g., -// WordIterator iter(&str, WordIterator::BREAK_WORD); +// BreakIterator iter(&str, BreakIterator::BREAK_WORD); // if (!iter.Init()) return false; // while (iter.Advance()) { // if (iter.IsWord()) { // // region [iter.prev(),iter.pos()) contains a word. -// VLOG(1) << "word: " << iter.GetWord(); +// VLOG(1) << "word: " << iter.GetString(); // } // } +namespace base { -class WordIterator { +class BreakIterator { public: enum BreakType { BREAK_WORD, - BREAK_LINE + BREAK_SPACE }; - // Requires |str| to live as long as the WordIterator does. 
- WordIterator(const string16* str, BreakType break_type); - ~WordIterator(); + // Requires |str| to live as long as the BreakIterator does. + BreakIterator(const string16* str, BreakType break_type); + ~BreakIterator(); // Init() must be called before any of the iterators are valid. // Returns false if ICU failed to initialize. bool Init(); // Return the current break position within the string, - // or WordIterator::npos when done. + // or BreakIterator::npos when done. size_t pos() const { return pos_; } // Return the value of pos() returned before Advance() was last called. size_t prev() const { return prev_; } @@ -66,15 +65,16 @@ class WordIterator { // last time Advance() returns true.) bool Advance(); - // Returns true if the break we just hit is the end of a word. - // (Otherwise, the break iterator just skipped over e.g. whitespace - // or punctuation.) + // Under BREAK_WORD mode, returns true if the break we just hit is the + // end of a word. (Otherwise, the break iterator just skipped over e.g. + // whitespace or punctuation.) Under BREAK_SPACE mode, this distinction + // doesn't apply and it always returns false. bool IsWord() const; - // Return the word between prev() and pos(). + // Return the string between prev() and pos(). // Advance() must have been called successfully at least once // for pos() to have advanced to somewhere useful. - string16 GetWord() const; + string16 GetString() const; private: // ICU iterator, avoiding ICU ubrk.h dependence. @@ -92,7 +92,9 @@ class WordIterator { // Previous and current iterator positions. 
size_t prev_, pos_; - DISALLOW_COPY_AND_ASSIGN(WordIterator); + DISALLOW_COPY_AND_ASSIGN(BreakIterator); }; -#endif // BASE_I18N_WORD_ITERATOR_H__ +} // namespace base + +#endif // BASE_I18N_BREAK_ITERATOR_H__ diff --git a/base/i18n/word_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc index 92aff76..8add918 100644 --- a/base/i18n/word_iterator_unittest.cc +++ b/base/i18n/break_iterator_unittest.cc @@ -2,116 +2,116 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "base/i18n/word_iterator.h" +#include "base/i18n/break_iterator.h" #include "base/string_piece.h" #include "base/string_util.h" #include "base/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" -TEST(WordIteratorTest, BreakWord) { +TEST(BreakIteratorTest, BreakWord) { string16 space(UTF8ToUTF16(" ")); string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); - WordIterator iter(&str, WordIterator::BREAK_WORD); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD); ASSERT_TRUE(iter.Init()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(space, iter.GetWord()); + EXPECT_EQ(space, iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(space, iter.GetWord()); + EXPECT_EQ(space, iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("!"), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("!"), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(space, iter.GetWord()); + EXPECT_EQ(space, iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - 
EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(space, iter.GetWord()); + EXPECT_EQ(space, iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString()); EXPECT_FALSE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); } -TEST(WordIteratorTest, BreakLine) { +TEST(BreakIteratorTest, BreakSpace) { string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); - WordIterator iter(&str, WordIterator::BREAK_LINE); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE); ASSERT_TRUE(iter.Init()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString()); EXPECT_TRUE(iter.Advance()); - EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetWord()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(UTF8ToUTF16("bar! 
\n"), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString()); EXPECT_FALSE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); } -TEST(WordIteratorTest, BreakWide16) { +TEST(BreakIteratorTest, BreakWide16) { // "Παγκόσμιος Ιστός" const string16 str(WideToUTF16( L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2")); const string16 word1(str.substr(0, 10)); const string16 word2(str.substr(11, 5)); - WordIterator iter(&str, WordIterator::BREAK_WORD); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD); ASSERT_TRUE(iter.Init()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(word1, iter.GetWord()); + EXPECT_EQ(word1, iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(word2, iter.GetWord()); + EXPECT_EQ(word2, iter.GetString()); EXPECT_FALSE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); } -TEST(WordIteratorTest, BreakWide32) { +TEST(BreakIteratorTest, BreakWide32) { // U+1D49C MATHEMATICAL SCRIPT CAPITAL A const char* very_wide_char = "\xF0\x9D\x92\x9C"; const string16 str( UTF8ToUTF16(StringPrintf("%s a", very_wide_char))); const string16 very_wide_word(str.substr(0, 2)); - WordIterator iter(&str, WordIterator::BREAK_WORD); + base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD); ASSERT_TRUE(iter.Init()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(very_wide_word, iter.GetWord()); + EXPECT_EQ(very_wide_word, iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); - 
EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); EXPECT_TRUE(iter.Advance()); EXPECT_TRUE(iter.IsWord()); - EXPECT_EQ(UTF8ToUTF16("a"), iter.GetWord()); + EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); EXPECT_FALSE(iter.Advance()); EXPECT_FALSE(iter.IsWord()); } diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc index fbd12bd..82219b2f1 100644 --- a/chrome/browser/autocomplete/history_quick_provider.cc +++ b/chrome/browser/autocomplete/history_quick_provider.cc @@ -5,7 +5,7 @@ #include "chrome/browser/autocomplete/history_quick_provider.h" #include "base/basictypes.h" -#include "base/i18n/word_iterator.h" +#include "base/i18n/break_iterator.h" #include "base/string_util.h" #include "base/logging.h" #include "base/utf_string_conversions.h" @@ -167,11 +167,11 @@ void HistoryQuickProvider::SetIndexForTesting( history::InMemoryURLIndex::String16Vector HistoryQuickProvider::WordVectorFromString16(const string16& uni_string) { history::InMemoryURLIndex::String16Vector words; - WordIterator iter(&uni_string, WordIterator::BREAK_WORD); + base::BreakIterator iter(&uni_string, base::BreakIterator::BREAK_WORD); if (iter.Init()) { while (iter.Advance()) { if (iter.IsWord()) - words.push_back(iter.GetWord()); + words.push_back(iter.GetString()); } } return words; diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index b3dabab9..62a22e6 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -8,7 +8,7 @@ #include <limits> #include "app/l10n_util.h" -#include "base/i18n/word_iterator.h" +#include "base/i18n/break_iterator.h" #include "base/string_util.h" #include "base/time.h" #include "base/utf_string_conversions.h" @@ -234,11 +234,11 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( InMemoryURLIndex::String16Set 
InMemoryURLIndex::WordSetFromString16( const string16& uni_string) { String16Set words; - WordIterator iter(&uni_string, WordIterator::BREAK_WORD); + base::BreakIterator iter(&uni_string, base::BreakIterator::BREAK_WORD); if (iter.Init()) { while (iter.Advance()) { if (iter.IsWord()) - words.insert(iter.GetWord()); + words.insert(iter.GetString()); } } return words; diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc index e1afb86..12ecc29 100644 --- a/chrome/browser/history/query_parser.cc +++ b/chrome/browser/history/query_parser.cc @@ -7,7 +7,7 @@ #include <algorithm> #include "app/l10n_util.h" -#include "base/i18n/word_iterator.h" +#include "base/i18n/break_iterator.h" #include "base/logging.h" #include "base/scoped_vector.h" #include "base/string_util.h" @@ -322,7 +322,7 @@ bool QueryParser::DoesQueryMatch(const string16& text, bool QueryParser::ParseQueryImpl(const string16& query, QueryNodeList* root) { - WordIterator iter(&query, WordIterator::BREAK_WORD); + base::BreakIterator iter(&query, base::BreakIterator::BREAK_WORD); // TODO(evanm): support a locale here if (!iter.Init()) return false; @@ -338,7 +338,7 @@ bool QueryParser::ParseQueryImpl(const string16& query, // is not necessarily a word, but could also be a sequence of punctuation // or whitespace. 
if (iter.IsWord()) { - string16 word = iter.GetWord(); + string16 word = iter.GetString(); QueryNodeWord* word_node = new QueryNodeWord(word); if (in_quotes) @@ -365,7 +365,7 @@ bool QueryParser::ParseQueryImpl(const string16& query, void QueryParser::ExtractQueryWords(const string16& text, std::vector<QueryWord>* words) { - WordIterator iter(&text, WordIterator::BREAK_WORD); + base::BreakIterator iter(&text, base::BreakIterator::BREAK_WORD); // TODO(evanm): support a locale here if (!iter.Init()) return; @@ -375,7 +375,7 @@ void QueryParser::ExtractQueryWords(const string16& text, // is not necessarily a word, but could also be a sequence of punctuation // or whitespace. if (iter.IsWord()) { - string16 word = iter.GetWord(); + string16 word = iter.GetString(); if (!word.empty()) { words->push_back(QueryWord()); words->back().word = word; diff --git a/views/view_text_utils.cc b/views/view_text_utils.cc index 73bef45..df42544 100644 --- a/views/view_text_utils.cc +++ b/views/view_text_utils.cc @@ -5,7 +5,7 @@ #include "views/view_text_utils.h" #include "app/bidi_line_iterator.h" -#include "base/i18n/word_iterator.h" +#include "base/i18n/break_iterator.h" #include "base/logging.h" #include "base/utf_string_conversions.h" #include "gfx/canvas_skia.h" @@ -98,7 +98,7 @@ void DrawTextStartingFrom(gfx::Canvas* canvas, // Iterate through line breaking opportunities (which in English would be // spaces and such). This tells us where to wrap. string16 text16(WideToUTF16(text)); - WordIterator iter(&text16, WordIterator::BREAK_LINE); + base::BreakIterator iter(&text16, base::BreakIterator::BREAK_SPACE); if (!iter.Init()) return; @@ -112,7 +112,7 @@ void DrawTextStartingFrom(gfx::Canvas* canvas, // Get the word and figure out the dimensions. std::wstring word; if (!ltr_within_rtl) - word = UTF16ToWide(iter.GetWord()); // Get the next word. + word = UTF16ToWide(iter.GetString()); // Get the next word. else word = text; // Draw the whole text at once. |