summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--base/base.gyp11
-rw-r--r--base/i18n/break_iterator.cc (renamed from base/i18n/word_iterator.cc)23
-rw-r--r--base/i18n/break_iterator.h (renamed from base/i18n/word_iterator.h)48
-rw-r--r--base/i18n/break_iterator_unittest.cc (renamed from base/i18n/word_iterator_unittest.cc)62
-rw-r--r--chrome/browser/autocomplete/history_quick_provider.cc6
-rw-r--r--chrome/browser/history/in_memory_url_index.cc6
-rw-r--r--chrome/browser/history/query_parser.cc10
-rw-r--r--views/view_text_utils.cc6
8 files changed, 87 insertions, 85 deletions
diff --git a/base/base.gyp b/base/base.gyp
index f68359a..70cf465 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -31,6 +31,8 @@
'base',
],
'sources': [
+ 'i18n/break_iterator.cc',
+ 'i18n/break_iterator.h',
'i18n/char_iterator.cc',
'i18n/char_iterator.h',
'i18n/file_util_icu.cc',
@@ -47,8 +49,6 @@
'i18n/rtl.h',
'i18n/time_formatting.cc',
'i18n/time_formatting.h',
- 'i18n/word_iterator.cc',
- 'i18n/word_iterator.h',
],
},
{
@@ -87,11 +87,11 @@
'gmock_unittest.cc',
'hmac_unittest.cc',
'id_map_unittest.cc',
+ 'i18n/break_iterator_unittest.cc',
'i18n/char_iterator_unittest.cc',
'i18n/file_util_icu_unittest.cc',
'i18n/icu_string_conversions_unittest.cc',
'i18n/rtl_unittest.cc',
- 'i18n/word_iterator_unittest.cc',
'json/json_reader_unittest.cc',
'json/json_writer_unittest.cc',
'json/string_escape_unittest.cc',
@@ -173,11 +173,6 @@
'win/scoped_variant_unittest.cc',
'worker_pool_unittest.cc',
],
- 'include_dirs': [
- # word_iterator.h (used by word_iterator_unittest.cc) leaks an ICU
- # #include for unicode/uchar.h. This should probably be cleaned up.
- '../third_party/icu/public/common',
- ],
'dependencies': [
'base',
'base_i18n',
diff --git a/base/i18n/word_iterator.cc b/base/i18n/break_iterator.cc
index 7ad9c84..f0f5240 100644
--- a/base/i18n/word_iterator.cc
+++ b/base/i18n/break_iterator.cc
@@ -2,16 +2,18 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/i18n/word_iterator.h"
+#include "base/i18n/break_iterator.h"
#include "base/logging.h"
#include "unicode/ubrk.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"
+namespace base {
+
const size_t npos = -1;
-WordIterator::WordIterator(const string16* str, BreakType break_type)
+BreakIterator::BreakIterator(const string16* str, BreakType break_type)
: iter_(NULL),
string_(str),
break_type_(break_type),
@@ -19,19 +21,19 @@ WordIterator::WordIterator(const string16* str, BreakType break_type)
pos_(0) {
}
-WordIterator::~WordIterator() {
+BreakIterator::~BreakIterator() {
if (iter_)
ubrk_close(iter_);
}
-bool WordIterator::Init() {
+bool BreakIterator::Init() {
UErrorCode status = U_ZERO_ERROR;
UBreakIteratorType break_type;
switch (break_type_) {
case BREAK_WORD:
break_type = UBRK_WORD;
break;
- case BREAK_LINE:
+ case BREAK_SPACE:
break_type = UBRK_LINE;
break;
default:
@@ -49,7 +51,7 @@ bool WordIterator::Init() {
return true;
}
-bool WordIterator::Advance() {
+bool BreakIterator::Advance() {
prev_ = pos_;
const int32_t pos = ubrk_next(iter_);
if (pos == UBRK_DONE) {
@@ -61,11 +63,14 @@ bool WordIterator::Advance() {
}
}
-bool WordIterator::IsWord() const {
- return (ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE);
+bool BreakIterator::IsWord() const {
+ return (break_type_ == BREAK_WORD &&
+ ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE);
}
-string16 WordIterator::GetWord() const {
+string16 BreakIterator::GetString() const {
DCHECK(prev_ != npos && pos_ != npos);
return string_->substr(prev_, pos_ - prev_);
}
+
+} // namespace base
diff --git a/base/i18n/word_iterator.h b/base/i18n/break_iterator.h
index ada86b9..0e89060 100644
--- a/base/i18n/word_iterator.h
+++ b/base/i18n/break_iterator.h
@@ -2,19 +2,17 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#ifndef BASE_I18N_WORD_ITERATOR_H_
-#define BASE_I18N_WORD_ITERATOR_H_
+#ifndef BASE_I18N_BREAK_ITERATOR_H_
+#define BASE_I18N_BREAK_ITERATOR_H_
#pragma once
-#include <vector>
-
#include "base/basictypes.h"
#include "base/string16.h"
-// The WordIterator class iterates through the words and word breaks
+// The BreakIterator class iterates through the words and word breaks
// in a UTF-16 string.
//
-// It provides two modes, BREAK_WORD and BREAK_LINE, which modify how
+// It provides two modes, BREAK_WORD and BREAK_SPACE, which modify how
// trailing non-word characters are aggregated into the returned word.
//
// Under BREAK_WORD mode (more common), the non-word characters are
@@ -22,40 +20,41 @@
// the string " foo bar! ", the word breaks are at the periods in
// ". .foo. .bar.!. .").
//
-// Under BREAK_LINE mode (less common), the non-word characters are
+// Under BREAK_SPACE mode (less common), the non-word characters are
// included in the word, breaking only when a space-equivalent character
// is encountered (e.g. in the UTF16-equivalent of the string " foo bar! ",
// the word breaks are at the periods in ". .foo .bar! .").
//
-// To extract the words from a string, move a BREAK_WORD WordIterator
+// To extract the words from a string, move a BREAK_WORD BreakIterator
// through the string and test whether IsWord() is true. E.g.,
-// WordIterator iter(&str, WordIterator::BREAK_WORD);
+// BreakIterator iter(&str, BreakIterator::BREAK_WORD);
// if (!iter.Init()) return false;
// while (iter.Advance()) {
// if (iter.IsWord()) {
// // region [iter.prev(),iter.pos()) contains a word.
-// VLOG(1) << "word: " << iter.GetWord();
+// VLOG(1) << "word: " << iter.GetString();
// }
// }
+namespace base {
-class WordIterator {
+class BreakIterator {
public:
enum BreakType {
BREAK_WORD,
- BREAK_LINE
+ BREAK_SPACE
};
- // Requires |str| to live as long as the WordIterator does.
- WordIterator(const string16* str, BreakType break_type);
- ~WordIterator();
+ // Requires |str| to live as long as the BreakIterator does.
+ BreakIterator(const string16* str, BreakType break_type);
+ ~BreakIterator();
// Init() must be called before any of the iterators are valid.
// Returns false if ICU failed to initialize.
bool Init();
// Return the current break position within the string,
- // or WordIterator::npos when done.
+ // or BreakIterator::npos when done.
size_t pos() const { return pos_; }
// Return the value of pos() returned before Advance() was last called.
size_t prev() const { return prev_; }
@@ -66,15 +65,16 @@ class WordIterator {
// last time Advance() returns true.)
bool Advance();
- // Returns true if the break we just hit is the end of a word.
- // (Otherwise, the break iterator just skipped over e.g. whitespace
- // or punctuation.)
+ // Under BREAK_WORD mode, returns true if the break we just hit is the
+ // end of a word. (Otherwise, the break iterator just skipped over e.g.
+ // whitespace or punctuation.) Under BREAK_SPACE mode, this distinction
+ // doesn't apply and it always returns false.
bool IsWord() const;
- // Return the word between prev() and pos().
+ // Return the string between prev() and pos().
// Advance() must have been called successfully at least once
// for pos() to have advanced to somewhere useful.
- string16 GetWord() const;
+ string16 GetString() const;
private:
// ICU iterator, avoiding ICU ubrk.h dependence.
@@ -92,7 +92,9 @@ class WordIterator {
// Previous and current iterator positions.
size_t prev_, pos_;
- DISALLOW_COPY_AND_ASSIGN(WordIterator);
+ DISALLOW_COPY_AND_ASSIGN(BreakIterator);
};
-#endif // BASE_I18N_WORD_ITERATOR_H__
+} // namespace base
+
+#endif // BASE_I18N_BREAK_ITERATOR_H_
diff --git a/base/i18n/word_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc
index 92aff76..8add918 100644
--- a/base/i18n/word_iterator_unittest.cc
+++ b/base/i18n/break_iterator_unittest.cc
@@ -2,116 +2,116 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/i18n/word_iterator.h"
+#include "base/i18n/break_iterator.h"
#include "base/string_piece.h"
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
-TEST(WordIteratorTest, BreakWord) {
+TEST(BreakIteratorTest, BreakWord) {
string16 space(UTF8ToUTF16(" "));
string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
- WordIterator iter(&str, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
ASSERT_TRUE(iter.Init());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(space, iter.GetWord());
+ EXPECT_EQ(space, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(space, iter.GetWord());
+ EXPECT_EQ(space, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("!"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("!"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(space, iter.GetWord());
+ EXPECT_EQ(space, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(space, iter.GetWord());
+ EXPECT_EQ(space, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
EXPECT_FALSE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
}
-TEST(WordIteratorTest, BreakLine) {
+TEST(BreakIteratorTest, BreakSpace) {
string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
- WordIterator iter(&str, WordIterator::BREAK_LINE);
+ base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
ASSERT_TRUE(iter.Init());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString());
EXPECT_TRUE(iter.Advance());
- EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetWord());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
EXPECT_FALSE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
}
-TEST(WordIteratorTest, BreakWide16) {
+TEST(BreakIteratorTest, BreakWide16) {
// "Παγκόσμιος Ιστός"
const string16 str(WideToUTF16(
L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));
const string16 word1(str.substr(0, 10));
const string16 word2(str.substr(11, 5));
- WordIterator iter(&str, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
ASSERT_TRUE(iter.Init());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(word1, iter.GetWord());
+ EXPECT_EQ(word1, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(word2, iter.GetWord());
+ EXPECT_EQ(word2, iter.GetString());
EXPECT_FALSE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
}
-TEST(WordIteratorTest, BreakWide32) {
+TEST(BreakIteratorTest, BreakWide32) {
// U+1D49C MATHEMATICAL SCRIPT CAPITAL A
const char* very_wide_char = "\xF0\x9D\x92\x9C";
const string16 str(
UTF8ToUTF16(StringPrintf("%s a", very_wide_char)));
const string16 very_wide_word(str.substr(0, 2));
- WordIterator iter(&str, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
ASSERT_TRUE(iter.Init());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(very_wide_word, iter.GetWord());
+ EXPECT_EQ(very_wide_word, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("a"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
EXPECT_FALSE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
}
diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc
index fbd12bd..82219b2f1 100644
--- a/chrome/browser/autocomplete/history_quick_provider.cc
+++ b/chrome/browser/autocomplete/history_quick_provider.cc
@@ -5,7 +5,7 @@
#include "chrome/browser/autocomplete/history_quick_provider.h"
#include "base/basictypes.h"
-#include "base/i18n/word_iterator.h"
+#include "base/i18n/break_iterator.h"
#include "base/string_util.h"
#include "base/logging.h"
#include "base/utf_string_conversions.h"
@@ -167,11 +167,11 @@ void HistoryQuickProvider::SetIndexForTesting(
history::InMemoryURLIndex::String16Vector
HistoryQuickProvider::WordVectorFromString16(const string16& uni_string) {
history::InMemoryURLIndex::String16Vector words;
- WordIterator iter(&uni_string, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&uni_string, base::BreakIterator::BREAK_WORD);
if (iter.Init()) {
while (iter.Advance()) {
if (iter.IsWord())
- words.push_back(iter.GetWord());
+ words.push_back(iter.GetString());
}
}
return words;
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc
index b3dabab9..62a22e6 100644
--- a/chrome/browser/history/in_memory_url_index.cc
+++ b/chrome/browser/history/in_memory_url_index.cc
@@ -8,7 +8,7 @@
#include <limits>
#include "app/l10n_util.h"
-#include "base/i18n/word_iterator.h"
+#include "base/i18n/break_iterator.h"
#include "base/string_util.h"
#include "base/time.h"
#include "base/utf_string_conversions.h"
@@ -234,11 +234,11 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
const string16& uni_string) {
String16Set words;
- WordIterator iter(&uni_string, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&uni_string, base::BreakIterator::BREAK_WORD);
if (iter.Init()) {
while (iter.Advance()) {
if (iter.IsWord())
- words.insert(iter.GetWord());
+ words.insert(iter.GetString());
}
}
return words;
diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc
index e1afb86..12ecc29 100644
--- a/chrome/browser/history/query_parser.cc
+++ b/chrome/browser/history/query_parser.cc
@@ -7,7 +7,7 @@
#include <algorithm>
#include "app/l10n_util.h"
-#include "base/i18n/word_iterator.h"
+#include "base/i18n/break_iterator.h"
#include "base/logging.h"
#include "base/scoped_vector.h"
#include "base/string_util.h"
@@ -322,7 +322,7 @@ bool QueryParser::DoesQueryMatch(const string16& text,
bool QueryParser::ParseQueryImpl(const string16& query,
QueryNodeList* root) {
- WordIterator iter(&query, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&query, base::BreakIterator::BREAK_WORD);
// TODO(evanm): support a locale here
if (!iter.Init())
return false;
@@ -338,7 +338,7 @@ bool QueryParser::ParseQueryImpl(const string16& query,
// is not necessarily a word, but could also be a sequence of punctuation
// or whitespace.
if (iter.IsWord()) {
- string16 word = iter.GetWord();
+ string16 word = iter.GetString();
QueryNodeWord* word_node = new QueryNodeWord(word);
if (in_quotes)
@@ -365,7 +365,7 @@ bool QueryParser::ParseQueryImpl(const string16& query,
void QueryParser::ExtractQueryWords(const string16& text,
std::vector<QueryWord>* words) {
- WordIterator iter(&text, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&text, base::BreakIterator::BREAK_WORD);
// TODO(evanm): support a locale here
if (!iter.Init())
return;
@@ -375,7 +375,7 @@ void QueryParser::ExtractQueryWords(const string16& text,
// is not necessarily a word, but could also be a sequence of punctuation
// or whitespace.
if (iter.IsWord()) {
- string16 word = iter.GetWord();
+ string16 word = iter.GetString();
if (!word.empty()) {
words->push_back(QueryWord());
words->back().word = word;
diff --git a/views/view_text_utils.cc b/views/view_text_utils.cc
index 73bef45..df42544 100644
--- a/views/view_text_utils.cc
+++ b/views/view_text_utils.cc
@@ -5,7 +5,7 @@
#include "views/view_text_utils.h"
#include "app/bidi_line_iterator.h"
-#include "base/i18n/word_iterator.h"
+#include "base/i18n/break_iterator.h"
#include "base/logging.h"
#include "base/utf_string_conversions.h"
#include "gfx/canvas_skia.h"
@@ -98,7 +98,7 @@ void DrawTextStartingFrom(gfx::Canvas* canvas,
// Iterate through line breaking opportunities (which in English would be
// spaces and such). This tells us where to wrap.
string16 text16(WideToUTF16(text));
- WordIterator iter(&text16, WordIterator::BREAK_LINE);
+ base::BreakIterator iter(&text16, base::BreakIterator::BREAK_SPACE);
if (!iter.Init())
return;
@@ -112,7 +112,7 @@ void DrawTextStartingFrom(gfx::Canvas* canvas,
// Get the word and figure out the dimensions.
std::wstring word;
if (!ltr_within_rtl)
- word = UTF16ToWide(iter.GetWord()); // Get the next word.
+ word = UTF16ToWide(iter.GetString()); // Get the next word.
else
word = text; // Draw the whole text at once.