summaryrefslogtreecommitdiffstats
path: root/base
diff options
context:
space:
mode:
authorcdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-12-15 18:00:54 +0000
committercdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-12-15 18:00:54 +0000
commit55954d890dd1f817ea7f49374f1de14d826b2fa4 (patch)
treedd9f64a4d64642a6d9d5c8de019dc5c778016071 /base
parent6df44fb660221182373b00ad27840040167205d7 (diff)
downloadchromium_src-55954d890dd1f817ea7f49374f1de14d826b2fa4.zip
chromium_src-55954d890dd1f817ea7f49374f1de14d826b2fa4.tar.gz
chromium_src-55954d890dd1f817ea7f49374f1de14d826b2fa4.tar.bz2
Committing second word iterator patch for tsepez. This was originally reviewed at http://codereview.chromium.org/5796003/
Review URL: http://codereview.chromium.org/5707011 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@69278 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r--base/base.gyp11
-rw-r--r--base/i18n/break_iterator.cc (renamed from base/i18n/word_iterator.cc)23
-rw-r--r--base/i18n/break_iterator.h (renamed from base/i18n/word_iterator.h)48
-rw-r--r--base/i18n/break_iterator_unittest.cc (renamed from base/i18n/word_iterator_unittest.cc)62
4 files changed, 73 insertions, 71 deletions
diff --git a/base/base.gyp b/base/base.gyp
index f68359a..70cf465 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -31,6 +31,8 @@
'base',
],
'sources': [
+ 'i18n/break_iterator.cc',
+ 'i18n/break_iterator.h',
'i18n/char_iterator.cc',
'i18n/char_iterator.h',
'i18n/file_util_icu.cc',
@@ -47,8 +49,6 @@
'i18n/rtl.h',
'i18n/time_formatting.cc',
'i18n/time_formatting.h',
- 'i18n/word_iterator.cc',
- 'i18n/word_iterator.h',
],
},
{
@@ -87,11 +87,11 @@
'gmock_unittest.cc',
'hmac_unittest.cc',
'id_map_unittest.cc',
+ 'i18n/break_iterator_unittest.cc',
'i18n/char_iterator_unittest.cc',
'i18n/file_util_icu_unittest.cc',
'i18n/icu_string_conversions_unittest.cc',
'i18n/rtl_unittest.cc',
- 'i18n/word_iterator_unittest.cc',
'json/json_reader_unittest.cc',
'json/json_writer_unittest.cc',
'json/string_escape_unittest.cc',
@@ -173,11 +173,6 @@
'win/scoped_variant_unittest.cc',
'worker_pool_unittest.cc',
],
- 'include_dirs': [
- # word_iterator.h (used by word_iterator_unittest.cc) leaks an ICU
- # #include for unicode/uchar.h. This should probably be cleaned up.
- '../third_party/icu/public/common',
- ],
'dependencies': [
'base',
'base_i18n',
diff --git a/base/i18n/word_iterator.cc b/base/i18n/break_iterator.cc
index 7ad9c84..f0f5240 100644
--- a/base/i18n/word_iterator.cc
+++ b/base/i18n/break_iterator.cc
@@ -2,16 +2,18 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/i18n/word_iterator.h"
+#include "base/i18n/break_iterator.h"
#include "base/logging.h"
#include "unicode/ubrk.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"
+namespace base {
+
const size_t npos = -1;
-WordIterator::WordIterator(const string16* str, BreakType break_type)
+BreakIterator::BreakIterator(const string16* str, BreakType break_type)
: iter_(NULL),
string_(str),
break_type_(break_type),
@@ -19,19 +21,19 @@ WordIterator::WordIterator(const string16* str, BreakType break_type)
pos_(0) {
}
-WordIterator::~WordIterator() {
+BreakIterator::~BreakIterator() {
if (iter_)
ubrk_close(iter_);
}
-bool WordIterator::Init() {
+bool BreakIterator::Init() {
UErrorCode status = U_ZERO_ERROR;
UBreakIteratorType break_type;
switch (break_type_) {
case BREAK_WORD:
break_type = UBRK_WORD;
break;
- case BREAK_LINE:
+ case BREAK_SPACE:
break_type = UBRK_LINE;
break;
default:
@@ -49,7 +51,7 @@ bool WordIterator::Init() {
return true;
}
-bool WordIterator::Advance() {
+bool BreakIterator::Advance() {
prev_ = pos_;
const int32_t pos = ubrk_next(iter_);
if (pos == UBRK_DONE) {
@@ -61,11 +63,14 @@ bool WordIterator::Advance() {
}
}
-bool WordIterator::IsWord() const {
- return (ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE);
+bool BreakIterator::IsWord() const {
+ return (break_type_ == BREAK_WORD &&
+ ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE);
}
-string16 WordIterator::GetWord() const {
+string16 BreakIterator::GetString() const {
DCHECK(prev_ != npos && pos_ != npos);
return string_->substr(prev_, pos_ - prev_);
}
+
+} // namespace base
diff --git a/base/i18n/word_iterator.h b/base/i18n/break_iterator.h
index ada86b9..0e89060 100644
--- a/base/i18n/word_iterator.h
+++ b/base/i18n/break_iterator.h
@@ -2,19 +2,17 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#ifndef BASE_I18N_WORD_ITERATOR_H_
-#define BASE_I18N_WORD_ITERATOR_H_
+#ifndef BASE_I18N_BREAK_ITERATOR_H_
+#define BASE_I18N_BREAK_ITERATOR_H_
#pragma once
-#include <vector>
-
#include "base/basictypes.h"
#include "base/string16.h"
-// The WordIterator class iterates through the words and word breaks
+// The BreakIterator class iterates through the words and word breaks
// in a UTF-16 string.
//
-// It provides two modes, BREAK_WORD and BREAK_LINE, which modify how
+// It provides two modes, BREAK_WORD and BREAK_SPACE, which modify how
// trailing non-word characters are aggregated into the returned word.
//
// Under BREAK_WORD mode (more common), the non-word characters are
@@ -22,40 +20,41 @@
// the string " foo bar! ", the word breaks are at the periods in
// ". .foo. .bar.!. .").
//
-// Under BREAK_LINE mode (less common), the non-word characters are
+// Under BREAK_SPACE mode (less common), the non-word characters are
// included in the word, breaking only when a space-equivalent character
// is encountered (e.g. in the UTF16-equivalent of the string " foo bar! ",
// the word breaks are at the periods in ". .foo .bar! .").
//
-// To extract the words from a string, move a BREAK_WORD WordIterator
+// To extract the words from a string, move a BREAK_WORD BreakIterator
// through the string and test whether IsWord() is true. E.g.,
-// WordIterator iter(&str, WordIterator::BREAK_WORD);
+// BreakIterator iter(&str, BreakIterator::BREAK_WORD);
// if (!iter.Init()) return false;
// while (iter.Advance()) {
// if (iter.IsWord()) {
// // region [iter.prev(),iter.pos()) contains a word.
-// VLOG(1) << "word: " << iter.GetWord();
+// VLOG(1) << "word: " << iter.GetString();
// }
// }
+namespace base {
-class WordIterator {
+class BreakIterator {
public:
enum BreakType {
BREAK_WORD,
- BREAK_LINE
+ BREAK_SPACE
};
- // Requires |str| to live as long as the WordIterator does.
- WordIterator(const string16* str, BreakType break_type);
- ~WordIterator();
+ // Requires |str| to live as long as the BreakIterator does.
+ BreakIterator(const string16* str, BreakType break_type);
+ ~BreakIterator();
// Init() must be called before any of the iterators are valid.
// Returns false if ICU failed to initialize.
bool Init();
// Return the current break position within the string,
- // or WordIterator::npos when done.
+ // or BreakIterator::npos when done.
size_t pos() const { return pos_; }
// Return the value of pos() returned before Advance() was last called.
size_t prev() const { return prev_; }
@@ -66,15 +65,16 @@ class WordIterator {
// last time Advance() returns true.)
bool Advance();
- // Returns true if the break we just hit is the end of a word.
- // (Otherwise, the break iterator just skipped over e.g. whitespace
- // or punctuation.)
+ // Under BREAK_WORD mode, returns true if the break we just hit is the
+ // end of a word. (Otherwise, the break iterator just skipped over e.g.
+ // whitespace or punctuation.) Under BREAK_SPACE mode, this distinction
+ // doesn't apply and it always returns false.
bool IsWord() const;
- // Return the word between prev() and pos().
+ // Return the string between prev() and pos().
// Advance() must have been called successfully at least once
// for pos() to have advanced to somewhere useful.
- string16 GetWord() const;
+ string16 GetString() const;
private:
// ICU iterator, avoiding ICU ubrk.h dependence.
@@ -92,7 +92,9 @@ class WordIterator {
// Previous and current iterator positions.
size_t prev_, pos_;
- DISALLOW_COPY_AND_ASSIGN(WordIterator);
+ DISALLOW_COPY_AND_ASSIGN(BreakIterator);
};
-#endif // BASE_I18N_WORD_ITERATOR_H__
+} // namespace base
+
+#endif // BASE_I18N_BREAK_ITERATOR_H_
diff --git a/base/i18n/word_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc
index 92aff76..8add918 100644
--- a/base/i18n/word_iterator_unittest.cc
+++ b/base/i18n/break_iterator_unittest.cc
@@ -2,116 +2,116 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/i18n/word_iterator.h"
+#include "base/i18n/break_iterator.h"
#include "base/string_piece.h"
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
-TEST(WordIteratorTest, BreakWord) {
+TEST(BreakIteratorTest, BreakWord) {
string16 space(UTF8ToUTF16(" "));
string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
- WordIterator iter(&str, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
ASSERT_TRUE(iter.Init());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(space, iter.GetWord());
+ EXPECT_EQ(space, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(space, iter.GetWord());
+ EXPECT_EQ(space, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("!"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("!"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(space, iter.GetWord());
+ EXPECT_EQ(space, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(space, iter.GetWord());
+ EXPECT_EQ(space, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
EXPECT_FALSE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
}
-TEST(WordIteratorTest, BreakLine) {
+TEST(BreakIteratorTest, BreakSpace) {
string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
- WordIterator iter(&str, WordIterator::BREAK_LINE);
+ base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
ASSERT_TRUE(iter.Init());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString());
EXPECT_TRUE(iter.Advance());
- EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetWord());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
EXPECT_FALSE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
}
-TEST(WordIteratorTest, BreakWide16) {
+TEST(BreakIteratorTest, BreakWide16) {
// "Παγκόσμιος Ιστός"
const string16 str(WideToUTF16(
L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));
const string16 word1(str.substr(0, 10));
const string16 word2(str.substr(11, 5));
- WordIterator iter(&str, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
ASSERT_TRUE(iter.Init());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(word1, iter.GetWord());
+ EXPECT_EQ(word1, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(word2, iter.GetWord());
+ EXPECT_EQ(word2, iter.GetString());
EXPECT_FALSE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
}
-TEST(WordIteratorTest, BreakWide32) {
+TEST(BreakIteratorTest, BreakWide32) {
// U+1D49C MATHEMATICAL SCRIPT CAPITAL A
const char* very_wide_char = "\xF0\x9D\x92\x9C";
const string16 str(
UTF8ToUTF16(StringPrintf("%s a", very_wide_char)));
const string16 very_wide_word(str.substr(0, 2));
- WordIterator iter(&str, WordIterator::BREAK_WORD);
+ base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
ASSERT_TRUE(iter.Init());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(very_wide_word, iter.GetWord());
+ EXPECT_EQ(very_wide_word, iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16(" "), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
EXPECT_TRUE(iter.Advance());
EXPECT_TRUE(iter.IsWord());
- EXPECT_EQ(UTF8ToUTF16("a"), iter.GetWord());
+ EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
EXPECT_FALSE(iter.Advance());
EXPECT_FALSE(iter.IsWord());
}