diff options
author | cdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-12-10 18:12:02 +0000 |
---|---|---|
committer | cdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-12-10 18:12:02 +0000 |
commit | 807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46 (patch) | |
tree | 3ef929145061f8e423b56dd8d9a8f11e943888c6 /base/i18n | |
parent | 7ada519aa6dd315ebb94d10357487f2daebd4654 (diff) | |
download | chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.zip chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.tar.gz chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.tar.bz2 |
Remove ICU header dependencies outside of /i18n directory.
Committing for tsepez. Original code review is here http://codereview.chromium.org/5682002/
BUG=49747
TEST=WordIteratorTest.*
Review URL: http://codereview.chromium.org/5718003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@68867 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/i18n')
-rw-r--r-- | base/i18n/word_iterator.cc | 1 | ||||
-rw-r--r-- | base/i18n/word_iterator.h | 33 |
2 files changed, 22 insertions, 12 deletions
diff --git a/base/i18n/word_iterator.cc b/base/i18n/word_iterator.cc index a9fa4af..7ad9c84 100644 --- a/base/i18n/word_iterator.cc +++ b/base/i18n/word_iterator.cc @@ -6,6 +6,7 @@ #include "base/logging.h" #include "unicode/ubrk.h" +#include "unicode/uchar.h" #include "unicode/ustring.h" const size_t npos = -1; diff --git a/base/i18n/word_iterator.h b/base/i18n/word_iterator.h index b097bc2..ada86b9 100644 --- a/base/i18n/word_iterator.h +++ b/base/i18n/word_iterator.h @@ -8,18 +8,27 @@ #include <vector> -#include "unicode/ubrk.h" -#include "unicode/uchar.h" - #include "base/basictypes.h" #include "base/string16.h" // The WordIterator class iterates through the words and word breaks -// in a string. (In the string " foo bar! ", the word breaks are at the -// periods in ". .foo. .bar.!. .".) +// in a UTF-16 string. +// +// It provides two modes, BREAK_WORD and BREAK_LINE, which modify how +// trailing non-word characters are aggregated into the returned word. +// +// Under BREAK_WORD mode (more common), the non-word characters are +// not included with a returned word (e.g. in the UTF-16 equivalent of +// the string " foo bar! ", the word breaks are at the periods in +// ". .foo. .bar.!. ."). +// +// Under BREAK_LINE mode (less common), the non-word characters are +// included in the word, breaking only when a space-equivalent character +// is encountered (e.g. in the UTF16-equivalent of the string " foo bar! ", +// the word breaks are at the periods in ". .foo .bar! ."). // -// To extract the words from a string, move a WordIterator through the -// string and test whether IsWord() is true. E.g., +// To extract the words from a string, move a BREAK_WORD WordIterator +// through the string and test whether IsWord() is true. E.g., // WordIterator iter(&str, WordIterator::BREAK_WORD); // if (!iter.Init()) return false; // while (iter.Advance()) { @@ -68,11 +77,11 @@ class WordIterator { string16 GetWord() const; private: - // ICU iterator. 
- UBreakIterator* iter_; -#if !defined(WCHAR_T_IS_UTF16) - std::vector<UChar> chars_; -#endif + // ICU iterator, avoiding ICU ubrk.h dependence. + // This is actually an ICU UBreakIterator* type, which turns out to be + // a typedef for a void* in the ICU headers. Using void* directly prevents + // callers from needing access to the ICU public headers directory. + void* iter_; // The string we're iterating over. const string16* string_; |