summaryrefslogtreecommitdiffstats
path: root/base/i18n
diff options
context:
space:
mode:
authorcdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-12-10 18:12:02 +0000
committercdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-12-10 18:12:02 +0000
commit807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46 (patch)
tree3ef929145061f8e423b56dd8d9a8f11e943888c6 /base/i18n
parent7ada519aa6dd315ebb94d10357487f2daebd4654 (diff)
downloadchromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.zip
chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.tar.gz
chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.tar.bz2
Remove ICU header dependencies outside of /i18n directory.
Committing for tsepez. Original code review is here http://codereview.chromium.org/5682002/ BUG=49747 TEST=WordIteratorTest.* Review URL: http://codereview.chromium.org/5718003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@68867 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/i18n')
-rw-r--r--base/i18n/word_iterator.cc1
-rw-r--r--base/i18n/word_iterator.h33
2 files changed, 22 insertions, 12 deletions
diff --git a/base/i18n/word_iterator.cc b/base/i18n/word_iterator.cc
index a9fa4af..7ad9c84 100644
--- a/base/i18n/word_iterator.cc
+++ b/base/i18n/word_iterator.cc
@@ -6,6 +6,7 @@
#include "base/logging.h"
#include "unicode/ubrk.h"
+#include "unicode/uchar.h"
#include "unicode/ustring.h"
const size_t npos = -1;
diff --git a/base/i18n/word_iterator.h b/base/i18n/word_iterator.h
index b097bc2..ada86b9 100644
--- a/base/i18n/word_iterator.h
+++ b/base/i18n/word_iterator.h
@@ -8,18 +8,27 @@
#include <vector>
-#include "unicode/ubrk.h"
-#include "unicode/uchar.h"
-
#include "base/basictypes.h"
#include "base/string16.h"
// The WordIterator class iterates through the words and word breaks
-// in a string. (In the string " foo bar! ", the word breaks are at the
-// periods in ". .foo. .bar.!. .".)
+// in a UTF-16 string.
+//
+// It provides two modes, BREAK_WORD and BREAK_LINE, which modify how
+// trailing non-word characters are aggregated into the returned word.
+//
+// Under BREAK_WORD mode (more common), the non-word characters are
+// not included with a returned word (e.g. in the UTF-16 equivalent of
+// the string " foo bar! ", the word breaks are at the periods in
+// ". .foo. .bar.!. .").
+//
+// Under BREAK_LINE mode (less common), the non-word characters are
+// included in the word, breaking only when a space-equivalent character
+// is encountered (e.g. in the UTF-16 equivalent of the string " foo bar! ",
+// the word breaks are at the periods in ". .foo .bar! .").
//
-// To extract the words from a string, move a WordIterator through the
-// string and test whether IsWord() is true. E.g.,
+// To extract the words from a string, move a BREAK_WORD WordIterator
+// through the string and test whether IsWord() is true. E.g.,
// WordIterator iter(&str, WordIterator::BREAK_WORD);
// if (!iter.Init()) return false;
// while (iter.Advance()) {
@@ -68,11 +77,11 @@ class WordIterator {
string16 GetWord() const;
private:
- // ICU iterator.
- UBreakIterator* iter_;
-#if !defined(WCHAR_T_IS_UTF16)
- std::vector<UChar> chars_;
-#endif
+ // ICU iterator, avoiding ICU ubrk.h dependence.
+ // This is actually an ICU UBreakIterator* type, which turns out to be
+ // a typedef for a void* in the ICU headers. Using void* directly prevents
+ // callers from needing access to the ICU public headers directory.
+ void* iter_;
// The string we're iterating over.
const string16* string_;