Remove ICU header dependencies outside of /i18n directory.

Committing for tsepez. Original code review is here: http://codereview.chromium.org/5682002/

BUG=49747
TEST=WordIteratorTest.*

Review URL: http://codereview.chromium.org/5718003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@68867 0039d316-1c4b-4281-b951-d872f2087c98
author: cdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-12-10 18:12:02 +0000
committer: cdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-12-10 18:12:02 +0000
commit: 807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46 (patch)
tree: 3ef929145061f8e423b56dd8d9a8f11e943888c6 /base/i18n
parent: 7ada519aa6dd315ebb94d10357487f2daebd4654 (diff)
download: chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.zip
chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.tar.gz
chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.tar.bz2
2 files changed, 22 insertions, 12 deletions
diff --git a/base/i18n/word_iterator.cc b/base/i18n/word_iterator.cc
index a9fa4af..7ad9c84 100644
--- a/base/i18n/word_iterator.cc
+++ b/base/i18n/word_iterator.cc
@@ -6,6 +6,7 @@
 
 #include "base/logging.h"
 #include "unicode/ubrk.h"
+#include "unicode/uchar.h"
 #include "unicode/ustring.h"
 
 const size_t npos = -1;
diff --git a/base/i18n/word_iterator.h b/base/i18n/word_iterator.h
index b097bc2..ada86b9 100644
--- a/base/i18n/word_iterator.h
+++ b/base/i18n/word_iterator.h
@@ -8,18 +8,27 @@
 
 #include <vector>
 
-#include "unicode/ubrk.h"
-#include "unicode/uchar.h"
-
 #include "base/basictypes.h"
 #include "base/string16.h"
 
 // The WordIterator class iterates through the words and word breaks
-// in a string.  (In the string " foo bar! ", the word breaks are at the
-// periods in ". .foo. .bar.!. .".)
+// in a UTF-16 string.
+//
+// It provides two modes, BREAK_WORD and BREAK_LINE, which modify how
+// trailing non-word characters are aggregated into the returned word.
+//
+// Under BREAK_WORD mode (more common), the non-word characters are
+// not included with a returned word (e.g. in the UTF-16 equivalent of
+// the string " foo bar! ", the word breaks are at the periods in
+// ". .foo. .bar.!. .").
+//
+// Under BREAK_LINE mode (less common), the non-word characters are
+// included in the word, breaking only when a space-equivalent character
+// is encountered (e.g. in the UTF-16 equivalent of the string " foo bar! ",
+// the word breaks are at the periods in ". .foo .bar! .").
 //
-// To extract the words from a string, move a WordIterator through the
-// string and test whether IsWord() is true.  E.g.,
+// To extract the words from a string, move a BREAK_WORD WordIterator
+// through the string and test whether IsWord() is true.  E.g.,
 //   WordIterator iter(&str, WordIterator::BREAK_WORD);
 //   if (!iter.Init()) return false;
 //   while (iter.Advance()) {
@@ -68,11 +77,11 @@ class WordIterator {
   string16 GetWord() const;
 
  private:
-  // ICU iterator.
-  UBreakIterator* iter_;
-#if !defined(WCHAR_T_IS_UTF16)
-  std::vector<UChar> chars_;
-#endif
+  // ICU iterator, avoiding ICU ubrk.h dependence.
+  // This is actually an ICU UBreakIterator* type, which turns out to be
+  // a typedef for a void* in the ICU headers. Using void* directly prevents
+  // callers from needing access to the ICU public headers directory.
+  void* iter_;
 
   // The string we're iterating over.
   const string16* string_;
author	cdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-12-10 18:12:02 +0000
committer	cdn@chromium.org <cdn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-12-10 18:12:02 +0000
commit	807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46 (patch)
tree	3ef929145061f8e423b56dd8d9a8f11e943888c6 /base/i18n
parent	7ada519aa6dd315ebb94d10357487f2daebd4654 (diff)
download	chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.zip chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.tar.gz chromium_src-807aa93835fa0ada2ccbe849fbc1bb4b5cf7fb46.tar.bz2