summaryrefslogtreecommitdiffstats
path: root/base/word_iterator.h
diff options
context:
space:
mode:
authorinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-26 21:49:38 +0000
committerinitial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>2008-07-26 21:49:38 +0000
commitd7cae12696b96500c05dd2d430f6238922c20c96 (patch)
treeecff27b367735535b2a66477f8cd89d3c462a6c0 /base/word_iterator.h
parentee2815e28d408216cf94e874825b6bcf76c69083 (diff)
downloadchromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.zip
chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.gz
chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.bz2
Add base to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@8 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/word_iterator.h')
-rw-r--r--base/word_iterator.h110
1 files changed, 110 insertions, 0 deletions
diff --git a/base/word_iterator.h b/base/word_iterator.h
new file mode 100644
index 0000000..fe86411
--- /dev/null
+++ b/base/word_iterator.h
@@ -0,0 +1,110 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef BASE_WORD_ITERATOR_H__
+#define BASE_WORD_ITERATOR_H__
+
+#include "base/basictypes.h"
+
+// The WordIterator class iterates through the words and word breaks
+// in a string. (In the string " foo bar! ", the word breaks are at the
+// periods in ". .foo. .bar.!. .".)
+//
+// To extract the words from a string, move a WordIterator through the
+// string and test whether IsWord() is true. E.g.,
+// WordIterator iter(str, WordIterator::BREAK_WORD);
+// if (!iter.Init()) return false;
+// while (iter.Advance()) {
+// if (iter.IsWord()) {
+// // region [iter.prev(),iter.pos()) contains a word.
+// LOG(INFO) << "word: " << iter.GetWord();
+// }
+// }
+
+
+class WordIterator {
+ public:
+ enum BreakType {
+ BREAK_WORD,
+ BREAK_LINE
+ };
+
+ // Requires |str| to live as long as the WordIterator does.
+ WordIterator(const std::wstring& str, BreakType break_type);
+ ~WordIterator();
+
+ // Init() must be called before any of the iterators are valid.
+ // Returns false if ICU failed to initialize.
+ bool Init();
+
+ // Return the current break position within the string,
+ // or WordIterator::npos when done.
+ int pos() const { return pos_; }
+ // Return the value of pos() returned before Advance() was last called.
+ int prev() const { return prev_; }
+
+ // A special position value indicating "end of string".
+ static const int npos;
+
+ // Advance to the next break. Returns false if we've run past the end of
+ // the string. (Note that the very last "word break" is after the final
+ // character in the string, and when we advance to that position it's the
+ // last time Advance() returns true.)
+ bool Advance();
+
+ // Returns true if the break we just hit is the end of a word.
+ // (Otherwise, the break iterator just skipped over e.g. whitespace
+ // or punctuation.)
+ bool IsWord() const;
+
+ // Return the word between prev() and pos().
+ // Advance() must have been called successfully at least once
+ // for pos() to have advanced to somewhere useful.
+ std::wstring GetWord() const {
+ DCHECK(prev_ >= 0 && pos_ >= 0);
+ return string_.substr(prev_, pos_ - prev_);
+ }
+
+ private:
+ // ICU iterator.
+ void* iter_;
+
+ // The string we're iterating over.
+ const std::wstring& string_;
+
+ // The breaking style (word/line).
+ BreakType break_type_;
+
+ // Previous and current iterator positions.
+ int prev_, pos_;
+
+ DISALLOW_EVIL_CONSTRUCTORS(WordIterator);
+};
+
+#endif // BASE_WORD_ITERATOR_H__ \ No newline at end of file