diff options
author | andrewhayden@chromium.org <andrewhayden@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-06-13 00:42:55 +0000 |
---|---|---|
committer | andrewhayden@chromium.org <andrewhayden@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-06-13 00:42:55 +0000 |
commit | 20430bb356063683af266f0d4c37d921242ee3b9 (patch) | |
tree | ba065e9d3a40a9d4ac42821402985ca20e9c0aac /chrome/renderer/spellchecker | |
parent | 8094dc81060e8d642d2d1e130671026abb25ff86 (diff) | |
download | chromium_src-20430bb356063683af266f0d4c37d921242ee3b9.zip chromium_src-20430bb356063683af266f0d4c37d921242ee3b9.tar.gz chromium_src-20430bb356063683af266f0d4c37d921242ee3b9.tar.bz2 |
Refactor code to avoid direct dependency upon ICU: spellcheck_worditerator
BUG=367677
Review URL: https://codereview.chromium.org/270203003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@276869 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/spellchecker')
-rw-r--r-- | chrome/renderer/spellchecker/spellcheck_worditerator.cc | 68 | ||||
-rw-r--r-- | chrome/renderer/spellchecker/spellcheck_worditerator.h | 18 |
2 files changed, 37 insertions, 49 deletions
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator.cc b/chrome/renderer/spellchecker/spellcheck_worditerator.cc index 1297c5a..a849042 100644 --- a/chrome/renderer/spellchecker/spellcheck_worditerator.cc +++ b/chrome/renderer/spellchecker/spellcheck_worditerator.cc @@ -10,6 +10,7 @@ #include <string> #include "base/basictypes.h" +#include "base/i18n/break_iterator.h" #include "base/logging.h" #include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversions.h" @@ -299,10 +300,8 @@ bool SpellcheckCharAttribute::OutputDefault(UChar c, SpellcheckWordIterator::SpellcheckWordIterator() : text_(NULL), - length_(0), - position_(UBRK_DONE), attribute_(NULL), - iterator_(NULL) { + iterator_() { } SpellcheckWordIterator::~SpellcheckWordIterator() { @@ -315,18 +314,22 @@ bool SpellcheckWordIterator::Initialize( // Create a custom ICU break iterator with empty text used in this object. (We // allow setting text later so we can re-use this iterator.) DCHECK(attribute); - UErrorCode open_status = U_ZERO_ERROR; - UParseError parse_status; - base::string16 rule(attribute->GetRuleSet(allow_contraction)); + const base::string16 rule(attribute->GetRuleSet(allow_contraction)); // If there is no rule set, the attributes were invalid. if (rule.empty()) return false; - iterator_ = ubrk_openRules(rule.c_str(), rule.length(), NULL, 0, - &parse_status, &open_status); - if (U_FAILURE(open_status)) + scoped_ptr<base::i18n::BreakIterator> iterator( + new base::i18n::BreakIterator(base::string16(), rule)); + if (!iterator->Init()) { + // Since we're not passing in any text, the only reason this could fail + // is if we fail to parse the rules. Since the rules are hardcoded, + // that would be a bug in this class. + NOTREACHED() << "failed to open iterator (broken rules)"; return false; + } + iterator_ = iterator.Pass(); // Set the character attributes so we can normalize the words extracted by // this iterator. @@ -335,7 +338,7 @@ bool SpellcheckWordIterator::Initialize( } bool SpellcheckWordIterator::IsInitialized() const { - // Return true if we have an ICU custom iterator. + // Return true iff we have an iterator. return !!iterator_; } @@ -343,66 +346,51 @@ bool SpellcheckWordIterator::SetText(const base::char16* text, size_t length) { DCHECK(!!iterator_); // Set the text to be split by this iterator. - UErrorCode status = U_ZERO_ERROR; - ubrk_setText(iterator_, text, length, &status); - if (U_FAILURE(status)) - return false; - - // Retrieve the position to the first word in this text. We return false if - // this text does not have any words. (For example, The input text consists - // only of Chinese characters while the spellchecker language is English.) - position_ = ubrk_first(iterator_); - if (position_ == UBRK_DONE) + if (!iterator_->SetText(text, length)) { + LOG(ERROR) << "failed to set text"; return false; + } text_ = text; - length_ = static_cast<int>(length); return true; } bool SpellcheckWordIterator::GetNextWord(base::string16* word_string, int* word_start, int* word_length) { - DCHECK(!!text_ && length_ > 0); + DCHECK(!!text_); word_string->clear(); *word_start = 0; *word_length = 0; - if (!text_ || position_ == UBRK_DONE) + if (!text_) { return false; + } // Find a word that can be checked for spelling. Our rule sets filter out // invalid words (e.g. numbers and characters not supported by the // spellchecker language) so this ubrk_getRuleStatus() call returns // UBRK_WORD_NONE when this iterator finds an invalid word. So, we skip such // words until we can find a valid word or reach the end of the input string. - int next = ubrk_next(iterator_); - while (next != UBRK_DONE) { - if (ubrk_getRuleStatus(iterator_) != UBRK_WORD_NONE) { - if (Normalize(position_, next - position_, word_string)) { - *word_start = position_; - *word_length = next - position_; - position_ = next; + while (iterator_->Advance()) { + const size_t start = iterator_->prev(); + const size_t length = iterator_->pos() - start; + if (iterator_->IsWord()) { + if (Normalize(start, length, word_string)) { + *word_start = start; + *word_length = length; return true; } } - position_ = next; - next = ubrk_next(iterator_); } - // There aren't any more words in the given text. Set the position to - // UBRK_DONE to prevent from calling ubrk_next() next time when this function - // is called. - position_ = UBRK_DONE; + // There aren't any more words in the given text. return false; } void SpellcheckWordIterator::Reset() { - if (iterator_) { - ubrk_close(iterator_); - iterator_ = NULL; - } + iterator_.reset(); } bool SpellcheckWordIterator::Normalize(int input_start, diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator.h b/chrome/renderer/spellchecker/spellcheck_worditerator.h index 09d54a6..6b6a2f4 100644 --- a/chrome/renderer/spellchecker/spellcheck_worditerator.h +++ b/chrome/renderer/spellchecker/spellcheck_worditerator.h @@ -12,10 +12,16 @@ #include <string> #include "base/basictypes.h" +#include "base/memory/scoped_ptr.h" #include "base/strings/string16.h" -#include "third_party/icu/source/common/unicode/ubrk.h" #include "third_party/icu/source/common/unicode/uscript.h" +namespace base { +namespace i18n { +class BreakIterator; +} // namespace i18n +} // namespace base + // A class which encapsulates language-specific operations used by // SpellcheckWordIterator. When we set the spellchecker language, this class // creates rule sets that filter out the characters not supported by the @@ -156,18 +162,12 @@ class SpellcheckWordIterator { // The pointer to the input string from which we are extracting words. const base::char16* text_; - // The length of the original string. - int length_; - - // The current position in the original string. - int position_; - // The language-specific attributes used for filtering out non-word // characters. const SpellcheckCharAttribute* attribute_; - // The ICU break iterator. - UBreakIterator* iterator_; + // The break iterator. + scoped_ptr<base::i18n::BreakIterator> iterator_; DISALLOW_COPY_AND_ASSIGN(SpellcheckWordIterator); }; |