diff options
author | hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-08-31 06:58:13 +0000 |
---|---|---|
committer | hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-08-31 06:58:13 +0000 |
commit | 2f94293c2ee3c6c0c256a55697388b7276ad1144 (patch) | |
tree | f6a053f88a9784149f94e591cf5c1f890ae89910 /base/i18n | |
parent | f3835b4e4f91f6f924259da893860d8eea20ff11 (diff) | |
download | chromium_src-2f94293c2ee3c6c0c256a55697388b7276ad1144.zip chromium_src-2f94293c2ee3c6c0c256a55697388b7276ad1144.tar.gz chromium_src-2f94293c2ee3c6c0c256a55697388b7276ad1144.tar.bz2 |
Add BREAK_CHARACTER to base::i18n::BreakIterator.
This change adds a BREAK_CHARACTER type to the BreakIterator class so it can break text into grapheme clusters. This is the first step toward fixing a long-standing bug (Bug 5371).
BUG=5371
TEST=BreakIteratorTest.BreakCharacter
Review URL: https://chromiumcodereview.appspot.com/10827346
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@154399 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/i18n')
-rw-r--r-- | base/i18n/break_iterator.cc | 4 | ||||
-rw-r--r-- | base/i18n/break_iterator.h | 1 | ||||
-rw-r--r-- | base/i18n/break_iterator_unittest.cc | 24 |
3 files changed, 29 insertions, 0 deletions
diff --git a/base/i18n/break_iterator.cc b/base/i18n/break_iterator.cc index eae531c..15451e2 100644 --- a/base/i18n/break_iterator.cc +++ b/base/i18n/break_iterator.cc @@ -31,6 +31,9 @@ bool BreakIterator::Init() { UErrorCode status = U_ZERO_ERROR; UBreakIteratorType break_type; switch (break_type_) { + case BREAK_CHARACTER: + break_type = UBRK_CHARACTER; + break; case BREAK_WORD: break_type = UBRK_WORD; break; @@ -59,6 +62,7 @@ bool BreakIterator::Advance() { int32_t status; prev_ = pos_; switch (break_type_) { + case BREAK_CHARACTER: case BREAK_WORD: case BREAK_LINE: pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); diff --git a/base/i18n/break_iterator.h b/base/i18n/break_iterator.h index ec6864e..d558e23 100644 --- a/base/i18n/break_iterator.h +++ b/base/i18n/break_iterator.h @@ -65,6 +65,7 @@ class BASE_I18N_EXPORT BreakIterator { // implement it separately. BREAK_SPACE = BREAK_LINE, BREAK_NEWLINE, + BREAK_CHARACTER, }; // Requires |str| to live as long as the BreakIterator does. diff --git a/base/i18n/break_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc index c1e521c..afb780c 100644 --- a/base/i18n/break_iterator_unittest.cc +++ b/base/i18n/break_iterator_unittest.cc @@ -310,5 +310,29 @@ TEST(BreakIteratorTest, BreakLineWide32) { EXPECT_FALSE(iter.IsWord()); } +TEST(BreakIteratorTest, BreakCharacter) { + static const wchar_t* kCharacters[] = { + // An English word consisting of four ASCII characters. + L"w", L"o", L"r", L"d", L" ", + // A Hindi word (which means "Hindi") consisting of three Devanagari + // characters. + L"\x0939\x093F", L"\x0928\x094D", L"\x0926\x0940", L" ", + // A Thai word (which means "feel") consisting of three Thai characters. 
+ L"\x0E23\x0E39\x0E49", L"\x0E2A\x0E36", L"\x0E01", L" ", + }; + std::vector<string16> characters; + string16 text; + for (size_t i = 0; i < arraysize(kCharacters); ++i) { + characters.push_back(WideToUTF16(kCharacters[i])); + text.append(characters.back()); + } + BreakIterator iter(text, BreakIterator::BREAK_CHARACTER); + ASSERT_TRUE(iter.Init()); + for (size_t i = 0; i < arraysize(kCharacters); ++i) { + EXPECT_TRUE(iter.Advance()); + EXPECT_EQ(characters[i], iter.GetString()); + } +} + } // namespace i18n } // namespace base |