summary | refs | log | tree | commit | diff | stats
path: root/base/i18n
diff options
context:
space:
mode:
author: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-08-31 06:58:13 +0000
committer: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-08-31 06:58:13 +0000
commit: 2f94293c2ee3c6c0c256a55697388b7276ad1144 (patch)
tree: f6a053f88a9784149f94e591cf5c1f890ae89910 /base/i18n
parent: f3835b4e4f91f6f924259da893860d8eea20ff11 (diff)
download: chromium_src-2f94293c2ee3c6c0c256a55697388b7276ad1144.zip
chromium_src-2f94293c2ee3c6c0c256a55697388b7276ad1144.tar.gz
chromium_src-2f94293c2ee3c6c0c256a55697388b7276ad1144.tar.bz2
Add BREAK_CHARACTER to base::i18n::BreakIterator.
This change adds a BREAK_CHARACTER type to the BreakIterator class so it can break text into grapheme clusters. This is the first step to fix a long-lasting bug (Bug 5371).

BUG=5371
TEST=BreakIteratorTest.BreakCharacter

Review URL: https://chromiumcodereview.appspot.com/10827346

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@154399 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/i18n')
-rw-r--r-- base/i18n/break_iterator.cc 4
-rw-r--r-- base/i18n/break_iterator.h 1
-rw-r--r-- base/i18n/break_iterator_unittest.cc 24
3 files changed, 29 insertions, 0 deletions
diff --git a/base/i18n/break_iterator.cc b/base/i18n/break_iterator.cc
index eae531c..15451e2 100644
--- a/base/i18n/break_iterator.cc
+++ b/base/i18n/break_iterator.cc
@@ -31,6 +31,9 @@ bool BreakIterator::Init() {
UErrorCode status = U_ZERO_ERROR;
UBreakIteratorType break_type;
switch (break_type_) {
+ case BREAK_CHARACTER:
+ break_type = UBRK_CHARACTER;
+ break;
case BREAK_WORD:
break_type = UBRK_WORD;
break;
@@ -59,6 +62,7 @@ bool BreakIterator::Advance() {
int32_t status;
prev_ = pos_;
switch (break_type_) {
+ case BREAK_CHARACTER:
case BREAK_WORD:
case BREAK_LINE:
pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
diff --git a/base/i18n/break_iterator.h b/base/i18n/break_iterator.h
index ec6864e..d558e23 100644
--- a/base/i18n/break_iterator.h
+++ b/base/i18n/break_iterator.h
@@ -65,6 +65,7 @@ class BASE_I18N_EXPORT BreakIterator {
// implement it separately.
BREAK_SPACE = BREAK_LINE,
BREAK_NEWLINE,
+ BREAK_CHARACTER,
};
// Requires |str| to live as long as the BreakIterator does.
diff --git a/base/i18n/break_iterator_unittest.cc b/base/i18n/break_iterator_unittest.cc
index c1e521c..afb780c 100644
--- a/base/i18n/break_iterator_unittest.cc
+++ b/base/i18n/break_iterator_unittest.cc
@@ -310,5 +310,29 @@ TEST(BreakIteratorTest, BreakLineWide32) {
EXPECT_FALSE(iter.IsWord());
}
+TEST(BreakIteratorTest, BreakCharacter) {
+ static const wchar_t* kCharacters[] = {
+ // An English word: four ASCII characters, followed by a separating space.
+ L"w", L"o", L"r", L"d", L" ",
+ // A Hindi word (which means "Hindi"): three Devanagari grapheme clusters,
+ // each spelled with two code points, followed by a separating space.
+ L"\x0939\x093F", L"\x0928\x094D", L"\x0926\x0940", L" ",
+ // A Thai word (which means "feel"): clusters of 3, 2 and 1 code points.
+ L"\x0E23\x0E39\x0E49", L"\x0E2A\x0E36", L"\x0E01", L" ",
+ };
+ std::vector<string16> characters;
+ string16 text;
+ for (size_t i = 0; i < arraysize(kCharacters); ++i) {
+ characters.push_back(WideToUTF16(kCharacters[i]));
+ text.append(characters.back());  // |text| is every expected piece joined.
+ }
+ BreakIterator iter(text, BreakIterator::BREAK_CHARACTER);
+ ASSERT_TRUE(iter.Init());
+ for (size_t i = 0; i < arraysize(kCharacters); ++i) {
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_EQ(characters[i], iter.GetString());  // i-th break == i-th piece.
+ }
+}
+
} // namespace i18n
} // namespace base