diff options
Diffstat (limited to 'chrome/renderer/spellchecker')
-rw-r--r-- | chrome/renderer/spellchecker/spellcheck_worditerator.cc | 9 | ||||
-rw-r--r-- | chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc | 64 |
2 files changed, 72 insertions, 1 deletions
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator.cc b/chrome/renderer/spellchecker/spellcheck_worditerator.cc index 96f1cf8..d6e6abe 100644 --- a/chrome/renderer/spellchecker/spellcheck_worditerator.cc +++ b/chrome/renderer/spellchecker/spellcheck_worditerator.cc @@ -64,7 +64,7 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { // For instance, U+05F4 is MidLetter. So, this may be // better, but it leads to an empty set error in Thai. // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];" - "$ALetter = [\\p{script=%s} [0123456789]];" + "$ALetter = [\\p{script=%s}%s];" "$MidNumLet = [\\p{Word_Break = MidNumLet}];" "$MidLetter = [\\p{Word_Break = MidLetter}%s];" "$MidNum = [\\p{Word_Break = MidNum}];" @@ -154,6 +154,11 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI) aletter_plus = kWithDictionary; + // Treat numbers as word characters except for Arabic and Hebrew. + const char* aletter_extra = " [0123456789]"; + if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC) + aletter_extra = ""; + const char kMidLetterExtra[] = ""; // For Hebrew, treat single/double quoation marks as MidLetter. 
const char kMidLetterExtraHebrew[] = "\"'"; @@ -171,12 +176,14 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { ruleset_allow_contraction_ = ASCIIToUTF16( base::StringPrintf(kRuleTemplate, aletter, + aletter_extra, midletter_extra, aletter_plus, kAllowContraction)); ruleset_disallow_contraction_ = ASCIIToUTF16( base::StringPrintf(kRuleTemplate, aletter, + aletter_extra, midletter_extra, aletter_plus, kDisallowContraction)); diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc index 37fbc71..1dc8614 100644 --- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc +++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc @@ -164,3 +164,67 @@ TEST(SpellcheckWordIteratorTest, RuleSetConsistency) { EXPECT_EQ(0, actual_start); EXPECT_EQ(0, actual_end); } + +// Verify our SpellcheckWordIterator can treat ASCII numbers as word characters +// on LTR languages. On the other hand, it should not treat ASCII numbers as +// word characters on RTL languages because they change the text direction from +// RTL to LTR. +TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) { + // A set of a language, a dummy word, and a text direction used in this test. + // For each language, this test splits a dummy word, which consists of ASCII + // numbers and an alphabet of the language, into words. When ASCII numbers are + // treated as word characters, the split word becomes equal to the dummy word. + // Otherwise, the split word does not include ASCII numbers. 
+ static const struct { + const char* language; + const wchar_t* text; + bool left_to_right; + } kTestCases[] = { + { + // English + "en-US", L"0123456789" L"a", true, + }, { + // Greek + "el-GR", L"0123456789" L"\x03B1", true, + }, { + // Russian + "ru-RU", L"0123456789" L"\x0430", true, + }, { + // Hebrew + "he-IL", L"0123456789" L"\x05D0", false, + }, { + // Arabic + "ar", L"0123456789" L"\x0627", false, + }, { + // Hindi + "hi-IN", L"0123456789" L"\x0905", true, + }, { + // Thai + "th-TH", L"0123456789" L"\x0e01", true, + }, { + // Korean + "ko-KR", L"0123456789" L"\x1100\x1161", true, + }, + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { + SCOPED_TRACE(base::StringPrintf("kTestCases[%" PRIuS "]: language=%s", i, + kTestCases[i].language)); + + SpellcheckCharAttribute attributes; + attributes.SetDefaultLanguage(kTestCases[i].language); + + string16 input_word(WideToUTF16(kTestCases[i].text)); + SpellcheckWordIterator iterator; + EXPECT_TRUE(iterator.Initialize(&attributes, true)); + EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); + + string16 actual_word; + int actual_start, actual_end; + EXPECT_TRUE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end)); + if (kTestCases[i].left_to_right) + EXPECT_EQ(input_word, actual_word); + else + EXPECT_NE(input_word, actual_word); + } +} |