summaryrefslogtreecommitdiffstats
path: root/chrome/renderer/spellchecker
diff options
context:
space:
mode:
Diffstat (limited to 'chrome/renderer/spellchecker')
-rw-r--r--chrome/renderer/spellchecker/spellcheck_worditerator.cc9
-rw-r--r--chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc64
2 files changed, 72 insertions, 1 deletions
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator.cc b/chrome/renderer/spellchecker/spellcheck_worditerator.cc
index 96f1cf8..d6e6abe 100644
--- a/chrome/renderer/spellchecker/spellcheck_worditerator.cc
+++ b/chrome/renderer/spellchecker/spellcheck_worditerator.cc
@@ -64,7 +64,7 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
// For instance, U+05F4 is MidLetter. So, this may be
// better, but it leads to an empty set error in Thai.
// "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"
- "$ALetter = [\\p{script=%s} [0123456789]];"
+ "$ALetter = [\\p{script=%s}%s];"
"$MidNumLet = [\\p{Word_Break = MidNumLet}];"
"$MidLetter = [\\p{Word_Break = MidLetter}%s];"
"$MidNum = [\\p{Word_Break = MidNum}];"
@@ -154,6 +154,11 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI)
aletter_plus = kWithDictionary;
+ // Treat numbers as word characters except for Arabic and Hebrew.
+ const char* aletter_extra = " [0123456789]";
+ if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC)
+ aletter_extra = "";
+
const char kMidLetterExtra[] = "";
// For Hebrew, treat single/double quoation marks as MidLetter.
const char kMidLetterExtraHebrew[] = "\"'";
@@ -171,12 +176,14 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
ruleset_allow_contraction_ = ASCIIToUTF16(
base::StringPrintf(kRuleTemplate,
aletter,
+ aletter_extra,
midletter_extra,
aletter_plus,
kAllowContraction));
ruleset_disallow_contraction_ = ASCIIToUTF16(
base::StringPrintf(kRuleTemplate,
aletter,
+ aletter_extra,
midletter_extra,
aletter_plus,
kDisallowContraction));
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
index 37fbc71..1dc8614 100644
--- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
+++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc
@@ -164,3 +164,67 @@ TEST(SpellcheckWordIteratorTest, RuleSetConsistency) {
EXPECT_EQ(0, actual_start);
EXPECT_EQ(0, actual_end);
}
+
+// Verify that SpellcheckWordIterator treats ASCII digits as word characters
+// for LTR languages. For RTL languages (Hebrew and Arabic), on the other hand,
+// it should not treat ASCII digits as word characters because they change the
+// text direction from RTL to LTR.
+TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) {
+ // Each test case pairs a language with a dummy word and a flag telling
+ // whether the language is written left-to-right. The dummy word is the ASCII
+ // digits "0123456789" followed by letter(s) of the language's script. When
+ // digits are treated as word characters, the first extracted word equals the
+ // dummy word; otherwise the extracted word must differ from the dummy word.
+ static const struct {
+ const char* language;  // Passed to SetDefaultLanguage().
+ const wchar_t* text;  // Dummy word: ASCII digits + letter(s) of the script.
+ bool left_to_right;  // Selects EXPECT_EQ (true) or EXPECT_NE (false) below.
+ } kTestCases[] = {
+ {
+ // English
+ "en-US", L"0123456789" L"a", true,
+ }, {
+ // Greek
+ "el-GR", L"0123456789" L"\x03B1", true,
+ }, {
+ // Russian
+ "ru-RU", L"0123456789" L"\x0430", true,
+ }, {
+ // Hebrew
+ "he-IL", L"0123456789" L"\x05D0", false,
+ }, {
+ // Arabic
+ "ar", L"0123456789" L"\x0627", false,
+ }, {
+ // Hindi
+ "hi-IN", L"0123456789" L"\x0905", true,
+ }, {
+ // Thai
+ "th-TH", L"0123456789" L"\x0e01", true,
+ }, {
+ // Korean
+ "ko-KR", L"0123456789" L"\x1100\x1161", true,
+ },
+ };
+
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
+ SCOPED_TRACE(base::StringPrintf("kTestCases[%" PRIuS "]: language=%s", i,
+ kTestCases[i].language));
+
+ SpellcheckCharAttribute attributes;
+ attributes.SetDefaultLanguage(kTestCases[i].language);
+
+ string16 input_word(WideToUTF16(kTestCases[i].text));
+ SpellcheckWordIterator iterator;
+ EXPECT_TRUE(iterator.Initialize(&attributes, true));
+ EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length()));
+
+ string16 actual_word;
+ int actual_start, actual_end;
+ EXPECT_TRUE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end));
+ if (kTestCases[i].left_to_right)
+ EXPECT_EQ(input_word, actual_word);
+ else
+ EXPECT_NE(input_word, actual_word);
+ }
+}