diff options
author | hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-12 08:31:36 +0000 |
---|---|---|
committer | hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-12 08:31:36 +0000 |
commit | 3849848ac63a0bfcc9f45acbe1738a099ba995ee (patch) | |
tree | 11d8b1d94c69acd8abb9b84e173fd0976e562e2a | |
parent | 33d761c17a24919bab654dd076ba7d5bad3c7f08 (diff) | |
download | chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.zip chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.tar.gz chromium_src-3849848ac63a0bfcc9f45acbe1738a099ba995ee.tar.bz2 |
A fix for Issue 46278.
When an ICU ruleset defines a dictionary value "$dictionary = [:LineBreak = Complex_Context:]", ICU expects the ruleset to use this value when breaking text. When the ruleset does not use this value, ubrk_next() gets stuck in an infinite loop, which causes Issue 46278. To fix this issue, we define |$dictionary| only for the languages that need the value, e.g. Korean and Thai.
BUG=46278
TEST=unit_test.exe --gtest_filter=SpellcheckWordIteratorTest.RuleSetConsistency
Review URL: http://codereview.chromium.org/3108003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@55856 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/renderer/spellchecker/spellcheck_worditerator.cc | 17 | ||||
-rw-r--r-- | chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc | 23 |
2 files changed, 31 insertions, 9 deletions
diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator.cc b/chrome/renderer/spellchecker/spellcheck_worditerator.cc index 8ed6ef7..1c6b056 100644 --- a/chrome/renderer/spellchecker/spellcheck_worditerator.cc +++ b/chrome/renderer/spellchecker/spellcheck_worditerator.cc @@ -67,9 +67,8 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { "$Numeric = [\\p{Word_Break = Numeric}];" "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];" - "$dictionary = [:LineBreak = Complex_Context:];" "$Control = [\\p{Grapheme_Cluster_Break = Control}]; " - "$ALetterPlus = %s;" + "%s" "$KatakanaEx = $Katakana ($Extend | $Format)*;" "$ALetterEx = $ALetterPlus ($Extend | $Format)*;" @@ -117,13 +116,11 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { "($Extend | $Format)+ .?;" "($MidLetter | $MidNumLet) $BackALetterEx;" "($MidNum | $MidNumLet) $BackNumericEx;" - "$dictionary $dictionary;" "!!safe_forward;" "($Extend | $Format)+ .?;" "($MidLetterEx | $MidNumLetEx) $ALetterEx;" - "($MidNumEx | $MidNumLetEx) $NumericEx;" - "$dictionary $dictionary;"; + "($MidNumEx | $MidNumLetEx) $NumericEx;"; // Retrieve the script codes used by the given language from ICU. 
When the // given language consists of two or more scripts, we just use the first @@ -145,8 +142,10 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { if (!aletter) aletter = "Latin"; - const char kWithDictionary[] = "[$ALetter [$dictionary-$Extend-$Control]]"; - const char kWithoutDictionary[] = "$ALetter"; + const char kWithDictionary[] = + "$dictionary = [:LineBreak = Complex_Context:];" + "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];"; + const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;"; const char* aletter_plus = kWithoutDictionary; if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI) aletter_plus = kWithDictionary; @@ -158,9 +157,9 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { "$ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200};"; const char kDisallowContraction[] = ""; - ruleset_allow_contraction_ = UTF8ToUTF16(StringPrintf(kRuleTemplate, + ruleset_allow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate, aletter, aletter_plus, kAllowContraction)); - ruleset_disallow_contraction_ = UTF8ToUTF16(StringPrintf(kRuleTemplate, + ruleset_disallow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate, aletter, aletter_plus, kDisallowContraction)); } diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc index 43af29f..da279fa 100644 --- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc +++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc @@ -129,3 +129,26 @@ TEST(SpellcheckWordIteratorTest, SplitWord) { } } +// Tests whether our SpellcheckWordIterator extracts an empty word without +// getting stuck in an infinite loop when inputting a Khmer text. (This is a +// regression test for Issue 46278.) 
+TEST(SpellcheckWordIteratorTest, RuleSetConsistency) { + SpellcheckCharAttribute attributes; + attributes.SetDefaultLanguage("en-US"); + + const wchar_t kTestText[] = L"\x1791\x17c1\x002e"; + string16 input(WideToUTF16(kTestText)); + + SpellcheckWordIterator iterator; + EXPECT_TRUE(iterator.Initialize(&attributes, input.c_str(), input.length(), + true)); + + // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following + // iterator.GetNextWord() call gets stuck in an infinite loop. Therefore, this + // test succeeds if this call returns without timeouts. + string16 actual_word; + int actual_start, actual_end; + EXPECT_FALSE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end)); + EXPECT_EQ(0, actual_start); + EXPECT_EQ(0, actual_end); +} |