diff options
author | rouslan <rouslan@chromium.org> | 2015-06-18 19:30:14 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-06-19 02:30:36 +0000 |
commit | e12b92b09e5aa238758e3f2f0d7c195709fbf217 (patch) | |
tree | 5318d74a86bdb48294081651ef4eca8f29d19784 /chrome/renderer | |
parent | d20a6c4cd820214f99f525044a9f89a85a948ef3 (diff) | |
download | chromium_src-e12b92b09e5aa238758e3f2f0d7c195709fbf217.zip chromium_src-e12b92b09e5aa238758e3f2f0d7c195709fbf217.tar.gz chromium_src-e12b92b09e5aa238758e3f2f0d7c195709fbf217.tar.bz2 |
Handle typographical apostrophe with spelling service client
Do not mark typographical apostrophe as misspelled, if the server wants
to replace it with a typewriter apostrophe.
TEST=SpellCheckTest.CreateTextCheckingResults
TEST=SpellCheckTest.IsValidContraction
TEST=SpellcheckWordIteratorTest.TypographicalApostropheIsPartOfWord
TEST=SpellingServiceClientTest.RequestTextCheck
BUG=165079
Review URL: https://codereview.chromium.org/1092243002
Cr-Commit-Position: refs/heads/master@{#335193}
Diffstat (limited to 'chrome/renderer')
-rw-r--r-- | chrome/renderer/spellchecker/spellcheck.cc | 86 | ||||
-rw-r--r-- | chrome/renderer/spellchecker/spellcheck_unittest.cc | 99 | ||||
-rw-r--r-- | chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc | 48 |
3 files changed, 204 insertions, 29 deletions
diff --git a/chrome/renderer/spellchecker/spellcheck.cc b/chrome/renderer/spellchecker/spellcheck.cc index a43c727..fecb307 100644 --- a/chrome/renderer/spellchecker/spellcheck.cc +++ b/chrome/renderer/spellchecker/spellcheck.cc @@ -97,6 +97,29 @@ bool DocumentMarkersRemover::Visit(content::RenderView* render_view) { return true; } +bool IsApostrophe(base::char16 c) { + const base::char16 kApostrophe = 0x27; + const base::char16 kRightSingleQuotationMark = 0x2019; + return c == kApostrophe || c == kRightSingleQuotationMark; +} + +// Makes sure that the apostrophes in the |spelling_suggestion| are the same +// type as in the |misspelled_word| and in the same order. Ignore differences in +// the number of apostrophes. +void PreserveOriginalApostropheTypes(const base::string16& misspelled_word, + base::string16* spelling_suggestion) { + auto it = spelling_suggestion->begin(); + for (const base::char16& c : misspelled_word) { + if (IsApostrophe(c)) { + it = std::find_if(it, spelling_suggestion->end(), IsApostrophe); + if (it == spelling_suggestion->end()) + return; + + *it++ = c; + } + } +} + } // namespace class SpellCheck::SpellcheckRequest { @@ -383,33 +406,44 @@ void SpellCheck::CreateTextCheckingResults( const base::string16& line_text, const std::vector<SpellCheckResult>& spellcheck_results, WebVector<WebTextCheckingResult>* textcheck_results) { - // Double-check misspelled words with our spellchecker and attach grammar - // markers to them if our spellchecker tells they are correct words, i.e. they - // are probably contextually-misspelled words. - const base::char16* text = line_text.c_str(); - std::vector<WebTextCheckingResult> list; - for (size_t i = 0; i < spellcheck_results.size(); ++i) { - SpellCheckResult::Decoration decoration = spellcheck_results[i].decoration; - int word_location = spellcheck_results[i].location; - int word_length = spellcheck_results[i].length; - int misspelling_start = 0; - int misspelling_length = 0; - if (decoration == SpellCheckResult::SPELLING && - filter == USE_NATIVE_CHECKER) { - if (SpellCheckWord(text + word_location, word_length, 0, - &misspelling_start, &misspelling_length, NULL)) { - decoration = SpellCheckResult::GRAMMAR; - } + std::vector<WebTextCheckingResult> results; + for (const SpellCheckResult& spellcheck_result : spellcheck_results) { + const base::string16& misspelled_word = + line_text.substr(spellcheck_result.location, spellcheck_result.length); + + // Ignore words in custom dictionary. + if (custom_dictionary_.SpellCheckWord(misspelled_word, 0, + misspelled_word.length())) { + continue; } - if (!custom_dictionary_.SpellCheckWord( - line_text, word_location, word_length)) { - list.push_back(WebTextCheckingResult( - static_cast<WebTextDecorationType>(decoration), - word_location + line_offset, - word_length, - spellcheck_results[i].replacement, - spellcheck_results[i].hash)); + + // Use the same types of appostrophes as in the mispelled word. + base::string16 replacement = spellcheck_result.replacement; + PreserveOriginalApostropheTypes(misspelled_word, &replacement); + + // Ignore misspellings due the typographical apostrophe. + if (misspelled_word == replacement) + continue; + + // Double-check misspelled words with out spellchecker and attach grammar + // markers to them if our spellchecker tells us they are correct words, + // i.e. they are probably contextually-misspelled words. + SpellCheckResult::Decoration decoration = spellcheck_result.decoration; + int unused_misspelling_start = 0; + int unused_misspelling_length = 0; + if (decoration == SpellCheckResult::SPELLING && + filter == USE_NATIVE_CHECKER && + SpellCheckWord(misspelled_word.c_str(), misspelled_word.length(), 0, + &unused_misspelling_start, &unused_misspelling_length, + nullptr)) { + decoration = SpellCheckResult::GRAMMAR; } + + results.push_back(WebTextCheckingResult( + static_cast<WebTextDecorationType>(decoration), + line_offset + spellcheck_result.location, spellcheck_result.length, + replacement, spellcheck_result.hash)); } - textcheck_results->assign(list); + + textcheck_results->assign(results); } diff --git a/chrome/renderer/spellchecker/spellcheck_unittest.cc b/chrome/renderer/spellchecker/spellcheck_unittest.cc index 34886aa..0e36c24 100644 --- a/chrome/renderer/spellchecker/spellcheck_unittest.cc +++ b/chrome/renderer/spellchecker/spellcheck_unittest.cc @@ -17,6 +17,8 @@ #include "third_party/WebKit/public/web/WebTextCheckingCompletion.h" #include "third_party/WebKit/public/web/WebTextCheckingResult.h" +#define TYPOGRAPHICAL_APOSTROPHE L"\x2019" + namespace { base::FilePath GetHunspellDirectory() { @@ -80,6 +82,10 @@ class SpellCheckTest : public testing::Test { base::ASCIIToUTF16(word), tag); } + bool IsValidContraction(const base::string16& word, int tag) { + return spell_check_->spellcheck_.IsValidContraction(word, tag); + } + #if !defined(OS_MACOSX) protected: void TestSpellCheckParagraph( @@ -201,7 +207,7 @@ TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) { // A valid English contraction {L"isn't", true}, // A valid English contraction with a typographical apostrophe. - {L"isn\x2019t", true}, + {L"isn" TYPOGRAPHICAL_APOSTROPHE L"t", true}, // A valid English word enclosed with underscores. {L"_hello_", true}, @@ -1140,7 +1146,7 @@ TEST_F(SpellCheckTest, CreateTextCheckingResults) { text, spellcheck_results, &textcheck_results); - EXPECT_EQ(spellcheck_results.size(), textcheck_results.size()); + ASSERT_EQ(spellcheck_results.size(), textcheck_results.size()); EXPECT_EQ(blink::WebTextDecorationTypeSpelling, textcheck_results[0].decoration); EXPECT_EQ(spellcheck_results[0].location, textcheck_results[0].location); @@ -1160,12 +1166,78 @@ TEST_F(SpellCheckTest, CreateTextCheckingResults) { text, spellcheck_results, &textcheck_results); - EXPECT_EQ(spellcheck_results.size(), textcheck_results.size()); + ASSERT_EQ(spellcheck_results.size(), textcheck_results.size()); EXPECT_EQ(blink::WebTextDecorationTypeGrammar, textcheck_results[0].decoration); EXPECT_EQ(spellcheck_results[0].location, textcheck_results[0].location); EXPECT_EQ(spellcheck_results[0].length, textcheck_results[0].length); } + + // Verify that the SpellCheck preserves the original apostrophe type in the + // checked text, regardless of the type of apostrophe the browser returns. + { + base::string16 text = base::WideToUTF16( + L"Ik've havn" TYPOGRAPHICAL_APOSTROPHE L"t ni'n" + TYPOGRAPHICAL_APOSTROPHE L"out-s I've I" TYPOGRAPHICAL_APOSTROPHE + L"ve"); + std::vector<SpellCheckResult> spellcheck_results; + + // All typewriter apostrophe results. + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 0, 5, base::UTF8ToUTF16("I've"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 6, 6, base::UTF8ToUTF16("haven't"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 13, 10, base::UTF8ToUTF16("in'n'out's"))); + + // Replacements that differ only by apostrophe type should be ignored. + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 24, 4, base::UTF8ToUTF16("I've"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 29, 4, base::UTF8ToUTF16("I've"))); + + // All typographical apostrophe results. + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 0, 5, + base::WideToUTF16(L"I" TYPOGRAPHICAL_APOSTROPHE L"ve"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 6, 6, + base::WideToUTF16(L"haven" TYPOGRAPHICAL_APOSTROPHE L"t"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 13, 10, base::WideToUTF16( + L"in" TYPOGRAPHICAL_APOSTROPHE L"n" TYPOGRAPHICAL_APOSTROPHE L"out" + TYPOGRAPHICAL_APOSTROPHE L"s"))); + + // Replacements that differ only by apostrophe type should be ignored. + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 24, 4, + base::WideToUTF16(L"I" TYPOGRAPHICAL_APOSTROPHE L"ve"))); + spellcheck_results.push_back(SpellCheckResult( + SpellCheckResult::SPELLING, 29, 4, + base::WideToUTF16(L"I" TYPOGRAPHICAL_APOSTROPHE L"ve"))); + + blink::WebVector<blink::WebTextCheckingResult> textcheck_results; + spell_check()->CreateTextCheckingResults(SpellCheck::USE_NATIVE_CHECKER, 0, + text, spellcheck_results, + &textcheck_results); + + static const wchar_t* kExpectedReplacements[] = { + L"I've", + L"haven" TYPOGRAPHICAL_APOSTROPHE L"t", + L"in'n" TYPOGRAPHICAL_APOSTROPHE L"out's", + L"I've", + L"haven" TYPOGRAPHICAL_APOSTROPHE L"t", + L"in'n" TYPOGRAPHICAL_APOSTROPHE L"out" TYPOGRAPHICAL_APOSTROPHE L"s", + }; + + ASSERT_EQ(arraysize(kExpectedReplacements), textcheck_results.size()); + for (size_t i = 0; i < arraysize(kExpectedReplacements); ++i) { + EXPECT_EQ(base::WideToUTF16(kExpectedReplacements[i]), + textcheck_results[i].replacement) + << "i=" << i << "\nactual: \"" + << base::string16(textcheck_results[i].replacement) << "\""; + } + } } #endif @@ -1373,3 +1445,24 @@ TEST_F(SpellCheckTest, LogicalSuggestions) { EXPECT_EQ(suggestions[0], base::ASCIIToUTF16(kTestCases[i].suggestion)); } } + +// Words with apostrophes should be valid contractions. +TEST_F(SpellCheckTest, IsValidContraction) { + static const char* kLanguages[] = { + "en-AU", + "en-CA", + "en-GB", + "en-US", + }; + + static const wchar_t* kWords[] = { + L"in'n'out", + L"in" TYPOGRAPHICAL_APOSTROPHE L"n" TYPOGRAPHICAL_APOSTROPHE L"out", + }; + + for (size_t i = 0; i < arraysize(kLanguages); ++i) { + ReinitializeSpellCheck(kLanguages[i]); + for (size_t j = 0; j < arraysize(kWords); ++j) + EXPECT_TRUE(IsValidContraction(base::WideToUTF16(kWords[j]), 0)); + } +} diff --git a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc index d45470c..491a8f2 100644 --- a/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc +++ b/chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc @@ -229,6 +229,54 @@ TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) { } } +// Vertify SpellcheckWordIterator treats typographical apostrophe as a part of +// the word. +TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) { + static const struct { + const char* language; + const wchar_t* word; + } kTestCases[] = { + // Typewriter apostrophe: + { + "en-AU", L"you're" + }, { + "en-CA", L"you're" + }, { + "en-GB", L"you're" + }, { + "en-US", L"you're" + }, + // Typographical apostrophe: + { + "en-AU", L"you\x2019re" + }, { + "en-CA", L"you\x2019re" + }, { + "en-GB", L"you\x2019re" + }, { + "en-US", L"you\x2019re" + }, + }; + + for (size_t i = 0; i < arraysize(kTestCases); ++i) { + SpellcheckCharAttribute attributes; + attributes.SetDefaultLanguage(kTestCases[i].language); + + base::string16 input_word(base::WideToUTF16(kTestCases[i].word)); + SpellcheckWordIterator iterator; + EXPECT_TRUE(iterator.Initialize(&attributes, true)); + EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); + + base::string16 actual_word; + int actual_start, actual_end; + EXPECT_TRUE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end)); + EXPECT_EQ(input_word, actual_word); + EXPECT_EQ(0, actual_start); + EXPECT_EQ(input_word.length(), + static_cast<base::string16::size_type>(actual_end)); + } +} + TEST(SpellcheckWordIteratorTest, Initialization) { // Test initialization works when a default language is set. { |