diff options
author | sky@google.com <sky@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-12-09 18:25:38 +0000 |
---|---|---|
committer | sky@google.com <sky@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-12-09 18:25:38 +0000 |
commit | 32cbfa4f0c14b80fb551530b15051b7d0d5bb8a4 (patch) | |
tree | 84307691175f3326ec3040d21056f535b6ca0ca0 /chrome/browser/history | |
parent | 02a742798da28b5d6901821b8c799c5d668e8ae4 (diff) | |
download | chromium_src-32cbfa4f0c14b80fb551530b15051b7d0d5bb8a4.zip chromium_src-32cbfa4f0c14b80fb551530b15051b7d0d5bb8a4.tar.gz chromium_src-32cbfa4f0c14b80fb551530b15051b7d0d5bb8a4.tar.bz2 |
Changes query parser to sort and coalesce overlapping match positions.
BUG=5305
TEST=see bug
Review URL: http://codereview.chromium.org/13666
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@6595 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history')
-rw-r--r-- | chrome/browser/history/query_parser.cc | 50 | ||||
-rw-r--r-- | chrome/browser/history/query_parser_unittest.cc | 5 |
2 files changed, 53 insertions, 2 deletions
diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc index 4d79a4c..ef40601 100644 --- a/chrome/browser/history/query_parser.cc +++ b/chrome/browser/history/query_parser.cc @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include <algorithm> + #include "chrome/browser/history/query_parser.h" #include "base/logging.h" @@ -11,6 +13,49 @@ #include "chrome/common/scoped_vector.h" #include "unicode/uscript.h" +namespace { + +// Returns true if |mp1.first| is less than |mp2.first|. This is used to +// sort match positions. +int CompareMatchPosition(const Snippet::MatchPosition& mp1, + const Snippet::MatchPosition& mp2) { + return mp1.first < mp2.first; +} + +// Returns true if |mp2| intersects |mp1|. This is intended for use by +// CoalesceMatchesFrom and isn't meant as a general intersectpion comparison +// function. +bool SnippetIntersects(const Snippet::MatchPosition& mp1, + const Snippet::MatchPosition& mp2) { + return mp2.first >= mp1.first && mp2.first <= mp1.second; +} + +// Coalesces match positions in |matches| after index that intersect the match +// position at |index|. +void CoalesceMatchesFrom(size_t index, + Snippet::MatchPositions* matches) { + Snippet::MatchPosition& mp = (*matches)[index]; + for (Snippet::MatchPositions::iterator i = matches->begin() + index + 1; + i != matches->end(); ) { + if (SnippetIntersects(mp, *i)) { + mp.second = i->second; + i = matches->erase(i); + } else { + return; + } + } +} + +// Sorts the match positions in |matches| by their first index, then coalesces +// any match positions that intersect each other. +void CoalseAndSortMatchPositions(Snippet::MatchPositions* matches) { + std::sort(matches->begin(), matches->end(), &CompareMatchPosition); + // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove + // from matches. + for (size_t i = 0; i < matches->size(); ++i) + CoalesceMatchesFrom(i, matches); +} + // For CJK ideographs and Korean Hangul, even a single character // can be useful in prefix matching, but that may give us too many // false positives. Moreover, the current ICU word breaker gives us @@ -18,7 +63,7 @@ // point doing anything for them and we only adjust the minimum length // to 2 for Korean Hangul while using 3 for others. This is a temporary // hack until we have a segmentation support. -static inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word) +inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word) { DCHECK(word.size() > 0); size_t minimum_length = 3; @@ -30,6 +75,8 @@ static inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word) return word.size() >= minimum_length; } +} // namespace + // Inheritance structure: // Queries are represented as trees of QueryNodes. // QueryNodes are either a collection of subnodes (a QueryNodeList) @@ -229,6 +276,7 @@ bool QueryParser::DoesQueryMatch(const std::wstring& text, if (!query_nodes[i]->HasMatchIn(query_words, &matches)) return false; } + CoalseAndSortMatchPositions(&matches); match_positions->swap(matches); return true; } diff --git a/chrome/browser/history/query_parser_unittest.cc b/chrome/browser/history/query_parser_unittest.cc index 7246adc..50cb343 100644 --- a/chrome/browser/history/query_parser_unittest.cc +++ b/chrome/browser/history/query_parser_unittest.cc @@ -93,12 +93,15 @@ TEST_F(QueryParserTest, ParseQueryNodesAndMatch) { const int m2_start; const int m2_end; } data[] = { + { L"foo foo", L"foo", true, 0, 3, 0, 0 }, + { L"foo fooey", L"fooey", true, 0, 5, 0, 0 }, + { L"foo fooey bar", L"bar fooey", true, 0, 3, 4, 9 }, { L"blah", L"blah", true, 0, 4, 0, 0 }, { L"blah", L"foo", false, 0, 0, 0, 0 }, { L"blah", L"blahblah", true, 0, 4, 0, 0 }, { L"blah", L"foo blah", true, 4, 8, 0, 0 }, { L"foo blah", L"blah", false, 0, 0, 0, 0 }, - { L"foo blah", L"blahx foobar", true, 6, 9, 0, 4 }, + { L"foo blah", L"blahx foobar", true, 0, 4, 6, 9 }, { L"\"foo blah\"", L"foo blah", true, 0, 8, 0, 0 }, { L"\"foo blah\"", L"foox blahx", false, 0, 0, 0, 0 }, { L"\"foo blah\"", L"foo blah", true, 0, 8, 0, 0 }, |