summaryrefslogtreecommitdiffstats
path: root/chrome/browser/history
diff options
context:
space:
mode:
authorsky@google.com <sky@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-12-09 18:25:38 +0000
committersky@google.com <sky@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-12-09 18:25:38 +0000
commit32cbfa4f0c14b80fb551530b15051b7d0d5bb8a4 (patch)
tree84307691175f3326ec3040d21056f535b6ca0ca0 /chrome/browser/history
parent02a742798da28b5d6901821b8c799c5d668e8ae4 (diff)
downloadchromium_src-32cbfa4f0c14b80fb551530b15051b7d0d5bb8a4.zip
chromium_src-32cbfa4f0c14b80fb551530b15051b7d0d5bb8a4.tar.gz
chromium_src-32cbfa4f0c14b80fb551530b15051b7d0d5bb8a4.tar.bz2
Changes query parser to sort and coalesce overlapping match positions.
BUG=5305 TEST=see bug Review URL: http://codereview.chromium.org/13666 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@6595 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history')
-rw-r--r--chrome/browser/history/query_parser.cc50
-rw-r--r--chrome/browser/history/query_parser_unittest.cc5
2 files changed, 53 insertions, 2 deletions
diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc
index 4d79a4c..ef40601 100644
--- a/chrome/browser/history/query_parser.cc
+++ b/chrome/browser/history/query_parser.cc
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include <algorithm>
+
#include "chrome/browser/history/query_parser.h"
#include "base/logging.h"
@@ -11,6 +13,49 @@
#include "chrome/common/scoped_vector.h"
#include "unicode/uscript.h"
+namespace {
+
+// Returns true if |mp1.first| is less than |mp2.first|. This is used to
+// sort match positions.
+int CompareMatchPosition(const Snippet::MatchPosition& mp1,
+ const Snippet::MatchPosition& mp2) {
+ return mp1.first < mp2.first;
+}
+
+// Returns true if |mp2| intersects |mp1|. This is intended for use by
+// CoalesceMatchesFrom and isn't meant as a general intersectpion comparison
+// function.
+bool SnippetIntersects(const Snippet::MatchPosition& mp1,
+ const Snippet::MatchPosition& mp2) {
+ return mp2.first >= mp1.first && mp2.first <= mp1.second;
+}
+
+// Coalesces match positions in |matches| after index that intersect the match
+// position at |index|.
+void CoalesceMatchesFrom(size_t index,
+ Snippet::MatchPositions* matches) {
+ Snippet::MatchPosition& mp = (*matches)[index];
+ for (Snippet::MatchPositions::iterator i = matches->begin() + index + 1;
+ i != matches->end(); ) {
+ if (SnippetIntersects(mp, *i)) {
+ mp.second = i->second;
+ i = matches->erase(i);
+ } else {
+ return;
+ }
+ }
+}
+
+// Sorts the match positions in |matches| by their first index, then coalesces
+// any match positions that intersect each other.
+void CoalseAndSortMatchPositions(Snippet::MatchPositions* matches) {
+ std::sort(matches->begin(), matches->end(), &CompareMatchPosition);
+ // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove
+ // from matches.
+ for (size_t i = 0; i < matches->size(); ++i)
+ CoalesceMatchesFrom(i, matches);
+}
+
// For CJK ideographs and Korean Hangul, even a single character
// can be useful in prefix matching, but that may give us too many
// false positives. Moreover, the current ICU word breaker gives us
@@ -18,7 +63,7 @@
// point doing anything for them and we only adjust the minimum length
// to 2 for Korean Hangul while using 3 for others. This is a temporary
// hack until we have a segmentation support.
-static inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word)
+inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word)
{
DCHECK(word.size() > 0);
size_t minimum_length = 3;
@@ -30,6 +75,8 @@ static inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word)
return word.size() >= minimum_length;
}
+} // namespace
+
// Inheritance structure:
// Queries are represented as trees of QueryNodes.
// QueryNodes are either a collection of subnodes (a QueryNodeList)
@@ -229,6 +276,7 @@ bool QueryParser::DoesQueryMatch(const std::wstring& text,
if (!query_nodes[i]->HasMatchIn(query_words, &matches))
return false;
}
+ CoalseAndSortMatchPositions(&matches);
match_positions->swap(matches);
return true;
}
diff --git a/chrome/browser/history/query_parser_unittest.cc b/chrome/browser/history/query_parser_unittest.cc
index 7246adc..50cb343 100644
--- a/chrome/browser/history/query_parser_unittest.cc
+++ b/chrome/browser/history/query_parser_unittest.cc
@@ -93,12 +93,15 @@ TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
const int m2_start;
const int m2_end;
} data[] = {
+ { L"foo foo", L"foo", true, 0, 3, 0, 0 },
+ { L"foo fooey", L"fooey", true, 0, 5, 0, 0 },
+ { L"foo fooey bar", L"bar fooey", true, 0, 3, 4, 9 },
{ L"blah", L"blah", true, 0, 4, 0, 0 },
{ L"blah", L"foo", false, 0, 0, 0, 0 },
{ L"blah", L"blahblah", true, 0, 4, 0, 0 },
{ L"blah", L"foo blah", true, 4, 8, 0, 0 },
{ L"foo blah", L"blah", false, 0, 0, 0, 0 },
- { L"foo blah", L"blahx foobar", true, 6, 9, 0, 4 },
+ { L"foo blah", L"blahx foobar", true, 0, 4, 6, 9 },
{ L"\"foo blah\"", L"foo blah", true, 0, 8, 0, 0 },
{ L"\"foo blah\"", L"foox blahx", false, 0, 0, 0, 0 },
{ L"\"foo blah\"", L"foo blah", true, 0, 8, 0, 0 },