diff options
author | sky@google.com <sky@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-08-29 19:48:58 +0000 |
---|---|---|
committer | sky@google.com <sky@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-08-29 19:48:58 +0000 |
commit | 6956cd63fd86015d328a3dfb669a68f34c5e21fa (patch) | |
tree | 10faa0ef5e17e000768e26e19b7777dea8186daa /chrome/browser/history | |
parent | d364c659874cdc48ea7d2e6e26c9df1335be1863 (diff) | |
download | chromium_src-6956cd63fd86015d328a3dfb669a68f34c5e21fa.zip chromium_src-6956cd63fd86015d328a3dfb669a68f34c5e21fa.tar.gz chromium_src-6956cd63fd86015d328a3dfb669a68f34c5e21fa.tar.bz2 |
Modifies the query parser to provide the position of the match in the
title. I'm going to use this in the omnibox. There are a couple of
other random spelling errors I came across that are included for your
review pleasure.
BUG=1256202
TEST=none
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@1546 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history')
-rw-r--r-- | chrome/browser/history/history_types.cc | 2 | ||||
-rw-r--r-- | chrome/browser/history/history_types.h | 9 | ||||
-rw-r--r-- | chrome/browser/history/query_parser.cc | 55 | ||||
-rw-r--r-- | chrome/browser/history/query_parser.h | 28 | ||||
-rw-r--r-- | chrome/browser/history/query_parser_unittest.cc | 42 |
5 files changed, 98 insertions, 38 deletions
diff --git a/chrome/browser/history/history_types.cc b/chrome/browser/history/history_types.cc index c58449a..44abb65 100644 --- a/chrome/browser/history/history_types.cc +++ b/chrome/browser/history/history_types.cc @@ -146,7 +146,7 @@ void QueryResults::AppendResultsBySwapping(QueryResults* other, AddURLUsageAtIndex(results_.back()->url(), results_.size() - 1); } - // We just took ownerwhip of all the results in the input vector. + // We just took ownership of all the results in the input vector. other->results_.clear(); other->url_to_results_.clear(); } diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h index c5c3e6c..9224c2a 100644 --- a/chrome/browser/history/history_types.h +++ b/chrome/browser/history/history_types.h @@ -317,6 +317,12 @@ class URLResult : public URLRow { : URLRow(url), visit_time_(visit_time) { } + // Constructor that create a URLResult from the specified URL and title match + // positions from title_matches. + URLResult(const GURL& url, const Snippet::MatchPositions& title_matches) + : URLRow(url) { + title_match_positions_ = title_matches; + } Time visit_time() const { return visit_time_; } void set_visit_time(Time visit_time) { visit_time_ = visit_time; } @@ -338,7 +344,7 @@ class URLResult : public URLRow { // The time that this result corresponds to. Time visit_time_; - // When setting, these values are set directly by the HistoryBackend. + // These values are typically set by HistoryBackend. Snippet snippet_; Snippet::MatchPositions title_match_positions_; @@ -492,4 +498,3 @@ struct KeywordSearchTermVisit { } // history #endif // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H__ - diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc index fe29a89..d33aea6 100644 --- a/chrome/browser/history/query_parser.cc +++ b/chrome/browser/history/query_parser.cc @@ -46,7 +46,8 @@ class QueryNodeWord : public QueryNode { const std::wstring& word() const { return word_; } void set_literal(bool literal) { literal_ = literal; } - virtual bool HasMatchIn(const std::vector<std::wstring>& words) const; + virtual bool HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const; virtual bool Matches(const std::wstring& word, bool exact) const; @@ -55,10 +56,16 @@ class QueryNodeWord : public QueryNode { bool literal_; }; -bool QueryNodeWord::HasMatchIn(const std::vector<std::wstring>& words) const { +bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const { for (size_t i = 0; i < words.size(); ++i) { - if (Matches(words[i], false)) + if (Matches(words[i].word, false)) { + int match_start = words[i].position; + match_positions->push_back( + std::pair<int, int>(match_start, + match_start + static_cast<int>(word_.size()))); return true; + } } return false; } @@ -103,7 +110,8 @@ class QueryNodeList : public QueryNode { NOTREACHED(); return false; } - virtual bool HasMatchIn(const std::vector<std::wstring>& words) const { + virtual bool HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const { NOTREACHED(); return false; } @@ -142,7 +150,8 @@ class QueryNodePhrase : public QueryNodeList { } virtual bool Matches(const std::wstring& word, bool exact) const; - virtual bool HasMatchIn(const std::vector<std::wstring>& words) const; + virtual bool HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const; }; bool QueryNodePhrase::Matches(const std::wstring& word, bool exact) const { @@ -150,20 +159,27 @@ bool QueryNodePhrase::Matches(const std::wstring& word, bool exact) const { return false; } -bool QueryNodePhrase::HasMatchIn(const std::vector<std::wstring>& words) const { +bool QueryNodePhrase::HasMatchIn( + const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const { if (words.size() < children_.size()) return false; for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { bool matched_all = true; for (size_t j = 0; j < children_.size(); ++j) { - if (!children_[j]->Matches(words[i + j], true)) { + if (!children_[j]->Matches(words[i + j].word, true)) { matched_all = false; break; } } - if (matched_all) + if (matched_all) { + const QueryWord& last_word = words[i + children_.size() - 1]; + match_positions->push_back( + std::pair<int, int>(words[i].position, + last_word.position + last_word.word.length())); return true; + } } return false; } @@ -197,20 +213,23 @@ void QueryParser::ParseQuery(const std::wstring& query, } bool QueryParser::DoesQueryMatch(const std::wstring& text, - const std::vector<QueryNode*>& query_nodes) { + const std::vector<QueryNode*>& query_nodes, + Snippet::MatchPositions* match_positions) { if (query_nodes.empty()) return false; - std::vector<std::wstring> query_words; - ExtractWords(l10n_util::ToLower(text), &query_words); + std::vector<QueryWord> query_words; + ExtractQueryWords(l10n_util::ToLower(text), &query_words); if (query_words.empty()) return false; + Snippet::MatchPositions matches; for (size_t i = 0; i < query_nodes.size(); ++i) { - if (!query_nodes[i]->HasMatchIn(query_words)) + if (!query_nodes[i]->HasMatchIn(query_words, &matches)) return false; } + match_positions->swap(matches); return true; } @@ -257,8 +276,8 @@ bool QueryParser::ParseQueryImpl(const std::wstring& query, return true; } -void QueryParser::ExtractWords(const std::wstring& text, - std::vector<std::wstring>* words) { +void QueryParser::ExtractQueryWords(const std::wstring& text, + std::vector<QueryWord>* words) { WordIterator iter(text, WordIterator::BREAK_WORD); // TODO(evanm): support a locale here if (!iter.Init()) @@ -270,8 +289,11 @@ void QueryParser::ExtractWords(const std::wstring& text, // or whitespace. if (iter.IsWord()) { std::wstring word = iter.GetWord(); - if (!word.empty()) - words->push_back(word); + if (!word.empty()) { + words->push_back(QueryWord()); + words->back().word = word; + words->back().position = iter.prev(); + } } } } @@ -290,4 +312,3 @@ void QueryNodeList::RemoveEmptySubnodes() { } } } - diff --git a/chrome/browser/history/query_parser.h b/chrome/browser/history/query_parser.h index b84356b..a6a0a11 100644 --- a/chrome/browser/history/query_parser.h +++ b/chrome/browser/history/query_parser.h @@ -11,8 +11,19 @@ #include <set> #include <vector> +#include "chrome/browser/history/snippet.h" + class QueryNodeList; +// Used by HasMatchIn. +struct QueryWord { + // The work to match against. + std::wstring word; + + // The starting position of the word in the original text. + int position; +}; + // QueryNode is used by QueryNodeParser to represent the elements that // constitute a query. While QueryNode is exposed by way of ParseQuery, it // really isn't meant for external usage. @@ -32,8 +43,11 @@ class QueryNode { // comparison. virtual bool Matches(const std::wstring& word, bool exact) const = 0; - // Returns true if this node matches at least one of the words in words. - virtual bool HasMatchIn(const std::vector<std::wstring>& words) const = 0; + // Returns true if this node matches at least one of the words in words. If + // the node matches at least one word, an entry is added to match_positions + // giving the matching region. + virtual bool HasMatchIn(const std::vector<QueryWord>& words, + Snippet::MatchPositions* match_positions) const = 0; }; @@ -54,9 +68,11 @@ class QueryParser { std::vector<QueryNode*>* nodes); // Returns true if the string text matches the query nodes created by a call - // to ParseQuery. + // to ParseQuery. If the query does match each of the matching positions in + // the text is added to |match_positions|. bool DoesQueryMatch(const std::wstring& text, - const std::vector<QueryNode*>& nodes); + const std::vector<QueryNode*>& nodes, + Snippet::MatchPositions* match_positions); private: // Does the work of parsing a query; creates nodes in QueryNodeList as @@ -65,8 +81,8 @@ class QueryParser { QueryNodeList* root); // Extracts the words from text, placing each word into words. - void ExtractWords(const std::wstring& text, - std::vector<std::wstring>* words); + void ExtractQueryWords(const std::wstring& text, + std::vector<QueryWord>* words); }; #endif // CHROME_BROWSER_HISTORY_QUERY_PARSER_H__ diff --git a/chrome/browser/history/query_parser_unittest.cc b/chrome/browser/history/query_parser_unittest.cc index d026df1..1e0b36d 100644 --- a/chrome/browser/history/query_parser_unittest.cc +++ b/chrome/browser/history/query_parser_unittest.cc @@ -88,25 +88,43 @@ TEST_F(QueryParserTest, ParseQueryNodesAndMatch) { const std::wstring query; const std::wstring text; const bool matches; + const int m1_start; + const int m1_end; + const int m2_start; + const int m2_end; } data[] = { - { L"blah", L"blah", true }, - { L"blah", L"foo", false }, - { L"blah", L"blahblah", true }, - { L"blah", L"foo blah", true }, - { L"foo blah", L"blah", false }, - { L"foo blah", L"blahx foobar", true }, - { L"\"foo blah\"", L"foo blah", true }, - { L"\"foo blah\"", L"foox blahx", false }, - { L"\"foo blah\"", L"foo blah", true }, - { L"\"foo blah\"", L"\"foo blah\"", true }, - { L"foo blah", L"\"foo bar blah\"", true }, + { L"blah", L"blah", true, 0, 4, 0, 0 }, + { L"blah", L"foo", false, 0, 0, 0, 0 }, + { L"blah", L"blahblah", true, 0, 4, 0, 0 }, + { L"blah", L"foo blah", true, 4, 8, 0, 0 }, + { L"foo blah", L"blah", false, 0, 0, 0, 0 }, + { L"foo blah", L"blahx foobar", true, 6, 9, 0, 4 }, + { L"\"foo blah\"", L"foo blah", true, 0, 8, 0, 0 }, + { L"\"foo blah\"", L"foox blahx", false, 0, 0, 0, 0 }, + { L"\"foo blah\"", L"foo blah", true, 0, 8, 0, 0 }, + { L"\"foo blah\"", L"\"foo blah\"", true, 1, 9, 0, 0 }, + { L"foo blah", L"\"foo bar blah\"", true, 1, 4, 9, 13 }, }; for (int i = 0; i < arraysize(data); ++i) { std::vector<std::wstring> results; QueryParser parser; ScopedVector<QueryNode> query_nodes; parser.ParseQuery(data[i].query, &query_nodes.get()); + Snippet::MatchPositions match_positions; ASSERT_EQ(data[i].matches, - parser.DoesQueryMatch(data[i].text, query_nodes.get())); + parser.DoesQueryMatch(data[i].text, query_nodes.get(), + &match_positions)); + size_t offset = 0; + if (data[i].m1_start != 0 || data[i].m1_end != 0) { + ASSERT_TRUE(match_positions.size() >= 1); + EXPECT_EQ(data[i].m1_start, match_positions[0].first); + EXPECT_EQ(data[i].m1_end, match_positions[0].second); + offset++; + } + if (data[i].m2_start != 0 || data[i].m2_end != 0) { + ASSERT_TRUE(match_positions.size() == 1 + offset); + EXPECT_EQ(data[i].m2_start, match_positions[offset].first); + EXPECT_EQ(data[i].m2_end, match_positions[offset].second); + } } } |