From 85d911cd2f9cf2cb8235ff46560b5931ef5ab16e Mon Sep 17 00:00:00 2001 From: "sky@chromium.org" Date: Tue, 19 May 2009 03:59:42 +0000 Subject: Adds an index over bookmark titles for fast look up. The index is currently built on the main thread (because that's where we do the decoding now), but I'll change that after landing this. BUG=6646 TEST=There are tests to cover this, but make sure the omnibox still suggests bookmark titles. Review URL: http://codereview.chromium.org/115403 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16357 0039d316-1c4b-4281-b951-d872f2087c98 --- .../autocomplete/history_contents_provider.cc | 4 +- chrome/browser/bookmarks/bookmark_index.cc | 202 +++++++++++++++++++++ chrome/browser/bookmarks/bookmark_index.h | 128 +++++++++++++ .../browser/bookmarks/bookmark_index_unittest.cc | 202 +++++++++++++++++++++ chrome/browser/bookmarks/bookmark_model.cc | 17 ++ chrome/browser/bookmarks/bookmark_model.h | 9 + .../browser/bookmarks/bookmark_model_unittest.cc | 22 --- chrome/browser/bookmarks/bookmark_storage.cc | 20 ++ chrome/browser/bookmarks/bookmark_storage.h | 4 + chrome/browser/bookmarks/bookmark_utils.cc | 26 --- chrome/browser/bookmarks/bookmark_utils.h | 7 - .../browser/bookmarks/bookmark_utils_unittest.cc | 15 -- chrome/browser/browser.vcproj | 8 + chrome/browser/history/query_parser.cc | 35 ++-- chrome/browser/history/query_parser.h | 9 + chrome/chrome.gyp | 3 + chrome/test/unit/unittests.vcproj | 4 + 17 files changed, 622 insertions(+), 93 deletions(-) create mode 100644 chrome/browser/bookmarks/bookmark_index.cc create mode 100644 chrome/browser/bookmarks/bookmark_index.h create mode 100644 chrome/browser/bookmarks/bookmark_index_unittest.cc (limited to 'chrome') diff --git a/chrome/browser/autocomplete/history_contents_provider.cc b/chrome/browser/autocomplete/history_contents_provider.cc index ccd8d39..f194559 100644 --- a/chrome/browser/autocomplete/history_contents_provider.cc +++ b/chrome/browser/autocomplete/history_contents_provider.cc @@ -274,8 +274,8 @@ void HistoryContentsProvider::QueryBookmarks(const AutocompleteInput& input) { TimeTicks start_time = TimeTicks::Now(); std::vector matches; - bookmark_utils::GetBookmarksMatchingText(bookmark_model, input.text(), - kMaxMatchCount, &matches); + bookmark_model->GetBookmarksWithTitlesMatching(input.text(), max_matches(), + &matches); for (size_t i = 0; i < matches.size(); ++i) AddBookmarkTitleMatchToResults(matches[i]); UMA_HISTOGRAM_TIMES("Omnibox.QueryBookmarksTime", diff --git a/chrome/browser/bookmarks/bookmark_index.cc b/chrome/browser/bookmarks/bookmark_index.cc new file mode 100644 index 0000000..e769d74 --- /dev/null +++ b/chrome/browser/bookmarks/bookmark_index.cc @@ -0,0 +1,202 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/bookmarks/bookmark_index.h" + +#include +#include + +#include "app/l10n_util.h" +#include "chrome/browser/bookmarks/bookmark_model.h" +#include "chrome/browser/bookmarks/bookmark_utils.h" +#include "chrome/browser/history/query_parser.h" + +BookmarkIndex::NodeSet::const_iterator + BookmarkIndex::Match::nodes_begin() const { + return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); +} + +BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { + return nodes.empty() ? terms.front()->second.end() : nodes.end(); +} + +void BookmarkIndex::Add(BookmarkNode* node) { + if (!node->is_url()) + return; + std::vector terms = ExtractQueryWords(node->GetTitle()); + for (size_t i = 0; i < terms.size(); ++i) + RegisterNode(terms[i], node); +} + +void BookmarkIndex::Remove(BookmarkNode* node) { + if (!node->is_url()) + return; + + std::vector terms = ExtractQueryWords(node->GetTitle()); + for (size_t i = 0; i < terms.size(); ++i) + UnregisterNode(terms[i], node); +} + +void BookmarkIndex::GetBookmarksWithTitlesMatching( + const std::wstring& query, + size_t max_count, + std::vector* results) { + std::vector terms = ExtractQueryWords(query); + if (terms.empty()) + return; + + Matches matches; + for (size_t i = 0; i < terms.size(); ++i) { + if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) + return; + } + + // We use a QueryParser to fill in match positions for us. It's not the most + // efficient way to go about this, but by the time we get here we know what + // matches and so this shouldn't be performance critical. + QueryParser parser; + ScopedVector query_nodes; + parser.ParseQuery(query, &query_nodes.get()); + for (size_t i = 0; i < matches.size() && results->size() < max_count; ++i) { + AddMatchToResults(matches[i], max_count, &parser, query_nodes.get(), + results); + } +} + +void BookmarkIndex::AddMatchToResults( + const Match& match, + size_t max_count, + QueryParser* parser, + const std::vector& query_nodes, + std::vector* results) { + for (NodeSet::const_iterator i = match.nodes_begin(); + i != match.nodes_end() && results->size() < max_count; ++i) { + results->push_back(bookmark_utils::TitleMatch()); + bookmark_utils::TitleMatch& title_match = results->back(); + title_match.node = *i; + if (!parser->DoesQueryMatch((*i)->GetTitle(), query_nodes, + &(title_match.match_positions))) { + // If we get here it implies the QueryParser didn't match something we + // thought should match. We should always match the same thing as the + // query parser. + NOTREACHED(); + } + } +} + +bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const std::wstring& term, + bool first_term, + Matches* matches) { + Index::const_iterator i = index_.lower_bound(term); + if (i == index_.end()) + return false; + + if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) { + // Term is too short for prefix match, compare using exact match. + if (i->first != term) + return false; // No bookmarks with this term. + + if (first_term) { + Match match; + match.terms.push_back(i); + matches->push_back(match); + return true; + } + CombineMatchesInPlace(i, matches); + } else if (first_term) { + // This is the first term and we're doing a prefix match. Loop through + // index adding all entries that start with term to matches. + while (i != index_.end() && + i->first.size() >= term.size() && + term.compare(0, term.size(), i->first, 0, term.size()) == 0) { + Match match; + match.terms.push_back(i); + matches->push_back(match); + ++i; + } + } else { + // Prefix match and not the first term. Loop through index combining + // current matches in matches with term, placing result in result. + Matches result; + while (i != index_.end() && + i->first.size() >= term.size() && + term.compare(0, term.size(), i->first, 0, term.size()) == 0) { + CombineMatches(i, *matches, &result); + ++i; + } + matches->swap(result); + } + return !matches->empty(); +} + +void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i, + Matches* matches) { + for (size_t i = 0; i < matches->size(); ) { + Match* match = &((*matches)[i]); + NodeSet intersection; + std::set_intersection(match->nodes_begin(), match->nodes_end(), + index_i->second.begin(), index_i->second.end(), + std::inserter(intersection, intersection.begin())); + if (intersection.empty()) { + matches->erase(matches->begin() + i); + } else { + match->terms.push_back(index_i); + match->nodes.swap(intersection); + ++i; + } + } +} + +void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i, + const Matches& current_matches, + Matches* result) { + for (size_t i = 0; i < current_matches.size(); ++i) { + const Match& match = current_matches[i]; + NodeSet intersection; + std::set_intersection(match.nodes_begin(), match.nodes_end(), + index_i->second.begin(), index_i->second.end(), + std::inserter(intersection, intersection.begin())); + if (!intersection.empty()) { + result->push_back(Match()); + Match& combined_match = result->back(); + combined_match.terms = match.terms; + combined_match.terms.push_back(index_i); + combined_match.nodes.swap(intersection); + } + } +} + +std::vector BookmarkIndex::ExtractQueryWords( + const std::wstring& query) { + std::vector terms; + if (query.empty()) + return terms; + QueryParser parser; + // TODO: use ICU normalization. + parser.ExtractQueryWords(l10n_util::ToLower(query), &terms); + return terms; +} + +void BookmarkIndex::RegisterNode(const std::wstring& term, + BookmarkNode* node) { + if (std::find(index_[term].begin(), index_[term].end(), node) != + index_[term].end()) { + // We've already added node for term. + return; + } + index_[term].insert(node); +} + +void BookmarkIndex::UnregisterNode(const std::wstring& term, + BookmarkNode* node) { + Index::iterator i = index_.find(term); + if (i == index_.end()) { + // We can get here if the node has the same term more than once. For + // example, a bookmark with the title 'foo foo' would end up here. + return; + } + i->second.erase(node); + if (i->second.empty()) + index_.erase(i); +} diff --git a/chrome/browser/bookmarks/bookmark_index.h b/chrome/browser/bookmarks/bookmark_index.h new file mode 100644 index 0000000..22c2ff5 --- /dev/null +++ b/chrome/browser/bookmarks/bookmark_index.h @@ -0,0 +1,128 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_BOOKMARKS_BOOKMARK_INDEX_H_ +#define CHROME_BROWSER_BOOKMARKS_BOOKMARK_INDEX_H_ + +#include +#include +#include +#include + +#include "base/basictypes.h" + +class BookmarkNode; +class QueryNode; +class QueryParser; + +namespace bookmark_utils { +struct TitleMatch; +} + +// BookmarkIndex maintains an index of the titles of bookmarks for quick +// look up. BookmarkIndex is owned and maintained by BookmarkModel, you +// shouldn't need to interact directly with BookmarkIndex. +// +// BookmarkIndex maintains the index (index_) as a map of sets. The map (type +// Index) maps from a lower case string to the set (type NodeSet) of +// BookmarkNodes that contain that string in their title. + +class BookmarkIndex { + public: + BookmarkIndex() {} + + // Invoked when a bookmark has been added to the model. + void Add(BookmarkNode* node); + + // Invoked when a bookmark has been removed from the model. + void Remove(BookmarkNode* node); + + // Returns up to |max_count| of bookmarks containing the text |query|. + void GetBookmarksWithTitlesMatching( + const std::wstring& query, + size_t max_count, + std::vector* results); + + private: + typedef std::set NodeSet; + typedef std::map Index; + + // Used when finding the set of bookmarks that match a query. Each match + // represents a set of terms (as an interator into the Index) matching the + // query as well as the set of nodes that contain those terms in their titles. + struct Match { + // List of terms matching the query. + std::list terms; + + // The set of nodes matching the terms. As an optimization this is empty + // when we match only one term, and is filled in when we get more than one + // term. We can do this as when we have only one matching term we know + // the set of matching nodes is terms.front()->second. + // + // Use nodes_begin() and nodes_end() to get an iterator over the set as + // it handles the necessary switching between nodes and terms.front(). + NodeSet nodes; + + // Returns an iterator to the beginning of the matching nodes. See + // description of nodes for why this should be used over nodes.begin(). + NodeSet::const_iterator nodes_begin() const; + + // Returns an iterator to the beginning of the matching nodes. See + // description of nodes for why this should be used over nodes.begin(). + NodeSet::const_iterator nodes_end() const; + }; + + typedef std::vector Matches; + + // Add all the matching nodes in |match.nodes| to |results| until there are + // at most |max_count| matches in |results|. + void AddMatchToResults(const Match& match, + size_t max_count, + QueryParser* parser, + const std::vector& query_nodes, + std::vector* results); + + // Populates |matches| for the specified term. If |first_term| is true, this + // is the first term in the query. Returns true if there is at least one node + // matching the term. + bool GetBookmarksWithTitleMatchingTerm(const std::wstring& term, + bool first_term, + Matches* matches); + + // Iterates over |matches| updating each Match's nodes to contain the + // intersection of the Match's current nodes and the nodes at |index_i|. + // If the intersection is empty, the Match is removed. + // + // This is invoked from GetBookmarksWithTitleMatchingTerm. + void CombineMatchesInPlace(const Index::const_iterator& index_i, + Matches* matches); + + // Iterates over |current_matches| calculating the intersection between the + // Match's nodes and the nodes at |index_i|. If the intersection between the + // two is non-empty, a new match is added to |result|. + // + // This differs from CombineMatchesInPlace in that if the intersection is + // non-empty the result is added to result, not combined in place. This + // variant is used for prefix matching. + // + // This is invoked from GetBookmarksWithTitleMatchingTerm. + void CombineMatches(const Index::const_iterator& index_i, + const Matches& current_matches, + Matches* result); + + // Returns the set of query words from |query|. + std::vector ExtractQueryWords(const std::wstring& query); + + // Adds |node| to |index_|. + void RegisterNode(const std::wstring& term, BookmarkNode* node); + + // Removes |node| from |index_|. + void UnregisterNode(const std::wstring& term, BookmarkNode* node); + + Index index_; + + DISALLOW_COPY_AND_ASSIGN(BookmarkIndex); +}; + +#endif // CHROME_BROWSER_BOOKMARKS_BOOKMARK_INDEX_H_ diff --git a/chrome/browser/bookmarks/bookmark_index_unittest.cc b/chrome/browser/bookmarks/bookmark_index_unittest.cc new file mode 100644 index 0000000..10f43f0 --- /dev/null +++ b/chrome/browser/bookmarks/bookmark_index_unittest.cc @@ -0,0 +1,202 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include + +#include "base/string_util.h" +#include "chrome/browser/bookmarks/bookmark_index.h" +#include "chrome/browser/bookmarks/bookmark_model.h" +#include "chrome/browser/history/query_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +class BookmarkIndexTest : public testing::Test { + public: + BookmarkIndexTest() : model_(new BookmarkModel(NULL)) {} + + void AddBookmarksWithTitles(const wchar_t** titles, size_t count) { + std::vector title_vector; + for (size_t i = 0; i < count; ++i) + title_vector.push_back(titles[i]); + AddBookmarksWithTitles(title_vector); + } + + void AddBookmarksWithTitles(const std::vector& titles) { + GURL url("about:blank"); + for (size_t i = 0; i < titles.size(); ++i) + model_->AddURL(model_->other_node(), static_cast(i), titles[i], url); + } + + void ExpectMatches(const std::wstring& query, + const wchar_t** expected_titles, + size_t expected_count) { + std::vector title_vector; + for (size_t i = 0; i < expected_count; ++i) + title_vector.push_back(expected_titles[i]); + ExpectMatches(query, title_vector); + } + + void ExpectMatches(const std::wstring& query, + const std::vector expected_titles) { + std::vector matches; + model_->GetBookmarksWithTitlesMatching(query, 1000, &matches); + ASSERT_EQ(expected_titles.size(), matches.size()); + for (size_t i = 0; i < expected_titles.size(); ++i) { + bool found = false; + for (size_t j = 0; j < matches.size(); ++j) { + if (std::wstring(expected_titles[i]) == matches[j].node->GetTitle()) { + matches.erase(matches.begin() + j); + found = true; + break; + } + } + ASSERT_TRUE(found); + } + } + + void ExtractMatchPositions(const std::string& string, + Snippet::MatchPositions* matches) { + std::vector match_strings; + SplitString(string, L':', &match_strings); + for (size_t i = 0; i < match_strings.size(); ++i) { + std::vector chunks; + SplitString(match_strings[i], ',', &chunks); + ASSERT_EQ(2U, chunks.size()); + matches->push_back(Snippet::MatchPosition()); + matches->back().first = StringToInt(chunks[0]); + matches->back().second = StringToInt(chunks[1]); + } + } + + void ExpectMatchPositions(const std::wstring& query, + const Snippet::MatchPositions& expected_positions) { + std::vector matches; + model_->GetBookmarksWithTitlesMatching(query, 1000, &matches); + ASSERT_EQ(1U, matches.size()); + const bookmark_utils::TitleMatch& match = matches[0]; + ASSERT_EQ(expected_positions.size(), match.match_positions.size()); + for (size_t i = 0; i < expected_positions.size(); ++i) { + EXPECT_EQ(expected_positions[i].first, match.match_positions[i].first); + EXPECT_EQ(expected_positions[i].second, match.match_positions[i].second); + } + } + + + protected: + scoped_ptr model_; + + private: + DISALLOW_COPY_AND_ASSIGN(BookmarkIndexTest); +}; + +// Various permutations with differing input, queries and output that exercises +// all query paths. +TEST_F(BookmarkIndexTest, Tests) { + struct TestData { + const std::wstring input; + const std::wstring query; + const std::wstring expected; + } data[] = { + // Trivial test case of only one term, exact match. + { L"a;b", L"A", L"a" }, + + // Prefix match, one term. + { L"abcd;abc;b", L"abc", L"abcd;abc" }, + + // Prefix match, multiple terms. + { L"abcd cdef;abcd;abcd cdefg", L"abc cde", L"abcd cdef;abcd cdefg"}, + + // Exact and prefix match. + { L"ab cdef;abcd;abcd cdefg", L"ab cdef", L"ab cdef"}, + + // Exact and prefix match. + { L"ab cdef ghij;ab;cde;cdef;ghi;cdef ab;ghij ab", + L"ab cde ghi", + L"ab cdef ghij"}, + + // Title with term multiple times. + { L"ab ab", L"ab", L"ab ab"}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { + std::vector titles; + SplitString(data[i].input, L';', &titles); + AddBookmarksWithTitles(titles); + + std::vector expected; + SplitString(data[i].expected, L';', &expected); + + ExpectMatches(data[i].query, expected); + + model_.reset(new BookmarkModel(NULL)); + } +} + +// Makes sure match positions are updated appropriately. +TEST_F(BookmarkIndexTest, MatchPositions) { + struct TestData { + const std::wstring title; + const std::wstring query; + const std::string expected; + } data[] = { + // Trivial test case of only one term, exact match. + { L"a", L"A", "0,1" }, + { L"foo bar", L"bar", "4,7" }, + { L"fooey bark", L"bar foo", "0,3:6,9"}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { + std::vector titles; + titles.push_back(data[i].title); + AddBookmarksWithTitles(titles); + + Snippet::MatchPositions expected_matches; + ExtractMatchPositions(data[i].expected, &expected_matches); + ExpectMatchPositions(data[i].query, expected_matches); + + model_.reset(new BookmarkModel(NULL)); + } +} + +// Makes sure index is updated when a node is removed. +TEST_F(BookmarkIndexTest, Remove) { + const wchar_t* input[] = { L"a", L"b" }; + AddBookmarksWithTitles(input, ARRAYSIZE_UNSAFE(input)); + + // Remove the node and make sure we don't get back any results. + model_->Remove(model_->other_node(), 0); + ExpectMatches(L"A", NULL, 0U); +} + +// Makes sure index is updated when a node's title is changed. +TEST_F(BookmarkIndexTest, ChangeTitle) { + const wchar_t* input[] = { L"a", L"b" }; + AddBookmarksWithTitles(input, ARRAYSIZE_UNSAFE(input)); + + // Remove the node and make sure we don't get back any results. + const wchar_t* expected[] = { L"blah" }; + model_->SetTitle(model_->other_node()->GetChild(0), L"blah"); + ExpectMatches(L"BlAh", expected, ARRAYSIZE_UNSAFE(expected)); +} + +// Makes sure no more than max queries is returned. +TEST_F(BookmarkIndexTest, HonorMax) { + const wchar_t* input[] = { L"abcd", L"abcde" }; + AddBookmarksWithTitles(input, ARRAYSIZE_UNSAFE(input)); + + std::vector matches; + model_->GetBookmarksWithTitlesMatching(L"ABc", 1, &matches); + EXPECT_EQ(1U, matches.size()); +} + +// Makes sure if the lower case string of a bookmark title is more characters +// than the upper case string no match positions are returned. +TEST_F(BookmarkIndexTest, EmptyMatchOnMultiwideLowercaseString) { + BookmarkNode* n1 = model_->AddURL(model_->other_node(), 0, L"\u0130 i", + GURL("http://www.google.com")); + + std::vector matches; + model_->GetBookmarksWithTitlesMatching(L"i", 100, &matches); + ASSERT_EQ(1U, matches.size()); + EXPECT_TRUE(matches[0].node == n1); + EXPECT_TRUE(matches[0].match_positions.empty()); +} diff --git a/chrome/browser/bookmarks/bookmark_model.cc b/chrome/browser/bookmarks/bookmark_model.cc index 209cc2b..2d0197c 100644 --- a/chrome/browser/bookmarks/bookmark_model.cc +++ b/chrome/browser/bookmarks/bookmark_model.cc @@ -213,8 +213,14 @@ void BookmarkModel::SetTitle(BookmarkNode* node, if (node->GetTitle() == title) return; + // The title index doesn't support changing the title, instead we remove then + // add it back. + index_.Remove(node); + node->SetTitle(title); + index_.Add(node); + if (store_.get()) store_->ScheduleSave(); @@ -372,6 +378,13 @@ void BookmarkModel::ResetDateGroupModified(BookmarkNode* node) { SetDateGroupModified(node, Time()); } +void BookmarkModel::GetBookmarksWithTitlesMatching( + const std::wstring& text, + size_t max_count, + std::vector* matches) { + index_.GetBookmarksWithTitlesMatching(text, max_count, matches); +} + void BookmarkModel::ClearStore() { if (profile_ && store_.get()) { NotificationService::current()->RemoveObserver( @@ -411,6 +424,8 @@ void BookmarkModel::RemoveNode(BookmarkNode* node, ++i; nodes_ordered_by_url_set_.erase(i); removed_urls->insert(node->GetURL()); + + index_.Remove(node); } CancelPendingFavIconLoadRequests(node); @@ -507,6 +522,8 @@ BookmarkNode* BookmarkModel::AddNode(BookmarkNode* parent, FOR_EACH_OBSERVER(BookmarkModelObserver, observers_, BookmarkNodeAdded(this, parent, index)); + index_.Add(node); + if (node->GetType() == history::StarredEntry::URL && !was_bookmarked) { history::URLsStarredDetails details(true); details.changed_urls.insert(node->GetURL()); diff --git a/chrome/browser/bookmarks/bookmark_model.h b/chrome/browser/bookmarks/bookmark_model.h index f605dec..8552e26 100644 --- a/chrome/browser/bookmarks/bookmark_model.h +++ b/chrome/browser/bookmarks/bookmark_model.h @@ -14,8 +14,10 @@ #include "base/lock.h" #include "base/observer_list.h" #include "base/waitable_event.h" +#include "chrome/browser/bookmarks/bookmark_index.h" #include "chrome/browser/bookmarks/bookmark_service.h" #include "chrome/browser/bookmarks/bookmark_storage.h" +#include "chrome/browser/bookmarks/bookmark_utils.h" #include "chrome/browser/cancelable_request.h" #include "chrome/browser/history/history.h" #include "chrome/browser/history/history_types.h" @@ -309,6 +311,11 @@ class BookmarkModel : public NotificationObserver, public BookmarkService { // combobox of most recently modified groups. void ResetDateGroupModified(BookmarkNode* node); + void GetBookmarksWithTitlesMatching( + const std::wstring& text, + size_t max_count, + std::vector* matches); + Profile* profile() const { return profile_; } // Sets the store to NULL, making it so the BookmarkModel does not persist @@ -437,6 +444,8 @@ class BookmarkModel : public NotificationObserver, public BookmarkService { // Reads/writes bookmarks to disk. scoped_refptr store_; + BookmarkIndex index_; + base::WaitableEvent loaded_signal_; DISALLOW_COPY_AND_ASSIGN(BookmarkModel); diff --git a/chrome/browser/bookmarks/bookmark_model_unittest.cc b/chrome/browser/bookmarks/bookmark_model_unittest.cc index e2f8571..0e297f4 100644 --- a/chrome/browser/bookmarks/bookmark_model_unittest.cc +++ b/chrome/browser/bookmarks/bookmark_model_unittest.cc @@ -338,28 +338,6 @@ TEST_F(BookmarkModelTest, MostRecentlyAddedEntries) { ASSERT_TRUE(n4 == recently_added[3]); } -// Makes sure GetBookmarksMatchingText works. -TEST_F(BookmarkModelTest, GetBookmarksMatchingText) { - // Add two urls with titles 'blah' and 'x' and one folder with the title - // 'blah'. - BookmarkNode* n1 = model.AddURL( - model.GetBookmarkBarNode(), 0, L"blah", GURL("http://foo.com/0")); - BookmarkNode* n2 = model.AddURL( - model.GetBookmarkBarNode(), 1, L"x", GURL("http://foo.com/1")); - model.AddGroup(model.GetBookmarkBarNode(), 2, L"blah"); - - // Make sure we don't get back the folder. - std::vector results; - bookmark_utils::GetBookmarksMatchingText(&model, L"blah", 2, &results); - ASSERT_EQ(1U, results.size()); - EXPECT_EQ(n1, results[0].node); - results.clear(); - - bookmark_utils::GetBookmarksMatchingText(&model, L"x", 2, &results); - ASSERT_EQ(1U, results.size()); - EXPECT_EQ(n2, results[0].node); -} - // Makes sure GetMostRecentlyAddedNodeForURL stays in sync. TEST_F(BookmarkModelTest, GetMostRecentlyAddedNodeForURL) { // Add a couple of nodes such that the following holds for the time of the diff --git a/chrome/browser/bookmarks/bookmark_storage.cc b/chrome/browser/bookmarks/bookmark_storage.cc index 7df18cd..e237d66 100644 --- a/chrome/browser/bookmarks/bookmark_storage.cc +++ b/chrome/browser/bookmarks/bookmark_storage.cc @@ -6,6 +6,7 @@ #include "base/compiler_specific.h" #include "base/file_util.h" +#include "base/histogram.h" #include "base/json_writer.h" #include "base/message_loop.h" #include "base/thread.h" @@ -19,6 +20,8 @@ #include "chrome/common/notification_source.h" #include "chrome/common/notification_type.h" +using base::TimeTicks; + namespace { // Extension used for backup files (copy of main file created during startup). @@ -193,8 +196,16 @@ void BookmarkStorage::OnLoadFinished(bool file_exists, const FilePath& path, return; if (root_value) { + TimeTicks start_time = TimeTicks::Now(); BookmarkCodec codec; codec.Decode(model_, *root_value); + UMA_HISTOGRAM_TIMES("Bookmarks.DecodeTime", + TimeTicks::Now() - start_time); + + start_time = TimeTicks::Now(); + AddBookmarksToIndex(model_->root_node()); + UMA_HISTOGRAM_TIMES("Bookmarks.CreatebookmarkIndexTime", + TimeTicks::Now() - start_time); } model_->DoneLoading(); @@ -246,3 +257,12 @@ void BookmarkStorage::RunTaskOnBackendThread(Task* task) const { delete task; } } + +void BookmarkStorage::AddBookmarksToIndex(BookmarkNode* node) { + if (node->is_url()) { + model_->index_.Add(node); + } else { + for (int i = 0; i < node->GetChildCount(); ++i) + AddBookmarksToIndex(node->GetChild(i)); + } +} diff --git a/chrome/browser/bookmarks/bookmark_storage.h b/chrome/browser/bookmarks/bookmark_storage.h index 61162eb..bb6c0cc 100644 --- a/chrome/browser/bookmarks/bookmark_storage.h +++ b/chrome/browser/bookmarks/bookmark_storage.h @@ -12,6 +12,7 @@ #include "chrome/common/notification_registrar.h" class BookmarkModel; +class BookmarkNode; class Profile; class Task; class Value; @@ -84,6 +85,9 @@ class BookmarkStorage : public NotificationObserver, // Returns the thread the backend is run on. const base::Thread* backend_thread() const { return backend_thread_; } + // Adds node to the model's index, recursing through all children as well. + void AddBookmarksToIndex(BookmarkNode* node); + // Keep the pointer to profile, we may need it for migration from history. Profile* profile_; diff --git a/chrome/browser/bookmarks/bookmark_utils.cc b/chrome/browser/bookmarks/bookmark_utils.cc index 8082f16..7cedca3 100644 --- a/chrome/browser/bookmarks/bookmark_utils.cc +++ b/chrome/browser/bookmarks/bookmark_utils.cc @@ -471,32 +471,6 @@ void GetMostRecentlyAddedEntries(BookmarkModel* model, } } -void GetBookmarksMatchingText(BookmarkModel* model, - const std::wstring& text, - size_t max_count, - std::vector* matches) { - QueryParser parser; - ScopedVector query_nodes; - parser.ParseQuery(text, &query_nodes.get()); - if (query_nodes.empty()) - return; - - TreeNodeIterator iterator(model->root_node()); - Snippet::MatchPositions match_position; - while (iterator.has_next()) { - BookmarkNode* node = iterator.Next(); - if (node->is_url() && - parser.DoesQueryMatch(node->GetTitle(), query_nodes.get(), - &match_position)) { - matches->push_back(TitleMatch()); - matches->back().node = node; - matches->back().match_positions.swap(match_position); - if (matches->size() == max_count) - break; - } - } -} - bool MoreRecentlyAdded(BookmarkNode* n1, BookmarkNode* n2) { return n1->date_added() > n2->date_added(); } diff --git a/chrome/browser/bookmarks/bookmark_utils.h b/chrome/browser/bookmarks/bookmark_utils.h index 81bdb84..7be4aad 100644 --- a/chrome/browser/bookmarks/bookmark_utils.h +++ b/chrome/browser/bookmarks/bookmark_utils.h @@ -123,13 +123,6 @@ struct TitleMatch { Snippet::MatchPositions match_positions; }; -// Returns the bookmarks whose title contains text. At most |max_count| -// matches are returned in |matches|. -void GetBookmarksMatchingText(BookmarkModel* model, - const std::wstring& text, - size_t max_count, - std::vector* matches); - // Returns true if |n1| was added more recently than |n2|. bool MoreRecentlyAdded(BookmarkNode* n1, BookmarkNode* n2); diff --git a/chrome/browser/bookmarks/bookmark_utils_unittest.cc b/chrome/browser/bookmarks/bookmark_utils_unittest.cc index b60bd45..a97a7ec 100644 --- a/chrome/browser/bookmarks/bookmark_utils_unittest.cc +++ b/chrome/browser/bookmarks/bookmark_utils_unittest.cc @@ -39,18 +39,3 @@ TEST_F(BookmarkUtilsTest, GetBookmarksContainingText) { EXPECT_TRUE(bookmark_utils::DoesBookmarkContainText(n1, L"foo bar")); nodes.clear(); } - -// Makes sure if the lower case string of a bookmark title is more characters -// than the upper case string no match positions are returned. -TEST_F(BookmarkUtilsTest, EmptyMatchOnMultiwideLowercaseString) { - BookmarkModel model(NULL); - BookmarkNode* n1 = - model.AddURL(model.other_node(), 0, L"\u0130 i", - GURL("http://www.google.com")); - - std::vector matches; - bookmark_utils::GetBookmarksMatchingText(&model, L"i", 100, &matches); - ASSERT_EQ(1U, matches.size()); - EXPECT_TRUE(matches[0].node == n1); - EXPECT_TRUE(matches[0].match_positions.empty()); -} diff --git a/chrome/browser/browser.vcproj b/chrome/browser/browser.vcproj index 6ba37de..7e0c503 100644 --- a/chrome/browser/browser.vcproj +++ b/chrome/browser/browser.vcproj @@ -634,6 +634,14 @@ > + + + + diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc index 39754ce..f3c4063 100644 --- a/chrome/browser/history/query_parser.cc +++ b/chrome/browser/history/query_parser.cc @@ -56,25 +56,6 @@ void CoalseAndSortMatchPositions(Snippet::MatchPositions* matches) { CoalesceMatchesFrom(i, matches); } -// For CJK ideographs and Korean Hangul, even a single character -// can be useful in prefix matching, but that may give us too many -// false positives. Moreover, the current ICU word breaker gives us -// back every single Chinese character as a word so that there's no -// point doing anything for them and we only adjust the minimum length -// to 2 for Korean Hangul while using 3 for others. This is a temporary -// hack until we have a segmentation support. -inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word) -{ - DCHECK(word.size() > 0); - size_t minimum_length = 3; - // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) - // because they 'behave like' Latin letters. Moreover, we should - // normalize the former before reaching here. - if (0xAC00 <= word[0] && word[0] <= 0xD7A3) - minimum_length = 2; - return word.size() >= minimum_length; -} - } // namespace // Inheritance structure: @@ -119,7 +100,7 @@ bool QueryNodeWord::HasMatchIn(const std::vector& words, } bool QueryNodeWord::Matches(const std::wstring& word, bool exact) const { - if (exact || !IsWordLongEnoughForPrefixSearch(word_)) + if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) return word == word_; return word.size() >= word_.size() && (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); @@ -133,7 +114,7 @@ int QueryNodeWord::AppendToSQLiteQuery(std::wstring* query) const { query->append(word_); // Use prefix search if we're not literal and long enough. - if (!literal_ && IsWordLongEnoughForPrefixSearch(word_)) + if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) *query += L'*'; return 1; } @@ -260,6 +241,18 @@ bool QueryNodePhrase::HasMatchIn( QueryParser::QueryParser() { } +// static +bool QueryParser::IsWordLongEnoughForPrefixSearch(const std::wstring& word) { + DCHECK(word.size() > 0); + size_t minimum_length = 3; + // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) + // because they 'behave like' Latin letters. Moreover, we should + // normalize the former before reaching here. + if (0xAC00 <= word[0] && word[0] <= 0xD7A3) + minimum_length = 2; + return word.size() >= minimum_length; +} + // Returns true if the character is considered a quote. static bool IsQueryQuote(wchar_t ch) { return ch == '"' || diff --git a/chrome/browser/history/query_parser.h b/chrome/browser/history/query_parser.h index 1961d6e..a29548a8 100644 --- a/chrome/browser/history/query_parser.h +++ b/chrome/browser/history/query_parser.h @@ -58,6 +58,15 @@ class QueryParser { public: QueryParser(); + // For CJK ideographs and Korean Hangul, even a single character + // can be useful in prefix matching, but that may give us too many + // false positives. Moreover, the current ICU word breaker gives us + // back every single Chinese character as a word so that there's no + // point doing anything for them and we only adjust the minimum length + // to 2 for Korean Hangul while using 3 for others. This is a temporary + // hack until we have a segmentation support. + static bool IsWordLongEnoughForPrefixSearch(const std::wstring& word); + // Parse a query into a SQLite query. The resulting query is placed in // sqlite_query and the number of words is returned. int ParseQuery(const std::wstring& query, diff --git a/chrome/chrome.gyp b/chrome/chrome.gyp index 9c74deb..a8bc6ed 100644 --- a/chrome/chrome.gyp +++ b/chrome/chrome.gyp @@ -548,6 +548,8 @@ 'browser/bookmarks/bookmark_editor.h', 'browser/bookmarks/bookmark_folder_tree_model.cc', 'browser/bookmarks/bookmark_folder_tree_model.h', + 'browser/bookmarks/bookmark_index.cc', + 'browser/bookmarks/bookmark_index.h', 'browser/bookmarks/bookmark_html_writer.cc', 'browser/bookmarks/bookmark_html_writer.h', 'browser/bookmarks/bookmark_menu_controller_gtk.cc', @@ -2649,6 +2651,7 @@ 'browser/bookmarks/bookmark_drag_data_unittest.cc', 'browser/bookmarks/bookmark_folder_tree_model_unittest.cc', 'browser/bookmarks/bookmark_html_writer_unittest.cc', + 'browser/bookmarks/bookmark_index_unittest.cc', 'browser/bookmarks/bookmark_model_test_utils.cc', 'browser/bookmarks/bookmark_model_test_utils.h', 'browser/bookmarks/bookmark_model_unittest.cc', diff --git a/chrome/test/unit/unittests.vcproj b/chrome/test/unit/unittests.vcproj index 349e013..90fa621 100644 --- a/chrome/test/unit/unittests.vcproj +++ b/chrome/test/unit/unittests.vcproj @@ -424,6 +424,10 @@ > + + -- cgit v1.1