diff options
Diffstat (limited to 'chrome/browser')
-rw-r--r-- | chrome/browser/autocomplete/history_contents_provider.cc | 4 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_index.cc | 202 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_index.h | 128 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_index_unittest.cc | 202 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_model.cc | 17 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_model.h | 9 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_model_unittest.cc | 22 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_storage.cc | 20 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_storage.h | 4 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_utils.cc | 26 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_utils.h | 7 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_utils_unittest.cc | 15 | ||||
-rw-r--r-- | chrome/browser/browser.vcproj | 8 | ||||
-rw-r--r-- | chrome/browser/history/query_parser.cc | 35 | ||||
-rw-r--r-- | chrome/browser/history/query_parser.h | 9 |
15 files changed, 615 insertions, 93 deletions
diff --git a/chrome/browser/autocomplete/history_contents_provider.cc b/chrome/browser/autocomplete/history_contents_provider.cc index ccd8d39..f194559 100644 --- a/chrome/browser/autocomplete/history_contents_provider.cc +++ b/chrome/browser/autocomplete/history_contents_provider.cc @@ -274,8 +274,8 @@ void HistoryContentsProvider::QueryBookmarks(const AutocompleteInput& input) { TimeTicks start_time = TimeTicks::Now(); std::vector<bookmark_utils::TitleMatch> matches; - bookmark_utils::GetBookmarksMatchingText(bookmark_model, input.text(), - kMaxMatchCount, &matches); + bookmark_model->GetBookmarksWithTitlesMatching(input.text(), max_matches(), + &matches); for (size_t i = 0; i < matches.size(); ++i) AddBookmarkTitleMatchToResults(matches[i]); UMA_HISTOGRAM_TIMES("Omnibox.QueryBookmarksTime", diff --git a/chrome/browser/bookmarks/bookmark_index.cc b/chrome/browser/bookmarks/bookmark_index.cc new file mode 100644 index 0000000..e769d74 --- /dev/null +++ b/chrome/browser/bookmarks/bookmark_index.cc @@ -0,0 +1,202 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/bookmarks/bookmark_index.h" + +#include <algorithm> +#include <iterator> + +#include "app/l10n_util.h" +#include "chrome/browser/bookmarks/bookmark_model.h" +#include "chrome/browser/bookmarks/bookmark_utils.h" +#include "chrome/browser/history/query_parser.h" + +BookmarkIndex::NodeSet::const_iterator + BookmarkIndex::Match::nodes_begin() const { + return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); +} + +BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { + return nodes.empty() ? terms.front()->second.end() : nodes.end(); +} + +void BookmarkIndex::Add(BookmarkNode* node) { + if (!node->is_url()) + return; + std::vector<std::wstring> terms = ExtractQueryWords(node->GetTitle()); + for (size_t i = 0; i < terms.size(); ++i) + RegisterNode(terms[i], node); +} + +void BookmarkIndex::Remove(BookmarkNode* node) { + if (!node->is_url()) + return; + + std::vector<std::wstring> terms = ExtractQueryWords(node->GetTitle()); + for (size_t i = 0; i < terms.size(); ++i) + UnregisterNode(terms[i], node); +} + +void BookmarkIndex::GetBookmarksWithTitlesMatching( + const std::wstring& query, + size_t max_count, + std::vector<bookmark_utils::TitleMatch>* results) { + std::vector<std::wstring> terms = ExtractQueryWords(query); + if (terms.empty()) + return; + + Matches matches; + for (size_t i = 0; i < terms.size(); ++i) { + if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) + return; + } + + // We use a QueryParser to fill in match positions for us. It's not the most + // efficient way to go about this, but by the time we get here we know what + // matches and so this shouldn't be performance critical. + QueryParser parser; + ScopedVector<QueryNode> query_nodes; + parser.ParseQuery(query, &query_nodes.get()); + for (size_t i = 0; i < matches.size() && results->size() < max_count; ++i) { + AddMatchToResults(matches[i], max_count, &parser, query_nodes.get(), + results); + } +} + +void BookmarkIndex::AddMatchToResults( + const Match& match, + size_t max_count, + QueryParser* parser, + const std::vector<QueryNode*>& query_nodes, + std::vector<bookmark_utils::TitleMatch>* results) { + for (NodeSet::const_iterator i = match.nodes_begin(); + i != match.nodes_end() && results->size() < max_count; ++i) { + results->push_back(bookmark_utils::TitleMatch()); + bookmark_utils::TitleMatch& title_match = results->back(); + title_match.node = *i; + if (!parser->DoesQueryMatch((*i)->GetTitle(), query_nodes, + &(title_match.match_positions))) { + // If we get here it implies the QueryParser didn't match something we + // thought should match. We should always match the same thing as the + // query parser. + NOTREACHED(); + } + } +} + +bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const std::wstring& term, + bool first_term, + Matches* matches) { + Index::const_iterator i = index_.lower_bound(term); + if (i == index_.end()) + return false; + + if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) { + // Term is too short for prefix match, compare using exact match. + if (i->first != term) + return false; // No bookmarks with this term. + + if (first_term) { + Match match; + match.terms.push_back(i); + matches->push_back(match); + return true; + } + CombineMatchesInPlace(i, matches); + } else if (first_term) { + // This is the first term and we're doing a prefix match. Loop through + // index adding all entries that start with term to matches. + while (i != index_.end() && + i->first.size() >= term.size() && + term.compare(0, term.size(), i->first, 0, term.size()) == 0) { + Match match; + match.terms.push_back(i); + matches->push_back(match); + ++i; + } + } else { + // Prefix match and not the first term. Loop through index combining + // current matches in matches with term, placing result in result. + Matches result; + while (i != index_.end() && + i->first.size() >= term.size() && + term.compare(0, term.size(), i->first, 0, term.size()) == 0) { + CombineMatches(i, *matches, &result); + ++i; + } + matches->swap(result); + } + return !matches->empty(); +} + +void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i, + Matches* matches) { + for (size_t i = 0; i < matches->size(); ) { + Match* match = &((*matches)[i]); + NodeSet intersection; + std::set_intersection(match->nodes_begin(), match->nodes_end(), + index_i->second.begin(), index_i->second.end(), + std::inserter(intersection, intersection.begin())); + if (intersection.empty()) { + matches->erase(matches->begin() + i); + } else { + match->terms.push_back(index_i); + match->nodes.swap(intersection); + ++i; + } + } +} + +void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i, + const Matches& current_matches, + Matches* result) { + for (size_t i = 0; i < current_matches.size(); ++i) { + const Match& match = current_matches[i]; + NodeSet intersection; + std::set_intersection(match.nodes_begin(), match.nodes_end(), + index_i->second.begin(), index_i->second.end(), + std::inserter(intersection, intersection.begin())); + if (!intersection.empty()) { + result->push_back(Match()); + Match& combined_match = result->back(); + combined_match.terms = match.terms; + combined_match.terms.push_back(index_i); + combined_match.nodes.swap(intersection); + } + } +} + +std::vector<std::wstring> BookmarkIndex::ExtractQueryWords( + const std::wstring& query) { + std::vector<std::wstring> terms; + if (query.empty()) + return terms; + QueryParser parser; + // TODO: use ICU normalization. + parser.ExtractQueryWords(l10n_util::ToLower(query), &terms); + return terms; +} + +void BookmarkIndex::RegisterNode(const std::wstring& term, + BookmarkNode* node) { + if (std::find(index_[term].begin(), index_[term].end(), node) != + index_[term].end()) { + // We've already added node for term. + return; + } + index_[term].insert(node); +} + +void BookmarkIndex::UnregisterNode(const std::wstring& term, + BookmarkNode* node) { + Index::iterator i = index_.find(term); + if (i == index_.end()) { + // We can get here if the node has the same term more than once. For + // example, a bookmark with the title 'foo foo' would end up here. + return; + } + i->second.erase(node); + if (i->second.empty()) + index_.erase(i); +} diff --git a/chrome/browser/bookmarks/bookmark_index.h b/chrome/browser/bookmarks/bookmark_index.h new file mode 100644 index 0000000..22c2ff5 --- /dev/null +++ b/chrome/browser/bookmarks/bookmark_index.h @@ -0,0 +1,128 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_BOOKMARKS_BOOKMARK_INDEX_H_ +#define CHROME_BROWSER_BOOKMARKS_BOOKMARK_INDEX_H_ + +#include <list> +#include <map> +#include <set> +#include <vector> + +#include "base/basictypes.h" + +class BookmarkNode; +class QueryNode; +class QueryParser; + +namespace bookmark_utils { +struct TitleMatch; +} + +// BookmarkIndex maintains an index of the titles of bookmarks for quick +// look up. BookmarkIndex is owned and maintained by BookmarkModel, you +// shouldn't need to interact directly with BookmarkIndex. +// +// BookmarkIndex maintains the index (index_) as a map of sets. The map (type +// Index) maps from a lower case string to the set (type NodeSet) of +// BookmarkNodes that contain that string in their title. + +class BookmarkIndex { + public: + BookmarkIndex() {} + + // Invoked when a bookmark has been added to the model. + void Add(BookmarkNode* node); + + // Invoked when a bookmark has been removed from the model. + void Remove(BookmarkNode* node); + + // Returns up to |max_count| of bookmarks containing the text |query|. + void GetBookmarksWithTitlesMatching( + const std::wstring& query, + size_t max_count, + std::vector<bookmark_utils::TitleMatch>* results); + + private: + typedef std::set<BookmarkNode*> NodeSet; + typedef std::map<std::wstring, NodeSet> Index; + + // Used when finding the set of bookmarks that match a query. Each match + // represents a set of terms (as an interator into the Index) matching the + // query as well as the set of nodes that contain those terms in their titles. + struct Match { + // List of terms matching the query. + std::list<Index::const_iterator> terms; + + // The set of nodes matching the terms. As an optimization this is empty + // when we match only one term, and is filled in when we get more than one + // term. We can do this as when we have only one matching term we know + // the set of matching nodes is terms.front()->second. + // + // Use nodes_begin() and nodes_end() to get an iterator over the set as + // it handles the necessary switching between nodes and terms.front(). + NodeSet nodes; + + // Returns an iterator to the beginning of the matching nodes. See + // description of nodes for why this should be used over nodes.begin(). + NodeSet::const_iterator nodes_begin() const; + + // Returns an iterator to the beginning of the matching nodes. See + // description of nodes for why this should be used over nodes.begin(). + NodeSet::const_iterator nodes_end() const; + }; + + typedef std::vector<Match> Matches; + + // Add all the matching nodes in |match.nodes| to |results| until there are + // at most |max_count| matches in |results|. + void AddMatchToResults(const Match& match, + size_t max_count, + QueryParser* parser, + const std::vector<QueryNode*>& query_nodes, + std::vector<bookmark_utils::TitleMatch>* results); + + // Populates |matches| for the specified term. If |first_term| is true, this + // is the first term in the query. Returns true if there is at least one node + // matching the term. + bool GetBookmarksWithTitleMatchingTerm(const std::wstring& term, + bool first_term, + Matches* matches); + + // Iterates over |matches| updating each Match's nodes to contain the + // intersection of the Match's current nodes and the nodes at |index_i|. + // If the intersection is empty, the Match is removed. + // + // This is invoked from GetBookmarksWithTitleMatchingTerm. + void CombineMatchesInPlace(const Index::const_iterator& index_i, + Matches* matches); + + // Iterates over |current_matches| calculating the intersection between the + // Match's nodes and the nodes at |index_i|. If the intersection between the + // two is non-empty, a new match is added to |result|. + // + // This differs from CombineMatchesInPlace in that if the intersection is + // non-empty the result is added to result, not combined in place. This + // variant is used for prefix matching. + // + // This is invoked from GetBookmarksWithTitleMatchingTerm. + void CombineMatches(const Index::const_iterator& index_i, + const Matches& current_matches, + Matches* result); + + // Returns the set of query words from |query|. + std::vector<std::wstring> ExtractQueryWords(const std::wstring& query); + + // Adds |node| to |index_|. + void RegisterNode(const std::wstring& term, BookmarkNode* node); + + // Removes |node| from |index_|. + void UnregisterNode(const std::wstring& term, BookmarkNode* node); + + Index index_; + + DISALLOW_COPY_AND_ASSIGN(BookmarkIndex); +}; + +#endif // CHROME_BROWSER_BOOKMARKS_BOOKMARK_INDEX_H_ diff --git a/chrome/browser/bookmarks/bookmark_index_unittest.cc b/chrome/browser/bookmarks/bookmark_index_unittest.cc new file mode 100644 index 0000000..10f43f0 --- /dev/null +++ b/chrome/browser/bookmarks/bookmark_index_unittest.cc @@ -0,0 +1,202 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <string> +#include <vector> + +#include "base/string_util.h" +#include "chrome/browser/bookmarks/bookmark_index.h" +#include "chrome/browser/bookmarks/bookmark_model.h" +#include "chrome/browser/history/query_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +class BookmarkIndexTest : public testing::Test { + public: + BookmarkIndexTest() : model_(new BookmarkModel(NULL)) {} + + void AddBookmarksWithTitles(const wchar_t** titles, size_t count) { + std::vector<std::wstring> title_vector; + for (size_t i = 0; i < count; ++i) + title_vector.push_back(titles[i]); + AddBookmarksWithTitles(title_vector); + } + + void AddBookmarksWithTitles(const std::vector<std::wstring>& titles) { + GURL url("about:blank"); + for (size_t i = 0; i < titles.size(); ++i) + model_->AddURL(model_->other_node(), static_cast<int>(i), titles[i], url); + } + + void ExpectMatches(const std::wstring& query, + const wchar_t** expected_titles, + size_t expected_count) { + std::vector<std::wstring> title_vector; + for (size_t i = 0; i < expected_count; ++i) + title_vector.push_back(expected_titles[i]); + ExpectMatches(query, title_vector); + } + + void ExpectMatches(const std::wstring& query, + const std::vector<std::wstring> expected_titles) { + std::vector<bookmark_utils::TitleMatch> matches; + model_->GetBookmarksWithTitlesMatching(query, 1000, &matches); + ASSERT_EQ(expected_titles.size(), matches.size()); + for (size_t i = 0; i < expected_titles.size(); ++i) { + bool found = false; + for (size_t j = 0; j < matches.size(); ++j) { + if (std::wstring(expected_titles[i]) == matches[j].node->GetTitle()) { + matches.erase(matches.begin() + j); + found = true; + break; + } + } + ASSERT_TRUE(found); + } + } + + void ExtractMatchPositions(const std::string& string, + Snippet::MatchPositions* matches) { + std::vector<std::string> match_strings; + SplitString(string, L':', &match_strings); + for (size_t i = 0; i < match_strings.size(); ++i) { + std::vector<std::string> chunks; + SplitString(match_strings[i], ',', &chunks); + ASSERT_EQ(2U, chunks.size()); + matches->push_back(Snippet::MatchPosition()); + matches->back().first = StringToInt(chunks[0]); + matches->back().second = StringToInt(chunks[1]); + } + } + + void ExpectMatchPositions(const std::wstring& query, + const Snippet::MatchPositions& expected_positions) { + std::vector<bookmark_utils::TitleMatch> matches; + model_->GetBookmarksWithTitlesMatching(query, 1000, &matches); + ASSERT_EQ(1U, matches.size()); + const bookmark_utils::TitleMatch& match = matches[0]; + ASSERT_EQ(expected_positions.size(), match.match_positions.size()); + for (size_t i = 0; i < expected_positions.size(); ++i) { + EXPECT_EQ(expected_positions[i].first, match.match_positions[i].first); + EXPECT_EQ(expected_positions[i].second, match.match_positions[i].second); + } + } + + + protected: + scoped_ptr<BookmarkModel> model_; + + private: + DISALLOW_COPY_AND_ASSIGN(BookmarkIndexTest); +}; + +// Various permutations with differing input, queries and output that exercises +// all query paths. +TEST_F(BookmarkIndexTest, Tests) { + struct TestData { + const std::wstring input; + const std::wstring query; + const std::wstring expected; + } data[] = { + // Trivial test case of only one term, exact match. + { L"a;b", L"A", L"a" }, + + // Prefix match, one term. + { L"abcd;abc;b", L"abc", L"abcd;abc" }, + + // Prefix match, multiple terms. + { L"abcd cdef;abcd;abcd cdefg", L"abc cde", L"abcd cdef;abcd cdefg"}, + + // Exact and prefix match. + { L"ab cdef;abcd;abcd cdefg", L"ab cdef", L"ab cdef"}, + + // Exact and prefix match. + { L"ab cdef ghij;ab;cde;cdef;ghi;cdef ab;ghij ab", + L"ab cde ghi", + L"ab cdef ghij"}, + + // Title with term multiple times. + { L"ab ab", L"ab", L"ab ab"}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { + std::vector<std::wstring> titles; + SplitString(data[i].input, L';', &titles); + AddBookmarksWithTitles(titles); + + std::vector<std::wstring> expected; + SplitString(data[i].expected, L';', &expected); + + ExpectMatches(data[i].query, expected); + + model_.reset(new BookmarkModel(NULL)); + } +} + +// Makes sure match positions are updated appropriately. +TEST_F(BookmarkIndexTest, MatchPositions) { + struct TestData { + const std::wstring title; + const std::wstring query; + const std::string expected; + } data[] = { + // Trivial test case of only one term, exact match. + { L"a", L"A", "0,1" }, + { L"foo bar", L"bar", "4,7" }, + { L"fooey bark", L"bar foo", "0,3:6,9"}, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { + std::vector<std::wstring> titles; + titles.push_back(data[i].title); + AddBookmarksWithTitles(titles); + + Snippet::MatchPositions expected_matches; + ExtractMatchPositions(data[i].expected, &expected_matches); + ExpectMatchPositions(data[i].query, expected_matches); + + model_.reset(new BookmarkModel(NULL)); + } +} + +// Makes sure index is updated when a node is removed. +TEST_F(BookmarkIndexTest, Remove) { + const wchar_t* input[] = { L"a", L"b" }; + AddBookmarksWithTitles(input, ARRAYSIZE_UNSAFE(input)); + + // Remove the node and make sure we don't get back any results. + model_->Remove(model_->other_node(), 0); + ExpectMatches(L"A", NULL, 0U); +} + +// Makes sure index is updated when a node's title is changed. +TEST_F(BookmarkIndexTest, ChangeTitle) { + const wchar_t* input[] = { L"a", L"b" }; + AddBookmarksWithTitles(input, ARRAYSIZE_UNSAFE(input)); + + // Remove the node and make sure we don't get back any results. + const wchar_t* expected[] = { L"blah" }; + model_->SetTitle(model_->other_node()->GetChild(0), L"blah"); + ExpectMatches(L"BlAh", expected, ARRAYSIZE_UNSAFE(expected)); +} + +// Makes sure no more than max queries is returned. +TEST_F(BookmarkIndexTest, HonorMax) { + const wchar_t* input[] = { L"abcd", L"abcde" }; + AddBookmarksWithTitles(input, ARRAYSIZE_UNSAFE(input)); + + std::vector<bookmark_utils::TitleMatch> matches; + model_->GetBookmarksWithTitlesMatching(L"ABc", 1, &matches); + EXPECT_EQ(1U, matches.size()); +} + +// Makes sure if the lower case string of a bookmark title is more characters +// than the upper case string no match positions are returned. +TEST_F(BookmarkIndexTest, EmptyMatchOnMultiwideLowercaseString) { + BookmarkNode* n1 = model_->AddURL(model_->other_node(), 0, L"\u0130 i", + GURL("http://www.google.com")); + + std::vector<bookmark_utils::TitleMatch> matches; + model_->GetBookmarksWithTitlesMatching(L"i", 100, &matches); + ASSERT_EQ(1U, matches.size()); + EXPECT_TRUE(matches[0].node == n1); + EXPECT_TRUE(matches[0].match_positions.empty()); +} diff --git a/chrome/browser/bookmarks/bookmark_model.cc b/chrome/browser/bookmarks/bookmark_model.cc index 209cc2b..2d0197c 100644 --- a/chrome/browser/bookmarks/bookmark_model.cc +++ b/chrome/browser/bookmarks/bookmark_model.cc @@ -213,8 +213,14 @@ void BookmarkModel::SetTitle(BookmarkNode* node, if (node->GetTitle() == title) return; + // The title index doesn't support changing the title, instead we remove then + // add it back. + index_.Remove(node); + node->SetTitle(title); + index_.Add(node); + if (store_.get()) store_->ScheduleSave(); @@ -372,6 +378,13 @@ void BookmarkModel::ResetDateGroupModified(BookmarkNode* node) { SetDateGroupModified(node, Time()); } +void BookmarkModel::GetBookmarksWithTitlesMatching( + const std::wstring& text, + size_t max_count, + std::vector<bookmark_utils::TitleMatch>* matches) { + index_.GetBookmarksWithTitlesMatching(text, max_count, matches); +} + void BookmarkModel::ClearStore() { if (profile_ && store_.get()) { NotificationService::current()->RemoveObserver( @@ -411,6 +424,8 @@ void BookmarkModel::RemoveNode(BookmarkNode* node, ++i; nodes_ordered_by_url_set_.erase(i); removed_urls->insert(node->GetURL()); + + index_.Remove(node); } CancelPendingFavIconLoadRequests(node); @@ -507,6 +522,8 @@ BookmarkNode* BookmarkModel::AddNode(BookmarkNode* parent, FOR_EACH_OBSERVER(BookmarkModelObserver, observers_, BookmarkNodeAdded(this, parent, index)); + index_.Add(node); + if (node->GetType() == history::StarredEntry::URL && !was_bookmarked) { history::URLsStarredDetails details(true); details.changed_urls.insert(node->GetURL()); diff --git a/chrome/browser/bookmarks/bookmark_model.h b/chrome/browser/bookmarks/bookmark_model.h index f605dec..8552e26 100644 --- a/chrome/browser/bookmarks/bookmark_model.h +++ b/chrome/browser/bookmarks/bookmark_model.h @@ -14,8 +14,10 @@ #include "base/lock.h" #include "base/observer_list.h" #include "base/waitable_event.h" +#include "chrome/browser/bookmarks/bookmark_index.h" #include "chrome/browser/bookmarks/bookmark_service.h" #include "chrome/browser/bookmarks/bookmark_storage.h" +#include "chrome/browser/bookmarks/bookmark_utils.h" #include "chrome/browser/cancelable_request.h" #include "chrome/browser/history/history.h" #include "chrome/browser/history/history_types.h" @@ -309,6 +311,11 @@ class BookmarkModel : public NotificationObserver, public BookmarkService { // combobox of most recently modified groups. void ResetDateGroupModified(BookmarkNode* node); + void GetBookmarksWithTitlesMatching( + const std::wstring& text, + size_t max_count, + std::vector<bookmark_utils::TitleMatch>* matches); + Profile* profile() const { return profile_; } // Sets the store to NULL, making it so the BookmarkModel does not persist @@ -437,6 +444,8 @@ class BookmarkModel : public NotificationObserver, public BookmarkService { // Reads/writes bookmarks to disk. scoped_refptr<BookmarkStorage> store_; + BookmarkIndex index_; + base::WaitableEvent loaded_signal_; DISALLOW_COPY_AND_ASSIGN(BookmarkModel); diff --git a/chrome/browser/bookmarks/bookmark_model_unittest.cc b/chrome/browser/bookmarks/bookmark_model_unittest.cc index e2f8571..0e297f4 100644 --- a/chrome/browser/bookmarks/bookmark_model_unittest.cc +++ b/chrome/browser/bookmarks/bookmark_model_unittest.cc @@ -338,28 +338,6 @@ TEST_F(BookmarkModelTest, MostRecentlyAddedEntries) { ASSERT_TRUE(n4 == recently_added[3]); } -// Makes sure GetBookmarksMatchingText works. -TEST_F(BookmarkModelTest, GetBookmarksMatchingText) { - // Add two urls with titles 'blah' and 'x' and one folder with the title - // 'blah'. - BookmarkNode* n1 = model.AddURL( - model.GetBookmarkBarNode(), 0, L"blah", GURL("http://foo.com/0")); - BookmarkNode* n2 = model.AddURL( - model.GetBookmarkBarNode(), 1, L"x", GURL("http://foo.com/1")); - model.AddGroup(model.GetBookmarkBarNode(), 2, L"blah"); - - // Make sure we don't get back the folder. - std::vector<bookmark_utils::TitleMatch> results; - bookmark_utils::GetBookmarksMatchingText(&model, L"blah", 2, &results); - ASSERT_EQ(1U, results.size()); - EXPECT_EQ(n1, results[0].node); - results.clear(); - - bookmark_utils::GetBookmarksMatchingText(&model, L"x", 2, &results); - ASSERT_EQ(1U, results.size()); - EXPECT_EQ(n2, results[0].node); -} - // Makes sure GetMostRecentlyAddedNodeForURL stays in sync. TEST_F(BookmarkModelTest, GetMostRecentlyAddedNodeForURL) { // Add a couple of nodes such that the following holds for the time of the diff --git a/chrome/browser/bookmarks/bookmark_storage.cc b/chrome/browser/bookmarks/bookmark_storage.cc index 7df18cd..e237d66 100644 --- a/chrome/browser/bookmarks/bookmark_storage.cc +++ b/chrome/browser/bookmarks/bookmark_storage.cc @@ -6,6 +6,7 @@ #include "base/compiler_specific.h" #include "base/file_util.h" +#include "base/histogram.h" #include "base/json_writer.h" #include "base/message_loop.h" #include "base/thread.h" @@ -19,6 +20,8 @@ #include "chrome/common/notification_source.h" #include "chrome/common/notification_type.h" +using base::TimeTicks; + namespace { // Extension used for backup files (copy of main file created during startup). @@ -193,8 +196,16 @@ void BookmarkStorage::OnLoadFinished(bool file_exists, const FilePath& path, return; if (root_value) { + TimeTicks start_time = TimeTicks::Now(); BookmarkCodec codec; codec.Decode(model_, *root_value); + UMA_HISTOGRAM_TIMES("Bookmarks.DecodeTime", + TimeTicks::Now() - start_time); + + start_time = TimeTicks::Now(); + AddBookmarksToIndex(model_->root_node()); + UMA_HISTOGRAM_TIMES("Bookmarks.CreatebookmarkIndexTime", + TimeTicks::Now() - start_time); } model_->DoneLoading(); @@ -246,3 +257,12 @@ void BookmarkStorage::RunTaskOnBackendThread(Task* task) const { delete task; } } + +void BookmarkStorage::AddBookmarksToIndex(BookmarkNode* node) { + if (node->is_url()) { + model_->index_.Add(node); + } else { + for (int i = 0; i < node->GetChildCount(); ++i) + AddBookmarksToIndex(node->GetChild(i)); + } +} diff --git a/chrome/browser/bookmarks/bookmark_storage.h b/chrome/browser/bookmarks/bookmark_storage.h index 61162eb..bb6c0cc 100644 --- a/chrome/browser/bookmarks/bookmark_storage.h +++ b/chrome/browser/bookmarks/bookmark_storage.h @@ -12,6 +12,7 @@ #include "chrome/common/notification_registrar.h" class BookmarkModel; +class BookmarkNode; class Profile; class Task; class Value; @@ -84,6 +85,9 @@ class BookmarkStorage : public NotificationObserver, // Returns the thread the backend is run on. const base::Thread* backend_thread() const { return backend_thread_; } + // Adds node to the model's index, recursing through all children as well. + void AddBookmarksToIndex(BookmarkNode* node); + // Keep the pointer to profile, we may need it for migration from history. Profile* profile_; diff --git a/chrome/browser/bookmarks/bookmark_utils.cc b/chrome/browser/bookmarks/bookmark_utils.cc index 8082f16..7cedca3 100644 --- a/chrome/browser/bookmarks/bookmark_utils.cc +++ b/chrome/browser/bookmarks/bookmark_utils.cc @@ -471,32 +471,6 @@ void GetMostRecentlyAddedEntries(BookmarkModel* model, } } -void GetBookmarksMatchingText(BookmarkModel* model, - const std::wstring& text, - size_t max_count, - std::vector<TitleMatch>* matches) { - QueryParser parser; - ScopedVector<QueryNode> query_nodes; - parser.ParseQuery(text, &query_nodes.get()); - if (query_nodes.empty()) - return; - - TreeNodeIterator<BookmarkNode> iterator(model->root_node()); - Snippet::MatchPositions match_position; - while (iterator.has_next()) { - BookmarkNode* node = iterator.Next(); - if (node->is_url() && - parser.DoesQueryMatch(node->GetTitle(), query_nodes.get(), - &match_position)) { - matches->push_back(TitleMatch()); - matches->back().node = node; - matches->back().match_positions.swap(match_position); - if (matches->size() == max_count) - break; - } - } -} - bool MoreRecentlyAdded(BookmarkNode* n1, BookmarkNode* n2) { return n1->date_added() > n2->date_added(); } diff --git a/chrome/browser/bookmarks/bookmark_utils.h b/chrome/browser/bookmarks/bookmark_utils.h index 81bdb84..7be4aad 100644 --- a/chrome/browser/bookmarks/bookmark_utils.h +++ b/chrome/browser/bookmarks/bookmark_utils.h @@ -123,13 +123,6 @@ struct TitleMatch { Snippet::MatchPositions match_positions; }; -// Returns the bookmarks whose title contains text. At most |max_count| -// matches are returned in |matches|. -void GetBookmarksMatchingText(BookmarkModel* model, - const std::wstring& text, - size_t max_count, - std::vector<TitleMatch>* matches); - // Returns true if |n1| was added more recently than |n2|. bool MoreRecentlyAdded(BookmarkNode* n1, BookmarkNode* n2); diff --git a/chrome/browser/bookmarks/bookmark_utils_unittest.cc b/chrome/browser/bookmarks/bookmark_utils_unittest.cc index b60bd45..a97a7ec 100644 --- a/chrome/browser/bookmarks/bookmark_utils_unittest.cc +++ b/chrome/browser/bookmarks/bookmark_utils_unittest.cc @@ -39,18 +39,3 @@ TEST_F(BookmarkUtilsTest, GetBookmarksContainingText) { EXPECT_TRUE(bookmark_utils::DoesBookmarkContainText(n1, L"foo bar")); nodes.clear(); } - -// Makes sure if the lower case string of a bookmark title is more characters -// than the upper case string no match positions are returned. -TEST_F(BookmarkUtilsTest, EmptyMatchOnMultiwideLowercaseString) { - BookmarkModel model(NULL); - BookmarkNode* n1 = - model.AddURL(model.other_node(), 0, L"\u0130 i", - GURL("http://www.google.com")); - - std::vector<bookmark_utils::TitleMatch> matches; - bookmark_utils::GetBookmarksMatchingText(&model, L"i", 100, &matches); - ASSERT_EQ(1U, matches.size()); - EXPECT_TRUE(matches[0].node == n1); - EXPECT_TRUE(matches[0].match_positions.empty()); -} diff --git a/chrome/browser/browser.vcproj b/chrome/browser/browser.vcproj index 6ba37de..7e0c503 100644 --- a/chrome/browser/browser.vcproj +++ b/chrome/browser/browser.vcproj @@ -634,6 +634,14 @@ > </File> <File + RelativePath=".\bookmarks\bookmark_index.cc" + > + </File> + <File + RelativePath=".\bookmarks\bookmark_index.h" + > + </File> + <File RelativePath=".\bookmarks\bookmark_menu_controller_win.cc" > </File> diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc index 39754ce..f3c4063 100644 --- a/chrome/browser/history/query_parser.cc +++ b/chrome/browser/history/query_parser.cc @@ -56,25 +56,6 @@ void CoalseAndSortMatchPositions(Snippet::MatchPositions* matches) { CoalesceMatchesFrom(i, matches); } -// For CJK ideographs and Korean Hangul, even a single character -// can be useful in prefix matching, but that may give us too many -// false positives. Moreover, the current ICU word breaker gives us -// back every single Chinese character as a word so that there's no -// point doing anything for them and we only adjust the minimum length -// to 2 for Korean Hangul while using 3 for others. This is a temporary -// hack until we have a segmentation support. -inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word) -{ - DCHECK(word.size() > 0); - size_t minimum_length = 3; - // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) - // because they 'behave like' Latin letters. Moreover, we should - // normalize the former before reaching here. - if (0xAC00 <= word[0] && word[0] <= 0xD7A3) - minimum_length = 2; - return word.size() >= minimum_length; -} - } // namespace // Inheritance structure: @@ -119,7 +100,7 @@ bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words, } bool QueryNodeWord::Matches(const std::wstring& word, bool exact) const { - if (exact || !IsWordLongEnoughForPrefixSearch(word_)) + if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) return word == word_; return word.size() >= word_.size() && (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); @@ -133,7 +114,7 @@ int QueryNodeWord::AppendToSQLiteQuery(std::wstring* query) const { query->append(word_); // Use prefix search if we're not literal and long enough. - if (!literal_ && IsWordLongEnoughForPrefixSearch(word_)) + if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) *query += L'*'; return 1; } @@ -260,6 +241,18 @@ bool QueryNodePhrase::HasMatchIn( QueryParser::QueryParser() { } +// static +bool QueryParser::IsWordLongEnoughForPrefixSearch(const std::wstring& word) { + DCHECK(word.size() > 0); + size_t minimum_length = 3; + // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) + // because they 'behave like' Latin letters. Moreover, we should + // normalize the former before reaching here. + if (0xAC00 <= word[0] && word[0] <= 0xD7A3) + minimum_length = 2; + return word.size() >= minimum_length; +} + // Returns true if the character is considered a quote. static bool IsQueryQuote(wchar_t ch) { return ch == '"' || diff --git a/chrome/browser/history/query_parser.h b/chrome/browser/history/query_parser.h index 1961d6e..a29548a8 100644 --- a/chrome/browser/history/query_parser.h +++ b/chrome/browser/history/query_parser.h @@ -58,6 +58,15 @@ class QueryParser { public: QueryParser(); + // For CJK ideographs and Korean Hangul, even a single character + // can be useful in prefix matching, but that may give us too many + // false positives. Moreover, the current ICU word breaker gives us + // back every single Chinese character as a word so that there's no + // point doing anything for them and we only adjust the minimum length + // to 2 for Korean Hangul while using 3 for others. This is a temporary + // hack until we have a segmentation support. + static bool IsWordLongEnoughForPrefixSearch(const std::wstring& word); + // Parse a query into a SQLite query. The resulting query is placed in // sqlite_query and the number of words is returned. int ParseQuery(const std::wstring& query, |