diff options
author | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-13 14:06:21 +0000 |
---|---|---|
committer | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-13 14:06:21 +0000 |
commit | 38a5aafb705cd127c55b3a30baa5152d52195c7c (patch) | |
tree | 894264305b2d942c2ad639c9c65bfd4d7ccd267f | |
parent | ed5e6138e9a6485b5a74431c148661aa541cd501 (diff) | |
download | chromium_src-38a5aafb705cd127c55b3a30baa5152d52195c7c.zip chromium_src-38a5aafb705cd127c55b3a30baa5152d52195c7c.tar.gz chromium_src-38a5aafb705cd127c55b3a30baa5152d52195c7c.tar.bz2 |
HQP Refactoring (in Preparation for SQLite Cache)
1. Move ownership of the InMemoryURLIndex from the InMemoryHistoryBackend to the HistoryBackend where it truly belongs.
2. Encapsulate the private, persistent data for the InMemoryURLIndex in a new class, URLIndexPrivateData.
3. Handle (by notification) URL visits, updates and deletes. Refactor use of NOTIFICATION_HISTORY_URLS_DELETED to provide the deleted URLRow so that row ID is available.
4. Correctly handle the adding and removing of page title words when a URL change is detected.
5. Move most of the support types, including the new URLIndexPrivateData class, into a new file, in_memory_url_index_types.h.
6. Replace static class member functions with non-friend, non-class functions for better flexibility.
7. Move convenience types out from InMemoryURLIndex class up into history namespace.
8. Rename convenience types to generalize their intent.
9. Other small cleanups.
BUG=96731,92718
TEST=Unit tests updated.
TBR=atwilson,brettw
Review URL: http://codereview.chromium.org/8120004
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@105300 0039d316-1c4b-4281-b951-d872f2087c98
26 files changed, 987 insertions, 691 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index 3cfe87f..5ca8ecf 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -504,6 +504,8 @@ void AutocompleteProvider::Stop() { } void AutocompleteProvider::DeleteMatch(const AutocompleteMatch& match) { + DLOG(WARNING) << "The AutocompleteProvider '" << name() + << "' has not implemented DeleteMatch."; } AutocompleteProvider::~AutocompleteProvider() { diff --git a/chrome/browser/autocomplete/history_provider.cc b/chrome/browser/autocomplete/history_provider.cc index 9fe0754..8140aea 100644 --- a/chrome/browser/autocomplete/history_provider.cc +++ b/chrome/browser/autocomplete/history_provider.cc @@ -31,17 +31,16 @@ void HistoryProvider::DeleteMatch(const AutocompleteMatch& match) { profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); // Delete the match from the history DB. - GURL selected_url(match.destination_url); - if (!history_service || !selected_url.is_valid()) { - NOTREACHED() << "Can't delete requested URL"; - return; - } - history_service->DeleteURL(selected_url); + DCHECK(history_service); + DCHECK(match.destination_url.is_valid()); + history_service->DeleteURL(match.destination_url); + DeleteMatchFromMatches(match); +} - // Delete the match from the current set of matches. +void HistoryProvider::DeleteMatchFromMatches(const AutocompleteMatch& match) { bool found = false; for (ACMatches::iterator i(matches_.begin()); i != matches_.end(); ++i) { - if (i->destination_url == selected_url && i->type == match.type) { + if (i->destination_url == match.destination_url && i->type == match.type) { found = true; if (i->is_history_what_you_typed_match || i->starred) { // We can't get rid of What-You-Typed or Bookmarked matches, diff --git a/chrome/browser/autocomplete/history_provider.h b/chrome/browser/autocomplete/history_provider.h index 231a7b1..6cc0f80 100644 --- a/chrome/browser/autocomplete/history_provider.h +++ b/chrome/browser/autocomplete/history_provider.h @@ -54,6 +54,10 @@ class HistoryProvider : public AutocompleteProvider { // |input.prevent_inline_autocomplete()| is true, or the input text contains // trailing whitespace. bool PreventInlineAutocomplete(const AutocompleteInput& input); + + // Finds and removes the match from the current collection of matches and + // backing data. + void DeleteMatchFromMatches(const AutocompleteMatch& match); }; #endif // CHROME_BROWSER_AUTOCOMPLETE_HISTORY_PROVIDER_H_ diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc index ae42585..8b53a02 100644 --- a/chrome/browser/autocomplete/history_quick_provider.cc +++ b/chrome/browser/autocomplete/history_quick_provider.cc @@ -16,6 +16,7 @@ #include "base/utf_string_conversions.h" #include "chrome/browser/history/history.h" #include "chrome/browser/history/in_memory_url_index.h" +#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/net/url_fixer_upper.h" #include "chrome/browser/prefs/pref_service.h" #include "chrome/browser/profiles/profile.h" @@ -81,8 +82,7 @@ void HistoryQuickProvider::Start(const AutocompleteInput& input, } } -// HistoryQuickProvider matches are currently not deletable. -// TODO(mrossetti): Determine when a match should be deletable. +// TODO(mrossetti): Implement this function. (Will happen in next CL.) void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) {} void HistoryQuickProvider::DoAutocomplete() { @@ -90,8 +90,8 @@ void HistoryQuickProvider::DoAutocomplete() { string16 term_string = autocomplete_input_.text(); term_string = net::UnescapeURLComponent(term_string, UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); - history::InMemoryURLIndex::String16Vector terms( - InMemoryURLIndex::WordVectorFromString16(term_string, false)); + history::String16Vector terms( + history::String16VectorFromString16(term_string, false)); ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(terms); if (matches.empty()) return; @@ -131,13 +131,12 @@ AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch( // Format the URL autocomplete presentation. std::vector<size_t> offsets = - InMemoryURLIndex::OffsetsFromTermMatches(history_match.url_matches); + history::OffsetsFromTermMatches(history_match.url_matches); match.contents = net::FormatUrlWithOffsets(info.url(), languages_, net::kFormatUrlOmitAll, UnescapeRule::SPACES, NULL, NULL, &offsets); history::TermMatches new_matches = - InMemoryURLIndex::ReplaceOffsetsInTermMatches(history_match.url_matches, - offsets); + ReplaceOffsetsInTermMatches(history_match.url_matches, offsets); match.contents_class = SpansFromTermMatch(new_matches, match.contents.length(), true); match.fill_into_edit = match.contents; @@ -170,7 +169,8 @@ history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() { return history_service->InMemoryIndex(); } -void HistoryQuickProvider::SetIndexForTesting( +// TODO(mrossetti): This will be inlined in the next CL. +void HistoryQuickProvider::set_index( history::InMemoryURLIndex* index) { DCHECK(index); index_for_testing_.reset(index); diff --git a/chrome/browser/autocomplete/history_quick_provider.h b/chrome/browser/autocomplete/history_quick_provider.h index 8264669..4beac05 100644 --- a/chrome/browser/autocomplete/history_quick_provider.h +++ b/chrome/browser/autocomplete/history_quick_provider.h @@ -37,9 +37,6 @@ class HistoryQuickProvider : public HistoryProvider { virtual void DeleteMatch(const AutocompleteMatch& match) OVERRIDE; - // Performs the autocomplete matching and scoring. - void DoAutocomplete(); - // Disable this provider. For unit testing purposes only. This is required // because this provider is closely associated with the HistoryURLProvider // and in order to properly test the latter the HistoryQuickProvider must @@ -52,6 +49,9 @@ class HistoryQuickProvider : public HistoryProvider { FRIEND_TEST_ALL_PREFIXES(HistoryQuickProviderTest, Spans); FRIEND_TEST_ALL_PREFIXES(HistoryQuickProviderTest, Relevance); + // Performs the autocomplete matching and scoring. + void DoAutocomplete(); + // Creates an AutocompleteMatch from |history_match|. |max_match_score| gives // the maximum possible score for the match. |history_matches| is the full set // of matches to compare each match to when calculating confidence. @@ -81,7 +81,8 @@ class HistoryQuickProvider : public HistoryProvider { bool is_url); // Only for use in unittests. Takes ownership of |index|. - void SetIndexForTesting(history::InMemoryURLIndex* index); + void set_index(history::InMemoryURLIndex* index); + AutocompleteInput autocomplete_input_; std::string languages_; diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc index f936860..65b7654 100644 --- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc +++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc @@ -142,6 +142,13 @@ void HistoryQuickProviderTest::OnProviderUpdate(bool updated_matches) { void HistoryQuickProviderTest::FillData() { history::URLDatabase* db = history_service_->InMemoryDatabase(); ASSERT_TRUE(db != NULL); + + history::InMemoryURLIndex* index = + new history::InMemoryURLIndex(profile_.get(), + FilePath(FILE_PATH_LITERAL("/dummy"))); + PrefService* prefs = profile_->GetPrefs(); + std::string languages(prefs->GetString(prefs::kAcceptLanguages)); + index->Init(db, languages); for (size_t i = 0; i < arraysize(quick_test_db); ++i) { const TestURLInfo& cur = quick_test_db[i]; const GURL current_url(cur.url); @@ -153,20 +160,11 @@ void HistoryQuickProviderTest::FillData() { url_info.set_typed_count(cur.typed_count); url_info.set_last_visit(visit_time); url_info.set_hidden(false); - EXPECT_TRUE(db->AddURL(url_info)); - - history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title), - cur.visit_count, cur.typed_count, - visit_time, false, - history::SOURCE_BROWSED); + url_info.set_id(i); + index->UpdateURL(url_info); } - history::InMemoryURLIndex* index = - new history::InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))); - PrefService* prefs = profile_->GetPrefs(); - std::string languages(prefs->GetString(prefs::kAcceptLanguages)); - index->Init(db, languages); - provider_->SetIndexForTesting(index); + provider_->set_index(index); } HistoryQuickProviderTest::SetShouldContain::SetShouldContain( diff --git a/chrome/browser/history/expire_history_backend.cc b/chrome/browser/history/expire_history_backend.cc index 94ed1ff..904503a 100644 --- a/chrome/browser/history/expire_history_backend.cc +++ b/chrome/browser/history/expire_history_backend.cc @@ -341,11 +341,9 @@ void ExpireHistoryBackend::BroadcastDeleteNotifications( // determine if they care whether anything was deleted). URLsDeletedDetails* deleted_details = new URLsDeletedDetails; deleted_details->all_history = false; - std::vector<URLRow> typed_urls_changed; // Collect this for later. for (size_t i = 0; i < dependencies->deleted_urls.size(); i++) { + deleted_details->rows.push_back(dependencies->deleted_urls[i]); deleted_details->urls.insert(dependencies->deleted_urls[i].url()); - if (dependencies->deleted_urls[i].typed_count() > 0) - typed_urls_changed.push_back(dependencies->deleted_urls[i]); } delegate_->BroadcastNotifications( chrome::NOTIFICATION_HISTORY_URLS_DELETED, deleted_details); diff --git a/chrome/browser/history/history.cc b/chrome/browser/history/history.cc index 0465e8d..9451fb5 100644 --- a/chrome/browser/history/history.cc +++ b/chrome/browser/history/history.cc @@ -37,6 +37,7 @@ #include "chrome/browser/history/history_types.h" #include "chrome/browser/history/in_memory_database.h" #include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/in_memory_url_index.h" #include "chrome/browser/history/top_sites.h" #include "chrome/browser/prefs/pref_service.h" #include "chrome/browser/profiles/profile.h" @@ -182,6 +183,9 @@ void HistoryService::UnloadBackend() { // Get rid of the in-memory backend. in_memory_backend_.reset(); + // Give the InMemoryURLIndex a chance to shutdown. + InMemoryIndex()->ShutDown(); + // The backend's destructor must run on the history thread since it is not // threadsafe. So this thread must not be the last thread holding a reference // to the backend, or a crash could happen. @@ -249,8 +253,8 @@ history::InMemoryURLIndex* HistoryService::InMemoryIndex() { // LoadBackendIfNecessary() here even though it won't affect the return value // for this call. LoadBackendIfNecessary(); - if (in_memory_backend_.get()) - return in_memory_backend_->InMemoryIndex(); + if (history_backend_.get()) + return history_backend_->InMemoryIndex(); return NULL; } @@ -803,7 +807,8 @@ void HistoryService::LoadBackendIfNecessary() { ++current_backend_id_; scoped_refptr<HistoryBackend> backend( - new HistoryBackend(history_dir_, + new HistoryBackend(profile_, + history_dir_, current_backend_id_, new BackendDelegate(this, profile_), bookmark_service_)); diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc index f942672..a1a8e01 100644 --- a/chrome/browser/history/history_backend.cc +++ b/chrome/browser/history/history_backend.cc @@ -23,10 +23,13 @@ #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/history_publisher.h" #include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/in_memory_url_index.h" #include "chrome/browser/history/page_usage_data.h" #include "chrome/browser/history/top_sites.h" +#include "chrome/browser/profiles/profile.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_notification_types.h" +#include "chrome/common/chrome_switches.h" #include "chrome/common/url_constants.h" #include "content/browser/cancelable_request.h" #include "content/browser/download/download_persistent_store_info.h" @@ -199,7 +202,8 @@ class HistoryBackend::URLQuerier { // HistoryBackend -------------------------------------------------------------- -HistoryBackend::HistoryBackend(const FilePath& history_dir, +HistoryBackend::HistoryBackend(Profile* profile, + const FilePath& history_dir, int id, Delegate* delegate, BookmarkService* bookmark_service) @@ -212,6 +216,9 @@ HistoryBackend::HistoryBackend(const FilePath& history_dir, backend_destroy_task_(NULL), segment_queried_(false), bookmark_service_(bookmark_service) { + if (!CommandLine::ForCurrentProcess()->HasSwitch( + switches::kDisableHistoryQuickProvider)) + in_memory_url_index_.reset(new InMemoryURLIndex(profile, history_dir_)); } HistoryBackend::~HistoryBackend() { @@ -630,6 +637,9 @@ void HistoryBackend::InitImpl(const std::string& languages) { archived_db_.reset(); } + if (in_memory_url_index_.get()) + in_memory_url_index_->Init(db_.get(), languages); + // Tell the expiration module about all the nice databases we made. This must // happen before db_->Init() is called since the callback ForceArchiveHistory // may need to expire stuff. @@ -777,7 +787,6 @@ void HistoryBackend::AddPagesWithDetails(const std::vector<URLRow>& urls, NOTREACHED() << "Could not add row to DB"; return; } - if (i->typed_count() > 0) modified->changed_urls.push_back(*i); } @@ -864,6 +873,7 @@ void HistoryBackend::SetPageTitle(const GURL& url, if (row_id && row.title() != title) { row.set_title(title); db_->UpdateURLRow(row_id, row); + row.id_ = row_id; changed_urls.push_back(row); if (row.typed_count() > 0) typed_url_changed = true; diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h index 9936782..6c93070 100644 --- a/chrome/browser/history/history_backend.h +++ b/chrome/browser/history/history_backend.h @@ -27,6 +27,7 @@ class BookmarkService; struct DownloadPersistentStoreInfo; +class Profile; class TestingProfile; struct ThumbnailScore; @@ -104,7 +105,8 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // may be NULL. // // This constructor is fast and does no I/O, so can be called at any time. - HistoryBackend(const FilePath& history_dir, + HistoryBackend(Profile* profile, + const FilePath& history_dir, int id, Delegate* delegate, BookmarkService* bookmark_service); @@ -113,9 +115,9 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // fails, all other functions will fail as well. (Since this runs on another // thread, we don't bother returning failure.) // - // |languages| gives a list of language encodings with which the history - // URLs and omnibox searches are interpreted. - // |force_fail| can be set during unittests to unconditionally fail to init. + // |languages| gives the languages used to break search terms and history + // page titles into separate words. |force_fail| can be set during unittests + // to unconditionally fail to init. void Init(const std::string& languages, bool force_fail); // Notification that the history system is shutting down. This will break @@ -257,6 +259,13 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, const base::Time remove_end); void RemoveDownloads(const base::Time remove_end); + // InMemoryURLIndex ---------------------------------------------------------- + + // Returns the quick history index. + history::InMemoryURLIndex* InMemoryIndex() const { + return in_memory_url_index_.get(); + } + // Segment usage ------------------------------------------------------------- void QuerySegmentUsage(scoped_refptr<QuerySegmentUsageRequest> request, @@ -569,6 +578,9 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // created. scoped_ptr<TextDatabaseManager> text_database_; + // The index used for quick history lookups. + scoped_ptr<history::InMemoryURLIndex> in_memory_url_index_; + // Manages expiration between the various databases. ExpireHistoryBackend expirer_; diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc index 2443d8c1..f7dac5c 100644 --- a/chrome/browser/history/history_backend_unittest.cc +++ b/chrome/browser/history/history_backend_unittest.cc @@ -19,6 +19,7 @@ #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/in_memory_database.h" #include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/in_memory_url_index.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_paths.h" #include "chrome/common/thumbnail_score.h" @@ -146,7 +147,6 @@ class HistoryBackendTest : public testing::Test { BookmarkModel bookmark_model_; - protected: bool loaded_; private: @@ -157,7 +157,8 @@ class HistoryBackendTest : public testing::Test { if (!file_util::CreateNewTempDirectory(FILE_PATH_LITERAL("BackendTest"), &test_dir_)) return; - backend_ = new HistoryBackend(test_dir_, + backend_ = new HistoryBackend(NULL, + test_dir_, 0, new HistoryBackendTestDelegate(this), &bookmark_model_); @@ -955,7 +956,8 @@ TEST_F(HistoryBackendTest, MigrationVisitSource) { FilePath new_history_file = new_history_path.Append(chrome::kHistoryFilename); ASSERT_TRUE(file_util::CopyFile(old_history_path, new_history_file)); - backend_ = new HistoryBackend(new_history_path, + backend_ = new HistoryBackend(NULL, + new_history_path, 0, new HistoryBackendTestDelegate(this), &bookmark_model_); diff --git a/chrome/browser/history/history_notifications.h b/chrome/browser/history/history_notifications.h index ab9d555..a298536 100644 --- a/chrome/browser/history/history_notifications.h +++ b/chrome/browser/history/history_notifications.h @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -25,7 +25,7 @@ struct HistoryDetails { virtual ~HistoryDetails() {} }; -// Details for HISTORY_URL_VISITED. +// Details for NOTIFICATION_HISTORY_URL_VISITED. struct URLVisitedDetails : public HistoryDetails { URLVisitedDetails(); virtual ~URLVisitedDetails(); @@ -40,7 +40,7 @@ struct URLVisitedDetails : public HistoryDetails { history::RedirectList redirects; }; -// Details for NOTIFY_HISTORY_TYPED_URLS_MODIFIED. +// Details for NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED. struct URLsModifiedDetails : public HistoryDetails { URLsModifiedDetails(); virtual ~URLsModifiedDetails(); @@ -49,7 +49,7 @@ struct URLsModifiedDetails : public HistoryDetails { std::vector<URLRow> changed_urls; }; -// Details for NOTIFY_HISTORY_URLS_DELETED. +// Details for NOTIFICATION_HISTORY_URLS_DELETED. struct URLsDeletedDetails : public HistoryDetails { URLsDeletedDetails(); virtual ~URLsDeletedDetails(); @@ -57,9 +57,14 @@ struct URLsDeletedDetails : public HistoryDetails { // Set when all history was deleted. False means just a subset was deleted. bool all_history; + // The URLRows which have been deleted. + std::vector<URLRow> rows; + // The list of unique URLs affected. This is valid only when a subset of // history is deleted. When all of it is deleted, this will be empty, since - // we do not bother to list all URLs. + // we do not bother to list all URLs. (This information can be gleaned from + // |rows| but, since there are several clients who need the set, we pre-build + // it so that the clients don't have to.) std::set<GURL> urls; }; diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h index 6d13264..6898d8c 100644 --- a/chrome/browser/history/history_types.h +++ b/chrome/browser/history/history_types.h @@ -74,6 +74,7 @@ class URLRow { URLRow& operator=(const URLRow& other); URLID id() const { return id_; } + void set_id(URLID id) { id_ = id; } // Note: Use for unittesting only. const GURL& url() const { return url_; } const string16& title() const { diff --git a/chrome/browser/history/history_unittest.cc b/chrome/browser/history/history_unittest.cc index fa15325..c219735 100644 --- a/chrome/browser/history/history_unittest.cc +++ b/chrome/browser/history/history_unittest.cc @@ -119,8 +119,8 @@ class HistoryTest : public testing::Test { // Creates the HistoryBackend and HistoryDatabase on the current thread, // assigning the values to backend_ and db_. void CreateBackendAndDatabase() { - backend_ = - new HistoryBackend(history_dir_, 0, new BackendDelegate(this), NULL); + backend_ = new HistoryBackend(NULL, history_dir_, 0, + new BackendDelegate(this), NULL); backend_->Init(std::string(), false); db_ = backend_->db_.get(); DCHECK(in_mem_backend_.get()) << "Mem backend should have been set by " diff --git a/chrome/browser/history/in_memory_history_backend.cc b/chrome/browser/history/in_memory_history_backend.cc index 6652b1a..073fc8d 100644 --- a/chrome/browser/history/in_memory_history_backend.cc +++ b/chrome/browser/history/in_memory_history_backend.cc @@ -13,11 +13,9 @@ #include "chrome/browser/browser_process.h" #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/in_memory_database.h" -#include "chrome/browser/history/in_memory_url_index.h" #include "chrome/browser/history/url_database.h" #include "chrome/browser/profiles/profile.h" #include "chrome/common/chrome_notification_types.h" -#include "chrome/common/chrome_switches.h" #include "content/common/notification_details.h" #include "content/common/notification_source.h" @@ -27,23 +25,14 @@ InMemoryHistoryBackend::InMemoryHistoryBackend() : profile_(NULL) { } -InMemoryHistoryBackend::~InMemoryHistoryBackend() { - if (index_.get()) - index_->ShutDown(); -} +InMemoryHistoryBackend::~InMemoryHistoryBackend() {} bool InMemoryHistoryBackend::Init(const FilePath& history_filename, const FilePath& history_dir, URLDatabase* db, const std::string& languages) { db_.reset(new InMemoryDatabase); - bool success = db_->InitFromDisk(history_filename); - if (!CommandLine::ForCurrentProcess()->HasSwitch( - switches::kDisableHistoryQuickProvider)) { - index_.reset(new InMemoryURLIndex(history_dir)); - index_->Init(db, languages); - } - return success; + return db_->InitFromDisk(history_filename); } void InMemoryHistoryBackend::AttachToHistoryService(Profile* profile) { @@ -129,36 +118,25 @@ void InMemoryHistoryBackend::OnTypedURLsModified( db_->UpdateURLRow(id, *i); else id = db_->AddURL(*i); - if (index_.get()) - index_->UpdateURL(id, *i); } } void InMemoryHistoryBackend::OnURLsDeleted(const URLsDeletedDetails& details) { - DCHECK(db_.get()); - if (details.all_history) { // When all history is deleted, the individual URLs won't be listed. Just // create a new database to quickly clear everything out. db_.reset(new InMemoryDatabase); if (!db_->InitFromScratch()) db_.reset(); - if (index_.get()) - index_->ReloadFromHistory(db_.get(), true); return; } // Delete all matching URLs in our database. - for (std::set<GURL>::const_iterator i = details.urls.begin(); - i != details.urls.end(); ++i) { - URLID id = db_->GetRowForURL(*i, NULL); - if (id) { - // We typically won't have most of them since we only have a subset of - // history, so ignore errors. - db_->DeleteURLRow(id); - if (index_.get()) - index_->DeleteURL(id); - } + for (std::vector<URLRow>::const_iterator row = details.rows.begin(); + row != details.rows.end(); ++row) { + // We typically won't have most of them since we only have a subset of + // history, so ignore errors. + db_->DeleteURLRow(row->id()); } } diff --git a/chrome/browser/history/in_memory_history_backend.h b/chrome/browser/history/in_memory_history_backend.h index 13bf030..bd22d6d 100644 --- a/chrome/browser/history/in_memory_history_backend.h +++ b/chrome/browser/history/in_memory_history_backend.h @@ -70,9 +70,6 @@ class InMemoryHistoryBackend : public NotificationObserver { const NotificationSource& source, const NotificationDetails& details); - // Return the quick history index. - history::InMemoryURLIndex* InMemoryIndex() const { return index_.get(); } - private: FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteAll); @@ -96,9 +93,6 @@ class InMemoryHistoryBackend : public NotificationObserver { // initialization. Profile* profile_; - // The index used for quick history lookups. - scoped_ptr<history::InMemoryURLIndex> index_; - DISALLOW_COPY_AND_ASSIGN(InMemoryHistoryBackend); }; diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index c557acb..344995c 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -11,18 +11,20 @@ #include <numeric> #include "base/file_util.h" -#include "base/i18n/break_iterator.h" #include "base/i18n/case_conversion.h" #include "base/metrics/histogram.h" -#include "base/string_util.h" #include "base/threading/thread_restrictions.h" #include "base/time.h" #include "base/utf_string_conversions.h" #include "chrome/browser/autocomplete/autocomplete.h" #include "chrome/browser/autocomplete/history_provider_util.h" +#include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/url_database.h" #include "chrome/browser/profiles/profile.h" +#include "chrome/common/chrome_notification_types.h" #include "chrome/common/url_constants.h" +#include "content/common/notification_details.h" +#include "content/common/notification_source.h" #include "googleurl/src/url_parse.h" #include "googleurl/src/url_util.h" #include "net/base/escape.h" @@ -59,23 +61,6 @@ const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; // each of these scores for each factor. const int kScoreRank[] = { 1425, 1200, 900, 400 }; -ScoredHistoryMatch::ScoredHistoryMatch() - : raw_score(0), - can_inline(false) {} - -ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) - : HistoryMatch(url_info, 0, false, false), - raw_score(0), - can_inline(false) {} - -ScoredHistoryMatch::~ScoredHistoryMatch() {} - -// Comparison function for sorting ScoredMatches by their scores. -bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, - const ScoredHistoryMatch& m2) { - return m1.raw_score >= m2.raw_score; -} - InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( const WordIDSet& word_id_set, const HistoryIDSet& history_id_set) @@ -88,11 +73,6 @@ InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} -// Comparison function for sorting TermMatches by their offsets. -bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { - return m1.offset < m2.offset; -} - // Comparison function for sorting search terms by descending length. bool LengthGreater(const string16& string_a, const string16& string_b) { return string_a.length() > string_b.length(); @@ -135,15 +115,23 @@ int ScoreForValue(int value, const int* value_ranks) { return score; } -InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) +InMemoryURLIndex::InMemoryURLIndex(Profile* profile, + const FilePath& history_dir) : history_dir_(history_dir), - history_item_count_(0) { + private_data_(new URLIndexPrivateData) { InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); + if (profile) { + Source<Profile> source(profile); + registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source); + registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED, + source); + registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source); + } } // Called only by unit tests. InMemoryURLIndex::InMemoryURLIndex() - : history_item_count_(0) { + : private_data_(new URLIndexPrivateData) { InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); } @@ -164,7 +152,7 @@ void InMemoryURLIndex::InitializeSchemeWhitelist( // Indexing -bool InMemoryURLIndex::Init(history::URLDatabase* history_db, +bool InMemoryURLIndex::Init(URLDatabase* history_db, const std::string& languages) { // TODO(mrossetti): Register for profile/language change notifications. languages_ = languages; @@ -172,10 +160,52 @@ bool InMemoryURLIndex::Init(history::URLDatabase* history_db, } void InMemoryURLIndex::ShutDown() { + registrar_.RemoveAll(); // Write our cache. SaveToCacheFile(); } +void InMemoryURLIndex::Observe(int type, + const NotificationSource& source, + const NotificationDetails& details) { + switch (type) { + case chrome::NOTIFICATION_HISTORY_URL_VISITED: + OnURLVisited(Details<URLVisitedDetails>(details).ptr()); + break; + case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED: + OnURLsModified(Details<history::URLsModifiedDetails>(details).ptr()); + break; + case chrome::NOTIFICATION_HISTORY_URLS_DELETED: + OnURLsDeleted(Details<history::URLsDeletedDetails>(details).ptr()); + break; + default: + // For simplicity, the unit tests send us all notifications, even when + // we haven't registered for them, so don't assert here. + break; + } +} + +void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) { + UpdateURL(details->row); +} + +void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) { + for (std::vector<history::URLRow>::const_iterator row = + details->changed_urls.begin(); + row != details->changed_urls.end(); ++row) + UpdateURL(*row); +} + +void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) { + if (details->all_history) { + ClearPrivateData(); + } else { + for (std::vector<URLRow>::const_iterator row = details->rows.begin(); + row != details->rows.end(); ++row) + DeleteURL(*row); + } +} + bool InMemoryURLIndex::IndexRow(const URLRow& row) { const GURL& gurl(row.url()); @@ -183,26 +213,39 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) return true; + URLID row_id = row.id(); + // Strip out username and password before saving and indexing. string16 url(net::FormatUrl(gurl, languages_, net::kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, NULL, NULL, NULL)); - - HistoryID history_id = static_cast<HistoryID>(row.id()); - DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); + HistoryID history_id = static_cast<HistoryID>(row_id); + DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); // Add the row for quick lookup in the history info store. - URLRow new_row(GURL(url), row.id()); + URLRow new_row(GURL(url), row_id); new_row.set_visit_count(row.visit_count()); new_row.set_typed_count(row.typed_count()); new_row.set_last_visit(row.last_visit()); new_row.set_title(row.title()); - history_info_map_[history_id] = new_row; + private_data_->history_info_map_[history_id] = new_row; + // Index the words contained in the URL and title of the row. + AddRowWordsToIndex(new_row); + return true; +} + +void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) { + HistoryID history_id = static_cast<HistoryID>(row.id()); // Split URL into individual, unique words then add in the title words. + const GURL& gurl(row.url()); + string16 url(net::FormatUrl(gurl, languages_, + net::kFormatUrlOmitUsernamePassword, + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, + NULL, NULL, NULL)); url = base::i18n::ToLower(url); - String16Set url_words = WordSetFromString16(url); - String16Set title_words = WordSetFromString16(row.title()); + String16Set url_words = String16SetFromString16(url); + String16Set title_words = String16SetFromString16(row.title()); String16Set words; std::set_union(url_words.begin(), url_words.end(), title_words.begin(), title_words.end(), @@ -211,8 +254,48 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { word_iter != words.end(); ++word_iter) AddWordToIndex(*word_iter, history_id); - ++history_item_count_; - return true; + search_term_cache_.clear(); // Invalidate the term cache. +} + +void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) { + RemoveRowWordsFromIndex(row); + HistoryID history_id = static_cast<HistoryID>(row.id()); + private_data_->history_info_map_.erase(history_id); +} + +void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) { + // Remove the entries in history_id_word_map_ and word_id_history_map_ for + // this row. + URLIndexPrivateData& private_data(*(private_data_.get())); + HistoryID history_id = static_cast<HistoryID>(row.id()); + WordIDSet word_id_set = private_data.history_id_word_map_[history_id]; + private_data.history_id_word_map_.erase(history_id); + + // Reconcile any changes to word usage. + for (WordIDSet::iterator word_id_iter = word_id_set.begin(); + word_id_iter != word_id_set.end(); ++word_id_iter) { + WordID word_id = *word_id_iter; + private_data.word_id_history_map_[word_id].erase(history_id); + if (!private_data.word_id_history_map_[word_id].empty()) + continue; // The word is still in use. + + // The word is no longer in use. Reconcile any changes to character usage. + string16 word = private_data.word_list_[word_id]; + Char16Set characters = Char16SetFromString16(word); + for (Char16Set::iterator uni_char_iter = characters.begin(); + uni_char_iter != characters.end(); ++uni_char_iter) { + char16 uni_char = *uni_char_iter; + private_data.char_word_map_[uni_char].erase(word_id); + if (private_data.char_word_map_[uni_char].empty()) + private_data.char_word_map_.erase(uni_char); // No longer in use. + } + + // Complete the removal of references to the word. + private_data.word_id_history_map_.erase(word_id); + private_data.word_map_.erase(word); + private_data.word_list_[word_id] = string16(); + private_data.available_words_.insert(word_id); + } } bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, @@ -241,12 +324,7 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, } void InMemoryURLIndex::ClearPrivateData() { - history_item_count_ = 0; - word_list_.clear(); - word_map_.clear(); - char_word_map_.clear(); - word_id_history_map_.clear(); - history_info_map_.clear(); + private_data_->Clear(); search_term_cache_.clear(); } @@ -282,10 +360,13 @@ bool InMemoryURLIndex::RestoreFromCacheFile() { UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", base::TimeTicks::Now() - beginning_time); - UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); + UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", + private_data_->history_id_word_map_.size()); UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", + private_data_->word_map_.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", + private_data_->char_word_map_.size()); return true; } @@ -317,13 +398,14 @@ bool InMemoryURLIndex::SaveToCacheFile() { return true; } -void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { +void InMemoryURLIndex::UpdateURL(const URLRow& row) { // The row may or may not already be in our index. If it is not already // indexed and it qualifies then it gets indexed. If it is already // indexed and still qualifies then it gets updated, otherwise it // is deleted from the index. - HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); - if (row_pos == history_info_map_.end()) { + HistoryInfoMap::iterator row_pos = + private_data_->history_info_map_.find(row.id()); + if (row_pos == private_data_->history_info_map_.end()) { // This new row should be indexed if it qualifies. if (RowQualifiesAsSignificant(row, base::Time())) IndexRow(row); @@ -335,25 +417,26 @@ void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { old_row.set_visit_count(row.visit_count()); old_row.set_typed_count(row.typed_count()); old_row.set_last_visit(row.last_visit()); - // TODO(mrossetti): When we start indexing the title the next line - // will need attention. - old_row.set_title(row.title()); + // While the URL is guaranteed to remain stable, the title may have changed. + // If so, then we need to update the index with the changed words. + if (old_row.title() != row.title()) { + // Clear all words associated with this row and re-index both the + // URL and title. + RemoveRowWordsFromIndex(row); + old_row.set_title(row.title()); + AddRowWordsToIndex(old_row); + } } else { - // This indexed row no longer qualifies and will be de-indexed. - history_info_map_.erase(row_id); + // This indexed row no longer qualifies and will be de-indexed by + // clearing all words associated with this row. + RemoveRowFromIndex(row); } // This invalidates the cache. search_term_cache_.clear(); - // TODO(mrossetti): Record this transaction in the cache. } -void InMemoryURLIndex::DeleteURL(URLID row_id) { - // Note that this does not remove any reference to this row from the - // word_id_history_map_. That map will continue to contain (and return) - // hits against this row until that map is rebuilt, but since the - // history_info_map_ no longer references the row no erroneous results - // will propagate to the user. - history_info_map_.erase(row_id); +void InMemoryURLIndex::DeleteURL(const URLRow& row) { + RemoveRowFromIndex(row); // This invalidates the word cache. search_term_cache_.clear(); // TODO(mrossetti): Record this transaction in the cache. @@ -364,6 +447,12 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) { ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( const String16Vector& terms) { ScoredHistoryMatches scored_items; + + // Do nothing if we have indexed no words (probably because we've not been + // initialized yet). + if (private_data_->word_list_.empty()) + return scored_items; + if (!terms.empty()) { // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep // approach. @@ -421,7 +510,7 @@ void InMemoryURLIndex::ResetSearchTermCache() { iter->second.used_ = false; } -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( +HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( const string16& uni_string) { // Break the terms down into individual terms (words), get the candidate // set for each term, and intersect each to get a final candidate list. @@ -429,7 +518,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( // a string like "http://www.somewebsite.com" which, from our perspective, // is four words: 'http', 'www', 'somewebsite', and 'com'. HistoryIDSet history_id_set; - String16Vector terms = WordVectorFromString16(uni_string, true); + String16Vector terms = String16VectorFromString16(uni_string, true); // Sort the terms into the longest first as such are likely to narrow down // the results quicker. Also, single character terms are the most expensive // to process so save them for last. @@ -456,7 +545,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( return history_id_set; } -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( +HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( const string16& term) { if (term.empty()) return HistoryIDSet(); @@ -466,7 +555,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // occuring words in the user's searches. size_t term_length = term.length(); - InMemoryURLIndex::WordIDSet word_id_set; + WordIDSet word_id_set; if (term_length > 1) { // See if this term or a prefix thereof is present in the cache. SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); @@ -512,7 +601,8 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // Reduce the word set with any leftover, unprocessed characters. if (!unique_chars.empty()) { - WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); + WordIDSet leftover_set( + private_data_->WordIDSetForTermChars(unique_chars)); // We might come up empty on the leftovers. if (leftover_set.empty()) { search_term_cache_[term] = SearchTermCacheItem(); @@ -535,13 +625,15 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // contains words which do not have the search term as a proper subset. for (WordIDSet::iterator word_set_iter = word_id_set.begin(); word_set_iter != word_id_set.end(); ) { - if (word_list_[*word_set_iter].find(term) == string16::npos) + if (private_data_->word_list_[*word_set_iter].find(term) == + string16::npos) word_id_set.erase(word_set_iter++); else ++word_set_iter; } } else { - word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); + word_id_set = + private_data_->WordIDSetForTermChars(Char16SetFromString16(term)); } // If any words resulted then we can compose a set of history IDs by unioning @@ -551,8 +643,9 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( for (WordIDSet::iterator word_id_iter = word_id_set.begin(); word_id_iter != word_id_set.end(); ++word_id_iter) { WordID word_id = *word_id_iter; - WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); - if (word_iter != word_id_history_map_.end()) { + WordIDHistoryMap::iterator word_iter = + private_data_->word_id_history_map_.find(word_id); + if (word_iter != private_data_->word_id_history_map_.end()) { HistoryIDSet& word_history_id_set(word_iter->second); history_id_set.insert(word_history_id_set.begin(), word_history_id_set.end()); @@ -570,85 +663,53 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // Utility Functions -// static -InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( - const string16& uni_string) { - const size_t kMaxWordLength = 64; - String16Vector words = WordVectorFromString16(uni_string, false); - String16Set word_set; - for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); - ++iter) - word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); - return word_set; -} - -// static -InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( - const string16& uni_string, - bool break_on_space) { - base::i18n::BreakIterator iter( - uni_string, - break_on_space ? base::i18n::BreakIterator::BREAK_SPACE - : base::i18n::BreakIterator::BREAK_WORD); - String16Vector words; - if (!iter.Init()) - return words; - while (iter.Advance()) { - if (break_on_space || iter.IsWord()) { - string16 word = iter.GetString(); - if (break_on_space) - TrimWhitespace(word, TRIM_ALL, &word); - if (!word.empty()) - words.push_back(word); - } - } - return words; -} - -// static -InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( - const string16& term) { - Char16Set characters; - for (string16::const_iterator iter = term.begin(); iter != term.end(); - ++iter) - characters.insert(*iter); - return characters; -} - void InMemoryURLIndex::AddWordToIndex(const string16& term, HistoryID history_id) { - WordMap::iterator word_pos = word_map_.find(term); - if (word_pos != word_map_.end()) + WordMap::iterator word_pos = private_data_->word_map_.find(term); + if (word_pos != private_data_->word_map_.end()) UpdateWordHistory(word_pos->second, history_id); else AddWordHistory(term, history_id); } void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { - WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); - DCHECK(history_pos != word_id_history_map_.end()); - HistoryIDSet& history_id_set(history_pos->second); - history_id_set.insert(history_id); + WordIDHistoryMap::iterator history_pos = + private_data_->word_id_history_map_.find(word_id); + DCHECK(history_pos != private_data_->word_id_history_map_.end()); + HistoryIDSet& history_id_set(history_pos->second); + history_id_set.insert(history_id); + private_data_->AddToHistoryIDWordMap(history_id, word_id); } // Add a new word to the word list and the word map, and then create a // new entry in the word/history map. void InMemoryURLIndex::AddWordHistory(const string16& term, HistoryID history_id) { - word_list_.push_back(term); - WordID word_id = word_list_.size() - 1; - word_map_[term] = word_id; + URLIndexPrivateData& private_data(*(private_data_.get())); + WordID word_id = private_data.word_list_.size(); + if (private_data.available_words_.empty()) { + private_data.word_list_.push_back(term); + } else { + word_id = *(private_data.available_words_.begin()); + private_data.word_list_[word_id] = term; + private_data.available_words_.erase(word_id); + } + private_data.word_map_[term] = word_id; + HistoryIDSet history_id_set; history_id_set.insert(history_id); - word_id_history_map_[word_id] = history_id_set; + private_data.word_id_history_map_[word_id] = history_id_set; + private_data_->AddToHistoryIDWordMap(history_id, word_id); + // For each character in the newly added word (i.e. a word that is not // already in the word index), add the word to the character index. Char16Set characters = Char16SetFromString16(term); for (Char16Set::iterator uni_char_iter = characters.begin(); uni_char_iter != characters.end(); ++uni_char_iter) { char16 uni_char = *uni_char_iter; - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); - if (char_iter != char_word_map_.end()) { + CharWordIDMap::iterator char_iter = + private_data.char_word_map_.find(uni_char); + if (char_iter != private_data.char_word_map_.end()) { // Update existing entry in the char/word index. WordIDSet& word_id_set(char_iter->second); word_id_set.insert(word_id); @@ -656,108 +717,13 @@ void InMemoryURLIndex::AddWordHistory(const string16& term, // Create a new entry in the char/word index. WordIDSet word_id_set; word_id_set.insert(word_id); - char_word_map_[uni_char] = word_id_set; - } - } -} - -InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( - const Char16Set& term_chars) { - WordIDSet word_id_set; - for (Char16Set::const_iterator c_iter = term_chars.begin(); - c_iter != term_chars.end(); ++c_iter) { - CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); - if (char_iter == char_word_map_.end()) { - // A character was not found so there are no matching results: bail. - word_id_set.clear(); - break; - } - WordIDSet& char_word_id_set(char_iter->second); - // It is possible for there to no longer be any words associated with - // a particular character. Give up in that case. - if (char_word_id_set.empty()) { - word_id_set.clear(); - break; - } - - if (c_iter == term_chars.begin()) { - // First character results becomes base set of results. - word_id_set = char_word_id_set; - } else { - // Subsequent character results get intersected in. - WordIDSet new_word_id_set; - std::set_intersection(word_id_set.begin(), word_id_set.end(), - char_word_id_set.begin(), char_word_id_set.end(), - std::inserter(new_word_id_set, - new_word_id_set.begin())); - word_id_set.swap(new_word_id_set); - } - } - return word_id_set; -} - -// static -TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, - const string16& string, - int term_num) { - const size_t kMaxCompareLength = 2048; - const string16& short_string = (string.length() > kMaxCompareLength) ? - string.substr(0, kMaxCompareLength) : string; - TermMatches matches; - for (size_t location = short_string.find(term); location != string16::npos; - location = short_string.find(term, location + 1)) { - matches.push_back(TermMatch(term_num, location, term.length())); - } - return matches; -} - -// static -TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { - if (matches.empty()) - return matches; - TermMatches sorted_matches = matches; - std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); - TermMatches clean_matches; - TermMatch last_match = sorted_matches[0]; - clean_matches.push_back(last_match); - for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; - iter != sorted_matches.end(); ++iter) { - if (iter->offset >= last_match.offset + last_match.length) { - last_match = *iter; - clean_matches.push_back(last_match); + private_data.char_word_map_[uni_char] = word_id_set; } } - return clean_matches; -} - -// static -std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( - const TermMatches& matches) { - std::vector<size_t> offsets; - for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) - offsets.push_back(i->offset); - return offsets; -} - -// static -TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( - const TermMatches& matches, - const std::vector<size_t>& offsets) { - DCHECK_EQ(matches.size(), offsets.size()); - TermMatches new_matches; - std::vector<size_t>::const_iterator offset_iter = offsets.begin(); - for (TermMatches::const_iterator term_iter = matches.begin(); - term_iter != matches.end(); ++term_iter, ++offset_iter) { - if (*offset_iter != string16::npos) { - TermMatch new_match(*term_iter); - new_match.offset = *offset_iter; - new_matches.push_back(new_match); - } - } - return new_matches; } // static +// TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( const URLRow& row, const String16Vector& terms) { @@ -786,8 +752,8 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( } // Sort matches by offset and eliminate any which overlap. - match.url_matches = SortAndDeoverlap(match.url_matches); - match.title_matches = SortAndDeoverlap(match.title_matches); + match.url_matches = SortAndDeoverlapMatches(match.url_matches); + match.title_matches = SortAndDeoverlapMatches(match.title_matches); // We should not (currently) inline autocomplete a result unless both of the // following are true: @@ -839,7 +805,6 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( for (int i = 0; i < kSignificantFactors; ++i) match.raw_score += factor[i]; match.raw_score /= kSignificantFactors; - return match; } @@ -901,19 +866,17 @@ InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( const InMemoryURLIndex& index, const String16Vector& lower_terms) : index_(index), - lower_terms_(lower_terms) { -} + lower_terms_(lower_terms) {} InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} -void InMemoryURLIndex::AddHistoryMatch::operator()( - const InMemoryURLIndex::HistoryID history_id) { +void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) { HistoryInfoMap::const_iterator hist_pos = - index_.history_info_map_.find(history_id); + index_.private_data_->history_info_map_.find(history_id); // Note that a history_id may be present in the word_id_history_map_ yet not // be found in the history_info_map_. This occurs when an item has been // deleted by the user or the item no longer qualifies as a quick result. - if (hist_pos != index_.history_info_map_.end()) { + if (hist_pos != index_.private_data_->history_info_map_.end()) { const URLRow& hist_item = hist_pos->second; ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); if (match.raw_score > 0) @@ -935,7 +898,9 @@ bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { DCHECK(cache); cache->set_timestamp(base::Time::Now().ToInternalValue()); - cache->set_history_item_count(history_item_count_); + // history_item_count_ is no longer used but rather than change the protobuf + // definition use a placeholder. This will go away with the switch to SQLite. + cache->set_history_item_count(0); SaveWordList(cache); SaveWordMap(cache); SaveCharWordMap(cache); @@ -946,20 +911,18 @@ void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { bool InMemoryURLIndex::RestorePrivateData( const InMemoryURLIndexCacheItem& cache) { last_saved_ = base::Time::FromInternalValue(cache.timestamp()); - history_item_count_ = cache.history_item_count(); - return (history_item_count_ == 0) || (RestoreWordList(cache) && - RestoreWordMap(cache) && RestoreCharWordMap(cache) && - RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); + return RestoreWordList(cache) && RestoreWordMap(cache) && + RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && + RestoreHistoryInfoMap(cache); } - void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { - if (word_list_.empty()) + if (private_data_->word_list_.empty()) return; WordListItem* list_item = cache->mutable_word_list(); - list_item->set_word_count(word_list_.size()); - for (String16Vector::const_iterator iter = word_list_.begin(); - iter != word_list_.end(); ++iter) + list_item->set_word_count(private_data_->word_list_.size()); + for (String16Vector::const_iterator iter = private_data_->word_list_.begin(); + iter != private_data_->word_list_.end(); ++iter) list_item->add_word(UTF16ToUTF8(*iter)); } @@ -974,17 +937,17 @@ bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { const RepeatedPtrField<std::string>& words(list_item.word()); for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); iter != words.end(); ++iter) - word_list_.push_back(UTF8ToUTF16(*iter)); + private_data_->word_list_.push_back(UTF8ToUTF16(*iter)); return true; } void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { - if (word_map_.empty()) + if (private_data_->word_map_.empty()) return; WordMapItem* map_item = cache->mutable_word_map(); - map_item->set_item_count(word_map_.size()); - for (WordMap::const_iterator iter = word_map_.begin(); - iter != word_map_.end(); ++iter) { + map_item->set_item_count(private_data_->word_map_.size()); + for (WordMap::const_iterator iter = private_data_->word_map_.begin(); + iter != private_data_->word_map_.end(); ++iter) { WordMapEntry* map_entry = map_item->add_word_map_entry(); map_entry->set_word(UTF16ToUTF8(iter->first)); map_entry->set_word_id(iter->second); @@ -1002,17 +965,18 @@ bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); iter != entries.end(); ++iter) - word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); + private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); return true; } void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { - if (char_word_map_.empty()) + if (private_data_->char_word_map_.empty()) return; CharWordMapItem* map_item = cache->mutable_char_word_map(); - map_item->set_item_count(char_word_map_.size()); - for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); - iter != char_word_map_.end(); ++iter) { + map_item->set_item_count(private_data_->char_word_map_.size()); + for (CharWordIDMap::const_iterator iter = + private_data_->char_word_map_.begin(); + iter != private_data_->char_word_map_.end(); ++iter) { CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); map_entry->set_char_16(iter->first); const WordIDSet& word_id_set(iter->second); @@ -1046,19 +1010,20 @@ bool InMemoryURLIndex::RestoreCharWordMap( for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); jiter != word_ids.end(); ++jiter) word_id_set.insert(*jiter); - char_word_map_[uni_char] = word_id_set; + private_data_->char_word_map_[uni_char] = word_id_set; } return true; } void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) const { - if (word_id_history_map_.empty()) + if (private_data_->word_id_history_map_.empty()) return; WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); - map_item->set_item_count(word_id_history_map_.size()); - for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); - iter != word_id_history_map_.end(); ++iter) { + map_item->set_item_count(private_data_->word_id_history_map_.size()); + for (WordIDHistoryMap::const_iterator iter = + private_data_->word_id_history_map_.begin(); + iter != private_data_->word_id_history_map_.end(); ++iter) { WordIDHistoryMapEntry* map_entry = map_item->add_word_id_history_map_entry(); map_entry->set_word_id(iter->first); @@ -1091,21 +1056,24 @@ bool InMemoryURLIndex::RestoreWordIDHistoryMap( HistoryIDSet history_id_set; const RepeatedField<int64>& history_ids(iter->history_id()); for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); - jiter != history_ids.end(); ++jiter) + jiter != history_ids.end(); ++jiter) { history_id_set.insert(*jiter); - word_id_history_map_[word_id] = history_id_set; + private_data_->AddToHistoryIDWordMap(*jiter, word_id); + } + private_data_->word_id_history_map_[word_id] = history_id_set; } return true; } void InMemoryURLIndex::SaveHistoryInfoMap( InMemoryURLIndexCacheItem* cache) const { - if (history_info_map_.empty()) + if (private_data_->history_info_map_.empty()) return; HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); - map_item->set_item_count(history_info_map_.size()); - for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); - iter != history_info_map_.end(); ++iter) { + map_item->set_item_count(private_data_->history_info_map_.size()); + for (HistoryInfoMap::const_iterator iter = + private_data_->history_info_map_.begin(); + iter != private_data_->history_info_map_.end(); ++iter) { HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); map_entry->set_history_id(iter->first); const URLRow& url_row(iter->second); @@ -1143,7 +1111,7 @@ bool InMemoryURLIndex::RestoreHistoryInfoMap( string16 title(UTF8ToUTF16(iter->title())); url_row.set_title(title); } - history_info_map_[history_id] = url_row; + private_data_->history_info_map_[history_id] = url_row; } return true; } diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h index be31abe..598c282 100644 --- a/chrome/browser/history/in_memory_url_index.h +++ b/chrome/browser/history/in_memory_url_index.h @@ -21,16 +21,15 @@ #include "chrome/browser/autocomplete/autocomplete_match.h" #include "chrome/browser/autocomplete/history_provider_util.h" #include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/history/in_memory_url_index_cache.pb.h" +#include "content/common/notification_observer.h" +#include "content/common/notification_registrar.h" #include "sql/connection.h" #include "testing/gtest/include/gtest/gtest_prod.h" class Profile; -namespace base { -class Time; -} - namespace in_memory_url_index { class InMemoryURLIndexCacheItem; } @@ -40,39 +39,9 @@ namespace history { namespace imui = in_memory_url_index; class URLDatabase; - -// Specifies where an omnibox term occurs within a string. Used for specifying -// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in -// scoring a result. -struct TermMatch { - TermMatch(int term_num, size_t offset, size_t length) - : term_num(term_num), - offset(offset), - length(length) {} - - int term_num; // The index of the term in the original search string. - size_t offset; // The starting offset of the substring match. - size_t length; // The length of the substring match. -}; -typedef std::vector<TermMatch> TermMatches; - -// Used for intermediate history result operations. -struct ScoredHistoryMatch : public HistoryMatch { - ScoredHistoryMatch(); // Required by STL. - explicit ScoredHistoryMatch(const URLRow& url_info); - ~ScoredHistoryMatch(); - - static bool MatchScoreGreater(const ScoredHistoryMatch& m1, - const ScoredHistoryMatch& m2); - - // An interim score taking into consideration location and completeness - // of the match. - int raw_score; - TermMatches url_matches; // Term matches within the URL. - TermMatches title_matches; // Term matches within the page title. - bool can_inline; // True if this is a candidate for in-line autocompletion. -}; -typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; +struct URLsDeletedDetails; +struct URLsModifiedDetails; +struct URLVisitedDetails; // The URL history source. // Holds portions of the URL database in memory in an indexed form. Used to @@ -93,16 +62,14 @@ typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; // will eliminate such words except in the case where a single character // is being searched on and which character occurs as the second char16 of a // multi-char16 instance. -class InMemoryURLIndex { +class InMemoryURLIndex : public NotificationObserver { public: // |history_dir| is a path to the directory containing the history database // within the profile wherein the cache and transaction journals will be // stored. - explicit InMemoryURLIndex(const FilePath& history_dir); - ~InMemoryURLIndex(); - - // Convenience types - typedef std::vector<string16> String16Vector; + explicit InMemoryURLIndex(Profile* profile, + const FilePath& history_dir); + virtual ~InMemoryURLIndex(); // Opens and indexes the URL history database. // |languages| gives a list of language encodings with which the history @@ -143,36 +110,17 @@ class InMemoryURLIndex { // Updates or adds an history item to the index if it meets the minimum // 'quick' criteria. - void UpdateURL(URLID row_id, const URLRow& row); + void UpdateURL(const URLRow& row); // Deletes indexing data for an history item. The item may not have actually // been indexed (which is the case if it did not previously meet minimum // 'quick' criteria). - void DeleteURL(URLID row_id); + void DeleteURL(const URLRow& row); - // Breaks the |uni_string| string down into individual words and return - // a vector with the individual words in their original order. If - // |break_on_space| is false then the resulting list will contain only words - // containing alpha-numeric characters. If |break_on_space| is true then the - // resulting list will contain strings broken at whitespace. - // - // Example: - // Given: |uni_string|: "http://www.google.com/ harry the rabbit." - // With |break_on_space| false the returned list will contain: - // "http", "www", "google", "com", "harry", "the", "rabbit" - // With |break_on_space| true the returned list will contain: - // "http://", "www.google.com/", "harry", "the", "rabbit." - static String16Vector WordVectorFromString16(const string16& uni_string, - bool break_on_space); - - // Extract and return the offsets from |matches|. - static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); - - // Replace the offsets in |matches| with those given in |offsets|, deleting - // any which are npos, and return the updated list of matches. - static TermMatches ReplaceOffsetsInTermMatches( - const TermMatches& matches, - const std::vector<size_t>& offsets); + // Notification callback. + virtual void Observe(int type, + const NotificationSource& source, + const NotificationDetails& details); private: friend class AddHistoryMatch; @@ -194,29 +142,6 @@ class InMemoryURLIndex { // Creating one of me without a history path is not allowed (tests excepted). InMemoryURLIndex(); - // Convenience types. - typedef std::set<string16> String16Set; - typedef std::set<char16> Char16Set; - typedef std::vector<char16> Char16Vector; - - // An index into list of all of the words we have indexed. - typedef int WordID; - - // A map allowing a WordID to be determined given a word. - typedef std::map<string16, WordID> WordMap; - - // A map from character to word_ids. - typedef std::set<WordID> WordIDSet; // An index into the WordList. - typedef std::map<char16, WordIDSet> CharWordIDMap; - - // A map from word_id to history item. - // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit. - // Consider using a smaller type. - typedef URLID HistoryID; - typedef std::set<HistoryID> HistoryIDSet; - typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; - - // Support caching of term results so that we can optimize searches which // build upon a previous search. Each entry in this map represents one // search term from the most recent search. For example, if the user had @@ -248,12 +173,6 @@ class InMemoryURLIndex { }; typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap; - // TODO(rohitrao): Probably replace this with QueryResults. - typedef std::vector<URLRow> URLRowVector; - - // A map from history_id to the history's URL and title. - typedef std::map<HistoryID, URLRow> HistoryInfoMap; - // A helper class which performs the final filter on each candidate // history URL match, inserting accepted matches into |scored_matches_| // and trimming the maximum number of matches to 10. @@ -280,34 +199,19 @@ class InMemoryURLIndex { // Initializes the whitelist of URL schemes. static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); - // Breaks a string down into individual words. - static String16Set WordSetFromString16(const string16& uni_string); - - // Given a set of Char16s, finds words containing those characters. - WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); - - // Creates a TermMatches which has an entry for each occurrence of the string - // |term| found in the string |string|. Mark each match with |term_num| so - // that the resulting TermMatches can be merged with other TermMatches for - // other terms. - static TermMatches MatchTermInString(const string16& term, - const string16& string, - int term_num); - // URL History indexing support functions. // Indexes one URL history item. bool IndexRow(const URLRow& row); - // Breaks the |uni_word| string down into its individual characters. - // Note that this is temporarily intended to work on a single word, but - // _will_ work on a string of words, perhaps with unexpected results. - // TODO(mrossetti): Lots of optimizations possible here for not restarting - // a search if the user is just typing along. Also, change this to uniString - // and properly handle substring matches, scoring and sorting the results - // by score. Also, provide the metrics for where the matches occur so that - // the UI can highlight the matched sections. - static Char16Set Char16SetFromString16(const string16& uni_word); + // Parses and indexes the words in the URL and page title of |row|. + void AddRowWordsToIndex(const URLRow& row); + + // Removes |row| and all associated words and characters from the index. + void RemoveRowFromIndex(const URLRow& row); + + // Removes all words and characters associated with |row| from the index. + void RemoveRowWordsFromIndex(const URLRow& row); // Given a single word in |uni_word|, adds a reference for the containing // history item identified by |history_id| to the index. @@ -352,13 +256,14 @@ class InMemoryURLIndex { static int ScoreComponentForMatches(const TermMatches& matches, size_t max_length); - // Sorts and removes overlapping substring matches from |matches| and - // returns the cleaned up matches. - static TermMatches SortAndDeoverlap(const TermMatches& matches); - // Determines if |gurl| has a whitelisted scheme and returns true if so. bool URLSchemeIsWhitelisted(const GURL& gurl) const; + // Notification handlers. + void OnURLVisited(const URLVisitedDetails* details); + void OnURLsModified(const URLsModifiedDetails* details); + void OnURLsDeleted(const URLsDeletedDetails* details); + // Utility functions supporting RestoreFromCache and SaveToCache. // Construct a file path for the cache file within the same directory where @@ -384,6 +289,8 @@ class InMemoryURLIndex { bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); + NotificationRegistrar registrar_; + // Directory where cache file resides. This is, except when unit testing, // the same directory in which the profile's history database is found. It // should never be empty. @@ -395,21 +302,8 @@ class InMemoryURLIndex { // the InMemoryURLIndex was last populated. base::Time last_saved_; - // A list of all of indexed words. The index of a word in this list is the - // ID of the word in the word_map_. It reduces the memory overhead by - // replacing a potentially long and repeated string with a simple index. - // NOTE: A word will _never_ be removed from this vector thus insuring - // the immutability of the word_id throughout the session, reducing - // maintenance complexity. - // TODO(mrossetti): Profile the vector allocation and determine if judicious - // 'reserve' calls are called for. - String16Vector word_list_; - - int history_item_count_; - WordMap word_map_; - CharWordIDMap char_word_map_; - WordIDHistoryMap word_id_history_map_; - HistoryInfoMap history_info_map_; + // The index's durable private data. + scoped_ptr<URLIndexPrivateData> private_data_; // Cache of search terms. SearchTermCacheMap search_term_cache_; diff --git a/chrome/browser/history/in_memory_url_index_types.cc b/chrome/browser/history/in_memory_url_index_types.cc new file mode 100644 index 0000000..4447a0e --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_types.cc @@ -0,0 +1,197 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/in_memory_url_index_types.h" + +#include <algorithm> + +#include "base/i18n/break_iterator.h" +#include "base/i18n/case_conversion.h" +#include "base/string_util.h" + +namespace history { + +// Matches within URL and Title Strings ---------------------------------------- + +TermMatches MatchTermInString(const string16& term, + const string16& string, + int term_num) { + const size_t kMaxCompareLength = 2048; + const string16& short_string = (string.length() > kMaxCompareLength) ? + string.substr(0, kMaxCompareLength) : string; + TermMatches matches; + for (size_t location = short_string.find(term); location != string16::npos; + location = short_string.find(term, location + 1)) + matches.push_back(TermMatch(term_num, location, term.length())); + return matches; +} + +// Comparison function for sorting TermMatches by their offsets. +bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { + return m1.offset < m2.offset; +} + +TermMatches SortAndDeoverlapMatches(const TermMatches& matches) { + if (matches.empty()) + return matches; + TermMatches sorted_matches = matches; + std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); + TermMatches clean_matches; + TermMatch last_match; + for (TermMatches::const_iterator iter = sorted_matches.begin(); + iter != sorted_matches.end(); ++iter) { + if (iter->offset >= last_match.offset + last_match.length) { + last_match = *iter; + clean_matches.push_back(last_match); + } + } + return clean_matches; +} + +std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches) { + std::vector<size_t> offsets; + for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) + offsets.push_back(i->offset); + return offsets; +} + +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, + const std::vector<size_t>& offsets) { + DCHECK_EQ(matches.size(), offsets.size()); + TermMatches new_matches; + std::vector<size_t>::const_iterator offset_iter = offsets.begin(); + for (TermMatches::const_iterator term_iter = matches.begin(); + term_iter != matches.end(); ++term_iter, ++offset_iter) { + if (*offset_iter != string16::npos) { + TermMatch new_match(*term_iter); + new_match.offset = *offset_iter; + new_matches.push_back(new_match); + } + } + return new_matches; +} + +// ScoredHistoryMatch ---------------------------------------------------------- + +ScoredHistoryMatch::ScoredHistoryMatch() + : raw_score(0), + can_inline(false) {} + +ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) + : HistoryMatch(url_info, 0, false, false), + raw_score(0), + can_inline(false) {} + +ScoredHistoryMatch::~ScoredHistoryMatch() {} + +// Comparison function for sorting ScoredMatches by their scores. +bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, + const ScoredHistoryMatch& m2) { + return m1.raw_score >= m2.raw_score; +} + +// InMemoryURLIndex's Private Data --------------------------------------------- + +URLIndexPrivateData::URLIndexPrivateData() {} + +URLIndexPrivateData::~URLIndexPrivateData() {} + +void URLIndexPrivateData::Clear() { + word_list_.clear(); + available_words_.clear(); + word_map_.clear(); + char_word_map_.clear(); + word_id_history_map_.clear(); + history_id_word_map_.clear(); + history_info_map_.clear(); +} + +void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id, + WordID word_id) { + HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id); + if (iter != history_id_word_map_.end()) { + WordIDSet& word_id_set(iter->second); + word_id_set.insert(word_id); + } else { + WordIDSet word_id_set; + word_id_set.insert(word_id); + history_id_word_map_[history_id] = word_id_set; + } +} + +WordIDSet URLIndexPrivateData::WordIDSetForTermChars( + const Char16Set& term_chars) { + WordIDSet word_id_set; + for (Char16Set::const_iterator c_iter = term_chars.begin(); + c_iter != term_chars.end(); ++c_iter) { + CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); + if (char_iter == char_word_map_.end()) { + // A character was not found so there are no matching results: bail. + word_id_set.clear(); + break; + } + WordIDSet& char_word_id_set(char_iter->second); + // It is possible for there to no longer be any words associated with + // a particular character. Give up in that case. + if (char_word_id_set.empty()) { + word_id_set.clear(); + break; + } + + if (c_iter == term_chars.begin()) { + // First character results becomes base set of results. + word_id_set = char_word_id_set; + } else { + // Subsequent character results get intersected in. + WordIDSet new_word_id_set; + std::set_intersection(word_id_set.begin(), word_id_set.end(), + char_word_id_set.begin(), char_word_id_set.end(), + std::inserter(new_word_id_set, + new_word_id_set.begin())); + word_id_set.swap(new_word_id_set); + } + } + return word_id_set; +} + +// Utility Functions ----------------------------------------------------------- + +String16Set String16SetFromString16(const string16& uni_string) { + const size_t kMaxWordLength = 64; + String16Vector words = String16VectorFromString16(uni_string, false); + String16Set word_set; + for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); + ++iter) + word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); + return word_set; +} + +String16Vector String16VectorFromString16(const string16& uni_string, + bool break_on_space) { + base::i18n::BreakIterator iter(uni_string, + break_on_space ? base::i18n::BreakIterator::BREAK_SPACE : + base::i18n::BreakIterator::BREAK_WORD); + String16Vector words; + if (!iter.Init()) + return words; + while (iter.Advance()) { + if (break_on_space || iter.IsWord()) { + string16 word = iter.GetString(); + if (break_on_space) + TrimWhitespace(word, TRIM_ALL, &word); + if (!word.empty()) + words.push_back(word); + } + } + return words; +} + +Char16Set Char16SetFromString16(const string16& term) { + Char16Set characters; + for (string16::const_iterator iter = term.begin(); iter != term.end(); ++iter) + characters.insert(*iter); + return characters; +} + +} // namespace history diff --git a/chrome/browser/history/in_memory_url_index_types.h b/chrome/browser/history/in_memory_url_index_types.h new file mode 100644 index 0000000..d6ea8a2 --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_types.h @@ -0,0 +1,206 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ +#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ +#pragma once + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/gtest_prod_util.h" +#include "base/string16.h" +#include "chrome/browser/autocomplete/history_provider_util.h" +#include "chrome/browser/history/history_types.h" + +namespace history { + +// Matches within URL and Title Strings ---------------------------------------- + +// Specifies where an omnibox term occurs within a string. Used for specifying +// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in +// scoring a result. +struct TermMatch { + TermMatch() : term_num(0), offset(0), length(0) {} + TermMatch(int term_num, size_t offset, size_t length) + : term_num(term_num), offset(offset), length(length) {} + + int term_num; // The index of the term in the original search string. + size_t offset; // The starting offset of the substring match. + size_t length; // The length of the substring match. +}; +typedef std::vector<TermMatch> TermMatches; + +// Returns a TermMatches which has an entry for each occurrence of the string +// |term| found in the string |string|. Mark each match with |term_num| so +// that the resulting TermMatches can be merged with other TermMatches for +// other terms. Note that only the first 2,048 characters of |string| are +// considered during the match operation. +TermMatches MatchTermInString(const string16& term, + const string16& string, + int term_num); + +// Sorts and removes overlapping substring matches from |matches| and +// returns the cleaned up matches. +TermMatches SortAndDeoverlapMatches(const TermMatches& matches); + +// Extracts and returns the offsets from |matches|. +std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); + +// Replaces the offsets in |matches| with those given in |offsets|, deleting +// any which are npos, and returns the updated list of matches. +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, + const std::vector<size_t>& offsets); + +// Used for intermediate history result operations ----------------------------- + +struct ScoredHistoryMatch : public history::HistoryMatch { + ScoredHistoryMatch(); // Required by STL. + explicit ScoredHistoryMatch(const history::URLRow& url_info); + ~ScoredHistoryMatch(); + + static bool MatchScoreGreater(const ScoredHistoryMatch& m1, + const ScoredHistoryMatch& m2); + + // An interim score taking into consideration location and completeness + // of the match. + int raw_score; + TermMatches url_matches; // Term matches within the URL. + TermMatches title_matches; // Term matches within the page title. + bool can_inline; // True if this is a candidate for in-line autocompletion. +}; +typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; + +// Convenience Types ----------------------------------------------------------- + +typedef std::vector<string16> String16Vector; +typedef std::set<string16> String16Set; +typedef std::set<char16> Char16Set; +typedef std::vector<char16> Char16Vector; + +// Utility Functions Relates to Convenience Types ------------------------------ + +// Breaks a string down into individual words. +String16Set String16SetFromString16(const string16& uni_string); + +// Breaks the |uni_string| string down into individual words and return +// a vector with the individual words in their original order. If +// |break_on_space| is false then the resulting list will contain only words +// containing alpha-numeric characters. If |break_on_space| is true then the +// resulting list will contain strings broken at whitespace. (|break_on_space| +// indicates that the BreakIterator::BREAK_SPACE (equivalent to BREAK_LINE) +// approach is to be used. For a complete description of this algorithm +// refer to the comments in base/i18n/break_iterator.h.) +// +// Example: +// Given: |uni_string|: "http://www.google.com/ harry the rabbit." +// With |break_on_space| false the returned list will contain: +// "http", "www", "google", "com", "harry", "the", "rabbit" +// With |break_on_space| true the returned list will contain: +// "http://", "www.google.com/", "harry", "the", "rabbit." +String16Vector String16VectorFromString16(const string16& uni_string, + bool break_on_space); + +// Breaks the |uni_word| string down into its individual characters. +// Note that this is temporarily intended to work on a single word, but +// _will_ work on a string of words, perhaps with unexpected results. +// TODO(mrossetti): Lots of optimizations possible here for not restarting +// a search if the user is just typing along. Also, change this to uniString +// and properly handle substring matches, scoring and sorting the results +// by score. Also, provide the metrics for where the matches occur so that +// the UI can highlight the matched sections. +Char16Set Char16SetFromString16(const string16& uni_word); + +// Support for InMemoryURLIndex Private Data ----------------------------------- + +// An index into a list of all of the words we have indexed. +typedef size_t WordID; + +// A map allowing a WordID to be determined given a word. +typedef std::map<string16, WordID> WordMap; + +// A map from character to the word_ids of words containing that character. +typedef std::set<WordID> WordIDSet; // An index into the WordList. +typedef std::map<char16, WordIDSet> CharWordIDMap; + +// A map from word (by word_id) to history items containing that word. +typedef history::URLID HistoryID; +typedef std::set<HistoryID> HistoryIDSet; +typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; +typedef std::map<HistoryID, WordIDSet> HistoryIDWordMap; + +// A map from history_id to the history's URL and title. +typedef std::map<HistoryID, URLRow> HistoryInfoMap; + +// TODO(rohitrao): Probably replace this with QueryResults. +typedef std::vector<history::URLRow> URLRowVector; + +// A structure describing the index's internal data. This structure is used +// by the InMemoryURLIndex, InMemoryURLIndexBackend, and +// InMemoryURLCacheDatabase classes. +class URLIndexPrivateData { + public: + URLIndexPrivateData(); + ~URLIndexPrivateData(); + + // Clear all of our data. + void Clear(); + + // Adds |word_id| to |history_id|'s entry in the history/word map, + // creating a new entry if one does not already exist. + void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); + + // Given a set of Char16s, finds words containing those characters. + WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); + + private: + friend class InMemoryURLIndex; + friend class InMemoryURLIndexTest; + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); + FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); + + // A list of all of indexed words. The index of a word in this list is the + // ID of the word in the word_map_. It reduces the memory overhead by + // replacing a potentially long and repeated string with a simple index. + String16Vector word_list_; + + // A list of available words slots in |word_list_|. An available word slot + // is the index of a unused word in word_list_ vector, also referred to as + // a WordID. As URL visits are added or modified new words may be added to + // the index, in which case any available words are used, if any, and then + // words are added to the end of the word_list_. When URL visits are + // modified or deleted old words may be removed from the index, in which + // case the slots for those words are added to available_words_ for resuse + // by future URL updates. + WordIDSet available_words_; + + // A one-to-one mapping from the a word string to its slot number (i.e. + // WordID) in the |word_list_|. + WordMap word_map_; + + // A one-to-many mapping from a single character to all WordIDs of words + // containing that character. + CharWordIDMap char_word_map_; + + // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as + // used in the history database) of history items in which the word occurs. + WordIDHistoryMap word_id_history_map_; + + // A one-to-many mapping from a HistoryID to all WordIDs of words that occur + // in the URL and/or page title of the history item referenced by that + // HistoryID. + HistoryIDWordMap history_id_word_map_; + + // A one-to-one mapping from HistoryID to the history item data governing + // index inclusion and relevance scoring. + HistoryInfoMap history_info_map_; +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ diff --git a/chrome/browser/history/in_memory_url_index_types_unittest.cc b/chrome/browser/history/in_memory_url_index_types_unittest.cc new file mode 100644 index 0000000..6a6bc7c --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_types_unittest.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/string16.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/in_memory_url_index_types.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace history { + +class InMemoryURLIndexTypesTest : public testing::Test { +}; + +TEST_F(InMemoryURLIndexTypesTest, StaticFunctions) { + // Test WordVectorFromString16 + string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy")); + String16Vector string_vec = String16VectorFromString16(string_a, false); + ASSERT_EQ(7U, string_vec.size()); + // See if we got the words we expected. + EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]); + EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]); + EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]); + EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]); + EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]); + EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]); + EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]); + + string_vec = String16VectorFromString16(string_a, true); + ASSERT_EQ(5U, string_vec.size()); + EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]); + EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]); + EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]); + EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]); + EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]); + + // Test WordSetFromString16 + string16 string_b(ASCIIToUTF16( + "http://web.google.com/search Google Web Search")); + String16Set string_set = String16SetFromString16(string_b); + EXPECT_EQ(5U, string_set.size()); + // See if we got the words we expected. + EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end()); + + // Test SortAndDeoverlap + TermMatches matches_a; + matches_a.push_back(TermMatch(1, 13, 10)); + matches_a.push_back(TermMatch(2, 23, 10)); + matches_a.push_back(TermMatch(3, 3, 10)); + matches_a.push_back(TermMatch(4, 40, 5)); + TermMatches matches_b = SortAndDeoverlapMatches(matches_a); + // Nothing should have been eliminated. + EXPECT_EQ(matches_a.size(), matches_b.size()); + // The order should now be 3, 1, 2, 4. + EXPECT_EQ(3, matches_b[0].term_num); + EXPECT_EQ(1, matches_b[1].term_num); + EXPECT_EQ(2, matches_b[2].term_num); + EXPECT_EQ(4, matches_b[3].term_num); + matches_a.push_back(TermMatch(5, 18, 10)); + matches_a.push_back(TermMatch(6, 38, 5)); + matches_b = SortAndDeoverlapMatches(matches_a); + // Two matches should have been eliminated. + EXPECT_EQ(matches_a.size() - 2, matches_b.size()); + // The order should now be 3, 1, 2, 6. + EXPECT_EQ(3, matches_b[0].term_num); + EXPECT_EQ(1, matches_b[1].term_num); + EXPECT_EQ(2, matches_b[2].term_num); + EXPECT_EQ(6, matches_b[3].term_num); + + // Test MatchTermInString + TermMatches matches_c = MatchTermInString( + UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123); + const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 }; + ASSERT_EQ(arraysize(expected_offsets), matches_c.size()); + for (size_t i = 0; i < arraysize(expected_offsets); ++i) + EXPECT_EQ(expected_offsets[i], matches_c[i].offset); +} + +TEST_F(InMemoryURLIndexTypesTest, OffsetsAndTermMatches) { + // Test OffsetsFromTermMatches + history::TermMatches matches_a; + matches_a.push_back(history::TermMatch(1, 1, 2)); + matches_a.push_back(history::TermMatch(2, 4, 3)); + matches_a.push_back(history::TermMatch(3, 9, 1)); + matches_a.push_back(history::TermMatch(3, 10, 1)); + matches_a.push_back(history::TermMatch(4, 14, 5)); + std::vector<size_t> offsets = OffsetsFromTermMatches(matches_a); + const size_t expected_offsets_a[] = {1, 4, 9, 10, 14}; + ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a)); + for (size_t i = 0; i < offsets.size(); ++i) + EXPECT_EQ(expected_offsets_a[i], offsets[i]); + + // Test ReplaceOffsetsInTermMatches + offsets[2] = string16::npos; + history::TermMatches matches_b = + ReplaceOffsetsInTermMatches(matches_a, offsets); + const size_t expected_offsets_b[] = {1, 4, 10, 14}; + ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size()); + for (size_t i = 0; i < matches_b.size(); ++i) + EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset); +} + +} // namespace history diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc index de8e4e0c9..acdc34b 100644 --- a/chrome/browser/history/in_memory_url_index_unittest.cc +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -2,24 +2,18 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include <stdio.h> - #include <fstream> -#include <string> -#include <vector> #include "base/file_path.h" #include "base/file_util.h" -#include "base/memory/scoped_ptr.h" #include "base/path_service.h" +#include "base/string16.h" #include "base/string_util.h" -#include "base/time.h" #include "base/utf_string_conversions.h" #include "chrome/browser/history/in_memory_database.h" #include "chrome/browser/history/in_memory_url_index.h" +#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/common/chrome_paths.h" -#include "sql/connection.h" -#include "sql/statement.h" #include "sql/transaction.h" #include "testing/gtest/include/gtest/gtest.h" @@ -59,9 +53,8 @@ class InMemoryURLIndexTest : public testing::Test, int typed_count); // Convenience functions for easily creating vectors of search terms. - InMemoryURLIndex::String16Vector Make1Term(const char* term) const; - InMemoryURLIndex::String16Vector Make2Terms(const char* term_1, - const char* term_2) const; + String16Vector Make1Term(const char* term) const; + String16Vector Make2Terms(const char* term_1, const char* term_2) const; // Validates that the given |term| is contained in |cache| and that it is // marked as in-use. @@ -145,17 +138,15 @@ URLRow InMemoryURLIndexTest::MakeURLRow(const char* url, return row; } -InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make1Term( - const char* term) const { - InMemoryURLIndex::String16Vector terms; +String16Vector InMemoryURLIndexTest::Make1Term(const char* term) const { + String16Vector terms; terms.push_back(UTF8ToUTF16(term)); return terms; } -InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make2Terms( - const char* term_1, - const char* term_2) const { - InMemoryURLIndex::String16Vector terms; +String16Vector InMemoryURLIndexTest::Make2Terms(const char* term_1, + const char* term_2) const { + String16Vector terms; terms.push_back(UTF8ToUTF16(term_1)); terms.push_back(UTF8ToUTF16(term_2)); return terms; @@ -173,6 +164,27 @@ void InMemoryURLIndexTest::CheckTerm( << "Cache item '" << term << "' should be marked as being in use."; } +// Helper function which compares two maps for equivalence. The maps' values +// are associative containers and their contents are compared as well. +template<typename T> +void ExpectMapOfContainersIdentical(const T& expected, const T& actual) { + ASSERT_EQ(expected.size(), actual.size()); + for (typename T::const_iterator expected_iter = expected.begin(); + expected_iter != expected.end(); ++expected_iter) { + typename T::const_iterator actual_iter = actual.find(expected_iter->first); + ASSERT_NE(actual.end(), actual_iter); + typename T::mapped_type const& expected_values(expected_iter->second); + typename T::mapped_type const& actual_values(actual_iter->second); + ASSERT_EQ(expected_values.size(), actual_values.size()); + for (typename T::mapped_type::const_iterator set_iter = + expected_values.begin(); set_iter != expected_values.end(); ++set_iter) + EXPECT_EQ(actual_values.count(*set_iter), + expected_values.count(*set_iter)); + } +} + +//------------------------------------------------------------------------------ + class LimitedInMemoryURLIndexTest : public InMemoryURLIndexTest { protected: FilePath::StringType TestDBName() const; @@ -203,7 +215,8 @@ void ExpandedInMemoryURLIndexTest::SetUp() { } TEST_F(InMemoryURLIndexTest, Construction) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); EXPECT_TRUE(url_index_.get()); } @@ -217,16 +230,17 @@ TEST_F(LimitedInMemoryURLIndexTest, Initialization) { EXPECT_EQ(1U, row_count); url_index_.reset(new InMemoryURLIndex); url_index_->Init(this, "en,ja,hi,zh"); - EXPECT_EQ(1, url_index_->history_item_count_); + URLIndexPrivateData& private_data(*(url_index_->private_data_)); // history_info_map_ should have the same number of items as were filtered. - EXPECT_EQ(1U, url_index_->history_info_map_.size()); - EXPECT_EQ(35U, url_index_->char_word_map_.size()); - EXPECT_EQ(17U, url_index_->word_map_.size()); + EXPECT_EQ(1U, private_data.history_info_map_.size()); + EXPECT_EQ(35U, private_data.char_word_map_.size()); + EXPECT_EQ(17U, private_data.word_map_.size()); } TEST_F(InMemoryURLIndexTest, Retrieval) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); // The term will be lowercased by the search. @@ -259,7 +273,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) { matches[0].url_info.title()); // Search which should result in very poor result. - InMemoryURLIndex::String16Vector terms; + String16Vector terms; terms.push_back(ASCIIToUTF16("z")); terms.push_back(ASCIIToUTF16("y")); terms.push_back(ASCIIToUTF16("x")); @@ -279,7 +293,8 @@ TEST_F(InMemoryURLIndexTest, Retrieval) { } TEST_F(ExpandedInMemoryURLIndexTest, ShortCircuit) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); // A search for 'w' should short-circuit and not return any matches. @@ -296,8 +311,8 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) { url_index_.reset(new InMemoryURLIndex()); url_index_->Init(this, "en,ja,hi,zh"); // Signal if someone has changed the test DB. - EXPECT_EQ(27U, url_index_->history_info_map_.size()); - InMemoryURLIndex::String16Vector terms; + EXPECT_EQ(27U, url_index_->private_data_->history_info_map_.size()); + String16Vector terms; // Ensure title is being searched. terms.push_back(ASCIIToUTF16("MORTGAGE")); @@ -345,101 +360,6 @@ TEST_F(InMemoryURLIndexTest, NonUniqueTermCharacterSets) { EXPECT_EQ(28, matches[0].url_info.id()); } -TEST_F(InMemoryURLIndexTest, StaticFunctions) { - // Test WordVectorFromString16 - string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy")); - InMemoryURLIndex::String16Vector string_vec = - InMemoryURLIndex::WordVectorFromString16(string_a, false); - ASSERT_EQ(7U, string_vec.size()); - // See if we got the words we expected. - EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]); - EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]); - EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]); - EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]); - EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]); - EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]); - EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]); - - string_vec = InMemoryURLIndex::WordVectorFromString16(string_a, true); - ASSERT_EQ(5U, string_vec.size()); - EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]); - EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]); - EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]); - EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]); - EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]); - - // Test WordSetFromString16 - string16 string_b(ASCIIToUTF16( - "http://web.google.com/search Google Web Search")); - InMemoryURLIndex::String16Set string_set = - InMemoryURLIndex::WordSetFromString16(string_b); - EXPECT_EQ(5U, string_set.size()); - // See if we got the words we expected. - EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end()); - - // Test SortAndDeoverlap - TermMatches matches_a; - matches_a.push_back(TermMatch(1, 13, 10)); - matches_a.push_back(TermMatch(2, 23, 10)); - matches_a.push_back(TermMatch(3, 3, 10)); - matches_a.push_back(TermMatch(4, 40, 5)); - TermMatches matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a); - // Nothing should have been eliminated. - EXPECT_EQ(matches_a.size(), matches_b.size()); - // The order should now be 3, 1, 2, 4. - EXPECT_EQ(3, matches_b[0].term_num); - EXPECT_EQ(1, matches_b[1].term_num); - EXPECT_EQ(2, matches_b[2].term_num); - EXPECT_EQ(4, matches_b[3].term_num); - matches_a.push_back(TermMatch(5, 18, 10)); - matches_a.push_back(TermMatch(6, 38, 5)); - matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a); - // Two matches should have been eliminated. - EXPECT_EQ(matches_a.size() - 2, matches_b.size()); - // The order should now be 3, 1, 2, 6. - EXPECT_EQ(3, matches_b[0].term_num); - EXPECT_EQ(1, matches_b[1].term_num); - EXPECT_EQ(2, matches_b[2].term_num); - EXPECT_EQ(6, matches_b[3].term_num); - - // Test MatchTermInString - TermMatches matches_c = InMemoryURLIndex::MatchTermInString( - UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123); - ASSERT_EQ(11U, matches_c.size()); - const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 }; - for (int i = 0; i < 11; ++i) - EXPECT_EQ(expected_offsets[i], matches_c[i].offset); -} - -TEST_F(InMemoryURLIndexTest, OffsetsAndTermMatches) { - // Test OffsetsFromTermMatches - history::TermMatches matches_a; - matches_a.push_back(history::TermMatch(1, 1, 2)); - matches_a.push_back(history::TermMatch(2, 4, 3)); - matches_a.push_back(history::TermMatch(3, 9, 1)); - matches_a.push_back(history::TermMatch(3, 10, 1)); - matches_a.push_back(history::TermMatch(4, 14, 5)); - std::vector<size_t> offsets = - InMemoryURLIndex::OffsetsFromTermMatches(matches_a); - const size_t expected_offsets_a[] = {1, 4, 9, 10, 14}; - ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a)); - for (size_t i = 0; i < offsets.size(); ++i) - EXPECT_EQ(expected_offsets_a[i], offsets[i]); - - // Test ReplaceOffsetsInTermMatches - offsets[2] = string16::npos; - history::TermMatches matches_b = - InMemoryURLIndex::ReplaceOffsetsInTermMatches(matches_a, offsets); - const size_t expected_offsets_b[] = {1, 4, 10, 14}; - ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size()); - for (size_t i = 0; i < matches_b.size(); ++i) - EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset); -} - TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { // Verify that match results for previously typed characters are retained // (in the term_char_word_set_cache_) and reused, if possible, in future @@ -447,7 +367,8 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { typedef InMemoryURLIndex::SearchTermCacheMap::iterator CacheIter; typedef InMemoryURLIndex::SearchTermCacheItem CacheItem; - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); InMemoryURLIndex::SearchTermCacheMap& cache(url_index_->search_term_cache_); @@ -460,7 +381,7 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { // Simulate typing "r" giving "r" in the simulated omnibox. The results for // 'r' will be not cached because it is only 1 character long. - InMemoryURLIndex::String16Vector terms; + String16Vector terms; string16 term_r = ASCIIToUTF16("r"); terms.push_back(term_r); url_index_->HistoryItemsForTerms(terms); @@ -550,9 +471,10 @@ TEST_F(InMemoryURLIndexTest, Scoring) { } TEST_F(InMemoryURLIndexTest, AddNewRows) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); - InMemoryURLIndex::String16Vector terms; + String16Vector terms; // Verify that the row we're going to add does not already exist. URLID new_row_id = 87654321; @@ -564,20 +486,21 @@ TEST_F(InMemoryURLIndexTest, AddNewRows) { // Add a new row. URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id); new_row.set_last_visit(base::Time::Now()); - url_index_->UpdateURL(new_row_id, new_row); + url_index_->UpdateURL(new_row); // Verify that we can retrieve it. EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size()); // Add it again just to be sure that is harmless. - url_index_->UpdateURL(new_row_id, new_row); + url_index_->UpdateURL(new_row); EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size()); } TEST_F(InMemoryURLIndexTest, DeleteRows) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); - InMemoryURLIndex::String16Vector terms; + String16Vector terms; // Make sure we actually get an existing result. terms.push_back(ASCIIToUTF16("DrudgeReport")); @@ -585,7 +508,7 @@ TEST_F(InMemoryURLIndexTest, DeleteRows) { ASSERT_EQ(1U, matches.size()); // Determine the row id for that result, delete that id, then search again. - url_index_->DeleteURL(matches[0].url_info.id()); + url_index_->DeleteURL(matches[0].url_info); EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty()); } @@ -662,7 +585,8 @@ TEST_F(InMemoryURLIndexTest, WhitelistedURLs) { { "xmpp:node@example.com", false }, { "xmpp://guest@example.com", false }, }; - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL( + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL( "/flammmy/frammy/")))); for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { GURL url(data[i].url_spec); @@ -672,8 +596,8 @@ TEST_F(InMemoryURLIndexTest, WhitelistedURLs) { } TEST_F(InMemoryURLIndexTest, CacheFilePath) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL( - "/flammmy/frammy/")))); + url_index_.reset(new InMemoryURLIndex( + NULL, FilePath(FILE_PATH_LITERAL("/flammmy/frammy/")))); FilePath full_file_path; url_index_->GetCacheFilePath(&full_file_path); std::vector<FilePath::StringType> expected_parts; @@ -689,84 +613,72 @@ TEST_F(InMemoryURLIndexTest, CacheFilePath) { TEST_F(InMemoryURLIndexTest, CacheSaveRestore) { // Save the cache to a protobuf, restore it, and compare the results. - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); InMemoryURLIndex& url_index(*(url_index_.get())); url_index.Init(this, "en,ja,hi,zh"); in_memory_url_index::InMemoryURLIndexCacheItem index_cache; url_index.SavePrivateData(&index_cache); // Capture our private data so we can later compare for equality. - int history_item_count(url_index.history_item_count_); - InMemoryURLIndex::String16Vector word_list(url_index.word_list_); - InMemoryURLIndex::WordMap word_map(url_index.word_map_); - InMemoryURLIndex::CharWordIDMap char_word_map(url_index.char_word_map_); - InMemoryURLIndex::WordIDHistoryMap word_id_history_map( - url_index.word_id_history_map_); - InMemoryURLIndex::HistoryInfoMap history_info_map( - url_index.history_info_map_); + URLIndexPrivateData& private_data(*(url_index_->private_data_)); + String16Vector word_list(private_data.word_list_); + WordMap word_map(private_data.word_map_); + CharWordIDMap char_word_map(private_data.char_word_map_); + WordIDHistoryMap word_id_history_map(private_data.word_id_history_map_); + HistoryIDWordMap history_id_word_map(private_data.history_id_word_map_); + HistoryInfoMap history_info_map(private_data.history_info_map_); // Prove that there is really something there. - EXPECT_GT(url_index.history_item_count_, 0); - EXPECT_FALSE(url_index.word_list_.empty()); - EXPECT_FALSE(url_index.word_map_.empty()); - EXPECT_FALSE(url_index.char_word_map_.empty()); - EXPECT_FALSE(url_index.word_id_history_map_.empty()); - EXPECT_FALSE(url_index.history_info_map_.empty()); + EXPECT_FALSE(private_data.word_list_.empty()); + // available_words_ will already be empty since we have freshly built the + // data set for this test. + EXPECT_TRUE(private_data.available_words_.empty()); + EXPECT_FALSE(private_data.word_map_.empty()); + EXPECT_FALSE(private_data.char_word_map_.empty()); + EXPECT_FALSE(private_data.word_id_history_map_.empty()); + EXPECT_FALSE(private_data.history_id_word_map_.empty()); + EXPECT_FALSE(private_data.history_info_map_.empty()); // Clear and then prove it's clear. url_index.ClearPrivateData(); - EXPECT_EQ(0, url_index.history_item_count_); - EXPECT_TRUE(url_index.word_list_.empty()); - EXPECT_TRUE(url_index.word_map_.empty()); - EXPECT_TRUE(url_index.char_word_map_.empty()); - EXPECT_TRUE(url_index.word_id_history_map_.empty()); - EXPECT_TRUE(url_index.history_info_map_.empty()); + EXPECT_TRUE(private_data.word_list_.empty()); + EXPECT_TRUE(private_data.available_words_.empty()); + EXPECT_TRUE(private_data.word_map_.empty()); + EXPECT_TRUE(private_data.char_word_map_.empty()); + EXPECT_TRUE(private_data.word_id_history_map_.empty()); + EXPECT_TRUE(private_data.history_id_word_map_.empty()); + EXPECT_TRUE(private_data.history_info_map_.empty()); // Restore the cache. EXPECT_TRUE(url_index.RestorePrivateData(index_cache)); // Compare the restored and captured for equality. - EXPECT_EQ(history_item_count, url_index.history_item_count_); - EXPECT_EQ(word_list.size(), url_index.word_list_.size()); - EXPECT_EQ(word_map.size(), url_index.word_map_.size()); - EXPECT_EQ(char_word_map.size(), url_index.char_word_map_.size()); - EXPECT_EQ(word_id_history_map.size(), url_index.word_id_history_map_.size()); - EXPECT_EQ(history_info_map.size(), url_index.history_info_map_.size()); + EXPECT_EQ(word_list.size(), private_data.word_list_.size()); + EXPECT_EQ(word_map.size(), private_data.word_map_.size()); + EXPECT_EQ(char_word_map.size(), private_data.char_word_map_.size()); + EXPECT_EQ(word_id_history_map.size(), + private_data.word_id_history_map_.size()); + EXPECT_EQ(history_id_word_map.size(), + private_data.history_id_word_map_.size()); + EXPECT_EQ(history_info_map.size(), private_data.history_info_map_.size()); // WordList must be index-by-index equal. size_t count = word_list.size(); for (size_t i = 0; i < count; ++i) - EXPECT_EQ(word_list[i], url_index.word_list_[i]); - for (InMemoryURLIndex::CharWordIDMap::const_iterator expected = - char_word_map.begin(); expected != char_word_map.end(); ++expected) { - InMemoryURLIndex::CharWordIDMap::const_iterator actual = - url_index.char_word_map_.find(expected->first); - ASSERT_TRUE(url_index.char_word_map_.end() != actual); - const InMemoryURLIndex::WordIDSet& expected_set(expected->second); - const InMemoryURLIndex::WordIDSet& actual_set(actual->second); - ASSERT_EQ(expected_set.size(), actual_set.size()); - for (InMemoryURLIndex::WordIDSet::const_iterator set_iter = - expected_set.begin(); set_iter != expected_set.end(); ++set_iter) - EXPECT_GT(actual_set.count(*set_iter), 0U); - } - for (InMemoryURLIndex::WordIDHistoryMap::const_iterator expected = - word_id_history_map.begin(); expected != word_id_history_map.end(); - ++expected) { - InMemoryURLIndex::WordIDHistoryMap::const_iterator actual = - url_index.word_id_history_map_.find(expected->first); - ASSERT_TRUE(url_index.word_id_history_map_.end() != actual); - const InMemoryURLIndex::HistoryIDSet& expected_set(expected->second); - const InMemoryURLIndex::HistoryIDSet& actual_set(actual->second); - ASSERT_EQ(expected_set.size(), actual_set.size()); - for (InMemoryURLIndex::HistoryIDSet::const_iterator set_iter = - expected_set.begin(); set_iter != expected_set.end(); ++set_iter) - EXPECT_GT(actual_set.count(*set_iter), 0U); - } - for (InMemoryURLIndex::HistoryInfoMap::const_iterator expected = - history_info_map.begin(); expected != history_info_map.end(); - ++expected) { - InMemoryURLIndex::HistoryInfoMap::const_iterator actual = - url_index.history_info_map_.find(expected->first); - ASSERT_FALSE(url_index.history_info_map_.end() == actual); + EXPECT_EQ(word_list[i], private_data.word_list_[i]); + + ExpectMapOfContainersIdentical(char_word_map, + private_data.char_word_map_); + ExpectMapOfContainersIdentical(word_id_history_map, + private_data.word_id_history_map_); + ExpectMapOfContainersIdentical(history_id_word_map, + private_data.history_id_word_map_); + + for (HistoryInfoMap::const_iterator expected = history_info_map.begin(); + expected != history_info_map.end(); ++expected) { + HistoryInfoMap::const_iterator actual = + private_data.history_info_map_.find(expected->first); + ASSERT_FALSE(private_data.history_info_map_.end() == actual); const URLRow& expected_row(expected->second); const URLRow& actual_row(actual->second); EXPECT_EQ(expected_row.visit_count(), actual_row.visit_count()); diff --git a/chrome/browser/sync/profile_sync_service_typed_url_unittest.cc b/chrome/browser/sync/profile_sync_service_typed_url_unittest.cc index 925cd25..e1e46a0 100644 --- a/chrome/browser/sync/profile_sync_service_typed_url_unittest.cc +++ b/chrome/browser/sync/profile_sync_service_typed_url_unittest.cc @@ -80,7 +80,7 @@ using testing::WithArgs; class HistoryBackendMock : public HistoryBackend { public: - HistoryBackendMock() : HistoryBackend(FilePath(), 0, NULL, NULL) {} + HistoryBackendMock() : HistoryBackend(NULL, FilePath(), 0, NULL, NULL) {} MOCK_METHOD1(GetAllTypedURLs, bool(std::vector<history::URLRow>* entries)); MOCK_METHOD3(GetMostRecentVisitsForURL, bool(history::URLID id, int max_visits, diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 9ce4001..de843c5 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -1327,6 +1327,8 @@ 'browser/history/in_memory_history_backend.h', 'browser/history/in_memory_url_index.cc', 'browser/history/in_memory_url_index.h', + 'browser/history/in_memory_url_index_types.cc', + 'browser/history/in_memory_url_index_types.h', 'browser/history/page_usage_data.cc', 'browser/history/page_usage_data.h', 'browser/history/query_parser.cc', diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi index 78f01c3..bd44c61 100644 --- a/chrome/chrome_tests.gypi +++ b/chrome/chrome_tests.gypi @@ -1333,6 +1333,7 @@ 'browser/history/history_unittest.cc', 'browser/history/history_unittest_base.cc', 'browser/history/history_unittest_base.h', + 'browser/history/in_memory_url_index_types_unittest.cc', 'browser/history/in_memory_url_index_unittest.cc', 'browser/history/query_parser_unittest.cc', 'browser/history/shortcuts_backend_unittest.cc', diff --git a/content/browser/cancelable_request.h b/content/browser/cancelable_request.h index 69080df..9a986d8 100644 --- a/content/browser/cancelable_request.h +++ b/content/browser/cancelable_request.h @@ -4,8 +4,8 @@ // CancelableRequestProviders and Consumers work together to make requests that // execute on a background thread in the provider and return data to the -// consumer. These class collaborate to keep a list of open requests and to -// make sure that requests to not outlive either of the objects involved in the +// consumer. These classes collaborate to keep a list of open requests and to +// make sure that requests do not outlive either of the objects involved in the // transaction. // // If you do not need to return data to the consumer, do not use this system, |