diff options
26 files changed, 987 insertions, 691 deletions
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index 6dbb9a1..8b6618c 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -531,6 +531,8 @@ void AutocompleteProvider::Stop() { } void AutocompleteProvider::DeleteMatch(const AutocompleteMatch& match) { + DLOG(WARNING) << "The AutocompleteProvider '" << name() + << "' has not implemented DeleteMatch."; } AutocompleteProvider::~AutocompleteProvider() { diff --git a/chrome/browser/autocomplete/history_provider.cc b/chrome/browser/autocomplete/history_provider.cc index 9fe0754..8140aea 100644 --- a/chrome/browser/autocomplete/history_provider.cc +++ b/chrome/browser/autocomplete/history_provider.cc @@ -31,17 +31,16 @@ void HistoryProvider::DeleteMatch(const AutocompleteMatch& match) { profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); // Delete the match from the history DB. - GURL selected_url(match.destination_url); - if (!history_service || !selected_url.is_valid()) { - NOTREACHED() << "Can't delete requested URL"; - return; - } - history_service->DeleteURL(selected_url); + DCHECK(history_service); + DCHECK(match.destination_url.is_valid()); + history_service->DeleteURL(match.destination_url); + DeleteMatchFromMatches(match); +} - // Delete the match from the current set of matches. +void HistoryProvider::DeleteMatchFromMatches(const AutocompleteMatch& match) { bool found = false; for (ACMatches::iterator i(matches_.begin()); i != matches_.end(); ++i) { - if (i->destination_url == selected_url && i->type == match.type) { + if (i->destination_url == match.destination_url && i->type == match.type) { found = true; if (i->is_history_what_you_typed_match || i->starred) { // We can't get rid of What-You-Typed or Bookmarked matches, diff --git a/chrome/browser/autocomplete/history_provider.h b/chrome/browser/autocomplete/history_provider.h index 231a7b1..6cc0f80 100644 --- a/chrome/browser/autocomplete/history_provider.h +++ b/chrome/browser/autocomplete/history_provider.h @@ -54,6 +54,10 @@ class HistoryProvider : public AutocompleteProvider { // |input.prevent_inline_autocomplete()| is true, or the input text contains // trailing whitespace. bool PreventInlineAutocomplete(const AutocompleteInput& input); + + // Finds and removes the match from the current collection of matches and + // backing data. + void DeleteMatchFromMatches(const AutocompleteMatch& match); }; #endif // CHROME_BROWSER_AUTOCOMPLETE_HISTORY_PROVIDER_H_ diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc index ae42585..8b53a02 100644 --- a/chrome/browser/autocomplete/history_quick_provider.cc +++ b/chrome/browser/autocomplete/history_quick_provider.cc @@ -16,6 +16,7 @@ #include "base/utf_string_conversions.h" #include "chrome/browser/history/history.h" #include "chrome/browser/history/in_memory_url_index.h" +#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/net/url_fixer_upper.h" #include "chrome/browser/prefs/pref_service.h" #include "chrome/browser/profiles/profile.h" @@ -81,8 +82,7 @@ void HistoryQuickProvider::Start(const AutocompleteInput& input, } } -// HistoryQuickProvider matches are currently not deletable. -// TODO(mrossetti): Determine when a match should be deletable. +// TODO(mrossetti): Implement this function. (Will happen in next CL.) void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) {} void HistoryQuickProvider::DoAutocomplete() { @@ -90,8 +90,8 @@ void HistoryQuickProvider::DoAutocomplete() { string16 term_string = autocomplete_input_.text(); term_string = net::UnescapeURLComponent(term_string, UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); - history::InMemoryURLIndex::String16Vector terms( - InMemoryURLIndex::WordVectorFromString16(term_string, false)); + history::String16Vector terms( + history::String16VectorFromString16(term_string, false)); ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(terms); if (matches.empty()) return; @@ -131,13 +131,12 @@ AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch( // Format the URL autocomplete presentation. std::vector<size_t> offsets = - InMemoryURLIndex::OffsetsFromTermMatches(history_match.url_matches); + history::OffsetsFromTermMatches(history_match.url_matches); match.contents = net::FormatUrlWithOffsets(info.url(), languages_, net::kFormatUrlOmitAll, UnescapeRule::SPACES, NULL, NULL, &offsets); history::TermMatches new_matches = - InMemoryURLIndex::ReplaceOffsetsInTermMatches(history_match.url_matches, - offsets); + ReplaceOffsetsInTermMatches(history_match.url_matches, offsets); match.contents_class = SpansFromTermMatch(new_matches, match.contents.length(), true); match.fill_into_edit = match.contents; @@ -170,7 +169,8 @@ history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() { return history_service->InMemoryIndex(); } -void HistoryQuickProvider::SetIndexForTesting( +// TODO(mrossetti): This will be inlined in the next CL. +void HistoryQuickProvider::set_index( history::InMemoryURLIndex* index) { DCHECK(index); index_for_testing_.reset(index); diff --git a/chrome/browser/autocomplete/history_quick_provider.h b/chrome/browser/autocomplete/history_quick_provider.h index 8264669..4beac05 100644 --- a/chrome/browser/autocomplete/history_quick_provider.h +++ b/chrome/browser/autocomplete/history_quick_provider.h @@ -37,9 +37,6 @@ class HistoryQuickProvider : public HistoryProvider { virtual void DeleteMatch(const AutocompleteMatch& match) OVERRIDE; - // Performs the autocomplete matching and scoring. - void DoAutocomplete(); - // Disable this provider. For unit testing purposes only. This is required // because this provider is closely associated with the HistoryURLProvider // and in order to properly test the latter the HistoryQuickProvider must @@ -52,6 +49,9 @@ class HistoryQuickProvider : public HistoryProvider { FRIEND_TEST_ALL_PREFIXES(HistoryQuickProviderTest, Spans); FRIEND_TEST_ALL_PREFIXES(HistoryQuickProviderTest, Relevance); + // Performs the autocomplete matching and scoring. + void DoAutocomplete(); + // Creates an AutocompleteMatch from |history_match|. |max_match_score| gives // the maximum possible score for the match. |history_matches| is the full set // of matches to compare each match to when calculating confidence. @@ -81,7 +81,8 @@ class HistoryQuickProvider : public HistoryProvider { bool is_url); // Only for use in unittests. Takes ownership of |index|. - void SetIndexForTesting(history::InMemoryURLIndex* index); + void set_index(history::InMemoryURLIndex* index); + AutocompleteInput autocomplete_input_; std::string languages_; diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc index f936860..65b7654 100644 --- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc +++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc @@ -142,6 +142,13 @@ void HistoryQuickProviderTest::OnProviderUpdate(bool updated_matches) { void HistoryQuickProviderTest::FillData() { history::URLDatabase* db = history_service_->InMemoryDatabase(); ASSERT_TRUE(db != NULL); + + history::InMemoryURLIndex* index = + new history::InMemoryURLIndex(profile_.get(), + FilePath(FILE_PATH_LITERAL("/dummy"))); + PrefService* prefs = profile_->GetPrefs(); + std::string languages(prefs->GetString(prefs::kAcceptLanguages)); + index->Init(db, languages); for (size_t i = 0; i < arraysize(quick_test_db); ++i) { const TestURLInfo& cur = quick_test_db[i]; const GURL current_url(cur.url); @@ -153,20 +160,11 @@ void HistoryQuickProviderTest::FillData() { url_info.set_typed_count(cur.typed_count); url_info.set_last_visit(visit_time); url_info.set_hidden(false); - EXPECT_TRUE(db->AddURL(url_info)); - - history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title), - cur.visit_count, cur.typed_count, - visit_time, false, - history::SOURCE_BROWSED); + url_info.set_id(i); + index->UpdateURL(url_info); } - history::InMemoryURLIndex* index = - new history::InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))); - PrefService* prefs = profile_->GetPrefs(); - std::string languages(prefs->GetString(prefs::kAcceptLanguages)); - index->Init(db, languages); - provider_->SetIndexForTesting(index); + provider_->set_index(index); } HistoryQuickProviderTest::SetShouldContain::SetShouldContain( diff --git a/chrome/browser/history/expire_history_backend.cc b/chrome/browser/history/expire_history_backend.cc index 94ed1ff..904503a 100644 --- a/chrome/browser/history/expire_history_backend.cc +++ b/chrome/browser/history/expire_history_backend.cc @@ -341,11 +341,9 @@ void ExpireHistoryBackend::BroadcastDeleteNotifications( // determine if they care whether anything was deleted). URLsDeletedDetails* deleted_details = new URLsDeletedDetails; deleted_details->all_history = false; - std::vector<URLRow> typed_urls_changed; // Collect this for later. for (size_t i = 0; i < dependencies->deleted_urls.size(); i++) { + deleted_details->rows.push_back(dependencies->deleted_urls[i]); deleted_details->urls.insert(dependencies->deleted_urls[i].url()); - if (dependencies->deleted_urls[i].typed_count() > 0) - typed_urls_changed.push_back(dependencies->deleted_urls[i]); } delegate_->BroadcastNotifications( chrome::NOTIFICATION_HISTORY_URLS_DELETED, deleted_details); diff --git a/chrome/browser/history/history.cc b/chrome/browser/history/history.cc index 0465e8d..9451fb5 100644 --- a/chrome/browser/history/history.cc +++ b/chrome/browser/history/history.cc @@ -37,6 +37,7 @@ #include "chrome/browser/history/history_types.h" #include "chrome/browser/history/in_memory_database.h" #include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/in_memory_url_index.h" #include "chrome/browser/history/top_sites.h" #include "chrome/browser/prefs/pref_service.h" #include "chrome/browser/profiles/profile.h" @@ -182,6 +183,9 @@ void HistoryService::UnloadBackend() { // Get rid of the in-memory backend. in_memory_backend_.reset(); + // Give the InMemoryURLIndex a chance to shutdown. + InMemoryIndex()->ShutDown(); + // The backend's destructor must run on the history thread since it is not // threadsafe. So this thread must not be the last thread holding a reference // to the backend, or a crash could happen. @@ -249,8 +253,8 @@ history::InMemoryURLIndex* HistoryService::InMemoryIndex() { // LoadBackendIfNecessary() here even though it won't affect the return value // for this call. LoadBackendIfNecessary(); - if (in_memory_backend_.get()) - return in_memory_backend_->InMemoryIndex(); + if (history_backend_.get()) + return history_backend_->InMemoryIndex(); return NULL; } @@ -803,7 +807,8 @@ void HistoryService::LoadBackendIfNecessary() { ++current_backend_id_; scoped_refptr<HistoryBackend> backend( - new HistoryBackend(history_dir_, + new HistoryBackend(profile_, + history_dir_, current_backend_id_, new BackendDelegate(this, profile_), bookmark_service_)); diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc index bee9c8c..30ca8c7 100644 --- a/chrome/browser/history/history_backend.cc +++ b/chrome/browser/history/history_backend.cc @@ -23,10 +23,13 @@ #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/history_publisher.h" #include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/in_memory_url_index.h" #include "chrome/browser/history/page_usage_data.h" #include "chrome/browser/history/top_sites.h" +#include "chrome/browser/profiles/profile.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_notification_types.h" +#include "chrome/common/chrome_switches.h" #include "chrome/common/url_constants.h" #include "content/browser/cancelable_request.h" #include "content/browser/download/download_persistent_store_info.h" @@ -199,7 +202,8 @@ class HistoryBackend::URLQuerier { // HistoryBackend -------------------------------------------------------------- -HistoryBackend::HistoryBackend(const FilePath& history_dir, +HistoryBackend::HistoryBackend(Profile* profile, + const FilePath& history_dir, int id, Delegate* delegate, BookmarkService* bookmark_service) @@ -212,6 +216,9 @@ HistoryBackend::HistoryBackend(const FilePath& history_dir, backend_destroy_task_(NULL), segment_queried_(false), bookmark_service_(bookmark_service) { + if (!CommandLine::ForCurrentProcess()->HasSwitch( + switches::kDisableHistoryQuickProvider)) + in_memory_url_index_.reset(new InMemoryURLIndex(profile, history_dir_)); } HistoryBackend::~HistoryBackend() { @@ -651,6 +658,9 @@ void HistoryBackend::InitImpl(const std::string& languages) { archived_db_.reset(); } + if (in_memory_url_index_.get()) + in_memory_url_index_->Init(db_.get(), languages); + // Tell the expiration module about all the nice databases we made. This must // happen before db_->Init() is called since the callback ForceArchiveHistory // may need to expire stuff. @@ -798,7 +808,6 @@ void HistoryBackend::AddPagesWithDetails(const std::vector<URLRow>& urls, NOTREACHED() << "Could not add row to DB"; return; } - if (i->typed_count() > 0) modified->changed_urls.push_back(*i); } @@ -885,6 +894,7 @@ void HistoryBackend::SetPageTitle(const GURL& url, if (row_id && row.title() != title) { row.set_title(title); db_->UpdateURLRow(row_id, row); + row.id_ = row_id; changed_urls.push_back(row); if (row.typed_count() > 0) typed_url_changed = true; diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h index 9936782..6c93070 100644 --- a/chrome/browser/history/history_backend.h +++ b/chrome/browser/history/history_backend.h @@ -27,6 +27,7 @@ class BookmarkService; struct DownloadPersistentStoreInfo; +class Profile; class TestingProfile; struct ThumbnailScore; @@ -104,7 +105,8 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // may be NULL. // // This constructor is fast and does no I/O, so can be called at any time. - HistoryBackend(const FilePath& history_dir, + HistoryBackend(Profile* profile, + const FilePath& history_dir, int id, Delegate* delegate, BookmarkService* bookmark_service); @@ -113,9 +115,9 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // fails, all other functions will fail as well. (Since this runs on another // thread, we don't bother returning failure.) // - // |languages| gives a list of language encodings with which the history - // URLs and omnibox searches are interpreted. - // |force_fail| can be set during unittests to unconditionally fail to init. + // |languages| gives the languages used to break search terms and history + // page titles into separate words. |force_fail| can be set during unittests + // to unconditionally fail to init. void Init(const std::string& languages, bool force_fail); // Notification that the history system is shutting down. This will break @@ -257,6 +259,13 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, const base::Time remove_end); void RemoveDownloads(const base::Time remove_end); + // InMemoryURLIndex ---------------------------------------------------------- + + // Returns the quick history index. + history::InMemoryURLIndex* InMemoryIndex() const { + return in_memory_url_index_.get(); + } + // Segment usage ------------------------------------------------------------- void QuerySegmentUsage(scoped_refptr<QuerySegmentUsageRequest> request, @@ -569,6 +578,9 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // created. scoped_ptr<TextDatabaseManager> text_database_; + // The index used for quick history lookups. + scoped_ptr<history::InMemoryURLIndex> in_memory_url_index_; + // Manages expiration between the various databases. ExpireHistoryBackend expirer_; diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc index 2443d8c1..f7dac5c 100644 --- a/chrome/browser/history/history_backend_unittest.cc +++ b/chrome/browser/history/history_backend_unittest.cc @@ -19,6 +19,7 @@ #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/in_memory_database.h" #include "chrome/browser/history/in_memory_history_backend.h" +#include "chrome/browser/history/in_memory_url_index.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_paths.h" #include "chrome/common/thumbnail_score.h" @@ -146,7 +147,6 @@ class HistoryBackendTest : public testing::Test { BookmarkModel bookmark_model_; - protected: bool loaded_; private: @@ -157,7 +157,8 @@ class HistoryBackendTest : public testing::Test { if (!file_util::CreateNewTempDirectory(FILE_PATH_LITERAL("BackendTest"), &test_dir_)) return; - backend_ = new HistoryBackend(test_dir_, + backend_ = new HistoryBackend(NULL, + test_dir_, 0, new HistoryBackendTestDelegate(this), &bookmark_model_); @@ -955,7 +956,8 @@ TEST_F(HistoryBackendTest, MigrationVisitSource) { FilePath new_history_file = new_history_path.Append(chrome::kHistoryFilename); ASSERT_TRUE(file_util::CopyFile(old_history_path, new_history_file)); - backend_ = new HistoryBackend(new_history_path, + backend_ = new HistoryBackend(NULL, + new_history_path, 0, new HistoryBackendTestDelegate(this), &bookmark_model_); diff --git a/chrome/browser/history/history_notifications.h b/chrome/browser/history/history_notifications.h index ab9d555..a298536 100644 --- a/chrome/browser/history/history_notifications.h +++ b/chrome/browser/history/history_notifications.h @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -25,7 +25,7 @@ struct HistoryDetails { virtual ~HistoryDetails() {} }; -// Details for HISTORY_URL_VISITED. +// Details for NOTIFICATION_HISTORY_URL_VISITED. struct URLVisitedDetails : public HistoryDetails { URLVisitedDetails(); virtual ~URLVisitedDetails(); @@ -40,7 +40,7 @@ struct URLVisitedDetails : public HistoryDetails { history::RedirectList redirects; }; -// Details for NOTIFY_HISTORY_TYPED_URLS_MODIFIED. +// Details for NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED. struct URLsModifiedDetails : public HistoryDetails { URLsModifiedDetails(); virtual ~URLsModifiedDetails(); @@ -49,7 +49,7 @@ struct URLsModifiedDetails : public HistoryDetails { std::vector<URLRow> changed_urls; }; -// Details for NOTIFY_HISTORY_URLS_DELETED. +// Details for NOTIFICATION_HISTORY_URLS_DELETED. struct URLsDeletedDetails : public HistoryDetails { URLsDeletedDetails(); virtual ~URLsDeletedDetails(); @@ -57,9 +57,14 @@ struct URLsDeletedDetails : public HistoryDetails { // Set when all history was deleted. False means just a subset was deleted. bool all_history; + // The URLRows which have been deleted. + std::vector<URLRow> rows; + // The list of unique URLs affected. This is valid only when a subset of // history is deleted. When all of it is deleted, this will be empty, since - // we do not bother to list all URLs. + // we do not bother to list all URLs. (This information can be gleaned from + // |rows| but, since there are several clients who need the set, we pre-build + // it so that the clients don't have to.) std::set<GURL> urls; }; diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h index 6d13264..6898d8c 100644 --- a/chrome/browser/history/history_types.h +++ b/chrome/browser/history/history_types.h @@ -74,6 +74,7 @@ class URLRow { URLRow& operator=(const URLRow& other); URLID id() const { return id_; } + void set_id(URLID id) { id_ = id; } // Note: Use for unittesting only. const GURL& url() const { return url_; } const string16& title() const { diff --git a/chrome/browser/history/history_unittest.cc b/chrome/browser/history/history_unittest.cc index d5ce21a..d995d41 100644 --- a/chrome/browser/history/history_unittest.cc +++ b/chrome/browser/history/history_unittest.cc @@ -119,8 +119,8 @@ class HistoryTest : public testing::Test { // Creates the HistoryBackend and HistoryDatabase on the current thread, // assigning the values to backend_ and db_. void CreateBackendAndDatabase() { - backend_ = - new HistoryBackend(history_dir_, 0, new BackendDelegate(this), NULL); + backend_ = new HistoryBackend(NULL, history_dir_, 0, + new BackendDelegate(this), NULL); backend_->Init(std::string(), false); db_ = backend_->db_.get(); DCHECK(in_mem_backend_.get()) << "Mem backend should have been set by " diff --git a/chrome/browser/history/in_memory_history_backend.cc b/chrome/browser/history/in_memory_history_backend.cc index 6652b1a..073fc8d 100644 --- a/chrome/browser/history/in_memory_history_backend.cc +++ b/chrome/browser/history/in_memory_history_backend.cc @@ -13,11 +13,9 @@ #include "chrome/browser/browser_process.h" #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/in_memory_database.h" -#include "chrome/browser/history/in_memory_url_index.h" #include "chrome/browser/history/url_database.h" #include "chrome/browser/profiles/profile.h" #include "chrome/common/chrome_notification_types.h" -#include "chrome/common/chrome_switches.h" #include "content/common/notification_details.h" #include "content/common/notification_source.h" @@ -27,23 +25,14 @@ InMemoryHistoryBackend::InMemoryHistoryBackend() : profile_(NULL) { } -InMemoryHistoryBackend::~InMemoryHistoryBackend() { - if (index_.get()) - index_->ShutDown(); -} +InMemoryHistoryBackend::~InMemoryHistoryBackend() {} bool InMemoryHistoryBackend::Init(const FilePath& history_filename, const FilePath& history_dir, URLDatabase* db, const std::string& languages) { db_.reset(new InMemoryDatabase); - bool success = db_->InitFromDisk(history_filename); - if (!CommandLine::ForCurrentProcess()->HasSwitch( - switches::kDisableHistoryQuickProvider)) { - index_.reset(new InMemoryURLIndex(history_dir)); - index_->Init(db, languages); - } - return success; + return db_->InitFromDisk(history_filename); } void InMemoryHistoryBackend::AttachToHistoryService(Profile* profile) { @@ -129,36 +118,25 @@ void InMemoryHistoryBackend::OnTypedURLsModified( db_->UpdateURLRow(id, *i); else id = db_->AddURL(*i); - if (index_.get()) - index_->UpdateURL(id, *i); } } void InMemoryHistoryBackend::OnURLsDeleted(const URLsDeletedDetails& details) { - DCHECK(db_.get()); - if (details.all_history) { // When all history is deleted, the individual URLs won't be listed. Just // create a new database to quickly clear everything out. db_.reset(new InMemoryDatabase); if (!db_->InitFromScratch()) db_.reset(); - if (index_.get()) - index_->ReloadFromHistory(db_.get(), true); return; } // Delete all matching URLs in our database. - for (std::set<GURL>::const_iterator i = details.urls.begin(); - i != details.urls.end(); ++i) { - URLID id = db_->GetRowForURL(*i, NULL); - if (id) { - // We typically won't have most of them since we only have a subset of - // history, so ignore errors. - db_->DeleteURLRow(id); - if (index_.get()) - index_->DeleteURL(id); - } + for (std::vector<URLRow>::const_iterator row = details.rows.begin(); + row != details.rows.end(); ++row) { + // We typically won't have most of them since we only have a subset of + // history, so ignore errors. + db_->DeleteURLRow(row->id()); } } diff --git a/chrome/browser/history/in_memory_history_backend.h b/chrome/browser/history/in_memory_history_backend.h index 13bf030..bd22d6d 100644 --- a/chrome/browser/history/in_memory_history_backend.h +++ b/chrome/browser/history/in_memory_history_backend.h @@ -70,9 +70,6 @@ class InMemoryHistoryBackend : public NotificationObserver { const NotificationSource& source, const NotificationDetails& details); - // Return the quick history index. - history::InMemoryURLIndex* InMemoryIndex() const { return index_.get(); } - private: FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteAll); @@ -96,9 +93,6 @@ class InMemoryHistoryBackend : public NotificationObserver { // initialization. Profile* profile_; - // The index used for quick history lookups. - scoped_ptr<history::InMemoryURLIndex> index_; - DISALLOW_COPY_AND_ASSIGN(InMemoryHistoryBackend); }; diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index c557acb..344995c 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -11,18 +11,20 @@ #include <numeric> #include "base/file_util.h" -#include "base/i18n/break_iterator.h" #include "base/i18n/case_conversion.h" #include "base/metrics/histogram.h" -#include "base/string_util.h" #include "base/threading/thread_restrictions.h" #include "base/time.h" #include "base/utf_string_conversions.h" #include "chrome/browser/autocomplete/autocomplete.h" #include "chrome/browser/autocomplete/history_provider_util.h" +#include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/url_database.h" #include "chrome/browser/profiles/profile.h" +#include "chrome/common/chrome_notification_types.h" #include "chrome/common/url_constants.h" +#include "content/common/notification_details.h" +#include "content/common/notification_source.h" #include "googleurl/src/url_parse.h" #include "googleurl/src/url_util.h" #include "net/base/escape.h" @@ -59,23 +61,6 @@ const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; // each of these scores for each factor. const int kScoreRank[] = { 1425, 1200, 900, 400 }; -ScoredHistoryMatch::ScoredHistoryMatch() - : raw_score(0), - can_inline(false) {} - -ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) - : HistoryMatch(url_info, 0, false, false), - raw_score(0), - can_inline(false) {} - -ScoredHistoryMatch::~ScoredHistoryMatch() {} - -// Comparison function for sorting ScoredMatches by their scores. -bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, - const ScoredHistoryMatch& m2) { - return m1.raw_score >= m2.raw_score; -} - InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( const WordIDSet& word_id_set, const HistoryIDSet& history_id_set) @@ -88,11 +73,6 @@ InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} -// Comparison function for sorting TermMatches by their offsets. -bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { - return m1.offset < m2.offset; -} - // Comparison function for sorting search terms by descending length. bool LengthGreater(const string16& string_a, const string16& string_b) { return string_a.length() > string_b.length(); @@ -135,15 +115,23 @@ int ScoreForValue(int value, const int* value_ranks) { return score; } -InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) +InMemoryURLIndex::InMemoryURLIndex(Profile* profile, + const FilePath& history_dir) : history_dir_(history_dir), - history_item_count_(0) { + private_data_(new URLIndexPrivateData) { InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); + if (profile) { + Source<Profile> source(profile); + registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source); + registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED, + source); + registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source); + } } // Called only by unit tests. InMemoryURLIndex::InMemoryURLIndex() - : history_item_count_(0) { + : private_data_(new URLIndexPrivateData) { InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); } @@ -164,7 +152,7 @@ void InMemoryURLIndex::InitializeSchemeWhitelist( // Indexing -bool InMemoryURLIndex::Init(history::URLDatabase* history_db, +bool InMemoryURLIndex::Init(URLDatabase* history_db, const std::string& languages) { // TODO(mrossetti): Register for profile/language change notifications. languages_ = languages; @@ -172,10 +160,52 @@ bool InMemoryURLIndex::Init(history::URLDatabase* history_db, } void InMemoryURLIndex::ShutDown() { + registrar_.RemoveAll(); // Write our cache. SaveToCacheFile(); } +void InMemoryURLIndex::Observe(int type, + const NotificationSource& source, + const NotificationDetails& details) { + switch (type) { + case chrome::NOTIFICATION_HISTORY_URL_VISITED: + OnURLVisited(Details<URLVisitedDetails>(details).ptr()); + break; + case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED: + OnURLsModified(Details<history::URLsModifiedDetails>(details).ptr()); + break; + case chrome::NOTIFICATION_HISTORY_URLS_DELETED: + OnURLsDeleted(Details<history::URLsDeletedDetails>(details).ptr()); + break; + default: + // For simplicity, the unit tests send us all notifications, even when + // we haven't registered for them, so don't assert here. + break; + } +} + +void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) { + UpdateURL(details->row); +} + +void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) { + for (std::vector<history::URLRow>::const_iterator row = + details->changed_urls.begin(); + row != details->changed_urls.end(); ++row) + UpdateURL(*row); +} + +void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) { + if (details->all_history) { + ClearPrivateData(); + } else { + for (std::vector<URLRow>::const_iterator row = details->rows.begin(); + row != details->rows.end(); ++row) + DeleteURL(*row); + } +} + bool InMemoryURLIndex::IndexRow(const URLRow& row) { const GURL& gurl(row.url()); @@ -183,26 +213,39 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) return true; + URLID row_id = row.id(); + // Strip out username and password before saving and indexing. string16 url(net::FormatUrl(gurl, languages_, net::kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, NULL, NULL, NULL)); - - HistoryID history_id = static_cast<HistoryID>(row.id()); - DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); + HistoryID history_id = static_cast<HistoryID>(row_id); + DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); // Add the row for quick lookup in the history info store. - URLRow new_row(GURL(url), row.id()); + URLRow new_row(GURL(url), row_id); new_row.set_visit_count(row.visit_count()); new_row.set_typed_count(row.typed_count()); new_row.set_last_visit(row.last_visit()); new_row.set_title(row.title()); - history_info_map_[history_id] = new_row; + private_data_->history_info_map_[history_id] = new_row; + // Index the words contained in the URL and title of the row. + AddRowWordsToIndex(new_row); + return true; +} + +void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) { + HistoryID history_id = static_cast<HistoryID>(row.id()); // Split URL into individual, unique words then add in the title words. + const GURL& gurl(row.url()); + string16 url(net::FormatUrl(gurl, languages_, + net::kFormatUrlOmitUsernamePassword, + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, + NULL, NULL, NULL)); url = base::i18n::ToLower(url); - String16Set url_words = WordSetFromString16(url); - String16Set title_words = WordSetFromString16(row.title()); + String16Set url_words = String16SetFromString16(url); + String16Set title_words = String16SetFromString16(row.title()); String16Set words; std::set_union(url_words.begin(), url_words.end(), title_words.begin(), title_words.end(), @@ -211,8 +254,48 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { word_iter != words.end(); ++word_iter) AddWordToIndex(*word_iter, history_id); - ++history_item_count_; - return true; + search_term_cache_.clear(); // Invalidate the term cache. +} + +void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) { + RemoveRowWordsFromIndex(row); + HistoryID history_id = static_cast<HistoryID>(row.id()); + private_data_->history_info_map_.erase(history_id); +} + +void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) { + // Remove the entries in history_id_word_map_ and word_id_history_map_ for + // this row. + URLIndexPrivateData& private_data(*(private_data_.get())); + HistoryID history_id = static_cast<HistoryID>(row.id()); + WordIDSet word_id_set = private_data.history_id_word_map_[history_id]; + private_data.history_id_word_map_.erase(history_id); + + // Reconcile any changes to word usage. + for (WordIDSet::iterator word_id_iter = word_id_set.begin(); + word_id_iter != word_id_set.end(); ++word_id_iter) { + WordID word_id = *word_id_iter; + private_data.word_id_history_map_[word_id].erase(history_id); + if (!private_data.word_id_history_map_[word_id].empty()) + continue; // The word is still in use. + + // The word is no longer in use. Reconcile any changes to character usage. + string16 word = private_data.word_list_[word_id]; + Char16Set characters = Char16SetFromString16(word); + for (Char16Set::iterator uni_char_iter = characters.begin(); + uni_char_iter != characters.end(); ++uni_char_iter) { + char16 uni_char = *uni_char_iter; + private_data.char_word_map_[uni_char].erase(word_id); + if (private_data.char_word_map_[uni_char].empty()) + private_data.char_word_map_.erase(uni_char); // No longer in use. + } + + // Complete the removal of references to the word. + private_data.word_id_history_map_.erase(word_id); + private_data.word_map_.erase(word); + private_data.word_list_[word_id] = string16(); + private_data.available_words_.insert(word_id); + } } bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, @@ -241,12 +324,7 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, } void InMemoryURLIndex::ClearPrivateData() { - history_item_count_ = 0; - word_list_.clear(); - word_map_.clear(); - char_word_map_.clear(); - word_id_history_map_.clear(); - history_info_map_.clear(); + private_data_->Clear(); search_term_cache_.clear(); } @@ -282,10 +360,13 @@ bool InMemoryURLIndex::RestoreFromCacheFile() { UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", base::TimeTicks::Now() - beginning_time); - UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); + UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", + private_data_->history_id_word_map_.size()); UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", + private_data_->word_map_.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", + private_data_->char_word_map_.size()); return true; } @@ -317,13 +398,14 @@ bool InMemoryURLIndex::SaveToCacheFile() { return true; } -void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { +void InMemoryURLIndex::UpdateURL(const URLRow& row) { // The row may or may not already be in our index. If it is not already // indexed and it qualifies then it gets indexed. If it is already // indexed and still qualifies then it gets updated, otherwise it // is deleted from the index. - HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); - if (row_pos == history_info_map_.end()) { + HistoryInfoMap::iterator row_pos = + private_data_->history_info_map_.find(row.id()); + if (row_pos == private_data_->history_info_map_.end()) { // This new row should be indexed if it qualifies. if (RowQualifiesAsSignificant(row, base::Time())) IndexRow(row); @@ -335,25 +417,26 @@ void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { old_row.set_visit_count(row.visit_count()); old_row.set_typed_count(row.typed_count()); old_row.set_last_visit(row.last_visit()); - // TODO(mrossetti): When we start indexing the title the next line - // will need attention. - old_row.set_title(row.title()); + // While the URL is guaranteed to remain stable, the title may have changed. + // If so, then we need to update the index with the changed words. + if (old_row.title() != row.title()) { + // Clear all words associated with this row and re-index both the + // URL and title. + RemoveRowWordsFromIndex(row); + old_row.set_title(row.title()); + AddRowWordsToIndex(old_row); + } } else { - // This indexed row no longer qualifies and will be de-indexed. - history_info_map_.erase(row_id); + // This indexed row no longer qualifies and will be de-indexed by + // clearing all words associated with this row. + RemoveRowFromIndex(row); } // This invalidates the cache. search_term_cache_.clear(); - // TODO(mrossetti): Record this transaction in the cache. } -void InMemoryURLIndex::DeleteURL(URLID row_id) { - // Note that this does not remove any reference to this row from the - // word_id_history_map_. That map will continue to contain (and return) - // hits against this row until that map is rebuilt, but since the - // history_info_map_ no longer references the row no erroneous results - // will propagate to the user. - history_info_map_.erase(row_id); +void InMemoryURLIndex::DeleteURL(const URLRow& row) { + RemoveRowFromIndex(row); // This invalidates the word cache. search_term_cache_.clear(); // TODO(mrossetti): Record this transaction in the cache. @@ -364,6 +447,12 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) { ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( const String16Vector& terms) { ScoredHistoryMatches scored_items; + + // Do nothing if we have indexed no words (probably because we've not been + // initialized yet). + if (private_data_->word_list_.empty()) + return scored_items; + if (!terms.empty()) { // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep // approach. @@ -421,7 +510,7 @@ void InMemoryURLIndex::ResetSearchTermCache() { iter->second.used_ = false; } -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( +HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( const string16& uni_string) { // Break the terms down into individual terms (words), get the candidate // set for each term, and intersect each to get a final candidate list. @@ -429,7 +518,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( // a string like "http://www.somewebsite.com" which, from our perspective, // is four words: 'http', 'www', 'somewebsite', and 'com'. HistoryIDSet history_id_set; - String16Vector terms = WordVectorFromString16(uni_string, true); + String16Vector terms = String16VectorFromString16(uni_string, true); // Sort the terms into the longest first as such are likely to narrow down // the results quicker. Also, single character terms are the most expensive // to process so save them for last. @@ -456,7 +545,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( return history_id_set; } -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( +HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( const string16& term) { if (term.empty()) return HistoryIDSet(); @@ -466,7 +555,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // occuring words in the user's searches. size_t term_length = term.length(); - InMemoryURLIndex::WordIDSet word_id_set; + WordIDSet word_id_set; if (term_length > 1) { // See if this term or a prefix thereof is present in the cache. SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); @@ -512,7 +601,8 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // Reduce the word set with any leftover, unprocessed characters. if (!unique_chars.empty()) { - WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); + WordIDSet leftover_set( + private_data_->WordIDSetForTermChars(unique_chars)); // We might come up empty on the leftovers. if (leftover_set.empty()) { search_term_cache_[term] = SearchTermCacheItem(); @@ -535,13 +625,15 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // contains words which do not have the search term as a proper subset. for (WordIDSet::iterator word_set_iter = word_id_set.begin(); word_set_iter != word_id_set.end(); ) { - if (word_list_[*word_set_iter].find(term) == string16::npos) + if (private_data_->word_list_[*word_set_iter].find(term) == + string16::npos) word_id_set.erase(word_set_iter++); else ++word_set_iter; } } else { - word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); + word_id_set = + private_data_->WordIDSetForTermChars(Char16SetFromString16(term)); } // If any words resulted then we can compose a set of history IDs by unioning @@ -551,8 +643,9 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( for (WordIDSet::iterator word_id_iter = word_id_set.begin(); word_id_iter != word_id_set.end(); ++word_id_iter) { WordID word_id = *word_id_iter; - WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); - if (word_iter != word_id_history_map_.end()) { + WordIDHistoryMap::iterator word_iter = + private_data_->word_id_history_map_.find(word_id); + if (word_iter != private_data_->word_id_history_map_.end()) { HistoryIDSet& word_history_id_set(word_iter->second); history_id_set.insert(word_history_id_set.begin(), word_history_id_set.end()); @@ -570,85 +663,53 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // Utility Functions -// static -InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( - const string16& uni_string) { - const size_t kMaxWordLength = 64; - String16Vector words = WordVectorFromString16(uni_string, false); - String16Set word_set; - for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); - ++iter) - word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); - return word_set; -} - -// static -InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( - const string16& uni_string, - bool break_on_space) { - base::i18n::BreakIterator iter( - uni_string, - break_on_space ? base::i18n::BreakIterator::BREAK_SPACE - : base::i18n::BreakIterator::BREAK_WORD); - String16Vector words; - if (!iter.Init()) - return words; - while (iter.Advance()) { - if (break_on_space || iter.IsWord()) { - string16 word = iter.GetString(); - if (break_on_space) - TrimWhitespace(word, TRIM_ALL, &word); - if (!word.empty()) - words.push_back(word); - } - } - return words; -} - -// static -InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( - const string16& term) { - Char16Set characters; - for (string16::const_iterator iter = term.begin(); iter != term.end(); - ++iter) - characters.insert(*iter); - return characters; -} - void InMemoryURLIndex::AddWordToIndex(const string16& term, HistoryID history_id) { - WordMap::iterator word_pos = word_map_.find(term); - if (word_pos != word_map_.end()) + WordMap::iterator word_pos = private_data_->word_map_.find(term); + if (word_pos != private_data_->word_map_.end()) UpdateWordHistory(word_pos->second, history_id); else AddWordHistory(term, history_id); } void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { - WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); - DCHECK(history_pos != word_id_history_map_.end()); - HistoryIDSet& history_id_set(history_pos->second); - history_id_set.insert(history_id); + WordIDHistoryMap::iterator history_pos = + private_data_->word_id_history_map_.find(word_id); + DCHECK(history_pos != private_data_->word_id_history_map_.end()); + HistoryIDSet& history_id_set(history_pos->second); + history_id_set.insert(history_id); + private_data_->AddToHistoryIDWordMap(history_id, word_id); } // Add a new word to the word list and the word map, and then create a // new entry in the word/history map. void InMemoryURLIndex::AddWordHistory(const string16& term, HistoryID history_id) { - word_list_.push_back(term); - WordID word_id = word_list_.size() - 1; - word_map_[term] = word_id; + URLIndexPrivateData& private_data(*(private_data_.get())); + WordID word_id = private_data.word_list_.size(); + if (private_data.available_words_.empty()) { + private_data.word_list_.push_back(term); + } else { + word_id = *(private_data.available_words_.begin()); + private_data.word_list_[word_id] = term; + private_data.available_words_.erase(word_id); + } + private_data.word_map_[term] = word_id; + HistoryIDSet history_id_set; history_id_set.insert(history_id); - word_id_history_map_[word_id] = history_id_set; + private_data.word_id_history_map_[word_id] = history_id_set; + private_data_->AddToHistoryIDWordMap(history_id, word_id); + // For each character in the newly added word (i.e. a word that is not // already in the word index), add the word to the character index. Char16Set characters = Char16SetFromString16(term); for (Char16Set::iterator uni_char_iter = characters.begin(); uni_char_iter != characters.end(); ++uni_char_iter) { char16 uni_char = *uni_char_iter; - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); - if (char_iter != char_word_map_.end()) { + CharWordIDMap::iterator char_iter = + private_data.char_word_map_.find(uni_char); + if (char_iter != private_data.char_word_map_.end()) { // Update existing entry in the char/word index. WordIDSet& word_id_set(char_iter->second); word_id_set.insert(word_id); @@ -656,108 +717,13 @@ void InMemoryURLIndex::AddWordHistory(const string16& term, // Create a new entry in the char/word index. WordIDSet word_id_set; word_id_set.insert(word_id); - char_word_map_[uni_char] = word_id_set; - } - } -} - -InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( - const Char16Set& term_chars) { - WordIDSet word_id_set; - for (Char16Set::const_iterator c_iter = term_chars.begin(); - c_iter != term_chars.end(); ++c_iter) { - CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); - if (char_iter == char_word_map_.end()) { - // A character was not found so there are no matching results: bail. - word_id_set.clear(); - break; - } - WordIDSet& char_word_id_set(char_iter->second); - // It is possible for there to no longer be any words associated with - // a particular character. Give up in that case. - if (char_word_id_set.empty()) { - word_id_set.clear(); - break; - } - - if (c_iter == term_chars.begin()) { - // First character results becomes base set of results. - word_id_set = char_word_id_set; - } else { - // Subsequent character results get intersected in. - WordIDSet new_word_id_set; - std::set_intersection(word_id_set.begin(), word_id_set.end(), - char_word_id_set.begin(), char_word_id_set.end(), - std::inserter(new_word_id_set, - new_word_id_set.begin())); - word_id_set.swap(new_word_id_set); - } - } - return word_id_set; -} - -// static -TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, - const string16& string, - int term_num) { - const size_t kMaxCompareLength = 2048; - const string16& short_string = (string.length() > kMaxCompareLength) ? - string.substr(0, kMaxCompareLength) : string; - TermMatches matches; - for (size_t location = short_string.find(term); location != string16::npos; - location = short_string.find(term, location + 1)) { - matches.push_back(TermMatch(term_num, location, term.length())); - } - return matches; -} - -// static -TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { - if (matches.empty()) - return matches; - TermMatches sorted_matches = matches; - std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); - TermMatches clean_matches; - TermMatch last_match = sorted_matches[0]; - clean_matches.push_back(last_match); - for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; - iter != sorted_matches.end(); ++iter) { - if (iter->offset >= last_match.offset + last_match.length) { - last_match = *iter; - clean_matches.push_back(last_match); + private_data.char_word_map_[uni_char] = word_id_set; } } - return clean_matches; -} - -// static -std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( - const TermMatches& matches) { - std::vector<size_t> offsets; - for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) - offsets.push_back(i->offset); - return offsets; -} - -// static -TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( - const TermMatches& matches, - const std::vector<size_t>& offsets) { - DCHECK_EQ(matches.size(), offsets.size()); - TermMatches new_matches; - std::vector<size_t>::const_iterator offset_iter = offsets.begin(); - for (TermMatches::const_iterator term_iter = matches.begin(); - term_iter != matches.end(); ++term_iter, ++offset_iter) { - if (*offset_iter != string16::npos) { - TermMatch new_match(*term_iter); - new_match.offset = *offset_iter; - new_matches.push_back(new_match); - } - } - return new_matches; } // static +// TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( const URLRow& row, const String16Vector& terms) { @@ -786,8 +752,8 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( } // Sort matches by offset and eliminate any which overlap. - match.url_matches = SortAndDeoverlap(match.url_matches); - match.title_matches = SortAndDeoverlap(match.title_matches); + match.url_matches = SortAndDeoverlapMatches(match.url_matches); + match.title_matches = SortAndDeoverlapMatches(match.title_matches); // We should not (currently) inline autocomplete a result unless both of the // following are true: @@ -839,7 +805,6 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( for (int i = 0; i < kSignificantFactors; ++i) match.raw_score += factor[i]; match.raw_score /= kSignificantFactors; - return match; } @@ -901,19 +866,17 @@ InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( const InMemoryURLIndex& index, const String16Vector& lower_terms) : index_(index), - lower_terms_(lower_terms) { -} + lower_terms_(lower_terms) {} InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} -void InMemoryURLIndex::AddHistoryMatch::operator()( - const InMemoryURLIndex::HistoryID history_id) { +void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) { HistoryInfoMap::const_iterator hist_pos = - index_.history_info_map_.find(history_id); + index_.private_data_->history_info_map_.find(history_id); // Note that a history_id may be present in the word_id_history_map_ yet not // be found in the history_info_map_. This occurs when an item has been // deleted by the user or the item no longer qualifies as a quick result. - if (hist_pos != index_.history_info_map_.end()) { + if (hist_pos != index_.private_data_->history_info_map_.end()) { const URLRow& hist_item = hist_pos->second; ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); if (match.raw_score > 0) @@ -935,7 +898,9 @@ bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { DCHECK(cache); cache->set_timestamp(base::Time::Now().ToInternalValue()); - cache->set_history_item_count(history_item_count_); + // history_item_count_ is no longer used but rather than change the protobuf + // definition use a placeholder. This will go away with the switch to SQLite. + cache->set_history_item_count(0); SaveWordList(cache); SaveWordMap(cache); SaveCharWordMap(cache); @@ -946,20 +911,18 @@ void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { bool InMemoryURLIndex::RestorePrivateData( const InMemoryURLIndexCacheItem& cache) { last_saved_ = base::Time::FromInternalValue(cache.timestamp()); - history_item_count_ = cache.history_item_count(); - return (history_item_count_ == 0) || (RestoreWordList(cache) && - RestoreWordMap(cache) && RestoreCharWordMap(cache) && - RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); + return RestoreWordList(cache) && RestoreWordMap(cache) && + RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && + RestoreHistoryInfoMap(cache); } - void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { - if (word_list_.empty()) + if (private_data_->word_list_.empty()) return; WordListItem* list_item = cache->mutable_word_list(); - list_item->set_word_count(word_list_.size()); - for (String16Vector::const_iterator iter = word_list_.begin(); - iter != word_list_.end(); ++iter) + list_item->set_word_count(private_data_->word_list_.size()); + for (String16Vector::const_iterator iter = private_data_->word_list_.begin(); + iter != private_data_->word_list_.end(); ++iter) list_item->add_word(UTF16ToUTF8(*iter)); } @@ -974,17 +937,17 @@ bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { const RepeatedPtrField<std::string>& words(list_item.word()); for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); iter != words.end(); ++iter) - word_list_.push_back(UTF8ToUTF16(*iter)); + private_data_->word_list_.push_back(UTF8ToUTF16(*iter)); return true; } void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { - if (word_map_.empty()) + if (private_data_->word_map_.empty()) return; WordMapItem* map_item = cache->mutable_word_map(); - map_item->set_item_count(word_map_.size()); - for (WordMap::const_iterator iter = word_map_.begin(); - iter != word_map_.end(); ++iter) { + map_item->set_item_count(private_data_->word_map_.size()); + for (WordMap::const_iterator iter = private_data_->word_map_.begin(); + iter != private_data_->word_map_.end(); ++iter) { WordMapEntry* map_entry = map_item->add_word_map_entry(); map_entry->set_word(UTF16ToUTF8(iter->first)); map_entry->set_word_id(iter->second); @@ -1002,17 +965,18 @@ bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); iter != entries.end(); ++iter) - word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); + private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); return true; } void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { - if (char_word_map_.empty()) + if (private_data_->char_word_map_.empty()) return; CharWordMapItem* map_item = cache->mutable_char_word_map(); - map_item->set_item_count(char_word_map_.size()); - for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); - iter != char_word_map_.end(); ++iter) { + map_item->set_item_count(private_data_->char_word_map_.size()); + for (CharWordIDMap::const_iterator iter = + private_data_->char_word_map_.begin(); + iter != private_data_->char_word_map_.end(); ++iter) { CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); map_entry->set_char_16(iter->first); const WordIDSet& word_id_set(iter->second); @@ -1046,19 +1010,20 @@ bool InMemoryURLIndex::RestoreCharWordMap( for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); jiter != word_ids.end(); ++jiter) word_id_set.insert(*jiter); - char_word_map_[uni_char] = word_id_set; + private_data_->char_word_map_[uni_char] = word_id_set; } return true; } void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) const { - if (word_id_history_map_.empty()) + if (private_data_->word_id_history_map_.empty()) return; WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); - map_item->set_item_count(word_id_history_map_.size()); - for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); - iter != word_id_history_map_.end(); ++iter) { + map_item->set_item_count(private_data_->word_id_history_map_.size()); + for (WordIDHistoryMap::const_iterator iter = + private_data_->word_id_history_map_.begin(); + iter != private_data_->word_id_history_map_.end(); ++iter) { WordIDHistoryMapEntry* map_entry = map_item->add_word_id_history_map_entry(); map_entry->set_word_id(iter->first); @@ -1091,21 +1056,24 @@ bool InMemoryURLIndex::RestoreWordIDHistoryMap( HistoryIDSet history_id_set; const RepeatedField<int64>& history_ids(iter->history_id()); for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); - jiter != history_ids.end(); ++jiter) + jiter != history_ids.end(); ++jiter) { history_id_set.insert(*jiter); - word_id_history_map_[word_id] = history_id_set; + private_data_->AddToHistoryIDWordMap(*jiter, word_id); + } + private_data_->word_id_history_map_[word_id] = history_id_set; } return true; } void InMemoryURLIndex::SaveHistoryInfoMap( InMemoryURLIndexCacheItem* cache) const { - if (history_info_map_.empty()) + if (private_data_->history_info_map_.empty()) return; HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); - map_item->set_item_count(history_info_map_.size()); - for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); - iter != history_info_map_.end(); ++iter) { + map_item->set_item_count(private_data_->history_info_map_.size()); + for (HistoryInfoMap::const_iterator iter = + private_data_->history_info_map_.begin(); + iter != private_data_->history_info_map_.end(); ++iter) { HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); map_entry->set_history_id(iter->first); const URLRow& url_row(iter->second); @@ -1143,7 +1111,7 @@ bool InMemoryURLIndex::RestoreHistoryInfoMap( string16 title(UTF8ToUTF16(iter->title())); url_row.set_title(title); } - history_info_map_[history_id] = url_row; + private_data_->history_info_map_[history_id] = url_row; } return true; } diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h index be31abe..598c282 100644 --- a/chrome/browser/history/in_memory_url_index.h +++ b/chrome/browser/history/in_memory_url_index.h @@ -21,16 +21,15 @@ #include "chrome/browser/autocomplete/autocomplete_match.h" #include "chrome/browser/autocomplete/history_provider_util.h" #include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/history/in_memory_url_index_cache.pb.h" +#include "content/common/notification_observer.h" +#include "content/common/notification_registrar.h" #include "sql/connection.h" #include "testing/gtest/include/gtest/gtest_prod.h" class Profile; -namespace base { -class Time; -} - namespace in_memory_url_index { class InMemoryURLIndexCacheItem; } @@ -40,39 +39,9 @@ namespace history { namespace imui = in_memory_url_index; class URLDatabase; - -// Specifies where an omnibox term occurs within a string. Used for specifying -// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in -// scoring a result. -struct TermMatch { - TermMatch(int term_num, size_t offset, size_t length) - : term_num(term_num), - offset(offset), - length(length) {} - - int term_num; // The index of the term in the original search string. - size_t offset; // The starting offset of the substring match. - size_t length; // The length of the substring match. -}; -typedef std::vector<TermMatch> TermMatches; - -// Used for intermediate history result operations. -struct ScoredHistoryMatch : public HistoryMatch { - ScoredHistoryMatch(); // Required by STL. - explicit ScoredHistoryMatch(const URLRow& url_info); - ~ScoredHistoryMatch(); - - static bool MatchScoreGreater(const ScoredHistoryMatch& m1, - const ScoredHistoryMatch& m2); - - // An interim score taking into consideration location and completeness - // of the match. - int raw_score; - TermMatches url_matches; // Term matches within the URL. - TermMatches title_matches; // Term matches within the page title. - bool can_inline; // True if this is a candidate for in-line autocompletion. -}; -typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; +struct URLsDeletedDetails; +struct URLsModifiedDetails; +struct URLVisitedDetails; // The URL history source. // Holds portions of the URL database in memory in an indexed form. Used to @@ -93,16 +62,14 @@ typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; // will eliminate such words except in the case where a single character // is being searched on and which character occurs as the second char16 of a // multi-char16 instance. -class InMemoryURLIndex { +class InMemoryURLIndex : public NotificationObserver { public: // |history_dir| is a path to the directory containing the history database // within the profile wherein the cache and transaction journals will be // stored. - explicit InMemoryURLIndex(const FilePath& history_dir); - ~InMemoryURLIndex(); - - // Convenience types - typedef std::vector<string16> String16Vector; + explicit InMemoryURLIndex(Profile* profile, + const FilePath& history_dir); + virtual ~InMemoryURLIndex(); // Opens and indexes the URL history database. // |languages| gives a list of language encodings with which the history @@ -143,36 +110,17 @@ class InMemoryURLIndex { // Updates or adds an history item to the index if it meets the minimum // 'quick' criteria. - void UpdateURL(URLID row_id, const URLRow& row); + void UpdateURL(const URLRow& row); // Deletes indexing data for an history item. The item may not have actually // been indexed (which is the case if it did not previously meet minimum // 'quick' criteria). - void DeleteURL(URLID row_id); + void DeleteURL(const URLRow& row); - // Breaks the |uni_string| string down into individual words and return - // a vector with the individual words in their original order. If - // |break_on_space| is false then the resulting list will contain only words - // containing alpha-numeric characters. If |break_on_space| is true then the - // resulting list will contain strings broken at whitespace. - // - // Example: - // Given: |uni_string|: "http://www.google.com/ harry the rabbit." - // With |break_on_space| false the returned list will contain: - // "http", "www", "google", "com", "harry", "the", "rabbit" - // With |break_on_space| true the returned list will contain: - // "http://", "www.google.com/", "harry", "the", "rabbit." - static String16Vector WordVectorFromString16(const string16& uni_string, - bool break_on_space); - - // Extract and return the offsets from |matches|. - static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); - - // Replace the offsets in |matches| with those given in |offsets|, deleting - // any which are npos, and return the updated list of matches. - static TermMatches ReplaceOffsetsInTermMatches( - const TermMatches& matches, - const std::vector<size_t>& offsets); + // Notification callback. + virtual void Observe(int type, + const NotificationSource& source, + const NotificationDetails& details); private: friend class AddHistoryMatch; @@ -194,29 +142,6 @@ class InMemoryURLIndex { // Creating one of me without a history path is not allowed (tests excepted). InMemoryURLIndex(); - // Convenience types. - typedef std::set<string16> String16Set; - typedef std::set<char16> Char16Set; - typedef std::vector<char16> Char16Vector; - - // An index into list of all of the words we have indexed. - typedef int WordID; - - // A map allowing a WordID to be determined given a word. - typedef std::map<string16, WordID> WordMap; - - // A map from character to word_ids. - typedef std::set<WordID> WordIDSet; // An index into the WordList. - typedef std::map<char16, WordIDSet> CharWordIDMap; - - // A map from word_id to history item. - // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit. - // Consider using a smaller type. - typedef URLID HistoryID; - typedef std::set<HistoryID> HistoryIDSet; - typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; - - // Support caching of term results so that we can optimize searches which // build upon a previous search. Each entry in this map represents one // search term from the most recent search. For example, if the user had @@ -248,12 +173,6 @@ class InMemoryURLIndex { }; typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap; - // TODO(rohitrao): Probably replace this with QueryResults. - typedef std::vector<URLRow> URLRowVector; - - // A map from history_id to the history's URL and title. - typedef std::map<HistoryID, URLRow> HistoryInfoMap; - // A helper class which performs the final filter on each candidate // history URL match, inserting accepted matches into |scored_matches_| // and trimming the maximum number of matches to 10. @@ -280,34 +199,19 @@ class InMemoryURLIndex { // Initializes the whitelist of URL schemes. static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); - // Breaks a string down into individual words. - static String16Set WordSetFromString16(const string16& uni_string); - - // Given a set of Char16s, finds words containing those characters. - WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); - - // Creates a TermMatches which has an entry for each occurrence of the string - // |term| found in the string |string|. Mark each match with |term_num| so - // that the resulting TermMatches can be merged with other TermMatches for - // other terms. - static TermMatches MatchTermInString(const string16& term, - const string16& string, - int term_num); - // URL History indexing support functions. // Indexes one URL history item. bool IndexRow(const URLRow& row); - // Breaks the |uni_word| string down into its individual characters. - // Note that this is temporarily intended to work on a single word, but - // _will_ work on a string of words, perhaps with unexpected results. - // TODO(mrossetti): Lots of optimizations possible here for not restarting - // a search if the user is just typing along. Also, change this to uniString - // and properly handle substring matches, scoring and sorting the results - // by score. Also, provide the metrics for where the matches occur so that - // the UI can highlight the matched sections. - static Char16Set Char16SetFromString16(const string16& uni_word); + // Parses and indexes the words in the URL and page title of |row|. + void AddRowWordsToIndex(const URLRow& row); + + // Removes |row| and all associated words and characters from the index. + void RemoveRowFromIndex(const URLRow& row); + + // Removes all words and characters associated with |row| from the index. + void RemoveRowWordsFromIndex(const URLRow& row); // Given a single word in |uni_word|, adds a reference for the containing // history item identified by |history_id| to the index. @@ -352,13 +256,14 @@ class InMemoryURLIndex { static int ScoreComponentForMatches(const TermMatches& matches, size_t max_length); - // Sorts and removes overlapping substring matches from |matches| and - // returns the cleaned up matches. - static TermMatches SortAndDeoverlap(const TermMatches& matches); - // Determines if |gurl| has a whitelisted scheme and returns true if so. bool URLSchemeIsWhitelisted(const GURL& gurl) const; + // Notification handlers. + void OnURLVisited(const URLVisitedDetails* details); + void OnURLsModified(const URLsModifiedDetails* details); + void OnURLsDeleted(const URLsDeletedDetails* details); + // Utility functions supporting RestoreFromCache and SaveToCache. // Construct a file path for the cache file within the same directory where @@ -384,6 +289,8 @@ class InMemoryURLIndex { bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); + NotificationRegistrar registrar_; + // Directory where cache file resides. This is, except when unit testing, // the same directory in which the profile's history database is found. It // should never be empty. @@ -395,21 +302,8 @@ class InMemoryURLIndex { // the InMemoryURLIndex was last populated. base::Time last_saved_; - // A list of all of indexed words. The index of a word in this list is the - // ID of the word in the word_map_. It reduces the memory overhead by - // replacing a potentially long and repeated string with a simple index. - // NOTE: A word will _never_ be removed from this vector thus insuring - // the immutability of the word_id throughout the session, reducing - // maintenance complexity. - // TODO(mrossetti): Profile the vector allocation and determine if judicious - // 'reserve' calls are called for. - String16Vector word_list_; - - int history_item_count_; - WordMap word_map_; - CharWordIDMap char_word_map_; - WordIDHistoryMap word_id_history_map_; - HistoryInfoMap history_info_map_; + // The index's durable private data. + scoped_ptr<URLIndexPrivateData> private_data_; // Cache of search terms. SearchTermCacheMap search_term_cache_; diff --git a/chrome/browser/history/in_memory_url_index_types.cc b/chrome/browser/history/in_memory_url_index_types.cc new file mode 100644 index 0000000..4447a0e --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_types.cc @@ -0,0 +1,197 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/in_memory_url_index_types.h" + +#include <algorithm> + +#include "base/i18n/break_iterator.h" +#include "base/i18n/case_conversion.h" +#include "base/string_util.h" + +namespace history { + +// Matches within URL and Title Strings ---------------------------------------- + +TermMatches MatchTermInString(const string16& term, + const string16& string, + int term_num) { + const size_t kMaxCompareLength = 2048; + const string16& short_string = (string.length() > kMaxCompareLength) ? + string.substr(0, kMaxCompareLength) : string; + TermMatches matches; + for (size_t location = short_string.find(term); location != string16::npos; + location = short_string.find(term, location + 1)) + matches.push_back(TermMatch(term_num, location, term.length())); + return matches; +} + +// Comparison function for sorting TermMatches by their offsets. +bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { + return m1.offset < m2.offset; +} + +TermMatches SortAndDeoverlapMatches(const TermMatches& matches) { + if (matches.empty()) + return matches; + TermMatches sorted_matches = matches; + std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); + TermMatches clean_matches; + TermMatch last_match; + for (TermMatches::const_iterator iter = sorted_matches.begin(); + iter != sorted_matches.end(); ++iter) { + if (iter->offset >= last_match.offset + last_match.length) { + last_match = *iter; + clean_matches.push_back(last_match); + } + } + return clean_matches; +} + +std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches) { + std::vector<size_t> offsets; + for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) + offsets.push_back(i->offset); + return offsets; +} + +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, + const std::vector<size_t>& offsets) { + DCHECK_EQ(matches.size(), offsets.size()); + TermMatches new_matches; + std::vector<size_t>::const_iterator offset_iter = offsets.begin(); + for (TermMatches::const_iterator term_iter = matches.begin(); + term_iter != matches.end(); ++term_iter, ++offset_iter) { + if (*offset_iter != string16::npos) { + TermMatch new_match(*term_iter); + new_match.offset = *offset_iter; + new_matches.push_back(new_match); + } + } + return new_matches; +} + +// ScoredHistoryMatch ---------------------------------------------------------- + +ScoredHistoryMatch::ScoredHistoryMatch() + : raw_score(0), + can_inline(false) {} + +ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) + : HistoryMatch(url_info, 0, false, false), + raw_score(0), + can_inline(false) {} + +ScoredHistoryMatch::~ScoredHistoryMatch() {} + +// Comparison function for sorting ScoredMatches by their scores. +bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, + const ScoredHistoryMatch& m2) { + return m1.raw_score >= m2.raw_score; +} + +// InMemoryURLIndex's Private Data --------------------------------------------- + +URLIndexPrivateData::URLIndexPrivateData() {} + +URLIndexPrivateData::~URLIndexPrivateData() {} + +void URLIndexPrivateData::Clear() { + word_list_.clear(); + available_words_.clear(); + word_map_.clear(); + char_word_map_.clear(); + word_id_history_map_.clear(); + history_id_word_map_.clear(); + history_info_map_.clear(); +} + +void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id, + WordID word_id) { + HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id); + if (iter != history_id_word_map_.end()) { + WordIDSet& word_id_set(iter->second); + word_id_set.insert(word_id); + } else { + WordIDSet word_id_set; + word_id_set.insert(word_id); + history_id_word_map_[history_id] = word_id_set; + } +} + +WordIDSet URLIndexPrivateData::WordIDSetForTermChars( + const Char16Set& term_chars) { + WordIDSet word_id_set; + for (Char16Set::const_iterator c_iter = term_chars.begin(); + c_iter != term_chars.end(); ++c_iter) { + CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); + if (char_iter == char_word_map_.end()) { + // A character was not found so there are no matching results: bail. + word_id_set.clear(); + break; + } + WordIDSet& char_word_id_set(char_iter->second); + // It is possible for there to no longer be any words associated with + // a particular character. Give up in that case. + if (char_word_id_set.empty()) { + word_id_set.clear(); + break; + } + + if (c_iter == term_chars.begin()) { + // First character results becomes base set of results. + word_id_set = char_word_id_set; + } else { + // Subsequent character results get intersected in. + WordIDSet new_word_id_set; + std::set_intersection(word_id_set.begin(), word_id_set.end(), + char_word_id_set.begin(), char_word_id_set.end(), + std::inserter(new_word_id_set, + new_word_id_set.begin())); + word_id_set.swap(new_word_id_set); + } + } + return word_id_set; +} + +// Utility Functions ----------------------------------------------------------- + +String16Set String16SetFromString16(const string16& uni_string) { + const size_t kMaxWordLength = 64; + String16Vector words = String16VectorFromString16(uni_string, false); + String16Set word_set; + for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); + ++iter) + word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); + return word_set; +} + +String16Vector String16VectorFromString16(const string16& uni_string, + bool break_on_space) { + base::i18n::BreakIterator iter(uni_string, + break_on_space ? base::i18n::BreakIterator::BREAK_SPACE : + base::i18n::BreakIterator::BREAK_WORD); + String16Vector words; + if (!iter.Init()) + return words; + while (iter.Advance()) { + if (break_on_space || iter.IsWord()) { + string16 word = iter.GetString(); + if (break_on_space) + TrimWhitespace(word, TRIM_ALL, &word); + if (!word.empty()) + words.push_back(word); + } + } + return words; +} + +Char16Set Char16SetFromString16(const string16& term) { + Char16Set characters; + for (string16::const_iterator iter = term.begin(); iter != term.end(); ++iter) + characters.insert(*iter); + return characters; +} + +} // namespace history diff --git a/chrome/browser/history/in_memory_url_index_types.h b/chrome/browser/history/in_memory_url_index_types.h new file mode 100644 index 0000000..d6ea8a2 --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_types.h @@ -0,0 +1,206 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ +#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ +#pragma once + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/gtest_prod_util.h" +#include "base/string16.h" +#include "chrome/browser/autocomplete/history_provider_util.h" +#include "chrome/browser/history/history_types.h" + +namespace history { + +// Matches within URL and Title Strings ---------------------------------------- + +// Specifies where an omnibox term occurs within a string. Used for specifying +// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in +// scoring a result. +struct TermMatch { + TermMatch() : term_num(0), offset(0), length(0) {} + TermMatch(int term_num, size_t offset, size_t length) + : term_num(term_num), offset(offset), length(length) {} + + int term_num; // The index of the term in the original search string. + size_t offset; // The starting offset of the substring match. + size_t length; // The length of the substring match. +}; +typedef std::vector<TermMatch> TermMatches; + +// Returns a TermMatches which has an entry for each occurrence of the string +// |term| found in the string |string|. Mark each match with |term_num| so +// that the resulting TermMatches can be merged with other TermMatches for +// other terms. Note that only the first 2,048 characters of |string| are +// considered during the match operation. +TermMatches MatchTermInString(const string16& term, + const string16& string, + int term_num); + +// Sorts and removes overlapping substring matches from |matches| and +// returns the cleaned up matches. +TermMatches SortAndDeoverlapMatches(const TermMatches& matches); + +// Extracts and returns the offsets from |matches|. +std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); + +// Replaces the offsets in |matches| with those given in |offsets|, deleting +// any which are npos, and returns the updated list of matches. +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, + const std::vector<size_t>& offsets); + +// Used for intermediate history result operations ----------------------------- + +struct ScoredHistoryMatch : public history::HistoryMatch { + ScoredHistoryMatch(); // Required by STL. + explicit ScoredHistoryMatch(const history::URLRow& url_info); + ~ScoredHistoryMatch(); + + static bool MatchScoreGreater(const ScoredHistoryMatch& m1, + const ScoredHistoryMatch& m2); + + // An interim score taking into consideration location and completeness + // of the match. + int raw_score; + TermMatches url_matches; // Term matches within the URL. + TermMatches title_matches; // Term matches within the page title. + bool can_inline; // True if this is a candidate for in-line autocompletion. +}; +typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; + +// Convenience Types ----------------------------------------------------------- + +typedef std::vector<string16> String16Vector; +typedef std::set<string16> String16Set; +typedef std::set<char16> Char16Set; +typedef std::vector<char16> Char16Vector; + +// Utility Functions Relates to Convenience Types ------------------------------ + +// Breaks a string down into individual words. +String16Set String16SetFromString16(const string16& uni_string); + +// Breaks the |uni_string| string down into individual words and return +// a vector with the individual words in their original order. If +// |break_on_space| is false then the resulting list will contain only words +// containing alpha-numeric characters. If |break_on_space| is true then the +// resulting list will contain strings broken at whitespace. (|break_on_space| +// indicates that the BreakIterator::BREAK_SPACE (equivalent to BREAK_LINE) +// approach is to be used. For a complete description of this algorithm +// refer to the comments in base/i18n/break_iterator.h.) +// +// Example: +// Given: |uni_string|: "http://www.google.com/ harry the rabbit." +// With |break_on_space| false the returned list will contain: +// "http", "www", "google", "com", "harry", "the", "rabbit" +// With |break_on_space| true the returned list will contain: +// "http://", "www.google.com/", "harry", "the", "rabbit." +String16Vector String16VectorFromString16(const string16& uni_string, + bool break_on_space); + +// Breaks the |uni_word| string down into its individual characters. +// Note that this is temporarily intended to work on a single word, but +// _will_ work on a string of words, perhaps with unexpected results. +// TODO(mrossetti): Lots of optimizations possible here for not restarting +// a search if the user is just typing along. Also, change this to uniString +// and properly handle substring matches, scoring and sorting the results +// by score. Also, provide the metrics for where the matches occur so that +// the UI can highlight the matched sections. +Char16Set Char16SetFromString16(const string16& uni_word); + +// Support for InMemoryURLIndex Private Data ----------------------------------- + +// An index into a list of all of the words we have indexed. +typedef size_t WordID; + +// A map allowing a WordID to be determined given a word. +typedef std::map<string16, WordID> WordMap; + +// A map from character to the word_ids of words containing that character. +typedef std::set<WordID> WordIDSet; // An index into the WordList. +typedef std::map<char16, WordIDSet> CharWordIDMap; + +// A map from word (by word_id) to history items containing that word. +typedef history::URLID HistoryID; +typedef std::set<HistoryID> HistoryIDSet; +typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; +typedef std::map<HistoryID, WordIDSet> HistoryIDWordMap; + +// A map from history_id to the history's URL and title. +typedef std::map<HistoryID, URLRow> HistoryInfoMap; + +// TODO(rohitrao): Probably replace this with QueryResults. +typedef std::vector<history::URLRow> URLRowVector; + +// A structure describing the index's internal data. This structure is used +// by the InMemoryURLIndex, InMemoryURLIndexBackend, and +// InMemoryURLCacheDatabase classes. +class URLIndexPrivateData { + public: + URLIndexPrivateData(); + ~URLIndexPrivateData(); + + // Clear all of our data. + void Clear(); + + // Adds |word_id| to |history_id|'s entry in the history/word map, + // creating a new entry if one does not already exist. + void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); + + // Given a set of Char16s, finds words containing those characters. + WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); + + private: + friend class InMemoryURLIndex; + friend class InMemoryURLIndexTest; + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); + FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); + + // A list of all of indexed words. The index of a word in this list is the + // ID of the word in the word_map_. It reduces the memory overhead by + // replacing a potentially long and repeated string with a simple index. + String16Vector word_list_; + + // A list of available words slots in |word_list_|. An available word slot + // is the index of a unused word in word_list_ vector, also referred to as + // a WordID. As URL visits are added or modified new words may be added to + // the index, in which case any available words are used, if any, and then + // words are added to the end of the word_list_. When URL visits are + // modified or deleted old words may be removed from the index, in which + // case the slots for those words are added to available_words_ for resuse + // by future URL updates. + WordIDSet available_words_; + + // A one-to-one mapping from the a word string to its slot number (i.e. + // WordID) in the |word_list_|. + WordMap word_map_; + + // A one-to-many mapping from a single character to all WordIDs of words + // containing that character. + CharWordIDMap char_word_map_; + + // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as + // used in the history database) of history items in which the word occurs. + WordIDHistoryMap word_id_history_map_; + + // A one-to-many mapping from a HistoryID to all WordIDs of words that occur + // in the URL and/or page title of the history item referenced by that + // HistoryID. + HistoryIDWordMap history_id_word_map_; + + // A one-to-one mapping from HistoryID to the history item data governing + // index inclusion and relevance scoring. + HistoryInfoMap history_info_map_; +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ diff --git a/chrome/browser/history/in_memory_url_index_types_unittest.cc b/chrome/browser/history/in_memory_url_index_types_unittest.cc new file mode 100644 index 0000000..6a6bc7c --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_types_unittest.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/string16.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/in_memory_url_index_types.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace history { + +class InMemoryURLIndexTypesTest : public testing::Test { +}; + +TEST_F(InMemoryURLIndexTypesTest, StaticFunctions) { + // Test WordVectorFromString16 + string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy")); + String16Vector string_vec = String16VectorFromString16(string_a, false); + ASSERT_EQ(7U, string_vec.size()); + // See if we got the words we expected. + EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]); + EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]); + EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]); + EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]); + EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]); + EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]); + EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]); + + string_vec = String16VectorFromString16(string_a, true); + ASSERT_EQ(5U, string_vec.size()); + EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]); + EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]); + EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]); + EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]); + EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]); + + // Test WordSetFromString16 + string16 string_b(ASCIIToUTF16( + "http://web.google.com/search Google Web Search")); + String16Set string_set = String16SetFromString16(string_b); + EXPECT_EQ(5U, string_set.size()); + // See if we got the words we expected. + EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end()); + + // Test SortAndDeoverlap + TermMatches matches_a; + matches_a.push_back(TermMatch(1, 13, 10)); + matches_a.push_back(TermMatch(2, 23, 10)); + matches_a.push_back(TermMatch(3, 3, 10)); + matches_a.push_back(TermMatch(4, 40, 5)); + TermMatches matches_b = SortAndDeoverlapMatches(matches_a); + // Nothing should have been eliminated. + EXPECT_EQ(matches_a.size(), matches_b.size()); + // The order should now be 3, 1, 2, 4. + EXPECT_EQ(3, matches_b[0].term_num); + EXPECT_EQ(1, matches_b[1].term_num); + EXPECT_EQ(2, matches_b[2].term_num); + EXPECT_EQ(4, matches_b[3].term_num); + matches_a.push_back(TermMatch(5, 18, 10)); + matches_a.push_back(TermMatch(6, 38, 5)); + matches_b = SortAndDeoverlapMatches(matches_a); + // Two matches should have been eliminated. + EXPECT_EQ(matches_a.size() - 2, matches_b.size()); + // The order should now be 3, 1, 2, 6. + EXPECT_EQ(3, matches_b[0].term_num); + EXPECT_EQ(1, matches_b[1].term_num); + EXPECT_EQ(2, matches_b[2].term_num); + EXPECT_EQ(6, matches_b[3].term_num); + + // Test MatchTermInString + TermMatches matches_c = MatchTermInString( + UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123); + const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 }; + ASSERT_EQ(arraysize(expected_offsets), matches_c.size()); + for (size_t i = 0; i < arraysize(expected_offsets); ++i) + EXPECT_EQ(expected_offsets[i], matches_c[i].offset); +} + +TEST_F(InMemoryURLIndexTypesTest, OffsetsAndTermMatches) { + // Test OffsetsFromTermMatches + history::TermMatches matches_a; + matches_a.push_back(history::TermMatch(1, 1, 2)); + matches_a.push_back(history::TermMatch(2, 4, 3)); + matches_a.push_back(history::TermMatch(3, 9, 1)); + matches_a.push_back(history::TermMatch(3, 10, 1)); + matches_a.push_back(history::TermMatch(4, 14, 5)); + std::vector<size_t> offsets = OffsetsFromTermMatches(matches_a); + const size_t expected_offsets_a[] = {1, 4, 9, 10, 14}; + ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a)); + for (size_t i = 0; i < offsets.size(); ++i) + EXPECT_EQ(expected_offsets_a[i], offsets[i]); + + // Test ReplaceOffsetsInTermMatches + offsets[2] = string16::npos; + history::TermMatches matches_b = + ReplaceOffsetsInTermMatches(matches_a, offsets); + const size_t expected_offsets_b[] = {1, 4, 10, 14}; + ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size()); + for (size_t i = 0; i < matches_b.size(); ++i) + EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset); +} + +} // namespace history diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc index de8e4e0c9..acdc34b 100644 --- a/chrome/browser/history/in_memory_url_index_unittest.cc +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -2,24 +2,18 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include <stdio.h> - #include <fstream> -#include <string> -#include <vector> #include "base/file_path.h" #include "base/file_util.h" -#include "base/memory/scoped_ptr.h" #include "base/path_service.h" +#include "base/string16.h" #include "base/string_util.h" -#include "base/time.h" #include "base/utf_string_conversions.h" #include "chrome/browser/history/in_memory_database.h" #include "chrome/browser/history/in_memory_url_index.h" +#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/common/chrome_paths.h" -#include "sql/connection.h" -#include "sql/statement.h" #include "sql/transaction.h" #include "testing/gtest/include/gtest/gtest.h" @@ -59,9 +53,8 @@ class InMemoryURLIndexTest : public testing::Test, int typed_count); // Convenience functions for easily creating vectors of search terms. - InMemoryURLIndex::String16Vector Make1Term(const char* term) const; - InMemoryURLIndex::String16Vector Make2Terms(const char* term_1, - const char* term_2) const; + String16Vector Make1Term(const char* term) const; + String16Vector Make2Terms(const char* term_1, const char* term_2) const; // Validates that the given |term| is contained in |cache| and that it is // marked as in-use. @@ -145,17 +138,15 @@ URLRow InMemoryURLIndexTest::MakeURLRow(const char* url, return row; } -InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make1Term( - const char* term) const { - InMemoryURLIndex::String16Vector terms; +String16Vector InMemoryURLIndexTest::Make1Term(const char* term) const { + String16Vector terms; terms.push_back(UTF8ToUTF16(term)); return terms; } -InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make2Terms( - const char* term_1, - const char* term_2) const { - InMemoryURLIndex::String16Vector terms; +String16Vector InMemoryURLIndexTest::Make2Terms(const char* term_1, + const char* term_2) const { + String16Vector terms; terms.push_back(UTF8ToUTF16(term_1)); terms.push_back(UTF8ToUTF16(term_2)); return terms; @@ -173,6 +164,27 @@ void InMemoryURLIndexTest::CheckTerm( << "Cache item '" << term << "' should be marked as being in use."; } +// Helper function which compares two maps for equivalence. The maps' values +// are associative containers and their contents are compared as well. +template<typename T> +void ExpectMapOfContainersIdentical(const T& expected, const T& actual) { + ASSERT_EQ(expected.size(), actual.size()); + for (typename T::const_iterator expected_iter = expected.begin(); + expected_iter != expected.end(); ++expected_iter) { + typename T::const_iterator actual_iter = actual.find(expected_iter->first); + ASSERT_NE(actual.end(), actual_iter); + typename T::mapped_type const& expected_values(expected_iter->second); + typename T::mapped_type const& actual_values(actual_iter->second); + ASSERT_EQ(expected_values.size(), actual_values.size()); + for (typename T::mapped_type::const_iterator set_iter = + expected_values.begin(); set_iter != expected_values.end(); ++set_iter) + EXPECT_EQ(actual_values.count(*set_iter), + expected_values.count(*set_iter)); + } +} + +//------------------------------------------------------------------------------ + class LimitedInMemoryURLIndexTest : public InMemoryURLIndexTest { protected: FilePath::StringType TestDBName() const; @@ -203,7 +215,8 @@ void ExpandedInMemoryURLIndexTest::SetUp() { } TEST_F(InMemoryURLIndexTest, Construction) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); EXPECT_TRUE(url_index_.get()); } @@ -217,16 +230,17 @@ TEST_F(LimitedInMemoryURLIndexTest, Initialization) { EXPECT_EQ(1U, row_count); url_index_.reset(new InMemoryURLIndex); url_index_->Init(this, "en,ja,hi,zh"); - EXPECT_EQ(1, url_index_->history_item_count_); + URLIndexPrivateData& private_data(*(url_index_->private_data_)); // history_info_map_ should have the same number of items as were filtered. - EXPECT_EQ(1U, url_index_->history_info_map_.size()); - EXPECT_EQ(35U, url_index_->char_word_map_.size()); - EXPECT_EQ(17U, url_index_->word_map_.size()); + EXPECT_EQ(1U, private_data.history_info_map_.size()); + EXPECT_EQ(35U, private_data.char_word_map_.size()); + EXPECT_EQ(17U, private_data.word_map_.size()); } TEST_F(InMemoryURLIndexTest, Retrieval) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); // The term will be lowercased by the search. @@ -259,7 +273,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) { matches[0].url_info.title()); // Search which should result in very poor result. - InMemoryURLIndex::String16Vector terms; + String16Vector terms; terms.push_back(ASCIIToUTF16("z")); terms.push_back(ASCIIToUTF16("y")); terms.push_back(ASCIIToUTF16("x")); @@ -279,7 +293,8 @@ TEST_F(InMemoryURLIndexTest, Retrieval) { } TEST_F(ExpandedInMemoryURLIndexTest, ShortCircuit) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); // A search for 'w' should short-circuit and not return any matches. @@ -296,8 +311,8 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) { url_index_.reset(new InMemoryURLIndex()); url_index_->Init(this, "en,ja,hi,zh"); // Signal if someone has changed the test DB. - EXPECT_EQ(27U, url_index_->history_info_map_.size()); - InMemoryURLIndex::String16Vector terms; + EXPECT_EQ(27U, url_index_->private_data_->history_info_map_.size()); + String16Vector terms; // Ensure title is being searched. terms.push_back(ASCIIToUTF16("MORTGAGE")); @@ -345,101 +360,6 @@ TEST_F(InMemoryURLIndexTest, NonUniqueTermCharacterSets) { EXPECT_EQ(28, matches[0].url_info.id()); } -TEST_F(InMemoryURLIndexTest, StaticFunctions) { - // Test WordVectorFromString16 - string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy")); - InMemoryURLIndex::String16Vector string_vec = - InMemoryURLIndex::WordVectorFromString16(string_a, false); - ASSERT_EQ(7U, string_vec.size()); - // See if we got the words we expected. - EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]); - EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]); - EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]); - EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]); - EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]); - EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]); - EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]); - - string_vec = InMemoryURLIndex::WordVectorFromString16(string_a, true); - ASSERT_EQ(5U, string_vec.size()); - EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]); - EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]); - EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]); - EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]); - EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]); - - // Test WordSetFromString16 - string16 string_b(ASCIIToUTF16( - "http://web.google.com/search Google Web Search")); - InMemoryURLIndex::String16Set string_set = - InMemoryURLIndex::WordSetFromString16(string_b); - EXPECT_EQ(5U, string_set.size()); - // See if we got the words we expected. - EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end()); - - // Test SortAndDeoverlap - TermMatches matches_a; - matches_a.push_back(TermMatch(1, 13, 10)); - matches_a.push_back(TermMatch(2, 23, 10)); - matches_a.push_back(TermMatch(3, 3, 10)); - matches_a.push_back(TermMatch(4, 40, 5)); - TermMatches matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a); - // Nothing should have been eliminated. - EXPECT_EQ(matches_a.size(), matches_b.size()); - // The order should now be 3, 1, 2, 4. - EXPECT_EQ(3, matches_b[0].term_num); - EXPECT_EQ(1, matches_b[1].term_num); - EXPECT_EQ(2, matches_b[2].term_num); - EXPECT_EQ(4, matches_b[3].term_num); - matches_a.push_back(TermMatch(5, 18, 10)); - matches_a.push_back(TermMatch(6, 38, 5)); - matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a); - // Two matches should have been eliminated. - EXPECT_EQ(matches_a.size() - 2, matches_b.size()); - // The order should now be 3, 1, 2, 6. - EXPECT_EQ(3, matches_b[0].term_num); - EXPECT_EQ(1, matches_b[1].term_num); - EXPECT_EQ(2, matches_b[2].term_num); - EXPECT_EQ(6, matches_b[3].term_num); - - // Test MatchTermInString - TermMatches matches_c = InMemoryURLIndex::MatchTermInString( - UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123); - ASSERT_EQ(11U, matches_c.size()); - const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 }; - for (int i = 0; i < 11; ++i) - EXPECT_EQ(expected_offsets[i], matches_c[i].offset); -} - -TEST_F(InMemoryURLIndexTest, OffsetsAndTermMatches) { - // Test OffsetsFromTermMatches - history::TermMatches matches_a; - matches_a.push_back(history::TermMatch(1, 1, 2)); - matches_a.push_back(history::TermMatch(2, 4, 3)); - matches_a.push_back(history::TermMatch(3, 9, 1)); - matches_a.push_back(history::TermMatch(3, 10, 1)); - matches_a.push_back(history::TermMatch(4, 14, 5)); - std::vector<size_t> offsets = - InMemoryURLIndex::OffsetsFromTermMatches(matches_a); - const size_t expected_offsets_a[] = {1, 4, 9, 10, 14}; - ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a)); - for (size_t i = 0; i < offsets.size(); ++i) - EXPECT_EQ(expected_offsets_a[i], offsets[i]); - - // Test ReplaceOffsetsInTermMatches - offsets[2] = string16::npos; - history::TermMatches matches_b = - InMemoryURLIndex::ReplaceOffsetsInTermMatches(matches_a, offsets); - const size_t expected_offsets_b[] = {1, 4, 10, 14}; - ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size()); - for (size_t i = 0; i < matches_b.size(); ++i) - EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset); -} - TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { // Verify that match results for previously typed characters are retained // (in the term_char_word_set_cache_) and reused, if possible, in future @@ -447,7 +367,8 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { typedef InMemoryURLIndex::SearchTermCacheMap::iterator CacheIter; typedef InMemoryURLIndex::SearchTermCacheItem CacheItem; - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); InMemoryURLIndex::SearchTermCacheMap& cache(url_index_->search_term_cache_); @@ -460,7 +381,7 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { // Simulate typing "r" giving "r" in the simulated omnibox. The results for // 'r' will be not cached because it is only 1 character long. - InMemoryURLIndex::String16Vector terms; + String16Vector terms; string16 term_r = ASCIIToUTF16("r"); terms.push_back(term_r); url_index_->HistoryItemsForTerms(terms); @@ -550,9 +471,10 @@ TEST_F(InMemoryURLIndexTest, Scoring) { } TEST_F(InMemoryURLIndexTest, AddNewRows) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); - InMemoryURLIndex::String16Vector terms; + String16Vector terms; // Verify that the row we're going to add does not already exist. URLID new_row_id = 87654321; @@ -564,20 +486,21 @@ TEST_F(InMemoryURLIndexTest, AddNewRows) { // Add a new row. URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id); new_row.set_last_visit(base::Time::Now()); - url_index_->UpdateURL(new_row_id, new_row); + url_index_->UpdateURL(new_row); // Verify that we can retrieve it. EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size()); // Add it again just to be sure that is harmless. - url_index_->UpdateURL(new_row_id, new_row); + url_index_->UpdateURL(new_row); EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size()); } TEST_F(InMemoryURLIndexTest, DeleteRows) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); url_index_->Init(this, "en,ja,hi,zh"); - InMemoryURLIndex::String16Vector terms; + String16Vector terms; // Make sure we actually get an existing result. terms.push_back(ASCIIToUTF16("DrudgeReport")); @@ -585,7 +508,7 @@ TEST_F(InMemoryURLIndexTest, DeleteRows) { ASSERT_EQ(1U, matches.size()); // Determine the row id for that result, delete that id, then search again. - url_index_->DeleteURL(matches[0].url_info.id()); + url_index_->DeleteURL(matches[0].url_info); EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty()); } @@ -662,7 +585,8 @@ TEST_F(InMemoryURLIndexTest, WhitelistedURLs) { { "xmpp:node@example.com", false }, { "xmpp://guest@example.com", false }, }; - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL( + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL( "/flammmy/frammy/")))); for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { GURL url(data[i].url_spec); @@ -672,8 +596,8 @@ TEST_F(InMemoryURLIndexTest, WhitelistedURLs) { } TEST_F(InMemoryURLIndexTest, CacheFilePath) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL( - "/flammmy/frammy/")))); + url_index_.reset(new InMemoryURLIndex( + NULL, FilePath(FILE_PATH_LITERAL("/flammmy/frammy/")))); FilePath full_file_path; url_index_->GetCacheFilePath(&full_file_path); std::vector<FilePath::StringType> expected_parts; @@ -689,84 +613,72 @@ TEST_F(InMemoryURLIndexTest, CacheFilePath) { TEST_F(InMemoryURLIndexTest, CacheSaveRestore) { // Save the cache to a protobuf, restore it, and compare the results. - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex(NULL, + FilePath(FILE_PATH_LITERAL("/dummy")))); InMemoryURLIndex& url_index(*(url_index_.get())); url_index.Init(this, "en,ja,hi,zh"); in_memory_url_index::InMemoryURLIndexCacheItem index_cache; url_index.SavePrivateData(&index_cache); // Capture our private data so we can later compare for equality. - int history_item_count(url_index.history_item_count_); - InMemoryURLIndex::String16Vector word_list(url_index.word_list_); - InMemoryURLIndex::WordMap word_map(url_index.word_map_); - InMemoryURLIndex::CharWordIDMap char_word_map(url_index.char_word_map_); - InMemoryURLIndex::WordIDHistoryMap word_id_history_map( - url_index.word_id_history_map_); - InMemoryURLIndex::HistoryInfoMap history_info_map( - url_index.history_info_map_); + URLIndexPrivateData& private_data(*(url_index_->private_data_)); + String16Vector word_list(private_data.word_list_); + WordMap word_map(private_data.word_map_); + CharWordIDMap char_word_map(private_data.char_word_map_); + WordIDHistoryMap word_id_history_map(private_data.word_id_history_map_); + HistoryIDWordMap history_id_word_map(private_data.history_id_word_map_); + HistoryInfoMap history_info_map(private_data.history_info_map_); // Prove that there is really something there. - EXPECT_GT(url_index.history_item_count_, 0); - EXPECT_FALSE(url_index.word_list_.empty()); - EXPECT_FALSE(url_index.word_map_.empty()); - EXPECT_FALSE(url_index.char_word_map_.empty()); - EXPECT_FALSE(url_index.word_id_history_map_.empty()); - EXPECT_FALSE(url_index.history_info_map_.empty()); + EXPECT_FALSE(private_data.word_list_.empty()); + // available_words_ will already be empty since we have freshly built the + // data set for this test. + EXPECT_TRUE(private_data.available_words_.empty()); + EXPECT_FALSE(private_data.word_map_.empty()); + EXPECT_FALSE(private_data.char_word_map_.empty()); + EXPECT_FALSE(private_data.word_id_history_map_.empty()); + EXPECT_FALSE(private_data.history_id_word_map_.empty()); + EXPECT_FALSE(private_data.history_info_map_.empty()); // Clear and then prove it's clear. url_index.ClearPrivateData(); - EXPECT_EQ(0, url_index.history_item_count_); - EXPECT_TRUE(url_index.word_list_.empty()); - EXPECT_TRUE(url_index.word_map_.empty()); - EXPECT_TRUE(url_index.char_word_map_.empty()); - EXPECT_TRUE(url_index.word_id_history_map_.empty()); - EXPECT_TRUE(url_index.history_info_map_.empty()); + EXPECT_TRUE(private_data.word_list_.empty()); + EXPECT_TRUE(private_data.available_words_.empty()); + EXPECT_TRUE(private_data.word_map_.empty()); + EXPECT_TRUE(private_data.char_word_map_.empty()); + EXPECT_TRUE(private_data.word_id_history_map_.empty()); + EXPECT_TRUE(private_data.history_id_word_map_.empty()); + EXPECT_TRUE(private_data.history_info_map_.empty()); // Restore the cache. EXPECT_TRUE(url_index.RestorePrivateData(index_cache)); // Compare the restored and captured for equality. - EXPECT_EQ(history_item_count, url_index.history_item_count_); - EXPECT_EQ(word_list.size(), url_index.word_list_.size()); - EXPECT_EQ(word_map.size(), url_index.word_map_.size()); - EXPECT_EQ(char_word_map.size(), url_index.char_word_map_.size()); - EXPECT_EQ(word_id_history_map.size(), url_index.word_id_history_map_.size()); - EXPECT_EQ(history_info_map.size(), url_index.history_info_map_.size()); + EXPECT_EQ(word_list.size(), private_data.word_list_.size()); + EXPECT_EQ(word_map.size(), private_data.word_map_.size()); + EXPECT_EQ(char_word_map.size(), private_data.char_word_map_.size()); + EXPECT_EQ(word_id_history_map.size(), + private_data.word_id_history_map_.size()); + EXPECT_EQ(history_id_word_map.size(), + private_data.history_id_word_map_.size()); + EXPECT_EQ(history_info_map.size(), private_data.history_info_map_.size()); // WordList must be index-by-index equal. size_t count = word_list.size(); for (size_t i = 0; i < count; ++i) - EXPECT_EQ(word_list[i], url_index.word_list_[i]); - for (InMemoryURLIndex::CharWordIDMap::const_iterator expected = - char_word_map.begin(); expected != char_word_map.end(); ++expected) { - InMemoryURLIndex::CharWordIDMap::const_iterator actual = - url_index.char_word_map_.find(expected->first); - ASSERT_TRUE(url_index.char_word_map_.end() != actual); - const InMemoryURLIndex::WordIDSet& expected_set(expected->second); - const InMemoryURLIndex::WordIDSet& actual_set(actual->second); - ASSERT_EQ(expected_set.size(), actual_set.size()); - for (InMemoryURLIndex::WordIDSet::const_iterator set_iter = - expected_set.begin(); set_iter != expected_set.end(); ++set_iter) - EXPECT_GT(actual_set.count(*set_iter), 0U); - } - for (InMemoryURLIndex::WordIDHistoryMap::const_iterator expected = - word_id_history_map.begin(); expected != word_id_history_map.end(); - ++expected) { - InMemoryURLIndex::WordIDHistoryMap::const_iterator actual = - url_index.word_id_history_map_.find(expected->first); - ASSERT_TRUE(url_index.word_id_history_map_.end() != actual); - const InMemoryURLIndex::HistoryIDSet& expected_set(expected->second); - const InMemoryURLIndex::HistoryIDSet& actual_set(actual->second); - ASSERT_EQ(expected_set.size(), actual_set.size()); - for (InMemoryURLIndex::HistoryIDSet::const_iterator set_iter = - expected_set.begin(); set_iter != expected_set.end(); ++set_iter) - EXPECT_GT(actual_set.count(*set_iter), 0U); - } - for (InMemoryURLIndex::HistoryInfoMap::const_iterator expected = - history_info_map.begin(); expected != history_info_map.end(); - ++expected) { - InMemoryURLIndex::HistoryInfoMap::const_iterator actual = - url_index.history_info_map_.find(expected->first); - ASSERT_FALSE(url_index.history_info_map_.end() == actual); + EXPECT_EQ(word_list[i], private_data.word_list_[i]); + + ExpectMapOfContainersIdentical(char_word_map, + private_data.char_word_map_); + ExpectMapOfContainersIdentical(word_id_history_map, + private_data.word_id_history_map_); + ExpectMapOfContainersIdentical(history_id_word_map, + private_data.history_id_word_map_); + + for (HistoryInfoMap::const_iterator expected = history_info_map.begin(); + expected != history_info_map.end(); ++expected) { + HistoryInfoMap::const_iterator actual = + private_data.history_info_map_.find(expected->first); + ASSERT_FALSE(private_data.history_info_map_.end() == actual); const URLRow& expected_row(expected->second); const URLRow& actual_row(actual->second); EXPECT_EQ(expected_row.visit_count(), actual_row.visit_count()); diff --git a/chrome/browser/sync/profile_sync_service_typed_url_unittest.cc b/chrome/browser/sync/profile_sync_service_typed_url_unittest.cc index 925cd25..e1e46a0 100644 --- a/chrome/browser/sync/profile_sync_service_typed_url_unittest.cc +++ b/chrome/browser/sync/profile_sync_service_typed_url_unittest.cc @@ -80,7 +80,7 @@ using testing::WithArgs; class HistoryBackendMock : public HistoryBackend { public: - HistoryBackendMock() : HistoryBackend(FilePath(), 0, NULL, NULL) {} + HistoryBackendMock() : HistoryBackend(NULL, FilePath(), 0, NULL, NULL) {} MOCK_METHOD1(GetAllTypedURLs, bool(std::vector<history::URLRow>* entries)); MOCK_METHOD3(GetMostRecentVisitsForURL, bool(history::URLID id, int max_visits, diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 55ff343..acbc48a 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -1330,6 +1330,8 @@ 'browser/history/in_memory_history_backend.h', 'browser/history/in_memory_url_index.cc', 'browser/history/in_memory_url_index.h', + 'browser/history/in_memory_url_index_types.cc', + 'browser/history/in_memory_url_index_types.h', 'browser/history/page_usage_data.cc', 'browser/history/page_usage_data.h', 'browser/history/query_parser.cc', diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi index cd0436e..8cd0133 100644 --- a/chrome/chrome_tests.gypi +++ b/chrome/chrome_tests.gypi @@ -1335,6 +1335,7 @@ 'browser/history/history_unittest.cc', 'browser/history/history_unittest_base.cc', 'browser/history/history_unittest_base.h', + 'browser/history/in_memory_url_index_types_unittest.cc', 'browser/history/in_memory_url_index_unittest.cc', 'browser/history/query_parser_unittest.cc', 'browser/history/shortcuts_backend_unittest.cc', diff --git a/content/browser/cancelable_request.h b/content/browser/cancelable_request.h index 69080df..9a986d8 100644 --- a/content/browser/cancelable_request.h +++ b/content/browser/cancelable_request.h @@ -4,8 +4,8 @@ // CancelableRequestProviders and Consumers work together to make requests that // execute on a background thread in the provider and return data to the -// consumer. These class collaborate to keep a list of open requests and to -// make sure that requests to not outlive either of the objects involved in the +// consumer. These classes collaborate to keep a list of open requests and to +// make sure that requests do not outlive either of the objects involved in the // transaction. // // If you do not need to return data to the consumer, do not use this system, |