diff options
30 files changed, 115 insertions, 2674 deletions
diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc index 7e28403..ce9fa68 100644 --- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc +++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc @@ -212,7 +212,7 @@ void HistoryQuickProviderTest::FillData() { // Mark the most recent |cur.typed_count| visits as typed. std::string sql_cmd_line = base::StringPrintf( "INSERT INTO \"visits\" VALUES(%" PRIuS ", %" PRIuS ", %" PRId64 - ", 0, %d, 0, 0, 1)", + ", 0, %d, 0, 1)", visit_id++, i + 1, visit_time.ToInternalValue(), (j < cur.typed_count) ? content::PAGE_TRANSITION_TYPED : content::PAGE_TRANSITION_LINK); diff --git a/chrome/browser/history/expire_history_backend.cc b/chrome/browser/history/expire_history_backend.cc index 5c33897..370a62b 100644 --- a/chrome/browser/history/expire_history_backend.cc +++ b/chrome/browser/history/expire_history_backend.cc @@ -19,8 +19,6 @@ #include "chrome/browser/history/archived_database.h" #include "chrome/browser/history/history_database.h" #include "chrome/browser/history/history_notifications.h" -#include "chrome/browser/history/text_database.h" -#include "chrome/browser/history/text_database_manager.h" #include "chrome/browser/history/thumbnail_database.h" using base::Time; @@ -172,10 +170,6 @@ struct ExpireHistoryBackend::DeleteDependencies { // The list of all favicon urls that were actually deleted from the thumbnail // db. std::set<GURL> expired_favicons; - - // Tracks the set of databases that have changed so we can optimize when - // when we're done. 
- TextDatabaseManager::ChangeSet text_db_changes; }; ExpireHistoryBackend::ExpireHistoryBackend( @@ -185,7 +179,6 @@ ExpireHistoryBackend::ExpireHistoryBackend( main_db_(NULL), archived_db_(NULL), thumb_db_(NULL), - text_db_(NULL), weak_factory_(this), bookmark_service_(bookmark_service) { } @@ -195,12 +188,10 @@ ExpireHistoryBackend::~ExpireHistoryBackend() { void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db, ArchivedDatabase* archived_db, - ThumbnailDatabase* thumb_db, - TextDatabaseManager* text_db) { + ThumbnailDatabase* thumb_db) { main_db_ = main_db; archived_db_ = archived_db; thumb_db_ = thumb_db; - text_db_ = text_db; } void ExpireHistoryBackend::DeleteURL(const GURL& url) { @@ -242,9 +233,6 @@ void ExpireHistoryBackend::DeleteURLs(const std::vector<GURL>& urls) { DeleteFaviconsIfPossible(dependencies.affected_favicons, &dependencies.expired_favicons); - if (text_db_) - text_db_->OptimizeChangedDatabases(dependencies.text_db_changes); - BroadcastDeleteNotifications(&dependencies, DELETION_USER_INITIATED); } @@ -253,10 +241,6 @@ void ExpireHistoryBackend::ExpireHistoryBetween( if (!main_db_) return; - // There may be stuff in the text database manager's temporary cache. - if (text_db_) - text_db_->DeleteFromUncommitted(restrict_urls, begin_time, end_time); - // Find the affected visits and delete them. // TODO(brettw): bug 1171164: We should query the archived database here, too. VisitVector visits; @@ -290,10 +274,6 @@ void ExpireHistoryBackend::ExpireHistoryForTimes( if (!main_db_) return; - // There may be stuff in the text database manager's temporary cache. - if (text_db_) - text_db_->DeleteFromUncommittedForTimes(times); - // Find the affected visits and delete them. // TODO(brettw): bug 1171164: We should query the archived database here, too. VisitVector visits; @@ -368,7 +348,6 @@ void ExpireHistoryBackend::StartArchivingOldStuff( // Initialize the queue with all tasks for the first set of iterations. 
InitWorkQueue(); ScheduleArchive(); - ScheduleExpireHistoryIndexFiles(); } void ExpireHistoryBackend::DeleteFaviconsIfPossible( @@ -422,21 +401,11 @@ void ExpireHistoryBackend::DeleteVisitRelatedInfo( // Add the URL row to the affected URL list. std::map<URLID, URLRow>::const_iterator found = dependencies->affected_urls.find(visits[i].url_id); - const URLRow* cur_row = NULL; if (found == dependencies->affected_urls.end()) { URLRow row; if (!main_db_->GetURLRow(visits[i].url_id, &row)) continue; dependencies->affected_urls[visits[i].url_id] = row; - cur_row = &dependencies->affected_urls[visits[i].url_id]; - } else { - cur_row = &found->second; - } - - // Delete any associated full-text indexed data. - if (visits[i].is_indexed && text_db_) { - text_db_->DeletePageData(visits[i].visit_time, cur_row->url(), - &dependencies->text_db_changes); } } } @@ -447,13 +416,6 @@ void ExpireHistoryBackend::DeleteOneURL( DeleteDependencies* dependencies) { main_db_->DeleteSegmentForURL(url_row.id()); - // The URL may be in the text database manager's temporary cache. - if (text_db_) { - std::set<GURL> restrict_urls; - restrict_urls.insert(url_row.url()); - text_db_->DeleteFromUncommitted(restrict_urls, base::Time(), base::Time()); - } - if (!is_bookmarked) { dependencies->deleted_urls.push_back(url_row); @@ -722,48 +684,6 @@ void ExpireHistoryBackend::ParanoidExpireHistory() { // TODO(brettw): Bug 1067331: write this to clean up any errors. } -void ExpireHistoryBackend::ScheduleExpireHistoryIndexFiles() { - if (!text_db_) { - // Can't expire old history index files because we - // don't know where they're located. 
- return; - } - - TimeDelta delay = TimeDelta::FromMinutes(kIndexExpirationDelayMin); - base::MessageLoop::current()->PostDelayedTask( - FROM_HERE, - base::Bind(&ExpireHistoryBackend::DoExpireHistoryIndexFiles, - weak_factory_.GetWeakPtr()), - delay); -} - -void ExpireHistoryBackend::DoExpireHistoryIndexFiles() { - if (!text_db_) { - // The text database may have been closed since the task was scheduled. - return; - } - - Time::Exploded exploded; - Time::Now().LocalExplode(&exploded); - int cutoff_month = - exploded.year * 12 + exploded.month - kStoreHistoryIndexesForMonths; - TextDatabase::DBIdent cutoff_id = - (cutoff_month / 12) * 100 + (cutoff_month % 12); - - base::FilePath::StringType history_index_files_pattern = - TextDatabase::file_base(); - history_index_files_pattern.append(FILE_PATH_LITERAL("*")); - base::FileEnumerator file_enumerator( - text_db_->GetDir(), false, base::FileEnumerator::FILES, - history_index_files_pattern); - for (base::FilePath file = file_enumerator.Next(); !file.empty(); - file = file_enumerator.Next()) { - TextDatabase::DBIdent file_id = TextDatabase::FileNameToID(file); - if (file_id < cutoff_id) - sql::Connection::Delete(file); - } -} - BookmarkService* ExpireHistoryBackend::GetBookmarkService() { // We use the bookmark service to determine if a URL is bookmarked. The // bookmark service is loaded on a separate thread and may not be done by the diff --git a/chrome/browser/history/expire_history_backend.h b/chrome/browser/history/expire_history_backend.h index 626ad12..2e3d27e 100644 --- a/chrome/browser/history/expire_history_backend.h +++ b/chrome/browser/history/expire_history_backend.h @@ -25,7 +25,6 @@ namespace history { class ArchivedDatabase; class HistoryDatabase; struct HistoryDetails; -class TextDatabaseManager; class ThumbnailDatabase; // Delegate used to broadcast notifications to the main thread. 
@@ -76,8 +75,7 @@ class ExpireHistoryBackend { // Completes initialization by setting the databases that this class will use. void SetDatabases(HistoryDatabase* main_db, ArchivedDatabase* archived_db, - ThumbnailDatabase* thumb_db, - TextDatabaseManager* text_db); + ThumbnailDatabase* thumb_db); // Begins periodic expiration of history older than the given threshold. This // will continue until the object is deleted. @@ -128,9 +126,6 @@ class ExpireHistoryBackend { // Deletes the visit-related stuff for all the visits in the given list, and // adds the rows for unique URLs affected to the affected_urls list in // the dependencies structure. - // - // Deleted information is the visits themselves and the full-text index - // entries corresponding to them. void DeleteVisitRelatedInfo(const VisitVector& visits, DeleteDependencies* dependencies); @@ -138,8 +133,7 @@ class ExpireHistoryBackend { void ArchiveVisits(const VisitVector& visits); // Finds or deletes dependency information for the given URL. Information that - // is specific to this URL (URL row, thumbnails, full text indexed stuff, - // etc.) is deleted. + // is specific to this URL (URL row, thumbnails, etc.) is deleted. // // This does not affect the visits! This is used for expiration as well as // deleting from the UI, and they handle visits differently. @@ -240,12 +234,6 @@ class ExpireHistoryBackend { // and deletes items. For example, URLs with no visits. void ParanoidExpireHistory(); - // Schedules a call to DoExpireHistoryIndexFiles. - void ScheduleExpireHistoryIndexFiles(); - - // Deletes old history index files. - void DoExpireHistoryIndexFiles(); - // Returns the BookmarkService, blocking until it is loaded. This may return // NULL. BookmarkService* GetBookmarkService(); @@ -269,7 +257,6 @@ class ExpireHistoryBackend { HistoryDatabase* main_db_; // Main history database. ArchivedDatabase* archived_db_; // Old history. ThumbnailDatabase* thumb_db_; // Thumbnails and favicons. 
- TextDatabaseManager* text_db_; // Full text index. // Used to generate runnable methods to do timers on this class. They will be // automatically canceled when this class is deleted. diff --git a/chrome/browser/history/expire_history_backend_unittest.cc b/chrome/browser/history/expire_history_backend_unittest.cc index d36a6a1..59852a1 100644 --- a/chrome/browser/history/expire_history_backend_unittest.cc +++ b/chrome/browser/history/expire_history_backend_unittest.cc @@ -23,7 +23,6 @@ #include "chrome/browser/history/expire_history_backend.h" #include "chrome/browser/history/history_database.h" #include "chrome/browser/history/history_notifications.h" -#include "chrome/browser/history/text_database_manager.h" #include "chrome/browser/history/thumbnail_database.h" #include "chrome/browser/history/top_sites.h" #include "chrome/common/thumbnail_score.h" @@ -77,10 +76,6 @@ class ExpireHistoryTest : public testing::Test, chrome::FaviconID GetFavicon(const GURL& page_url, chrome::IconType icon_type); - // Returns the number of text matches for the given URL in the example data - // added by AddExampleData. - int CountTextMatchesForURL(const GURL& url); - // EXPECTs that each URL-specific history thing (basically, everything but // favicons) is gone. 
void EnsureURLInfoGone(const URLRow& row); @@ -114,7 +109,6 @@ class ExpireHistoryTest : public testing::Test, scoped_ptr<HistoryDatabase> main_db_; scoped_ptr<ArchivedDatabase> archived_db_; scoped_ptr<ThumbnailDatabase> thumb_db_; - scoped_ptr<TextDatabaseManager> text_db_; TestingProfile profile_; scoped_refptr<TopSites> top_sites_; @@ -147,13 +141,7 @@ class ExpireHistoryTest : public testing::Test, if (thumb_db_->Init(thumb_name, NULL, main_db_.get()) != sql::INIT_OK) thumb_db_.reset(); - text_db_.reset(new TextDatabaseManager(path(), - main_db_.get(), main_db_.get())); - if (!text_db_->Init(NULL)) - text_db_.reset(); - - expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(), - text_db_.get()); + expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get()); profile_.CreateTopSites(); profile_.BlockUntilTopSitesLoaded(); top_sites_ = profile_.GetTopSites(); @@ -164,12 +152,11 @@ class ExpireHistoryTest : public testing::Test, ClearLastNotifications(); - expirer_.SetDatabases(NULL, NULL, NULL, NULL); + expirer_.SetDatabases(NULL, NULL, NULL); main_db_.reset(); archived_db_.reset(); thumb_db_.reset(); - text_db_.reset(); } // BroadcastNotificationDelegate implementation. @@ -199,7 +186,7 @@ class ExpireHistoryTest : public testing::Test, // The IDs of the added URLs, and the times of the four added visits will be // added to the given arrays. void ExpireHistoryTest::AddExampleData(URLID url_ids[3], Time visit_times[4]) { - if (!main_db_.get() || !text_db_) + if (!main_db_.get()) return; // Four times for each visit. 
@@ -251,45 +238,23 @@ void ExpireHistoryTest::AddExampleData(URLID url_ids[3], Time visit_times[4]) { VisitRow visit_row1; visit_row1.url_id = url_ids[0]; visit_row1.visit_time = visit_times[0]; - visit_row1.is_indexed = true; main_db_->AddVisit(&visit_row1, SOURCE_BROWSED); VisitRow visit_row2; visit_row2.url_id = url_ids[1]; visit_row2.visit_time = visit_times[1]; - visit_row2.is_indexed = true; main_db_->AddVisit(&visit_row2, SOURCE_BROWSED); VisitRow visit_row3; visit_row3.url_id = url_ids[1]; visit_row3.visit_time = visit_times[2]; - visit_row3.is_indexed = true; visit_row3.transition = content::PAGE_TRANSITION_TYPED; main_db_->AddVisit(&visit_row3, SOURCE_BROWSED); VisitRow visit_row4; visit_row4.url_id = url_ids[2]; visit_row4.visit_time = visit_times[3]; - visit_row4.is_indexed = true; main_db_->AddVisit(&visit_row4, SOURCE_BROWSED); - - // Full text index for each visit. - text_db_->AddPageData(url_row1.url(), visit_row1.url_id, visit_row1.visit_id, - visit_row1.visit_time, UTF8ToUTF16("title"), - UTF8ToUTF16("body")); - - text_db_->AddPageData(url_row2.url(), visit_row2.url_id, visit_row2.visit_id, - visit_row2.visit_time, UTF8ToUTF16("title"), - UTF8ToUTF16("body")); - text_db_->AddPageData(url_row2.url(), visit_row3.url_id, visit_row3.visit_id, - visit_row3.visit_time, UTF8ToUTF16("title"), - UTF8ToUTF16("body")); - - // Note the special text in this URL. We'll search the file for this string - // to make sure it doesn't hang around after the delete. - text_db_->AddPageData(url_row3.url(), visit_row4.url_id, visit_row4.visit_id, - visit_row4.visit_time, UTF8ToUTF16("title"), - UTF8ToUTF16("goats body")); } void ExpireHistoryTest::AddExampleSourceData(const GURL& url, URLID* id) { @@ -349,33 +314,11 @@ bool ExpireHistoryTest::HasThumbnail(URLID url_id) { return top_sites_->GetPageThumbnail(url, &data); } -int ExpireHistoryTest::CountTextMatchesForURL(const GURL& url) { - if (!text_db_) - return 0; - - // "body" should match all pages in the example data. 
- std::vector<TextDatabase::Match> results; - QueryOptions options; - Time first_time; - text_db_->GetTextMatches(UTF8ToUTF16("body"), options, - &results, &first_time); - - int count = 0; - for (size_t i = 0; i < results.size(); i++) { - if (results[i].url == url) - count++; - } - return count; -} - void ExpireHistoryTest::EnsureURLInfoGone(const URLRow& row) { // Verify the URL no longer exists. URLRow temp_row; EXPECT_FALSE(main_db_->GetURLRow(row.id(), &temp_row)); - // The indexed data should be gone. - EXPECT_EQ(0, CountTextMatchesForURL(row.url())); - // There should be no visits. VisitVector visits; main_db_->GetVisitsForURL(row.id(), &visits); @@ -471,46 +414,10 @@ TEST_F(ExpireHistoryTest, DISABLED_DeleteURLAndFavicon) { VisitVector visits; main_db_->GetVisitsForURL(url_ids[2], &visits); ASSERT_EQ(1U, visits.size()); - EXPECT_EQ(1, CountTextMatchesForURL(last_row.url())); - - // In this test we also make sure that any pending entries in the text - // database manager are removed. - text_db_->AddPageURL(last_row.url(), last_row.id(), visits[0].visit_id, - visits[0].visit_time); - - // Compute the text DB filename. - base::FilePath fts_filename = path().Append( - TextDatabase::IDToFileName(text_db_->TimeToID(visit_times[3]))); - - // When checking the file, the database must be closed. We then re-initialize - // it just like the test set-up did. - text_db_.reset(); - EXPECT_TRUE(IsStringInFile(fts_filename, "goats")); - text_db_.reset(new TextDatabaseManager(path(), - main_db_.get(), main_db_.get())); - ASSERT_TRUE(text_db_->Init(NULL)); - expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(), - text_db_.get()); // Delete the URL and its dependencies. expirer_.DeleteURL(last_row.url()); - // The string should be removed from the file. FTS can mark it as gone but - // doesn't remove it from the file, we want to be sure we're doing the latter. 
- text_db_.reset(); - EXPECT_FALSE(IsStringInFile(fts_filename, "goats")); - text_db_.reset(new TextDatabaseManager(path(), - main_db_.get(), main_db_.get())); - ASSERT_TRUE(text_db_->Init(NULL)); - expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(), - text_db_.get()); - - // Run the text database expirer. This will flush any pending entries so we - // can check that nothing was committed. We use a time far in the future so - // that anything added recently will get flushed. - TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1); - text_db_->FlushOldChangesForTime(expiration_time); - // All the normal data + the favicon should be gone. EnsureURLInfoGone(last_row); EXPECT_FALSE(GetFavicon(last_row.url(), chrome::FAVICON)); @@ -535,7 +442,6 @@ TEST_F(ExpireHistoryTest, DeleteURLWithoutFavicon) { VisitVector visits; main_db_->GetVisitsForURL(url_ids[1], &visits); EXPECT_EQ(2U, visits.size()); - EXPECT_EQ(1, CountTextMatchesForURL(last_row.url())); // Delete the URL and its dependencies. expirer_.DeleteURL(last_row.url()); @@ -568,9 +474,6 @@ TEST_F(ExpireHistoryTest, DontDeleteStarredURL) { chrome::FaviconID favicon_id = GetFavicon(url_row.url(), chrome::FAVICON); EXPECT_TRUE(HasFavicon(favicon_id)); - // But there should be no fts. - ASSERT_EQ(0, CountTextMatchesForURL(url_row.url())); - // And no visits. VisitVector visits; main_db_->GetVisitsForURL(url_row.id(), &visits); @@ -637,29 +540,18 @@ TEST_F(ExpireHistoryTest, FlushRecentURLsUnstarred) { ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1)); ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2)); - // In this test we also make sure that any pending entries in the text - // database manager are removed. VisitVector visits; main_db_->GetVisitsForURL(url_ids[2], &visits); ASSERT_EQ(1U, visits.size()); - text_db_->AddPageURL(url_row2.url(), url_row2.id(), visits[0].visit_id, - visits[0].visit_time); // This should delete the last two visits. 
std::set<GURL> restrict_urls; expirer_.ExpireHistoryBetween(restrict_urls, visit_times[2], Time()); - // Run the text database expirer. This will flush any pending entries so we - // can check that nothing was committed. We use a time far in the future so - // that anything added recently will get flushed. - TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1); - text_db_->FlushOldChangesForTime(expiration_time); - // Verify that the middle URL had its last visit deleted only. visits.clear(); main_db_->GetVisitsForURL(url_ids[1], &visits); EXPECT_EQ(1U, visits.size()); - EXPECT_EQ(0, CountTextMatchesForURL(url_row1.url())); // Verify that the middle URL visit time and visit counts were updated. URLRow temp_row; @@ -693,13 +585,9 @@ TEST_F(ExpireHistoryTest, FlushURLsForTimes) { ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1)); ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2)); - // In this test we also make sure that any pending entries in the text - // database manager are removed. VisitVector visits; main_db_->GetVisitsForURL(url_ids[2], &visits); ASSERT_EQ(1U, visits.size()); - text_db_->AddPageURL(url_row2.url(), url_row2.id(), visits[0].visit_id, - visits[0].visit_time); // This should delete the last two visits. std::vector<base::Time> times; @@ -707,17 +595,10 @@ TEST_F(ExpireHistoryTest, FlushURLsForTimes) { times.push_back(visit_times[2]); expirer_.ExpireHistoryForTimes(times); - // Run the text database expirer. This will flush any pending entries so we - // can check that nothing was committed. We use a time far in the future so - // that anything added recently will get flushed. - TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1); - text_db_->FlushOldChangesForTime(expiration_time); - // Verify that the middle URL had its last visit deleted only. 
visits.clear(); main_db_->GetVisitsForURL(url_ids[1], &visits); EXPECT_EQ(1U, visits.size()); - EXPECT_EQ(0, CountTextMatchesForURL(url_row1.url())); // Verify that the middle URL visit time and visit counts were updated. URLRow temp_row; @@ -753,30 +634,19 @@ TEST_F(ExpireHistoryTest, FlushRecentURLsUnstarredRestricted) { ASSERT_TRUE(main_db_->GetURLRow(url_ids[1], &url_row1)); ASSERT_TRUE(main_db_->GetURLRow(url_ids[2], &url_row2)); - // In this test we also make sure that any pending entries in the text - // database manager are removed. VisitVector visits; main_db_->GetVisitsForURL(url_ids[2], &visits); ASSERT_EQ(1U, visits.size()); - text_db_->AddPageURL(url_row2.url(), url_row2.id(), visits[0].visit_id, - visits[0].visit_time); // This should delete the last two visits. std::set<GURL> restrict_urls; restrict_urls.insert(url_row1.url()); expirer_.ExpireHistoryBetween(restrict_urls, visit_times[2], Time()); - // Run the text database expirer. This will flush any pending entries so we - // can check that nothing was committed. We use a time far in the future so - // that anything added recently will get flushed. - TimeTicks expiration_time = TimeTicks::Now() + TimeDelta::FromDays(1); - text_db_->FlushOldChangesForTime(expiration_time); - // Verify that the middle URL had its last visit deleted only. visits.clear(); main_db_->GetVisitsForURL(url_ids[1], &visits); EXPECT_EQ(1U, visits.size()); - EXPECT_EQ(0, CountTextMatchesForURL(url_row1.url())); // Verify that the middle URL visit time and visit counts were updated. 
URLRow temp_row; diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc index adaff5a..83a2afd 100644 --- a/chrome/browser/history/history_backend.cc +++ b/chrome/browser/history/history_backend.cc @@ -14,6 +14,7 @@ #include "base/basictypes.h" #include "base/bind.h" #include "base/compiler_specific.h" +#include "base/files/file_enumerator.h" #include "base/memory/scoped_ptr.h" #include "base/memory/scoped_vector.h" #include "base/message_loop/message_loop.h" @@ -68,10 +69,6 @@ using base::TimeTicks; (this does not store visit segments as they expire after 3 mos.) - TextDatabaseManager (manages multiple text database for different times) - TextDatabase (represents a single month of full-text index). - ...more TextDatabase objects... - ExpireHistoryBackend (manages moving things from HistoryDatabase to the ArchivedDatabase and deleting) */ @@ -168,53 +165,6 @@ class CommitLaterTask : public base::RefCounted<CommitLaterTask> { scoped_refptr<HistoryBackend> history_backend_; }; -// Handles querying first the main database, then the full text database if that -// fails. It will optionally keep track of all URLs seen so duplicates can be -// eliminated. This is used by the querying sub-functions. -// -// TODO(brettw): This class may be able to be simplified or eliminated. After -// this was written, QueryResults can efficiently look up by URL, so the need -// for this extra set of previously queried URLs is less important. -class HistoryBackend::URLQuerier { - public: - URLQuerier(URLDatabase* main_db, URLDatabase* archived_db, bool track_unique) - : main_db_(main_db), - archived_db_(archived_db), - track_unique_(track_unique) { - } - - // When we're tracking unique URLs, returns true if this URL has been - // previously queried. Only call when tracking unique URLs. 
- bool HasURL(const GURL& url) { - DCHECK(track_unique_); - return unique_urls_.find(url) != unique_urls_.end(); - } - - bool GetRowForURL(const GURL& url, URLRow* row) { - if (!main_db_->GetRowForURL(url, row)) { - if (!archived_db_ || !archived_db_->GetRowForURL(url, row)) { - // This row is neither in the main nor the archived DB. - return false; - } - } - - if (track_unique_) - unique_urls_.insert(url); - return true; - } - - private: - URLDatabase* main_db_; // Guaranteed non-NULL. - URLDatabase* archived_db_; // Possibly NULL. - - bool track_unique_; - - // When track_unique_ is set, this is updated with every URL seen so far. - std::set<GURL> unique_urls_; - - DISALLOW_COPY_AND_ASSIGN(URLQuerier); -}; - // HistoryBackend -------------------------------------------------------------- HistoryBackend::HistoryBackend(const base::FilePath& history_dir, @@ -582,7 +532,7 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) { } // Last, save this redirect chain for later so we can set titles & favicons - // on the redirected pages properly. It is indexed by the destination page. + // on the redirected pages properly. recent_redirects_.Put(request.url, redirects); } @@ -600,11 +550,6 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) { last_ids.second); } - if (text_database_) { - text_database_->AddPageURL(request.url, last_ids.first, last_ids.second, - request.time); - } - ScheduleCommit(); } @@ -617,12 +562,14 @@ void HistoryBackend::InitImpl(const std::string& languages) { TimeTicks beginning_time = TimeTicks::Now(); - // Compute the file names. Note that the index file can be removed when the - // text db manager is finished being hooked up. + // Compute the file names. base::FilePath history_name = history_dir_.Append(chrome::kHistoryFilename); base::FilePath thumbnail_name = GetThumbnailFileName(); base::FilePath archived_name = GetArchivedFileName(); + // Delete the old index database files which are no longer used. 
+ DeleteFTSIndexDatabases(); + // History database. db_.reset(new HistoryDatabase()); @@ -662,8 +609,8 @@ void HistoryBackend::InitImpl(const std::string& languages) { delete mem_backend; // Error case, run without the in-memory DB. db_->BeginExclusiveMode(); // Must be after the mem backend read the data. - // Create the history publisher which needs to be passed on to the text and - // thumbnail databases for publishing history. + // Create the history publisher which needs to be passed on to the thumbnail + // database for publishing history. history_publisher_.reset(new HistoryPublisher()); if (!history_publisher_->Init()) { // The init may fail when there are no indexers wanting our history. @@ -671,22 +618,6 @@ void HistoryBackend::InitImpl(const std::string& languages) { history_publisher_.reset(); } - // Full-text database. This has to be first so we can pass it to the - // HistoryDatabase for migration. - text_database_.reset(new TextDatabaseManager(history_dir_, - db_.get(), db_.get())); - if (!text_database_->Init(history_publisher_.get())) { - LOG(WARNING) << "Text database initialization failed, running without it."; - text_database_.reset(); - } - if (db_->needs_version_17_migration()) { - // See needs_version_17_migration() decl for more. In this case, we want - // to erase all the text database files. This must be done after the text - // database manager has been initialized, since it knows about all the - // files it manages. - text_database_->DeleteAll(); - } - // Thumbnail database. thumbnail_db_.reset(new ThumbnailDatabase()); if (!db_->GetNeedsThumbnailMigration()) { @@ -739,7 +670,7 @@ void HistoryBackend::InitImpl(const std::string& languages) { // The main DB initialization should intuitively be first (not that it // actually matters) and the expirer should be set last. expirer_.SetDatabases(db_.get(), archived_db_.get(), - thumbnail_db_.get(), text_database_.get()); + thumbnail_db_.get()); // Open the long-running transaction. 
db_->BeginTransaction(); @@ -747,8 +678,6 @@ void HistoryBackend::InitImpl(const std::string& languages) { thumbnail_db_->BeginTransaction(); if (archived_db_) archived_db_->BeginTransaction(); - if (text_database_) - text_database_->BeginTransaction(); // Get the first item in our database. db_->GetStartDate(&first_recorded_time_); @@ -794,10 +723,6 @@ void HistoryBackend::CloseAllDatabases() { archived_db_->CommitTransaction(); archived_db_.reset(); } - if (text_database_) { - text_database_->CommitTransaction(); - text_database_.reset(); - } } std::pair<URLID, VisitID> HistoryBackend::AddPageVisit( @@ -861,14 +786,6 @@ std::pair<URLID, VisitID> HistoryBackend::AddPageVisit( return std::make_pair(0, 0); } url_info.id_ = url_id; - - // We don't actually add the URL to the full text index at this point. It - // might be nice to do this so that even if we get no title or body, the - // user can search for URL components and get the page. - // - // However, in most cases, we'll get at least a title and usually contents, - // and this add will be redundant, slowing everything down. As a result, - // we ignore this edge case. } // Add the visit with the time to the database. @@ -938,26 +855,6 @@ void HistoryBackend::AddPagesWithDetails(const URLRows& urls, } } - // Add the page to the full text index. This function is also used for - // importing. Even though we don't have page contents, we can at least - // add the title and URL to the index so they can be searched. We don't - // bother to delete any already-existing FTS entries for the URL, since - // this is normally called on import. - // - // If you ever import *after* first run (selecting import from the menu), - // then these additional entries will "shadow" the originals when querying - // for the most recent match only, and the user won't get snippets. This is - // a very minor issue, and fixing it will make import slower, so we don't - // bother. 
- bool has_indexed = false; - if (text_database_) { - // We do not have to make it update the visit database, below, we will - // create the visit entry with the indexed flag set. - has_indexed = text_database_->AddPageData(i->url(), url_id, 0, - i->last_visit(), - i->title(), string16()); - } - // Sync code manages the visits itself. if (visit_source != SOURCE_SYNCED) { // Make up a visit to correspond to the last visit to the page. @@ -966,7 +863,6 @@ void HistoryBackend::AddPagesWithDetails(const URLRows& urls, content::PAGE_TRANSITION_LINK | content::PAGE_TRANSITION_CHAIN_START | content::PAGE_TRANSITION_CHAIN_END), 0); - visit_info.is_indexed = has_indexed; if (!visit_database->AddVisit(&visit_info, visit_source)) { NOTREACHED() << "Adding visit failed."; return; @@ -1001,10 +897,6 @@ void HistoryBackend::SetPageTitle(const GURL& url, if (!db_) return; - // Update the full text index. - if (text_database_) - text_database_->AddPageTitle(url, title); - // Search for recent redirects which should get the same title. We make a // dummy list containing the exact URL visited if there are no redirects so // the processing below can be the same. @@ -1499,59 +1391,6 @@ void HistoryBackend::QueryHistoryText(URLDatabase* url_db, result->set_reached_beginning(true); } -void HistoryBackend::QueryHistoryFTS(const string16& text_query, - const QueryOptions& options, - QueryResults* result) { - if (!text_database_) - return; - - // Full text query, first get all the FTS results in the time range. - std::vector<TextDatabase::Match> fts_matches; - Time first_time_searched; - text_database_->GetTextMatches(text_query, options, - &fts_matches, &first_time_searched); - - URLQuerier querier(db_.get(), archived_db_.get(), true); - - // Now get the row and visit information for each one. - URLResult url_result; // Declare outside loop to prevent re-construction. 
- for (size_t i = 0; i < fts_matches.size(); i++) { - if (options.max_count != 0 && - static_cast<int>(result->size()) >= options.max_count) - break; // Got too many items. - - // Get the URL, querying the main and archived databases as necessary. If - // this is not found, the history and full text search databases are out - // of sync and we give up with this result. - if (!querier.GetRowForURL(fts_matches[i].url, &url_result)) - continue; - - if (!url_result.url().is_valid()) - continue; // Don't report invalid URLs in case of corruption. - - // Copy over the FTS stuff that the URLDatabase doesn't know about. - // We do this with swap() to avoid copying, since we know we don't - // need the original any more. Note that we override the title with the - // one from FTS, since that will match the title_match_positions (the - // FTS title and the history DB title may differ). - url_result.set_title(fts_matches[i].title); - url_result.title_match_positions_.swap( - fts_matches[i].title_match_positions); - url_result.snippet_.Swap(&fts_matches[i].snippet); - - // The visit time also comes from the full text search database. Since it - // has the time, we can avoid an extra query of the visits table. - url_result.set_visit_time(fts_matches[i].time); - - // Add it to the vector, this will clear our |url_row| object as a - // result of the swap. - result->AppendURLBySwapping(&url_result); - } - - if (first_time_searched <= first_recorded_time_) - result->set_reached_beginning(true); -} - // Frontend to GetMostRecentRedirectsFrom from the history thread. void HistoryBackend::QueryRedirectsFrom( scoped_refptr<QueryRedirectsRequest> request, @@ -1811,14 +1650,6 @@ void HistoryBackend::ScheduleAutocomplete(HistoryURLProvider* provider, provider->ExecuteWithDB(this, db_.get(), params); } -void HistoryBackend::SetPageContents(const GURL& url, - const string16& contents) { - // This is histogrammed in the text database manager. 
- if (!text_database_) - return; - text_database_->AddPageContents(url, contents); -} - void HistoryBackend::SetPageThumbnail( const GURL& url, const gfx::Image* thumbnail, @@ -1902,6 +1733,23 @@ void HistoryBackend::MigrateThumbnailsDatabase() { } } +void HistoryBackend::DeleteFTSIndexDatabases() { + // Find files on disk matching the text databases file pattern so we can + // quickly test for and delete them. + base::FilePath::StringType filepattern = + FILE_PATH_LITERAL("History Index *"); + base::FileEnumerator enumerator( + history_dir_, false, base::FileEnumerator::FILES, filepattern); + int num_databases_deleted = 0; + base::FilePath current_file; + while (!(current_file = enumerator.Next()).empty()) { + if (sql::Connection::Delete(current_file)) + num_databases_deleted++; + } + UMA_HISTOGRAM_COUNTS("History.DeleteFTSIndexDatabases", + num_databases_deleted); +} + bool HistoryBackend::GetThumbnailFromOlderRedirect( const GURL& page_url, std::vector<unsigned char>* data) { @@ -2668,11 +2516,6 @@ void HistoryBackend::Commit() { archived_db_->CommitTransaction(); archived_db_->BeginTransaction(); } - - if (text_database_) { - text_database_->CommitTransaction(); - text_database_->BeginTransaction(); - } } void HistoryBackend::ScheduleCommit() { @@ -2903,7 +2746,7 @@ void HistoryBackend::KillHistoryDatabase() { // The expirer keeps tabs on the active databases. Tell it about the // databases which will be closed. - expirer_.SetDatabases(NULL, NULL, NULL, NULL); + expirer_.SetDatabases(NULL, NULL, NULL); // Reopen a new transaction for |db_| for the sake of CloseAllDatabases(). db_->BeginTransaction(); @@ -2993,15 +2836,7 @@ void HistoryBackend::DeleteAllHistory() { LOG(ERROR) << "Main history could not be cleared"; kept_urls.clear(); - // Delete FTS files & archived history. - if (text_database_) { - // We assume that the text database has one transaction on them that we need - // to close & restart (the long-running history transaction). 
- text_database_->CommitTransaction(); - text_database_->DeleteAll(); - text_database_->BeginTransaction(); - } - + // Delete archived history. if (archived_db_) { // Close the database and delete the file. archived_db_.reset(); diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h index c4010b7..fd92878 100644 --- a/chrome/browser/history/history_backend.h +++ b/chrome/browser/history/history_backend.h @@ -20,7 +20,6 @@ #include "chrome/browser/history/history_database.h" #include "chrome/browser/history/history_marshaling.h" #include "chrome/browser/history/history_types.h" -#include "chrome/browser/history/text_database_manager.h" #include "chrome/browser/history/thumbnail_database.h" #include "chrome/browser/history/visit_tracker.h" #include "chrome/browser/search_engines/template_url_id.h" @@ -162,11 +161,6 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, const GURL& url, base::Time end_ts); - - // Indexing ------------------------------------------------------------------ - - void SetPageContents(const GURL& url, const string16& contents); - // Querying ------------------------------------------------------------------ // ScheduleAutocomplete() never frees |provider| (which is globally live). 
@@ -564,6 +558,7 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, QueryFilteredURLs); FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, UpdateVisitDuration); FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, ExpireHistoryForTimes); + FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteFTSIndexDatabases); friend class ::TestingProfile; @@ -658,9 +653,6 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, const string16& text_query, const QueryOptions& options, QueryResults* result); - void QueryHistoryFTS(const string16& text_query, - const QueryOptions& options, - QueryResults* result); // Committing ---------------------------------------------------------------- @@ -843,6 +835,9 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // The IDs of the URLs may change. bool ClearAllMainHistory(const URLRows& kept_urls); + // Deletes the FTS index database files, which are no longer used. + void DeleteFTSIndexDatabases(); + // Returns the BookmarkService, blocking until it is loaded. This may return // NULL during testing. BookmarkService* GetBookmarkService(); @@ -875,10 +870,6 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // Stores old history in a larger, slower database. scoped_ptr<ArchivedDatabase> archived_db_; - // Full text database manager, possibly NULL if the database could not be - // created. - scoped_ptr<TextDatabaseManager> text_database_; - // Manages expiration between the various databases. 
ExpireHistoryBackend expirer_; diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc index 7bd2328..9f841c9 100644 --- a/chrome/browser/history/history_backend_unittest.cc +++ b/chrome/browser/history/history_backend_unittest.cc @@ -5,6 +5,7 @@ #include <algorithm> #include <set> #include <vector> +#include <fstream> #include "base/basictypes.h" #include "base/bind.h" @@ -15,6 +16,7 @@ #include "base/memory/ref_counted.h" #include "base/memory/scoped_ptr.h" #include "base/path_service.h" +#include "base/platform_file.h" #include "base/strings/string16.h" #include "base/strings/string_number_conversions.h" #include "base/strings/utf_string_conversions.h" @@ -504,12 +506,10 @@ TEST_F(HistoryBackendTest, DeleteAll) { VisitVector visits; backend_->db_->GetVisitsForURL(row1_id, &visits); ASSERT_EQ(1U, visits.size()); - VisitID visit1_id = visits[0].visit_id; visits.clear(); backend_->db_->GetVisitsForURL(row2_id, &visits); ASSERT_EQ(1U, visits.size()); - VisitID visit2_id = visits[0].visit_id; // The in-memory backend should have been set and it should have gotten the // typed URL. @@ -539,16 +539,6 @@ TEST_F(HistoryBackendTest, DeleteAll) { bookmark_model_.AddURL( bookmark_model_.bookmark_bar_node(), 0, string16(), row1.url()); - // Set full text index for each one. - backend_->text_database_->AddPageData(row1.url(), row1_id, visit1_id, - row1.last_visit(), - UTF8ToUTF16("Title 1"), - UTF8ToUTF16("Body 1")); - backend_->text_database_->AddPageData(row2.url(), row2_id, visit2_id, - row2.last_visit(), - UTF8ToUTF16("Title 2"), - UTF8ToUTF16("Body 2")); - // Now finally clear all history. backend_->DeleteAllHistory(); @@ -615,15 +605,6 @@ TEST_F(HistoryBackendTest, DeleteAll) { // The first URL should still be bookmarked. EXPECT_TRUE(bookmark_model_.IsBookmarked(row1.url())); - - // The full text database should have no data. 
- std::vector<TextDatabase::Match> text_matches; - Time first_time_searched; - backend_->text_database_->GetTextMatches(UTF8ToUTF16("Body"), - QueryOptions(), - &text_matches, - &first_time_searched); - EXPECT_EQ(0U, text_matches.size()); } // Checks that adding a visit, then calling DeleteAll, and then trying to add @@ -659,9 +640,8 @@ TEST_F(HistoryBackendTest, DeleteAllThenAddData) { backend_->db_->GetAllVisitsInRange(Time(), Time(), 0, &all_visits); ASSERT_EQ(0U, all_visits.size()); - // Try and set the full text index. + // Try and set the title. backend_->SetPageTitle(url, UTF8ToUTF16("Title")); - backend_->SetPageContents(url, UTF8ToUTF16("Body")); // The row should still be deleted. EXPECT_FALSE(backend_->db_->GetRowForURL(url, &outrow)); @@ -669,15 +649,6 @@ TEST_F(HistoryBackendTest, DeleteAllThenAddData) { // The visit should still be deleted. backend_->db_->GetAllVisitsInRange(Time(), Time(), 0, &all_visits); ASSERT_EQ(0U, all_visits.size()); - - // The full text database should have no data. - std::vector<TextDatabase::Match> text_matches; - Time first_time_searched; - backend_->text_database_->GetTextMatches(UTF8ToUTF16("Body"), - QueryOptions(), - &text_matches, - &first_time_searched); - EXPECT_EQ(0U, text_matches.size()); } TEST_F(HistoryBackendTest, URLsNoLongerBookmarked) { @@ -2812,4 +2783,42 @@ TEST_F(HistoryBackendTest, RemoveNotification) { service->DeleteURL(url); } +// Simple function to create a new dummy file. +void CreateDummyFile(const base::FilePath& filename) { + std::wofstream file; + file.open(filename.value().c_str()); + ASSERT_TRUE(file.is_open()); + file << L"Dummy"; + file.close(); +} + +// Test DeleteFTSIndexDatabases deletes expected files. 
+TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) {
+  ASSERT_TRUE(backend_.get());
+
+  base::FilePath history_path(getTestDir());
+  base::FilePath db1(history_path.AppendASCII("History Index 2013-05"));
+  base::FilePath db1_journal(db1.InsertBeforeExtensionASCII("-journal"));
+  base::FilePath db1_wal(db1.InsertBeforeExtensionASCII("-wal"));
+  base::FilePath db2_symlink(history_path.AppendASCII("History Index 2013-06"));
+  base::FilePath db2_actual(history_path.AppendASCII("Underlying DB"));
+
+  // Set up dummy index database files.
+  CreateDummyFile(db1);
+  CreateDummyFile(db1_journal);
+  CreateDummyFile(db1_wal);
+  CreateDummyFile(db2_actual);
+#if defined(OS_POSIX)
+  EXPECT_TRUE(file_util::CreateSymbolicLink(db2_actual, db2_symlink));
+#endif
+
+  // Delete all FTS index databases.
+  backend_->DeleteFTSIndexDatabases();
+  EXPECT_FALSE(base::PathExists(db1));
+  EXPECT_FALSE(base::PathExists(db1_wal));
+  EXPECT_FALSE(base::PathExists(db1_journal));
+  EXPECT_FALSE(base::PathExists(db2_symlink));
+  EXPECT_TRUE(base::PathExists(db2_actual));  // Symlinks shouldn't be followed.
+}
+
 }  // namespace history
diff --git a/chrome/browser/history/history_database.cc b/chrome/browser/history/history_database.cc
index 3db7acd..20cb023 100644
--- a/chrome/browser/history/history_database.cc
+++ b/chrome/browser/history/history_database.cc
@@ -441,15 +441,13 @@ sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
 #if !defined(OS_WIN)
 void HistoryDatabase::MigrateTimeEpoch() {
   // Update all the times in the URLs and visits table in the main database.
-  // For visits, clear the indexed flag since we'll delete the FTS databases in
-  // the next step.
ignore_result(db_.Execute( "UPDATE urls " "SET last_visit_time = last_visit_time + 11644473600000000 " "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);")); ignore_result(db_.Execute( "UPDATE visits " - "SET visit_time = visit_time + 11644473600000000, is_indexed = 0 " + "SET visit_time = visit_time + 11644473600000000 " "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);")); ignore_result(db_.Execute( "UPDATE segment_usage " diff --git a/chrome/browser/history/history_service.cc b/chrome/browser/history/history_service.cc index b697f37..644dc85 100644 --- a/chrome/browser/history/history_service.cc +++ b/chrome/browser/history/history_service.cc @@ -14,10 +14,6 @@ // -> SQLite connection to History // -> ArchivedDatabase // -> SQLite connection to Archived History -// -> TextDatabaseManager -// -> SQLite connection to one month's data -// -> SQLite connection to one month's data -// ... // -> ThumbnailDatabase // -> SQLite connection to Thumbnails // (and favicons) @@ -597,16 +593,6 @@ void HistoryService::AddPagesWithDetails(const history::URLRows& info, &HistoryBackend::AddPagesWithDetails, info, visit_source); } -void HistoryService::SetPageContents(const GURL& url, - const string16& contents) { - DCHECK(thread_checker_.CalledOnValidThread()); - if (!CanAddURL(url)) - return; - - ScheduleAndForget(PRIORITY_LOW, &HistoryBackend::SetPageContents, - url, contents); -} - HistoryService::Handle HistoryService::GetPageThumbnail( const GURL& page_url, CancelableRequestConsumerBase* consumer, diff --git a/chrome/browser/history/history_service.h b/chrome/browser/history/history_service.h index 060614d..1662571 100644 --- a/chrome/browser/history/history_service.h +++ b/chrome/browser/history/history_service.h @@ -236,13 +236,6 @@ class HistoryService : public CancelableRequestProvider, const GURL& url, base::Time end_ts); - // Indexing ------------------------------------------------------------------ - - // Notifies history of the body text of the 
given recently-visited URL. - // If the URL was not visited "recently enough," the history system may - // discard it. - void SetPageContents(const GURL& url, const string16& contents); - // Querying ------------------------------------------------------------------ // Returns the information about the requested URL. If the URL is found, diff --git a/chrome/browser/history/history_tab_helper.cc b/chrome/browser/history/history_tab_helper.cc index f0285ab..96c545e 100644 --- a/chrome/browser/history/history_tab_helper.cc +++ b/chrome/browser/history/history_tab_helper.cc @@ -81,16 +81,6 @@ HistoryTabHelper::CreateHistoryAddPageArgs( return add_page_args; } -bool HistoryTabHelper::OnMessageReceived(const IPC::Message& message) { - bool handled = true; - IPC_BEGIN_MESSAGE_MAP(HistoryTabHelper, message) - IPC_MESSAGE_HANDLER(ChromeViewHostMsg_PageContents, OnPageContents) - IPC_MESSAGE_UNHANDLED(handled = false) - IPC_END_MESSAGE_MAP() - - return handled; -} - void HistoryTabHelper::DidNavigateMainFrame( const content::LoadCommittedDetails& details, const content::FrameNavigateParams& params) { @@ -154,26 +144,6 @@ void HistoryTabHelper::Observe(int type, } } -void HistoryTabHelper::OnPageContents(const GURL& url, - int32 page_id, - const string16& contents) { - // Don't index any https pages. People generally don't want their bank - // accounts, etc. indexed on their computer, especially since some of these - // things are not marked cachable. - // TODO(brettw) we may want to consider more elaborate heuristics such as - // the cachability of the page. We may also want to consider subframes (this - // test will still index subframes if the subframe is SSL). - // TODO(zelidrag) bug chromium-os:2808 - figure out if we want to reenable - // content indexing for chromeos in some future releases. 
-#if !defined(OS_CHROMEOS) - if (!url.SchemeIsSecure()) { - HistoryService* hs = GetHistoryService(); - if (hs) - hs->SetPageContents(url, contents); - } -#endif -} - HistoryService* HistoryTabHelper::GetHistoryService() { Profile* profile = Profile::FromBrowserContext(web_contents()->GetBrowserContext()); diff --git a/chrome/browser/history/history_tab_helper.h b/chrome/browser/history/history_tab_helper.h index 95c4ee0..2f613fa 100644 --- a/chrome/browser/history/history_tab_helper.h +++ b/chrome/browser/history/history_tab_helper.h @@ -46,7 +46,6 @@ class HistoryTabHelper : public content::WebContentsObserver, friend class content::WebContentsUserData<HistoryTabHelper>; // content::WebContentsObserver implementation. - virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE; virtual void DidNavigateMainFrame( const content::LoadCommittedDetails& details, const content::FrameNavigateParams& params) OVERRIDE; diff --git a/chrome/browser/history/history_types.cc b/chrome/browser/history/history_types.cc index f0e6439..f766a0b 100644 --- a/chrome/browser/history/history_types.cc +++ b/chrome/browser/history/history_types.cc @@ -69,8 +69,7 @@ VisitRow::VisitRow() url_id(0), referring_visit(0), transition(content::PAGE_TRANSITION_LINK), - segment_id(0), - is_indexed(false) { + segment_id(0) { } VisitRow::VisitRow(URLID arg_url_id, @@ -83,8 +82,7 @@ VisitRow::VisitRow(URLID arg_url_id, visit_time(arg_visit_time), referring_visit(arg_referring_visit), transition(arg_transition), - segment_id(arg_segment_id), - is_indexed(false) { + segment_id(arg_segment_id) { } VisitRow::~VisitRow() { @@ -246,7 +244,6 @@ void QueryResults::AdjustResultMap(size_t begin, size_t end, ptrdiff_t delta) { QueryOptions::QueryOptions() : max_count(0), - body_only(false), duplicate_policy(QueryOptions::REMOVE_ALL_DUPLICATES) { } diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h index 7fd1295..f23a310 100644 --- 
a/chrome/browser/history/history_types.h +++ b/chrome/browser/history/history_types.h @@ -246,13 +246,6 @@ class VisitRow { // If 0, the segment id is null in the table. SegmentID segment_id; - // True when this visit has indexed data for it. We try to keep this in sync - // with the full text index: when we add or remove things from there, we will - // update the visit table as well. However, that file could get deleted, or - // out of sync in various ways, so this flag should be false when things - // change. - bool is_indexed; - // Record how much time a user has this visit starting from the user // opened this visit to the user closed or ended this visit. // This includes both active and inactive time as long as @@ -460,10 +453,6 @@ struct QueryOptions { // enough room. When 0, this will return everything (the default). int max_count; - // Only search within the page body if true, otherwise search all columns - // including url and time. Defaults to false. - bool body_only; - enum DuplicateHandling { // Omit visits for which there is a more recent visit to the same URL. // Each URL in the results will appear only once. diff --git a/chrome/browser/history/text_database.cc b/chrome/browser/history/text_database.cc deleted file mode 100644 index 0f5db89..0000000 --- a/chrome/browser/history/text_database.cc +++ /dev/null @@ -1,353 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include <limits> -#include <set> -#include <string> - -#include "chrome/browser/history/text_database.h" - -#include "base/file_util.h" -#include "base/logging.h" -#include "base/metrics/histogram.h" -#include "base/strings/string_number_conversions.h" -#include "base/strings/stringprintf.h" -#include "base/strings/utf_string_conversions.h" -#include "sql/statement.h" -#include "sql/transaction.h" - -// There are two tables in each database, one full-text search (FTS) table which -// indexes the contents and title of the pages. The other is a regular SQLITE -// table which contains non-indexed information about the page. All columns of -// a FTS table are indexed using the text search algorithm, which isn't what we -// want for things like times. If this were in the FTS table, there would be -// different words in the index for each time number. -// -// "pages" FTS table: -// url URL of the page so searches will match the URL. -// title Title of the page. -// body Body of the page. -// -// "info" regular table: -// time Time the corresponding FTS entry was visited. -// -// We do joins across these two tables by using their internal rowids, which we -// keep in sync between the two tables. The internal rowid is the only part of -// an FTS table that is indexed like a normal table, and the index over it is -// free since sqlite always indexes the internal rowid. - -namespace history { - -namespace { - -// Version 1 uses FTS2 for index files. -// Version 2 uses FTS3. -static const int kCurrentVersionNumber = 2; -static const int kCompatibleVersionNumber = 2; - -// Snippet computation relies on the index of the columns in the original -// create statement. These are the 0-based indices (as strings) of the -// corresponding columns. -const char kTitleColumnIndex[] = "1"; -const char kBodyColumnIndex[] = "2"; - -// The string prepended to the database identifier to generate the filename. 
-const base::FilePath::CharType kFilePrefix[] = - FILE_PATH_LITERAL("History Index "); - -} // namespace - -TextDatabase::Match::Match() {} - -TextDatabase::Match::~Match() {} - -TextDatabase::TextDatabase(const base::FilePath& path, - DBIdent id, - bool allow_create) - : path_(path), - ident_(id), - allow_create_(allow_create) { - // Compute the file name. - file_name_ = path_.Append(IDToFileName(ident_)); -} - -TextDatabase::~TextDatabase() { -} - -// static -const base::FilePath::CharType* TextDatabase::file_base() { - return kFilePrefix; -} - -// static -base::FilePath TextDatabase::IDToFileName(DBIdent id) { - // Identifiers are intended to be a combination of the year and month, for - // example, 200801 for January 2008. We convert this to - // "History Index 2008-01". However, we don't make assumptions about this - // scheme: the caller should assign IDs as it feels fit with the knowledge - // that they will apppear on disk in this form. - base::FilePath::StringType filename(file_base()); - base::StringAppendF(&filename, FILE_PATH_LITERAL("%d-%02d"), - id / 100, id % 100); - return base::FilePath(filename); -} - -// static -TextDatabase::DBIdent TextDatabase::FileNameToID( - const base::FilePath& file_path) { - base::FilePath::StringType file_name = file_path.BaseName().value(); - - // We don't actually check the prefix here. Since the file system could - // be case insensitive in ways we can't predict (NTFS), checking could - // potentially be the wrong thing to do. Instead, we just look for a suffix. - static const size_t kIDStringLength = 7; // Room for "xxxx-xx". 
- if (file_name.length() < kIDStringLength) - return 0; - const base::FilePath::StringType suffix( - &file_name[file_name.length() - kIDStringLength]); - - if (suffix.length() != kIDStringLength || - suffix[4] != FILE_PATH_LITERAL('-')) { - return 0; - } - - // TODO: Once StringPiece supports a templated interface over the - // underlying string type, use it here instead of substr, since that - // will avoid needless string copies. StringPiece cannot be used - // right now because base::FilePath::StringType could use either 8 or 16 bit - // characters, depending on the OS. - int year, month; - base::StringToInt(suffix.substr(0, 4), &year); - base::StringToInt(suffix.substr(5, 2), &month); - - return year * 100 + month; -} - -bool TextDatabase::Init() { - // Make sure, if we're not allowed to create the file, that it exists. - if (!allow_create_) { - if (!base::PathExists(file_name_)) - return false; - } - - db_.set_histogram_tag("Text"); - - // Set the database page size to something a little larger to give us - // better performance (we're typically seek rather than bandwidth limited). - // This only has an effect before any tables have been created, otherwise - // this is a NOP. Must be a power of 2 and a max of 8192. - db_.set_page_size(4096); - - // The default cache size is 2000 which give >8MB of data. Since we will often - // have 2-3 of these objects, each with their own 8MB, this adds up very fast. - // We therefore reduce the size so when there are multiple objects, we're not - // too big. - db_.set_cache_size(512); - - // Run the database in exclusive mode. Nobody else should be accessing the - // database while we're running, and this will give somewhat improved perf. - db_.set_exclusive_locking(); - - // Attach the database to our index file. - if (!db_.Open(file_name_)) - return false; - - // Meta table tracking version information. 
- if (!meta_table_.Init(&db_, kCurrentVersionNumber, kCompatibleVersionNumber)) - return false; - if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) { - // This version is too new. We don't bother notifying the user on this - // error, and just fail to use the file. Normally if they have version skew, - // they will get it for the main history file and it won't be necessary - // here. If that's not the case, since this is only indexed data, it's - // probably better to just not give FTS results than strange errors when - // everything else is working OK. - LOG(WARNING) << "Text database is too new."; - return false; - } - - return CreateTables(); -} - -void TextDatabase::BeginTransaction() { - db_.BeginTransaction(); -} - -void TextDatabase::CommitTransaction() { - db_.CommitTransaction(); -} - -bool TextDatabase::CreateTables() { - // FTS table of page contents. - if (!db_.DoesTableExist("pages")) { - if (!db_.Execute("CREATE VIRTUAL TABLE pages USING fts3(" - "TOKENIZE icu," - "url LONGVARCHAR," - "title LONGVARCHAR," - "body LONGVARCHAR)")) - return false; - } - - // Non-FTS table containing URLs and times so we can efficiently find them - // using a regular index (all FTS columns are special and are treated as - // full-text-search, which is not what we want when retrieving this data). - if (!db_.DoesTableExist("info")) { - // Note that there is no point in creating an index over time. Since - // we must always query the entire FTS table (it can not efficiently do - // subsets), we will always end up doing that first, and joining the info - // table off of that. - if (!db_.Execute("CREATE TABLE info(time INTEGER NOT NULL)")) - return false; - } - - // Create the index. 
- return db_.Execute("CREATE INDEX IF NOT EXISTS info_time ON info(time)"); -} - -bool TextDatabase::AddPageData(base::Time time, - const std::string& url, - const std::string& title, - const std::string& contents) { - sql::Transaction committer(&db_); - if (!committer.Begin()) - return false; - - // Add to the pages table. - sql::Statement add_to_pages(db_.GetCachedStatement(SQL_FROM_HERE, - "INSERT INTO pages (url, title, body) VALUES (?,?,?)")); - add_to_pages.BindString(0, url); - add_to_pages.BindString(1, title); - add_to_pages.BindString(2, contents); - if (!add_to_pages.Run()) - return false; - - int64 rowid = db_.GetLastInsertRowId(); - - // Add to the info table with the same rowid. - sql::Statement add_to_info(db_.GetCachedStatement(SQL_FROM_HERE, - "INSERT INTO info (rowid, time) VALUES (?,?)")); - add_to_info.BindInt64(0, rowid); - add_to_info.BindInt64(1, time.ToInternalValue()); - - if (!add_to_info.Run()) - return false; - - return committer.Commit(); -} - -void TextDatabase::DeletePageData(base::Time time, const std::string& url) { - // First get all rows that match. Selecing on time (which has an index) allows - // us to avoid brute-force searches on the full-text-index table (there will - // generally be only one match per time). - sql::Statement select_ids(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT info.rowid " - "FROM info JOIN pages ON info.rowid = pages.rowid " - "WHERE info.time=? AND pages.url=?")); - select_ids.BindInt64(0, time.ToInternalValue()); - select_ids.BindString(1, url); - - std::set<int64> rows_to_delete; - while (select_ids.Step()) - rows_to_delete.insert(select_ids.ColumnInt64(0)); - - // Delete from the pages table. 
- sql::Statement delete_page(db_.GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM pages WHERE rowid=?")); - - for (std::set<int64>::const_iterator i = rows_to_delete.begin(); - i != rows_to_delete.end(); ++i) { - delete_page.BindInt64(0, *i); - if (!delete_page.Run()) - return; - delete_page.Reset(true); - } - - // Delete from the info table. - sql::Statement delete_info(db_.GetCachedStatement(SQL_FROM_HERE, - "DELETE FROM info WHERE rowid=?")); - - for (std::set<int64>::const_iterator i = rows_to_delete.begin(); - i != rows_to_delete.end(); ++i) { - delete_info.BindInt64(0, *i); - if (!delete_info.Run()) - return; - delete_info.Reset(true); - } -} - -void TextDatabase::Optimize() { - sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, - "SELECT OPTIMIZE(pages) FROM pages LIMIT 1")); - statement.Run(); -} - -bool TextDatabase::GetTextMatches(const std::string& query, - const QueryOptions& options, - std::vector<Match>* results, - URLSet* found_urls) { - std::string sql = "SELECT url, title, time, offsets(pages), body FROM pages " - "LEFT OUTER JOIN info ON pages.rowid = info.rowid WHERE "; - sql += options.body_only ? "body " : "pages "; - sql += "MATCH ? AND time >= ? AND time < ? "; - // Times may not be unique, so also sort by rowid to ensure a stable order. - sql += "ORDER BY time DESC, info.rowid DESC"; - - // Generate unique IDs for the two possible variations of the statement, - // so they don't share the same cached prepared statement. - sql::StatementID body_only_id = SQL_FROM_HERE; - sql::StatementID pages_id = SQL_FROM_HERE; - - sql::Statement statement(db_.GetCachedStatement( - (options.body_only ? body_only_id : pages_id), sql.c_str())); - - statement.BindString(0, query); - statement.BindInt64(1, options.EffectiveBeginTime()); - statement.BindInt64(2, options.EffectiveEndTime()); - - // |results| may not be initially empty, so keep track of how many were added - // by this call. 
- int result_count = 0; - - while (statement.Step()) { - // TODO(brettw) allow canceling the query in the middle. - // if (canceled_or_something) - // break; - - GURL url(statement.ColumnString(0)); - URLSet::const_iterator found_url = found_urls->find(url); - if (found_url != found_urls->end()) - continue; // Don't add this duplicate. - - if (++result_count > options.EffectiveMaxCount()) - break; - - // Fill the results into the vector (avoid copying the URL with Swap()). - results->resize(results->size() + 1); - Match& match = results->at(results->size() - 1); - match.url.Swap(&url); - - match.title = statement.ColumnString16(1); - match.time = base::Time::FromInternalValue(statement.ColumnInt64(2)); - - // Extract any matches in the title. - std::string offsets_str = statement.ColumnString(3); - Snippet::ExtractMatchPositions(offsets_str, kTitleColumnIndex, - &match.title_match_positions); - Snippet::ConvertMatchPositionsToWide(statement.ColumnString(1), - &match.title_match_positions); - - // Extract the matches in the body. - Snippet::MatchPositions match_positions; - Snippet::ExtractMatchPositions(offsets_str, kBodyColumnIndex, - &match_positions); - - // Compute the snippet based on those matches. - std::string body = statement.ColumnString(4); - match.snippet.ComputeSnippet(match_positions, body); - } - statement.Reset(true); - return result_count > options.EffectiveMaxCount(); -} - -} // namespace history diff --git a/chrome/browser/history/text_database.h b/chrome/browser/history/text_database.h deleted file mode 100644 index ddb247b..0000000 --- a/chrome/browser/history/text_database.h +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ -#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ - -#include <set> -#include <vector> - -#include "base/basictypes.h" -#include "base/files/file_path.h" -#include "base/strings/string16.h" -#include "chrome/browser/history/history_types.h" -#include "sql/connection.h" -#include "sql/meta_table.h" -#include "url/gurl.h" - -namespace history { - -// Encapsulation of a full-text indexed database file. -class TextDatabase { - public: - typedef int DBIdent; - - typedef std::set<GURL> URLSet; - - // Returned from the search function. - struct Match { - Match(); - ~Match(); - - // URL of the match. - GURL url; - - // The title is returned because the title in the text database and the URL - // database may differ. This happens because we capture the title when the - // body is captured, and don't update it later. - string16 title; - - // Time the page that was returned was visited. - base::Time time; - - // Identifies any found matches in the title of the document. These are not - // included in the snippet. - Snippet::MatchPositions title_match_positions; - - // Snippet of the match we generated from the body. - Snippet snippet; - }; - - // Note: You must call init which must succeed before using this class. - // - // Computes the matches for the query, returning results in decreasing order - // of visit time. - // - // This function will attach the new database to the given database - // connection. This allows one sqlite3 object to share many TextDatabases, - // meaning that they will all share the same cache, which allows us to limit - // the total size that text indexing databasii can take up. - // - // |file_name| is the name of the file on disk. - // - // ID is the identifier for the database. It should uniquely identify it among - // other databases on disk and in the sqlite connection. - // - // |allow_create| indicates if we want to allow creation of the file if it - // doesn't exist. 
For files associated with older time periods, we don't want - // to create them if they don't exist, so this flag would be false. - TextDatabase(const base::FilePath& path, - DBIdent id, - bool allow_create); - ~TextDatabase(); - - // Initializes the database connection and creates the file if the class - // was created with |allow_create|. If the file couldn't be opened or - // created, this will return false. No other functions should be called - // after this. - bool Init(); - - // Allows updates to be batched. This gives higher performance when multiple - // updates are happening because every insert doesn't require a sync to disk. - // Transactions can be nested, only the outermost one will actually count. - void BeginTransaction(); - void CommitTransaction(); - - // For testing, returns the file name of the database so it can be deleted - // after the test. This is valid even before Init() is called. - const base::FilePath& file_name() const { return file_name_; } - - // Returns a NULL-terminated string that is the base of history index files, - // which is the part before the database identifier. For example - // "History Index *". This is for finding existing database files. - static const base::FilePath::CharType* file_base(); - - // Converts a filename on disk (optionally including a path) to a database - // identifier. If the filename doesn't have the correct format, returns 0. - static DBIdent FileNameToID(const base::FilePath& file_path); - - // Changing operations ------------------------------------------------------- - - // Adds the given data to the page. Returns true on success. The data should - // already be converted to UTF-8. - bool AddPageData(base::Time time, - const std::string& url, - const std::string& title, - const std::string& contents); - - // Deletes the indexed data exactly matching the given URL/time pair. - void DeletePageData(base::Time time, const std::string& url); - - // Optimizes the tree inside the database. 
This will, in addition to making - // access faster, remove any deleted data from the database (normally it is - // added again as "removed" and it is manually cleaned up when it decides to - // optimize it naturally). It is bad for privacy if a user is deleting a - // page from history but it still exists in the full text database in some - // form. This function will clean that up. - void Optimize(); - - // Querying ------------------------------------------------------------------ - - // Executes the given query. See QueryOptions for more info on input. - // - // The results are appended to any existing ones in |*results|. - // - // Any URLs found will be added to |unique_urls|. If a URL is already in the - // set, additional results will not be added (giving the ability to uniquify - // URL results). - // - // Callers must run QueryParser on the user text and pass the results of the - // QueryParser to this method as the query string. - // - // Returns true if there are more results available, i.e. if the number of - // results was restricted by |options.max_count|. - bool GetTextMatches(const std::string& query, - const QueryOptions& options, - std::vector<Match>* results, - URLSet* unique_urls); - - // Converts the given database identifier to a filename. This does not include - // the path, just the file and extension. - static base::FilePath IDToFileName(DBIdent id); - - private: - // Ensures that the tables and indices are created. Returns true on success. - bool CreateTables(); - - // The sql database. Not valid until Init is called. - sql::Connection db_; - - const base::FilePath path_; - const DBIdent ident_; - const bool allow_create_; - - // Full file name of the file on disk, computed in Init(). 
- base::FilePath file_name_; - - sql::MetaTable meta_table_; - - DISALLOW_COPY_AND_ASSIGN(TextDatabase); -}; - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ diff --git a/chrome/browser/history/text_database_manager.cc b/chrome/browser/history/text_database_manager.cc deleted file mode 100644 index c43a5fc..0000000 --- a/chrome/browser/history/text_database_manager.cc +++ /dev/null @@ -1,586 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/history/text_database_manager.h" - -#include <algorithm> -#include <functional> - -#include "base/bind.h" -#include "base/compiler_specific.h" -#include "base/files/file_enumerator.h" -#include "base/logging.h" -#include "base/message_loop/message_loop.h" -#include "base/metrics/histogram.h" -#include "base/strings/string_util.h" -#include "base/strings/utf_string_conversions.h" -#include "chrome/browser/history/history_publisher.h" -#include "chrome/browser/history/visit_database.h" - -using base::Time; -using base::TimeDelta; -using base::TimeTicks; - -namespace history { - -namespace { - -// The number of database files we will be attached to at once. -const int kCacheDBSize = 5; - -std::string ConvertStringForIndexer(const string16& input) { - // TODO(evanm): other transformations here? - return UTF16ToUTF8(CollapseWhitespace(input, false)); -} - -// Data older than this will be committed to the full text index even if we -// haven't gotten a title and/or body. 
-const int kExpirationSeconds = 20; - -} // namespace - -// TextDatabaseManager::ChangeSet ---------------------------------------------- - -TextDatabaseManager::ChangeSet::ChangeSet() {} - -TextDatabaseManager::ChangeSet::~ChangeSet() {} - -// TextDatabaseManager::PageInfo ----------------------------------------------- - -TextDatabaseManager::PageInfo::PageInfo(URLID url_id, - VisitID visit_id, - Time visit_time) - : url_id_(url_id), - visit_id_(visit_id), - visit_time_(visit_time) { - added_time_ = TimeTicks::Now(); -} - -TextDatabaseManager::PageInfo::~PageInfo() {} - -void TextDatabaseManager::PageInfo::set_title(const string16& ttl) { - if (ttl.empty()) // Make the title nonempty when we set it for EverybodySet. - title_ = ASCIIToUTF16(" "); - else - title_ = ttl; -} - -void TextDatabaseManager::PageInfo::set_body(const string16& bdy) { - if (bdy.empty()) // Make the body nonempty when we set it for EverybodySet. - body_ = ASCIIToUTF16(" "); - else - body_ = bdy; -} - -bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const { - return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds); -} - -// TextDatabaseManager --------------------------------------------------------- - -TextDatabaseManager::TextDatabaseManager(const base::FilePath& dir, - URLDatabase* url_database, - VisitDatabase* visit_database) - : dir_(dir), - url_database_(url_database), - visit_database_(visit_database), - recent_changes_(RecentChangeList::NO_AUTO_EVICT), - transaction_nesting_(0), - db_cache_(DBCache::NO_AUTO_EVICT), - present_databases_loaded_(false), - weak_factory_(this), - history_publisher_(NULL) { -} - -TextDatabaseManager::~TextDatabaseManager() { - if (transaction_nesting_) - CommitTransaction(); -} - -// static -TextDatabase::DBIdent TextDatabaseManager::TimeToID(Time time) { - Time::Exploded exploded; - time.UTCExplode(&exploded); - - // We combine the month and year into a 6-digit number (200801 for - // January, 2008). The month is 1-based. 
- return exploded.year * 100 + exploded.month; -} - -// static -Time TextDatabaseManager::IDToTime(TextDatabase::DBIdent id) { - Time::Exploded exploded; - memset(&exploded, 0, sizeof(Time::Exploded)); - exploded.year = id / 100; - exploded.month = id % 100; - return Time::FromUTCExploded(exploded); -} - -bool TextDatabaseManager::Init(const HistoryPublisher* history_publisher) { - history_publisher_ = history_publisher; - - // Start checking recent changes and committing them. - ScheduleFlushOldChanges(); - return true; -} - -void TextDatabaseManager::BeginTransaction() { - transaction_nesting_++; -} - -void TextDatabaseManager::CommitTransaction() { - DCHECK(transaction_nesting_); - transaction_nesting_--; - if (transaction_nesting_) - return; // Still more nesting of transactions before committing. - - // Commit all databases with open transactions on them. - for (DBIdentSet::const_iterator i = open_transactions_.begin(); - i != open_transactions_.end(); ++i) { - DBCache::iterator iter = db_cache_.Get(*i); - if (iter == db_cache_.end()) { - NOTREACHED() << "All open transactions should be cached."; - continue; - } - iter->second->CommitTransaction(); - } - open_transactions_.clear(); - - // Now that the transaction is over, we can expire old connections. - db_cache_.ShrinkToSize(kCacheDBSize); -} - -void TextDatabaseManager::InitDBList() { - if (present_databases_loaded_) - return; - - present_databases_loaded_ = true; - - // Find files on disk matching our pattern so we can quickly test for them. - base::FilePath::StringType filepattern(TextDatabase::file_base()); - filepattern.append(FILE_PATH_LITERAL("*")); - base::FileEnumerator enumerator( - dir_, false, base::FileEnumerator::FILES, filepattern); - base::FilePath cur_file; - while (!(cur_file = enumerator.Next()).empty()) { - // Convert to the number representing this file. - TextDatabase::DBIdent id = TextDatabase::FileNameToID(cur_file); - if (id) // Will be 0 on error. 
- present_databases_.insert(id); - } -} - -void TextDatabaseManager::AddPageURL(const GURL& url, - URLID url_id, - VisitID visit_id, - Time time) { - // Delete any existing page info. - RecentChangeList::iterator found = recent_changes_.Peek(url); - if (found != recent_changes_.end()) - recent_changes_.Erase(found); - - // Just save this info for later. We will save it when it expires or when all - // the data is complete. - recent_changes_.Put(url, PageInfo(url_id, visit_id, time)); -} - -void TextDatabaseManager::AddPageTitle(const GURL& url, - const string16& title) { - RecentChangeList::iterator found = recent_changes_.Peek(url); - if (found == recent_changes_.end()) { - // This page is not in our cache of recent pages. This is very much an edge - // case as normally a title will come in <20 seconds after the page commits, - // and WebContents will avoid spamming us with >1 title per page. However, - // it could come up if your connection is unhappy, and we don't want to - // miss anything. - // - // To solve this problem, we'll just associate the most recent visit with - // the new title and index that using the regular code path. - URLRow url_row; - if (!url_database_->GetRowForURL(url, &url_row)) - return; // URL is unknown, give up. - VisitRow visit; - if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit)) - return; // No recent visit, give up. - - if (visit.is_indexed) { - // If this page was already indexed, we could have a body that came in - // first and we don't want to overwrite it. We could go query for the - // current body, or have a special setter for only the title, but this is - // not worth it for this edge case. - // - // It will be almost impossible for the title to take longer than - // kExpirationSeconds yet we got a body in less than that time, since - // the title should always come in first. 
- return; - } - - AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time, - title, string16()); - return; // We don't know about this page, give up. - } - - PageInfo& info = found->second; - if (info.has_body()) { - // This info is complete, write to the database. - AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(), - title, info.body()); - recent_changes_.Erase(found); - return; - } - - info.set_title(title); -} - -void TextDatabaseManager::AddPageContents(const GURL& url, - const string16& body) { - RecentChangeList::iterator found = recent_changes_.Peek(url); - if (found == recent_changes_.end()) { - // This page is not in our cache of recent pages. This means that the page - // took more than kExpirationSeconds to load. Often, this will be the result - // of a very slow iframe or other resource on the page that makes us think - // it's still loading. - // - // As a fallback, set the most recent visit's contents using the input, and - // use the last set title in the URL table as the title to index. - URLRow url_row; - if (!url_database_->GetRowForURL(url, &url_row)) - return; // URL is unknown, give up. - VisitRow visit; - if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit)) - return; // No recent visit, give up. - - // Use the title from the URL row as the title for the indexing. - AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time, - url_row.title(), body); - return; - } - - PageInfo& info = found->second; - if (info.has_title()) { - // This info is complete, write to the database. 
- AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(), - info.title(), body); - recent_changes_.Erase(found); - return; - } - - info.set_body(body); -} - -bool TextDatabaseManager::AddPageData(const GURL& url, - URLID url_id, - VisitID visit_id, - Time visit_time, - const string16& title, - const string16& body) { - TextDatabase* db = GetDBForTime(visit_time, true); - if (!db) - return false; - - TimeTicks beginning_time = TimeTicks::Now(); - - // First delete any recently-indexed data for this page. This will delete - // anything in the main database, but we don't bother looking through the - // archived database. - VisitVector visits; - visit_database_->GetIndexedVisitsForURL(url_id, &visits); - for (size_t i = 0; i < visits.size(); i++) { - visits[i].is_indexed = false; - visit_database_->UpdateVisitRow(visits[i]); - DeletePageData(visits[i].visit_time, url, NULL); - } - - if (visit_id) { - // We're supposed to update the visit database, so load the visit. - VisitRow row; - if (!visit_database_->GetRowForVisit(visit_id, &row)) { - // This situation can occur if Chrome's history is in the process of - // being updated, and then the browsing history is deleted before all - // updates have been completely performed. In this case, a stale update - // to the database is attempted, leading to the warning below. - DLOG(WARNING) << "Could not find requested visit #" << visit_id; - return false; - } - - DCHECK(visit_time == row.visit_time); - - // Update the visit database to reference our addition. - row.is_indexed = true; - if (!visit_database_->UpdateVisitRow(row)) - return false; - } - - // Now index the data. 
- std::string url_str = URLDatabase::GURLToDatabaseURL(url); - bool success = db->AddPageData(visit_time, url_str, - ConvertStringForIndexer(title), - ConvertStringForIndexer(body)); - - UMA_HISTOGRAM_TIMES("History.AddFTSData", - TimeTicks::Now() - beginning_time); - - if (history_publisher_) - history_publisher_->PublishPageContent(visit_time, url, title, body); - - return success; -} - -void TextDatabaseManager::DeletePageData(Time time, const GURL& url, - ChangeSet* change_set) { - TextDatabase::DBIdent db_ident = TimeToID(time); - - // We want to open the database for writing, but only if it exists. To - // achieve this, we check whether it exists by saying we're not going to - // write to it (avoiding the autocreation code normally called when writing) - // and then access it for writing only if it succeeds. - TextDatabase* db = GetDB(db_ident, false); - if (!db) - return; - db = GetDB(db_ident, true); - - if (change_set) - change_set->Add(db_ident); - - db->DeletePageData(time, URLDatabase::GURLToDatabaseURL(url)); -} - -void TextDatabaseManager::DeleteFromUncommitted( - const std::set<GURL>& restrict_urls, Time begin, Time end) { - // First find the beginning of the range to delete. Recall that the list - // has the most recent item at the beginning. There won't normally be very - // many items, so a brute-force search is fine. - RecentChangeList::iterator cur = recent_changes_.begin(); - if (!end.is_null()) { - // Walk from the beginning of the list backwards in time to find the newest - // entry that should be deleted. - while (cur != recent_changes_.end() && cur->second.visit_time() >= end) - ++cur; - } - - // Now delete all visits up to the oldest one we were supposed to delete. - // Note that if begin is_null, it will be less than or equal to any other - // time. 
- if (restrict_urls.empty()) { - while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) - cur = recent_changes_.Erase(cur); - } else { - while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) { - if (restrict_urls.find(cur->first) != restrict_urls.end()) - cur = recent_changes_.Erase(cur); - else - ++cur; - } - } -} - -void TextDatabaseManager::DeleteFromUncommittedForTimes( - const std::vector<base::Time>& times) { - // |times| must be in reverse chronological order, i.e. each member - // must be earlier than or the same as the one before it. - DCHECK( - std::adjacent_find( - times.begin(), times.end(), std::less<base::Time>()) == - times.end()); - - // Both |recent_changes_| and |times| are in reverse chronological order. - RecentChangeList::iterator it = recent_changes_.begin(); - std::vector<base::Time>::const_iterator time_it = times.begin(); - while (it != recent_changes_.end() && time_it != times.end()) { - base::Time visit_time = it->second.visit_time(); - if (visit_time == *time_it) { - it = recent_changes_.Erase(it); - } else if (visit_time < *time_it) { - ++time_it; - } else /* if (visit_time > *time_it) */ { - ++it; - } - } -} - -void TextDatabaseManager::DeleteAll() { - DCHECK_EQ(0, transaction_nesting_) << "Calling deleteAll in a transaction."; - - InitDBList(); - - // Delete uncommitted entries. - recent_changes_.Clear(); - - // Close all open databases. - db_cache_.Clear(); - - // Now go through and delete all the files. 
- for (DBIdentSet::iterator i = present_databases_.begin(); - i != present_databases_.end(); ++i) { - base::FilePath file_name = dir_.Append(TextDatabase::IDToFileName(*i)); - sql::Connection::Delete(file_name); - } -} - -void TextDatabaseManager::OptimizeChangedDatabases( - const ChangeSet& change_set) { - for (ChangeSet::DBSet::const_iterator i = - change_set.changed_databases_.begin(); - i != change_set.changed_databases_.end(); ++i) { - // We want to open the database for writing, but only if it exists. To - // achieve this, we check whether it exists by saying we're not going to - // write to it (avoiding the autocreation code normally called when writing) - // and then access it for writing only if it succeeds. - TextDatabase* db = GetDB(*i, false); - if (!db) - continue; - db = GetDB(*i, true); - if (!db) - continue; // The file may have changed or something. - db->Optimize(); - } -} - -void TextDatabaseManager::GetTextMatches( - const string16& query, - const QueryOptions& options, - std::vector<TextDatabase::Match>* results, - Time* first_time_searched) { - results->clear(); - - *first_time_searched = options.begin_time; - - InitDBList(); - if (present_databases_.empty()) - return; // Nothing to search. - - // Get the query into the proper format for the individual DBs. - string16 fts_query16; - query_parser_.ParseQuery(query, &fts_query16); - std::string fts_query = UTF16ToUTF8(fts_query16); - - // Need a copy of the options so we can modify the max count for each call - // to the individual databases. - QueryOptions cur_options(options); - - // Compute the minimum and maximum values for the identifiers that could - // encompass the input time range. - TextDatabase::DBIdent min_ident = options.begin_time.is_null() ? - *present_databases_.begin() : - TimeToID(options.begin_time); - TextDatabase::DBIdent max_ident = options.end_time.is_null() ? 
- *present_databases_.rbegin() : - TimeToID(options.end_time); - - // Iterate over the databases from the most recent backwards. - TextDatabase::URLSet found_urls; - for (DBIdentSet::reverse_iterator i = present_databases_.rbegin(); - i != present_databases_.rend(); - ++i) { - // TODO(brettw) allow canceling the query in the middle. - // if (canceled_or_something) - // break; - - // This code is stupid, we just loop until we find the correct starting - // time range rather than search in an intelligent way. Users will have a - // few dozen files at most, so this should not be an issue. - if (*i > max_ident) - continue; // Haven't gotten to the time range yet. - if (*i < min_ident) - break; // Covered all the time range. - - TextDatabase* cur_db = GetDB(*i, false); - if (!cur_db) - continue; - - // Adjust the max count according to how many results we've already got. - if (options.max_count) { - cur_options.max_count = options.max_count - - static_cast<int>(results->size()); - } - - bool has_more_results = cur_db->GetTextMatches( - fts_query, cur_options, results, &found_urls); - - DCHECK(static_cast<int>(results->size()) <= options.EffectiveMaxCount()); - - if (has_more_results || - static_cast<int>(results->size()) == options.EffectiveMaxCount()) { - // Since the search proceeds backwards in time, the last result we have - // gives the first time searched. 
- *first_time_searched = results->back().time; - break; - } - } -} - -size_t TextDatabaseManager::GetUncommittedEntryCountForTest() const { - return recent_changes_.size(); -} - -TextDatabase* TextDatabaseManager::GetDB(TextDatabase::DBIdent id, - bool for_writing) { - DBCache::iterator found_db = db_cache_.Get(id); - if (found_db != db_cache_.end()) { - if (transaction_nesting_ && for_writing && - open_transactions_.find(id) == open_transactions_.end()) { - // If we currently have an open transaction, that database is not yet - // part of the transaction, and the database will be written to, it needs - // to be part of our transaction. - found_db->second->BeginTransaction(); - open_transactions_.insert(id); - } - return found_db->second; - } - - // Need to make the database. - TextDatabase* new_db = new TextDatabase(dir_, id, for_writing); - if (!new_db->Init()) { - delete new_db; - return NULL; - } - db_cache_.Put(id, new_db); - present_databases_.insert(id); - - if (transaction_nesting_ && for_writing) { - // If we currently have an open transaction and the new database will be - // written to, it needs to be part of our transaction. - new_db->BeginTransaction(); - open_transactions_.insert(id); - } - - // When no transaction is open, allow this new one to kick out an old one. 
- if (!transaction_nesting_) - db_cache_.ShrinkToSize(kCacheDBSize); - - return new_db; -} - -TextDatabase* TextDatabaseManager::GetDBForTime(Time time, - bool create_if_necessary) { - return GetDB(TimeToID(time), create_if_necessary); -} - -void TextDatabaseManager::ScheduleFlushOldChanges() { - weak_factory_.InvalidateWeakPtrs(); - base::MessageLoop::current()->PostDelayedTask( - FROM_HERE, - base::Bind(&TextDatabaseManager::FlushOldChanges, - weak_factory_.GetWeakPtr()), - base::TimeDelta::FromSeconds(kExpirationSeconds)); -} - -void TextDatabaseManager::FlushOldChanges() { - FlushOldChangesForTime(TimeTicks::Now()); -} - -void TextDatabaseManager::FlushOldChangesForTime(TimeTicks now) { - // The end of the list is the oldest, so we just start from there committing - // things until we get something too new. - RecentChangeList::reverse_iterator i = recent_changes_.rbegin(); - while (i != recent_changes_.rend() && i->second.Expired(now)) { - AddPageData(i->first, i->second.url_id(), i->second.visit_id(), - i->second.visit_time(), i->second.title(), i->second.body()); - i = recent_changes_.Erase(i); - } - - ScheduleFlushOldChanges(); -} - -} // namespace history diff --git a/chrome/browser/history/text_database_manager.h b/chrome/browser/history/text_database_manager.h deleted file mode 100644 index 986274c..0000000 --- a/chrome/browser/history/text_database_manager.h +++ /dev/null @@ -1,320 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_ -#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_ - -#include <cstddef> -#include <set> -#include <vector> - -#include "base/basictypes.h" -#include "base/containers/mru_cache.h" -#include "base/files/file_path.h" -#include "base/gtest_prod_util.h" -#include "base/memory/weak_ptr.h" -#include "base/strings/string16.h" -#include "chrome/browser/history/history_types.h" -#include "chrome/browser/history/query_parser.h" -#include "chrome/browser/history/text_database.h" -#include "chrome/browser/history/url_database.h" - -namespace history { - -class HistoryPublisher; -class VisitDatabase; - -// Manages a set of text databases representing different time periods. This -// will page them in and out as necessary, and will manage queries for times -// spanning multiple databases. -// -// It will also keep a list of partial changes, such as page adds and title and -// body sets, all of which come in at different times for a given page. When -// all data is received or enough time has elapsed since adding, the indexed -// data will be committed. -// -// This allows us to minimize inserts and modifications, which are slow for the -// full text database, since each page's information is added exactly once. -// -// Note: be careful to delete the relevant entries from this uncommitted list -// when clearing history or this information may get added to the database soon -// after the clear. -class TextDatabaseManager { - public: - // Tracks a set of changes (only deletes need to be supported now) to the - // databases. This is opaque to the caller, but allows it to pass back a list - // of all database that it has caused a change to. - // - // This is necessary for the feature where we optimize full text databases - // which have changed as a result of the user deleting history via - // OptimizeChangedDatabases. 
We want to do each affected database only once at - // the end of the delete, but we don't want the caller to have to worry about - // our internals. - class ChangeSet { - public: - ChangeSet(); - ~ChangeSet(); - - private: - friend class TextDatabaseManager; - - typedef std::set<TextDatabase::DBIdent> DBSet; - - void Add(TextDatabase::DBIdent id) { changed_databases_.insert(id); } - - DBSet changed_databases_; - }; - - // You must call Init() to complete initialization. - // - // |dir| is the directory that will hold the full text database files (there - // will be many files named by their date ranges). - // - // The visit database is a pointer owned by the caller for the main database - // (of recent visits). The visit database will be updated to refer to the - // added text database entries. - TextDatabaseManager(const base::FilePath& dir, - URLDatabase* url_database, - VisitDatabase* visit_database); - ~TextDatabaseManager(); - - // Must call before using other functions. If it returns false, no other - // functions should be called. - bool Init(const HistoryPublisher* history_publisher); - - // Returns the directory that holds the full text database files. - const base::FilePath& GetDir() { return dir_; } - - // Allows scoping updates. This also allows things to go faster since every - // page add doesn't need to be committed to disk (slow). Note that files will - // still get created during a transaction. - void BeginTransaction(); - void CommitTransaction(); - - // Sets specific information for the given page to be added to the database. - // In normal operation, URLs will be added as the user visits them, the titles - // and bodies will come in some time after that. These changes will be - // automatically coalesced and added to the database some time in the future - // using AddPageData(). - // - // AddPageURL must be called for a given URL (+ its corresponding ID) before - // either the title or body set. 
The visit ID specifies the visit that will - // get updated to refer to the full text indexed information. The visit time - // should be the time corresponding to that visit in the database. - void AddPageURL(const GURL& url, URLID url_id, VisitID visit_id, - base::Time visit_time); - void AddPageTitle(const GURL& url, const string16& title); - void AddPageContents(const GURL& url, const string16& body); - - // Adds the given data to the appropriate database file, returning true on - // success. The visit database row identified by |visit_id| will be updated - // to refer to the full text index entry. If the visit ID is 0, the visit - // database will not be updated. - bool AddPageData(const GURL& url, - URLID url_id, - VisitID visit_id, - base::Time visit_time, - const string16& title, - const string16& body); - - // Deletes the instance of indexed data identified by the given time and URL. - // Any changes will be tracked in the optional change set for use when calling - // OptimizeChangedDatabases later. change_set can be NULL. - void DeletePageData(base::Time time, const GURL& url, - ChangeSet* change_set); - - // The text database manager keeps a list of changes that are made to the - // file AddPageURL/Title/Body that may not be committed to the database yet. - // This function removes entries from this list happening between the given - // time range. It is called when the user clears their history for a time - // range, and we don't want any of our data to "leak." If restrict_urls is - // not empty, only changes on those URLs are deleted. - // - // Either or both times my be is_null to be unbounded in that direction. When - // non-null, the range is [begin, end). - void DeleteFromUncommitted(const std::set<GURL>& restrict_urls, - base::Time begin, base::Time end); - - // This function removes entries from the same list as - // DeleteFromUncommitted() with times belonging to the given list of - // times, which must be in reverse chronological order. 
- void DeleteFromUncommittedForTimes(const std::vector<base::Time>& times); - - // Deletes all full text search data by removing the files from the disk. - // This must be called OUTSIDE of a transaction since it actually deletes the - // files rather than messing with the database. - void DeleteAll(); - - // Calls optimize on all the databases identified in a given change set (see - // the definition of ChangeSet above for more). Optimizing means that old data - // will be removed rather than marked unused. - void OptimizeChangedDatabases(const ChangeSet& change_set); - - // Executes the given query. See QueryOptions for more info on input. - // - // The results are filled into |results|, and the first time considered for - // the output is in |first_time_searched| (see QueryResults for more). - // - // This function will return more than one match per URL if there is more than - // one entry for that URL in the database. - void GetTextMatches(const string16& query, - const QueryOptions& options, - std::vector<TextDatabase::Match>* results, - base::Time* first_time_searched); - - size_t GetUncommittedEntryCountForTest() const; - - private: - // These tests call ExpireRecentChangesForTime to force expiration. - FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, InsertPartial); - FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, PartialComplete); - FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteURLAndFavicon); - FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, FlushRecentURLsUnstarred); - FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, FlushURLsForTimes); - FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, - FlushRecentURLsUnstarredRestricted); - - // Stores "recent stuff" that has happened with the page, since the page - // visit, title, and body all come in at different times. - class PageInfo { - public: - PageInfo(URLID url_id, VisitID visit_id, base::Time visit_time); - ~PageInfo(); - - // Getters. 
- URLID url_id() const { return url_id_; } - VisitID visit_id() const { return visit_id_; } - base::Time visit_time() const { return visit_time_; } - const string16& title() const { return title_; } - const string16& body() const { return body_; } - - // Setters, we can only update the title and body. - void set_title(const string16& ttl); - void set_body(const string16& bdy); - - // Returns true if both the title or body of the entry has been set. Since - // both the title and body setters will "fix" empty strings to be a space, - // these indicate if the setter was ever called. - bool has_title() const { return !title_.empty(); } - bool has_body() { return !body_.empty(); } - - // Returns true if this entry was added too long ago and we should give up - // waiting for more data. The current time is passed in as an argument so we - // can check many without re-querying the timer. - bool Expired(base::TimeTicks now) const; - - private: - URLID url_id_; - VisitID visit_id_; - - // Time of the visit of the URL. This will be the value stored in the URL - // and visit tables for the entry. - base::Time visit_time_; - - // When this page entry was created. We have a cap on the maximum time that - // an entry will be in the queue before being flushed to the database. - base::TimeTicks added_time_; - - // Will be the string " " when they are set to distinguish set and unset. - string16 title_; - string16 body_; - }; - - // Converts the given time to a database identifier or vice-versa. - static TextDatabase::DBIdent TimeToID(base::Time time); - static base::Time IDToTime(TextDatabase::DBIdent id); - - // Returns a text database for the given identifier or time. This file will - // be created if it doesn't exist and |for_writing| is set. On error, - // including the case where the file doesn't exist and |for_writing| - // is false, it will return NULL. 
- // - // When |for_writing| is set, a transaction on the database will be opened - // if there is a transaction open on this manager. - // - // The pointer will be tracked in the cache. The caller should not store it - // or delete it since it will get automatically deleted as necessary. - TextDatabase* GetDB(TextDatabase::DBIdent id, bool for_writing); - TextDatabase* GetDBForTime(base::Time time, bool for_writing); - - // Populates the present_databases_ list based on which files are on disk. - // When the list is already initialized, this will do nothing, so you can - // call it whenever you want to ensure the present_databases_ set is filled. - void InitDBList(); - - // Schedules a call to ExpireRecentChanges in the future. - void ScheduleFlushOldChanges(); - - // Checks the recent_changes_ list and commits partial data that has been - // around too long. - void FlushOldChanges(); - - // Given "now," this will expire old things from the recent_changes_ list. - // This is used as the backend for FlushOldChanges and is called directly - // by the unit tests with fake times. - void FlushOldChangesForTime(base::TimeTicks now); - - // Directory holding our index files. - const base::FilePath dir_; - - // Non-owning pointers to the recent history databases for URLs and visits. - URLDatabase* url_database_; - VisitDatabase* visit_database_; - - // Lists recent additions that we have not yet filled out with the title and - // body. Sorted by time, we will flush them when they are complete or have - // been in the queue too long without modification. - // - // We kind of abuse the MRUCache because we never move things around in it - // using Get. Instead, we keep them in the order they were inserted, since - // this is the metric we use to measure age. The MRUCache gives us an ordered - // list with fast lookup by URL. - typedef base::MRUCache<GURL, PageInfo> RecentChangeList; - RecentChangeList recent_changes_; - - // Nesting levels of transactions. 
Since sqlite only allows one open - // transaction, we simulate nested transactions by mapping the outermost one - // to a real transaction. Since this object never needs to do ROLLBACK, losing - // the ability for all transactions to rollback is inconsequential. - int transaction_nesting_; - - // The cache owns the TextDatabase pointers, they will be automagically - // deleted when the cache entry is removed or expired. - typedef base::OwningMRUCache<TextDatabase::DBIdent, TextDatabase*> DBCache; - DBCache db_cache_; - - // Tells us about the existence of database files on disk. All existing - // databases will be in here, and non-existent ones will not, so we don't - // have to check the disk every time. - // - // This set is populated LAZILY by InitDBList(), you should call that function - // before accessing the list. - // - // Note that iterators will work on the keys in-order. Normally, reverse - // iterators will be used to iterate the keys in reverse-order. - typedef std::set<TextDatabase::DBIdent> DBIdentSet; - DBIdentSet present_databases_; - bool present_databases_loaded_; // Set by InitDBList when populated. - - // Lists all databases with open transactions. These will have to be closed - // when the transaction is committed. - DBIdentSet open_transactions_; - - QueryParser query_parser_; - - // Generates tasks for our periodic checking of expired "recent changes". - base::WeakPtrFactory<TextDatabaseManager> weak_factory_; - - // This object is created and managed by the history backend. We maintain an - // opaque pointer to the object for our use. - // This can be NULL if there are no indexers registered to receive indexing - // data from us. 
- const HistoryPublisher* history_publisher_; - - DISALLOW_COPY_AND_ASSIGN(TextDatabaseManager); -}; - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_ diff --git a/chrome/browser/history/text_database_manager_unittest.cc b/chrome/browser/history/text_database_manager_unittest.cc deleted file mode 100644 index 0ea76f6..0000000 --- a/chrome/browser/history/text_database_manager_unittest.cc +++ /dev/null @@ -1,598 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <set> - -#include "base/file_util.h" -#include "base/files/file_path.h" -#include "base/message_loop/message_loop.h" -#include "base/strings/utf_string_conversions.h" -#include "chrome/browser/history/text_database_manager.h" -#include "chrome/browser/history/visit_database.h" -#include "sql/connection.h" -#include "testing/gtest/include/gtest/gtest.h" - -using base::Time; -using base::TimeDelta; -using base::TimeTicks; - -namespace history { - -namespace { - -const char* kURL1 = "http://www.google.com/asdf"; -const char* kTitle1 = "Google A"; -const char* kBody1 = "FOO page one."; - -const char* kURL2 = "http://www.google.com/qwer"; -const char* kTitle2 = "Google B"; -const char* kBody2 = "FOO two."; - -const char* kURL3 = "http://www.google.com/zxcv"; -const char* kTitle3 = "Google C"; -const char* kBody3 = "FOO drei"; - -const char* kURL4 = "http://www.google.com/hjkl"; -const char* kTitle4 = "Google D"; -const char* kBody4 = "FOO lalala four."; - -const char* kURL5 = "http://www.google.com/uiop"; -const char* kTitle5 = "Google cinq"; -const char* kBody5 = "FOO page one."; - -// This provides a simple implementation of a URL+VisitDatabase using an -// in-memory sqlite connection. The text database manager expects to be able to -// update the visit database to keep in sync. 
-class InMemDB : public URLDatabase, public VisitDatabase { - public: - InMemDB() { - EXPECT_TRUE(db_.OpenInMemory()); - CreateURLTable(false); - InitVisitTable(); - } - virtual ~InMemDB() { - } - - private: - virtual sql::Connection& GetDB() OVERRIDE { return db_; } - - sql::Connection db_; - - DISALLOW_COPY_AND_ASSIGN(InMemDB); -}; - -// Adds all the pages once, and the first page once more in the next month. -// The times of all the pages will be filled into |*times|. -void AddAllPages(TextDatabaseManager& manager, VisitDatabase* visit_db, - std::vector<Time>* times) { - Time::Exploded exploded; - memset(&exploded, 0, sizeof(Time::Exploded)); - - // Put the visits in two different months so it will query across databases. - exploded.year = 2008; - exploded.month = 1; - exploded.day_of_month = 3; - - VisitRow visit_row; - visit_row.url_id = 1; - visit_row.visit_time = Time::FromUTCExploded(exploded); - visit_row.referring_visit = 0; - visit_row.transition = content::PageTransitionFromInt(0); - visit_row.segment_id = 0; - visit_row.is_indexed = false; - visit_db->AddVisit(&visit_row, SOURCE_BROWSED); - - times->push_back(visit_row.visit_time); - manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id, - visit_row.visit_time, UTF8ToUTF16(kTitle1), - UTF8ToUTF16(kBody1)); - - exploded.day_of_month++; - visit_row.url_id = 2; - visit_row.visit_time = Time::FromUTCExploded(exploded); - visit_db->AddVisit(&visit_row, SOURCE_BROWSED); - times->push_back(visit_row.visit_time); - manager.AddPageData(GURL(kURL2), visit_row.url_id, visit_row.visit_id, - visit_row.visit_time, UTF8ToUTF16(kTitle2), - UTF8ToUTF16(kBody2)); - - exploded.day_of_month++; - visit_row.url_id = 2; - visit_row.visit_time = Time::FromUTCExploded(exploded); - visit_db->AddVisit(&visit_row, SOURCE_BROWSED); - times->push_back(visit_row.visit_time); - manager.AddPageData(GURL(kURL3), visit_row.url_id, visit_row.visit_id, - visit_row.visit_time, UTF8ToUTF16(kTitle3), - UTF8ToUTF16(kBody3)); - 
- // Put the next ones in the next month. - exploded.month++; - visit_row.url_id = 2; - visit_row.visit_time = Time::FromUTCExploded(exploded); - visit_db->AddVisit(&visit_row, SOURCE_BROWSED); - times->push_back(visit_row.visit_time); - manager.AddPageData(GURL(kURL4), visit_row.url_id, visit_row.visit_id, - visit_row.visit_time, UTF8ToUTF16(kTitle4), - UTF8ToUTF16(kBody4)); - - exploded.day_of_month++; - visit_row.url_id = 2; - visit_row.visit_time = Time::FromUTCExploded(exploded); - visit_db->AddVisit(&visit_row, SOURCE_BROWSED); - times->push_back(visit_row.visit_time); - manager.AddPageData(GURL(kURL5), visit_row.url_id, visit_row.visit_id, - visit_row.visit_time, UTF8ToUTF16(kTitle5), - UTF8ToUTF16(kBody5)); - - // Put the first one in again in the second month. - exploded.day_of_month++; - visit_row.url_id = 2; - visit_row.visit_time = Time::FromUTCExploded(exploded); - visit_db->AddVisit(&visit_row, SOURCE_BROWSED); - times->push_back(visit_row.visit_time); - manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id, - visit_row.visit_time, UTF8ToUTF16(kTitle1), - UTF8ToUTF16(kBody1)); -} - -bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results, - const char* url) { - GURL gurl(url); - for (size_t i = 0; i < results.size(); i++) { - if (results[i].url == gurl) - return true; - } - return false; -} - -} // namespace - -class TextDatabaseManagerTest : public testing::Test { - public: - // Called manually by the test so it can report failure to initialize. - bool Init() { - return file_util::CreateNewTempDirectory( - FILE_PATH_LITERAL("TestSearchTest"), &dir_); - } - - protected: - virtual void SetUp() { - } - - virtual void TearDown() { - base::DeleteFile(dir_, true); - } - - base::MessageLoop message_loop_; - - // Directory containing the databases. - base::FilePath dir_; -}; - -// Tests basic querying. 
-TEST_F(TextDatabaseManagerTest, InsertQuery) { - ASSERT_TRUE(Init()); - InMemDB visit_db; - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - std::vector<Time> times; - AddAllPages(manager, &visit_db, ×); - - QueryOptions options; - options.begin_time = times[0] - TimeDelta::FromDays(100); - options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100); - std::vector<TextDatabase::Match> results; - Time first_time_searched; - manager.GetTextMatches(UTF8ToUTF16("FOO"), options, - &results, &first_time_searched); - - // We should have matched every page. - EXPECT_EQ(6U, results.size()); - EXPECT_TRUE(ResultsHaveURL(results, kURL1)); - EXPECT_TRUE(ResultsHaveURL(results, kURL2)); - EXPECT_TRUE(ResultsHaveURL(results, kURL3)); - EXPECT_TRUE(ResultsHaveURL(results, kURL4)); - EXPECT_TRUE(ResultsHaveURL(results, kURL5)); - - // The first time searched should have been the first page's time or before - // (it could have eliminated some time for us). - EXPECT_TRUE(first_time_searched <= times[0]); -} - -// Tests that adding page components piecemeal will get them added properly. -// This does not supply a visit to update, this mode is used only by the unit -// tests right now, but we test it anyway. -TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) { - ASSERT_TRUE(Init()); - InMemDB visit_db; - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - // First add one without a visit. - const GURL url(kURL1); - manager.AddPageURL(url, 0, 0, Time::Now()); - manager.AddPageTitle(url, UTF8ToUTF16(kTitle1)); - manager.AddPageContents(url, UTF8ToUTF16(kBody1)); - - // Check that the page got added. 
- QueryOptions options; - std::vector<TextDatabase::Match> results; - Time first_time_searched; - - manager.GetTextMatches(UTF8ToUTF16("FOO"), options, - &results, &first_time_searched); - ASSERT_EQ(1U, results.size()); - EXPECT_EQ(kTitle1, UTF16ToUTF8(results[0].title)); -} - -// Like InsertCompleteNoVisit but specifies a visit to update. We check that the -// visit was updated properly. -TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) { - ASSERT_TRUE(Init()); - InMemDB visit_db; - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - // First add a visit to a page. We can just make up a URL ID since there is - // not actually any URL database around. - VisitRow visit; - visit.url_id = 1; - visit.visit_time = Time::Now(); - visit.referring_visit = 0; - visit.transition = content::PAGE_TRANSITION_LINK; - visit.segment_id = 0; - visit.is_indexed = false; - visit_db.AddVisit(&visit, SOURCE_BROWSED); - - // Add a full text indexed entry for that visit. - const GURL url(kURL2); - manager.AddPageURL(url, visit.url_id, visit.visit_id, visit.visit_time); - manager.AddPageContents(url, UTF8ToUTF16(kBody2)); - manager.AddPageTitle(url, UTF8ToUTF16(kTitle2)); - - // Check that the page got added. - QueryOptions options; - std::vector<TextDatabase::Match> results; - Time first_time_searched; - - manager.GetTextMatches(UTF8ToUTF16("FOO"), options, - &results, &first_time_searched); - ASSERT_EQ(1U, results.size()); - EXPECT_EQ(kTitle2, UTF16ToUTF8(results[0].title)); - - // Check that the visit got updated for its new indexed state. - VisitRow out_visit; - ASSERT_TRUE(visit_db.GetRowForVisit(visit.visit_id, &out_visit)); - EXPECT_TRUE(out_visit.is_indexed); -} - -// Tests that partial inserts that expire are added to the database. 
-TEST_F(TextDatabaseManagerTest, InsertPartial) { - ASSERT_TRUE(Init()); - InMemDB visit_db; - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - // Add the first one with just a URL. - GURL url1(kURL1); - manager.AddPageURL(url1, 0, 0, Time::Now()); - - // Now add a second one with a URL and title. - GURL url2(kURL2); - manager.AddPageURL(url2, 0, 0, Time::Now()); - manager.AddPageTitle(url2, UTF8ToUTF16(kTitle2)); - - // The third one has a URL and body. - GURL url3(kURL3); - manager.AddPageURL(url3, 0, 0, Time::Now()); - manager.AddPageContents(url3, UTF8ToUTF16(kBody3)); - - // Expire stuff very fast. This assumes that the time between the first - // AddPageURL and this line is less than the expiration time (20 seconds). - TimeTicks added_time = TimeTicks::Now(); - TimeTicks expire_time = added_time + TimeDelta::FromSeconds(5); - manager.FlushOldChangesForTime(expire_time); - - // Do a query, nothing should be added yet. - QueryOptions options; - std::vector<TextDatabase::Match> results; - Time first_time_searched; - manager.GetTextMatches(UTF8ToUTF16("google"), options, - &results, &first_time_searched); - ASSERT_EQ(0U, results.size()); - - // Compute a time threshold that will cause everything to be flushed, and - // poke at the manager's internals to cause this to happen. - expire_time = added_time + TimeDelta::FromDays(1); - manager.FlushOldChangesForTime(expire_time); - - // Now we should have all 3 URLs added. - manager.GetTextMatches(UTF8ToUTF16("google"), options, - &results, &first_time_searched); - ASSERT_EQ(3U, results.size()); - EXPECT_TRUE(ResultsHaveURL(results, kURL1)); - EXPECT_TRUE(ResultsHaveURL(results, kURL2)); - EXPECT_TRUE(ResultsHaveURL(results, kURL3)); -} - -// Tests that partial inserts (due to timeouts) will still get updated if the -// data comes in later. 
-TEST_F(TextDatabaseManagerTest, PartialComplete) { - ASSERT_TRUE(Init()); - InMemDB visit_db; - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - Time added_time = Time::Now(); - GURL url(kURL1); - - // We have to have the URL in the URL and visit databases for this test to - // work. - URLRow url_row(url); - url_row.set_title(UTF8ToUTF16("chocolate")); - URLID url_id = visit_db.AddURL(url_row); - ASSERT_TRUE(url_id); - VisitRow visit_row; - visit_row.url_id = url_id; - visit_row.visit_time = added_time; - visit_db.AddVisit(&visit_row, SOURCE_BROWSED); - - // Add a URL with no title or body, and say that it expired. - manager.AddPageURL(url, 0, 0, added_time); - TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1); - manager.FlushOldChangesForTime(expire_time); - - // Add the title. We should be able to query based on that. The title in the - // URL row we set above should not come into the picture. - manager.AddPageTitle(url, UTF8ToUTF16("Some unique title")); - Time first_time_searched; - QueryOptions options; - std::vector<TextDatabase::Match> results; - manager.GetTextMatches(UTF8ToUTF16("unique"), options, - &results, &first_time_searched); - EXPECT_EQ(1U, results.size()); - manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, - &results, &first_time_searched); - EXPECT_EQ(0U, results.size()); - - // Now add the body, which should be queryable. - manager.AddPageContents(url, UTF8ToUTF16("Very awesome body")); - manager.GetTextMatches(UTF8ToUTF16("awesome"), options, &results, &first_time_searched); - EXPECT_EQ(1U, results.size()); - - // Adding the body will actually copy the title from the URL table rather - // than the previously indexed row (we made them not match above). This isn't - // necessarily what we want, but it's how it's implemented, and we don't want - // to regress it. 
- manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, &results, &first_time_searched); - EXPECT_EQ(1U, results.size()); -} - -// Tests that changes get properly committed to disk. -TEST_F(TextDatabaseManagerTest, Writing) { - ASSERT_TRUE(Init()); - - QueryOptions options; - std::vector<TextDatabase::Match> results; - Time first_time_searched; - - InMemDB visit_db; - - // Create the manager and write some stuff to it. - { - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - std::vector<Time> times; - AddAllPages(manager, &visit_db, ×); - - // We should have matched every page. - manager.GetTextMatches(UTF8ToUTF16("FOO"), options, &results, &first_time_searched); - EXPECT_EQ(6U, results.size()); - } - results.clear(); - - // Recreate the manager and make sure it finds the written stuff. - { - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - // We should have matched every page again. - manager.GetTextMatches(UTF8ToUTF16("FOO"), options, - &results, &first_time_searched); - EXPECT_EQ(6U, results.size()); - } -} - -// Tests that changes get properly committed to disk, as in the Writing test -// above, but when there is a transaction around the adds. -TEST_F(TextDatabaseManagerTest, WritingTransaction) { - ASSERT_TRUE(Init()); - - QueryOptions options; - std::vector<TextDatabase::Match> results; - Time first_time_searched; - - InMemDB visit_db; - - // Create the manager and write some stuff to it. - { - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - std::vector<Time> times; - manager.BeginTransaction(); - AddAllPages(manager, &visit_db, ×); - // "Forget" to commit, it should be autocommittedd for us. - - // We should have matched every page. 
- manager.GetTextMatches(UTF8ToUTF16("FOO"), options, - &results, &first_time_searched); - EXPECT_EQ(6U, results.size()); - } - results.clear(); - - // Recreate the manager and make sure it finds the written stuff. - { - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - // We should have matched every page again. - manager.GetTextMatches(UTF8ToUTF16("FOO"), options, - &results, &first_time_searched); - EXPECT_EQ(6U, results.size()); - } -} - -// Tests querying where the maximum number of items is met. -TEST_F(TextDatabaseManagerTest, QueryMax) { - ASSERT_TRUE(Init()); - InMemDB visit_db; - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - std::vector<Time> times; - AddAllPages(manager, &visit_db, ×); - - string16 foo = UTF8ToUTF16("FOO"); - - QueryOptions options; - options.begin_time = times[0] - TimeDelta::FromDays(100); - options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100); - options.max_count = 2; - std::vector<TextDatabase::Match> results; - Time first_time_searched; - manager.GetTextMatches(foo, options, &results, &first_time_searched); - - // We should have gotten the last two pages as results (the first page is - // also the last). - EXPECT_EQ(2U, results.size()); - EXPECT_TRUE(first_time_searched <= times[4]); - EXPECT_TRUE(ResultsHaveURL(results, kURL5)); - EXPECT_TRUE(ResultsHaveURL(results, kURL1)); - - // Asking for 4 pages, the first one should be in another DB. - options.max_count = 4; - manager.GetTextMatches(foo, options, &results, &first_time_searched); - - EXPECT_EQ(4U, results.size()); - EXPECT_TRUE(first_time_searched <= times[4]); - EXPECT_TRUE(ResultsHaveURL(results, kURL3)); - EXPECT_TRUE(ResultsHaveURL(results, kURL4)); - EXPECT_TRUE(ResultsHaveURL(results, kURL5)); - EXPECT_TRUE(ResultsHaveURL(results, kURL1)); -} - -// Tests querying backwards in time in chunks. 
-TEST_F(TextDatabaseManagerTest, QueryBackwards) { - ASSERT_TRUE(Init()); - InMemDB visit_db; - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - std::vector<Time> times; - AddAllPages(manager, &visit_db, ×); - - string16 foo = UTF8ToUTF16("FOO"); - - // First do a query for all time, but with a max of 2. This will give us the - // last two results and will tell us where to start searching when we want - // to go back in time. - QueryOptions options; - options.begin_time = times[0] - TimeDelta::FromDays(100); - options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100); - options.max_count = 2; - std::vector<TextDatabase::Match> results; - Time first_time_searched; - manager.GetTextMatches(foo, options, &results, &first_time_searched); - - // Check that we got the last two results. - EXPECT_EQ(2U, results.size()); - EXPECT_TRUE(first_time_searched <= times[4]); - EXPECT_TRUE(ResultsHaveURL(results, kURL5)); - EXPECT_TRUE(ResultsHaveURL(results, kURL1)); - - // Query the previous two URLs and make sure we got the correct ones. - options.end_time = first_time_searched; - manager.GetTextMatches(foo, options, &results, &first_time_searched); - EXPECT_EQ(2U, results.size()); - EXPECT_TRUE(first_time_searched <= times[2]); - EXPECT_TRUE(ResultsHaveURL(results, kURL3)); - EXPECT_TRUE(ResultsHaveURL(results, kURL4)); - - // Query the previous two URLs... - options.end_time = first_time_searched; - manager.GetTextMatches(foo, options, &results, &first_time_searched); - EXPECT_EQ(2U, results.size()); - EXPECT_TRUE(first_time_searched <= times[0]); - EXPECT_TRUE(ResultsHaveURL(results, kURL2)); - EXPECT_TRUE(ResultsHaveURL(results, kURL1)); - - // Try to query some more, there should be no results. - options.end_time = first_time_searched; - manager.GetTextMatches(foo, options, &results, &first_time_searched); - EXPECT_EQ(0U, results.size()); -} - -// Tests deletion of uncommitted entries. 
-TEST_F(TextDatabaseManagerTest, DeleteUncommitted) { - ASSERT_TRUE(Init()); - InMemDB visit_db; - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - manager.AddPageURL(GURL(kURL1), 0, 0, Time::FromInternalValue(1)); - manager.AddPageURL(GURL(kURL2), 0, 0, Time::FromInternalValue(2)); - manager.AddPageURL(GURL(kURL3), 0, 0, Time::FromInternalValue(3)); - manager.AddPageURL(GURL(kURL4), 0, 0, Time::FromInternalValue(4)); - manager.AddPageURL(GURL(kURL5), 0, 0, Time::FromInternalValue(5)); - - EXPECT_EQ(5u, manager.GetUncommittedEntryCountForTest()); - - // Should delete the first two entries. - manager.DeleteFromUncommitted(std::set<GURL>(), - Time::FromInternalValue(1), - Time::FromInternalValue(3)); - - EXPECT_EQ(3u, manager.GetUncommittedEntryCountForTest()); - - // Should delete the third entry. - { - std::set<GURL> urls; - urls.insert(GURL(kURL3)); - manager.DeleteFromUncommitted(urls, Time(), Time()); - } - - EXPECT_EQ(2u, manager.GetUncommittedEntryCountForTest()); -} - -// Tests deletion of uncommitted entries by time. 
-TEST_F(TextDatabaseManagerTest, DeleteUncommittedForTimes) { - ASSERT_TRUE(Init()); - InMemDB visit_db; - TextDatabaseManager manager(dir_, &visit_db, &visit_db); - ASSERT_TRUE(manager.Init(NULL)); - - manager.AddPageURL(GURL(kURL1), 0, 0, Time::FromInternalValue(2)); - manager.AddPageURL(GURL(kURL2), 0, 0, Time::FromInternalValue(3)); - manager.AddPageURL(GURL(kURL3), 0, 0, Time::FromInternalValue(4)); - manager.AddPageURL(GURL(kURL4), 0, 0, Time::FromInternalValue(5)); - manager.AddPageURL(GURL(kURL5), 0, 0, Time::FromInternalValue(6)); - - EXPECT_EQ(5u, manager.GetUncommittedEntryCountForTest()); - - std::vector<base::Time> times; - times.push_back(Time::FromInternalValue(9)); - times.push_back(Time::FromInternalValue(7)); - times.push_back(Time::FromInternalValue(5)); - times.push_back(Time::FromInternalValue(5)); - times.push_back(Time::FromInternalValue(3)); - times.push_back(Time::FromInternalValue(1)); - manager.DeleteFromUncommittedForTimes(times); - - EXPECT_EQ(3u, manager.GetUncommittedEntryCountForTest()); -} - -} // namespace history diff --git a/chrome/browser/history/visit_database.cc b/chrome/browser/history/visit_database.cc index 883fe6f..5e04c02 100644 --- a/chrome/browser/history/visit_database.cc +++ b/chrome/browser/history/visit_database.cc @@ -34,19 +34,10 @@ bool VisitDatabase::InitVisitTable() { "from_visit INTEGER," "transition INTEGER DEFAULT 0 NOT NULL," "segment_id INTEGER," - // True when we have indexed data for this visit. - "is_indexed BOOLEAN," + // Some old DBs may have an "is_indexed" field here, but this is no + // longer used and should NOT be read or written from any longer. "visit_duration INTEGER DEFAULT 0 NOT NULL)")) return false; - } else if (!GetDB().DoesColumnExist("visits", "is_indexed")) { - // Old versions don't have the is_indexed column, we can just add that and - // not worry about different database revisions, since old ones will - // continue to work. 
- // - // TODO(brettw) this should be removed once we think everybody has been - // updated (added early Mar 2008). - if (!GetDB().Execute("ALTER TABLE visits ADD COLUMN is_indexed BOOLEAN")) - return false; } // Visit source table contains the source information for all the visits. To @@ -98,9 +89,8 @@ void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) { visit->referring_visit = statement.ColumnInt64(3); visit->transition = content::PageTransitionFromInt(statement.ColumnInt(4)); visit->segment_id = statement.ColumnInt64(5); - visit->is_indexed = !!statement.ColumnInt(6); visit->visit_duration = - base::TimeDelta::FromInternalValue(statement.ColumnInt64(7)); + base::TimeDelta::FromInternalValue(statement.ColumnInt64(6)); } // static @@ -154,15 +144,14 @@ bool VisitDatabase::FillVisitVectorWithOptions(sql::Statement& statement, VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "INSERT INTO visits " - "(url, visit_time, from_visit, transition, segment_id, is_indexed, " - "visit_duration) VALUES (?,?,?,?,?,?,?)")); + "(url, visit_time, from_visit, transition, segment_id, " + "visit_duration) VALUES (?,?,?,?,?,?)")); statement.BindInt64(0, visit->url_id); statement.BindInt64(1, visit->visit_time.ToInternalValue()); statement.BindInt64(2, visit->referring_visit); statement.BindInt64(3, visit->transition); statement.BindInt64(4, visit->segment_id); - statement.BindInt64(5, visit->is_indexed); - statement.BindInt64(6, visit->visit_duration.ToInternalValue()); + statement.BindInt64(5, visit->visit_duration.ToInternalValue()); if (!statement.Run()) { VLOG(0) << "Failed to execute visit insert statement: " @@ -241,16 +230,15 @@ bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "UPDATE visits SET " - "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?,is_indexed=?," + 
"url=?,visit_time=?,from_visit=?,transition=?,segment_id=?," "visit_duration=? WHERE id=?")); statement.BindInt64(0, visit.url_id); statement.BindInt64(1, visit.visit_time.ToInternalValue()); statement.BindInt64(2, visit.referring_visit); statement.BindInt64(3, visit.transition); statement.BindInt64(4, visit.segment_id); - statement.BindInt64(5, visit.is_indexed); - statement.BindInt64(6, visit.visit_duration.ToInternalValue()); - statement.BindInt64(7, visit.visit_id); + statement.BindInt64(5, visit.visit_duration.ToInternalValue()); + statement.BindInt64(6, visit.visit_id); return statement.Run(); } @@ -267,17 +255,6 @@ bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) { return FillVisitVector(statement, visits); } -bool VisitDatabase::GetIndexedVisitsForURL(URLID url_id, VisitVector* visits) { - visits->clear(); - - sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, - "SELECT" HISTORY_VISIT_ROW_FIELDS - "FROM visits " - "WHERE url=? AND is_indexed=1")); - statement.BindInt64(0, url_id); - return FillVisitVector(statement, visits); -} - bool VisitDatabase::GetVisitsForURLWithOptions(URLID url_id, const QueryOptions& options, VisitVector* visits) { diff --git a/chrome/browser/history/visit_database.h b/chrome/browser/history/visit_database.h index effaa8f..f196c88 100644 --- a/chrome/browser/history/visit_database.h +++ b/chrome/browser/history/visit_database.h @@ -58,11 +58,6 @@ class VisitDatabase { // may still be no matches). bool GetVisitsForURL(URLID url_id, VisitVector* visits); - // Fills in the given vector with all of the visits for the given page ID that - // have the |is_indexed| field set to true, in no particular order. - // Returns true on success (although there may still be no matches). - bool GetIndexedVisitsForURL(URLID url_id, VisitVector* visits); - // Fills in the given vector with the visits for the given page ID which // match the set of options passed, sorted in ascending order of date. 
// @@ -230,8 +225,7 @@ class VisitDatabase { // Rows, in order, of the visit table. #define HISTORY_VISIT_ROW_FIELDS \ - " id,url,visit_time,from_visit,transition,segment_id,is_indexed," \ - "visit_duration " + " id,url,visit_time,from_visit,transition,segment_id,visit_duration " } // namespace history diff --git a/chrome/browser/history/visit_database_unittest.cc b/chrome/browser/history/visit_database_unittest.cc index a6146b4..df8075e 100644 --- a/chrome/browser/history/visit_database_unittest.cc +++ b/chrome/browser/history/visit_database_unittest.cc @@ -29,8 +29,7 @@ bool IsVisitInfoEqual(const VisitRow& a, a.url_id == b.url_id && a.visit_time == b.visit_time && a.referring_visit == b.referring_visit && - a.transition == b.transition && - a.is_indexed == b.is_indexed; + a.transition == b.transition; } } // namespace @@ -148,7 +147,6 @@ TEST_F(VisitDatabaseTest, Update) { modification.transition = content::PAGE_TRANSITION_TYPED; modification.visit_time = Time::Now() + TimeDelta::FromDays(1); modification.referring_visit = 9292; - modification.is_indexed = true; UpdateVisitRow(modification); // Check that the mutated version was written. @@ -387,31 +385,4 @@ TEST_F(VisitDatabaseTest, VisitSource) { EXPECT_EQ(SOURCE_EXTENSION, sources[matches[0].visit_id]); } -TEST_F(VisitDatabaseTest, GetIndexedVisits) { - // Add non-indexed visits. 
- int url_id = 111; - VisitRow visit_info1( - url_id, Time::Now(), 0, content::PAGE_TRANSITION_LINK, 0); - ASSERT_TRUE(AddVisit(&visit_info1, SOURCE_BROWSED)); - - VisitRow visit_info2( - url_id, Time::Now(), 0, content::PAGE_TRANSITION_TYPED, 0); - ASSERT_TRUE(AddVisit(&visit_info2, SOURCE_SYNCED)); - - std::vector<VisitRow> visits; - EXPECT_TRUE(GetVisitsForURL(url_id, &visits)); - EXPECT_EQ(static_cast<size_t>(2), visits.size()); - EXPECT_TRUE(GetIndexedVisitsForURL(url_id, &visits)); - EXPECT_EQ(static_cast<size_t>(0), visits.size()); - - VisitRow visit_info3( - url_id, Time::Now(), 2, content::PAGE_TRANSITION_TYPED, 0); - visit_info3.is_indexed = true; - ASSERT_TRUE(AddVisit(&visit_info3, SOURCE_SYNCED)); - EXPECT_TRUE(GetVisitsForURL(url_id, &visits)); - EXPECT_EQ(static_cast<size_t>(3), visits.size()); - EXPECT_TRUE(GetIndexedVisitsForURL(url_id, &visits)); - EXPECT_EQ(static_cast<size_t>(1), visits.size()); -} - } // namespace history diff --git a/chrome/browser/ui/omnibox/omnibox_view_browsertest.cc b/chrome/browser/ui/omnibox/omnibox_view_browsertest.cc index 88dd32e..c110148 100644 --- a/chrome/browser/ui/omnibox/omnibox_view_browsertest.cc +++ b/chrome/browser/ui/omnibox/omnibox_view_browsertest.cc @@ -320,7 +320,6 @@ class OmniboxViewTest : public InProcessBrowserTest, entry.visit_count, entry.typed_count, time, false, history::SOURCE_BROWSED); - history_service->SetPageContents(url, UTF8ToUTF16(entry.body)); if (entry.starred) bookmark_utils::AddIfNotBookmarked(bookmark_model, url, string16()); // Wait at least for the AddPageWithDetails() call to finish. 
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 2b79c88..f37ed9f 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -768,10 +768,6 @@ 'browser/history/shortcuts_backend_factory.h', 'browser/history/shortcuts_database.cc', 'browser/history/shortcuts_database.h', - 'browser/history/text_database.cc', - 'browser/history/text_database.h', - 'browser/history/text_database_manager.cc', - 'browser/history/text_database_manager.h', 'browser/history/thumbnail_database.cc', 'browser/history/thumbnail_database.h', 'browser/history/top_sites.cc', diff --git a/chrome/chrome_tests_unit.gypi b/chrome/chrome_tests_unit.gypi index 126b736..8678b47 100644 --- a/chrome/chrome_tests_unit.gypi +++ b/chrome/chrome_tests_unit.gypi @@ -917,8 +917,6 @@ 'browser/history/shortcuts_backend_unittest.cc', 'browser/history/shortcuts_database_unittest.cc', 'browser/history/snippet_unittest.cc', - 'browser/history/text_database_manager_unittest.cc', - 'browser/history/text_database_unittest.cc', 'browser/history/thumbnail_database_unittest.cc', 'browser/history/top_sites_database_unittest.cc', 'browser/history/top_sites_impl_unittest.cc', diff --git a/chrome/common/render_messages.h b/chrome/common/render_messages.h index 92ff235..021fa95 100644 --- a/chrome/common/render_messages.h +++ b/chrome/common/render_messages.h @@ -398,12 +398,6 @@ IPC_MESSAGE_ROUTED1(ChromeViewMsg_NetErrorInfo, // Misc messages // These are messages sent from the renderer to the browser process. -// Provides the contents for the given page that was loaded recently. -IPC_MESSAGE_ROUTED3(ChromeViewHostMsg_PageContents, - GURL /* URL of the page */, - int32 /* page id */, - string16 /* page contents */) - // Notification that the language for the tab has been determined. 
IPC_MESSAGE_ROUTED2(ChromeViewHostMsg_TranslateLanguageDetermined, LanguageDetectionDetails /* details about lang detection */, diff --git a/chrome/renderer/chrome_render_view_observer.cc b/chrome/renderer/chrome_render_view_observer.cc index 7fe88bb..fa9a76a9 100644 --- a/chrome/renderer/chrome_render_view_observer.cc +++ b/chrome/renderer/chrome_render_view_observer.cc @@ -741,13 +741,6 @@ void ChromeRenderViewObserver::CapturePageInfo(int page_id, TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); - if (contents.size()) { - // Send the text to the browser for indexing (the browser might decide not - // to index, if the URL is HTTPS for instance). - Send(new ChromeViewHostMsg_PageContents(routing_id(), url, page_id, - contents)); - } - #if defined(FULL_SAFE_BROWSING) // Will swap out the string. if (phishing_classifier_) diff --git a/chrome/test/data/History/url_history_provider_test.db.txt b/chrome/test/data/History/url_history_provider_test.db.txt index 0019f94..c46927d 100644 --- a/chrome/test/data/History/url_history_provider_test.db.txt +++ b/chrome/test/data/History/url_history_provider_test.db.txt @@ -86,23 +86,23 @@ INSERT INTO "urls" VALUES(35,'http://en.wikipedia.org/wiki/1%25_rule_(Internet_c the visits functionality, certainly not as many visits as are implied by the visit counts associated with the URLs above. 
*/ -INSERT INTO "visits" VALUES(1, 1, 2, 4, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(2, 1, 5, 0, 1, 0, 0, 1); -INSERT INTO "visits" VALUES(3, 1, 12, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(4, 32, 1, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(5, 32, 2, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(6, 32, 3, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(7, 32, 4, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(8, 32, 5, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(9, 32, 6, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(10, 32, 7, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(11, 32, 8, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(12, 32, 9, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(13, 32, 10, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(14, 32, 11, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(15, 32, 12, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(16, 32, 13, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(17, 32, 14, 0, 0, 0, 0, 1); -INSERT INTO "visits" VALUES(18, 32, 15, 0, 1, 0, 0, 1); -INSERT INTO "visits" VALUES(19, 35, 0, 0, 1, 0, 0, 1); -INSERT INTO "visits" VALUES(20, 35, 7, 0, 1, 0, 0, 1); +INSERT INTO "visits" VALUES(1, 1, 2, 4, 0, 0, 1); +INSERT INTO "visits" VALUES(2, 1, 5, 0, 1, 0, 1); +INSERT INTO "visits" VALUES(3, 1, 12, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(4, 32, 1, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(5, 32, 2, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(6, 32, 3, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(7, 32, 4, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(8, 32, 5, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(9, 32, 6, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(10, 32, 7, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(11, 32, 8, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(12, 32, 9, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(13, 32, 10, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(14, 32, 11, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(15, 32, 12, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(16, 32, 13, 0, 0, 0, 1); +INSERT INTO "visits" VALUES(17, 32, 14, 
0, 0, 0, 1); +INSERT INTO "visits" VALUES(18, 32, 15, 0, 1, 0, 1); +INSERT INTO "visits" VALUES(19, 35, 0, 0, 1, 0, 1); +INSERT INTO "visits" VALUES(20, 35, 7, 0, 1, 0, 1); diff --git a/chrome/test/perf/generate_profile.cc b/chrome/test/perf/generate_profile.cc index d37060a..f66056c 100644 --- a/chrome/test/perf/generate_profile.cc +++ b/chrome/test/perf/generate_profile.cc @@ -192,8 +192,6 @@ void InsertURLBatch(Profile* profile, transition, history::SOURCE_BROWSED, true); ThumbnailScore score(0.75, false, false); history_service->SetPageTitle(url, ConstructRandomTitle()); - if (types & FULL_TEXT) - history_service->SetPageContents(url, ConstructRandomPage()); if (types & TOP_SITES && top_sites) { top_sites->SetPageThumbnailToJPEGBytes( url, diff --git a/chrome/test/perf/generate_profile.h b/chrome/test/perf/generate_profile.h index 85a2858..fe1cfbf 100644 --- a/chrome/test/perf/generate_profile.h +++ b/chrome/test/perf/generate_profile.h @@ -14,8 +14,7 @@ class FilePath; // Addition types data can be generated for. By default only urls/visits are // added. enum GenerateProfileTypes { - TOP_SITES = 1 << 0, - FULL_TEXT = 1 << 1 + TOP_SITES = 1 << 0 }; // Generates a user profile and history by psuedo-randomly generating data and |