author     asargent@chromium.org <asargent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>    2013-07-18 23:25:55 +0000
committer  asargent@chromium.org <asargent@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>    2013-07-18 23:25:55 +0000
commit     b2729b58d392ba2272a7f98c6ee6d7b917c4841c (patch)
tree       9e2dd6c778c1bd4fd36eb60ae38bd75cd1869957 /chrome/browser/history/history_backend.cc
parent     53b4cc1380a09bba155f2465dc263c78033e8229 (diff)
Revert 212459 "Remove TextDatabase from the history service."
This appears to have broken several history unit tests on Mac 10.6:

  AddNewRows
  CacheSaveRestore
  CursorPositionRetrieval
  DeleteRows
  ExpireRow
  HugeResultSet
  NonUniqueTermCharacterSets
  ProperStringMatching
  ReadVisitsFromHistory
  RebuildFromHistoryIfCacheOld
  Retrieval
  TitleChange
  TitleSearch
  TypedCharacterCaching
  URLPrefixMatching
  WhitelistedURLs

Example output:
http://build.chromium.org/p/chromium.mac/builders/Mac10.6%20Tests%20%282%29/builds/40521/steps/unit_tests/logs/AddNewRows

> Remove TextDatabase from the history service.
>
> The full text indexing feature is no longer used by anything.
> - Remove the TextDatabase, TextDatabaseManager and related files.
> - Remove the is_indexed field from VisitRow objects.
> - Modify ExpireHistoryBackend and HistoryBackend to no longer call TextDatabase.
> - Have HistoryBackend delete the "History Index *" files from the user's profile.
> - Remove the ChromeViewHostMsg_PageContents IPC message, since the browser
>   process no longer needs the page contents of recently loaded pages.
>
> BUG=247418
> TESTS=unit tests, manual testing and run through trybots.
>
> Review URL: https://chromiumcodereview.appspot.com/16951015

TBR=rmcilroy@chromium.org

Review URL: https://codereview.chromium.org/19637009

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@212473 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history/history_backend.cc')
-rw-r--r--  chrome/browser/history/history_backend.cc  221
1 file changed, 193 insertions, 28 deletions
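
For orientation before the diff: the revert restores the per-month full-text index ("History Index *" files) and the path by which HistoryBackend receives page contents for indexing. The following is a minimal standalone sketch, not Chromium code, of the per-month partitioning idea; PageText, MonthKey and TextIndexSketch are hypothetical names invented for illustration.

#include <ctime>
#include <map>
#include <string>
#include <utility>
#include <vector>

// A page's text as it might be handed to the index (illustrative only).
struct PageText {
  std::string url;
  std::string title;
  std::string body;
};

// One full-text partition per (year, month), mirroring the per-month
// "History Index *" files the revert restores.
using MonthKey = std::pair<int, int>;

class TextIndexSketch {
 public:
  // Route a page into the partition for the month of its visit time.
  void AddPage(std::time_t visit_time, PageText page) {
    std::tm exploded{};
    if (std::tm* t = std::gmtime(&visit_time))
      exploded = *t;
    MonthKey key{exploded.tm_year + 1900, exploded.tm_mon + 1};
    indexes_[key].push_back(std::move(page));
  }

  // Naive substring search across all monthly partitions, newest month first.
  std::vector<PageText> Query(const std::string& term) const {
    std::vector<PageText> results;
    for (auto it = indexes_.rbegin(); it != indexes_.rend(); ++it) {
      for (const PageText& page : it->second) {
        if (page.title.find(term) != std::string::npos ||
            page.body.find(term) != std::string::npos)
          results.push_back(page);
      }
    }
    return results;
  }

 private:
  std::map<MonthKey, std::vector<PageText>> indexes_;
};
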
diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc
index 83a2afd..adaff5a 100644
--- a/chrome/browser/history/history_backend.cc
+++ b/chrome/browser/history/history_backend.cc
@@ -14,7 +14,6 @@
#include "base/basictypes.h"
#include "base/bind.h"
#include "base/compiler_specific.h"
-#include "base/files/file_enumerator.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/scoped_vector.h"
#include "base/message_loop/message_loop.h"
@@ -69,6 +68,10 @@ using base::TimeTicks;
(this does not store visit segments as they expire after 3 mos.)
+ TextDatabaseManager (manages multiple text database for different times)
+ TextDatabase (represents a single month of full-text index).
+ ...more TextDatabase objects...
+
ExpireHistoryBackend (manages moving things from HistoryDatabase to
the ArchivedDatabase and deleting)
*/
@@ -165,6 +168,53 @@ class CommitLaterTask : public base::RefCounted<CommitLaterTask> {
scoped_refptr<HistoryBackend> history_backend_;
};
+// Handles querying first the main database, then the full text database if that
+// fails. It will optionally keep track of all URLs seen so duplicates can be
+// eliminated. This is used by the querying sub-functions.
+//
+// TODO(brettw): This class may be able to be simplified or eliminated. After
+// this was written, QueryResults can efficiently look up by URL, so the need
+// for this extra set of previously queried URLs is less important.
+class HistoryBackend::URLQuerier {
+ public:
+ URLQuerier(URLDatabase* main_db, URLDatabase* archived_db, bool track_unique)
+ : main_db_(main_db),
+ archived_db_(archived_db),
+ track_unique_(track_unique) {
+ }
+
+ // When we're tracking unique URLs, returns true if this URL has been
+ // previously queried. Only call when tracking unique URLs.
+ bool HasURL(const GURL& url) {
+ DCHECK(track_unique_);
+ return unique_urls_.find(url) != unique_urls_.end();
+ }
+
+ bool GetRowForURL(const GURL& url, URLRow* row) {
+ if (!main_db_->GetRowForURL(url, row)) {
+ if (!archived_db_ || !archived_db_->GetRowForURL(url, row)) {
+ // This row is neither in the main nor the archived DB.
+ return false;
+ }
+ }
+
+ if (track_unique_)
+ unique_urls_.insert(url);
+ return true;
+ }
+
+ private:
+ URLDatabase* main_db_; // Guaranteed non-NULL.
+ URLDatabase* archived_db_; // Possibly NULL.
+
+ bool track_unique_;
+
+ // When track_unique_ is set, this is updated with every URL seen so far.
+ std::set<GURL> unique_urls_;
+
+ DISALLOW_COPY_AND_ASSIGN(URLQuerier);
+};
+
// HistoryBackend --------------------------------------------------------------
HistoryBackend::HistoryBackend(const base::FilePath& history_dir,
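
The URLQuerier restored above looks a URL up in the main database first, falls back to the archived database, and can remember every URL it has returned so duplicates can be skipped. Below is a standalone sketch of that fallback-and-dedup pattern, with LookupFn and UrlRow as hypothetical stand-ins for URLDatabase and URLRow.

#include <functional>
#include <optional>
#include <set>
#include <string>

struct UrlRow {
  std::string url;
  std::string title;
};

// A lookup callable standing in for URLDatabase::GetRowForURL().
using LookupFn = std::function<std::optional<UrlRow>(const std::string&)>;

class QuerierSketch {
 public:
  QuerierSketch(LookupFn main_db, LookupFn archived_db, bool track_unique)
      : main_db_(std::move(main_db)),
        archived_db_(std::move(archived_db)),
        track_unique_(track_unique) {}

  // True if a previous GetRowForURL() already returned this URL.
  bool HasURL(const std::string& url) const {
    return unique_urls_.count(url) != 0;
  }

  // Try the main database first, then the archive; remember the URL so
  // later duplicates can be skipped.
  std::optional<UrlRow> GetRowForURL(const std::string& url) {
    std::optional<UrlRow> row = main_db_(url);
    if (!row && archived_db_)
      row = archived_db_(url);
    if (row && track_unique_)
      unique_urls_.insert(url);
    return row;
  }

 private:
  LookupFn main_db_;
  LookupFn archived_db_;  // May be empty (no archive).
  bool track_unique_;
  std::set<std::string> unique_urls_;
};
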
@@ -532,7 +582,7 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) {
}
// Last, save this redirect chain for later so we can set titles & favicons
- // on the redirected pages properly.
+ // on the redirected pages properly. It is indexed by the destination page.
recent_redirects_.Put(request.url, redirects);
}
@@ -550,6 +600,11 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) {
last_ids.second);
}
+ if (text_database_) {
+ text_database_->AddPageURL(request.url, last_ids.first, last_ids.second,
+ request.time);
+ }
+
ScheduleCommit();
}
@@ -562,14 +617,12 @@ void HistoryBackend::InitImpl(const std::string& languages) {
TimeTicks beginning_time = TimeTicks::Now();
- // Compute the file names.
+ // Compute the file names. Note that the index file can be removed when the
+ // text db manager is finished being hooked up.
base::FilePath history_name = history_dir_.Append(chrome::kHistoryFilename);
base::FilePath thumbnail_name = GetThumbnailFileName();
base::FilePath archived_name = GetArchivedFileName();
- // Delete the old index database files which are no longer used.
- DeleteFTSIndexDatabases();
-
// History database.
db_.reset(new HistoryDatabase());
@@ -609,8 +662,8 @@ void HistoryBackend::InitImpl(const std::string& languages) {
delete mem_backend; // Error case, run without the in-memory DB.
db_->BeginExclusiveMode(); // Must be after the mem backend read the data.
- // Create the history publisher which needs to be passed on to the thumbnail
- // database for publishing history.
+ // Create the history publisher which needs to be passed on to the text and
+ // thumbnail databases for publishing history.
history_publisher_.reset(new HistoryPublisher());
if (!history_publisher_->Init()) {
// The init may fail when there are no indexers wanting our history.
@@ -618,6 +671,22 @@ void HistoryBackend::InitImpl(const std::string& languages) {
history_publisher_.reset();
}
+ // Full-text database. This has to be first so we can pass it to the
+ // HistoryDatabase for migration.
+ text_database_.reset(new TextDatabaseManager(history_dir_,
+ db_.get(), db_.get()));
+ if (!text_database_->Init(history_publisher_.get())) {
+ LOG(WARNING) << "Text database initialization failed, running without it.";
+ text_database_.reset();
+ }
+ if (db_->needs_version_17_migration()) {
+ // See needs_version_17_migration() decl for more. In this case, we want
+ // to erase all the text database files. This must be done after the text
+ // database manager has been initialized, since it knows about all the
+ // files it manages.
+ text_database_->DeleteAll();
+ }
+
// Thumbnail database.
thumbnail_db_.reset(new ThumbnailDatabase());
if (!db_->GetNeedsThumbnailMigration()) {
@@ -670,7 +739,7 @@ void HistoryBackend::InitImpl(const std::string& languages) {
// The main DB initialization should intuitively be first (not that it
// actually matters) and the expirer should be set last.
expirer_.SetDatabases(db_.get(), archived_db_.get(),
- thumbnail_db_.get());
+ thumbnail_db_.get(), text_database_.get());
// Open the long-running transaction.
db_->BeginTransaction();
@@ -678,6 +747,8 @@ void HistoryBackend::InitImpl(const std::string& languages) {
thumbnail_db_->BeginTransaction();
if (archived_db_)
archived_db_->BeginTransaction();
+ if (text_database_)
+ text_database_->BeginTransaction();
// Get the first item in our database.
db_->GetStartDate(&first_recorded_time_);
@@ -723,6 +794,10 @@ void HistoryBackend::CloseAllDatabases() {
archived_db_->CommitTransaction();
archived_db_.reset();
}
+ if (text_database_) {
+ text_database_->CommitTransaction();
+ text_database_.reset();
+ }
}
std::pair<URLID, VisitID> HistoryBackend::AddPageVisit(
@@ -786,6 +861,14 @@ std::pair<URLID, VisitID> HistoryBackend::AddPageVisit(
return std::make_pair(0, 0);
}
url_info.id_ = url_id;
+
+ // We don't actually add the URL to the full text index at this point. It
+ // might be nice to do this so that even if we get no title or body, the
+ // user can search for URL components and get the page.
+ //
+ // However, in most cases, we'll get at least a title and usually contents,
+ // and this add will be redundant, slowing everything down. As a result,
+ // we ignore this edge case.
}
// Add the visit with the time to the database.
@@ -855,6 +938,26 @@ void HistoryBackend::AddPagesWithDetails(const URLRows& urls,
}
}
+ // Add the page to the full text index. This function is also used for
+ // importing. Even though we don't have page contents, we can at least
+ // add the title and URL to the index so they can be searched. We don't
+ // bother to delete any already-existing FTS entries for the URL, since
+ // this is normally called on import.
+ //
+ // If you ever import *after* first run (selecting import from the menu),
+ // then these additional entries will "shadow" the originals when querying
+ // for the most recent match only, and the user won't get snippets. This is
+ // a very minor issue, and fixing it will make import slower, so we don't
+ // bother.
+ bool has_indexed = false;
+ if (text_database_) {
+ // We do not have to make it update the visit database, below, we will
+ // create the visit entry with the indexed flag set.
+ has_indexed = text_database_->AddPageData(i->url(), url_id, 0,
+ i->last_visit(),
+ i->title(), string16());
+ }
+
// Sync code manages the visits itself.
if (visit_source != SOURCE_SYNCED) {
// Make up a visit to correspond to the last visit to the page.
@@ -863,6 +966,7 @@ void HistoryBackend::AddPagesWithDetails(const URLRows& urls,
content::PAGE_TRANSITION_LINK |
content::PAGE_TRANSITION_CHAIN_START |
content::PAGE_TRANSITION_CHAIN_END), 0);
+ visit_info.is_indexed = has_indexed;
if (!visit_database->AddVisit(&visit_info, visit_source)) {
NOTREACHED() << "Adding visit failed.";
return;
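
The import path above indexes only the URL and title (no page body is available at import time) and records whether indexing succeeded in the visit's is_indexed flag. A rough standalone sketch of that decision follows; ImportedPage, VisitRecord and IndexFn are hypothetical illustration types.

#include <functional>
#include <string>
#include <vector>

struct ImportedPage {
  std::string url;
  std::string title;
  long long last_visit = 0;
};

struct VisitRecord {
  std::string url;
  long long time = 0;
  bool is_indexed = false;  // Mirrors the VisitRow::is_indexed flag in the diff.
};

// Adds (url, title) to a full-text index; returns false on failure.
using IndexFn = std::function<bool(const std::string&, const std::string&)>;

std::vector<VisitRecord> ImportPages(const std::vector<ImportedPage>& pages,
                                     const IndexFn& add_to_index) {
  std::vector<VisitRecord> visits;
  for (const ImportedPage& page : pages) {
    bool indexed = false;
    if (add_to_index)  // The index may be unavailable, as in the real backend.
      indexed = add_to_index(page.url, page.title);
    // Make up one visit for the imported page, flagged if it was indexed.
    visits.push_back({page.url, page.last_visit, indexed});
  }
  return visits;
}
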
@@ -897,6 +1001,10 @@ void HistoryBackend::SetPageTitle(const GURL& url,
if (!db_)
return;
+ // Update the full text index.
+ if (text_database_)
+ text_database_->AddPageTitle(url, title);
+
// Search for recent redirects which should get the same title. We make a
// dummy list containing the exact URL visited if there are no redirects so
// the processing below can be the same.
@@ -1391,6 +1499,59 @@ void HistoryBackend::QueryHistoryText(URLDatabase* url_db,
result->set_reached_beginning(true);
}
+void HistoryBackend::QueryHistoryFTS(const string16& text_query,
+ const QueryOptions& options,
+ QueryResults* result) {
+ if (!text_database_)
+ return;
+
+ // Full text query, first get all the FTS results in the time range.
+ std::vector<TextDatabase::Match> fts_matches;
+ Time first_time_searched;
+ text_database_->GetTextMatches(text_query, options,
+ &fts_matches, &first_time_searched);
+
+ URLQuerier querier(db_.get(), archived_db_.get(), true);
+
+ // Now get the row and visit information for each one.
+ URLResult url_result; // Declare outside loop to prevent re-construction.
+ for (size_t i = 0; i < fts_matches.size(); i++) {
+ if (options.max_count != 0 &&
+ static_cast<int>(result->size()) >= options.max_count)
+ break; // Got too many items.
+
+ // Get the URL, querying the main and archived databases as necessary. If
+ // this is not found, the history and full text search databases are out
+ // of sync and we give up with this result.
+ if (!querier.GetRowForURL(fts_matches[i].url, &url_result))
+ continue;
+
+ if (!url_result.url().is_valid())
+ continue; // Don't report invalid URLs in case of corruption.
+
+ // Copy over the FTS stuff that the URLDatabase doesn't know about.
+ // We do this with swap() to avoid copying, since we know we don't
+ // need the original any more. Note that we override the title with the
+ // one from FTS, since that will match the title_match_positions (the
+ // FTS title and the history DB title may differ).
+ url_result.set_title(fts_matches[i].title);
+ url_result.title_match_positions_.swap(
+ fts_matches[i].title_match_positions);
+ url_result.snippet_.Swap(&fts_matches[i].snippet);
+
+ // The visit time also comes from the full text search database. Since it
+ // has the time, we can avoid an extra query of the visits table.
+ url_result.set_visit_time(fts_matches[i].time);
+
+ // Add it to the vector, this will clear our |url_row| object as a
+ // result of the swap.
+ result->AppendURLBySwapping(&url_result);
+ }
+
+ if (first_time_searched <= first_recorded_time_)
+ result->set_reached_beginning(true);
+}
+
// Frontend to GetMostRecentRedirectsFrom from the history thread.
void HistoryBackend::QueryRedirectsFrom(
scoped_refptr<QueryRedirectsRequest> request,
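
QueryHistoryFTS(), restored above, pairs each full-text match with its URL row, caps the result count at options.max_count, prefers the FTS title so it lines up with the match positions, and swaps strings rather than copying them. Below is a simplified standalone sketch of that merge; FtsMatch, UrlRow and ResultRow are hypothetical illustration types.

#include <cstddef>
#include <optional>
#include <string>
#include <utility>
#include <vector>

struct FtsMatch {
  std::string url;
  std::string title;    // Title as stored in the full-text index.
  std::string snippet;  // Highlighted snippet around the match.
  long long visit_time = 0;
};

struct UrlRow {
  std::string url;
  std::string title;  // Title as stored in the history DB (may differ).
};

struct ResultRow {
  std::string url;
  std::string title;
  std::string snippet;
  long long visit_time = 0;
};

// lookup_row stands in for URLQuerier::GetRowForURL() and returns nullopt if
// the URL is in neither the main nor the archived database.
template <typename LookupFn>
std::vector<ResultRow> MergeFtsMatches(std::vector<FtsMatch> matches,
                                       LookupFn lookup_row,
                                       std::size_t max_count) {
  std::vector<ResultRow> results;
  for (FtsMatch& match : matches) {
    if (max_count != 0 && results.size() >= max_count)
      break;  // Enough results already.
    std::optional<UrlRow> row = lookup_row(match.url);
    if (!row)
      continue;  // History DB and FTS index are out of sync; skip this match.
    ResultRow out;
    out.url = std::move(row->url);
    out.title = std::move(match.title);      // FTS title wins over the DB title.
    out.snippet = std::move(match.snippet);  // Move to avoid copying the snippet.
    out.visit_time = match.visit_time;
    results.push_back(std::move(out));
  }
  return results;
}
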
@@ -1650,6 +1811,14 @@ void HistoryBackend::ScheduleAutocomplete(HistoryURLProvider* provider,
provider->ExecuteWithDB(this, db_.get(), params);
}
+void HistoryBackend::SetPageContents(const GURL& url,
+ const string16& contents) {
+ // This is histogrammed in the text database manager.
+ if (!text_database_)
+ return;
+ text_database_->AddPageContents(url, contents);
+}
+
void HistoryBackend::SetPageThumbnail(
const GURL& url,
const gfx::Image* thumbnail,
@@ -1733,23 +1902,6 @@ void HistoryBackend::MigrateThumbnailsDatabase() {
}
}
-void HistoryBackend::DeleteFTSIndexDatabases() {
- // Find files on disk matching the text databases file pattern so we can
- // quickly test for and delete them.
- base::FilePath::StringType filepattern =
- FILE_PATH_LITERAL("History Index *");
- base::FileEnumerator enumerator(
- history_dir_, false, base::FileEnumerator::FILES, filepattern);
- int num_databases_deleted = 0;
- base::FilePath current_file;
- while (!(current_file = enumerator.Next()).empty()) {
- if (sql::Connection::Delete(current_file))
- num_databases_deleted++;
- }
- UMA_HISTOGRAM_COUNTS("History.DeleteFTSIndexDatabases",
- num_databases_deleted);
-}
-
bool HistoryBackend::GetThumbnailFromOlderRedirect(
const GURL& page_url,
std::vector<unsigned char>* data) {
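
The hunk above removes DeleteFTSIndexDatabases(), which swept the profile directory for "History Index *" files, deleted them, and reported the count via UMA; the revert brings those files back into use. Here is a rough standalone equivalent of that sweep using std::filesystem instead of base::FileEnumerator (the histogram is omitted, and the function name is invented for illustration).

#include <filesystem>
#include <string>
#include <system_error>
#include <vector>

namespace fs = std::filesystem;

// Deletes files named "History Index *" directly under |history_dir| and
// returns how many were removed.
int DeleteFtsIndexFiles(const fs::path& history_dir) {
  std::error_code ec;
  std::vector<fs::path> to_delete;
  for (const fs::directory_entry& entry :
       fs::directory_iterator(history_dir, ec)) {
    const std::string name = entry.path().filename().string();
    if (entry.is_regular_file(ec) &&
        name.rfind("History Index ", 0) == 0)  // Prefix match, C++17 style.
      to_delete.push_back(entry.path());
  }
  int num_deleted = 0;
  for (const fs::path& path : to_delete) {
    if (fs::remove(path, ec))
      ++num_deleted;
  }
  return num_deleted;
}
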
@@ -2516,6 +2668,11 @@ void HistoryBackend::Commit() {
archived_db_->CommitTransaction();
archived_db_->BeginTransaction();
}
+
+ if (text_database_) {
+ text_database_->CommitTransaction();
+ text_database_->BeginTransaction();
+ }
}
void HistoryBackend::ScheduleCommit() {
@@ -2746,7 +2903,7 @@ void HistoryBackend::KillHistoryDatabase() {
// The expirer keeps tabs on the active databases. Tell it about the
// databases which will be closed.
- expirer_.SetDatabases(NULL, NULL, NULL);
+ expirer_.SetDatabases(NULL, NULL, NULL, NULL);
// Reopen a new transaction for |db_| for the sake of CloseAllDatabases().
db_->BeginTransaction();
@@ -2836,7 +2993,15 @@ void HistoryBackend::DeleteAllHistory() {
LOG(ERROR) << "Main history could not be cleared";
kept_urls.clear();
- // Delete archived history.
+ // Delete FTS files & archived history.
+ if (text_database_) {
+ // We assume that the text database has one transaction on them that we need
+ // to close & restart (the long-running history transaction).
+ text_database_->CommitTransaction();
+ text_database_->DeleteAll();
+ text_database_->BeginTransaction();
+ }
+
if (archived_db_) {
// Close the database and delete the file.
archived_db_.reset();