Revert 224746 "Merge 224441 "TextDatabase contained an unexpecte..."

A bad merge resolution broke the build. Reverting while I fix it. > Merge 224441 "TextDatabase contained an unexpected bit of needed..." > > > TextDatabase contained an unexpected bit of needed functionality. > > > > https://chromiumcodereview.appspot.com/16951015 removed TextDatabase > > from history service. TextDatabase was calling a HistoryPublisher > > routine which was still in use. > > > > BUG=294306 > > R=cevans@chromium.org, rmcilroy@chromium.org, sky@chromium.org > > > > Review URL: https://codereview.chromium.org/23437047 > > TBR=shess@chromium.org > > Review URL: https://codereview.chromium.org/24360006 TBR=shess@chromium.org Review URL: https://codereview.chromium.org/24391002 git-svn-id: svn://svn.chromium.org/chrome/branches/1599/src@224764 0039d316-1c4b-4281-b951-d872f2087c98
author: shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-09-23 19:22:32 +0000
committer: shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-09-23 19:22:32 +0000
commit: 281f2d690bea2d7560323a94d9461c0ce68d0057 (patch)
tree: 072f1925f22e5905aa04f26c074ff06c760e8f3f
parent: eecf8e2088e3adc5e6c65db2b8d72365c3a369a8 (diff)
download: chromium_src-281f2d690bea2d7560323a94d9461c0ce68d0057.zip
chromium_src-281f2d690bea2d7560323a94d9461c0ce68d0057.tar.gz
chromium_src-281f2d690bea2d7560323a94d9461c0ce68d0057.tar.bz2
14 files changed, 362 insertions, 385 deletions
diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc
index 13e1394..4e69a4b 100644
--- a/chrome/browser/history/history_backend.cc
+++ b/chrome/browser/history/history_backend.cc
@@ -32,7 +32,6 @@
 #include "chrome/browser/history/history_notifications.h"
 #include "chrome/browser/history/history_publisher.h"
 #include "chrome/browser/history/in_memory_history_backend.h"
-#include "chrome/browser/history/page_collector.h"
 #include "chrome/browser/history/page_usage_data.h"
 #include "chrome/browser/history/select_favicon_frames.h"
 #include "chrome/browser/history/top_sites.h"
@@ -551,9 +550,6 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) {
                       last_ids.second);
   }
 
-  if (page_collector_)
-    page_collector_->AddPageURL(request.url, request.time);
-
   ScheduleCommit();
 }
 
@@ -615,9 +611,6 @@ void HistoryBackend::InitImpl(const std::string& languages) {
 
   // Create the history publisher which needs to be passed on to the thumbnail
   // database for publishing history.
-  // TODO(shess): HistoryPublisher is being deprecated.  I am still
-  // trying to track down who depends on it, meanwhile talk to me
-  // before removing interactions with it.  http://crbug.com/294306
   history_publisher_.reset(new HistoryPublisher());
   if (!history_publisher_->Init()) {
     // The init may fail when there are no indexers wanting our history.
@@ -625,12 +618,6 @@ void HistoryBackend::InitImpl(const std::string& languages) {
     history_publisher_.reset();
   }
 
-  // Collects page data for history_publisher_.
-  if (history_publisher_.get()) {
-    page_collector_.reset(new PageCollector());
-    page_collector_->Init(history_publisher_.get());
-  }
-
   // Thumbnail database.
   thumbnail_db_.reset(new ThumbnailDatabase());
   if (!db_->GetNeedsThumbnailMigration()) {
@@ -683,7 +670,8 @@ void HistoryBackend::InitImpl(const std::string& languages) {
   // *sigh*, this can all be cleaned up when that migration code is removed.
   // The main DB initialization should intuitively be first (not that it
   // actually matters) and the expirer should be set last.
-  expirer_.SetDatabases(db_.get(), archived_db_.get(), thumbnail_db_.get());
+  expirer_.SetDatabases(db_.get(), archived_db_.get(),
+                        thumbnail_db_.get());
 
   // Open the long-running transaction.
   db_->BeginTransaction();
@@ -868,12 +856,6 @@ void HistoryBackend::AddPagesWithDetails(const URLRows& urls,
       }
     }
 
-    // TODO(shess): I'm not sure this case needs to exist anymore.
-    if (page_collector_) {
-      page_collector_->AddPageData(i->url(), i->last_visit(),
-                                   i->title(), string16());
-    }
-
     // Sync code manages the visits itself.
     if (visit_source != SOURCE_SYNCED) {
       // Make up a visit to correspond to the last visit to the page.
@@ -911,13 +893,11 @@ bool HistoryBackend::IsExpiredVisitTime(const base::Time& time) {
   return time < expirer_.GetCurrentArchiveTime();
 }
 
-void HistoryBackend::SetPageTitle(const GURL& url, const string16& title) {
+void HistoryBackend::SetPageTitle(const GURL& url,
+                                  const string16& title) {
   if (!db_)
     return;
 
-  if (page_collector_)
-    page_collector_->AddPageTitle(url, title);
-
   // Search for recent redirects which should get the same title. We make a
   // dummy list containing the exact URL visited if there are no redirects so
   // the processing below can be the same.
@@ -1771,10 +1751,35 @@ void HistoryBackend::DeleteFTSIndexDatabases() {
                        num_databases_deleted);
 }
 
-void HistoryBackend::SetPageContents(const GURL& url,
-                                     const string16& contents) {
-  if (page_collector_)
-    page_collector_->AddPageContents(url, contents);
+bool HistoryBackend::GetThumbnailFromOlderRedirect(
+    const GURL& page_url,
+    std::vector<unsigned char>* data) {
+  // Look at a few previous visit sessions.
+  VisitVector older_sessions;
+  URLID page_url_id = db_->GetRowForURL(page_url, NULL);
+  static const int kVisitsToSearchForThumbnail = 4;
+  db_->GetMostRecentVisitsForURL(
+      page_url_id, kVisitsToSearchForThumbnail, &older_sessions);
+
+  // Iterate across all those previous visits, and see if any of the
+  // final destinations of those redirect chains have a good thumbnail
+  // for us.
+  bool success = false;
+  for (VisitVector::const_iterator it = older_sessions.begin();
+       !success && it != older_sessions.end(); ++it) {
+    history::RedirectList redirects;
+    if (it->visit_id) {
+      GetRedirectsFromSpecificVisit(it->visit_id, &redirects);
+
+      if (!redirects.empty()) {
+        URLID url_id;
+        if ((url_id = db_->GetRowForURL(redirects.back(), NULL)))
+          success = thumbnail_db_->GetPageThumbnail(url_id, data);
+      }
+    }
+  }
+
+  return success;
 }
 
 void HistoryBackend::GetFavicons(
diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h
index 53caec3..fd92878 100644
--- a/chrome/browser/history/history_backend.h
+++ b/chrome/browser/history/history_backend.h
@@ -38,7 +38,6 @@ class AndroidProviderBackend;
 
 class CommitLaterTask;
 class HistoryPublisher;
-class PageCollector;
 class VisitFilter;
 struct DownloadRow;
 
@@ -162,11 +161,6 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
                              const GURL& url,
                              base::Time end_ts);
 
-
-  // Indexing ------------------------------------------------------------------
-
-  void SetPageContents(const GURL& url, const string16& contents);
-
   // Querying ------------------------------------------------------------------
 
   // ScheduleAutocomplete() never frees |provider| (which is globally live).
@@ -876,9 +870,6 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
   // Stores old history in a larger, slower database.
   scoped_ptr<ArchivedDatabase> archived_db_;
 
-  // Helper to collect page data for vending to history_publisher_.
-  scoped_ptr<PageCollector> page_collector_;
-
   // Manages expiration between the various databases.
   ExpireHistoryBackend expirer_;
 
diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc
index fb1e242..8bb0111 100644
--- a/chrome/browser/history/history_backend_unittest.cc
+++ b/chrome/browser/history/history_backend_unittest.cc
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <set>
 #include <vector>
+#include <fstream>
 
 #include "base/basictypes.h"
 #include "base/bind.h"
@@ -2782,6 +2783,15 @@ TEST_F(HistoryBackendTest, RemoveNotification) {
   service->DeleteURL(url);
 }
 
+// Simple function to create a new dummy file.
+void CreateDummyFile(const base::FilePath& filename) {
+  std::wofstream file;
+  file.open(filename.value().c_str());
+  ASSERT_TRUE(file.is_open());
+  file << L"Dummy";
+  file.close();
+}
+
 // Test DeleteFTSIndexDatabases deletes expected files.
 TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) {
   ASSERT_TRUE(backend_.get());
@@ -2794,12 +2804,10 @@ TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) {
   base::FilePath db2_actual(history_path.AppendASCII("Underlying DB"));
 
   // Setup dummy index database files.
-  const char* data = "Dummy";
-  const size_t data_len = 5;
-  ASSERT_TRUE(file_util::WriteFile(db1, data, data_len));
-  ASSERT_TRUE(file_util::WriteFile(db1_journal, data, data_len));
-  ASSERT_TRUE(file_util::WriteFile(db1_wal, data, data_len));
-  ASSERT_TRUE(file_util::WriteFile(db2_actual, data, data_len));
+  CreateDummyFile(db1);
+  CreateDummyFile(db1_journal);
+  CreateDummyFile(db1_wal);
+  CreateDummyFile(db2_actual);
 #if defined(OS_POSIX)
   EXPECT_TRUE(file_util::CreateSymbolicLink(db2_actual, db2_symlink));
 #endif
diff --git a/chrome/browser/history/history_publisher.h b/chrome/browser/history/history_publisher.h
index a3538b4..2f16655 100644
--- a/chrome/browser/history/history_publisher.h
+++ b/chrome/browser/history/history_publisher.h
@@ -16,10 +16,6 @@
 #include "history_indexer.h"
 #endif
 
-// TODO(shess): HistoryPublisher is being deprecated.  I am still
-// trying to track down who depends on it, meanwhile talk to me
-// before removing interactions with it.
-
 class GURL;
 
 namespace base {
diff --git a/chrome/browser/history/history_service.cc b/chrome/browser/history/history_service.cc
index f86e75f..644dc85 100644
--- a/chrome/browser/history/history_service.cc
+++ b/chrome/browser/history/history_service.cc
@@ -593,14 +593,13 @@ void HistoryService::AddPagesWithDetails(const history::URLRows& info,
                     &HistoryBackend::AddPagesWithDetails, info, visit_source);
 }
 
-void HistoryService::SetPageContents(const GURL& url,
-                                     const string16& contents) {
+HistoryService::Handle HistoryService::GetPageThumbnail(
+    const GURL& page_url,
+    CancelableRequestConsumerBase* consumer,
+    const ThumbnailDataCallback& callback) {
   DCHECK(thread_checker_.CalledOnValidThread());
-  if (!CanAddURL(url))
-    return;
-
-  ScheduleAndForget(PRIORITY_LOW, &HistoryBackend::SetPageContents,
-                    url, contents);
+  return Schedule(PRIORITY_NORMAL, &HistoryBackend::GetPageThumbnail, consumer,
+                  new history::GetPageThumbnailRequest(callback), page_url);
 }
 
 CancelableTaskTracker::TaskId HistoryService::GetFavicons(
diff --git a/chrome/browser/history/history_service.h b/chrome/browser/history/history_service.h
index 060614d..1662571 100644
--- a/chrome/browser/history/history_service.h
+++ b/chrome/browser/history/history_service.h
@@ -236,13 +236,6 @@ class HistoryService : public CancelableRequestProvider,
                              const GURL& url,
                              base::Time end_ts);
 
-  // Indexing ------------------------------------------------------------------
-
-  // Notifies history of the body text of the given recently-visited URL.
-  // If the URL was not visited "recently enough," the history system may
-  // discard it.
-  void SetPageContents(const GURL& url, const string16& contents);
-
   // Querying ------------------------------------------------------------------
 
   // Returns the information about the requested URL. If the URL is found,
diff --git a/chrome/browser/history/history_tab_helper.cc b/chrome/browser/history/history_tab_helper.cc
index 8496883..96c545e 100644
--- a/chrome/browser/history/history_tab_helper.cc
+++ b/chrome/browser/history/history_tab_helper.cc
@@ -81,16 +81,6 @@ HistoryTabHelper::CreateHistoryAddPageArgs(
   return add_page_args;
 }
 
-bool HistoryTabHelper::OnMessageReceived(const IPC::Message& message) {
-  bool handled = true;
-  IPC_BEGIN_MESSAGE_MAP(HistoryTabHelper, message)
-    IPC_MESSAGE_HANDLER(ChromeViewHostMsg_PageContents, OnPageContents)
-    IPC_MESSAGE_UNHANDLED(handled = false)
-  IPC_END_MESSAGE_MAP()
-
-  return handled;
-}
-
 void HistoryTabHelper::DidNavigateMainFrame(
     const content::LoadCommittedDetails& details,
     const content::FrameNavigateParams& params) {
@@ -154,25 +144,6 @@ void HistoryTabHelper::Observe(int type,
   }
 }
 
-void HistoryTabHelper::OnPageContents(const GURL& url,
-                                      const string16& contents) {
-  // Don't index any https pages. People generally don't want their bank
-  // accounts, etc. indexed on their computer, especially since some of these
-  // things are not marked cachable.
-  // TODO(brettw) we may want to consider more elaborate heuristics such as
-  // the cachability of the page. We may also want to consider subframes (this
-  // test will still index subframes if the subframe is SSL).
-  // TODO(zelidrag) bug chromium-os:2808 - figure out if we want to reenable
-  // content indexing for chromeos in some future releases.
-#if !defined(OS_CHROMEOS)
-  if (!url.SchemeIsSecure()) {
-    HistoryService* hs = GetHistoryService();
-    if (hs)
-      hs->SetPageContents(url, contents);
-  }
-#endif
-}
-
 HistoryService* HistoryTabHelper::GetHistoryService() {
   Profile* profile =
       Profile::FromBrowserContext(web_contents()->GetBrowserContext());
diff --git a/chrome/browser/history/history_tab_helper.h b/chrome/browser/history/history_tab_helper.h
index 65b03c3..2f613fa 100644
--- a/chrome/browser/history/history_tab_helper.h
+++ b/chrome/browser/history/history_tab_helper.h
@@ -46,7 +46,6 @@ class HistoryTabHelper : public content::WebContentsObserver,
   friend class content::WebContentsUserData<HistoryTabHelper>;
 
   // content::WebContentsObserver implementation.
-  virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
   virtual void DidNavigateMainFrame(
       const content::LoadCommittedDetails& details,
       const content::FrameNavigateParams& params) OVERRIDE;
@@ -60,7 +59,9 @@ class HistoryTabHelper : public content::WebContentsObserver,
                        const content::NotificationSource& source,
                        const content::NotificationDetails& details) OVERRIDE;
 
-  void OnPageContents(const GURL& url, const string16& contents);
+  void OnPageContents(const GURL& url,
+                      int32 page_id,
+                      const string16& contents);
 
   // Helper function to return the history service.  May return NULL.
   HistoryService* GetHistoryService();
diff --git a/chrome/browser/history/page_collector.cc b/chrome/browser/history/page_collector.cc
deleted file mode 100644
index da07ad1..0000000
--- a/chrome/browser/history/page_collector.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright (c) 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/browser/history/page_collector.h"
-
-#include "base/bind.h"
-#include "base/message_loop/message_loop.h"
-#include "base/strings/utf_string_conversions.h"
-#include "chrome/browser/history/history_publisher.h"
-#include "url/gurl.h"
-
-namespace {
-
-// Page info older than this will be published even if we haven't
-// gotten a title and/or body.
-const int kExpirationSeconds = 20;
-
-}  // namespace
-
-namespace history {
-
-// PageCollector::PageInfo -----------------------------------------------
-
-PageCollector::PageInfo::PageInfo(base::Time visit_time)
-    : visit_time_(visit_time),
-      added_time_(base::TimeTicks::Now()) {
-}
-
-PageCollector::PageInfo::~PageInfo() {}
-
-// NOTE(shess): Per the comment on has_title() and has_body(), this
-// code maps empty strings to single space to differentiate set title
-// and body from empty.  This approach is held over from the original
-// TextDatabaseManager version.
-void PageCollector::PageInfo::set_title(const string16& ttl) {
-  if (ttl.empty())
-    title_ = ASCIIToUTF16(" ");
-  else
-    title_ = ttl;
-}
-
-void PageCollector::PageInfo::set_body(const string16& bdy) {
-  if (bdy.empty())
-    body_ = ASCIIToUTF16(" ");
-  else
-    body_ = bdy;
-}
-
-bool PageCollector::PageInfo::Expired(base::TimeTicks now) const {
-  return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds);
-}
-
-PageCollector::PageCollector()
-    : recent_changes_(RecentChangeList::NO_AUTO_EVICT),
-      weak_factory_(this) {
-}
-
-PageCollector::~PageCollector() {
-}
-
-void PageCollector::Init(const HistoryPublisher* history_publisher) {
-  history_publisher_ = history_publisher;
-}
-
-void PageCollector::AddPageURL(const GURL& url, base::Time time) {
-  // Don't collect data which cannot be published.
-  if (!history_publisher_)
-    return;
-
-  // Just save this info for later (evicting any previous data). We
-  // will delete it when it expires or when all the data is complete.
-  recent_changes_.Put(url, PageInfo(time));
-
-  // Schedule flush if not already scheduled.
-  if (!weak_factory_.HasWeakPtrs())
-    ScheduleFlushCollected();
-}
-
-void PageCollector::AddPageTitle(const GURL& url, const string16& title) {
-  if (!history_publisher_)
-    return;
-
-  // If the title comes in after the page has aged out, drop it.
-  // Older code would manufacture information from the database.
-  RecentChangeList::iterator found = recent_changes_.Peek(url);
-  if (found == recent_changes_.end())
-    return;
-
-  // Publish the info if complete.
-  if (found->second.has_body()) {
-    history_publisher_->PublishPageContent(
-        found->second.visit_time(), url, title, found->second.body());
-    recent_changes_.Erase(found);
-  } else {
-    found->second.set_title(title);
-  }
-}
-
-void PageCollector::AddPageContents(const GURL& url,
-                                    const string16& body) {
-  if (!history_publisher_)
-    return;
-
-  // If the body comes in after the page has aged out, drop it.
-  // Older code would manufacture information from the database.
-  RecentChangeList::iterator found = recent_changes_.Peek(url);
-  if (found == recent_changes_.end())
-    return;
-
-  // Publish the info if complete.
-  if (found->second.has_title()) {
-    history_publisher_->PublishPageContent(
-        found->second.visit_time(), url, found->second.title(), body);
-    recent_changes_.Erase(found);
-  } else {
-    found->second.set_body(body);
-  }
-}
-
-void PageCollector::AddPageData(const GURL& url,
-                                base::Time visit_time,
-                                const string16& title,
-                                const string16& body) {
-  if (!history_publisher_)
-    return;
-
-  // Publish the item.
-  history_publisher_->PublishPageContent(visit_time, url, title, body);
-}
-
-void PageCollector::ScheduleFlushCollected() {
-  weak_factory_.InvalidateWeakPtrs();
-  base::MessageLoop::current()->PostDelayedTask(
-      FROM_HERE,
-      base::Bind(&PageCollector::FlushCollected,
-                 weak_factory_.GetWeakPtr()),
-      base::TimeDelta::FromSeconds(kExpirationSeconds));
-}
-
-void PageCollector::FlushCollected() {
-  base::TimeTicks now = base::TimeTicks::Now();
-
-  // Iterate from oldest to newest publishing items which expire while
-  // waiting for title or body.
-  RecentChangeList::reverse_iterator iter = recent_changes_.rbegin();
-  while (iter != recent_changes_.rend() && iter->second.Expired(now)) {
-    AddPageData(iter->first, iter->second.visit_time(),
-                iter->second.title(), iter->second.body());
-    iter = recent_changes_.Erase(iter);
-  }
-
-  if (!recent_changes_.empty())
-    ScheduleFlushCollected();
-}
-
-}  // namespace history
diff --git a/chrome/browser/history/page_collector.h b/chrome/browser/history/page_collector.h
deleted file mode 100644
index c044a3b..0000000
--- a/chrome/browser/history/page_collector.h
+++ /dev/null
@@ -1,123 +0,0 @@
-// Copyright (c) 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
-#define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
-
-#include "base/basictypes.h"
-#include "base/containers/mru_cache.h"
-#include "base/memory/weak_ptr.h"
-#include "base/strings/string16.h"
-#include "base/time/time.h"
-
-class GURL;
-
-namespace history {
-
-class HistoryPublisher;
-
-// Collect page data and publish to HistoryPublisher.
-class PageCollector {
- public:
-  // You must call Init() to complete initialization.
-  PageCollector();
-  ~PageCollector();
-
-  // Must call before using other functions.
-  void Init(const HistoryPublisher* history_publisher);
-
-  // Sets specific information for the given page to be published.
-  // In normal operation, URLs will be added as the user visits them, the titles
-  // and bodies will come in some time after that. These changes will be
-  // automatically coalesced and added to the database some time in the future
-  // using AddPageData().
-  //
-  // AddPageURL must be called for a given URL before either the title
-  // or body set. The visit time should be the time corresponding to
-  // that visit in the history database.
-  void AddPageURL(const GURL& url, base::Time visit_time);
-  void AddPageTitle(const GURL& url, const string16& title);
-  void AddPageContents(const GURL& url, const string16& body);
-
-  void AddPageData(const GURL& url,
-                   base::Time visit_time,
-                   const string16& title,
-                   const string16& body);
-
- private:
-  // Stores "recent stuff" that has happened with the page, since the page
-  // visit, title, and body all come in at different times.
-  class PageInfo {
-   public:
-    explicit PageInfo(base::Time visit_time);
-    ~PageInfo();
-
-    // Getters.
-    base::Time visit_time() const { return visit_time_; }
-    const string16& title() const { return title_; }
-    const string16& body() const { return body_; }
-
-    // Setters, we can only update the title and body.
-    void set_title(const string16& ttl);
-    void set_body(const string16& bdy);
-
-    // Returns true if both the title or body of the entry has been set. Since
-    // both the title and body setters will "fix" empty strings to be a space,
-    // these indicate if the setter was ever called.
-    bool has_title() const { return !title_.empty(); }
-    bool has_body() const { return !body_.empty(); }
-
-    // Returns true if this entry was added too long ago and we should give up
-    // waiting for more data. The current time is passed in as an argument so we
-    // can check many without re-querying the timer.
-    bool Expired(base::TimeTicks now) const;
-
-   private:
-    // Time of the visit of the URL. This will be the value stored in the URL
-    // and visit tables for the entry.
-    base::Time visit_time_;
-
-    // When this page entry was created. We have a cap on the maximum time that
-    // an entry will be in the queue before being flushed to the database.
-    base::TimeTicks added_time_;
-
-    // Will be the string " " when they are set to distinguish set and unset.
-    string16 title_;
-    string16 body_;
-  };
-
-  // Collected data is published when both the title and body are
-  // present.  https data is never passed to AddPageContents(), so
-  // periodically collected data is published without the contents.
-  // Pages which take a long time to load will not have their bodies
-  // published.
-  void ScheduleFlushCollected();
-  void FlushCollected();
-
-  // Lists recent additions that we have not yet filled out with the title and
-  // body. Sorted by time, we will flush them when they are complete or have
-  // been in the queue too long without modification.
-  //
-  // We kind of abuse the MRUCache because we never move things around in it
-  // using Get. Instead, we keep them in the order they were inserted, since
-  // this is the metric we use to measure age. The MRUCache gives us an ordered
-  // list with fast lookup by URL.
-  typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
-  RecentChangeList recent_changes_;
-
-  // Generates tasks for our periodic checking of expired "recent changes".
-  base::WeakPtrFactory<PageCollector> weak_factory_;
-
-  // This object is created and managed by the history backend. We maintain an
-  // opaque pointer to the object for our use.
-  // This can be NULL if there are no indexers registered to receive indexing
-  // data from us.
-  const HistoryPublisher* history_publisher_;
-
-  DISALLOW_COPY_AND_ASSIGN(PageCollector);
-};
-
-}  // namespace history
-
-#endif  // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
diff --git a/chrome/browser/history/text_database_unittest.cc b/chrome/browser/history/text_database_unittest.cc
new file mode 100644
index 0000000..25b7d79
--- /dev/null
+++ b/chrome/browser/history/text_database_unittest.cc
@@ -0,0 +1,306 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "base/files/scoped_temp_dir.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+#include "chrome/browser/history/text_database.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/platform_test.h"
+
+using base::Time;
+
+namespace history {
+
+namespace {
+
+// Note that all pages have "COUNTTAG" which allows us to count the number of
+// pages in the database withoujt adding any extra functions to the DB object.
+const char kURL1[] = "http://www.google.com/";
+const int kTime1 = 1000;
+const char kTitle1[] = "Google";
+const char kBody1[] =
+    "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
+    "Sign out Advanced Search Preferences Language Tools Advertising Programs "
+    "- Business Solutions - About Google, 2008 Google";
+
+const char kURL2[] = "http://images.google.com/";
+const int kTime2 = 2000;
+const char kTitle2[] = "Google Image Search";
+const char kBody2[] =
+    "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
+    "Sign out Advanced Image Search Preferences The most comprehensive image "
+    "search on the web. Want to help improve Google Image Search? Try Google "
+    "Image Labeler. Advertising Programs - Business Solutions - About Google "
+    "2008 Google";
+
+const char kURL3[] = "http://slashdot.org/";
+const int kTime3 = 3000;
+const char kTitle3[] = "Slashdot: News for nerds, stuff that matters";
+const char kBody3[] =
+    "COUNTTAG Slashdot Log In Create Account Subscribe Firehose Why "
+    "Log In? Why Subscribe? Nickname Password Public Terminal Sections "
+    "Main Apple AskSlashdot Backslash Books Developers Games Hardware "
+    "Interviews IT Linux Mobile Politics Science YRO";
+
+// Returns the number of rows currently in the database.
+int RowCount(TextDatabase* db) {
+  QueryOptions options;
+  options.begin_time = Time::FromInternalValue(0);
+  // Leave end_time at now.
+
+  std::vector<TextDatabase::Match> results;
+  TextDatabase::URLSet unique_urls;
+  db->GetTextMatches("COUNTTAG", options, &results, &unique_urls);
+  return static_cast<int>(results.size());
+}
+
+// Adds each of the test pages to the database.
+void AddAllTestData(TextDatabase* db) {
+  EXPECT_TRUE(db->AddPageData(
+      Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
+  EXPECT_TRUE(db->AddPageData(
+      Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
+  EXPECT_TRUE(db->AddPageData(
+      Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3));
+  EXPECT_EQ(3, RowCount(db));
+}
+
+bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
+                    const char* url) {
+  GURL gurl(url);
+  for (size_t i = 0; i < results.size(); i++) {
+    if (results[i].url == gurl)
+      return true;
+  }
+  return false;
+}
+
+}  // namespace
+
+class TextDatabaseTest : public PlatformTest {
+ public:
+  TextDatabaseTest() {}
+
+ protected:
+  virtual void SetUp() {
+    PlatformTest::SetUp();
+    ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
+  }
+
+  // Create databases with this function, which will ensure that the files are
+  // deleted on shutdown. Only open one database for each file. Returns NULL on
+  // failure.
+  //
+  // Set |delete_file| to delete any existing file. If we are trying to create
+  // the file for the first time, we don't want a previous test left in a
+  // weird state to have left a file that would affect us.
+  TextDatabase* CreateDB(TextDatabase::DBIdent id,
+                         bool allow_create,
+                         bool delete_file) {
+    TextDatabase* db = new TextDatabase(temp_dir_.path(), id, allow_create);
+
+    if (delete_file)
+      sql::Connection::Delete(db->file_name());
+
+    if (!db->Init()) {
+      delete db;
+      return NULL;
+    }
+    return db;
+  }
+
+  // Directory containing the databases.
+  base::ScopedTempDir temp_dir_;
+
+  // Name of the main database file.
+  base::FilePath file_name_;
+};
+
+TEST_F(TextDatabaseTest, AttachDetach) {
+  // First database with one page.
+  const int kIdee1 = 200801;
+  scoped_ptr<TextDatabase> db1(CreateDB(kIdee1, true, true));
+  ASSERT_TRUE(!!db1.get());
+  EXPECT_TRUE(db1->AddPageData(
+      Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
+
+  // Second database with one page.
+  const int kIdee2 = 200802;
+  scoped_ptr<TextDatabase> db2(CreateDB(kIdee2, true, true));
+  ASSERT_TRUE(!!db2.get());
+  EXPECT_TRUE(db2->AddPageData(
+      Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
+
+  // Detach, then reattach database one. The file should exist, so we force
+  // opening an existing file.
+  db1.reset();
+  db1.reset(CreateDB(kIdee1, false, false));
+  ASSERT_TRUE(!!db1.get());
+
+  // We should not be able to attach this random database for which no file
+  // exists.
+  const int kIdeeNoExisto = 999999999;
+  scoped_ptr<TextDatabase> db3(CreateDB(kIdeeNoExisto, false, true));
+  EXPECT_FALSE(!!db3.get());
+}
+
+TEST_F(TextDatabaseTest, AddRemove) {
+  // Create a database and add some pages to it.
+  const int kIdee1 = 200801;
+  scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
+  ASSERT_TRUE(!!db.get());
+  URLID id1 = db->AddPageData(
+      Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1);
+  EXPECT_NE(0, id1);
+  URLID id2 = db->AddPageData(
+      Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2);
+  EXPECT_NE(0, id2);
+  URLID id3 = db->AddPageData(
+      Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3);
+  EXPECT_NE(0, id3);
+  EXPECT_EQ(3, RowCount(db.get()));
+
+  // Make sure we can delete some of the data.
+  db->DeletePageData(Time::FromInternalValue(kTime1), kURL1);
+  EXPECT_EQ(2, RowCount(db.get()));
+
+  // Close and reopen.
+  db.reset(new TextDatabase(temp_dir_.path(), kIdee1, false));
+  EXPECT_TRUE(db->Init());
+
+  // Verify that the deleted ID is gone and try to delete another one.
+  EXPECT_EQ(2, RowCount(db.get()));
+  db->DeletePageData(Time::FromInternalValue(kTime2), kURL2);
+  EXPECT_EQ(1, RowCount(db.get()));
+}
+
+TEST_F(TextDatabaseTest, Query) {
+  // Make a database with some pages.
+  const int kIdee1 = 200801;
+  scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
+  EXPECT_TRUE(!!db.get());
+  AddAllTestData(db.get());
+
+  // Get all the results.
+  QueryOptions options;
+  options.begin_time = Time::FromInternalValue(0);
+
+  std::vector<TextDatabase::Match> results;
+  TextDatabase::URLSet unique_urls;
+  db->GetTextMatches("COUNTTAG", options, &results, &unique_urls);
+  EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
+
+  // All 3 sites should be returned in order.
+  ASSERT_EQ(3U, results.size());
+  EXPECT_EQ(GURL(kURL1), results[2].url);
+  EXPECT_EQ(GURL(kURL2), results[1].url);
+  EXPECT_EQ(GURL(kURL3), results[0].url);
+
+  // Verify the info on those results.
+  EXPECT_TRUE(Time::FromInternalValue(kTime1) == results[2].time);
+  EXPECT_TRUE(Time::FromInternalValue(kTime2) == results[1].time);
+  EXPECT_TRUE(Time::FromInternalValue(kTime3) == results[0].time);
+
+  EXPECT_EQ(std::string(kTitle1), UTF16ToUTF8(results[2].title));
+  EXPECT_EQ(std::string(kTitle2), UTF16ToUTF8(results[1].title));
+  EXPECT_EQ(std::string(kTitle3), UTF16ToUTF8(results[0].title));
+
+  // Should have no matches in the title.
+  EXPECT_EQ(0U, results[0].title_match_positions.size());
+  EXPECT_EQ(0U, results[1].title_match_positions.size());
+  EXPECT_EQ(0U, results[2].title_match_positions.size());
+
+  // We don't want to be dependent on the exact snippet algorithm, but we know
+  // since we searched for "COUNTTAG" which occurs at the beginning of each
+  // document, that each snippet should start with that.
+  EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[0].snippet.text()),
+                              "COUNTTAG", false));
+  EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[1].snippet.text()),
+                              "COUNTTAG", false));
+  EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[2].snippet.text()),
+                              "COUNTTAG", false));
+}
+
+TEST_F(TextDatabaseTest, TimeRange) {
+  // Make a database with some pages.
+  const int kIdee1 = 200801;
+  scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
+  ASSERT_TRUE(!!db.get());
+  AddAllTestData(db.get());
+
+  // Beginning should be inclusive, and the ending exclusive.
+  // Get all the results.
+  QueryOptions options;
+  options.begin_time = Time::FromInternalValue(kTime1);
+  options.end_time = Time::FromInternalValue(kTime3);
+
+  std::vector<TextDatabase::Match> results;
+  TextDatabase::URLSet unique_urls;
+  bool has_more_results = db->GetTextMatches(
+      "COUNTTAG", options, &results,  &unique_urls);
+  EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
+
+  // The first and second should have been returned.
+  EXPECT_EQ(2U, results.size());
+  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
+  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
+  EXPECT_FALSE(ResultsHaveURL(results, kURL3));
+  EXPECT_EQ(kTime1, results.back().time.ToInternalValue());
+  EXPECT_FALSE(has_more_results);
+
+  // Do a query where there isn't a result on the begin boundary.
+  options.begin_time = Time::FromInternalValue((kTime2 - kTime1) / 2 + kTime1);
+  options.end_time = Time::FromInternalValue(kTime3 + 1);
+  results.clear();  // GetTextMatches does *not* clear the results.
+  has_more_results = db->GetTextMatches(
+      "COUNTTAG", options, &results, &unique_urls);
+  EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
+  EXPECT_FALSE(has_more_results);
+
+  // Should have two results, the second and third.
+  EXPECT_EQ(2U, results.size());
+  EXPECT_FALSE(ResultsHaveURL(results, kURL1));
+  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
+  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
+
+  // Try a range that has no results.
+  options.begin_time = Time::FromInternalValue(kTime3 + 1);
+  options.end_time = Time::FromInternalValue(kTime3 * 100);
+  results.clear();
+  has_more_results = db->GetTextMatches(
+      "COUNTTAG", options, &results, &unique_urls);
+  EXPECT_FALSE(has_more_results);
+}
+
+// Make sure that max_count works.
+TEST_F(TextDatabaseTest, MaxCount) {
+  // Make a database with some pages.
+  const int kIdee1 = 200801;
+  scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
+  ASSERT_TRUE(!!db.get());
+  AddAllTestData(db.get());
+
+  // Set up the query to return all the results with "Google" (should be 2), but
+  // with a maximum of 1.
+  QueryOptions options;
+  options.begin_time = Time::FromInternalValue(kTime1);
+  options.end_time = Time::FromInternalValue(kTime3 + 1);
+  options.max_count = 1;
+
+  std::vector<TextDatabase::Match> results;
+  TextDatabase::URLSet unique_urls;
+  db->GetTextMatches("google", options, &results, &unique_urls);
+  EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
+
+  // There should be one result, the most recent one.
+  EXPECT_EQ(1U, results.size());
+  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
+  EXPECT_EQ(kTime2, results.back().time.ToInternalValue());
+}
+
+}  // namespace history
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi
index c45ad0a..4a1deca 100644
--- a/chrome/chrome_browser.gypi
+++ b/chrome/chrome_browser.gypi
@@ -770,8 +770,6 @@
         'browser/history/in_memory_url_index_types.h',
         'browser/history/most_visited_tiles_experiment.cc',
         'browser/history/most_visited_tiles_experiment.h',
-        'browser/history/page_collector.cc',
-        'browser/history/page_collector.h',
         'browser/history/page_usage_data.cc',
         'browser/history/page_usage_data.h',
         'browser/history/query_parser.cc',
diff --git a/chrome/common/render_messages.h b/chrome/common/render_messages.h
index aa38bbc..e75e566 100644
--- a/chrome/common/render_messages.h
+++ b/chrome/common/render_messages.h
@@ -417,11 +417,6 @@ IPC_MESSAGE_ROUTED1(ChromeViewMsg_NetErrorInfo,
 // Misc messages
 // These are messages sent from the renderer to the browser process.
 
-// Provides the contents for the given page that was loaded recently.
-IPC_MESSAGE_ROUTED2(ChromeViewHostMsg_PageContents,
-                    GURL         /* URL of the page */,
-                    string16     /* page contents */)
-
 // Notification that the language for the tab has been determined.
 IPC_MESSAGE_ROUTED2(ChromeViewHostMsg_TranslateLanguageDetermined,
                     LanguageDetectionDetails /* details about lang detection */,
diff --git a/chrome/renderer/chrome_render_view_observer.cc b/chrome/renderer/chrome_render_view_observer.cc
index 70f6ccf..81a7cbf 100644
--- a/chrome/renderer/chrome_render_view_observer.cc
+++ b/chrome/renderer/chrome_render_view_observer.cc
@@ -800,12 +800,6 @@ void ChromeRenderViewObserver::CapturePageInfo(int page_id,
 
   TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo");
 
-  if (contents.size()) {
-    // Send the text to the browser for indexing (the browser might decide not
-    // to index, if the URL is HTTPS for instance).
-    Send(new ChromeViewHostMsg_PageContents(routing_id(), url, contents));
-  }
-
 #if defined(FULL_SAFE_BROWSING)
   // Will swap out the string.
   if (phishing_classifier_)
author	shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-09-23 19:22:32 +0000
committer	shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-09-23 19:22:32 +0000
commit	281f2d690bea2d7560323a94d9461c0ce68d0057 (patch)
tree	072f1925f22e5905aa04f26c074ff06c760e8f3f
parent	eecf8e2088e3adc5e6c65db2b8d72365c3a369a8 (diff)
download	chromium_src-281f2d690bea2d7560323a94d9461c0ce68d0057.zip chromium_src-281f2d690bea2d7560323a94d9461c0ce68d0057.tar.gz chromium_src-281f2d690bea2d7560323a94d9461c0ce68d0057.tar.bz2