summaryrefslogtreecommitdiffstats
path: root/chrome/browser/history
diff options
context:
space:
mode:
authorshess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-09-20 18:42:50 +0000
committershess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-09-20 18:42:50 +0000
commit33abbda30215a6521987a221b8991eff32ad3284 (patch)
tree519464726ef776be57727f98d42666636cd958fa /chrome/browser/history
parent560cfc359cbaad9af3100542fe5b83fefe78f83f (diff)
downloadchromium_src-33abbda30215a6521987a221b8991eff32ad3284.zip
chromium_src-33abbda30215a6521987a221b8991eff32ad3284.tar.gz
chromium_src-33abbda30215a6521987a221b8991eff32ad3284.tar.bz2
TextDatabase contained an unexpected bit of needed functionality.
https://chromiumcodereview.appspot.com/16951015 removed TextDatabase from history service. TextDatabase was calling a HistoryPublisher routine which was still in use. BUG=294306 R=cevans@chromium.org, rmcilroy@chromium.org, sky@chromium.org Review URL: https://codereview.chromium.org/23437047 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@224441 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history')
-rw-r--r--chrome/browser/history/history_backend.cc34
-rw-r--r--chrome/browser/history/history_backend.h9
-rw-r--r--chrome/browser/history/history_backend_unittest.cc20
-rw-r--r--chrome/browser/history/history_publisher.h4
-rw-r--r--chrome/browser/history/history_service.cc10
-rw-r--r--chrome/browser/history/history_service.h7
-rw-r--r--chrome/browser/history/history_tab_helper.cc29
-rw-r--r--chrome/browser/history/history_tab_helper.h5
-rw-r--r--chrome/browser/history/page_collector.cc157
-rw-r--r--chrome/browser/history/page_collector.h123
-rw-r--r--chrome/browser/history/text_database_unittest.cc306
11 files changed, 377 insertions, 327 deletions
diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc
index 3913d1d..07f18ed 100644
--- a/chrome/browser/history/history_backend.cc
+++ b/chrome/browser/history/history_backend.cc
@@ -32,6 +32,7 @@
#include "chrome/browser/history/history_notifications.h"
#include "chrome/browser/history/history_publisher.h"
#include "chrome/browser/history/in_memory_history_backend.h"
+#include "chrome/browser/history/page_collector.h"
#include "chrome/browser/history/page_usage_data.h"
#include "chrome/browser/history/select_favicon_frames.h"
#include "chrome/browser/history/top_sites.h"
@@ -550,6 +551,9 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) {
last_ids.second);
}
+ if (page_collector_)
+ page_collector_->AddPageURL(request.url, request.time);
+
ScheduleCommit();
}
@@ -611,6 +615,9 @@ void HistoryBackend::InitImpl(const std::string& languages) {
// Create the history publisher which needs to be passed on to the thumbnail
// database for publishing history.
+ // TODO(shess): HistoryPublisher is being deprecated. I am still
+ // trying to track down who depends on it, meanwhile talk to me
+ // before removing interactions with it. http://crbug.com/294306
history_publisher_.reset(new HistoryPublisher());
if (!history_publisher_->Init()) {
// The init may fail when there are no indexers wanting our history.
@@ -618,6 +625,12 @@ void HistoryBackend::InitImpl(const std::string& languages) {
history_publisher_.reset();
}
+ // Collects page data for history_publisher_.
+ if (history_publisher_.get()) {
+ page_collector_.reset(new PageCollector());
+ page_collector_->Init(history_publisher_.get());
+ }
+
// Thumbnail database.
// TODO(shess): "thumbnail database" these days only stores
// favicons. Thumbnails are stored in "top sites". Consider
@@ -665,8 +678,7 @@ void HistoryBackend::InitImpl(const std::string& languages) {
// *sigh*, this can all be cleaned up when that migration code is removed.
// The main DB initialization should intuitively be first (not that it
// actually matters) and the expirer should be set last.
- expirer_.SetDatabases(db_.get(), archived_db_.get(),
- thumbnail_db_.get());
+ expirer_.SetDatabases(db_.get(), archived_db_.get(), thumbnail_db_.get());
// Open the long-running transaction.
db_->BeginTransaction();
@@ -853,6 +865,12 @@ void HistoryBackend::AddPagesWithDetails(const URLRows& urls,
}
}
+ // TODO(shess): I'm not sure this case needs to exist anymore.
+ if (page_collector_) {
+ page_collector_->AddPageData(i->url(), i->last_visit(),
+ i->title(), string16());
+ }
+
// Sync code manages the visits itself.
if (visit_source != SOURCE_SYNCED) {
// Make up a visit to correspond to the last visit to the page.
@@ -890,11 +908,13 @@ bool HistoryBackend::IsExpiredVisitTime(const base::Time& time) {
return time < expirer_.GetCurrentArchiveTime();
}
-void HistoryBackend::SetPageTitle(const GURL& url,
- const string16& title) {
+void HistoryBackend::SetPageTitle(const GURL& url, const string16& title) {
if (!db_)
return;
+ if (page_collector_)
+ page_collector_->AddPageTitle(url, title);
+
// Search for recent redirects which should get the same title. We make a
// dummy list containing the exact URL visited if there are no redirects so
// the processing below can be the same.
@@ -1665,6 +1685,12 @@ void HistoryBackend::DeleteFTSIndexDatabases() {
num_databases_deleted);
}
+void HistoryBackend::SetPageContents(const GURL& url,
+ const string16& contents) {
+ if (page_collector_)
+ page_collector_->AddPageContents(url, contents);
+}
+
void HistoryBackend::GetFavicons(
const std::vector<GURL>& icon_urls,
int icon_types,
diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h
index de64e5c..f3bbf4d 100644
--- a/chrome/browser/history/history_backend.h
+++ b/chrome/browser/history/history_backend.h
@@ -38,6 +38,7 @@ class AndroidProviderBackend;
class CommitLaterTask;
class HistoryPublisher;
+class PageCollector;
class VisitFilter;
struct DownloadRow;
@@ -158,6 +159,11 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
const GURL& url,
base::Time end_ts);
+
+ // Indexing ------------------------------------------------------------------
+
+ void SetPageContents(const GURL& url, const string16& contents);
+
// Querying ------------------------------------------------------------------
// ScheduleAutocomplete() never frees |provider| (which is globally live).
@@ -833,6 +839,9 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
// Stores old history in a larger, slower database.
scoped_ptr<ArchivedDatabase> archived_db_;
+ // Helper to collect page data for vending to history_publisher_.
+ scoped_ptr<PageCollector> page_collector_;
+
// Manages expiration between the various databases.
ExpireHistoryBackend expirer_;
diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc
index a666cf1..04af2b4 100644
--- a/chrome/browser/history/history_backend_unittest.cc
+++ b/chrome/browser/history/history_backend_unittest.cc
@@ -7,7 +7,6 @@
#include <algorithm>
#include <set>
#include <vector>
-#include <fstream>
#include "base/basictypes.h"
#include "base/bind.h"
@@ -2745,15 +2744,6 @@ TEST_F(HistoryBackendTest, RemoveNotification) {
service->DeleteURL(url);
}
-// Simple function to create a new dummy file.
-void CreateDummyFile(const base::FilePath& filename) {
- std::wofstream file;
- file.open(filename.value().c_str());
- ASSERT_TRUE(file.is_open());
- file << L"Dummy";
- file.close();
-}
-
// Test DeleteFTSIndexDatabases deletes expected files.
TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) {
ASSERT_TRUE(backend_.get());
@@ -2766,10 +2756,12 @@ TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) {
base::FilePath db2_actual(history_path.AppendASCII("Underlying DB"));
// Setup dummy index database files.
- CreateDummyFile(db1);
- CreateDummyFile(db1_journal);
- CreateDummyFile(db1_wal);
- CreateDummyFile(db2_actual);
+ const char* data = "Dummy";
+ const size_t data_len = 5;
+ ASSERT_TRUE(file_util::WriteFile(db1, data, data_len));
+ ASSERT_TRUE(file_util::WriteFile(db1_journal, data, data_len));
+ ASSERT_TRUE(file_util::WriteFile(db1_wal, data, data_len));
+ ASSERT_TRUE(file_util::WriteFile(db2_actual, data, data_len));
#if defined(OS_POSIX)
EXPECT_TRUE(file_util::CreateSymbolicLink(db2_actual, db2_symlink));
#endif
diff --git a/chrome/browser/history/history_publisher.h b/chrome/browser/history/history_publisher.h
index 2f16655..a3538b4 100644
--- a/chrome/browser/history/history_publisher.h
+++ b/chrome/browser/history/history_publisher.h
@@ -16,6 +16,10 @@
#include "history_indexer.h"
#endif
+// TODO(shess): HistoryPublisher is being deprecated. I am still
+// trying to track down who depends on it, meanwhile talk to me
+// before removing interactions with it.
+
class GURL;
namespace base {
diff --git a/chrome/browser/history/history_service.cc b/chrome/browser/history/history_service.cc
index dc28c42..aee38c8 100644
--- a/chrome/browser/history/history_service.cc
+++ b/chrome/browser/history/history_service.cc
@@ -584,6 +584,16 @@ void HistoryService::AddPagesWithDetails(const history::URLRows& info,
&HistoryBackend::AddPagesWithDetails, info, visit_source);
}
+void HistoryService::SetPageContents(const GURL& url,
+ const string16& contents) {
+ DCHECK(thread_checker_.CalledOnValidThread());
+ if (!CanAddURL(url))
+ return;
+
+ ScheduleAndForget(PRIORITY_LOW, &HistoryBackend::SetPageContents,
+ url, contents);
+}
+
CancelableTaskTracker::TaskId HistoryService::GetFavicons(
const std::vector<GURL>& icon_urls,
int icon_types,
diff --git a/chrome/browser/history/history_service.h b/chrome/browser/history/history_service.h
index f8d383b..6891f38 100644
--- a/chrome/browser/history/history_service.h
+++ b/chrome/browser/history/history_service.h
@@ -236,6 +236,13 @@ class HistoryService : public CancelableRequestProvider,
const GURL& url,
base::Time end_ts);
+ // Indexing ------------------------------------------------------------------
+
+ // Notifies history of the body text of the given recently-visited URL.
+ // If the URL was not visited "recently enough," the history system may
+ // discard it.
+ void SetPageContents(const GURL& url, const string16& contents);
+
// Querying ------------------------------------------------------------------
// Returns the information about the requested URL. If the URL is found,
diff --git a/chrome/browser/history/history_tab_helper.cc b/chrome/browser/history/history_tab_helper.cc
index adbd892..4fc97c6 100644
--- a/chrome/browser/history/history_tab_helper.cc
+++ b/chrome/browser/history/history_tab_helper.cc
@@ -84,6 +84,16 @@ HistoryTabHelper::CreateHistoryAddPageArgs(
return add_page_args;
}
+bool HistoryTabHelper::OnMessageReceived(const IPC::Message& message) {
+ bool handled = true;
+ IPC_BEGIN_MESSAGE_MAP(HistoryTabHelper, message)
+ IPC_MESSAGE_HANDLER(ChromeViewHostMsg_PageContents, OnPageContents)
+ IPC_MESSAGE_UNHANDLED(handled = false)
+ IPC_END_MESSAGE_MAP()
+
+ return handled;
+}
+
void HistoryTabHelper::DidNavigateMainFrame(
const content::LoadCommittedDetails& details,
const content::FrameNavigateParams& params) {
@@ -155,6 +165,25 @@ void HistoryTabHelper::Observe(int type,
}
}
+void HistoryTabHelper::OnPageContents(const GURL& url,
+ const string16& contents) {
+  // Don't index any https pages. People generally don't want their bank
+  // accounts, etc. indexed on their computer, especially since some of these
+  // things are not marked cacheable.
+  // TODO(brettw) we may want to consider more elaborate heuristics such as
+  // the cacheability of the page. We may also want to consider subframes (this
+  // test will still index subframes if the subframe is SSL).
+ // TODO(zelidrag) bug chromium-os:2808 - figure out if we want to reenable
+ // content indexing for chromeos in some future releases.
+#if !defined(OS_CHROMEOS)
+ if (!url.SchemeIsSecure()) {
+ HistoryService* hs = GetHistoryService();
+ if (hs)
+ hs->SetPageContents(url, contents);
+ }
+#endif
+}
+
HistoryService* HistoryTabHelper::GetHistoryService() {
Profile* profile =
Profile::FromBrowserContext(web_contents()->GetBrowserContext());
diff --git a/chrome/browser/history/history_tab_helper.h b/chrome/browser/history/history_tab_helper.h
index 2f613fa..65b03c3 100644
--- a/chrome/browser/history/history_tab_helper.h
+++ b/chrome/browser/history/history_tab_helper.h
@@ -46,6 +46,7 @@ class HistoryTabHelper : public content::WebContentsObserver,
friend class content::WebContentsUserData<HistoryTabHelper>;
// content::WebContentsObserver implementation.
+ virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
virtual void DidNavigateMainFrame(
const content::LoadCommittedDetails& details,
const content::FrameNavigateParams& params) OVERRIDE;
@@ -59,9 +60,7 @@ class HistoryTabHelper : public content::WebContentsObserver,
const content::NotificationSource& source,
const content::NotificationDetails& details) OVERRIDE;
- void OnPageContents(const GURL& url,
- int32 page_id,
- const string16& contents);
+ void OnPageContents(const GURL& url, const string16& contents);
// Helper function to return the history service. May return NULL.
HistoryService* GetHistoryService();
diff --git a/chrome/browser/history/page_collector.cc b/chrome/browser/history/page_collector.cc
new file mode 100644
index 0000000..da07ad1
--- /dev/null
+++ b/chrome/browser/history/page_collector.cc
@@ -0,0 +1,157 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/page_collector.h"
+
+#include "base/bind.h"
+#include "base/message_loop/message_loop.h"
+#include "base/strings/utf_string_conversions.h"
+#include "chrome/browser/history/history_publisher.h"
+#include "url/gurl.h"
+
+namespace {
+
+// Page info older than this will be published even if we haven't
+// gotten a title and/or body.
+const int kExpirationSeconds = 20;
+
+} // namespace
+
+namespace history {
+
+// PageCollector::PageInfo -----------------------------------------------
+
+PageCollector::PageInfo::PageInfo(base::Time visit_time)
+ : visit_time_(visit_time),
+ added_time_(base::TimeTicks::Now()) {
+}
+
+PageCollector::PageInfo::~PageInfo() {}
+
+// NOTE(shess): Per the comment on has_title() and has_body(), this
+// code maps empty strings to single space to differentiate set title
+// and body from empty. This approach is held over from the original
+// TextDatabaseManager version.
+void PageCollector::PageInfo::set_title(const string16& ttl) {
+ if (ttl.empty())
+ title_ = ASCIIToUTF16(" ");
+ else
+ title_ = ttl;
+}
+
+void PageCollector::PageInfo::set_body(const string16& bdy) {
+ if (bdy.empty())
+ body_ = ASCIIToUTF16(" ");
+ else
+ body_ = bdy;
+}
+
+bool PageCollector::PageInfo::Expired(base::TimeTicks now) const {
+ return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds);
+}
+
+PageCollector::PageCollector()
+ : recent_changes_(RecentChangeList::NO_AUTO_EVICT),
+ weak_factory_(this) {
+}
+
+PageCollector::~PageCollector() {
+}
+
+void PageCollector::Init(const HistoryPublisher* history_publisher) {
+ history_publisher_ = history_publisher;
+}
+
+void PageCollector::AddPageURL(const GURL& url, base::Time time) {
+ // Don't collect data which cannot be published.
+ if (!history_publisher_)
+ return;
+
+ // Just save this info for later (evicting any previous data). We
+ // will delete it when it expires or when all the data is complete.
+ recent_changes_.Put(url, PageInfo(time));
+
+ // Schedule flush if not already scheduled.
+ if (!weak_factory_.HasWeakPtrs())
+ ScheduleFlushCollected();
+}
+
+void PageCollector::AddPageTitle(const GURL& url, const string16& title) {
+ if (!history_publisher_)
+ return;
+
+ // If the title comes in after the page has aged out, drop it.
+ // Older code would manufacture information from the database.
+ RecentChangeList::iterator found = recent_changes_.Peek(url);
+ if (found == recent_changes_.end())
+ return;
+
+ // Publish the info if complete.
+ if (found->second.has_body()) {
+ history_publisher_->PublishPageContent(
+ found->second.visit_time(), url, title, found->second.body());
+ recent_changes_.Erase(found);
+ } else {
+ found->second.set_title(title);
+ }
+}
+
+void PageCollector::AddPageContents(const GURL& url,
+ const string16& body) {
+ if (!history_publisher_)
+ return;
+
+ // If the body comes in after the page has aged out, drop it.
+ // Older code would manufacture information from the database.
+ RecentChangeList::iterator found = recent_changes_.Peek(url);
+ if (found == recent_changes_.end())
+ return;
+
+ // Publish the info if complete.
+ if (found->second.has_title()) {
+ history_publisher_->PublishPageContent(
+ found->second.visit_time(), url, found->second.title(), body);
+ recent_changes_.Erase(found);
+ } else {
+ found->second.set_body(body);
+ }
+}
+
+void PageCollector::AddPageData(const GURL& url,
+ base::Time visit_time,
+ const string16& title,
+ const string16& body) {
+ if (!history_publisher_)
+ return;
+
+ // Publish the item.
+ history_publisher_->PublishPageContent(visit_time, url, title, body);
+}
+
+void PageCollector::ScheduleFlushCollected() {
+ weak_factory_.InvalidateWeakPtrs();
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&PageCollector::FlushCollected,
+ weak_factory_.GetWeakPtr()),
+ base::TimeDelta::FromSeconds(kExpirationSeconds));
+}
+
+void PageCollector::FlushCollected() {
+ base::TimeTicks now = base::TimeTicks::Now();
+
+ // Iterate from oldest to newest publishing items which expire while
+ // waiting for title or body.
+ RecentChangeList::reverse_iterator iter = recent_changes_.rbegin();
+ while (iter != recent_changes_.rend() && iter->second.Expired(now)) {
+ AddPageData(iter->first, iter->second.visit_time(),
+ iter->second.title(), iter->second.body());
+ iter = recent_changes_.Erase(iter);
+ }
+
+ if (!recent_changes_.empty())
+ ScheduleFlushCollected();
+}
+
+} // namespace history
diff --git a/chrome/browser/history/page_collector.h b/chrome/browser/history/page_collector.h
new file mode 100644
index 0000000..c044a3b
--- /dev/null
+++ b/chrome/browser/history/page_collector.h
@@ -0,0 +1,123 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+#define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+
+#include "base/basictypes.h"
+#include "base/containers/mru_cache.h"
+#include "base/memory/weak_ptr.h"
+#include "base/strings/string16.h"
+#include "base/time/time.h"
+
+class GURL;
+
+namespace history {
+
+class HistoryPublisher;
+
+// Collect page data and publish to HistoryPublisher.
+class PageCollector {
+ public:
+ // You must call Init() to complete initialization.
+ PageCollector();
+ ~PageCollector();
+
+ // Must call before using other functions.
+ void Init(const HistoryPublisher* history_publisher);
+
+ // Sets specific information for the given page to be published.
+ // In normal operation, URLs will be added as the user visits them, the titles
+ // and bodies will come in some time after that. These changes will be
+ // automatically coalesced and added to the database some time in the future
+ // using AddPageData().
+ //
+ // AddPageURL must be called for a given URL before either the title
+ // or body set. The visit time should be the time corresponding to
+ // that visit in the history database.
+ void AddPageURL(const GURL& url, base::Time visit_time);
+ void AddPageTitle(const GURL& url, const string16& title);
+ void AddPageContents(const GURL& url, const string16& body);
+
+ void AddPageData(const GURL& url,
+ base::Time visit_time,
+ const string16& title,
+ const string16& body);
+
+ private:
+ // Stores "recent stuff" that has happened with the page, since the page
+ // visit, title, and body all come in at different times.
+ class PageInfo {
+ public:
+ explicit PageInfo(base::Time visit_time);
+ ~PageInfo();
+
+ // Getters.
+ base::Time visit_time() const { return visit_time_; }
+ const string16& title() const { return title_; }
+ const string16& body() const { return body_; }
+
+ // Setters, we can only update the title and body.
+ void set_title(const string16& ttl);
+ void set_body(const string16& bdy);
+
+    // Returns true if the title or body (respectively) of the entry has been
+    // set. Since both the title and body setters will "fix" empty strings to
+    // be a space, these indicate if the setter was ever called.
+ bool has_title() const { return !title_.empty(); }
+ bool has_body() const { return !body_.empty(); }
+
+ // Returns true if this entry was added too long ago and we should give up
+ // waiting for more data. The current time is passed in as an argument so we
+ // can check many without re-querying the timer.
+ bool Expired(base::TimeTicks now) const;
+
+ private:
+ // Time of the visit of the URL. This will be the value stored in the URL
+ // and visit tables for the entry.
+ base::Time visit_time_;
+
+ // When this page entry was created. We have a cap on the maximum time that
+ // an entry will be in the queue before being flushed to the database.
+ base::TimeTicks added_time_;
+
+ // Will be the string " " when they are set to distinguish set and unset.
+ string16 title_;
+ string16 body_;
+ };
+
+ // Collected data is published when both the title and body are
+ // present. https data is never passed to AddPageContents(), so
+ // periodically collected data is published without the contents.
+ // Pages which take a long time to load will not have their bodies
+ // published.
+ void ScheduleFlushCollected();
+ void FlushCollected();
+
+ // Lists recent additions that we have not yet filled out with the title and
+ // body. Sorted by time, we will flush them when they are complete or have
+ // been in the queue too long without modification.
+ //
+ // We kind of abuse the MRUCache because we never move things around in it
+ // using Get. Instead, we keep them in the order they were inserted, since
+ // this is the metric we use to measure age. The MRUCache gives us an ordered
+ // list with fast lookup by URL.
+ typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
+ RecentChangeList recent_changes_;
+
+ // Generates tasks for our periodic checking of expired "recent changes".
+ base::WeakPtrFactory<PageCollector> weak_factory_;
+
+ // This object is created and managed by the history backend. We maintain an
+ // opaque pointer to the object for our use.
+ // This can be NULL if there are no indexers registered to receive indexing
+ // data from us.
+ const HistoryPublisher* history_publisher_;
+
+ DISALLOW_COPY_AND_ASSIGN(PageCollector);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
diff --git a/chrome/browser/history/text_database_unittest.cc b/chrome/browser/history/text_database_unittest.cc
deleted file mode 100644
index 25b7d79..0000000
--- a/chrome/browser/history/text_database_unittest.cc
+++ /dev/null
@@ -1,306 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <string>
-
-#include "base/files/scoped_temp_dir.h"
-#include "base/memory/scoped_ptr.h"
-#include "base/strings/string_util.h"
-#include "base/strings/utf_string_conversions.h"
-#include "chrome/browser/history/text_database.h"
-#include "testing/gtest/include/gtest/gtest.h"
-#include "testing/platform_test.h"
-
-using base::Time;
-
-namespace history {
-
-namespace {
-
-// Note that all pages have "COUNTTAG" which allows us to count the number of
-// pages in the database without adding any extra functions to the DB object.
-const char kURL1[] = "http://www.google.com/";
-const int kTime1 = 1000;
-const char kTitle1[] = "Google";
-const char kBody1[] =
- "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
- "Sign out Advanced Search Preferences Language Tools Advertising Programs "
- "- Business Solutions - About Google, 2008 Google";
-
-const char kURL2[] = "http://images.google.com/";
-const int kTime2 = 2000;
-const char kTitle2[] = "Google Image Search";
-const char kBody2[] =
- "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
- "Sign out Advanced Image Search Preferences The most comprehensive image "
- "search on the web. Want to help improve Google Image Search? Try Google "
- "Image Labeler. Advertising Programs - Business Solutions - About Google "
- "2008 Google";
-
-const char kURL3[] = "http://slashdot.org/";
-const int kTime3 = 3000;
-const char kTitle3[] = "Slashdot: News for nerds, stuff that matters";
-const char kBody3[] =
- "COUNTTAG Slashdot Log In Create Account Subscribe Firehose Why "
- "Log In? Why Subscribe? Nickname Password Public Terminal Sections "
- "Main Apple AskSlashdot Backslash Books Developers Games Hardware "
- "Interviews IT Linux Mobile Politics Science YRO";
-
-// Returns the number of rows currently in the database.
-int RowCount(TextDatabase* db) {
- QueryOptions options;
- options.begin_time = Time::FromInternalValue(0);
- // Leave end_time at now.
-
- std::vector<TextDatabase::Match> results;
- TextDatabase::URLSet unique_urls;
- db->GetTextMatches("COUNTTAG", options, &results, &unique_urls);
- return static_cast<int>(results.size());
-}
-
-// Adds each of the test pages to the database.
-void AddAllTestData(TextDatabase* db) {
- EXPECT_TRUE(db->AddPageData(
- Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
- EXPECT_TRUE(db->AddPageData(
- Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
- EXPECT_TRUE(db->AddPageData(
- Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3));
- EXPECT_EQ(3, RowCount(db));
-}
-
-bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
- const char* url) {
- GURL gurl(url);
- for (size_t i = 0; i < results.size(); i++) {
- if (results[i].url == gurl)
- return true;
- }
- return false;
-}
-
-} // namespace
-
-class TextDatabaseTest : public PlatformTest {
- public:
- TextDatabaseTest() {}
-
- protected:
- virtual void SetUp() {
- PlatformTest::SetUp();
- ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
- }
-
- // Create databases with this function, which will ensure that the files are
- // deleted on shutdown. Only open one database for each file. Returns NULL on
- // failure.
- //
- // Set |delete_file| to delete any existing file. If we are trying to create
- // the file for the first time, we don't want a previous test left in a
- // weird state to have left a file that would affect us.
- TextDatabase* CreateDB(TextDatabase::DBIdent id,
- bool allow_create,
- bool delete_file) {
- TextDatabase* db = new TextDatabase(temp_dir_.path(), id, allow_create);
-
- if (delete_file)
- sql::Connection::Delete(db->file_name());
-
- if (!db->Init()) {
- delete db;
- return NULL;
- }
- return db;
- }
-
- // Directory containing the databases.
- base::ScopedTempDir temp_dir_;
-
- // Name of the main database file.
- base::FilePath file_name_;
-};
-
-TEST_F(TextDatabaseTest, AttachDetach) {
- // First database with one page.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db1(CreateDB(kIdee1, true, true));
- ASSERT_TRUE(!!db1.get());
- EXPECT_TRUE(db1->AddPageData(
- Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
-
- // Second database with one page.
- const int kIdee2 = 200802;
- scoped_ptr<TextDatabase> db2(CreateDB(kIdee2, true, true));
- ASSERT_TRUE(!!db2.get());
- EXPECT_TRUE(db2->AddPageData(
- Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
-
- // Detach, then reattach database one. The file should exist, so we force
- // opening an existing file.
- db1.reset();
- db1.reset(CreateDB(kIdee1, false, false));
- ASSERT_TRUE(!!db1.get());
-
- // We should not be able to attach this random database for which no file
- // exists.
- const int kIdeeNoExisto = 999999999;
- scoped_ptr<TextDatabase> db3(CreateDB(kIdeeNoExisto, false, true));
- EXPECT_FALSE(!!db3.get());
-}
-
-TEST_F(TextDatabaseTest, AddRemove) {
- // Create a database and add some pages to it.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
- ASSERT_TRUE(!!db.get());
- URLID id1 = db->AddPageData(
- Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1);
- EXPECT_NE(0, id1);
- URLID id2 = db->AddPageData(
- Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2);
- EXPECT_NE(0, id2);
- URLID id3 = db->AddPageData(
- Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3);
- EXPECT_NE(0, id3);
- EXPECT_EQ(3, RowCount(db.get()));
-
- // Make sure we can delete some of the data.
- db->DeletePageData(Time::FromInternalValue(kTime1), kURL1);
- EXPECT_EQ(2, RowCount(db.get()));
-
- // Close and reopen.
- db.reset(new TextDatabase(temp_dir_.path(), kIdee1, false));
- EXPECT_TRUE(db->Init());
-
- // Verify that the deleted ID is gone and try to delete another one.
- EXPECT_EQ(2, RowCount(db.get()));
- db->DeletePageData(Time::FromInternalValue(kTime2), kURL2);
- EXPECT_EQ(1, RowCount(db.get()));
-}
-
-TEST_F(TextDatabaseTest, Query) {
- // Make a database with some pages.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
- EXPECT_TRUE(!!db.get());
- AddAllTestData(db.get());
-
- // Get all the results.
- QueryOptions options;
- options.begin_time = Time::FromInternalValue(0);
-
- std::vector<TextDatabase::Match> results;
- TextDatabase::URLSet unique_urls;
- db->GetTextMatches("COUNTTAG", options, &results, &unique_urls);
- EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-
- // All 3 sites should be returned in order.
- ASSERT_EQ(3U, results.size());
- EXPECT_EQ(GURL(kURL1), results[2].url);
- EXPECT_EQ(GURL(kURL2), results[1].url);
- EXPECT_EQ(GURL(kURL3), results[0].url);
-
- // Verify the info on those results.
- EXPECT_TRUE(Time::FromInternalValue(kTime1) == results[2].time);
- EXPECT_TRUE(Time::FromInternalValue(kTime2) == results[1].time);
- EXPECT_TRUE(Time::FromInternalValue(kTime3) == results[0].time);
-
- EXPECT_EQ(std::string(kTitle1), UTF16ToUTF8(results[2].title));
- EXPECT_EQ(std::string(kTitle2), UTF16ToUTF8(results[1].title));
- EXPECT_EQ(std::string(kTitle3), UTF16ToUTF8(results[0].title));
-
- // Should have no matches in the title.
- EXPECT_EQ(0U, results[0].title_match_positions.size());
- EXPECT_EQ(0U, results[1].title_match_positions.size());
- EXPECT_EQ(0U, results[2].title_match_positions.size());
-
- // We don't want to be dependent on the exact snippet algorithm, but we know
- // since we searched for "COUNTTAG" which occurs at the beginning of each
- // document, that each snippet should start with that.
- EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[0].snippet.text()),
- "COUNTTAG", false));
- EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[1].snippet.text()),
- "COUNTTAG", false));
- EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[2].snippet.text()),
- "COUNTTAG", false));
-}
-
-TEST_F(TextDatabaseTest, TimeRange) {
- // Make a database with some pages.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
- ASSERT_TRUE(!!db.get());
- AddAllTestData(db.get());
-
- // Beginning should be inclusive, and the ending exclusive.
- // Get all the results.
- QueryOptions options;
- options.begin_time = Time::FromInternalValue(kTime1);
- options.end_time = Time::FromInternalValue(kTime3);
-
- std::vector<TextDatabase::Match> results;
- TextDatabase::URLSet unique_urls;
- bool has_more_results = db->GetTextMatches(
- "COUNTTAG", options, &results, &unique_urls);
- EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-
- // The first and second should have been returned.
- EXPECT_EQ(2U, results.size());
- EXPECT_TRUE(ResultsHaveURL(results, kURL1));
- EXPECT_TRUE(ResultsHaveURL(results, kURL2));
- EXPECT_FALSE(ResultsHaveURL(results, kURL3));
- EXPECT_EQ(kTime1, results.back().time.ToInternalValue());
- EXPECT_FALSE(has_more_results);
-
- // Do a query where there isn't a result on the begin boundary.
- options.begin_time = Time::FromInternalValue((kTime2 - kTime1) / 2 + kTime1);
- options.end_time = Time::FromInternalValue(kTime3 + 1);
- results.clear(); // GetTextMatches does *not* clear the results.
- has_more_results = db->GetTextMatches(
- "COUNTTAG", options, &results, &unique_urls);
- EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
- EXPECT_FALSE(has_more_results);
-
- // Should have two results, the second and third.
- EXPECT_EQ(2U, results.size());
- EXPECT_FALSE(ResultsHaveURL(results, kURL1));
- EXPECT_TRUE(ResultsHaveURL(results, kURL2));
- EXPECT_TRUE(ResultsHaveURL(results, kURL3));
-
- // Try a range that has no results.
- options.begin_time = Time::FromInternalValue(kTime3 + 1);
- options.end_time = Time::FromInternalValue(kTime3 * 100);
- results.clear();
- has_more_results = db->GetTextMatches(
- "COUNTTAG", options, &results, &unique_urls);
- EXPECT_FALSE(has_more_results);
-}
-
-// Make sure that max_count works.
-TEST_F(TextDatabaseTest, MaxCount) {
- // Make a database with some pages.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
- ASSERT_TRUE(!!db.get());
- AddAllTestData(db.get());
-
- // Set up the query to return all the results with "Google" (should be 2), but
- // with a maximum of 1.
- QueryOptions options;
- options.begin_time = Time::FromInternalValue(kTime1);
- options.end_time = Time::FromInternalValue(kTime3 + 1);
- options.max_count = 1;
-
- std::vector<TextDatabase::Match> results;
- TextDatabase::URLSet unique_urls;
- db->GetTextMatches("google", options, &results, &unique_urls);
- EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-
- // There should be one result, the most recent one.
- EXPECT_EQ(1U, results.size());
- EXPECT_TRUE(ResultsHaveURL(results, kURL2));
- EXPECT_EQ(kTime2, results.back().time.ToInternalValue());
-}
-
-} // namespace history