summaryrefslogtreecommitdiffstats
path: root/chrome/browser/history
diff options
context:
space:
mode:
authorshess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-09-20 18:42:50 +0000
committershess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-09-20 18:42:50 +0000
commit33abbda30215a6521987a221b8991eff32ad3284 (patch)
tree519464726ef776be57727f98d42666636cd958fa /chrome/browser/history
parent560cfc359cbaad9af3100542fe5b83fefe78f83f (diff)
downloadchromium_src-33abbda30215a6521987a221b8991eff32ad3284.zip
chromium_src-33abbda30215a6521987a221b8991eff32ad3284.tar.gz
chromium_src-33abbda30215a6521987a221b8991eff32ad3284.tar.bz2
TextDatabase contained an unexpected bit of needed functionality.
https://chromiumcodereview.appspot.com/16951015 removed TextDatabase from history service. TextDatabase was calling a HistoryPublisher routine which was still in use. BUG=294306 R=cevans@chromium.org, rmcilroy@chromium.org, sky@chromium.org Review URL: https://codereview.chromium.org/23437047 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@224441 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/history')
-rw-r--r--chrome/browser/history/history_backend.cc34
-rw-r--r--chrome/browser/history/history_backend.h9
-rw-r--r--chrome/browser/history/history_backend_unittest.cc20
-rw-r--r--chrome/browser/history/history_publisher.h4
-rw-r--r--chrome/browser/history/history_service.cc10
-rw-r--r--chrome/browser/history/history_service.h7
-rw-r--r--chrome/browser/history/history_tab_helper.cc29
-rw-r--r--chrome/browser/history/history_tab_helper.h5
-rw-r--r--chrome/browser/history/page_collector.cc157
-rw-r--r--chrome/browser/history/page_collector.h123
-rw-r--r--chrome/browser/history/text_database_unittest.cc306
11 files changed, 377 insertions, 327 deletions
diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc
index 3913d1d..07f18ed 100644
--- a/chrome/browser/history/history_backend.cc
+++ b/chrome/browser/history/history_backend.cc
@@ -32,6 +32,7 @@
#include "chrome/browser/history/history_notifications.h"
#include "chrome/browser/history/history_publisher.h"
#include "chrome/browser/history/in_memory_history_backend.h"
+#include "chrome/browser/history/page_collector.h"
#include "chrome/browser/history/page_usage_data.h"
#include "chrome/browser/history/select_favicon_frames.h"
#include "chrome/browser/history/top_sites.h"
@@ -550,6 +551,9 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) {
last_ids.second);
}
+ if (page_collector_)
+ page_collector_->AddPageURL(request.url, request.time);
+
ScheduleCommit();
}
@@ -611,6 +615,9 @@ void HistoryBackend::InitImpl(const std::string& languages) {
// Create the history publisher which needs to be passed on to the thumbnail
// database for publishing history.
+ // TODO(shess): HistoryPublisher is being deprecated. I am still
+ // trying to track down who depends on it, meanwhile talk to me
+ // before removing interactions with it. http://crbug.com/294306
history_publisher_.reset(new HistoryPublisher());
if (!history_publisher_->Init()) {
// The init may fail when there are no indexers wanting our history.
@@ -618,6 +625,12 @@ void HistoryBackend::InitImpl(const std::string& languages) {
history_publisher_.reset();
}
+ // Collects page data for history_publisher_.
+ if (history_publisher_.get()) {
+ page_collector_.reset(new PageCollector());
+ page_collector_->Init(history_publisher_.get());
+ }
+
// Thumbnail database.
// TODO(shess): "thumbnail database" these days only stores
// favicons. Thumbnails are stored in "top sites". Consider
@@ -665,8 +678,7 @@ void HistoryBackend::InitImpl(const std::string& languages) {
// *sigh*, this can all be cleaned up when that migration code is removed.
// The main DB initialization should intuitively be first (not that it
// actually matters) and the expirer should be set last.
- expirer_.SetDatabases(db_.get(), archived_db_.get(),
- thumbnail_db_.get());
+ expirer_.SetDatabases(db_.get(), archived_db_.get(), thumbnail_db_.get());
// Open the long-running transaction.
db_->BeginTransaction();
@@ -853,6 +865,12 @@ void HistoryBackend::AddPagesWithDetails(const URLRows& urls,
}
}
+ // TODO(shess): I'm not sure this case needs to exist anymore.
+ if (page_collector_) {
+ page_collector_->AddPageData(i->url(), i->last_visit(),
+ i->title(), string16());
+ }
+
// Sync code manages the visits itself.
if (visit_source != SOURCE_SYNCED) {
// Make up a visit to correspond to the last visit to the page.
@@ -890,11 +908,13 @@ bool HistoryBackend::IsExpiredVisitTime(const base::Time& time) {
return time < expirer_.GetCurrentArchiveTime();
}
-void HistoryBackend::SetPageTitle(const GURL& url,
- const string16& title) {
+void HistoryBackend::SetPageTitle(const GURL& url, const string16& title) {
if (!db_)
return;
+ if (page_collector_)
+ page_collector_->AddPageTitle(url, title);
+
// Search for recent redirects which should get the same title. We make a
// dummy list containing the exact URL visited if there are no redirects so
// the processing below can be the same.
@@ -1665,6 +1685,12 @@ void HistoryBackend::DeleteFTSIndexDatabases() {
num_databases_deleted);
}
+void HistoryBackend::SetPageContents(const GURL& url,
+ const string16& contents) {
+ if (page_collector_)
+ page_collector_->AddPageContents(url, contents);
+}
+
void HistoryBackend::GetFavicons(
const std::vector<GURL>& icon_urls,
int icon_types,
diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h
index de64e5c..f3bbf4d 100644
--- a/chrome/browser/history/history_backend.h
+++ b/chrome/browser/history/history_backend.h
@@ -38,6 +38,7 @@ class AndroidProviderBackend;
class CommitLaterTask;
class HistoryPublisher;
+class PageCollector;
class VisitFilter;
struct DownloadRow;
@@ -158,6 +159,11 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
const GURL& url,
base::Time end_ts);
+
+ // Indexing ------------------------------------------------------------------
+
+ void SetPageContents(const GURL& url, const string16& contents);
+
// Querying ------------------------------------------------------------------
// ScheduleAutocomplete() never frees |provider| (which is globally live).
@@ -833,6 +839,9 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
// Stores old history in a larger, slower database.
scoped_ptr<ArchivedDatabase> archived_db_;
+ // Helper to collect page data for vending to history_publisher_.
+ scoped_ptr<PageCollector> page_collector_;
+
// Manages expiration between the various databases.
ExpireHistoryBackend expirer_;
diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc
index a666cf1..04af2b4 100644
--- a/chrome/browser/history/history_backend_unittest.cc
+++ b/chrome/browser/history/history_backend_unittest.cc
@@ -7,7 +7,6 @@
#include <algorithm>
#include <set>
#include <vector>
-#include <fstream>
#include "base/basictypes.h"
#include "base/bind.h"
@@ -2745,15 +2744,6 @@ TEST_F(HistoryBackendTest, RemoveNotification) {
service->DeleteURL(url);
}
-// Simple function to create a new dummy file.
-void CreateDummyFile(const base::FilePath& filename) {
- std::wofstream file;
- file.open(filename.value().c_str());
- ASSERT_TRUE(file.is_open());
- file << L"Dummy";
- file.close();
-}
-
// Test DeleteFTSIndexDatabases deletes expected files.
TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) {
ASSERT_TRUE(backend_.get());
@@ -2766,10 +2756,12 @@ TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) {
base::FilePath db2_actual(history_path.AppendASCII("Underlying DB"));
// Setup dummy index database files.
- CreateDummyFile(db1);
- CreateDummyFile(db1_journal);
- CreateDummyFile(db1_wal);
- CreateDummyFile(db2_actual);
+ const char* data = "Dummy";
+ const size_t data_len = 5;
+ ASSERT_TRUE(file_util::WriteFile(db1, data, data_len));
+ ASSERT_TRUE(file_util::WriteFile(db1_journal, data, data_len));
+ ASSERT_TRUE(file_util::WriteFile(db1_wal, data, data_len));
+ ASSERT_TRUE(file_util::WriteFile(db2_actual, data, data_len));
#if defined(OS_POSIX)
EXPECT_TRUE(file_util::CreateSymbolicLink(db2_actual, db2_symlink));
#endif
diff --git a/chrome/browser/history/history_publisher.h b/chrome/browser/history/history_publisher.h
index 2f16655..a3538b4 100644
--- a/chrome/browser/history/history_publisher.h
+++ b/chrome/browser/history/history_publisher.h
@@ -16,6 +16,10 @@
#include "history_indexer.h"
#endif
+// TODO(shess): HistoryPublisher is being deprecated. I am still
+// trying to track down who depends on it, meanwhile talk to me
+// before removing interactions with it.
+
class GURL;
namespace base {
diff --git a/chrome/browser/history/history_service.cc b/chrome/browser/history/history_service.cc
index dc28c42..aee38c8 100644
--- a/chrome/browser/history/history_service.cc
+++ b/chrome/browser/history/history_service.cc
@@ -584,6 +584,16 @@ void HistoryService::AddPagesWithDetails(const history::URLRows& info,
&HistoryBackend::AddPagesWithDetails, info, visit_source);
}
+void HistoryService::SetPageContents(const GURL& url,
+ const string16& contents) {
+ DCHECK(thread_checker_.CalledOnValidThread());
+ if (!CanAddURL(url))
+ return;
+
+ ScheduleAndForget(PRIORITY_LOW, &HistoryBackend::SetPageContents,
+ url, contents);
+}
+
CancelableTaskTracker::TaskId HistoryService::GetFavicons(
const std::vector<GURL>& icon_urls,
int icon_types,
diff --git a/chrome/browser/history/history_service.h b/chrome/browser/history/history_service.h
index f8d383b..6891f38 100644
--- a/chrome/browser/history/history_service.h
+++ b/chrome/browser/history/history_service.h
@@ -236,6 +236,13 @@ class HistoryService : public CancelableRequestProvider,
const GURL& url,
base::Time end_ts);
+ // Indexing ------------------------------------------------------------------
+
+ // Notifies history of the body text of the given recently-visited URL.
+ // If the URL was not visited "recently enough," the history system may
+ // discard it.
+ void SetPageContents(const GURL& url, const string16& contents);
+
// Querying ------------------------------------------------------------------
// Returns the information about the requested URL. If the URL is found,
diff --git a/chrome/browser/history/history_tab_helper.cc b/chrome/browser/history/history_tab_helper.cc
index adbd892..4fc97c6 100644
--- a/chrome/browser/history/history_tab_helper.cc
+++ b/chrome/browser/history/history_tab_helper.cc
@@ -84,6 +84,16 @@ HistoryTabHelper::CreateHistoryAddPageArgs(
return add_page_args;
}
+bool HistoryTabHelper::OnMessageReceived(const IPC::Message& message) {
+ bool handled = true;
+ IPC_BEGIN_MESSAGE_MAP(HistoryTabHelper, message)
+ IPC_MESSAGE_HANDLER(ChromeViewHostMsg_PageContents, OnPageContents)
+ IPC_MESSAGE_UNHANDLED(handled = false)
+ IPC_END_MESSAGE_MAP()
+
+ return handled;
+}
+
void HistoryTabHelper::DidNavigateMainFrame(
const content::LoadCommittedDetails& details,
const content::FrameNavigateParams& params) {
@@ -155,6 +165,25 @@ void HistoryTabHelper::Observe(int type,
}
}
+void HistoryTabHelper::OnPageContents(const GURL& url,
+ const string16& contents) {
+  // Don't index any https pages. People generally don't want their bank
+  // accounts, etc. indexed on their computer, especially since some of these
+  // things are not marked cacheable.
+  // TODO(brettw) we may want to consider more elaborate heuristics such as
+  // the cacheability of the page. We may also want to consider subframes (this
+  // test will still index subframes if the subframe is SSL).
+ // TODO(zelidrag) bug chromium-os:2808 - figure out if we want to reenable
+ // content indexing for chromeos in some future releases.
+#if !defined(OS_CHROMEOS)
+ if (!url.SchemeIsSecure()) {
+ HistoryService* hs = GetHistoryService();
+ if (hs)
+ hs->SetPageContents(url, contents);
+ }
+#endif
+}
+
HistoryService* HistoryTabHelper::GetHistoryService() {
Profile* profile =
Profile::FromBrowserContext(web_contents()->GetBrowserContext());
diff --git a/chrome/browser/history/history_tab_helper.h b/chrome/browser/history/history_tab_helper.h
index 2f613fa..65b03c3 100644
--- a/chrome/browser/history/history_tab_helper.h
+++ b/chrome/browser/history/history_tab_helper.h
@@ -46,6 +46,7 @@ class HistoryTabHelper : public content::WebContentsObserver,
friend class content::WebContentsUserData<HistoryTabHelper>;
// content::WebContentsObserver implementation.
+ virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
virtual void DidNavigateMainFrame(
const content::LoadCommittedDetails& details,
const content::FrameNavigateParams& params) OVERRIDE;
@@ -59,9 +60,7 @@ class HistoryTabHelper : public content::WebContentsObserver,
const content::NotificationSource& source,
const content::NotificationDetails& details) OVERRIDE;
- void OnPageContents(const GURL& url,
- int32 page_id,
- const string16& contents);
+ void OnPageContents(const GURL& url, const string16& contents);
// Helper function to return the history service. May return NULL.
HistoryService* GetHistoryService();
diff --git a/chrome/browser/history/page_collector.cc b/chrome/browser/history/page_collector.cc
new file mode 100644
index 0000000..da07ad1
--- /dev/null
+++ b/chrome/browser/history/page_collector.cc
@@ -0,0 +1,157 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/page_collector.h"
+
+#include "base/bind.h"
+#include "base/message_loop/message_loop.h"
+#include "base/strings/utf_string_conversions.h"
+#include "chrome/browser/history/history_publisher.h"
+#include "url/gurl.h"
+
+namespace {
+
+// Page info older than this will be published even if we haven't
+// gotten a title and/or body.
+const int kExpirationSeconds = 20;
+
+} // namespace
+
+namespace history {
+
+// PageCollector::PageInfo -----------------------------------------------
+
+PageCollector::PageInfo::PageInfo(base::Time visit_time)
+ : visit_time_(visit_time),
+ added_time_(base::TimeTicks::Now()) {
+}
+
+PageCollector::PageInfo::~PageInfo() {}
+
+// NOTE(shess): Per the comment on has_title() and has_body(), this
+// code maps empty strings to single space to differentiate set title
+// and body from empty. This approach is held over from the original
+// TextDatabaseManager version.
+void PageCollector::PageInfo::set_title(const string16& ttl) {
+ if (ttl.empty())
+ title_ = ASCIIToUTF16(" ");
+ else
+ title_ = ttl;
+}
+
+void PageCollector::PageInfo::set_body(const string16& bdy) {
+ if (bdy.empty())
+ body_ = ASCIIToUTF16(" ");
+ else
+ body_ = bdy;
+}
+
+bool PageCollector::PageInfo::Expired(base::TimeTicks now) const {
+ return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds);
+}
+
+PageCollector::PageCollector()
+ : recent_changes_(RecentChangeList::NO_AUTO_EVICT),
+ weak_factory_(this) {
+}
+
+PageCollector::~PageCollector() {
+}
+
+void PageCollector::Init(const HistoryPublisher* history_publisher) {
+ history_publisher_ = history_publisher;
+}
+
+void PageCollector::AddPageURL(const GURL& url, base::Time time) {
+ // Don't collect data which cannot be published.
+ if (!history_publisher_)
+ return;
+
+ // Just save this info for later (evicting any previous data). We
+ // will delete it when it expires or when all the data is complete.
+ recent_changes_.Put(url, PageInfo(time));
+
+ // Schedule flush if not already scheduled.
+ if (!weak_factory_.HasWeakPtrs())
+ ScheduleFlushCollected();
+}
+
+void PageCollector::AddPageTitle(const GURL& url, const string16& title) {
+ if (!history_publisher_)
+ return;
+
+ // If the title comes in after the page has aged out, drop it.
+ // Older code would manufacture information from the database.
+ RecentChangeList::iterator found = recent_changes_.Peek(url);
+ if (found == recent_changes_.end())
+ return;
+
+ // Publish the info if complete.
+ if (found->second.has_body()) {
+ history_publisher_->PublishPageContent(
+ found->second.visit_time(), url, title, found->second.body());
+ recent_changes_.Erase(found);
+ } else {
+ found->second.set_title(title);
+ }
+}
+
+void PageCollector::AddPageContents(const GURL& url,
+ const string16& body) {
+ if (!history_publisher_)
+ return;
+
+ // If the body comes in after the page has aged out, drop it.
+ // Older code would manufacture information from the database.
+ RecentChangeList::iterator found = recent_changes_.Peek(url);
+ if (found == recent_changes_.end())
+ return;
+
+ // Publish the info if complete.
+ if (found->second.has_title()) {
+ history_publisher_->PublishPageContent(
+ found->second.visit_time(), url, found->second.title(), body);
+ recent_changes_.Erase(found);
+ } else {
+ found->second.set_body(body);
+ }
+}
+
+void PageCollector::AddPageData(const GURL& url,
+ base::Time visit_time,
+ const string16& title,
+ const string16& body) {
+ if (!history_publisher_)
+ return;
+
+ // Publish the item.
+ history_publisher_->PublishPageContent(visit_time, url, title, body);
+}
+
+void PageCollector::ScheduleFlushCollected() {
+ weak_factory_.InvalidateWeakPtrs();
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&PageCollector::FlushCollected,
+ weak_factory_.GetWeakPtr()),
+ base::TimeDelta::FromSeconds(kExpirationSeconds));
+}
+
+void PageCollector::FlushCollected() {
+ base::TimeTicks now = base::TimeTicks::Now();
+
+ // Iterate from oldest to newest publishing items which expire while
+ // waiting for title or body.
+ RecentChangeList::reverse_iterator iter = recent_changes_.rbegin();
+ while (iter != recent_changes_.rend() && iter->second.Expired(now)) {
+ AddPageData(iter->first, iter->second.visit_time(),
+ iter->second.title(), iter->second.body());
+ iter = recent_changes_.Erase(iter);
+ }
+
+ if (!recent_changes_.empty())
+ ScheduleFlushCollected();
+}
+
+} // namespace history
diff --git a/chrome/browser/history/page_collector.h b/chrome/browser/history/page_collector.h
new file mode 100644
index 0000000..c044a3b
--- /dev/null
+++ b/chrome/browser/history/page_collector.h
@@ -0,0 +1,123 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+#define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+
+#include "base/basictypes.h"
+#include "base/containers/mru_cache.h"
+#include "base/memory/weak_ptr.h"
+#include "base/strings/string16.h"
+#include "base/time/time.h"
+
+class GURL;
+
+namespace history {
+
+class HistoryPublisher;
+
+// Collect page data and publish to HistoryPublisher.
+class PageCollector {
+ public:
+ // You must call Init() to complete initialization.
+ PageCollector();
+ ~PageCollector();
+
+ // Must call before using other functions.
+ void Init(const HistoryPublisher* history_publisher);
+
+ // Sets specific information for the given page to be published.
+ // In normal operation, URLs will be added as the user visits them, the titles
+ // and bodies will come in some time after that. These changes will be
+ // automatically coalesced and added to the database some time in the future
+ // using AddPageData().
+ //
+ // AddPageURL must be called for a given URL before either the title
+ // or body set. The visit time should be the time corresponding to
+ // that visit in the history database.
+ void AddPageURL(const GURL& url, base::Time visit_time);
+ void AddPageTitle(const GURL& url, const string16& title);
+ void AddPageContents(const GURL& url, const string16& body);
+
+ void AddPageData(const GURL& url,
+ base::Time visit_time,
+ const string16& title,
+ const string16& body);
+
+ private:
+ // Stores "recent stuff" that has happened with the page, since the page
+ // visit, title, and body all come in at different times.
+ class PageInfo {
+ public:
+ explicit PageInfo(base::Time visit_time);
+ ~PageInfo();
+
+ // Getters.
+ base::Time visit_time() const { return visit_time_; }
+ const string16& title() const { return title_; }
+ const string16& body() const { return body_; }
+
+ // Setters, we can only update the title and body.
+ void set_title(const string16& ttl);
+ void set_body(const string16& bdy);
+
+    // Returns true if the title or body (respectively) of the entry has been
+    // set. Since both the title and body setters will "fix" empty strings to
+    // be a space, these indicate if the setter was ever called.
+ bool has_title() const { return !title_.empty(); }
+ bool has_body() const { return !body_.empty(); }
+
+ // Returns true if this entry was added too long ago and we should give up
+ // waiting for more data. The current time is passed in as an argument so we
+ // can check many without re-querying the timer.
+ bool Expired(base::TimeTicks now) const;
+
+ private:
+ // Time of the visit of the URL. This will be the value stored in the URL
+ // and visit tables for the entry.
+ base::Time visit_time_;
+
+ // When this page entry was created. We have a cap on the maximum time that
+ // an entry will be in the queue before being flushed to the database.
+ base::TimeTicks added_time_;
+
+ // Will be the string " " when they are set to distinguish set and unset.
+ string16 title_;
+ string16 body_;
+ };
+
+ // Collected data is published when both the title and body are
+ // present. https data is never passed to AddPageContents(), so
+ // periodically collected data is published without the contents.
+ // Pages which take a long time to load will not have their bodies
+ // published.
+ void ScheduleFlushCollected();
+ void FlushCollected();
+
+ // Lists recent additions that we have not yet filled out with the title and
+ // body. Sorted by time, we will flush them when they are complete or have
+ // been in the queue too long without modification.
+ //
+ // We kind of abuse the MRUCache because we never move things around in it
+ // using Get. Instead, we keep them in the order they were inserted, since
+ // this is the metric we use to measure age. The MRUCache gives us an ordered
+ // list with fast lookup by URL.
+ typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
+ RecentChangeList recent_changes_;
+
+ // Generates tasks for our periodic checking of expired "recent changes".
+ base::WeakPtrFactory<PageCollector> weak_factory_;
+
+ // This object is created and managed by the history backend. We maintain an
+ // opaque pointer to the object for our use.
+ // This can be NULL if there are no indexers registered to receive indexing
+ // data from us.
+ const HistoryPublisher* history_publisher_;
+
+ DISALLOW_COPY_AND_ASSIGN(PageCollector);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
diff --git a/chrome/browser/history/text_database_unittest.cc b/chrome/browser/history/text_database_unittest.cc
deleted file mode 100644
index 25b7d79..0000000
--- a/chrome/browser/history/text_database_unittest.cc
+++ /dev/null
@@ -1,306 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <string>
-
-#include "base/files/scoped_temp_dir.h"
-#include "base/memory/scoped_ptr.h"
-#include "base/strings/string_util.h"
-#include "base/strings/utf_string_conversions.h"
-#include "chrome/browser/history/text_database.h"
-#include "testing/gtest/include/gtest/gtest.h"
-#include "testing/platform_test.h"
-
-using base::Time;
-
-namespace history {
-
-namespace {
-
-// Note that all pages have "COUNTTAG" which allows us to count the number of
-// pages in the database without adding any extra functions to the DB object.
-const char kURL1[] = "http://www.google.com/";
-const int kTime1 = 1000;
-const char kTitle1[] = "Google";
-const char kBody1[] =
- "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
- "Sign out Advanced Search Preferences Language Tools Advertising Programs "
- "- Business Solutions - About Google, 2008 Google";
-
-const char kURL2[] = "http://images.google.com/";
-const int kTime2 = 2000;
-const char kTitle2[] = "Google Image Search";
-const char kBody2[] =
- "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
- "Sign out Advanced Image Search Preferences The most comprehensive image "
- "search on the web. Want to help improve Google Image Search? Try Google "
- "Image Labeler. Advertising Programs - Business Solutions - About Google "
- "2008 Google";
-
-const char kURL3[] = "http://slashdot.org/";
-const int kTime3 = 3000;
-const char kTitle3[] = "Slashdot: News for nerds, stuff that matters";
-const char kBody3[] =
- "COUNTTAG Slashdot Log In Create Account Subscribe Firehose Why "
- "Log In? Why Subscribe? Nickname Password Public Terminal Sections "
- "Main Apple AskSlashdot Backslash Books Developers Games Hardware "
- "Interviews IT Linux Mobile Politics Science YRO";
-
-// Returns the number of rows currently in the database.
-int RowCount(TextDatabase* db) {
- QueryOptions options;
- options.begin_time = Time::FromInternalValue(0);
- // Leave end_time at now.
-
- std::vector<TextDatabase::Match> results;
- TextDatabase::URLSet unique_urls;
- db->GetTextMatches("COUNTTAG", options, &results, &unique_urls);
- return static_cast<int>(results.size());
-}
-
-// Adds each of the test pages to the database.
-void AddAllTestData(TextDatabase* db) {
- EXPECT_TRUE(db->AddPageData(
- Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
- EXPECT_TRUE(db->AddPageData(
- Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
- EXPECT_TRUE(db->AddPageData(
- Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3));
- EXPECT_EQ(3, RowCount(db));
-}
-
-bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
- const char* url) {
- GURL gurl(url);
- for (size_t i = 0; i < results.size(); i++) {
- if (results[i].url == gurl)
- return true;
- }
- return false;
-}
-
-} // namespace
-
-class TextDatabaseTest : public PlatformTest {
- public:
- TextDatabaseTest() {}
-
- protected:
- virtual void SetUp() {
- PlatformTest::SetUp();
- ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
- }
-
- // Create databases with this function, which will ensure that the files are
- // deleted on shutdown. Only open one database for each file. Returns NULL on
- // failure.
- //
- // Set |delete_file| to delete any existing file. If we are trying to create
- // the file for the first time, we don't want a previous test left in a
- // weird state to have left a file that would affect us.
- TextDatabase* CreateDB(TextDatabase::DBIdent id,
- bool allow_create,
- bool delete_file) {
- TextDatabase* db = new TextDatabase(temp_dir_.path(), id, allow_create);
-
- if (delete_file)
- sql::Connection::Delete(db->file_name());
-
- if (!db->Init()) {
- delete db;
- return NULL;
- }
- return db;
- }
-
- // Directory containing the databases.
- base::ScopedTempDir temp_dir_;
-
- // Name of the main database file.
- base::FilePath file_name_;
-};
-
-TEST_F(TextDatabaseTest, AttachDetach) {
- // First database with one page.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db1(CreateDB(kIdee1, true, true));
- ASSERT_TRUE(!!db1.get());
- EXPECT_TRUE(db1->AddPageData(
- Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
-
- // Second database with one page.
- const int kIdee2 = 200802;
- scoped_ptr<TextDatabase> db2(CreateDB(kIdee2, true, true));
- ASSERT_TRUE(!!db2.get());
- EXPECT_TRUE(db2->AddPageData(
- Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
-
- // Detach, then reattach database one. The file should exist, so we force
- // opening an existing file.
- db1.reset();
- db1.reset(CreateDB(kIdee1, false, false));
- ASSERT_TRUE(!!db1.get());
-
- // We should not be able to attach this random database for which no file
- // exists.
- const int kIdeeNoExisto = 999999999;
- scoped_ptr<TextDatabase> db3(CreateDB(kIdeeNoExisto, false, true));
- EXPECT_FALSE(!!db3.get());
-}
-
-TEST_F(TextDatabaseTest, AddRemove) {
- // Create a database and add some pages to it.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
- ASSERT_TRUE(!!db.get());
- URLID id1 = db->AddPageData(
- Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1);
- EXPECT_NE(0, id1);
- URLID id2 = db->AddPageData(
- Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2);
- EXPECT_NE(0, id2);
- URLID id3 = db->AddPageData(
- Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3);
- EXPECT_NE(0, id3);
- EXPECT_EQ(3, RowCount(db.get()));
-
- // Make sure we can delete some of the data.
- db->DeletePageData(Time::FromInternalValue(kTime1), kURL1);
- EXPECT_EQ(2, RowCount(db.get()));
-
- // Close and reopen.
- db.reset(new TextDatabase(temp_dir_.path(), kIdee1, false));
- EXPECT_TRUE(db->Init());
-
- // Verify that the deleted ID is gone and try to delete another one.
- EXPECT_EQ(2, RowCount(db.get()));
- db->DeletePageData(Time::FromInternalValue(kTime2), kURL2);
- EXPECT_EQ(1, RowCount(db.get()));
-}
-
-TEST_F(TextDatabaseTest, Query) {
- // Make a database with some pages.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
- EXPECT_TRUE(!!db.get());
- AddAllTestData(db.get());
-
- // Get all the results.
- QueryOptions options;
- options.begin_time = Time::FromInternalValue(0);
-
- std::vector<TextDatabase::Match> results;
- TextDatabase::URLSet unique_urls;
- db->GetTextMatches("COUNTTAG", options, &results, &unique_urls);
- EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-
- // All 3 sites should be returned in order.
- ASSERT_EQ(3U, results.size());
- EXPECT_EQ(GURL(kURL1), results[2].url);
- EXPECT_EQ(GURL(kURL2), results[1].url);
- EXPECT_EQ(GURL(kURL3), results[0].url);
-
- // Verify the info on those results.
- EXPECT_TRUE(Time::FromInternalValue(kTime1) == results[2].time);
- EXPECT_TRUE(Time::FromInternalValue(kTime2) == results[1].time);
- EXPECT_TRUE(Time::FromInternalValue(kTime3) == results[0].time);
-
- EXPECT_EQ(std::string(kTitle1), UTF16ToUTF8(results[2].title));
- EXPECT_EQ(std::string(kTitle2), UTF16ToUTF8(results[1].title));
- EXPECT_EQ(std::string(kTitle3), UTF16ToUTF8(results[0].title));
-
- // Should have no matches in the title.
- EXPECT_EQ(0U, results[0].title_match_positions.size());
- EXPECT_EQ(0U, results[1].title_match_positions.size());
- EXPECT_EQ(0U, results[2].title_match_positions.size());
-
- // We don't want to be dependent on the exact snippet algorithm, but we know
- // since we searched for "COUNTTAG" which occurs at the beginning of each
- // document, that each snippet should start with that.
- EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[0].snippet.text()),
- "COUNTTAG", false));
- EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[1].snippet.text()),
- "COUNTTAG", false));
- EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[2].snippet.text()),
- "COUNTTAG", false));
-}
-
-TEST_F(TextDatabaseTest, TimeRange) {
- // Make a database with some pages.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
- ASSERT_TRUE(!!db.get());
- AddAllTestData(db.get());
-
- // Beginning should be inclusive, and the ending exclusive.
- // Get all the results.
- QueryOptions options;
- options.begin_time = Time::FromInternalValue(kTime1);
- options.end_time = Time::FromInternalValue(kTime3);
-
- std::vector<TextDatabase::Match> results;
- TextDatabase::URLSet unique_urls;
- bool has_more_results = db->GetTextMatches(
- "COUNTTAG", options, &results, &unique_urls);
- EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-
- // The first and second should have been returned.
- EXPECT_EQ(2U, results.size());
- EXPECT_TRUE(ResultsHaveURL(results, kURL1));
- EXPECT_TRUE(ResultsHaveURL(results, kURL2));
- EXPECT_FALSE(ResultsHaveURL(results, kURL3));
- EXPECT_EQ(kTime1, results.back().time.ToInternalValue());
- EXPECT_FALSE(has_more_results);
-
- // Do a query where there isn't a result on the begin boundary.
- options.begin_time = Time::FromInternalValue((kTime2 - kTime1) / 2 + kTime1);
- options.end_time = Time::FromInternalValue(kTime3 + 1);
- results.clear(); // GetTextMatches does *not* clear the results.
- has_more_results = db->GetTextMatches(
- "COUNTTAG", options, &results, &unique_urls);
- EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
- EXPECT_FALSE(has_more_results);
-
- // Should have two results, the second and third.
- EXPECT_EQ(2U, results.size());
- EXPECT_FALSE(ResultsHaveURL(results, kURL1));
- EXPECT_TRUE(ResultsHaveURL(results, kURL2));
- EXPECT_TRUE(ResultsHaveURL(results, kURL3));
-
- // Try a range that has no results.
- options.begin_time = Time::FromInternalValue(kTime3 + 1);
- options.end_time = Time::FromInternalValue(kTime3 * 100);
- results.clear();
- has_more_results = db->GetTextMatches(
- "COUNTTAG", options, &results, &unique_urls);
- EXPECT_FALSE(has_more_results);
-}
-
-// Make sure that max_count works.
-TEST_F(TextDatabaseTest, MaxCount) {
- // Make a database with some pages.
- const int kIdee1 = 200801;
- scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
- ASSERT_TRUE(!!db.get());
- AddAllTestData(db.get());
-
- // Set up the query to return all the results with "Google" (should be 2), but
- // with a maximum of 1.
- QueryOptions options;
- options.begin_time = Time::FromInternalValue(kTime1);
- options.end_time = Time::FromInternalValue(kTime3 + 1);
- options.max_count = 1;
-
- std::vector<TextDatabase::Match> results;
- TextDatabase::URLSet unique_urls;
- db->GetTextMatches("google", options, &results, &unique_urls);
- EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-
- // There should be one result, the most recent one.
- EXPECT_EQ(1U, results.size());
- EXPECT_TRUE(ResultsHaveURL(results, kURL2));
- EXPECT_EQ(kTime2, results.back().time.ToInternalValue());
-}
-
-} // namespace history