author     shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2013-09-20 18:42:50 +0000
committer  shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2013-09-20 18:42:50 +0000
commit     33abbda30215a6521987a221b8991eff32ad3284 (patch)
tree       519464726ef776be57727f98d42666636cd958fa /chrome/browser/history
parent     560cfc359cbaad9af3100542fe5b83fefe78f83f (diff)
download   chromium_src-33abbda30215a6521987a221b8991eff32ad3284.zip
           chromium_src-33abbda30215a6521987a221b8991eff32ad3284.tar.gz
           chromium_src-33abbda30215a6521987a221b8991eff32ad3284.tar.bz2
TextDatabase contained an unexpected bit of needed functionality.

https://chromiumcodereview.appspot.com/16951015 removed TextDatabase
from the history service, but TextDatabase was calling a HistoryPublisher
routine which is still in use. This change restores that path via a new
PageCollector class.
BUG=294306
R=cevans@chromium.org, rmcilroy@chromium.org, sky@chromium.org
Review URL: https://codereview.chromium.org/23437047
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@224441 0039d316-1c4b-4281-b951-d872f2087c98
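The patch below replaces TextDatabase's role as the feeder for HistoryPublisher with a small coalescing buffer: a page's URL arrives first (at visit time), its title and body trickle in later over IPC, and PageCollector holds partial entries, publishing each one as soon as it is complete or flushing it after a timeout. As a rough illustration of that coalescing logic, here is a minimal standalone sketch in plain C++ (standard library only). The Collector class, PageInfo struct, and publish callback are hypothetical stand-ins, not the Chromium API; only the AddPageURL/AddPageTitle/AddPageContents entry points mirror the patch, which additionally uses base::MRUCache keyed by GURL, maps empty strings to " " to mark a field as set, and schedules the flush as a delayed task.

#include <chrono>
#include <functional>
#include <iostream>
#include <map>
#include <string>

class Collector {
 public:
  using Clock = std::chrono::steady_clock;
  using Publish = std::function<void(const std::string& url,
                                     const std::string& title,
                                     const std::string& body)>;

  explicit Collector(Publish publish) : publish_(std::move(publish)) {}

  // Visit seen: start (or restart) collecting for this URL.
  void AddPageURL(const std::string& url) {
    pending_[url] = PageInfo{Clock::now(), "", ""};
  }

  // Title arrived: publish if the body is already here, else hold it.
  // (Simplification: empty string means "unset" here; the real code
  // stores " " for a set-but-empty field.)
  void AddPageTitle(const std::string& url, const std::string& title) {
    auto it = pending_.find(url);
    if (it == pending_.end())
      return;  // Aged out or never seen; drop it, as the patch does.
    if (!it->second.body.empty()) {
      publish_(url, title, it->second.body);
      pending_.erase(it);
    } else {
      it->second.title = title;
    }
  }

  // Body arrived: symmetric with AddPageTitle().
  void AddPageContents(const std::string& url, const std::string& body) {
    auto it = pending_.find(url);
    if (it == pending_.end())
      return;
    if (!it->second.title.empty()) {
      publish_(url, it->second.title, body);
      pending_.erase(it);
    } else {
      it->second.body = body;
    }
  }

  // Periodic flush: publish incomplete entries older than |max_age|.
  void FlushExpired(std::chrono::seconds max_age) {
    const auto now = Clock::now();
    for (auto it = pending_.begin(); it != pending_.end();) {
      if (now - it->second.added >= max_age) {
        publish_(it->first, it->second.title, it->second.body);
        it = pending_.erase(it);
      } else {
        ++it;
      }
    }
  }

 private:
  struct PageInfo {
    Clock::time_point added;
    std::string title;
    std::string body;
  };
  std::map<std::string, PageInfo> pending_;
  Publish publish_;
};

int main() {
  Collector c([](const std::string& url, const std::string& title,
                 const std::string& body) {
    std::cout << "publish " << url << " title=[" << title << "]\n";
  });
  c.AddPageURL("http://www.google.com/");
  c.AddPageTitle("http://www.google.com/", "Google");
  c.AddPageContents("http://www.google.com/", "Web Images Maps ...");
  c.FlushExpired(std::chrono::seconds(20));  // Nothing left to flush.
  return 0;
}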
Diffstat (limited to 'chrome/browser/history')
-rw-r--r--   chrome/browser/history/history_backend.cc            34
-rw-r--r--   chrome/browser/history/history_backend.h              9
-rw-r--r--   chrome/browser/history/history_backend_unittest.cc   20
-rw-r--r--   chrome/browser/history/history_publisher.h            4
-rw-r--r--   chrome/browser/history/history_service.cc            10
-rw-r--r--   chrome/browser/history/history_service.h              7
-rw-r--r--   chrome/browser/history/history_tab_helper.cc         29
-rw-r--r--   chrome/browser/history/history_tab_helper.h           5
-rw-r--r--   chrome/browser/history/page_collector.cc            157
-rw-r--r--   chrome/browser/history/page_collector.h             123
-rw-r--r--   chrome/browser/history/text_database_unittest.cc    306
11 files changed, 377 insertions, 327 deletions
diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc
index 3913d1d..07f18ed 100644
--- a/chrome/browser/history/history_backend.cc
+++ b/chrome/browser/history/history_backend.cc
@@ -32,6 +32,7 @@
 #include "chrome/browser/history/history_notifications.h"
 #include "chrome/browser/history/history_publisher.h"
 #include "chrome/browser/history/in_memory_history_backend.h"
+#include "chrome/browser/history/page_collector.h"
 #include "chrome/browser/history/page_usage_data.h"
 #include "chrome/browser/history/select_favicon_frames.h"
 #include "chrome/browser/history/top_sites.h"
@@ -550,6 +551,9 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) {
                                   last_ids.second);
   }
 
+  if (page_collector_)
+    page_collector_->AddPageURL(request.url, request.time);
+
   ScheduleCommit();
 }
 
@@ -611,6 +615,9 @@ void HistoryBackend::InitImpl(const std::string& languages) {
   // Create the history publisher which needs to be passed on to the thumbnail
   // database for publishing history.
+  // TODO(shess): HistoryPublisher is being deprecated.  I am still
+  // trying to track down who depends on it, meanwhile talk to me
+  // before removing interactions with it.  http://crbug.com/294306
   history_publisher_.reset(new HistoryPublisher());
   if (!history_publisher_->Init()) {
     // The init may fail when there are no indexers wanting our history.
@@ -618,6 +625,12 @@
     history_publisher_.reset();
   }
 
+  // Collects page data for history_publisher_.
+  if (history_publisher_.get()) {
+    page_collector_.reset(new PageCollector());
+    page_collector_->Init(history_publisher_.get());
+  }
+
   // Thumbnail database.
   // TODO(shess): "thumbnail database" these days only stores
   // favicons.  Thumbnails are stored in "top sites".  Consider
@@ -665,8 +678,7 @@
   // *sigh*, this can all be cleaned up when that migration code is removed.
   // The main DB initialization should intuitively be first (not that it
   // actually matters) and the expirer should be set last.
-  expirer_.SetDatabases(db_.get(), archived_db_.get(),
-                        thumbnail_db_.get());
+  expirer_.SetDatabases(db_.get(), archived_db_.get(), thumbnail_db_.get());
 
   // Open the long-running transaction.
   db_->BeginTransaction();
@@ -853,6 +865,12 @@ void HistoryBackend::AddPagesWithDetails(const URLRows& urls,
     }
   }
 
+  // TODO(shess): I'm not sure this case needs to exist anymore.
+  if (page_collector_) {
+    page_collector_->AddPageData(i->url(), i->last_visit(),
+                                 i->title(), string16());
+  }
+
   // Sync code manages the visits itself.
   if (visit_source != SOURCE_SYNCED) {
     // Make up a visit to correspond to the last visit to the page.
@@ -890,11 +908,13 @@ bool HistoryBackend::IsExpiredVisitTime(const base::Time& time) {
   return time < expirer_.GetCurrentArchiveTime();
 }
 
-void HistoryBackend::SetPageTitle(const GURL& url,
-                                  const string16& title) {
+void HistoryBackend::SetPageTitle(const GURL& url, const string16& title) {
   if (!db_)
     return;
 
+  if (page_collector_)
+    page_collector_->AddPageTitle(url, title);
+
   // Search for recent redirects which should get the same title. We make a
   // dummy list containing the exact URL visited if there are no redirects so
   // the processing below can be the same.
@@ -1665,6 +1685,12 @@ void HistoryBackend::DeleteFTSIndexDatabases() {
                              num_databases_deleted);
 }
 
+void HistoryBackend::SetPageContents(const GURL& url,
+                                     const string16& contents) {
+  if (page_collector_)
+    page_collector_->AddPageContents(url, contents);
+}
+
 void HistoryBackend::GetFavicons(
     const std::vector<GURL>& icon_urls,
     int icon_types,
diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h
index de64e5c..f3bbf4d 100644
--- a/chrome/browser/history/history_backend.h
+++ b/chrome/browser/history/history_backend.h
@@ -38,6 +38,7 @@
 class AndroidProviderBackend;
 
 class CommitLaterTask;
 class HistoryPublisher;
+class PageCollector;
 class VisitFilter;
 struct DownloadRow;
@@ -158,6 +159,11 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
       const GURL& url,
       base::Time end_ts);
 
+
+  // Indexing ------------------------------------------------------------------
+
+  void SetPageContents(const GURL& url, const string16& contents);
+
   // Querying ------------------------------------------------------------------
 
   // ScheduleAutocomplete() never frees |provider| (which is globally live).
@@ -833,6 +839,9 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
   // Stores old history in a larger, slower database.
   scoped_ptr<ArchivedDatabase> archived_db_;
 
+  // Helper to collect page data for vending to history_publisher_.
+  scoped_ptr<PageCollector> page_collector_;
+
   // Manages expiration between the various databases.
   ExpireHistoryBackend expirer_;
diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc
index a666cf1..04af2b4 100644
--- a/chrome/browser/history/history_backend_unittest.cc
+++ b/chrome/browser/history/history_backend_unittest.cc
@@ -7,7 +7,6 @@
 #include <algorithm>
 #include <set>
 #include <vector>
-#include <fstream>
 
 #include "base/basictypes.h"
 #include "base/bind.h"
@@ -2745,15 +2744,6 @@ TEST_F(HistoryBackendTest, RemoveNotification) {
   service->DeleteURL(url);
 }
 
-// Simple function to create a new dummy file.
-void CreateDummyFile(const base::FilePath& filename) {
-  std::wofstream file;
-  file.open(filename.value().c_str());
-  ASSERT_TRUE(file.is_open());
-  file << L"Dummy";
-  file.close();
-}
-
 // Test DeleteFTSIndexDatabases deletes expected files.
 TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) {
   ASSERT_TRUE(backend_.get());
@@ -2766,10 +2756,12 @@
   base::FilePath db2_actual(history_path.AppendASCII("Underlying DB"));
 
   // Setup dummy index database files.
-  CreateDummyFile(db1);
-  CreateDummyFile(db1_journal);
-  CreateDummyFile(db1_wal);
-  CreateDummyFile(db2_actual);
+  const char* data = "Dummy";
+  const size_t data_len = 5;
+  ASSERT_TRUE(file_util::WriteFile(db1, data, data_len));
+  ASSERT_TRUE(file_util::WriteFile(db1_journal, data, data_len));
+  ASSERT_TRUE(file_util::WriteFile(db1_wal, data, data_len));
+  ASSERT_TRUE(file_util::WriteFile(db2_actual, data, data_len));
 #if defined(OS_POSIX)
   EXPECT_TRUE(file_util::CreateSymbolicLink(db2_actual, db2_symlink));
 #endif
diff --git a/chrome/browser/history/history_publisher.h b/chrome/browser/history/history_publisher.h
index 2f16655..a3538b4 100644
--- a/chrome/browser/history/history_publisher.h
+++ b/chrome/browser/history/history_publisher.h
@@ -16,6 +16,10 @@
 #include "history_indexer.h"
 #endif
 
+// TODO(shess): HistoryPublisher is being deprecated.  I am still
+// trying to track down who depends on it, meanwhile talk to me
+// before removing interactions with it.
+
 class GURL;
 
 namespace base {
diff --git a/chrome/browser/history/history_service.cc b/chrome/browser/history/history_service.cc
index dc28c42..aee38c8 100644
--- a/chrome/browser/history/history_service.cc
+++ b/chrome/browser/history/history_service.cc
@@ -584,6 +584,16 @@ void HistoryService::AddPagesWithDetails(const history::URLRows& info,
                     &HistoryBackend::AddPagesWithDetails, info, visit_source);
 }
 
+void HistoryService::SetPageContents(const GURL& url,
+                                     const string16& contents) {
+  DCHECK(thread_checker_.CalledOnValidThread());
+  if (!CanAddURL(url))
+    return;
+
+  ScheduleAndForget(PRIORITY_LOW, &HistoryBackend::SetPageContents,
+                    url, contents);
+}
+
 CancelableTaskTracker::TaskId HistoryService::GetFavicons(
     const std::vector<GURL>& icon_urls,
     int icon_types,
diff --git a/chrome/browser/history/history_service.h b/chrome/browser/history/history_service.h
index f8d383b..6891f38 100644
--- a/chrome/browser/history/history_service.h
+++ b/chrome/browser/history/history_service.h
@@ -236,6 +236,13 @@ class HistoryService : public CancelableRequestProvider,
       const GURL& url,
       base::Time end_ts);
 
+  // Indexing ------------------------------------------------------------------
+
+  // Notifies history of the body text of the given recently-visited URL.
+  // If the URL was not visited "recently enough," the history system may
+  // discard it.
+  void SetPageContents(const GURL& url, const string16& contents);
+
   // Querying ------------------------------------------------------------------
 
   // Returns the information about the requested URL. If the URL is found,
diff --git a/chrome/browser/history/history_tab_helper.cc b/chrome/browser/history/history_tab_helper.cc
index adbd892..4fc97c6 100644
--- a/chrome/browser/history/history_tab_helper.cc
+++ b/chrome/browser/history/history_tab_helper.cc
@@ -84,6 +84,16 @@ HistoryTabHelper::CreateHistoryAddPageArgs(
   return add_page_args;
 }
 
+bool HistoryTabHelper::OnMessageReceived(const IPC::Message& message) {
+  bool handled = true;
+  IPC_BEGIN_MESSAGE_MAP(HistoryTabHelper, message)
+    IPC_MESSAGE_HANDLER(ChromeViewHostMsg_PageContents, OnPageContents)
+    IPC_MESSAGE_UNHANDLED(handled = false)
+  IPC_END_MESSAGE_MAP()
+
+  return handled;
+}
+
 void HistoryTabHelper::DidNavigateMainFrame(
     const content::LoadCommittedDetails& details,
     const content::FrameNavigateParams& params) {
@@ -155,6 +165,25 @@ void HistoryTabHelper::Observe(int type,
   }
 }
 
+void HistoryTabHelper::OnPageContents(const GURL& url,
+                                      const string16& contents) {
+  // Don't index any https pages. People generally don't want their bank
+  // accounts, etc. indexed on their computer, especially since some of these
+  // things are not marked cachable.
+  // TODO(brettw) we may want to consider more elaborate heuristics such as
+  // the cachability of the page. We may also want to consider subframes (this
+  // test will still index subframes if the subframe is SSL).
+  // TODO(zelidrag) bug chromium-os:2808 - figure out if we want to reenable
+  // content indexing for chromeos in some future releases.
+#if !defined(OS_CHROMEOS)
+  if (!url.SchemeIsSecure()) {
+    HistoryService* hs = GetHistoryService();
+    if (hs)
+      hs->SetPageContents(url, contents);
+  }
+#endif
+}
+
 HistoryService* HistoryTabHelper::GetHistoryService() {
   Profile* profile =
       Profile::FromBrowserContext(web_contents()->GetBrowserContext());
diff --git a/chrome/browser/history/history_tab_helper.h b/chrome/browser/history/history_tab_helper.h
index 2f613fa..65b03c3 100644
--- a/chrome/browser/history/history_tab_helper.h
+++ b/chrome/browser/history/history_tab_helper.h
@@ -46,6 +46,7 @@ class HistoryTabHelper : public content::WebContentsObserver,
   friend class content::WebContentsUserData<HistoryTabHelper>;
 
   // content::WebContentsObserver implementation.
+  virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
   virtual void DidNavigateMainFrame(
       const content::LoadCommittedDetails& details,
       const content::FrameNavigateParams& params) OVERRIDE;
@@ -59,9 +60,7 @@
       const content::NotificationSource& source,
       const content::NotificationDetails& details) OVERRIDE;
 
-  void OnPageContents(const GURL& url,
-                      int32 page_id,
-                      const string16& contents);
+  void OnPageContents(const GURL& url, const string16& contents);
 
   // Helper function to return the history service. May return NULL.
   HistoryService* GetHistoryService();
diff --git a/chrome/browser/history/page_collector.cc b/chrome/browser/history/page_collector.cc
new file mode 100644
index 0000000..da07ad1
--- /dev/null
+++ b/chrome/browser/history/page_collector.cc
@@ -0,0 +1,157 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/page_collector.h"
+
+#include "base/bind.h"
+#include "base/message_loop/message_loop.h"
+#include "base/strings/utf_string_conversions.h"
+#include "chrome/browser/history/history_publisher.h"
+#include "url/gurl.h"
+
+namespace {
+
+// Page info older than this will be published even if we haven't
+// gotten a title and/or body.
+const int kExpirationSeconds = 20;
+
+}  // namespace
+
+namespace history {
+
+// PageCollector::PageInfo -----------------------------------------------
+
+PageCollector::PageInfo::PageInfo(base::Time visit_time)
+    : visit_time_(visit_time),
+      added_time_(base::TimeTicks::Now()) {
+}
+
+PageCollector::PageInfo::~PageInfo() {}
+
+// NOTE(shess): Per the comment on has_title() and has_body(), this
+// code maps empty strings to single space to differentiate set title
+// and body from empty.  This approach is held over from the original
+// TextDatabaseManager version.
+void PageCollector::PageInfo::set_title(const string16& ttl) {
+  if (ttl.empty())
+    title_ = ASCIIToUTF16(" ");
+  else
+    title_ = ttl;
+}
+
+void PageCollector::PageInfo::set_body(const string16& bdy) {
+  if (bdy.empty())
+    body_ = ASCIIToUTF16(" ");
+  else
+    body_ = bdy;
+}
+
+bool PageCollector::PageInfo::Expired(base::TimeTicks now) const {
+  return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds);
+}
+
+PageCollector::PageCollector()
+    : recent_changes_(RecentChangeList::NO_AUTO_EVICT),
+      weak_factory_(this) {
+}
+
+PageCollector::~PageCollector() {
+}
+
+void PageCollector::Init(const HistoryPublisher* history_publisher) {
+  history_publisher_ = history_publisher;
+}
+
+void PageCollector::AddPageURL(const GURL& url, base::Time time) {
+  // Don't collect data which cannot be published.
+  if (!history_publisher_)
+    return;
+
+  // Just save this info for later (evicting any previous data).  We
+  // will delete it when it expires or when all the data is complete.
+  recent_changes_.Put(url, PageInfo(time));
+
+  // Schedule flush if not already scheduled.
+  if (!weak_factory_.HasWeakPtrs())
+    ScheduleFlushCollected();
+}
+
+void PageCollector::AddPageTitle(const GURL& url, const string16& title) {
+  if (!history_publisher_)
+    return;
+
+  // If the title comes in after the page has aged out, drop it.
+  // Older code would manufacture information from the database.
+  RecentChangeList::iterator found = recent_changes_.Peek(url);
+  if (found == recent_changes_.end())
+    return;
+
+  // Publish the info if complete.
+  if (found->second.has_body()) {
+    history_publisher_->PublishPageContent(
+        found->second.visit_time(), url, title, found->second.body());
+    recent_changes_.Erase(found);
+  } else {
+    found->second.set_title(title);
+  }
+}
+
+void PageCollector::AddPageContents(const GURL& url,
+                                    const string16& body) {
+  if (!history_publisher_)
+    return;
+
+  // If the body comes in after the page has aged out, drop it.
+  // Older code would manufacture information from the database.
+  RecentChangeList::iterator found = recent_changes_.Peek(url);
+  if (found == recent_changes_.end())
+    return;
+
+  // Publish the info if complete.
+  if (found->second.has_title()) {
+    history_publisher_->PublishPageContent(
+        found->second.visit_time(), url, found->second.title(), body);
+    recent_changes_.Erase(found);
+  } else {
+    found->second.set_body(body);
+  }
+}
+
+void PageCollector::AddPageData(const GURL& url,
+                                base::Time visit_time,
+                                const string16& title,
+                                const string16& body) {
+  if (!history_publisher_)
+    return;
+
+  // Publish the item.
+  history_publisher_->PublishPageContent(visit_time, url, title, body);
+}
+
+void PageCollector::ScheduleFlushCollected() {
+  weak_factory_.InvalidateWeakPtrs();
+  base::MessageLoop::current()->PostDelayedTask(
+      FROM_HERE,
+      base::Bind(&PageCollector::FlushCollected,
+                 weak_factory_.GetWeakPtr()),
+      base::TimeDelta::FromSeconds(kExpirationSeconds));
+}
+
+void PageCollector::FlushCollected() {
+  base::TimeTicks now = base::TimeTicks::Now();
+
+  // Iterate from oldest to newest publishing items which expire while
+  // waiting for title or body.
+  RecentChangeList::reverse_iterator iter = recent_changes_.rbegin();
+  while (iter != recent_changes_.rend() && iter->second.Expired(now)) {
+    AddPageData(iter->first, iter->second.visit_time(),
+                iter->second.title(), iter->second.body());
+    iter = recent_changes_.Erase(iter);
+  }
+
+  if (!recent_changes_.empty())
+    ScheduleFlushCollected();
+}
+
+}  // namespace history
diff --git a/chrome/browser/history/page_collector.h b/chrome/browser/history/page_collector.h
new file mode 100644
index 0000000..c044a3b
--- /dev/null
+++ b/chrome/browser/history/page_collector.h
@@ -0,0 +1,123 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+#define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+
+#include "base/basictypes.h"
+#include "base/containers/mru_cache.h"
+#include "base/memory/weak_ptr.h"
+#include "base/strings/string16.h"
+#include "base/time/time.h"
+
+class GURL;
+
+namespace history {
+
+class HistoryPublisher;
+
+// Collect page data and publish to HistoryPublisher.
+class PageCollector {
+ public:
+  // You must call Init() to complete initialization.
+  PageCollector();
+  ~PageCollector();
+
+  // Must call before using other functions.
+  void Init(const HistoryPublisher* history_publisher);
+
+  // Sets specific information for the given page to be published.
+  // In normal operation, URLs will be added as the user visits them, and
+  // the titles and bodies will come in some time after that. These changes
+  // will be automatically coalesced and added to the database some time in
+  // the future using AddPageData().
+  //
+  // AddPageURL() must be called for a given URL before either the title
+  // or body is set. The visit time should be the time corresponding to
+  // that visit in the history database.
+  void AddPageURL(const GURL& url, base::Time visit_time);
+  void AddPageTitle(const GURL& url, const string16& title);
+  void AddPageContents(const GURL& url, const string16& body);
+
+  void AddPageData(const GURL& url,
+                   base::Time visit_time,
+                   const string16& title,
+                   const string16& body);
+
+ private:
+  // Stores "recent stuff" that has happened with the page, since the page
+  // visit, title, and body all come in at different times.
+  class PageInfo {
+   public:
+    explicit PageInfo(base::Time visit_time);
+    ~PageInfo();
+
+    // Getters.
+    base::Time visit_time() const { return visit_time_; }
+    const string16& title() const { return title_; }
+    const string16& body() const { return body_; }
+
+    // Setters; we can only update the title and body.
+    void set_title(const string16& ttl);
+    void set_body(const string16& bdy);
+
+    // Return true if the title or body of the entry has been set. Since
+    // both the title and body setters will "fix" empty strings to be a
+    // space, these indicate whether the setter was ever called.
+    bool has_title() const { return !title_.empty(); }
+    bool has_body() const { return !body_.empty(); }
+
+    // Returns true if this entry was added too long ago and we should give
+    // up waiting for more data. The current time is passed in as an argument
+    // so we can check many entries without re-querying the timer.
+    bool Expired(base::TimeTicks now) const;
+
+   private:
+    // Time of the visit of the URL. This will be the value stored in the URL
+    // and visit tables for the entry.
+    base::Time visit_time_;
+
+    // When this page entry was created. We have a cap on the maximum time
+    // that an entry will be in the queue before being flushed to the
+    // database.
+    base::TimeTicks added_time_;
+
+    // Will be the string " " when they are set, to distinguish set and
+    // unset.
+    string16 title_;
+    string16 body_;
+  };
+
+  // Collected data is published when both the title and body are
+  // present.  https data is never passed to AddPageContents(), so
+  // collected data for those pages is periodically published without
+  // the contents.  Pages which take a long time to load will not have
+  // their bodies published.
+  void ScheduleFlushCollected();
+  void FlushCollected();
+
+  // Lists recent additions that we have not yet filled out with the title
+  // and body. Sorted by time; we will flush them when they are complete or
+  // have been in the queue too long without modification.
+  //
+  // We kind of abuse the MRUCache because we never move things around in it
+  // using Get. Instead, we keep them in the order they were inserted, since
+  // this is the metric we use to measure age. The MRUCache gives us an
+  // ordered list with fast lookup by URL.
+  typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
+  RecentChangeList recent_changes_;
+
+  // Generates tasks for our periodic checking of expired "recent changes".
+  base::WeakPtrFactory<PageCollector> weak_factory_;
+
+  // This object is created and managed by the history backend. We maintain
+  // an opaque pointer to the object for our use.
+  // This can be NULL if there are no indexers registered to receive indexing
+  // data from us.
+  const HistoryPublisher* history_publisher_;
+
+  DISALLOW_COPY_AND_ASSIGN(PageCollector);
+};
+
+}  // namespace history
+
+#endif  // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
diff --git a/chrome/browser/history/text_database_unittest.cc b/chrome/browser/history/text_database_unittest.cc
deleted file mode 100644
index 25b7d79..0000000
--- a/chrome/browser/history/text_database_unittest.cc
+++ /dev/null
@@ -1,306 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <string>
-
-#include "base/files/scoped_temp_dir.h"
-#include "base/memory/scoped_ptr.h"
-#include "base/strings/string_util.h"
-#include "base/strings/utf_string_conversions.h"
-#include "chrome/browser/history/text_database.h"
-#include "testing/gtest/include/gtest/gtest.h"
-#include "testing/platform_test.h"
-
-using base::Time;
-
-namespace history {
-
-namespace {
-
-// Note that all pages have "COUNTTAG" which allows us to count the number of
-// pages in the database without adding any extra functions to the DB object.
-const char kURL1[] = "http://www.google.com/";
-const int kTime1 = 1000;
-const char kTitle1[] = "Google";
-const char kBody1[] =
-    "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
-    "Sign out Advanced Search Preferences Language Tools Advertising Programs "
-    "- Business Solutions - About Google, 2008 Google";
-
-const char kURL2[] = "http://images.google.com/";
-const int kTime2 = 2000;
-const char kTitle2[] = "Google Image Search";
-const char kBody2[] =
-    "COUNTTAG Web Images Maps News Shopping Gmail more My Account | "
-    "Sign out Advanced Image Search Preferences The most comprehensive image "
-    "search on the web. Want to help improve Google Image Search? Try Google "
-    "Image Labeler. Advertising Programs - Business Solutions - About Google "
-    "2008 Google";
-
-const char kURL3[] = "http://slashdot.org/";
-const int kTime3 = 3000;
-const char kTitle3[] = "Slashdot: News for nerds, stuff that matters";
-const char kBody3[] =
-    "COUNTTAG Slashdot Log In Create Account Subscribe Firehose Why "
-    "Log In? Why Subscribe? Nickname Password Public Terminal Sections "
-    "Main Apple AskSlashdot Backslash Books Developers Games Hardware "
-    "Interviews IT Linux Mobile Politics Science YRO";
-
-// Returns the number of rows currently in the database.
-int RowCount(TextDatabase* db) {
-  QueryOptions options;
-  options.begin_time = Time::FromInternalValue(0);
-  // Leave end_time at now.
-
-  std::vector<TextDatabase::Match> results;
-  TextDatabase::URLSet unique_urls;
-  db->GetTextMatches("COUNTTAG", options, &results, &unique_urls);
-  return static_cast<int>(results.size());
-}
-
-// Adds each of the test pages to the database.
-void AddAllTestData(TextDatabase* db) {
-  EXPECT_TRUE(db->AddPageData(
-      Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
-  EXPECT_TRUE(db->AddPageData(
-      Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
-  EXPECT_TRUE(db->AddPageData(
-      Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3));
-  EXPECT_EQ(3, RowCount(db));
-}
-
-bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
-                    const char* url) {
-  GURL gurl(url);
-  for (size_t i = 0; i < results.size(); i++) {
-    if (results[i].url == gurl)
-      return true;
-  }
-  return false;
-}
-
-}  // namespace
-
-class TextDatabaseTest : public PlatformTest {
- public:
-  TextDatabaseTest() {}
-
- protected:
-  virtual void SetUp() {
-    PlatformTest::SetUp();
-    ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
-  }
-
-  // Create databases with this function, which will ensure that the files are
-  // deleted on shutdown. Only open one database for each file. Returns NULL
-  // on failure.
-  //
-  // Set |delete_file| to delete any existing file. If we are trying to create
-  // the file for the first time, we don't want a previous test left in a
-  // weird state to have left a file that would affect us.
-  TextDatabase* CreateDB(TextDatabase::DBIdent id,
-                         bool allow_create,
-                         bool delete_file) {
-    TextDatabase* db = new TextDatabase(temp_dir_.path(), id, allow_create);
-
-    if (delete_file)
-      sql::Connection::Delete(db->file_name());
-
-    if (!db->Init()) {
-      delete db;
-      return NULL;
-    }
-    return db;
-  }
-
-  // Directory containing the databases.
-  base::ScopedTempDir temp_dir_;
-
-  // Name of the main database file.
-  base::FilePath file_name_;
-};
-
-TEST_F(TextDatabaseTest, AttachDetach) {
-  // First database with one page.
-  const int kIdee1 = 200801;
-  scoped_ptr<TextDatabase> db1(CreateDB(kIdee1, true, true));
-  ASSERT_TRUE(!!db1.get());
-  EXPECT_TRUE(db1->AddPageData(
-      Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1));
-
-  // Second database with one page.
-  const int kIdee2 = 200802;
-  scoped_ptr<TextDatabase> db2(CreateDB(kIdee2, true, true));
-  ASSERT_TRUE(!!db2.get());
-  EXPECT_TRUE(db2->AddPageData(
-      Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2));
-
-  // Detach, then reattach database one. The file should exist, so we force
-  // opening an existing file.
-  db1.reset();
-  db1.reset(CreateDB(kIdee1, false, false));
-  ASSERT_TRUE(!!db1.get());
-
-  // We should not be able to attach this random database for which no file
-  // exists.
-  const int kIdeeNoExisto = 999999999;
-  scoped_ptr<TextDatabase> db3(CreateDB(kIdeeNoExisto, false, true));
-  EXPECT_FALSE(!!db3.get());
-}
-
-TEST_F(TextDatabaseTest, AddRemove) {
-  // Create a database and add some pages to it.
-  const int kIdee1 = 200801;
-  scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
-  ASSERT_TRUE(!!db.get());
-  URLID id1 = db->AddPageData(
-      Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1);
-  EXPECT_NE(0, id1);
-  URLID id2 = db->AddPageData(
-      Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2);
-  EXPECT_NE(0, id2);
-  URLID id3 = db->AddPageData(
-      Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3);
-  EXPECT_NE(0, id3);
-  EXPECT_EQ(3, RowCount(db.get()));
-
-  // Make sure we can delete some of the data.
-  db->DeletePageData(Time::FromInternalValue(kTime1), kURL1);
-  EXPECT_EQ(2, RowCount(db.get()));
-
-  // Close and reopen.
-  db.reset(new TextDatabase(temp_dir_.path(), kIdee1, false));
-  EXPECT_TRUE(db->Init());
-
-  // Verify that the deleted ID is gone and try to delete another one.
-  EXPECT_EQ(2, RowCount(db.get()));
-  db->DeletePageData(Time::FromInternalValue(kTime2), kURL2);
-  EXPECT_EQ(1, RowCount(db.get()));
-}
-
-TEST_F(TextDatabaseTest, Query) {
-  // Make a database with some pages.
-  const int kIdee1 = 200801;
-  scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
-  EXPECT_TRUE(!!db.get());
-  AddAllTestData(db.get());
-
-  // Get all the results.
-  QueryOptions options;
-  options.begin_time = Time::FromInternalValue(0);
-
-  std::vector<TextDatabase::Match> results;
-  TextDatabase::URLSet unique_urls;
-  db->GetTextMatches("COUNTTAG", options, &results, &unique_urls);
-  EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-
-  // All 3 sites should be returned in order.
-  ASSERT_EQ(3U, results.size());
-  EXPECT_EQ(GURL(kURL1), results[2].url);
-  EXPECT_EQ(GURL(kURL2), results[1].url);
-  EXPECT_EQ(GURL(kURL3), results[0].url);
-
-  // Verify the info on those results.
-  EXPECT_TRUE(Time::FromInternalValue(kTime1) == results[2].time);
-  EXPECT_TRUE(Time::FromInternalValue(kTime2) == results[1].time);
-  EXPECT_TRUE(Time::FromInternalValue(kTime3) == results[0].time);
-
-  EXPECT_EQ(std::string(kTitle1), UTF16ToUTF8(results[2].title));
-  EXPECT_EQ(std::string(kTitle2), UTF16ToUTF8(results[1].title));
-  EXPECT_EQ(std::string(kTitle3), UTF16ToUTF8(results[0].title));
-
-  // Should have no matches in the title.
-  EXPECT_EQ(0U, results[0].title_match_positions.size());
-  EXPECT_EQ(0U, results[1].title_match_positions.size());
-  EXPECT_EQ(0U, results[2].title_match_positions.size());
-
-  // We don't want to be dependent on the exact snippet algorithm, but we know
-  // since we searched for "COUNTTAG" which occurs at the beginning of each
-  // document, that each snippet should start with that.
-  EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[0].snippet.text()),
-                              "COUNTTAG", false));
-  EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[1].snippet.text()),
-                              "COUNTTAG", false));
-  EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[2].snippet.text()),
-                              "COUNTTAG", false));
-}
-
-TEST_F(TextDatabaseTest, TimeRange) {
-  // Make a database with some pages.
-  const int kIdee1 = 200801;
-  scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
-  ASSERT_TRUE(!!db.get());
-  AddAllTestData(db.get());
-
-  // Beginning should be inclusive, and the ending exclusive.
-  // Get all the results.
-  QueryOptions options;
-  options.begin_time = Time::FromInternalValue(kTime1);
-  options.end_time = Time::FromInternalValue(kTime3);
-
-  std::vector<TextDatabase::Match> results;
-  TextDatabase::URLSet unique_urls;
-  bool has_more_results = db->GetTextMatches(
-      "COUNTTAG", options, &results, &unique_urls);
-  EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-
-  // The first and second should have been returned.
-  EXPECT_EQ(2U, results.size());
-  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
-  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
-  EXPECT_FALSE(ResultsHaveURL(results, kURL3));
-  EXPECT_EQ(kTime1, results.back().time.ToInternalValue());
-  EXPECT_FALSE(has_more_results);
-
-  // Do a query where there isn't a result on the begin boundary.
-  options.begin_time = Time::FromInternalValue((kTime2 - kTime1) / 2 + kTime1);
-  options.end_time = Time::FromInternalValue(kTime3 + 1);
-  results.clear();  // GetTextMatches does *not* clear the results.
-  has_more_results = db->GetTextMatches(
-      "COUNTTAG", options, &results, &unique_urls);
-  EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-  EXPECT_FALSE(has_more_results);
-
-  // Should have two results, the second and third.
-  EXPECT_EQ(2U, results.size());
-  EXPECT_FALSE(ResultsHaveURL(results, kURL1));
-  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
-  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
-
-  // Try a range that has no results.
-  options.begin_time = Time::FromInternalValue(kTime3 + 1);
-  options.end_time = Time::FromInternalValue(kTime3 * 100);
-  results.clear();
-  has_more_results = db->GetTextMatches(
-      "COUNTTAG", options, &results, &unique_urls);
-  EXPECT_FALSE(has_more_results);
-}
-
-// Make sure that max_count works.
-TEST_F(TextDatabaseTest, MaxCount) {
-  // Make a database with some pages.
-  const int kIdee1 = 200801;
-  scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true));
-  ASSERT_TRUE(!!db.get());
-  AddAllTestData(db.get());
-
-  // Set up the query to return all the results with "Google" (should be 2),
-  // but with a maximum of 1.
-  QueryOptions options;
-  options.begin_time = Time::FromInternalValue(kTime1);
-  options.end_time = Time::FromInternalValue(kTime3 + 1);
-  options.max_count = 1;
-
-  std::vector<TextDatabase::Match> results;
-  TextDatabase::URLSet unique_urls;
-  db->GetTextMatches("google", options, &results, &unique_urls);
-  EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs";
-
-  // There should be one result, the most recent one.
-  EXPECT_EQ(1U, results.size());
-  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
-  EXPECT_EQ(kTime2, results.back().time.ToInternalValue());
-}
-
-}  // namespace history
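A closing note on the data structure choice documented in page_collector.h: because PageCollector only ever calls Peek(), Put(), and Erase(), never Get(), the MRUCache never reorders entries, so it degenerates into exactly what FlushCollected() needs, an insertion-ordered list with O(1) lookup by URL whose oldest entries sit at the back where rbegin() starts. Here is a minimal sketch of such a structure using only the standard library; OrderedCache and its members are illustrative names, not base::MRUCache's actual implementation.

#include <cassert>
#include <list>
#include <string>
#include <unordered_map>
#include <utility>

template <typename K, typename V>
class OrderedCache {
 public:
  using Entry = std::pair<K, V>;
  using iterator = typename std::list<Entry>::iterator;

  // Newest entries go to the front, like MRUCache::Put().
  void Put(const K& key, V value) {
    Erase(key);
    entries_.emplace_front(key, std::move(value));
    index_[key] = entries_.begin();
  }

  // Lookup without disturbing the order, like MRUCache::Peek().
  iterator Peek(const K& key) {
    auto found = index_.find(key);
    return found == index_.end() ? entries_.end() : found->second;
  }

  iterator end() { return entries_.end(); }

  void Erase(const K& key) {
    auto found = index_.find(key);
    if (found != index_.end()) {
      entries_.erase(found->second);  // std::list iterators stay valid.
      index_.erase(found);
    }
  }

  // Oldest first, the traversal order FlushCollected() wants.
  typename std::list<Entry>::reverse_iterator rbegin() {
    return entries_.rbegin();
  }
  typename std::list<Entry>::reverse_iterator rend() {
    return entries_.rend();
  }

 private:
  std::list<Entry> entries_;               // Insertion order, newest first.
  std::unordered_map<K, iterator> index_;  // O(1) lookup by key.
};

int main() {
  OrderedCache<std::string, int> cache;
  cache.Put("a", 1);
  cache.Put("b", 2);
  cache.Put("c", 3);
  assert(cache.Peek("b") != cache.end());  // Lookup does not reorder.
  assert(cache.rbegin()->first == "a");    // Oldest entry is at the back.
  return 0;
}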