diff options
author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-09-23 19:22:32 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-09-23 19:22:32 +0000 |
commit | 281f2d690bea2d7560323a94d9461c0ce68d0057 (patch) | |
tree | 072f1925f22e5905aa04f26c074ff06c760e8f3f | |
parent | eecf8e2088e3adc5e6c65db2b8d72365c3a369a8 (diff) | |
download | chromium_src-281f2d690bea2d7560323a94d9461c0ce68d0057.zip chromium_src-281f2d690bea2d7560323a94d9461c0ce68d0057.tar.gz chromium_src-281f2d690bea2d7560323a94d9461c0ce68d0057.tar.bz2 |
Revert 224746 "Merge 224441 "TextDatabase contained an unexpecte..."
A bad merge resolution broke the build. Reverting while I fix it.
> Merge 224441 "TextDatabase contained an unexpected bit of needed..."
>
> > TextDatabase contained an unexpected bit of needed functionality.
> >
> > https://chromiumcodereview.appspot.com/16951015 removed TextDatabase
> > from history service. TextDatabase was calling a HistoryPublisher
> > routine which was still in use.
> >
> > BUG=294306
> > R=cevans@chromium.org, rmcilroy@chromium.org, sky@chromium.org
> >
> > Review URL: https://codereview.chromium.org/23437047
>
> TBR=shess@chromium.org
>
> Review URL: https://codereview.chromium.org/24360006
TBR=shess@chromium.org
Review URL: https://codereview.chromium.org/24391002
git-svn-id: svn://svn.chromium.org/chrome/branches/1599/src@224764 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/history/history_backend.cc | 61 | ||||
-rw-r--r-- | chrome/browser/history/history_backend.h | 9 | ||||
-rw-r--r-- | chrome/browser/history/history_backend_unittest.cc | 20 | ||||
-rw-r--r-- | chrome/browser/history/history_publisher.h | 4 | ||||
-rw-r--r-- | chrome/browser/history/history_service.cc | 13 | ||||
-rw-r--r-- | chrome/browser/history/history_service.h | 7 | ||||
-rw-r--r-- | chrome/browser/history/history_tab_helper.cc | 29 | ||||
-rw-r--r-- | chrome/browser/history/history_tab_helper.h | 5 | ||||
-rw-r--r-- | chrome/browser/history/page_collector.cc | 157 | ||||
-rw-r--r-- | chrome/browser/history/page_collector.h | 123 | ||||
-rw-r--r-- | chrome/browser/history/text_database_unittest.cc | 306 | ||||
-rw-r--r-- | chrome/chrome_browser.gypi | 2 | ||||
-rw-r--r-- | chrome/common/render_messages.h | 5 | ||||
-rw-r--r-- | chrome/renderer/chrome_render_view_observer.cc | 6 |
14 files changed, 362 insertions, 385 deletions
diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc index 13e1394..4e69a4b 100644 --- a/chrome/browser/history/history_backend.cc +++ b/chrome/browser/history/history_backend.cc @@ -32,7 +32,6 @@ #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/history_publisher.h" #include "chrome/browser/history/in_memory_history_backend.h" -#include "chrome/browser/history/page_collector.h" #include "chrome/browser/history/page_usage_data.h" #include "chrome/browser/history/select_favicon_frames.h" #include "chrome/browser/history/top_sites.h" @@ -551,9 +550,6 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) { last_ids.second); } - if (page_collector_) - page_collector_->AddPageURL(request.url, request.time); - ScheduleCommit(); } @@ -615,9 +611,6 @@ void HistoryBackend::InitImpl(const std::string& languages) { // Create the history publisher which needs to be passed on to the thumbnail // database for publishing history. - // TODO(shess): HistoryPublisher is being deprecated. I am still - // trying to track down who depends on it, meanwhile talk to me - // before removing interactions with it. http://crbug.com/294306 history_publisher_.reset(new HistoryPublisher()); if (!history_publisher_->Init()) { // The init may fail when there are no indexers wanting our history. @@ -625,12 +618,6 @@ void HistoryBackend::InitImpl(const std::string& languages) { history_publisher_.reset(); } - // Collects page data for history_publisher_. - if (history_publisher_.get()) { - page_collector_.reset(new PageCollector()); - page_collector_->Init(history_publisher_.get()); - } - // Thumbnail database. thumbnail_db_.reset(new ThumbnailDatabase()); if (!db_->GetNeedsThumbnailMigration()) { @@ -683,7 +670,8 @@ void HistoryBackend::InitImpl(const std::string& languages) { // *sigh*, this can all be cleaned up when that migration code is removed. // The main DB initialization should intuitively be first (not that it // actually matters) and the expirer should be set last. - expirer_.SetDatabases(db_.get(), archived_db_.get(), thumbnail_db_.get()); + expirer_.SetDatabases(db_.get(), archived_db_.get(), + thumbnail_db_.get()); // Open the long-running transaction. db_->BeginTransaction(); @@ -868,12 +856,6 @@ void HistoryBackend::AddPagesWithDetails(const URLRows& urls, } } - // TODO(shess): I'm not sure this case needs to exist anymore. - if (page_collector_) { - page_collector_->AddPageData(i->url(), i->last_visit(), - i->title(), string16()); - } - // Sync code manages the visits itself. if (visit_source != SOURCE_SYNCED) { // Make up a visit to correspond to the last visit to the page. @@ -911,13 +893,11 @@ bool HistoryBackend::IsExpiredVisitTime(const base::Time& time) { return time < expirer_.GetCurrentArchiveTime(); } -void HistoryBackend::SetPageTitle(const GURL& url, const string16& title) { +void HistoryBackend::SetPageTitle(const GURL& url, + const string16& title) { if (!db_) return; - if (page_collector_) - page_collector_->AddPageTitle(url, title); - // Search for recent redirects which should get the same title. We make a // dummy list containing the exact URL visited if there are no redirects so // the processing below can be the same. @@ -1771,10 +1751,35 @@ void HistoryBackend::DeleteFTSIndexDatabases() { num_databases_deleted); } -void HistoryBackend::SetPageContents(const GURL& url, - const string16& contents) { - if (page_collector_) - page_collector_->AddPageContents(url, contents); +bool HistoryBackend::GetThumbnailFromOlderRedirect( + const GURL& page_url, + std::vector<unsigned char>* data) { + // Look at a few previous visit sessions. + VisitVector older_sessions; + URLID page_url_id = db_->GetRowForURL(page_url, NULL); + static const int kVisitsToSearchForThumbnail = 4; + db_->GetMostRecentVisitsForURL( + page_url_id, kVisitsToSearchForThumbnail, &older_sessions); + + // Iterate across all those previous visits, and see if any of the + // final destinations of those redirect chains have a good thumbnail + // for us. + bool success = false; + for (VisitVector::const_iterator it = older_sessions.begin(); + !success && it != older_sessions.end(); ++it) { + history::RedirectList redirects; + if (it->visit_id) { + GetRedirectsFromSpecificVisit(it->visit_id, &redirects); + + if (!redirects.empty()) { + URLID url_id; + if ((url_id = db_->GetRowForURL(redirects.back(), NULL))) + success = thumbnail_db_->GetPageThumbnail(url_id, data); + } + } + } + + return success; } void HistoryBackend::GetFavicons( diff --git a/chrome/browser/history/history_backend.h b/chrome/browser/history/history_backend.h index 53caec3..fd92878 100644 --- a/chrome/browser/history/history_backend.h +++ b/chrome/browser/history/history_backend.h @@ -38,7 +38,6 @@ class AndroidProviderBackend; class CommitLaterTask; class HistoryPublisher; -class PageCollector; class VisitFilter; struct DownloadRow; @@ -162,11 +161,6 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, const GURL& url, base::Time end_ts); - - // Indexing ------------------------------------------------------------------ - - void SetPageContents(const GURL& url, const string16& contents); - // Querying ------------------------------------------------------------------ // ScheduleAutocomplete() never frees |provider| (which is globally live). @@ -876,9 +870,6 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // Stores old history in a larger, slower database. scoped_ptr<ArchivedDatabase> archived_db_; - // Helper to collect page data for vending to history_publisher_. - scoped_ptr<PageCollector> page_collector_; - // Manages expiration between the various databases. ExpireHistoryBackend expirer_; diff --git a/chrome/browser/history/history_backend_unittest.cc b/chrome/browser/history/history_backend_unittest.cc index fb1e242..8bb0111 100644 --- a/chrome/browser/history/history_backend_unittest.cc +++ b/chrome/browser/history/history_backend_unittest.cc @@ -5,6 +5,7 @@ #include <algorithm> #include <set> #include <vector> +#include <fstream> #include "base/basictypes.h" #include "base/bind.h" @@ -2782,6 +2783,15 @@ TEST_F(HistoryBackendTest, RemoveNotification) { service->DeleteURL(url); } +// Simple function to create a new dummy file. +void CreateDummyFile(const base::FilePath& filename) { + std::wofstream file; + file.open(filename.value().c_str()); + ASSERT_TRUE(file.is_open()); + file << L"Dummy"; + file.close(); +} + // Test DeleteFTSIndexDatabases deletes expected files. TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) { ASSERT_TRUE(backend_.get()); @@ -2794,12 +2804,10 @@ TEST_F(HistoryBackendTest, DeleteFTSIndexDatabases) { base::FilePath db2_actual(history_path.AppendASCII("Underlying DB")); // Setup dummy index database files. - const char* data = "Dummy"; - const size_t data_len = 5; - ASSERT_TRUE(file_util::WriteFile(db1, data, data_len)); - ASSERT_TRUE(file_util::WriteFile(db1_journal, data, data_len)); - ASSERT_TRUE(file_util::WriteFile(db1_wal, data, data_len)); - ASSERT_TRUE(file_util::WriteFile(db2_actual, data, data_len)); + CreateDummyFile(db1); + CreateDummyFile(db1_journal); + CreateDummyFile(db1_wal); + CreateDummyFile(db2_actual); #if defined(OS_POSIX) EXPECT_TRUE(file_util::CreateSymbolicLink(db2_actual, db2_symlink)); #endif diff --git a/chrome/browser/history/history_publisher.h b/chrome/browser/history/history_publisher.h index a3538b4..2f16655 100644 --- a/chrome/browser/history/history_publisher.h +++ b/chrome/browser/history/history_publisher.h @@ -16,10 +16,6 @@ #include "history_indexer.h" #endif -// TODO(shess): HistoryPublisher is being deprecated. I am still -// trying to track down who depends on it, meanwhile talk to me -// before removing interactions with it. - class GURL; namespace base { diff --git a/chrome/browser/history/history_service.cc b/chrome/browser/history/history_service.cc index f86e75f..644dc85 100644 --- a/chrome/browser/history/history_service.cc +++ b/chrome/browser/history/history_service.cc @@ -593,14 +593,13 @@ void HistoryService::AddPagesWithDetails(const history::URLRows& info, &HistoryBackend::AddPagesWithDetails, info, visit_source); } -void HistoryService::SetPageContents(const GURL& url, - const string16& contents) { +HistoryService::Handle HistoryService::GetPageThumbnail( + const GURL& page_url, + CancelableRequestConsumerBase* consumer, + const ThumbnailDataCallback& callback) { DCHECK(thread_checker_.CalledOnValidThread()); - if (!CanAddURL(url)) - return; - - ScheduleAndForget(PRIORITY_LOW, &HistoryBackend::SetPageContents, - url, contents); + return Schedule(PRIORITY_NORMAL, &HistoryBackend::GetPageThumbnail, consumer, + new history::GetPageThumbnailRequest(callback), page_url); } CancelableTaskTracker::TaskId HistoryService::GetFavicons( diff --git a/chrome/browser/history/history_service.h b/chrome/browser/history/history_service.h index 060614d..1662571 100644 --- a/chrome/browser/history/history_service.h +++ b/chrome/browser/history/history_service.h @@ -236,13 +236,6 @@ class HistoryService : public CancelableRequestProvider, const GURL& url, base::Time end_ts); - // Indexing ------------------------------------------------------------------ - - // Notifies history of the body text of the given recently-visited URL. - // If the URL was not visited "recently enough," the history system may - // discard it. - void SetPageContents(const GURL& url, const string16& contents); - // Querying ------------------------------------------------------------------ // Returns the information about the requested URL. If the URL is found, diff --git a/chrome/browser/history/history_tab_helper.cc b/chrome/browser/history/history_tab_helper.cc index 8496883..96c545e 100644 --- a/chrome/browser/history/history_tab_helper.cc +++ b/chrome/browser/history/history_tab_helper.cc @@ -81,16 +81,6 @@ HistoryTabHelper::CreateHistoryAddPageArgs( return add_page_args; } -bool HistoryTabHelper::OnMessageReceived(const IPC::Message& message) { - bool handled = true; - IPC_BEGIN_MESSAGE_MAP(HistoryTabHelper, message) - IPC_MESSAGE_HANDLER(ChromeViewHostMsg_PageContents, OnPageContents) - IPC_MESSAGE_UNHANDLED(handled = false) - IPC_END_MESSAGE_MAP() - - return handled; -} - void HistoryTabHelper::DidNavigateMainFrame( const content::LoadCommittedDetails& details, const content::FrameNavigateParams& params) { @@ -154,25 +144,6 @@ void HistoryTabHelper::Observe(int type, } } -void HistoryTabHelper::OnPageContents(const GURL& url, - const string16& contents) { - // Don't index any https pages. People generally don't want their bank - // accounts, etc. indexed on their computer, especially since some of these - // things are not marked cachable. - // TODO(brettw) we may want to consider more elaborate heuristics such as - // the cachability of the page. We may also want to consider subframes (this - // test will still index subframes if the subframe is SSL). - // TODO(zelidrag) bug chromium-os:2808 - figure out if we want to reenable - // content indexing for chromeos in some future releases. -#if !defined(OS_CHROMEOS) - if (!url.SchemeIsSecure()) { - HistoryService* hs = GetHistoryService(); - if (hs) - hs->SetPageContents(url, contents); - } -#endif -} - HistoryService* HistoryTabHelper::GetHistoryService() { Profile* profile = Profile::FromBrowserContext(web_contents()->GetBrowserContext()); diff --git a/chrome/browser/history/history_tab_helper.h b/chrome/browser/history/history_tab_helper.h index 65b03c3..2f613fa 100644 --- a/chrome/browser/history/history_tab_helper.h +++ b/chrome/browser/history/history_tab_helper.h @@ -46,7 +46,6 @@ class HistoryTabHelper : public content::WebContentsObserver, friend class content::WebContentsUserData<HistoryTabHelper>; // content::WebContentsObserver implementation. - virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE; virtual void DidNavigateMainFrame( const content::LoadCommittedDetails& details, const content::FrameNavigateParams& params) OVERRIDE; @@ -60,7 +59,9 @@ class HistoryTabHelper : public content::WebContentsObserver, const content::NotificationSource& source, const content::NotificationDetails& details) OVERRIDE; - void OnPageContents(const GURL& url, const string16& contents); + void OnPageContents(const GURL& url, + int32 page_id, + const string16& contents); // Helper function to return the history service. May return NULL. HistoryService* GetHistoryService(); diff --git a/chrome/browser/history/page_collector.cc b/chrome/browser/history/page_collector.cc deleted file mode 100644 index da07ad1..0000000 --- a/chrome/browser/history/page_collector.cc +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright (c) 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/browser/history/page_collector.h" - -#include "base/bind.h" -#include "base/message_loop/message_loop.h" -#include "base/strings/utf_string_conversions.h" -#include "chrome/browser/history/history_publisher.h" -#include "url/gurl.h" - -namespace { - -// Page info older than this will be published even if we haven't -// gotten a title and/or body. -const int kExpirationSeconds = 20; - -} // namespace - -namespace history { - -// PageCollector::PageInfo ----------------------------------------------- - -PageCollector::PageInfo::PageInfo(base::Time visit_time) - : visit_time_(visit_time), - added_time_(base::TimeTicks::Now()) { -} - -PageCollector::PageInfo::~PageInfo() {} - -// NOTE(shess): Per the comment on has_title() and has_body(), this -// code maps empty strings to single space to differentiate set title -// and body from empty. This approach is held over from the original -// TextDatabaseManager version. -void PageCollector::PageInfo::set_title(const string16& ttl) { - if (ttl.empty()) - title_ = ASCIIToUTF16(" "); - else - title_ = ttl; -} - -void PageCollector::PageInfo::set_body(const string16& bdy) { - if (bdy.empty()) - body_ = ASCIIToUTF16(" "); - else - body_ = bdy; -} - -bool PageCollector::PageInfo::Expired(base::TimeTicks now) const { - return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds); -} - -PageCollector::PageCollector() - : recent_changes_(RecentChangeList::NO_AUTO_EVICT), - weak_factory_(this) { -} - -PageCollector::~PageCollector() { -} - -void PageCollector::Init(const HistoryPublisher* history_publisher) { - history_publisher_ = history_publisher; -} - -void PageCollector::AddPageURL(const GURL& url, base::Time time) { - // Don't collect data which cannot be published. - if (!history_publisher_) - return; - - // Just save this info for later (evicting any previous data). We - // will delete it when it expires or when all the data is complete. - recent_changes_.Put(url, PageInfo(time)); - - // Schedule flush if not already scheduled. - if (!weak_factory_.HasWeakPtrs()) - ScheduleFlushCollected(); -} - -void PageCollector::AddPageTitle(const GURL& url, const string16& title) { - if (!history_publisher_) - return; - - // If the title comes in after the page has aged out, drop it. - // Older code would manufacture information from the database. - RecentChangeList::iterator found = recent_changes_.Peek(url); - if (found == recent_changes_.end()) - return; - - // Publish the info if complete. - if (found->second.has_body()) { - history_publisher_->PublishPageContent( - found->second.visit_time(), url, title, found->second.body()); - recent_changes_.Erase(found); - } else { - found->second.set_title(title); - } -} - -void PageCollector::AddPageContents(const GURL& url, - const string16& body) { - if (!history_publisher_) - return; - - // If the body comes in after the page has aged out, drop it. - // Older code would manufacture information from the database. - RecentChangeList::iterator found = recent_changes_.Peek(url); - if (found == recent_changes_.end()) - return; - - // Publish the info if complete. - if (found->second.has_title()) { - history_publisher_->PublishPageContent( - found->second.visit_time(), url, found->second.title(), body); - recent_changes_.Erase(found); - } else { - found->second.set_body(body); - } -} - -void PageCollector::AddPageData(const GURL& url, - base::Time visit_time, - const string16& title, - const string16& body) { - if (!history_publisher_) - return; - - // Publish the item. - history_publisher_->PublishPageContent(visit_time, url, title, body); -} - -void PageCollector::ScheduleFlushCollected() { - weak_factory_.InvalidateWeakPtrs(); - base::MessageLoop::current()->PostDelayedTask( - FROM_HERE, - base::Bind(&PageCollector::FlushCollected, - weak_factory_.GetWeakPtr()), - base::TimeDelta::FromSeconds(kExpirationSeconds)); -} - -void PageCollector::FlushCollected() { - base::TimeTicks now = base::TimeTicks::Now(); - - // Iterate from oldest to newest publishing items which expire while - // waiting for title or body. - RecentChangeList::reverse_iterator iter = recent_changes_.rbegin(); - while (iter != recent_changes_.rend() && iter->second.Expired(now)) { - AddPageData(iter->first, iter->second.visit_time(), - iter->second.title(), iter->second.body()); - iter = recent_changes_.Erase(iter); - } - - if (!recent_changes_.empty()) - ScheduleFlushCollected(); -} - -} // namespace history diff --git a/chrome/browser/history/page_collector.h b/chrome/browser/history/page_collector.h deleted file mode 100644 index c044a3b..0000000 --- a/chrome/browser/history/page_collector.h +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_ -#define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_ - -#include "base/basictypes.h" -#include "base/containers/mru_cache.h" -#include "base/memory/weak_ptr.h" -#include "base/strings/string16.h" -#include "base/time/time.h" - -class GURL; - -namespace history { - -class HistoryPublisher; - -// Collect page data and publish to HistoryPublisher. -class PageCollector { - public: - // You must call Init() to complete initialization. - PageCollector(); - ~PageCollector(); - - // Must call before using other functions. - void Init(const HistoryPublisher* history_publisher); - - // Sets specific information for the given page to be published. - // In normal operation, URLs will be added as the user visits them, the titles - // and bodies will come in some time after that. These changes will be - // automatically coalesced and added to the database some time in the future - // using AddPageData(). - // - // AddPageURL must be called for a given URL before either the title - // or body set. The visit time should be the time corresponding to - // that visit in the history database. - void AddPageURL(const GURL& url, base::Time visit_time); - void AddPageTitle(const GURL& url, const string16& title); - void AddPageContents(const GURL& url, const string16& body); - - void AddPageData(const GURL& url, - base::Time visit_time, - const string16& title, - const string16& body); - - private: - // Stores "recent stuff" that has happened with the page, since the page - // visit, title, and body all come in at different times. - class PageInfo { - public: - explicit PageInfo(base::Time visit_time); - ~PageInfo(); - - // Getters. - base::Time visit_time() const { return visit_time_; } - const string16& title() const { return title_; } - const string16& body() const { return body_; } - - // Setters, we can only update the title and body. - void set_title(const string16& ttl); - void set_body(const string16& bdy); - - // Returns true if both the title or body of the entry has been set. Since - // both the title and body setters will "fix" empty strings to be a space, - // these indicate if the setter was ever called. - bool has_title() const { return !title_.empty(); } - bool has_body() const { return !body_.empty(); } - - // Returns true if this entry was added too long ago and we should give up - // waiting for more data. The current time is passed in as an argument so we - // can check many without re-querying the timer. - bool Expired(base::TimeTicks now) const; - - private: - // Time of the visit of the URL. This will be the value stored in the URL - // and visit tables for the entry. - base::Time visit_time_; - - // When this page entry was created. We have a cap on the maximum time that - // an entry will be in the queue before being flushed to the database. - base::TimeTicks added_time_; - - // Will be the string " " when they are set to distinguish set and unset. - string16 title_; - string16 body_; - }; - - // Collected data is published when both the title and body are - // present. https data is never passed to AddPageContents(), so - // periodically collected data is published without the contents. - // Pages which take a long time to load will not have their bodies - // published. - void ScheduleFlushCollected(); - void FlushCollected(); - - // Lists recent additions that we have not yet filled out with the title and - // body. Sorted by time, we will flush them when they are complete or have - // been in the queue too long without modification. - // - // We kind of abuse the MRUCache because we never move things around in it - // using Get. Instead, we keep them in the order they were inserted, since - // this is the metric we use to measure age. The MRUCache gives us an ordered - // list with fast lookup by URL. - typedef base::MRUCache<GURL, PageInfo> RecentChangeList; - RecentChangeList recent_changes_; - - // Generates tasks for our periodic checking of expired "recent changes". - base::WeakPtrFactory<PageCollector> weak_factory_; - - // This object is created and managed by the history backend. We maintain an - // opaque pointer to the object for our use. - // This can be NULL if there are no indexers registered to receive indexing - // data from us. - const HistoryPublisher* history_publisher_; - - DISALLOW_COPY_AND_ASSIGN(PageCollector); -}; - -} // namespace history - -#endif // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_ diff --git a/chrome/browser/history/text_database_unittest.cc b/chrome/browser/history/text_database_unittest.cc new file mode 100644 index 0000000..25b7d79 --- /dev/null +++ b/chrome/browser/history/text_database_unittest.cc @@ -0,0 +1,306 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <string> + +#include "base/files/scoped_temp_dir.h" +#include "base/memory/scoped_ptr.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "chrome/browser/history/text_database.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" + +using base::Time; + +namespace history { + +namespace { + +// Note that all pages have "COUNTTAG" which allows us to count the number of +// pages in the database withoujt adding any extra functions to the DB object. +const char kURL1[] = "http://www.google.com/"; +const int kTime1 = 1000; +const char kTitle1[] = "Google"; +const char kBody1[] = + "COUNTTAG Web Images Maps News Shopping Gmail more My Account | " + "Sign out Advanced Search Preferences Language Tools Advertising Programs " + "- Business Solutions - About Google, 2008 Google"; + +const char kURL2[] = "http://images.google.com/"; +const int kTime2 = 2000; +const char kTitle2[] = "Google Image Search"; +const char kBody2[] = + "COUNTTAG Web Images Maps News Shopping Gmail more My Account | " + "Sign out Advanced Image Search Preferences The most comprehensive image " + "search on the web. Want to help improve Google Image Search? Try Google " + "Image Labeler. Advertising Programs - Business Solutions - About Google " + "2008 Google"; + +const char kURL3[] = "http://slashdot.org/"; +const int kTime3 = 3000; +const char kTitle3[] = "Slashdot: News for nerds, stuff that matters"; +const char kBody3[] = + "COUNTTAG Slashdot Log In Create Account Subscribe Firehose Why " + "Log In? Why Subscribe? Nickname Password Public Terminal Sections " + "Main Apple AskSlashdot Backslash Books Developers Games Hardware " + "Interviews IT Linux Mobile Politics Science YRO"; + +// Returns the number of rows currently in the database. +int RowCount(TextDatabase* db) { + QueryOptions options; + options.begin_time = Time::FromInternalValue(0); + // Leave end_time at now. + + std::vector<TextDatabase::Match> results; + TextDatabase::URLSet unique_urls; + db->GetTextMatches("COUNTTAG", options, &results, &unique_urls); + return static_cast<int>(results.size()); +} + +// Adds each of the test pages to the database. +void AddAllTestData(TextDatabase* db) { + EXPECT_TRUE(db->AddPageData( + Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1)); + EXPECT_TRUE(db->AddPageData( + Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2)); + EXPECT_TRUE(db->AddPageData( + Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3)); + EXPECT_EQ(3, RowCount(db)); +} + +bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results, + const char* url) { + GURL gurl(url); + for (size_t i = 0; i < results.size(); i++) { + if (results[i].url == gurl) + return true; + } + return false; +} + +} // namespace + +class TextDatabaseTest : public PlatformTest { + public: + TextDatabaseTest() {} + + protected: + virtual void SetUp() { + PlatformTest::SetUp(); + ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); + } + + // Create databases with this function, which will ensure that the files are + // deleted on shutdown. Only open one database for each file. Returns NULL on + // failure. + // + // Set |delete_file| to delete any existing file. If we are trying to create + // the file for the first time, we don't want a previous test left in a + // weird state to have left a file that would affect us. + TextDatabase* CreateDB(TextDatabase::DBIdent id, + bool allow_create, + bool delete_file) { + TextDatabase* db = new TextDatabase(temp_dir_.path(), id, allow_create); + + if (delete_file) + sql::Connection::Delete(db->file_name()); + + if (!db->Init()) { + delete db; + return NULL; + } + return db; + } + + // Directory containing the databases. + base::ScopedTempDir temp_dir_; + + // Name of the main database file. + base::FilePath file_name_; +}; + +TEST_F(TextDatabaseTest, AttachDetach) { + // First database with one page. + const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db1(CreateDB(kIdee1, true, true)); + ASSERT_TRUE(!!db1.get()); + EXPECT_TRUE(db1->AddPageData( + Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1)); + + // Second database with one page. + const int kIdee2 = 200802; + scoped_ptr<TextDatabase> db2(CreateDB(kIdee2, true, true)); + ASSERT_TRUE(!!db2.get()); + EXPECT_TRUE(db2->AddPageData( + Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2)); + + // Detach, then reattach database one. The file should exist, so we force + // opening an existing file. + db1.reset(); + db1.reset(CreateDB(kIdee1, false, false)); + ASSERT_TRUE(!!db1.get()); + + // We should not be able to attach this random database for which no file + // exists. + const int kIdeeNoExisto = 999999999; + scoped_ptr<TextDatabase> db3(CreateDB(kIdeeNoExisto, false, true)); + EXPECT_FALSE(!!db3.get()); +} + +TEST_F(TextDatabaseTest, AddRemove) { + // Create a database and add some pages to it. + const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true)); + ASSERT_TRUE(!!db.get()); + URLID id1 = db->AddPageData( + Time::FromInternalValue(kTime1), kURL1, kTitle1, kBody1); + EXPECT_NE(0, id1); + URLID id2 = db->AddPageData( + Time::FromInternalValue(kTime2), kURL2, kTitle2, kBody2); + EXPECT_NE(0, id2); + URLID id3 = db->AddPageData( + Time::FromInternalValue(kTime3), kURL3, kTitle3, kBody3); + EXPECT_NE(0, id3); + EXPECT_EQ(3, RowCount(db.get())); + + // Make sure we can delete some of the data. + db->DeletePageData(Time::FromInternalValue(kTime1), kURL1); + EXPECT_EQ(2, RowCount(db.get())); + + // Close and reopen. + db.reset(new TextDatabase(temp_dir_.path(), kIdee1, false)); + EXPECT_TRUE(db->Init()); + + // Verify that the deleted ID is gone and try to delete another one. + EXPECT_EQ(2, RowCount(db.get())); + db->DeletePageData(Time::FromInternalValue(kTime2), kURL2); + EXPECT_EQ(1, RowCount(db.get())); +} + +TEST_F(TextDatabaseTest, Query) { + // Make a database with some pages. + const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true)); + EXPECT_TRUE(!!db.get()); + AddAllTestData(db.get()); + + // Get all the results. + QueryOptions options; + options.begin_time = Time::FromInternalValue(0); + + std::vector<TextDatabase::Match> results; + TextDatabase::URLSet unique_urls; + db->GetTextMatches("COUNTTAG", options, &results, &unique_urls); + EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs"; + + // All 3 sites should be returned in order. + ASSERT_EQ(3U, results.size()); + EXPECT_EQ(GURL(kURL1), results[2].url); + EXPECT_EQ(GURL(kURL2), results[1].url); + EXPECT_EQ(GURL(kURL3), results[0].url); + + // Verify the info on those results. + EXPECT_TRUE(Time::FromInternalValue(kTime1) == results[2].time); + EXPECT_TRUE(Time::FromInternalValue(kTime2) == results[1].time); + EXPECT_TRUE(Time::FromInternalValue(kTime3) == results[0].time); + + EXPECT_EQ(std::string(kTitle1), UTF16ToUTF8(results[2].title)); + EXPECT_EQ(std::string(kTitle2), UTF16ToUTF8(results[1].title)); + EXPECT_EQ(std::string(kTitle3), UTF16ToUTF8(results[0].title)); + + // Should have no matches in the title. + EXPECT_EQ(0U, results[0].title_match_positions.size()); + EXPECT_EQ(0U, results[1].title_match_positions.size()); + EXPECT_EQ(0U, results[2].title_match_positions.size()); + + // We don't want to be dependent on the exact snippet algorithm, but we know + // since we searched for "COUNTTAG" which occurs at the beginning of each + // document, that each snippet should start with that. + EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[0].snippet.text()), + "COUNTTAG", false)); + EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[1].snippet.text()), + "COUNTTAG", false)); + EXPECT_TRUE(StartsWithASCII(UTF16ToUTF8(results[2].snippet.text()), + "COUNTTAG", false)); +} + +TEST_F(TextDatabaseTest, TimeRange) { + // Make a database with some pages. + const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true)); + ASSERT_TRUE(!!db.get()); + AddAllTestData(db.get()); + + // Beginning should be inclusive, and the ending exclusive. + // Get all the results. + QueryOptions options; + options.begin_time = Time::FromInternalValue(kTime1); + options.end_time = Time::FromInternalValue(kTime3); + + std::vector<TextDatabase::Match> results; + TextDatabase::URLSet unique_urls; + bool has_more_results = db->GetTextMatches( + "COUNTTAG", options, &results, &unique_urls); + EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs"; + + // The first and second should have been returned. + EXPECT_EQ(2U, results.size()); + EXPECT_TRUE(ResultsHaveURL(results, kURL1)); + EXPECT_TRUE(ResultsHaveURL(results, kURL2)); + EXPECT_FALSE(ResultsHaveURL(results, kURL3)); + EXPECT_EQ(kTime1, results.back().time.ToInternalValue()); + EXPECT_FALSE(has_more_results); + + // Do a query where there isn't a result on the begin boundary. + options.begin_time = Time::FromInternalValue((kTime2 - kTime1) / 2 + kTime1); + options.end_time = Time::FromInternalValue(kTime3 + 1); + results.clear(); // GetTextMatches does *not* clear the results. + has_more_results = db->GetTextMatches( + "COUNTTAG", options, &results, &unique_urls); + EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs"; + EXPECT_FALSE(has_more_results); + + // Should have two results, the second and third. + EXPECT_EQ(2U, results.size()); + EXPECT_FALSE(ResultsHaveURL(results, kURL1)); + EXPECT_TRUE(ResultsHaveURL(results, kURL2)); + EXPECT_TRUE(ResultsHaveURL(results, kURL3)); + + // Try a range that has no results. + options.begin_time = Time::FromInternalValue(kTime3 + 1); + options.end_time = Time::FromInternalValue(kTime3 * 100); + results.clear(); + has_more_results = db->GetTextMatches( + "COUNTTAG", options, &results, &unique_urls); + EXPECT_FALSE(has_more_results); +} + +// Make sure that max_count works. +TEST_F(TextDatabaseTest, MaxCount) { + // Make a database with some pages. + const int kIdee1 = 200801; + scoped_ptr<TextDatabase> db(CreateDB(kIdee1, true, true)); + ASSERT_TRUE(!!db.get()); + AddAllTestData(db.get()); + + // Set up the query to return all the results with "Google" (should be 2), but + // with a maximum of 1. + QueryOptions options; + options.begin_time = Time::FromInternalValue(kTime1); + options.end_time = Time::FromInternalValue(kTime3 + 1); + options.max_count = 1; + + std::vector<TextDatabase::Match> results; + TextDatabase::URLSet unique_urls; + db->GetTextMatches("google", options, &results, &unique_urls); + EXPECT_TRUE(unique_urls.empty()) << "Didn't ask for unique URLs"; + + // There should be one result, the most recent one. + EXPECT_EQ(1U, results.size()); + EXPECT_TRUE(ResultsHaveURL(results, kURL2)); + EXPECT_EQ(kTime2, results.back().time.ToInternalValue()); +} + +} // namespace history diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index c45ad0a..4a1deca 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -770,8 +770,6 @@ 'browser/history/in_memory_url_index_types.h', 'browser/history/most_visited_tiles_experiment.cc', 'browser/history/most_visited_tiles_experiment.h', - 'browser/history/page_collector.cc', - 'browser/history/page_collector.h', 'browser/history/page_usage_data.cc', 'browser/history/page_usage_data.h', 'browser/history/query_parser.cc', diff --git a/chrome/common/render_messages.h b/chrome/common/render_messages.h index aa38bbc..e75e566 100644 --- a/chrome/common/render_messages.h +++ b/chrome/common/render_messages.h @@ -417,11 +417,6 @@ IPC_MESSAGE_ROUTED1(ChromeViewMsg_NetErrorInfo, // Misc messages // These are messages sent from the renderer to the browser process. -// Provides the contents for the given page that was loaded recently. -IPC_MESSAGE_ROUTED2(ChromeViewHostMsg_PageContents, - GURL /* URL of the page */, - string16 /* page contents */) - // Notification that the language for the tab has been determined. IPC_MESSAGE_ROUTED2(ChromeViewHostMsg_TranslateLanguageDetermined, LanguageDetectionDetails /* details about lang detection */, diff --git a/chrome/renderer/chrome_render_view_observer.cc b/chrome/renderer/chrome_render_view_observer.cc index 70f6ccf..81a7cbf 100644 --- a/chrome/renderer/chrome_render_view_observer.cc +++ b/chrome/renderer/chrome_render_view_observer.cc @@ -800,12 +800,6 @@ void ChromeRenderViewObserver::CapturePageInfo(int page_id, TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); - if (contents.size()) { - // Send the text to the browser for indexing (the browser might decide not - // to index, if the URL is HTTPS for instance). - Send(new ChromeViewHostMsg_PageContents(routing_id(), url, contents)); - } - #if defined(FULL_SAFE_BROWSING) // Will swap out the string. if (phishing_classifier_) |