diff options
Diffstat (limited to 'chrome/browser/history/page_collector.h')
-rw-r--r-- | chrome/browser/history/page_collector.h | 123 |
1 files changed, 123 insertions, 0 deletions
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
#define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_

#include "base/basictypes.h"
#include "base/containers/mru_cache.h"
#include "base/memory/weak_ptr.h"
#include "base/strings/string16.h"
#include "base/time/time.h"

class GURL;

namespace history {

class HistoryPublisher;

// Collects the pieces of data belonging to a visited page (the visit itself,
// the title and the body text) as they arrive, and publishes them to the
// HistoryPublisher.
//
// The three pieces come in at different times, so partially filled entries
// are parked in |recent_changes_| until they are complete or have been
// waiting too long (see PageInfo::Expired() and FlushCollected()).
class PageCollector {
 public:
  // You must call Init() to complete initialization.
  PageCollector();
  ~PageCollector();

  // Must be called before using any other function.
  //
  // |history_publisher| may be NULL when no indexers are registered (see the
  // comment on |history_publisher_|).
  // NOTE(review): presumably the pointer is simply stored and not owned --
  // confirm in the .cc.
  void Init(const HistoryPublisher* history_publisher);

  // Sets specific information for the given page to be published.
  // In normal operation, URLs will be added as the user visits them; the
  // titles and bodies will come in some time after that. These changes will be
  // automatically coalesced and added to the database some time in the future
  // using AddPageData().
  //
  // AddPageURL() must be called for a given URL before either its title or
  // its body is set. The visit time should be the time corresponding to that
  // visit in the history database.
  void AddPageURL(const GURL& url, base::Time visit_time);
  void AddPageTitle(const GURL& url, const string16& title);
  void AddPageContents(const GURL& url, const string16& body);

  // Takes the complete data for one page: its URL, the visit time, the title
  // and the body. As described above, this is the call through which the
  // coalesced URL/title/body updates are eventually added.
  // NOTE(review): presumably forwards to |history_publisher_| when that is
  // non-NULL -- confirm in the .cc.
  void AddPageData(const GURL& url,
                   base::Time visit_time,
                   const string16& title,
                   const string16& body);

 private:
  // Stores "recent stuff" that has happened with the page, since the page
  // visit, title, and body all come in at different times.
  class PageInfo {
   public:
    // |visit_time| is the time of the visit this entry describes.
    explicit PageInfo(base::Time visit_time);
    ~PageInfo();

    // Getters.
    base::Time visit_time() const { return visit_time_; }
    const string16& title() const { return title_; }
    const string16& body() const { return body_; }

    // Setters. Only the title and body can be updated after construction.
    void set_title(const string16& ttl);
    void set_body(const string16& bdy);

    // Return true if the title (respectively, the body) of the entry has been
    // set. Since both setters will "fix" an empty string to be a space, a
    // non-empty member indicates that the setter was called at some point,
    // even if it was called with an empty string.
    bool has_title() const { return !title_.empty(); }
    bool has_body() const { return !body_.empty(); }

    // Returns true if this entry was added too long ago and we should give up
    // waiting for more data. The current time is passed in as an argument so
    // we can check many entries without re-querying the timer.
    bool Expired(base::TimeTicks now) const;

   private:
    // Time of the visit of the URL. This will be the value stored in the URL
    // and visit tables for the entry.
    base::Time visit_time_;

    // When this page entry was created. We have a cap on the maximum time that
    // an entry will be in the queue before being flushed to the database.
    base::TimeTicks added_time_;

    // Empty until the corresponding setter is called. A value that was set to
    // an empty string is stored as the string " " instead, so that "set" and
    // "unset" can be told apart (see has_title() / has_body()).
    string16 title_;
    string16 body_;
  };

  // Collected data is published when both the title and body are
  // present. https data is never passed to AddPageContents(), so
  // periodically collected data is published without the contents.
  // Pages which take a long time to load will not have their bodies
  // published.
  //
  // NOTE(review): ScheduleFlushCollected() presumably arranges a later call
  // to FlushCollected() through |weak_factory_| -- confirm in the .cc.
  void ScheduleFlushCollected();
  void FlushCollected();

  // Lists recent additions that we have not yet filled out with the title and
  // body. Sorted by time, we will flush them when they are complete or have
  // been in the queue too long without modification.
  //
  // We kind of abuse the MRUCache because we never move things around in it
  // using Get. Instead, we keep them in the order they were inserted, since
  // this is the metric we use to measure age. The MRUCache gives us an ordered
  // list with fast lookup by URL.
  //
  // NOTE(review): GURL is only forward-declared in this header but is used as
  // the cache's key type here; verify that every includer sees the full GURL
  // definition wherever the cache is instantiated.
  typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
  RecentChangeList recent_changes_;

  // Generates tasks for our periodic checking of expired "recent changes".
  //
  // NOTE(review): Chromium convention is to declare the WeakPtrFactory as the
  // last data member so that it is destroyed (and its weak pointers are
  // invalidated) before any other member. |history_publisher_| follows it
  // here; that is a raw pointer, so this looks harmless, but moving the
  // factory would also require matching the constructor's initializer order
  // in the .cc -- confirm before changing.
  base::WeakPtrFactory<PageCollector> weak_factory_;

  // This object is created and managed by the history backend. We maintain an
  // opaque pointer to the object for our use.
  // This can be NULL if there are no indexers registered to receive indexing
  // data from us.
  const HistoryPublisher* history_publisher_;

  DISALLOW_COPY_AND_ASSIGN(PageCollector);
};

}  // namespace history

#endif  // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_