// Copyright (c) 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_ #define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_ #include "base/basictypes.h" #include "base/containers/mru_cache.h" #include "base/memory/weak_ptr.h" #include "base/strings/string16.h" #include "base/time/time.h" class GURL; namespace history { class HistoryPublisher; // Collect page data and publish to HistoryPublisher. class PageCollector { public: // You must call Init() to complete initialization. PageCollector(); ~PageCollector(); // Must call before using other functions. void Init(const HistoryPublisher* history_publisher); // Sets specific information for the given page to be published. // In normal operation, URLs will be added as the user visits them, the titles // and bodies will come in some time after that. These changes will be // automatically coalesced and added to the database some time in the future // using AddPageData(). // // AddPageURL must be called for a given URL before either the title // or body set. The visit time should be the time corresponding to // that visit in the history database. void AddPageURL(const GURL& url, base::Time visit_time); void AddPageTitle(const GURL& url, const string16& title); void AddPageContents(const GURL& url, const string16& body); void AddPageData(const GURL& url, base::Time visit_time, const string16& title, const string16& body); private: // Stores "recent stuff" that has happened with the page, since the page // visit, title, and body all come in at different times. class PageInfo { public: explicit PageInfo(base::Time visit_time); ~PageInfo(); // Getters. base::Time visit_time() const { return visit_time_; } const string16& title() const { return title_; } const string16& body() const { return body_; } // Setters, we can only update the title and body. void set_title(const string16& ttl); void set_body(const string16& bdy); // Returns true if both the title or body of the entry has been set. Since // both the title and body setters will "fix" empty strings to be a space, // these indicate if the setter was ever called. bool has_title() const { return !title_.empty(); } bool has_body() const { return !body_.empty(); } // Returns true if this entry was added too long ago and we should give up // waiting for more data. The current time is passed in as an argument so we // can check many without re-querying the timer. bool Expired(base::TimeTicks now) const; private: // Time of the visit of the URL. This will be the value stored in the URL // and visit tables for the entry. base::Time visit_time_; // When this page entry was created. We have a cap on the maximum time that // an entry will be in the queue before being flushed to the database. base::TimeTicks added_time_; // Will be the string " " when they are set to distinguish set and unset. string16 title_; string16 body_; }; // Collected data is published when both the title and body are // present. https data is never passed to AddPageContents(), so // periodically collected data is published without the contents. // Pages which take a long time to load will not have their bodies // published. void ScheduleFlushCollected(); void FlushCollected(); // Lists recent additions that we have not yet filled out with the title and // body. Sorted by time, we will flush them when they are complete or have // been in the queue too long without modification. // // We kind of abuse the MRUCache because we never move things around in it // using Get. Instead, we keep them in the order they were inserted, since // this is the metric we use to measure age. The MRUCache gives us an ordered // list with fast lookup by URL. typedef base::MRUCache RecentChangeList; RecentChangeList recent_changes_; // Generates tasks for our periodic checking of expired "recent changes". base::WeakPtrFactory weak_factory_; // This object is created and managed by the history backend. We maintain an // opaque pointer to the object for our use. // This can be NULL if there are no indexers registered to receive indexing // data from us. const HistoryPublisher* history_publisher_; DISALLOW_COPY_AND_ASSIGN(PageCollector); }; } // namespace history #endif // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_