Diffstat (limited to 'chrome/browser/history/page_collector.h')
-rw-r--r--  chrome/browser/history/page_collector.h  123
1 file changed, 123 insertions(+), 0 deletions(-)
diff --git a/chrome/browser/history/page_collector.h b/chrome/browser/history/page_collector.h
new file mode 100644
index 0000000..c044a3b
--- /dev/null
+++ b/chrome/browser/history/page_collector.h
@@ -0,0 +1,123 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+#define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+
+#include "base/basictypes.h"
+#include "base/containers/mru_cache.h"
+#include "base/memory/weak_ptr.h"
+#include "base/strings/string16.h"
+#include "base/time/time.h"
+
+class GURL;
+
+namespace history {
+
+class HistoryPublisher;
+
+// Collects page data and publishes it to the HistoryPublisher.
+class PageCollector {
+ public:
+ // You must call Init() to complete initialization.
+ PageCollector();
+ ~PageCollector();
+
+  // Must be called before using any other functions.
+ void Init(const HistoryPublisher* history_publisher);
+
+  // Sets specific information for the given page to be published.
+  // In normal operation, URLs are added as the user visits them; the titles
+  // and bodies come in some time after that. These changes are automatically
+  // coalesced and published some time later via AddPageData() (see the
+  // illustrative sketch below these declarations).
+  //
+  // AddPageURL() must be called for a given URL before either the title or
+  // the body is set. The visit time should be the time corresponding to that
+  // visit in the history database.
+ void AddPageURL(const GURL& url, base::Time visit_time);
+ void AddPageTitle(const GURL& url, const string16& title);
+ void AddPageContents(const GURL& url, const string16& body);
+
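+  // Publishes the given page data to the HistoryPublisher.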
+ void AddPageData(const GURL& url,
+ base::Time visit_time,
+ const string16& title,
+ const string16& body);
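+
+  // Illustrative usage sketch only, not part of this interface; the
+  // |history_publisher| and |url| values here are assumed:
+  //
+  //   PageCollector collector;
+  //   collector.Init(history_publisher);
+  //   collector.AddPageURL(url, base::Time::Now());
+  //   collector.AddPageTitle(url, ASCIIToUTF16("Example title"));
+  //   collector.AddPageContents(url, ASCIIToUTF16("Example body text"));
+  //
+  // Once both the title and body have arrived (or the entry has waited too
+  // long), the coalesced data is handed to the HistoryPublisher.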
+
+ private:
+ // Stores "recent stuff" that has happened with the page, since the page
+ // visit, title, and body all come in at different times.
+ class PageInfo {
+ public:
+ explicit PageInfo(base::Time visit_time);
+ ~PageInfo();
+
+ // Getters.
+ base::Time visit_time() const { return visit_time_; }
+ const string16& title() const { return title_; }
+ const string16& body() const { return body_; }
+
+    // Setters; only the title and body can be updated.
+ void set_title(const string16& ttl);
+ void set_body(const string16& bdy);
+
+    // Return true if the title or the body of the entry has been set,
+    // respectively. Since both setters "fix" empty strings to be a single
+    // space, these indicate whether the corresponding setter was ever called.
+ bool has_title() const { return !title_.empty(); }
+ bool has_body() const { return !body_.empty(); }
+
+    // Returns true if this entry was added too long ago and we should give up
+    // waiting for more data. The current time is passed in as an argument so
+    // we can check many entries without re-querying the clock.
+ bool Expired(base::TimeTicks now) const;
+
+ private:
+ // Time of the visit of the URL. This will be the value stored in the URL
+ // and visit tables for the entry.
+ base::Time visit_time_;
+
+ // When this page entry was created. We have a cap on the maximum time that
+ // an entry will be in the queue before being flushed to the database.
+ base::TimeTicks added_time_;
+
+    // These will be the string " " when set from an empty string, which
+    // distinguishes "set to empty" from "never set".
+ string16 title_;
+ string16 body_;
+ };
+
+  // Collected data is published when both the title and body are present.
+  // HTTPS page contents are never passed to AddPageContents(), so collected
+  // data is also published periodically without the contents; pages whose
+  // bodies take too long to arrive will likewise be published without them.
+  // (A scheduling sketch follows these declarations.)
+ void ScheduleFlushCollected();
+ void FlushCollected();
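+
+  // Plausible scheduling sketch only (the real logic lives in the .cc file,
+  // and the |kFlushDelaySeconds| constant is assumed): a delayed task bound
+  // to a weak pointer flushes the queue, and is dropped if the collector is
+  // destroyed first:
+  //
+  //   base::MessageLoop::current()->PostDelayedTask(
+  //       FROM_HERE,
+  //       base::Bind(&PageCollector::FlushCollected,
+  //                  weak_factory_.GetWeakPtr()),
+  //       base::TimeDelta::FromSeconds(kFlushDelaySeconds));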
+
+ // Lists recent additions that we have not yet filled out with the title and
+ // body. Sorted by time, we will flush them when they are complete or have
+ // been in the queue too long without modification.
+ //
+ // We kind of abuse the MRUCache because we never move things around in it
+ // using Get. Instead, we keep them in the order they were inserted, since
+  // this is the metric we use to measure age. The MRUCache gives us an
+  // ordered list with fast lookup by URL (see the illustrative expiration
+  // sketch below the declaration).
+ typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
+ RecentChangeList recent_changes_;
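+
+  // Illustrative expiration sketch only (the actual flush code belongs in the
+  // .cc file): because entries are never reordered, the oldest sit at the
+  // back, so an expiration pass can walk the cache in reverse:
+  //
+  //   base::TimeTicks now = base::TimeTicks::Now();
+  //   RecentChangeList::reverse_iterator i = recent_changes_.rbegin();
+  //   while (i != recent_changes_.rend() && i->second.Expired(now)) {
+  //     AddPageData(i->first, i->second.visit_time(),
+  //                 i->second.title(), i->second.body());
+  //     i = recent_changes_.Erase(i);
+  //   }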
+
+ // Generates tasks for our periodic checking of expired "recent changes".
+ base::WeakPtrFactory<PageCollector> weak_factory_;
+
+  // The HistoryPublisher is created and managed by the history backend; we
+  // keep a non-owning pointer to it for our use. This can be NULL if there
+  // are no indexers registered to receive indexing data from us.
+ const HistoryPublisher* history_publisher_;
+
+ DISALLOW_COPY_AND_ASSIGN(PageCollector);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_