Diffstat (limited to 'chrome/browser/history/page_collector.h')
-rw-r--r--  chrome/browser/history/page_collector.h  123
1 file changed, 123 insertions(+), 0 deletions(-)
diff --git a/chrome/browser/history/page_collector.h b/chrome/browser/history/page_collector.h
new file mode 100644
index 0000000..c044a3b
--- /dev/null
+++ b/chrome/browser/history/page_collector.h
@@ -0,0 +1,123 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+#define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
+
+#include "base/basictypes.h"
+#include "base/containers/mru_cache.h"
+#include "base/memory/weak_ptr.h"
+#include "base/strings/string16.h"
+#include "base/time/time.h"
+
+class GURL;
+
+namespace history {
+
+class HistoryPublisher;
+
+// Collects page data and publishes it to the HistoryPublisher.
+class PageCollector {
+ public:
+ // You must call Init() to complete initialization.
+ PageCollector();
+ ~PageCollector();
+
+  // Must be called before using any other functions.
+ void Init(const HistoryPublisher* history_publisher);
+
+  // Sets specific information for the given page to be published.
+  // In normal operation, URLs are added as the user visits them; the titles
+  // and bodies come in some time after that. These changes are automatically
+  // coalesced and published some time later via AddPageData() (see the
+  // illustrative sketch below these declarations).
+  //
+  // AddPageURL() must be called for a given URL before either the title or
+  // the body is set. The visit time should be the time corresponding to that
+  // visit in the history database.
+ void AddPageURL(const GURL& url, base::Time visit_time);
+ void AddPageTitle(const GURL& url, const string16& title);
+ void AddPageContents(const GURL& url, const string16& body);
+
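+  // Publishes the given page data to the HistoryPublisher.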
+ void AddPageData(const GURL& url,
+ base::Time visit_time,
+ const string16& title,
+ const string16& body);
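+
+  // Illustrative usage sketch only, not part of this interface; the
+  // |history_publisher| and |url| values here are assumed:
+  //
+  //   PageCollector collector;
+  //   collector.Init(history_publisher);
+  //   collector.AddPageURL(url, base::Time::Now());
+  //   collector.AddPageTitle(url, ASCIIToUTF16("Example title"));
+  //   collector.AddPageContents(url, ASCIIToUTF16("Example body text"));
+  //
+  // Once both the title and body have arrived (or the entry has waited too
+  // long), the coalesced data is handed to the HistoryPublisher.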
+
+ private:
+ // Stores "recent stuff" that has happened with the page, since the page
+ // visit, title, and body all come in at different times.
+ class PageInfo {
+ public:
+ explicit PageInfo(base::Time visit_time);
+ ~PageInfo();
+
+ // Getters.
+ base::Time visit_time() const { return visit_time_; }
+ const string16& title() const { return title_; }
+ const string16& body() const { return body_; }
+
+    // Setters; only the title and body can be updated.
+ void set_title(const string16& ttl);
+ void set_body(const string16& bdy);
+
+    // Return true if the title or the body of the entry has been set,
+    // respectively. Since both setters "fix" empty strings to be a single
+    // space, these indicate whether the corresponding setter was ever called.
+ bool has_title() const { return !title_.empty(); }
+ bool has_body() const { return !body_.empty(); }
+
+    // Returns true if this entry was added too long ago and we should give up
+    // waiting for more data. The current time is passed in as an argument so
+    // we can check many entries without re-querying the clock.
+ bool Expired(base::TimeTicks now) const;
+
+ private:
+ // Time of the visit of the URL. This will be the value stored in the URL
+ // and visit tables for the entry.
+ base::Time visit_time_;
+
+ // When this page entry was created. We have a cap on the maximum time that
+ // an entry will be in the queue before being flushed to the database.
+ base::TimeTicks added_time_;
+
+    // These will be the string " " when set from an empty string, which
+    // distinguishes "set to empty" from "never set".
+ string16 title_;
+ string16 body_;
+ };
+
+  // Collected data is published when both the title and body are present.
+  // HTTPS page contents are never passed to AddPageContents(), so collected
+  // data is also published periodically without the contents; pages whose
+  // bodies take too long to arrive will likewise be published without them.
+  // (A scheduling sketch follows these declarations.)
+ void ScheduleFlushCollected();
+ void FlushCollected();
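+
+  // Plausible scheduling sketch only (the real logic lives in the .cc file,
+  // and the |kFlushDelaySeconds| constant is assumed): a delayed task bound
+  // to a weak pointer flushes the queue, and is dropped if the collector is
+  // destroyed first:
+  //
+  //   base::MessageLoop::current()->PostDelayedTask(
+  //       FROM_HERE,
+  //       base::Bind(&PageCollector::FlushCollected,
+  //                  weak_factory_.GetWeakPtr()),
+  //       base::TimeDelta::FromSeconds(kFlushDelaySeconds));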
+
+ // Lists recent additions that we have not yet filled out with the title and
+ // body. Sorted by time, we will flush them when they are complete or have
+ // been in the queue too long without modification.
+ //
+ // We kind of abuse the MRUCache because we never move things around in it
+ // using Get. Instead, we keep them in the order they were inserted, since
+  // this is the metric we use to measure age. The MRUCache gives us an
+  // ordered list with fast lookup by URL (see the illustrative expiration
+  // sketch below the declaration).
+ typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
+ RecentChangeList recent_changes_;
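+
+  // Illustrative expiration sketch only (the actual flush code belongs in the
+  // .cc file): because entries are never reordered, the oldest sit at the
+  // back, so an expiration pass can walk the cache in reverse:
+  //
+  //   base::TimeTicks now = base::TimeTicks::Now();
+  //   RecentChangeList::reverse_iterator i = recent_changes_.rbegin();
+  //   while (i != recent_changes_.rend() && i->second.Expired(now)) {
+  //     AddPageData(i->first, i->second.visit_time(),
+  //                 i->second.title(), i->second.body());
+  //     i = recent_changes_.Erase(i);
+  //   }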
+
+ // Generates tasks for our periodic checking of expired "recent changes".
+ base::WeakPtrFactory<PageCollector> weak_factory_;
+
+  // The HistoryPublisher is created and managed by the history backend; we
+  // keep a non-owning pointer to it for our use. This can be NULL if there
+  // are no indexers registered to receive indexing data from us.
+ const HistoryPublisher* history_publisher_;
+
+ DISALLOW_COPY_AND_ASSIGN(PageCollector);
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_