summaryrefslogtreecommitdiffstats
path: root/components/precache/core/precache_fetcher.h
blob: ff32f0b5d55c9bebc6e810e3a78d8d8c85d85fd7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_PRECACHE_CORE_PRECACHE_FETCHER_H_
#define COMPONENTS_PRECACHE_CORE_PRECACHE_FETCHER_H_

#include <stdint.h>

#include <list>
#include <string>
#include <vector>

#include "base/callback.h"
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/time/time.h"
#include "net/url_request/url_fetcher.h"
#include "net/url_request/url_fetcher_delegate.h"
#include "url/gurl.h"

namespace net {
class URLRequestContextGetter;
}

namespace precache {

class PrecacheConfigurationSettings;

// Visible for testing.
// Declaration only: the definition (and therefore the value) lives outside this
// header. NOTE(review): the meaning of the constant is not visible from here;
// confirm against its definition before relying on it.
extern const int kNoTracking;

// Public interface to code that fetches resources that the user is likely to
// want to fetch in the future, putting them in the network stack disk cache.
// Precaching is intended to be done when Chrome is not actively in use, likely
// hours ahead of the time when the resources are actually needed.
//
// This class takes as input a prioritized list of URL domains that the user
// commonly visits, referred to as starting hosts. This class interacts with a
// server, sending it the list of starting hosts sequentially. For each starting
// host, the server returns a manifest of resource URLs that are good candidates
// for precaching. Every resource returned is fetched, and responses are cached
// as they are received. Destroying the PrecacheFetcher while it is precaching
// will cancel any fetch in progress and cancel precaching.
//
// The URLs of the server-side component must be specified in order for the
// PrecacheFetcher to work. This includes the URL that the precache
// configuration settings are fetched from and the prefix of URLs where precache
// manifests are fetched from. These can be set by using command line switches
// or by providing default values.
//
// Sample interaction:
//
// class MyPrecacheFetcherDelegate : public PrecacheFetcher::PrecacheDelegate {
//  public:
//   void PrecacheResourcesForTopHosts(
//       const std::vector<std::string>& starting_hosts,
//       net::URLRequestContextGetter* request_context,
//       const GURL& config_url,
//       const std::string& manifest_url_prefix) {
//     fetcher_.reset(new PrecacheFetcher(starting_hosts, request_context,
//                                        config_url, manifest_url_prefix,
//                                        this));
//     fetcher_->Start();
//   }
//
//   void OnDone() override {
//     // Do something when precaching is done.
//   }
//
//  private:
//   scoped_ptr<PrecacheFetcher> fetcher_;
// };
class PrecacheFetcher {
 public:
  // Interface through which a PrecacheFetcher reports that it has finished.
  // The PrecacheFetcher only keeps a non-owning pointer to its delegate.
  class PrecacheDelegate {
   public:
    // Called when the fetching of resources has finished, whether the resources
    // were fetched or not. If the PrecacheFetcher is destroyed before OnDone is
    // called, then precaching will be canceled and OnDone will not be called.
    virtual void OnDone() = 0;

   protected:
    // Delegates are not owned through this interface, so the destructor is
    // protected and non-virtual: deleting an implementation through a
    // PrecacheDelegate* (which would be undefined behavior without a virtual
    // destructor) is rejected at compile time.
    ~PrecacheDelegate() {}
  };

  // Visible for testing.
  class Fetcher;

  // Constructs a new PrecacheFetcher. The |starting_hosts| parameter is a
  // prioritized list of hosts that the user commonly visits. These hosts are
  // used by a server side component to construct a list of resource URLs that
  // the user is likely to fetch.
  //
  // |request_context| is the context used when fetching URLs. |config_url| and
  // |manifest_url_prefix| are optional overrides for where the configuration
  // settings and the manifests are fetched from. |precache_delegate| is not
  // owned and should not be NULL.
  PrecacheFetcher(const std::vector<std::string>& starting_hosts,
                  net::URLRequestContextGetter* request_context,
                  const GURL& config_url,
                  const std::string& manifest_url_prefix,
                  PrecacheDelegate* precache_delegate);

  // Destroying the PrecacheFetcher before OnDone has been called cancels
  // precaching; OnDone will then not be called.
  virtual ~PrecacheFetcher();

  // Starts fetching resources to precache. URLs are fetched sequentially. Can
  // be called from any thread. Start should only be called once on a
  // PrecacheFetcher instance.
  void Start();

 private:
  // Fetches the next resource or manifest URL, if any remain. Fetching is done
  // sequentially and depth-first: all resources are fetched for a manifest
  // before the next manifest is fetched. This is done to limit the length of
  // the |resource_urls_to_fetch_| list, reducing the memory usage.
  void StartNextFetch();

  // Called when the precache configuration settings have been fetched.
  // Determines the list of manifest URLs to fetch according to the list of
  // |starting_hosts_| and information from the precache configuration settings.
  // If the fetch of the configuration settings fails, then precaching ends.
  void OnConfigFetchComplete(const net::URLFetcher* source);

  // Called when a precache manifest has been fetched. Builds the list of
  // resource URLs to fetch according to the URLs in the manifest. If the fetch
  // of a manifest fails, then it skips to the next manifest.
  void OnManifestFetchComplete(const net::URLFetcher* source);

  // Called when a resource has been fetched.
  void OnResourceFetchComplete(const net::URLFetcher* source);

  // The prioritized list of starting hosts that the server will pick resource
  // URLs to be precached for.
  const std::vector<std::string> starting_hosts_;

  // The request context used when fetching URLs.
  const scoped_refptr<net::URLRequestContextGetter> request_context_;

  // The custom URL to use when fetching the config. If not provided, the
  // default flag-specified URL will be used.
  const GURL config_url_;

  // The custom URL prefix to use when fetching manifests. If not provided, the
  // default flag-specified prefix will be used.
  const std::string manifest_url_prefix_;

  // Non-owning pointer. Should not be NULL.
  PrecacheDelegate* precache_delegate_;

  // The precache configuration settings.
  // NOTE(review): presumably populated once the config fetch completes (see
  // OnConfigFetchComplete) -- confirm in the implementation file.
  scoped_ptr<PrecacheConfigurationSettings> config_;

  // Tally of the total number of bytes contained in URL fetches, including
  // config, manifests, and resources. This is the number of bytes as they
  // would be compressed over the network.
  size_t total_response_bytes_;

  // Tally of the total number of bytes received over the network from URL
  // fetches (the same ones as in total_response_bytes_). This includes response
  // headers and intermediate responses such as 30xs.
  size_t network_response_bytes_;

  // Owned Fetcher instance.
  // NOTE(review): presumably the single fetch currently in progress, since
  // URLs are fetched sequentially -- confirm in the implementation file.
  scoped_ptr<Fetcher> fetcher_;

  // Time when the prefetch was started.
  base::TimeTicks start_time_;

  // NOTE(review): presumably the number of manifest URLs determined from the
  // config, tracked separately from the (shrinking) list below -- confirm in
  // the implementation file.
  int num_manifest_urls_to_fetch_;

  // Manifest and resource URLs that are still waiting to be fetched; see
  // StartNextFetch() for the order in which they are consumed.
  std::list<GURL> manifest_urls_to_fetch_;
  std::list<GURL> resource_urls_to_fetch_;

  DISALLOW_COPY_AND_ASSIGN(PrecacheFetcher);
};

// Class that fetches a URL, and runs the specified callback when the fetch is
// complete. This class exists so that a different method can be run in
// response to different kinds of fetches, e.g. OnConfigFetchComplete when
// configuration settings are fetched, OnManifestFetchComplete when a manifest
// is fetched, etc.
//
// This class tries to increase freshness while limiting network usage, by using
// the following strategy:
// 1.  Fetch the URL from the cache.
// 2a. If it's present and lacks revalidation headers, then stop.
// 2b. If it's not present, or it's present and has revalidation headers, then
//     refetch over the network.
//
// This allows the precache to "refresh" cache entries by increasing their
// expiration date, but minimizes the network impact of doing so, by performing
// only conditional GETs.
//
// On completion it calls the given callback. This class cancels requests whose
// responses are or will be larger than max_bytes. In such a case, |callback|
// will be called with nullptr.
class PrecacheFetcher::Fetcher : public net::URLFetcherDelegate {
 public:
  // Construct a new Fetcher. This will create and start a new URLFetcher for
  // the specified URL using the specified request context.
  //
  // |callback| is run when the fetch is complete. |max_bytes| is the response
  // size limit above which the request is canceled. |is_resource_request|
  // distinguishes resource fetches from other fetches.
  // NOTE(review): how |is_resource_request| changes the request is decided in
  // the implementation file -- confirm there.
  Fetcher(net::URLRequestContextGetter* request_context,
          const GURL& url,
          const base::Callback<void(const net::URLFetcher*)>& callback,
          bool is_resource_request,
          size_t max_bytes);
  ~Fetcher() override;

  // net::URLFetcherDelegate overrides.
  // NOTE(review): the progress callback is presumably where responses that
  // exceed |max_bytes_| are canceled -- confirm in the implementation file.
  void OnURLFetchDownloadProgress(const net::URLFetcher* source,
                                  int64_t current,
                                  int64_t total) override;
  void OnURLFetchComplete(const net::URLFetcher* source) override;

  // Accessors for the byte counters recorded by this Fetcher.
  int64_t response_bytes() const { return response_bytes_; }
  int64_t network_response_bytes() const { return network_response_bytes_; }

 private:
  // The two stages of the fetch strategy: first the cache, then the network.
  enum class FetchStage { CACHE, NETWORK };

  // Start the fetch for the corresponding stage.
  void LoadFromCache();
  void LoadFromNetwork();

  // Raw, non-const-pointee pointer; this class does not hold its own reference
  // to the request context.
  net::URLRequestContextGetter* const request_context_;
  // The URL being fetched.
  const GURL url_;
  // Run when the fetch is complete.
  const base::Callback<void(const net::URLFetcher*)> callback_;
  const bool is_resource_request_;
  // Response size limit; larger responses cause the request to be canceled.
  const size_t max_bytes_;

  // The stage this Fetcher is currently in.
  FetchStage fetch_stage_;
  // The url_fetcher_cache_ is kept alive until Fetcher destruction for testing.
  scoped_ptr<net::URLFetcher> url_fetcher_cache_;
  scoped_ptr<net::URLFetcher> url_fetcher_network_;
  // Byte counters exposed through response_bytes() and
  // network_response_bytes().
  int64_t response_bytes_;
  int64_t network_response_bytes_;

  DISALLOW_COPY_AND_ASSIGN(Fetcher);
};

}  // namespace precache

#endif  // COMPONENTS_PRECACHE_CORE_PRECACHE_FETCHER_H_