summaryrefslogtreecommitdiffstats
path: root/chrome/browser/net/predictor.h
blob: 710b8ae64e659f3629a00894b3ae771733feab01 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// A Predictor object is instantiated once in the browser process, and manages
// both preresolution of hostnames, as well as TCP/IP preconnection to expected
// subresources.
// Most hostname lists are provided by the renderer processes, and include URLs
// that *might* be used in the near future by the browsing user.  One goal of
// this class is to cause the underlying DNS structure to lookup a hostname
// before it is really needed, and hence reduce latency in the standard lookup
// paths.
// Subresource relationships are usually acquired from the referrer field in a
// navigation.  A subresource URL may be associated with a referrer URL.  Later
// navigations may, if the likelihood of needing the subresource is high enough,
// cause this module to speculatively create a TCP/IP connection. If there is
// only a low likelihood, then a DNS pre-resolution operation may be performed.

#ifndef CHROME_BROWSER_NET_PREDICTOR_H_
#define CHROME_BROWSER_NET_PREDICTOR_H_
#pragma once

#include <map>
#include <queue>
#include <set>
#include <string>
#include <vector>

#include "base/gtest_prod_util.h"
#include "base/memory/scoped_ptr.h"
#include "chrome/browser/net/url_info.h"
#include "chrome/browser/net/referrer.h"
#include "chrome/common/net/predictor_common.h"
#include "net/base/host_port_pair.h"

namespace base {
class ListValue;
}

namespace base {
class WaitableEvent;
}

namespace net {
class HostResolver;
}  // namespace net

class IOThread;
class PrefService;
class Profile;

namespace chrome_browser_net {

typedef chrome_common_net::UrlList UrlList;
typedef chrome_common_net::NameList NameList;
typedef std::map<GURL, UrlInfo> Results;

// Predictor is constructed during Profile construction (on the UI thread),
// but it is destroyed on the IO thread when ProfileIOData goes away. All of
// its core state and functionality happens on the IO thread. The only UI
// methods are initialization / shutdown related (including preconnect
// initialization), or convenience methods that internally forward calls to
// the IO thread.
class Predictor {
 public:
  // A version number for prefs that are saved. This should be incremented when
  // we change the format so that we discard old data.
  static const int kPredictorReferrerVersion;

  // Given that the underlying Chromium resolver defaults to a total maximum of
  // 8 paralell resolutions, we will avoid any chance of starving navigational
  // resolutions by limiting the number of paralell speculative resolutions.
  // This is used in the field trials and testing.
  // TODO(jar): Move this limitation into the resolver.
  static const size_t kMaxSpeculativeParallelResolves;

  // To control the congestion avoidance system, we need an estimate of how
  // many speculative requests may arrive at once.  Since we currently only
  // keep 8 subresource names for each frame, we'll use that as our basis.
  // Note that when scanning search results lists, we might actually get 10 at
  // a time, and wikipedia can often supply (during a page scan) upwards of 50.
  // In those odd cases, we may discard some of the later speculative requests
  // mistakenly assuming that the resolutions took too long.
  static const int kTypicalSpeculativeGroupSize;

  // The next constant specifies an amount of queueing delay that is
  // "too large," and indicative of problems with resolutions (perhaps due to
  // an overloaded router, or such).  When we exceed this delay, congestion
  // avoidance will kick in and all speculations in the queue will be discarded.
  static const int kMaxSpeculativeResolveQueueDelayMs;

  // |max_concurrent| specifies how many concurrent (parallel) prefetches will
  // be performed. Host lookups will be issued through |host_resolver|.
  explicit Predictor(bool preconnect_enabled);

  virtual ~Predictor();

  // This function is used to create a predictor. For testing, we can create
  // a version which does a simpler shutdown.
  static Predictor* CreatePredictor(bool preconnect_enabled,
                                    bool simple_shutdown);

  static void RegisterUserPrefs(PrefService* user_prefs);

  // ------------- Start UI thread methods.

  virtual void InitNetworkPredictor(PrefService* user_prefs,
                                    PrefService* local_state,
                                    IOThread* io_thread);

  // The Omnibox has proposed a given url to the user, and if it is a search
  // URL, then it also indicates that this is preconnectable (i.e., we could
  // preconnect to the search server).
  void AnticipateOmniboxUrl(const GURL& url, bool preconnectable);

  // Preconnect a URL and all of its subresource domains.
  void PreconnectUrlAndSubresources(const GURL& url);

  static UrlList GetPredictedUrlListAtStartup(PrefService* user_prefs,
                                              PrefService* local_state);

  static void set_max_queueing_delay(int max_queueing_delay_ms);

  static void set_max_parallel_resolves(size_t max_parallel_resolves);

  virtual void ShutdownOnUIThread(PrefService* user_prefs);

  // ------------- End UI thread methods.

  // ------------- Start IO thread methods.

  // Cancel pending requests and prevent new ones from being made.
  void Shutdown();

  // In some circumstances, for privacy reasons, all results should be
  // discarded.  This method gracefully handles that activity.
  // Destroy all our internal state, which shows what names we've looked up, and
  // how long each has taken, etc. etc.  We also destroy records of suggesses
  // (cache hits etc.).
  void DiscardAllResults();

  // Add hostname(s) to the queue for processing.
  void ResolveList(const UrlList& urls,
                   UrlInfo::ResolutionMotivation motivation);

  void Resolve(const GURL& url, UrlInfo::ResolutionMotivation motivation);

  // Record details of a navigation so that we can preresolve the host name
  // ahead of time the next time the users navigates to the indicated host.
  // Should only be called when urls are distinct, and they should already be
  // canonicalized to not have a path.
  void LearnFromNavigation(const GURL& referring_url, const GURL& target_url);

  // When displaying info in about:dns, the following API is called.
  static void PredictorGetHtmlInfo(Predictor* predictor, std::string* output);

  // Dump HTML table containing list of referrers for about:dns.
  void GetHtmlReferrerLists(std::string* output);

  // Dump the list of currently known referrer domains and related prefetchable
  // domains for about:dns.
  void GetHtmlInfo(std::string* output);

  // Discards any referrer for which all the suggested host names are currently
  // annotated with negligible expected-use.  Scales down (diminishes) the
  // expected-use of those that remain, so that their use will go down by a
  // factor each time we trim (moving the referrer closer to being discarded in
  // a future call).
  // The task is performed synchronously and completes before returing.
  void TrimReferrersNow();

  // Construct a ListValue object that contains all the data in the referrers_
  // so that it can be persisted in a pref.
  void SerializeReferrers(base::ListValue* referral_list);

  // Process a ListValue that contains all the data from a previous reference
  // list, as constructed by SerializeReferrers(), and add all the identified
  // values into the current referrer list.
  void DeserializeReferrers(const base::ListValue& referral_list);

  void DeserializeReferrersThenDelete(base::ListValue* referral_list);

  void DiscardInitialNavigationHistory();

  void FinalizeInitializationOnIOThread(
      const std::vector<GURL>& urls_to_prefetch,
      base::ListValue* referral_list,
      IOThread* io_thread,
      bool predictor_enabled);

  // During startup, we learn what the first N urls visited are, and then
  // resolve the associated hosts ASAP during our next startup.
  void LearnAboutInitialNavigation(const GURL& url);

  // Renderer bundles up list and sends to this browser API via IPC.
  // TODO(jar): Use UrlList instead to include port and scheme.
  void DnsPrefetchList(const NameList& hostnames);

  // May be called from either the IO or UI thread and will PostTask
  // to the IO thread if necessary.
  void DnsPrefetchMotivatedList(const UrlList& urls,
                                UrlInfo::ResolutionMotivation motivation);

  // May be called from either the IO or UI thread and will PostTask
  // to the IO thread if necessary.
  void SaveStateForNextStartupAndTrim(PrefService* prefs);

  void SaveDnsPrefetchStateForNextStartupAndTrim(
      base::ListValue* startup_list,
      base::ListValue* referral_list,
      base::WaitableEvent* completion);

  // May be called from either the IO or UI thread and will PostTask
  // to the IO thread if necessary.
  void EnablePredictor(bool enable);

  void EnablePredictorOnIOThread(bool enable);

  // ------------- End IO thread methods.

  // The following methods may be called on either the IO or UI threads.

  // Instigate pre-connection to any URLs, or pre-resolution of related host,
  // that we predict will be needed after this navigation (typically
  // more-embedded resources on a page).  This method will actually post a task
  // to do the actual work, so as not to jump ahead of the frame navigation that
  // instigated this activity.
  void PredictFrameSubresources(const GURL& url);

  // Put URL in canonical form, including a scheme, host, and port.
  // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url
  // cannot be otherwise canonicalized.
  static GURL CanonicalizeUrl(const GURL& url);

  // Used for testing.
  void SetHostResolver(net::HostResolver* host_resolver) {
    host_resolver_ = host_resolver;
  }
  // Used for testing.
  size_t max_concurrent_dns_lookups() const {
    return max_concurrent_dns_lookups_;
  }
  // Used for testing.
  void SetShutdown(bool shutdown) {
    shutdown_ = shutdown;
  }

  // Flag setting to use preconnection instead of just DNS pre-fetching.
  bool preconnect_enabled() const {
    return preconnect_enabled_;
  }

  // Flag setting for whether we are prefetching dns lookups.
  bool predictor_enabled() const {
    return predictor_enabled_;
  }


 private:
  FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest);
  FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest);
  FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest);
  FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest);
  FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest);
  FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest);
  FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest);
  FRIEND_TEST_ALL_PREFIXES(PredictorTest, ReferrerSerializationTrimTest);
  friend class WaitForResolutionHelper;  // For testing.

  class LookupRequest;

  // A simple priority queue for handling host names.
  // Some names that are queued up have |motivation| that requires very rapid
  // handling.  For example, a sub-resource name lookup MUST be done before the
  // actual sub-resource is fetched.  In contrast, a name that was speculatively
  // noted in a page has to be resolved before the user "gets around to"
  // clicking on a link.  By tagging (with a motivation) each push we make into
  // this FIFO queue, the queue can re-order the more important names to service
  // them sooner (relative to some low priority background resolutions).
  class HostNameQueue {
   public:
    HostNameQueue();
    ~HostNameQueue();
    void Push(const GURL& url,
              UrlInfo::ResolutionMotivation motivation);
    bool IsEmpty() const;
    GURL Pop();

   private:
    // The names in the queue that should be serviced (popped) ASAP.
    std::queue<GURL> rush_queue_;
    // The names in the queue that should only be serviced when rush_queue is
    // empty.
    std::queue<GURL> background_queue_;

  DISALLOW_COPY_AND_ASSIGN(HostNameQueue);
  };

  // The InitialObserver monitors navigations made by the network stack. This
  // is only used to identify startup time resolutions (for re-resolution
  // during our next process startup).
  // TODO(jar): Consider preconnecting at startup, which may be faster than
  // waiting for render process to start and request a connection.
  class InitialObserver {
   public:
    InitialObserver();
    ~InitialObserver();
    // Recording of when we observed each navigation.
    typedef std::map<GURL, base::TimeTicks> FirstNavigations;

    // Potentially add a new URL to our startup list.
    void Append(const GURL& url, Predictor* predictor);

    // Get an HTML version of our current planned first_navigations_.
    void GetFirstResolutionsHtml(std::string* output);

    // Persist the current first_navigations_ for storage in a list.
    void GetInitialDnsResolutionList(base::ListValue* startup_list);

    // Discards all initial loading history.
    void DiscardInitialNavigationHistory() { first_navigations_.clear(); }

   private:
    // List of the first N URL resolutions observed in this run.
    FirstNavigations first_navigations_;

    // The number of URLs we'll save for pre-resolving at next startup.
    static const size_t kStartupResolutionCount = 10;
  };

  // A map that is keyed with the host/port that we've learned were the cause
  // of loading additional URLs.  The list of additional targets is held
  // in a Referrer instance, which is a value in this map.
  typedef std::map<GURL, Referrer> Referrers;

  // Depending on the expected_subresource_use_, we may either make a TCP/IP
  // preconnection, or merely pre-resolve the hostname via DNS (or even do
  // nothing).  The following are the threasholds for taking those actions.
  static const double kPreconnectWorthyExpectedValue;
  static const double kDNSPreresolutionWorthyExpectedValue;
  // Referred hosts with a subresource_use_rate_ that are less than the
  // following threshold will be discarded when we Trim() the list.
  static const double kDiscardableExpectedValue;
  // During trimming operation to discard hosts for which we don't have likely
  // subresources, we multiply the expected_subresource_use_ value by the
  // following ratio until that value is less than kDiscardableExpectedValue.
  // This number should always be less than 1, an more than 0.
  static const double kReferrerTrimRatio;

  // Interval between periodic trimming of our whole referrer list.
  // We only do a major trimming about once an hour, and then only when the user
  // is actively browsing.
  static const base::TimeDelta kDurationBetweenTrimmings;
  // Interval between incremental trimmings (to avoid inducing Jank).
  static const base::TimeDelta kDurationBetweenTrimmingIncrements;
  // Number of referring URLs processed in an incremental trimming.
  static const size_t kUrlsTrimmedPerIncrement;

  // Only for testing. Returns true if hostname has been successfully resolved
  // (name found).
  bool WasFound(const GURL& url) const {
    Results::const_iterator it(results_.find(url));
    return (it != results_.end()) &&
            it->second.was_found();
  }

  // Only for testing. Return how long was the resolution
  // or UrlInfo::kNullDuration if it hasn't been resolved yet.
  base::TimeDelta GetResolutionDuration(const GURL& url) {
    if (results_.find(url) == results_.end())
      return UrlInfo::kNullDuration;
    return results_[url].resolve_duration();
  }

  // Only for testing;
  size_t peak_pending_lookups() const { return peak_pending_lookups_; }

  // ------------- Start IO thread methods.

  // Perform actual resolution or preconnection to subresources now.  This is
  // an internal worker method that is reached via a post task from
  // PredictFrameSubresources().
  void PrepareFrameSubresources(const GURL& url);

  // Access method for use by async lookup request to pass resolution result.
  void OnLookupFinished(LookupRequest* request, const GURL& url, bool found);

  // Underlying method for both async and synchronous lookup to update state.
  void LookupFinished(LookupRequest* request,
                      const GURL& url, bool found);

  // Queue hostname for resolution.  If queueing was done, return the pointer
  // to the queued instance, otherwise return NULL.
  UrlInfo* AppendToResolutionQueue(const GURL& url,
      UrlInfo::ResolutionMotivation motivation);

  // Check to see if too much queuing delay has been noted for the given info,
  // which indicates that there is "congestion" or growing delay in handling the
  // resolution of names.  Rather than letting this congestion potentially grow
  // without bounds, we abandon our queued efforts at pre-resolutions in such a
  // case.
  // To do this, we will recycle |info|, as well as all queued items, back to
  // the state they had before they were queued up.  We can't do anything about
  // the resolutions we've already sent off for processing on another thread, so
  // we just let them complete.  On a slow system, subject to congestion, this
  // will greatly reduce the number of resolutions done, but it will assure that
  // any resolutions that are done, are in a timely and hence potentially
  // helpful manner.
  bool CongestionControlPerformed(UrlInfo* info);

  // Take lookup requests from work_queue_ and tell HostResolver to look them up
  // asynchronously, provided we don't exceed concurrent resolution limit.
  void StartSomeQueuedResolutions();

  // Performs trimming similar to TrimReferrersNow(), except it does it as a
  // series of short tasks by posting continuations again an again until done.
  void TrimReferrers();

  // Loads urls_being_trimmed_ from keys of current referrers_.
  void LoadUrlsForTrimming();

  // Posts a task to do additional incremental trimming of referrers_.
  void PostIncrementalTrimTask();

  // Calls Trim() on some or all of urls_being_trimmed_.
  // If it does not process all the URLs in that vector, it posts a task to
  // continue with them shortly (i.e., it yeilds and continues).
  void IncrementalTrimReferrers(bool trim_all_now);

  // ------------- End IO thread methods.

  scoped_ptr<InitialObserver> initial_observer_;

  // Status of speculative DNS resolution and speculative TCP/IP connection
  // feature.
  bool predictor_enabled_;

  // work_queue_ holds a list of names we need to look up.
  HostNameQueue work_queue_;

  // results_ contains information for existing/prior prefetches.
  Results results_;

  std::set<LookupRequest*> pending_lookups_;

  // For testing, to verify that we don't exceed the limit.
  size_t peak_pending_lookups_;

  // When true, we don't make new lookup requests.
  bool shutdown_;

  // The number of concurrent speculative lookups currently allowed to be sent
  // to the resolver.  Any additional lookups will be queued to avoid exceeding
  // this value.  The queue is a priority queue that will accelerate
  // sub-resource speculation, and retard resolutions suggested by page scans.
  const size_t max_concurrent_dns_lookups_;

  // The maximum queueing delay that is acceptable before we enter congestion
  // reduction mode, and discard all queued (but not yet assigned) resolutions.
  const base::TimeDelta max_dns_queue_delay_;

  // The host resolver we warm DNS entries for.
  net::HostResolver* host_resolver_;

  // Are we currently using preconnection, rather than just DNS resolution, for
  // subresources and omni-box search URLs.
  bool preconnect_enabled_;

  // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl().
  std::string last_omnibox_host_;

  // The time when the last preresolve was done for last_omnibox_host_.
  base::TimeTicks last_omnibox_preresolve_;

  // The number of consecutive requests to AnticipateOmniboxUrl() that suggested
  // preconnecting (because it was to a search service).
  int consecutive_omnibox_preconnect_count_;

  // The time when the last preconnection was requested to a search service.
  base::TimeTicks last_omnibox_preconnect_;

  // For each URL that we might navigate to (that we've "learned about")
  // we have a Referrer list. Each Referrer list has all hostnames we might
  // need to pre-resolve or pre-connect to when there is a navigation to the
  // orginial hostname.
  Referrers referrers_;

  // List of URLs in referrers_ currently being trimmed (scaled down to
  // eventually be aged out of use).
  std::vector<GURL> urls_being_trimmed_;

  // A time after which we need to do more trimming of referrers.
  base::TimeTicks next_trim_time_;

  scoped_ptr<ScopedRunnableMethodFactory<Predictor> > trim_task_factory_;

  DISALLOW_COPY_AND_ASSIGN(Predictor);
};

// This version of the predictor is used for testing.
class SimplePredictor : public Predictor {
 public:
  explicit SimplePredictor(bool preconnect_enabled)
      : Predictor(preconnect_enabled) {}
  virtual ~SimplePredictor() {}
  virtual void InitNetworkPredictor(PrefService* user_prefs,
                                    PrefService* local_state,
                                    IOThread* io_thread);
  virtual void ShutdownOnUIThread(PrefService* user_prefs);
};

}  // namespace chrome_browser_net

#endif  // CHROME_BROWSER_NET_PREDICTOR_H_