summaryrefslogtreecommitdiffstats
path: root/net/base/sdch_manager.h
blob: 74e38cab2d02fafe57d4642e13a75cdc736a9efa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Provides global database of differential decompression dictionaries for the
// SDCH filter (processes sdch encoded content).

// Exactly one instance of SdchManager is built, and all references are made
// into that collection.
//
// The SdchManager maintains a collection of memory resident dictionaries.  It
// can find a dictionary (based on a server specification of a hash), store a
// dictionary, and make judgements about what URLs can use, set, etc. a
// dictionary.

// These dictionaries are acquired over the net, and include a header
// (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF
// module) to decompress data.

#ifndef NET_BASE_SDCH_MANAGER_H_
#define NET_BASE_SDCH_MANAGER_H_
#pragma once

#include <map>
#include <set>
#include <string>

#include "base/ref_counted.h"
#include "base/scoped_ptr.h"
#include "base/time.h"
#include "googleurl/src/gurl.h"
#include "testing/gtest/include/gtest/gtest_prod.h"

//------------------------------------------------------------------------------
// Create a public interface to help us load SDCH dictionaries.
// The SdchManager class allows registration to support this interface.
// A browser may register a fetcher that is used by the dictionary managers to
// get data from a specified URL.  This allows us to use very high level browser
// functionality in this base (when the functionality can be provided).
// Abstract interface: concrete fetchers override Schedule().
class SdchFetcher {
 public:
  SdchFetcher() {}
  // Virtual so that a fetcher can be destroyed through an SdchFetcher pointer
  // (SdchManager keeps its registered fetcher in a scoped_ptr<SdchFetcher>).
  virtual ~SdchFetcher() {}

  // The Schedule() method is called when there is a need to get a dictionary
  // from a server.  The callee is responsible for getting that dictionary_text,
  // and then calling AddSdchDictionary() on the SdchManager instance.
  // |dictionary_url| is the URL the dictionary should be fetched from.
  virtual void Schedule(const GURL& dictionary_url) = 0;
 private:
  // Fetchers are not copyable or assignable.
  DISALLOW_COPY_AND_ASSIGN(SdchFetcher);
};
//------------------------------------------------------------------------------

// Holds the memory-resident SDCH dictionaries together with the per-domain
// policy state (enabled domain, blacklists, latency-experiment hosts) that
// decides when a dictionary may be fetched, stored, advertised, or used.
// The single instance is reachable through Global().
class SdchManager {
 public:
  // A list of errors that appeared and were either resolved, or used to turn
  // off sdch encoding.
  // NOTE(review): the explicit values, the "defunct" slots, and the
  // MAX_PROBLEM_CODE histogram bound suggest these numbers are recorded
  // externally -- presumably they must not be renumbered; confirm.
  enum ProblemCodes {
    MIN_PROBLEM_CODE,  // Implicitly 0; lower bound of the code range.

    // Content-encoding correction problems.
    ADDED_CONTENT_ENCODING = 1,
    FIXED_CONTENT_ENCODING = 2,
    FIXED_CONTENT_ENCODINGS = 3,

    // Content decoding errors.
    DECODE_HEADER_ERROR = 4,
    DECODE_BODY_ERROR = 5,

    // More content-encoding correction problems.
    OPTIONAL_GUNZIP_ENCODING_ADDED = 6,

    // Content encoding correction when we're not even tagged as HTML!?!
    BINARY_ADDED_CONTENT_ENCODING = 7,
    BINARY_FIXED_CONTENT_ENCODING = 8,
    BINARY_FIXED_CONTENT_ENCODINGS = 9,

    // Dictionary selection for use problems.
    DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10,
    DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11,
    DICTIONARY_FOUND_HAS_WRONG_PATH = 12,
    DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13,
    DICTIONARY_HASH_NOT_FOUND = 14,
    DICTIONARY_HASH_MALFORMED = 15,

    // Dictionary saving problems.
    DICTIONARY_HAS_NO_HEADER = 20,
    DICTIONARY_HEADER_LINE_MISSING_COLON = 21,
    DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22,
    DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23,
    DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24,
    DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25,
    DICTIONARY_HAS_NO_TEXT = 26,
    DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27,

    // Dictionary loading problems.
    DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30,
    DICTIONARY_SELECTED_FOR_SSL = 31,
    DICTIONARY_ALREADY_LOADED = 32,
    DICTIONARY_SELECTED_FROM_NON_HTTP = 33,
    DICTIONARY_IS_TOO_LARGE= 34,
    DICTIONARY_COUNT_EXCEEDED = 35,
    DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36,
    DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37,

    // Failsafe hack.
    ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40,


    // Content-Encoding problems detected, with no action taken.
    MULTIENCODING_FOR_NON_SDCH_REQUEST = 50,
    SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51,

    // Dictionary manager issues.
    DOMAIN_BLACKLIST_INCLUDES_TARGET = 61,

    // Problematic decode recovery methods.
    META_REFRESH_RECOVERY = 70,            // Dictionary not found.
    // defunct = 71,  // Almost the same as META_REFRESH_UNSUPPORTED.
    // defunct = 72,  // Almost the same as CACHED_META_REFRESH_UNSUPPORTED.
    // defunct = 73,  // PASSING_THROUGH_NON_SDCH plus DISCARD_TENTATIVE_SDCH.
    META_REFRESH_UNSUPPORTED = 74,         // Unrecoverable error.
    CACHED_META_REFRESH_UNSUPPORTED = 75,  // As above, but pulled from cache.
    PASSING_THROUGH_NON_SDCH = 76,  // Tagged sdch but missing dictionary-hash.
    INCOMPLETE_SDCH_CONTENT = 77,   // Last window was not completely decoded.
    PASS_THROUGH_404_CODE = 78,     // URL not found message passing through.

    // This next report is very common, and not really an error scenario, but
    // it exercises the error recovery logic.
    PASS_THROUGH_OLD_CACHED = 79,   // Back button got pre-SDCH cached content.

    // Common decoded recovery methods.
    META_REFRESH_CACHED_RECOVERY = 80,  // Probably startup tab loading.
    DISCARD_TENTATIVE_SDCH = 81,        // Server decided not to use sdch.

    // Non SDCH problems, only accounted for to make stat counting complete
    // (i.e., be able to be sure all dictionary advertisements are accounted
    // for).

    UNFLUSHED_CONTENT = 90,    // Possible error in filter chaining.
    // defunct = 91,           // MISSING_TIME_STATS (Should never happen.)
    CACHE_DECODED = 92,        // No timing stats recorded.
    // defunct = 93,           // OVER_10_MINUTES (No timing stats recorded.)
    UNINITIALIZED = 94,        // Filter never even got initialized.
    PRIOR_TO_DICTIONARY = 95,  // We hadn't even parsed a dictionary selector.
    DECODE_ERROR = 96,         // Something went wrong during decode.

    // Problem during the latency test.
    LATENCY_TEST_DISALLOWED = 100,  // SDCH now failing, but it worked before!

    // Implicitly one past the last real code (currently 101).
    MAX_PROBLEM_CODE  // Used to bound histogram.
  };

  // Use the following static limits to block DOS attacks until we implement
  // a cached dictionary eviction strategy.  The values are defined out of
  // line, in the implementation file.
  static const size_t kMaxDictionarySize;
  static const size_t kMaxDictionaryCount;

  // There is one instance of |Dictionary| for each memory-cached SDCH
  // dictionary.  Instances are reference counted; construction, destruction
  // and all the security checks are private, reachable only by SdchManager
  // (and base::RefCounted for the final delete).
  class Dictionary : public base::RefCounted<Dictionary> {
   public:
    // Sdch filters can get our text to use in decoding compressed data.
    const std::string& text() const { return text_; }

   private:
    // RefCounted needs access to the private destructor.
    friend class base::RefCounted<Dictionary>;
    friend class SdchManager;  // Only manager can construct an instance.
    // Gives the SdchFilterTest.PathMatch unit test access to the private
    // members of this class.
    FRIEND_TEST(SdchFilterTest, PathMatch);

    // Construct a vc-diff usable dictionary from the dictionary_text starting
    // at the given offset.  The supplied client_hash should be used to
    // advertise the dictionary's availability relative to the supplied URL.
    // NOTE(review): |ports| is taken by (const) value, so the whole set is
    // copied at each call.  A const reference would avoid the copy, but the
    // out-of-line definition would have to change in step.
    Dictionary(const std::string& dictionary_text, size_t offset,
               const std::string& client_hash, const GURL& url,
               const std::string& domain, const std::string& path,
               const base::Time& expiration, const std::set<int> ports);
    ~Dictionary() {}

    // Simple accessors for the source URL and the client-side hash.
    const GURL& url() const { return url_; }
    const std::string& client_hash() const { return client_hash_; }

    // Security method to check if we can advertise this dictionary for use
    // if the |target_url| returns SDCH compressed data.
    bool CanAdvertise(const GURL& target_url);

    // Security methods to check if we can establish a new dictionary with the
    // given data, that arrived in response to get of dictionary_url.
    // NOTE(review): |ports| is passed by (const) value here as well; see the
    // constructor note.
    static bool CanSet(const std::string& domain, const std::string& path,
                       const std::set<int> ports, const GURL& dictionary_url);

    // Security method to check if we can use a dictionary to decompress a
    // target that arrived with a reference to this dictionary.
    bool CanUse(const GURL& referring_url);

    // Compare paths to see if they "match" for dictionary use.
    static bool PathMatch(const std::string& path,
                          const std::string& restriction);

    // Compare domains to see if they "match" for dictionary use.
    static bool DomainMatch(const GURL& url, const std::string& restriction);


    // The actual text of the dictionary.
    std::string text_;

    // Part of the hash of text_ that the client uses to advertise the fact that
    // it has a specific dictionary pre-cached.
    std::string client_hash_;

    // The GURL that arrived with the text_ in a URL request to specify where
    // this dictionary may be used.
    const GURL url_;

    // Metadata "headers" that preceded the dictionary text.
    // Each dictionary payload consists of several headers, followed by the text
    // of the dictionary.  The following are the known headers.
    const std::string domain_;
    const std::string path_;
    const base::Time expiration_;  // Implied by max-age.
    const std::set<int> ports_;

    DISALLOW_COPY_AND_ASSIGN(Dictionary);
  };

  SdchManager();
  ~SdchManager();

  // Discontinue fetching of dictionaries, as we're now shutting down.
  static void Shutdown();

  // Provide access to the single instance of this class.
  static SdchManager* Global();

  // Record stats on various errors.
  static void SdchErrorRecovery(ProblemCodes problem);

  // Register a fetcher that this class can use to obtain dictionaries.
  // The manager takes ownership of |fetcher|: it is stored in a scoped_ptr,
  // so any previously registered fetcher is deleted by the reset().
  void set_sdch_fetcher(SdchFetcher* fetcher) { fetcher_.reset(fetcher); }

  // If called with an empty string, advertise and support sdch on all domains.
  // If called with a specific string, advertise and support only the specified
  // domain.  Function assumes the existence of a global SdchManager instance.
  void EnableSdchSupport(const std::string& domain);

  // True only when a global instance exists and SDCH is enabled on it; safe to
  // call before any SdchManager has been created (returns false).
  static bool sdch_enabled() { return global_ && global_->sdch_enabled_; }

  // Briefly prevent further advertising of SDCH on this domain (if SDCH is
  // enabled). After enough calls to IsInSupportedDomain() the blacklisting
  // will be removed.  Additional blacklists take exponentially more calls
  // to IsInSupportedDomain() before the blacklisting is undone.
  // Used when filter errors are found from a given domain, but it is plausible
  // that the cause is temporary (such as application startup, where cached
  // entries are used, but a dictionary is not yet loaded).
  static void BlacklistDomain(const GURL& url);

  // Used when SEVERE filter errors are found from a given domain, to prevent
  // further use of SDCH on that domain.
  static void BlacklistDomainForever(const GURL& url);

  // Unit test only, this function resets enabling of sdch, and clears the
  // blacklist.
  static void ClearBlacklistings();

  // Unit test only, this function resets the blacklisting count for a domain.
  static void ClearDomainBlacklisting(const std::string& domain);

  // Unit test only: indicate how many more times a domain will be blacklisted.
  static int BlackListDomainCount(const std::string& domain);

  // Unit test only: Indicate what current blacklist increment is for a domain.
  static int BlacklistDomainExponential(const std::string& domain);

  // Check to see if SDCH is enabled (globally), and the given URL is in a
  // supported domain (i.e., not blacklisted, and either the specific supported
  // domain, or all domains were assumed supported).  If it is blacklisted,
  // reduce by 1 the number of times it will be reported as blacklisted.
  bool IsInSupportedDomain(const GURL& url);

  // Schedule the URL fetching to load a dictionary. This will always return
  // before the dictionary is actually loaded and added.
  // After the implied task completes, the dictionary will have been
  // cached in memory.
  void FetchDictionary(const GURL& request_url, const GURL& dictionary_url);

  // Security test function used before initiating a FetchDictionary.
  // Return true if fetch is legal.
  bool CanFetchDictionary(const GURL& referring_url,
                          const GURL& dictionary_url) const;

  // Add an SDCH dictionary to our list of available dictionaries. This addition
  // will fail (return false) if addition is illegal (data in the dictionary is
  // not acceptable from the dictionary_url; dictionary already added, etc.).
  bool AddSdchDictionary(const std::string& dictionary_text,
                         const GURL& dictionary_url);

  // Find the vcdiff dictionary (the body of the sdch dictionary that appears
  // after the meta-data headers like Domain:...) with the given |server_hash|
  // to use to decompress data that arrived as SDCH encoded content.  Check to
  // be sure the returned |dictionary| can be used for decoding content supplied
  // in response to a request for |referring_url|.
  // Caller is responsible for AddRef()ing the dictionary, and Release()ing it
  // when done.
  // Return null in |dictionary| if there is no matching legal dictionary.
  void GetVcdiffDictionary(const std::string& server_hash,
                           const GURL& referring_url,
                           Dictionary** dictionary);

  // Get list of available (pre-cached) dictionaries that we have already loaded
  // into memory.  The list is a comma separated list of (client) hashes per
  // the SDCH spec.  The result is written to |list|.
  void GetAvailDictionaryList(const GURL& target_url, std::string* list);

  // Construct the pair of hashes for client and server to identify an SDCH
  // dictionary.  This is only made public to facilitate unit testing, but is
  // otherwise private.
  static void GenerateHash(const std::string& dictionary_text,
                           std::string* client_hash, std::string* server_hash);

  // For Latency testing only, we need to know if we've succeeded in doing a
  // round trip before starting our comparative tests.  If ever we encounter
  // problems with SDCH, we opt-out of the test unless/until we perform a
  // complete SDCH decoding.
  bool AllowLatencyExperiment(const GURL& url) const;

  // Record whether the latency experiment is allowed for |url|.
  // NOTE(review): presumably keyed by the URL's host, given the description of
  // allow_latency_experiment_ below -- confirm in the implementation.
  void SetAllowLatencyExperiment(const GURL& url, bool enable);

 private:
  // Per-domain integer counters, keyed by domain name.
  typedef std::map<std::string, int> DomainCounter;
  // Set of hostnames taking part in the latency experiment.
  typedef std::set<std::string> ExperimentSet;

  // A map of dictionaries info indexed by the hash that the server provides.
  // NOTE(review): the values are raw pointers to ref-counted objects;
  // presumably the manager holds one reference per entry -- confirm in the
  // implementation.
  typedef std::map<std::string, Dictionary*> DictionaryMap;

  // The one global instance that holds all the data.
  static SdchManager* global_;

  // A simple implementation of a RFC 3548 "URL safe" base64 encoder.
  static void UrlSafeBase64Encode(const std::string& input,
                                  std::string* output);
  // The dictionaries currently held in memory, keyed by server hash.
  DictionaryMap dictionaries_;

  // An instance that can fetch a dictionary given a URL.
  scoped_ptr<SdchFetcher> fetcher_;

  // Support SDCH compression, by advertising in headers.
  bool sdch_enabled_;

  // Empty string means all domains.  Non-empty means only the given domain is
  // supported.
  std::string supported_domain_;

  // List domains where decode failures have required disabling sdch, along with
  // count of how many additional uses should be blacklisted.
  DomainCounter blacklisted_domains_;

  // Support exponential backoff in number of domain accesses before
  // blacklisting expires.
  // NOTE(review): unlike every other data member, this name has no trailing
  // underscore; renaming it requires updating the implementation file too.
  DomainCounter exponential_blacklist_count;

  // List of hostnames for which a latency experiment is allowed (because a
  // round trip test has recently passed).
  ExperimentSet allow_latency_experiment_;

  DISALLOW_COPY_AND_ASSIGN(SdchManager);
};

#endif  // NET_BASE_SDCH_MANAGER_H_