summaryrefslogtreecommitdiffstats
path: root/net/base/sdch_manager.h
diff options
context:
space:
mode:
Diffstat (limited to 'net/base/sdch_manager.h')
-rw-r--r--net/base/sdch_manager.h367
1 files changed, 367 insertions, 0 deletions
diff --git a/net/base/sdch_manager.h b/net/base/sdch_manager.h
new file mode 100644
index 0000000..67ca5e5
--- /dev/null
+++ b/net/base/sdch_manager.h
@@ -0,0 +1,367 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Provides global database of differential decompression dictionaries for the
+// SDCH filter (processes sdch encoded content).
+
+// Exactly one instance of SdchManager is built, and all references are made
+// into that collection.
+//
+// The SdchManager maintains a collection of memory resident dictionaries. It
+// can find a dictionary (based on a server specification of a hash), store a
+// dictionary, and make judgements about what URLs can use, set, etc. a
+// dictionary.
+
+// These dictionaries are acquired over the net, and include a header
+// (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF
+// module) to decompress data.
+
+#ifndef NET_BASE_SDCH_MANAGER_H_
+#define NET_BASE_SDCH_MANAGER_H_
+
+#include <map>
+#include <set>
+#include <string>
+
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "base/time.h"
+#include "googleurl/src/gurl.h"
+#include "testing/gtest/include/gtest/gtest_prod.h"
+
+//------------------------------------------------------------------------------
+// Create a public interface to help us load SDCH dictionaries.
+// The SdchManager class allows registration to support this interface.
+// A browser may register a fetcher that is used by the dictionary managers to
+// get data from a specified URL. This allows us to use very high level browser
+// functionality in this base (when the functionality can be provided).
+class SdchFetcher {
+ public:
+ SdchFetcher() {}
+ virtual ~SdchFetcher() {}
+
+ // The Schedule() method is called when there is a need to get a dictionary
+ // from a server. The callee is responsible for getting that dictionary_text,
+ // and then calling back to AddSdchDictionary() on the SdchManager instance.
+ virtual void Schedule(const GURL& dictionary_url) = 0;
+ private:
+ DISALLOW_COPY_AND_ASSIGN(SdchFetcher);
+};
+//------------------------------------------------------------------------------
+
+class SdchManager {
+ public:
+ // A list of errors that appeared and were either resolved, or used to turn
+ // off sdch encoding.
+ enum ProblemCodes {
+ MIN_PROBLEM_CODE,
+
+ // Content-encoding correction problems.
+ ADDED_CONTENT_ENCODING = 1,
+ FIXED_CONTENT_ENCODING = 2,
+ FIXED_CONTENT_ENCODINGS = 3,
+
+ // Content decoding errors.
+ DECODE_HEADER_ERROR = 4,
+ DECODE_BODY_ERROR = 5,
+
+ // More content-encoding correction problems.
+ OPTIONAL_GUNZIP_ENCODING_ADDED = 6,
+
+ // Content encoding correction when we're not even tagged as HTML!?!
+ BINARY_ADDED_CONTENT_ENCODING = 7,
+ BINARY_FIXED_CONTENT_ENCODING = 8,
+ BINARY_FIXED_CONTENT_ENCODINGS = 9,
+
+ // Dictionary selection for use problems.
+ DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10,
+ DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11,
+ DICTIONARY_FOUND_HAS_WRONG_PATH = 12,
+ DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13,
+ DICTIONARY_HASH_NOT_FOUND = 14,
+ DICTIONARY_HASH_MALFORMED = 15,
+
+ // Dictionary saving problems.
+ DICTIONARY_HAS_NO_HEADER = 20,
+ DICTIONARY_HEADER_LINE_MISSING_COLON = 21,
+ DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22,
+ DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23,
+ DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24,
+ DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25,
+ DICTIONARY_HAS_NO_TEXT = 26,
+ DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27,
+
+ // Dictionary loading problems.
+ DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30,
+ DICTIONARY_SELECTED_FOR_SSL = 31,
+ DICTIONARY_ALREADY_LOADED = 32,
+ DICTIONARY_SELECTED_FROM_NON_HTTP = 33,
+ DICTIONARY_IS_TOO_LARGE= 34,
+ DICTIONARY_COUNT_EXCEEDED = 35,
+ DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36,
+ DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37,
+
+ // Failsafe hack.
+ ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40,
+
+
+ // Content-Encoding problems detected, with no action taken.
+ MULTIENCODING_FOR_NON_SDCH_REQUEST = 50,
+ SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51,
+
+ // Dictionary manager issues.
+ DOMAIN_BLACKLIST_INCLUDES_TARGET = 61,
+
+ // Problematic decode recovery methods.
+ META_REFRESH_RECOVERY = 70, // Dictionary not found.
+ // defunct = 71, // Almost the same as META_REFRESH_UNSUPPORTED.
+ // defunct = 72, // Almost the same as CACHED_META_REFRESH_UNSUPPORTED.
+ // defunct = 73, // PASSING_THROUGH_NON_SDCH plus DISCARD_TENTATIVE_SDCH.
+ META_REFRESH_UNSUPPORTED = 74, // Unrecoverable error.
+ CACHED_META_REFRESH_UNSUPPORTED = 75, // As above, but pulled from cache.
+ PASSING_THROUGH_NON_SDCH = 76, // Tagged sdch but missing dictionary-hash.
+ INCOMPLETE_SDCH_CONTENT = 77, // Last window was not completely decoded.
+ PASS_THROUGH_404_CODE = 78, // URL not found message passing through.
+
+ // This next report is very common, and not really an error scenario, but
+ // it exercises the error recovery logic.
+ PASS_THROUGH_OLD_CACHED = 79, // Back button got pre-SDCH cached content.
+
+ // Common decoded recovery methods.
+ META_REFRESH_CACHED_RECOVERY = 80, // Probably startup tab loading.
+ DISCARD_TENTATIVE_SDCH = 81, // Server decided not to use sdch.
+
+ // Non SDCH problems, only accounted for to make stat counting complete
+ // (i.e., be able to be sure all dictionary advertisements are accounted
+ // for).
+
+ UNFLUSHED_CONTENT = 90, // Possible error in filter chaining.
+ // defunct = 91, // MISSING_TIME_STATS (Should never happen.)
+ CACHE_DECODED = 92, // No timing stats recorded.
+ // defunct = 93, // OVER_10_MINUTES (No timing stats recorded.)
+ UNINITIALIZED = 94, // Filter never even got initialized.
+ PRIOR_TO_DICTIONARY = 95, // We hadn't even parsed a dictionary selector.
+ DECODE_ERROR = 96, // Something went wrong during decode.
+
+ // Problem during the latency test.
+ LATENCY_TEST_DISALLOWED = 100, // SDCH now failing, but it worked before!
+
+ MAX_PROBLEM_CODE // Used to bound histogram.
+ };
+
+ // Use the following static limits to block DOS attacks until we implement
+ // a cached dictionary eviction strategy.
+ static const size_t kMaxDictionarySize;
+ static const size_t kMaxDictionaryCount;
+
+ // There is one instance of |Dictionary| for each memory-cached SDCH
+ // dictionary.
+ class Dictionary : public base::RefCounted<Dictionary> {
+ public:
+ // Sdch filters can get our text to use in decoding compressed data.
+ const std::string& text() const { return text_; }
+
+ private:
+ friend class base::RefCounted<Dictionary>;
+ friend class SdchManager; // Only manager can construct an instance.
+ FRIEND_TEST(SdchFilterTest, PathMatch);
+
+ // Construct a vc-diff usable dictionary from the dictionary_text starting
+ // at the given offset. The supplied client_hash should be used to
+ // advertise the dictionary's availability relative to the supplied URL.
+ Dictionary(const std::string& dictionary_text, size_t offset,
+ const std::string& client_hash, const GURL& url,
+ const std::string& domain, const std::string& path,
+ const base::Time& expiration, const std::set<int> ports);
+ ~Dictionary() {}
+
+ const GURL& url() const { return url_; }
+ const std::string& client_hash() const { return client_hash_; }
+
+ // Security method to check if we can advertise this dictionary for use
+ // if the |target_url| returns SDCH compressed data.
+ bool CanAdvertise(const GURL& target_url);
+
+ // Security methods to check if we can establish a new dictionary with the
+ // given data, that arrived in response to a get of dictionary_url.
+ static bool CanSet(const std::string& domain, const std::string& path,
+ const std::set<int> ports, const GURL& dictionary_url);
+
+ // Security method to check if we can use a dictionary to decompress a
+ // target that arrived with a reference to this dictionary.
+ bool CanUse(const GURL& referring_url);
+
+ // Compare paths to see if they "match" for dictionary use.
+ static bool PathMatch(const std::string& path,
+ const std::string& restriction);
+
+ // Compare domains to see if they "match" for dictionary use.
+ static bool DomainMatch(const GURL& url, const std::string& restriction);
+
+
+ // The actual text of the dictionary.
+ std::string text_;
+
+ // Part of the hash of text_ that the client uses to advertise the fact that
+ // it has a specific dictionary pre-cached.
+ std::string client_hash_;
+
+ // The GURL that arrived with the text_ in a URL request to specify where
+ // this dictionary may be used.
+ const GURL url_;
+
+ // Metadata "headers" preceding the dictionary text contained the following:
+ // Each dictionary payload consists of several headers, followed by the text
+ // of the dictionary. The following are the known headers.
+ const std::string domain_;
+ const std::string path_;
+ const base::Time expiration_; // Implied by max-age.
+ const std::set<int> ports_;
+
+ DISALLOW_COPY_AND_ASSIGN(Dictionary);
+ };
+
+ SdchManager();
+ ~SdchManager();
+
+ // Discontinue fetching of dictionaries, as we're now shutting down.
+ static void Shutdown();
+
+ // Provide access to the single instance of this class.
+ static SdchManager* Global();
+
+ // Record stats on various errors.
+ static void SdchErrorRecovery(ProblemCodes problem);
+
+ // Register a fetcher (stored in a scoped_ptr) used to obtain dictionaries.
+ void set_sdch_fetcher(SdchFetcher* fetcher) { fetcher_.reset(fetcher); }
+
+ // If called with an empty string, advertise and support sdch on all domains.
+ // If called with a specific string, advertise and support only the specified
+ // domain. Function assumes the existence of a global SdchManager instance.
+ void EnableSdchSupport(const std::string& domain);
+
+ static bool sdch_enabled() { return global_ && global_->sdch_enabled_; }
+
+ // Briefly prevent further advertising of SDCH on this domain (if SDCH is
+ // enabled). After enough calls to IsInSupportedDomain() the blacklisting
+ // will be removed. Additional blacklists take exponentially more calls
+ // to IsInSupportedDomain() before the blacklisting is undone.
+ // Used when filter errors are found from a given domain, but it is plausible
+ // that the cause is temporary (such as application startup, where cached
+ // entries are used, but a dictionary is not yet loaded).
+ static void BlacklistDomain(const GURL& url);
+
+ // Used when SEVERE filter errors are found from a given domain, to prevent
+ // further use of SDCH on that domain.
+ static void BlacklistDomainForever(const GURL& url);
+
+ // Unit test only, this function resets enabling of sdch, and clears the
+ // blacklist.
+ static void ClearBlacklistings();
+
+ // Unit test only, this function resets the blacklisting count for a domain.
+ static void ClearDomainBlacklisting(const std::string& domain);
+
+ // Unit test only: indicate how many more times a domain will be blacklisted.
+ static int BlackListDomainCount(const std::string& domain);
+
+ // Unit test only: Indicate what current blacklist increment is for a domain.
+ static int BlacklistDomainExponential(const std::string& domain);
+
+ // Check to see if SDCH is enabled (globally), and the given URL is in a
+ // supported domain (i.e., not blacklisted, and either the specific supported
+ // domain, or all domains were assumed supported). If it is blacklisted,
+ // reduce by 1 the number of times it will be reported as blacklisted.
+ const bool IsInSupportedDomain(const GURL& url);
+
+ // Schedule the URL fetching to load a dictionary. This will always return
+ // before the dictionary is actually loaded and added.
+ // After the implied task completes, the dictionary will have been
+ // cached in memory.
+ void FetchDictionary(const GURL& request_url, const GURL& dictionary_url);
+
+ // Security test function used before initiating a FetchDictionary.
+ // Return true if fetch is legal.
+ bool CanFetchDictionary(const GURL& referring_url,
+ const GURL& dictionary_url) const;
+
+ // Add an SDCH dictionary to our list of available dictionaries. This addition
+ // will fail (return false) if addition is illegal (data in the dictionary is
+ // not acceptable from the dictionary_url; dictionary already added, etc.).
+ bool AddSdchDictionary(const std::string& dictionary_text,
+ const GURL& dictionary_url);
+
+ // Find the vcdiff dictionary (the body of the sdch dictionary that appears
+ // after the meta-data headers like Domain:...) with the given |server_hash|
+ // to use to decompress data that arrived as SDCH encoded content. Check to
+ // be sure the returned |dictionary| can be used for decoding content supplied
+ // in response to a request for |referring_url|.
+ // Caller is responsible for AddRef()ing the dictionary, and Release()ing it
+ // when done.
+ // Return null in |dictionary| if there is no matching legal dictionary.
+ void GetVcdiffDictionary(const std::string& server_hash,
+ const GURL& referring_url,
+ Dictionary** dictionary);
+
+ // Get list of available (pre-cached) dictionaries that we have already loaded
+ // into memory. The list is a comma separated list of (client) hashes per
+ // the SDCH spec.
+ void GetAvailDictionaryList(const GURL& target_url, std::string* list);
+
+ // Construct the pair of hashes for client and server to identify an SDCH
+ // dictionary. This is only made public to facilitate unit testing, but is
+ // otherwise private.
+ static void GenerateHash(const std::string& dictionary_text,
+ std::string* client_hash, std::string* server_hash);
+
+ // For Latency testing only, we need to know if we've succeeded in doing a
+ // round trip before starting our comparative tests. If ever we encounter
+ // problems with SDCH, we opt-out of the test unless/until we perform a
+ // complete SDCH decoding.
+ bool AllowLatencyExperiment(const GURL& url) const;
+
+ void SetAllowLatencyExperiment(const GURL& url, bool enable);
+
+ private:
+ typedef std::map<std::string, int> DomainCounter;
+ typedef std::set<std::string> ExperimentSet;
+
+ // A map of dictionaries info indexed by the hash that the server provides.
+ typedef std::map<std::string, Dictionary*> DictionaryMap;
+
+ // The one global instance that holds all the data.
+ static SdchManager* global_;
+
+ // A simple implementation of a RFC 3548 "URL safe" base64 encoder.
+ static void UrlSafeBase64Encode(const std::string& input,
+ std::string* output);
+ DictionaryMap dictionaries_;  // Indexed by the server-provided hash.
+
+ // An instance that can fetch a dictionary given a URL.
+ scoped_ptr<SdchFetcher> fetcher_;
+
+ // Support SDCH compression, by advertising in headers.
+ bool sdch_enabled_;
+
+ // Empty string means all domains. Non-empty means only the given domain is
+ // supported.
+ std::string supported_domain_;
+
+ // List domains where decode failures have required disabling sdch, along with
+ // count of how many additional uses should be blacklisted.
+ DomainCounter blacklisted_domains_;
+
+ // Support exponential backoff in number of domain accesses before
+ // blacklisting expires.
+ DomainCounter exponential_blacklist_count;  // NOTE(review): no trailing '_'.
+
+ // List of hostnames for which a latency experiment is allowed (because a
+ // round trip test has recently passed).
+ ExperimentSet allow_latency_experiment_;
+
+ DISALLOW_COPY_AND_ASSIGN(SdchManager);
+};
+
+#endif // NET_BASE_SDCH_MANAGER_H_