summaryrefslogtreecommitdiffstats
path: root/net/base/sdch_manager.h
diff options
context:
space:
mode:
authorPatrick Scott <phanna@android.com>2010-02-04 10:37:17 -0500
committerPatrick Scott <phanna@android.com>2010-02-04 10:39:42 -0500
commitc7f5f8508d98d5952d42ed7648c2a8f30a4da156 (patch)
treedd51dbfbf6670daa61279b3a19e7b1835b301dbf /net/base/sdch_manager.h
parent139d8152182f9093f03d9089822b688e49fa7667 (diff)
downloadexternal_chromium-c7f5f8508d98d5952d42ed7648c2a8f30a4da156.zip
external_chromium-c7f5f8508d98d5952d42ed7648c2a8f30a4da156.tar.gz
external_chromium-c7f5f8508d98d5952d42ed7648c2a8f30a4da156.tar.bz2
Initial source checkin.
The source files were determined by building net_unittests in chromium's source tree. Some of the obvious libraries were left out (v8, gmock, gtest). The Android.mk file has all the sources (minus unittests and tools) that were used during net_unittests compilation. Nothing builds yet because of STL but that is the next task. The .cpp files will most likely not compile anyways because of the LOCAL_CPP_EXTENSION mod. I will have to break this into multiple projects to get around that limitation.
Diffstat (limited to 'net/base/sdch_manager.h')
-rw-r--r--net/base/sdch_manager.h367
1 files changed, 367 insertions, 0 deletions
diff --git a/net/base/sdch_manager.h b/net/base/sdch_manager.h
new file mode 100644
index 0000000..67ca5e5
--- /dev/null
+++ b/net/base/sdch_manager.h
@@ -0,0 +1,367 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Provides global database of differential decompression dictionaries for the
+// SDCH filter (processes sdch encoded content).
+
+// Exactly one instance of SdchManager is built, and all references are made
+// into that collection.
+//
+// The SdchManager maintains a collection of memory resident dictionaries. It
+// can find a dictionary (based on a server specification of a hash), store a
+// dictionary, and make judgements about what URLs can use, set, etc. a
+// dictionary.
+
+// These dictionaries are acquired over the net, and include a header
+// (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF
+// module) to decompress data.
+
+#ifndef NET_BASE_SDCH_MANAGER_H_
+#define NET_BASE_SDCH_MANAGER_H_
+
+#include <map>
+#include <set>
+#include <string>
+
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "base/time.h"
+#include "googleurl/src/gurl.h"
+#include "testing/gtest/include/gtest/gtest_prod.h"
+
+//------------------------------------------------------------------------------
+// Create a public interface to help us load SDCH dictionaries.
+// The SdchManager class allows registration to support this interface.
+// A browser may register a fetcher that is used by the dictionary managers to
+// get data from a specified URL. This allows us to use very high level browser
+// functionality in this base (when the functionality can be provided).
+class SdchFetcher {
+ public:
+ SdchFetcher() {}
+ virtual ~SdchFetcher() {}
+
+ // The Schedule() method is called when there is a need to get a dictionary
+ // from |dictionary_url|. The callee is responsible for getting the dictionary
+ // text, and then calling AddSdchDictionary() on the SdchManager instance.
+ virtual void Schedule(const GURL& dictionary_url) = 0;
+ private:
+ DISALLOW_COPY_AND_ASSIGN(SdchFetcher);
+};
+//------------------------------------------------------------------------------
+
+class SdchManager {
+ public:
+ // A list of errors that appeared and were either resolved, or used to turn
+ // off sdch encoding.
+ enum ProblemCodes {
+ MIN_PROBLEM_CODE,  // First enumerator, so implicitly 0.
+
+ // Content-encoding correction problems.
+ ADDED_CONTENT_ENCODING = 1,
+ FIXED_CONTENT_ENCODING = 2,
+ FIXED_CONTENT_ENCODINGS = 3,
+
+ // Content decoding errors.
+ DECODE_HEADER_ERROR = 4,
+ DECODE_BODY_ERROR = 5,
+
+ // More content-encoding correction problems.
+ OPTIONAL_GUNZIP_ENCODING_ADDED = 6,
+
+ // Content encoding correction when we're not even tagged as HTML!?!
+ BINARY_ADDED_CONTENT_ENCODING = 7,
+ BINARY_FIXED_CONTENT_ENCODING = 8,
+ BINARY_FIXED_CONTENT_ENCODINGS = 9,
+
+ // Dictionary selection for use problems.
+ DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10,
+ DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11,
+ DICTIONARY_FOUND_HAS_WRONG_PATH = 12,
+ DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13,
+ DICTIONARY_HASH_NOT_FOUND = 14,
+ DICTIONARY_HASH_MALFORMED = 15,
+
+ // Dictionary saving problems.
+ DICTIONARY_HAS_NO_HEADER = 20,
+ DICTIONARY_HEADER_LINE_MISSING_COLON = 21,
+ DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22,
+ DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23,
+ DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24,
+ DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25,
+ DICTIONARY_HAS_NO_TEXT = 26,
+ DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27,
+
+ // Dictionary loading problems.
+ DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30,
+ DICTIONARY_SELECTED_FOR_SSL = 31,
+ DICTIONARY_ALREADY_LOADED = 32,
+ DICTIONARY_SELECTED_FROM_NON_HTTP = 33,
+ DICTIONARY_IS_TOO_LARGE= 34,
+ DICTIONARY_COUNT_EXCEEDED = 35,
+ DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36,
+ DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37,
+
+ // Failsafe hack.
+ ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40,
+
+
+ // Content-Encoding problems detected, with no action taken.
+ MULTIENCODING_FOR_NON_SDCH_REQUEST = 50,
+ SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51,
+
+ // Dictionary manager issues.
+ DOMAIN_BLACKLIST_INCLUDES_TARGET = 61,
+
+ // Problematic decode recovery methods.
+ META_REFRESH_RECOVERY = 70, // Dictionary not found.
+ // defunct = 71, // Almost the same as META_REFRESH_UNSUPPORTED.
+ // defunct = 72, // Almost the same as CACHED_META_REFRESH_UNSUPPORTED.
+ // defunct = 73, // PASSING_THROUGH_NON_SDCH plus DISCARD_TENTATIVE_SDCH.
+ META_REFRESH_UNSUPPORTED = 74, // Unrecoverable error.
+ CACHED_META_REFRESH_UNSUPPORTED = 75, // As above, but pulled from cache.
+ PASSING_THROUGH_NON_SDCH = 76, // Tagged sdch but missing dictionary-hash.
+ INCOMPLETE_SDCH_CONTENT = 77, // Last window was not completely decoded.
+ PASS_THROUGH_404_CODE = 78, // URL not found message passing through.
+
+ // This next report is very common, and not really an error scenario, but
+ // it exercises the error recovery logic.
+ PASS_THROUGH_OLD_CACHED = 79, // Back button got pre-SDCH cached content.
+
+ // Common decoded recovery methods.
+ META_REFRESH_CACHED_RECOVERY = 80, // Probably startup tab loading.
+ DISCARD_TENTATIVE_SDCH = 81, // Server decided not to use sdch.
+
+ // Non SDCH problems, only accounted for to make stat counting complete
+ // (i.e., be able to be sure all dictionary advertisements are accounted
+ // for).
+
+ UNFLUSHED_CONTENT = 90, // Possible error in filter chaining.
+ // defunct = 91, // MISSING_TIME_STATS (Should never happen.)
+ CACHE_DECODED = 92, // No timing stats recorded.
+ // defunct = 93, // OVER_10_MINUTES (No timing stats recorded.)
+ UNINITIALIZED = 94, // Filter never even got initialized.
+ PRIOR_TO_DICTIONARY = 95, // We hadn't even parsed a dictionary selector.
+ DECODE_ERROR = 96, // Something went wrong during decode.
+
+ // Problem during the latency test.
+ LATENCY_TEST_DISALLOWED = 100, // SDCH now failing, but it worked before!
+
+ MAX_PROBLEM_CODE // Implicitly 101; used to bound histogram.
+ };
+
+ // Use the following static limits to block DoS attacks until we implement
+ // a cached dictionary eviction strategy.
+ static const size_t kMaxDictionarySize;
+ static const size_t kMaxDictionaryCount;
+
+ // There is one instance of |Dictionary| for each memory-cached SDCH
+ // dictionary.
+ class Dictionary : public base::RefCounted<Dictionary> {
+ public:
+ // Sdch filters can get our text to use in decoding compressed data.
+ const std::string& text() const { return text_; }
+
+ private:
+ friend class base::RefCounted<Dictionary>;
+ friend class SdchManager; // Only manager can construct an instance.
+ FRIEND_TEST(SdchFilterTest, PathMatch);
+
+ // Construct a vc-diff usable dictionary from the dictionary_text starting
+ // at the given offset. The supplied client_hash should be used to
+ // advertise the dictionary's availability relative to the supplied URL.
+ Dictionary(const std::string& dictionary_text, size_t offset,
+ const std::string& client_hash, const GURL& url,
+ const std::string& domain, const std::string& path,
+ const base::Time& expiration, const std::set<int> ports);
+ ~Dictionary() {}
+
+ const GURL& url() const { return url_; }
+ const std::string& client_hash() const { return client_hash_; }
+
+ // Security method to check if we can advertise this dictionary for use
+ // if the |target_url| returns SDCH compressed data.
+ bool CanAdvertise(const GURL& target_url);
+
+ // Security method to check if we can establish a new dictionary with the
+ // given data, which arrived in response to a GET of |dictionary_url|.
+ static bool CanSet(const std::string& domain, const std::string& path,
+ const std::set<int> ports, const GURL& dictionary_url);
+
+ // Security method to check if we can use a dictionary to decompress a
+ // target that arrived with a reference to this dictionary.
+ bool CanUse(const GURL& referring_url);
+
+ // Compare paths to see if they "match" for dictionary use.
+ static bool PathMatch(const std::string& path,
+ const std::string& restriction);
+
+ // Compare domains to see if they "match" for dictionary use.
+ static bool DomainMatch(const GURL& url, const std::string& restriction);
+
+
+ // The actual text of the dictionary.
+ std::string text_;
+
+ // Part of the hash of text_ that the client uses to advertise the fact that
+ // it has a specific dictionary pre-cached.
+ std::string client_hash_;
+
+ // The GURL that arrived with the text_ in a URL request to specify where
+ // this dictionary may be used.
+ const GURL url_;
+
+ // Metadata "headers" preceding the dictionary text contained the following:
+ // Each dictionary payload consists of several headers, followed by the text
+ // of the dictionary. The following are the known headers.
+ const std::string domain_;
+ const std::string path_;
+ const base::Time expiration_; // Implied by max-age.
+ const std::set<int> ports_;
+
+ DISALLOW_COPY_AND_ASSIGN(Dictionary);
+ };
+
+ SdchManager();
+ ~SdchManager();
+
+ // Discontinue fetching of dictionaries, as we're now shutting down.
+ static void Shutdown();
+
+ // Provide access to the single instance of this class.
+ static SdchManager* Global();
+
+ // Record stats on various errors.
+ static void SdchErrorRecovery(ProblemCodes problem);
+
+ // Register a fetcher that this class can use to obtain dictionaries.
+ void set_sdch_fetcher(SdchFetcher* fetcher) { fetcher_.reset(fetcher); }
+
+ // If called with an empty string, advertise and support sdch on all domains.
+ // If called with a specific string, advertise and support only the specified
+ // domain. Function assumes the existence of a global SdchManager instance.
+ void EnableSdchSupport(const std::string& domain);
+
+ static bool sdch_enabled() { return global_ && global_->sdch_enabled_; }
+
+ // Briefly prevent further advertising of SDCH on this domain (if SDCH is
+ // enabled). After enough calls to IsInSupportedDomain() the blacklisting
+ // will be removed. Additional blacklists take exponentially more calls
+ // to IsInSupportedDomain() before the blacklisting is undone.
+ // Used when filter errors are found from a given domain, but it is plausible
+ // that the cause is temporary (such as application startup, where cached
+ // entries are used, but a dictionary is not yet loaded).
+ static void BlacklistDomain(const GURL& url);
+
+ // Used when SEVERE filter errors are found from a given domain, to prevent
+ // further use of SDCH on that domain.
+ static void BlacklistDomainForever(const GURL& url);
+
+ // Unit test only, this function resets enabling of sdch, and clears the
+ // blacklist.
+ static void ClearBlacklistings();
+
+ // Unit test only, this function resets the blacklisting count for a domain.
+ static void ClearDomainBlacklisting(const std::string& domain);
+
+ // Unit test only: indicate how many more times a domain will be blacklisted.
+ static int BlackListDomainCount(const std::string& domain);
+
+ // Unit test only: Indicate what current blacklist increment is for a domain.
+ static int BlacklistDomainExponential(const std::string& domain);
+
+ // Check to see if SDCH is enabled (globally), and the given URL is in a
+ // supported domain (i.e., not blacklisted, and either the specific supported
+ // domain, or all domains were assumed supported). If it is blacklisted, reduce
+ // by 1 the number of times it will be reported as blacklisted.
+ const bool IsInSupportedDomain(const GURL& url);
+
+ // Schedule the URL fetching to load a dictionary. This will always return
+ // before the dictionary is actually loaded and added.
+ // After the implied task completes, the dictionary will have been
+ // cached in memory.
+ void FetchDictionary(const GURL& request_url, const GURL& dictionary_url);
+
+ // Security test function used before initiating a FetchDictionary.
+ // Return true if fetch is legal.
+ bool CanFetchDictionary(const GURL& referring_url,
+ const GURL& dictionary_url) const;
+
+ // Add an SDCH dictionary to our list of available dictionaries. This addition
+ // will fail (return false) if addition is illegal (data in the dictionary is
+ // not acceptable from the dictionary_url; dictionary already added, etc.).
+ bool AddSdchDictionary(const std::string& dictionary_text,
+ const GURL& dictionary_url);
+
+ // Find the vcdiff dictionary (the body of the sdch dictionary that appears
+ // after the meta-data headers like Domain:...) with the given |server_hash|
+ // to use to decompress data that arrived as SDCH encoded content. Check to
+ // be sure the returned |dictionary| can be used for decoding content supplied
+ // in response to a request for |referring_url|.
+ // Caller is responsible for AddRef()ing the dictionary, and Release()ing it
+ // when done.
+ // Return null in |dictionary| if there is no matching legal dictionary.
+ void GetVcdiffDictionary(const std::string& server_hash,
+ const GURL& referring_url,
+ Dictionary** dictionary);
+
+ // Get list of available (pre-cached) dictionaries that we have already loaded
+ // into memory. The list is a comma separated list of (client) hashes per
+ // the SDCH spec.
+ void GetAvailDictionaryList(const GURL& target_url, std::string* list);
+
+ // Construct the pair of hashes for client and server to identify an SDCH
+ // dictionary. This is only made public to facilitate unit testing, but is
+ // otherwise private.
+ static void GenerateHash(const std::string& dictionary_text,
+ std::string* client_hash, std::string* server_hash);
+
+ // For Latency testing only, we need to know if we've succeeded in doing a
+ // round trip before starting our comparative tests. If ever we encounter
+ // problems with SDCH, we opt-out of the test unless/until we perform a
+ // complete SDCH decoding.
+ bool AllowLatencyExperiment(const GURL& url) const;
+
+ void SetAllowLatencyExperiment(const GURL& url, bool enable);
+
+ private:
+ typedef std::map<std::string, int> DomainCounter;
+ typedef std::set<std::string> ExperimentSet;
+
+ // A map of dictionary info indexed by the hash that the server provides.
+ typedef std::map<std::string, Dictionary*> DictionaryMap;
+
+ // The one global instance that holds all the data.
+ static SdchManager* global_;
+
+ // A simple implementation of a RFC 3548 "URL safe" base64 encoder.
+ static void UrlSafeBase64Encode(const std::string& input,
+ std::string* output);
+ DictionaryMap dictionaries_;
+
+ // An instance that can fetch a dictionary given a URL.
+ scoped_ptr<SdchFetcher> fetcher_;
+
+ // Support SDCH compression, by advertising in headers.
+ bool sdch_enabled_;
+
+ // Empty string means all domains. Non-empty means only the given
+ // domain is supported.
+ std::string supported_domain_;
+
+ // List domains where decode failures have required disabling sdch, along with
+ // count of how many additional uses should be blacklisted.
+ DomainCounter blacklisted_domains_;
+
+ // Support exponential backoff in number of domain accesses before
+ // blacklisting expires.
+ DomainCounter exponential_blacklist_count;
+
+ // List of hostnames for which a latency experiment is allowed (because a
+ // round trip test has recently passed).
+ ExperimentSet allow_latency_experiment_;
+
+ DISALLOW_COPY_AND_ASSIGN(SdchManager);
+};
+
+#endif // NET_BASE_SDCH_MANAGER_H_