diff options
author | jar@google.com <jar@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-10-15 21:21:37 +0000 |
---|---|---|
committer | jar@google.com <jar@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-10-15 21:21:37 +0000 |
commit | c631b6aabef4753d124fcda8fcd1ea3d6931803d (patch) | |
tree | 2d35566d0b4a6ea39c421c7b7bab9a42a1b6b15f /net | |
parent | 9e82059e9496cd01dc4766197196c9d3330fa842 (diff) | |
download | chromium_src-c631b6aabef4753d124fcda8fcd1ea3d6931803d.zip chromium_src-c631b6aabef4753d124fcda8fcd1ea3d6931803d.tar.gz chromium_src-c631b6aabef4753d124fcda8fcd1ea3d6931803d.tar.bz2 |
Add a lot of robustness to SDCH filter decoding
a) Do meta-refresh (and partially disable sdch) when we have an sdch decode
error early on (due to lack of dictionary).
b) Gracefully handle proxy removal of "gzip" in "content-encoding: sdch,gzip".
c) Recover from completely stripped content encoding (in response to a
request for which we advertised an SDCH dictionary) by using a filter
that can optionally decode sdch with an optional gzip as well.
Field tests have shown that SDCH responses can be damaged in transit
(e.g. by proxies that rewrite or strip the Content-Encoding header, as in
(b) and (c) above). The general approach we've adopted is to stop using
SDCH only on the problematic site when we hit such problems, rather than
disabling SDCH globally. This prevents a malicious site from shutting down
all use of SDCH.
r=kmixter,openvcdiff,huanr
Review URL: http://codereview.chromium.org/6433
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@3420 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/base/filter.cc | 61 | ||||
-rw-r--r-- | net/base/filter.h | 83 | ||||
-rw-r--r-- | net/base/gzip_filter.cc | 20 | ||||
-rw-r--r-- | net/base/gzip_filter.h | 9 | ||||
-rw-r--r-- | net/base/load_flags.h | 4 | ||||
-rw-r--r-- | net/base/sdch_filter.cc | 114 | ||||
-rw-r--r-- | net/base/sdch_filter.h | 14 | ||||
-rw-r--r-- | net/base/sdch_filter_unitest.cc | 498 | ||||
-rw-r--r-- | net/base/sdch_manager.cc | 103 | ||||
-rw-r--r-- | net/base/sdch_manager.h | 74 | ||||
-rw-r--r-- | net/url_request/url_request_http_job.cc | 60 | ||||
-rw-r--r-- | net/url_request/url_request_http_job.h | 1 | ||||
-rw-r--r-- | net/url_request/url_request_job.cc | 4 | ||||
-rw-r--r-- | net/url_request/url_request_job.h | 4 |
14 files changed, 784 insertions, 265 deletions
diff --git a/net/base/filter.cc b/net/base/filter.cc index c59e7f0..1174c2f 100644 --- a/net/base/filter.cc +++ b/net/base/filter.cc @@ -43,26 +43,24 @@ Filter* Filter::Factory(const std::vector<std::string>& filter_types, std::string safe_mime_type = (filter_types.size() > 1) ? "" : mime_type; Filter* filter_list = NULL; // Linked list of filters. - for (size_t i = 0; i < filter_types.size(); ++i) { - Filter* first_filter; - first_filter = SingleFilter(filter_types[i], safe_mime_type, buffer_size); - if (!first_filter) { - // Cleanup and exit, since we can't construct this filter list. - if (filter_list) - delete filter_list; - filter_list = NULL; - break; - } - first_filter->next_filter_.reset(filter_list); - filter_list = first_filter; + FilterType type_id = FILTER_TYPE_UNSUPPORTED; + for (size_t i = 0; i < filter_types.size(); i++) { + type_id = ConvertEncodingToType(filter_types[i], safe_mime_type); + filter_list = PrependNewFilter(type_id, buffer_size, filter_list); + if (!filter_list) + return NULL; } + + // Handle proxy that changes content encoding "sdch,gzip" into "sdch". + if (1 == filter_types.size() && FILTER_TYPE_SDCH == type_id) + filter_list = PrependNewFilter(FILTER_TYPE_GZIP_HELPING_SDCH, buffer_size, + filter_list); return filter_list; } // static -Filter* Filter::SingleFilter(const std::string& filter_type, - const std::string& mime_type, - int buffer_size) { +Filter::FilterType Filter::ConvertEncodingToType(const std::string& filter_type, + const std::string& mime_type) { FilterType type_id; if (LowerCaseEqualsASCII(filter_type, kDeflate)) { type_id = FILTER_TYPE_DEFLATE; @@ -75,6 +73,8 @@ Filter* Filter::SingleFilter(const std::string& filter_type, // content encoding. Sadly, Apache mistakenly sets these headers for all // .gz files. We match Firefox's nsHttpChannel::ProcessNormal and ignore // the Content-Encoding here. + // TODO(jar): Move all this encoding type "fixup" into the + // GetContentEncoding() methods. 
Combine this defaulting with SDCH fixup. type_id = FILTER_TYPE_UNSUPPORTED; } else { type_id = FILTER_TYPE_GZIP; @@ -89,14 +89,21 @@ Filter* Filter::SingleFilter(const std::string& filter_type, // filter should be disabled in such cases. type_id = FILTER_TYPE_UNSUPPORTED; } + return type_id; +} +// static +Filter* Filter::PrependNewFilter(FilterType type_id, int buffer_size, + Filter* filter_list) { + Filter* first_filter = NULL; // Soon to be start of chain. switch (type_id) { + case FILTER_TYPE_GZIP_HELPING_SDCH: case FILTER_TYPE_DEFLATE: case FILTER_TYPE_GZIP: { scoped_ptr<GZipFilter> gz_filter(new GZipFilter()); if (gz_filter->InitBuffer(buffer_size)) { if (gz_filter->InitDecoding(type_id)) { - return gz_filter.release(); + first_filter = gz_filter.release(); } } break; @@ -105,7 +112,7 @@ Filter* Filter::SingleFilter(const std::string& filter_type, scoped_ptr<BZip2Filter> bzip2_filter(new BZip2Filter()); if (bzip2_filter->InitBuffer(buffer_size)) { if (bzip2_filter->InitDecoding(false)) { - return bzip2_filter.release(); + first_filter = bzip2_filter.release(); } } break; @@ -114,7 +121,7 @@ Filter* Filter::SingleFilter(const std::string& filter_type, scoped_ptr<SdchFilter> sdch_filter(new SdchFilter()); if (sdch_filter->InitBuffer(buffer_size)) { if (sdch_filter->InitDecoding()) { - return sdch_filter.release(); + first_filter = sdch_filter.release(); } } break; @@ -124,7 +131,14 @@ Filter* Filter::SingleFilter(const std::string& filter_type, } } - return NULL; + if (first_filter) { + first_filter->next_filter_.reset(filter_list); + } else { + // Cleanup and exit, since we can't construct this filter list. 
+ delete filter_list; + filter_list = NULL; + } + return first_filter; } Filter::Filter() @@ -132,6 +146,8 @@ Filter::Filter() stream_buffer_size_(0), next_stream_data_(NULL), stream_data_len_(0), + url_(), + mime_type_(), next_filter_(NULL), last_status_(FILTER_NEED_MORE_DATA) { } @@ -230,3 +246,10 @@ void Filter::SetURL(const GURL& url) { if (next_filter_.get()) next_filter_->SetURL(url); } + +void Filter::SetMimeType(std::string& mime_type) { + mime_type_ = mime_type; + if (next_filter_.get()) + next_filter_->SetMimeType(mime_type); +} + diff --git a/net/base/filter.h b/net/base/filter.h index 3551056..ab61732 100644 --- a/net/base/filter.h +++ b/net/base/filter.h @@ -35,9 +35,28 @@ #include "base/basictypes.h" #include "base/scoped_ptr.h" #include "googleurl/src/gurl.h" +#include "testing/gtest/include/gtest/gtest_prod.h" class Filter { public: + // Return values of function ReadFilteredData. + enum FilterStatus { + // Read filtered data successfully + FILTER_OK, + // Read filtered data successfully, and the data in the buffer has been + // consumed by the filter, but more data is needed in order to continue + // filtering. At this point, the caller is free to reuse the filter + // buffer to provide more data. + FILTER_NEED_MORE_DATA, + // Read filtered data successfully, and filter reaches the end of the data + // stream. + FILTER_DONE, + // There is an error during filtering. + FILTER_ERROR + }; + + virtual ~Filter(); + // Creates a Filter object. // Parameters: Filter_types specifies the type of filter created; Buffer_size // specifies the size (in number of chars) of the buffer the filter should @@ -56,24 +75,6 @@ class Filter { const std::string& mime_type, int buffer_size); - virtual ~Filter(); - - // Return values of function ReadFilteredData. 
- enum FilterStatus { - // Read filtered data successfully - FILTER_OK, - // Read filtered data successfully, and the data in the buffer has been - // consumed by the filter, but more data is needed in order to continue - // filtering. At this point, the caller is free to reuse the filter - // buffer to provide more data. - FILTER_NEED_MORE_DATA, - // Read filtered data successfully, and filter reaches the end of the data - // stream. - FILTER_DONE, - // There is an error during filtering. - FILTER_ERROR - }; - // External call to obtain data from this filter chain. If ther is no // next_filter_, then it obtains data from this specific filter. FilterStatus ReadData(char* dest_buffer, int* dest_len); @@ -106,7 +107,23 @@ class Filter { void SetURL(const GURL& url); const GURL& url() const { return url_; } + void SetMimeType(std::string& mime_type); + const std::string& mime_type() const { return mime_type_; } + protected: + // Specifies type of filters that can be created. + enum FilterType { + FILTER_TYPE_DEFLATE, + FILTER_TYPE_GZIP, + FILTER_TYPE_BZIP2, + FILTER_TYPE_GZIP_HELPING_SDCH, + FILTER_TYPE_SDCH, // open-vcdiff compression relative to a dictionary. + FILTER_TYPE_UNSUPPORTED + }; + + Filter(); + + FRIEND_TEST(SdchFilterTest, ContentTypeId); // Filters the data stored in stream_buffer_ and writes the output into the // dest_buffer passed in. // @@ -120,31 +137,26 @@ class Filter { // but not produce output yet. virtual FilterStatus ReadFilteredData(char* dest_buffer, int* dest_len); - Filter(); - // Copy pre-filter data directly to destination buffer without decoding. FilterStatus CopyOut(char* dest_buffer, int* dest_len); - // Specifies type of filters that can be created. - enum FilterType { - FILTER_TYPE_DEFLATE, - FILTER_TYPE_GZIP, - FILTER_TYPE_BZIP2, - FILTER_TYPE_SDCH, // open-vcdiff compression relative to a dictionary. - FILTER_TYPE_UNSUPPORTED - }; - // Allocates and initializes stream_buffer_. 
// Buffer_size is the maximum size of stream_buffer_ in number of chars. bool InitBuffer(int buffer_size); + // Translate the text of a filter name (from Content-Encoding header) into a + // FilterType, in the context of a mime type. + static FilterType ConvertEncodingToType(const std::string& filter_type, + const std::string& mime_type); + // A factory helper for creating filters for within a chain of potentially // multiple encodings. If a chain of filters is created, then this may be // called multiple times during the filter creation process. In most simple - // cases, this is only called once. - static Filter* SingleFilter(const std::string& filter_type, - const std::string& mime_type, - int buffer_size); + // cases, this is only called once. Returns NULL and cleans up (deleting + // filter_list) if a new filter can't be constructed. + static Filter* PrependNewFilter(FilterType type_id, int buffer_size, + Filter* filter_list); + FilterStatus last_status() const { return last_status_; } // Buffer to hold the data to be filtered. @@ -164,6 +176,11 @@ class Filter { // a specific URL or path. GURL url_; + // To facilitate error recovery in SDCH filters, allow filter to know if + // content is text/html by checking within this mime type (SDCH filter may + // do a meta-refresh via html). + std::string mime_type_; + // An optional filter to process output from this filter. 
scoped_ptr<Filter> next_filter_; // Remember what status or local filter last returned so we can better handle diff --git a/net/base/gzip_filter.cc b/net/base/gzip_filter.cc index 097a6e4..af7609b 100644 --- a/net/base/gzip_filter.cc +++ b/net/base/gzip_filter.cc @@ -13,7 +13,8 @@ GZipFilter::GZipFilter() decoding_mode_(DECODE_MODE_UNKNOWN), gzip_header_status_(GZIP_CHECK_HEADER_IN_PROGRESS), zlib_header_added_(false), - gzip_footer_bytes_(0) { + gzip_footer_bytes_(0), + possible_sdch_pass_through_(false) { } GZipFilter::~GZipFilter() { @@ -40,6 +41,9 @@ bool GZipFilter::InitDecoding(Filter::FilterType filter_type) { decoding_mode_ = DECODE_MODE_DEFLATE; break; } + case Filter::FILTER_TYPE_GZIP_HELPING_SDCH: + possible_sdch_pass_through_ = true; // Needed to optionally help sdch. + // Fall through to GZIP case. case Filter::FILTER_TYPE_GZIP: { gzip_header_.reset(new GZipHeader()); if (!gzip_header_.get()) @@ -64,9 +68,10 @@ Filter::FilterStatus GZipFilter::ReadFilteredData(char* dest_buffer, return Filter::FILTER_ERROR; if (decoding_status_ == DECODING_DONE) { + if (GZIP_GET_INVALID_HEADER != gzip_header_status_) + SkipGZipFooter(); // Some server might send extra data after the gzip footer. We just copy // them out. Mozilla does this too. - SkipGZipFooter(); return CopyOut(dest_buffer, dest_len); } @@ -85,6 +90,12 @@ Filter::FilterStatus GZipFilter::ReadFilteredData(char* dest_buffer, // We have consumed all input data, either getting a complete header or // a partial header. Return now to get more data. *dest_len = 0; + // Partial header means it can't be an SDCH header. + // Reason: SDCH *always* starts with 8 printable characters [a-zA-Z/_]. + // Gzip always starts with two non-printable characters. Hence even a + // single character (partial header) means that this can't be an SDCH + // encoded body masquerading as a GZIP body. 
+ possible_sdch_pass_through_ = false; return status; } case Filter::FILTER_OK: { @@ -94,6 +105,11 @@ Filter::FilterStatus GZipFilter::ReadFilteredData(char* dest_buffer, break; } case Filter::FILTER_ERROR: { + if (possible_sdch_pass_through_ && + GZIP_GET_INVALID_HEADER == gzip_header_status_) { + decoding_status_ = DECODING_DONE; // Become a pass through filter. + return CopyOut(dest_buffer, dest_len); + } decoding_status_ = DECODING_ERROR; return status; } diff --git a/net/base/gzip_filter.h b/net/base/gzip_filter.h index 5d042cf..f0fc3f2 100644 --- a/net/base/gzip_filter.h +++ b/net/base/gzip_filter.h @@ -127,6 +127,15 @@ class GZipFilter : public Filter { // DoInflate, with InsertZlibHeader being the exception as a workaround. scoped_ptr<z_stream> zlib_stream_; + // For robustness, when we see the solo sdch filter, we chain in a gzip filter + // in front of it, with this flag to indicate that the gzip decoding might not + // be needed. This handles a strange case where "Content-Encoding: sdch,gzip" + // is reduced by an errant proxy to "Content-Encoding: sdch", while the + // content is indeed really gzipped result of sdch :-/. + // If this flag is set, then we will revert to being a pass through filter if + // we don't get a valid gzip header. + bool possible_sdch_pass_through_; + DISALLOW_EVIL_CONSTRUCTORS(GZipFilter); }; diff --git a/net/base/load_flags.h b/net/base/load_flags.h index 7ca5ffb..bf5e02f 100644 --- a/net/base/load_flags.h +++ b/net/base/load_flags.h @@ -60,6 +60,10 @@ enum { // This load will not make any changes to cookies, including storing new // cookies or updating existing ones. LOAD_DO_NOT_SAVE_COOKIES = 1 << 13, + + // An SDCH dictionary was advertised, and an SDCH encoded response is + // possible. 
+ LOAD_SDCH_DICTIONARY_ADVERTISED = 1 << 14, }; } // namespace net diff --git a/net/base/sdch_filter.cc b/net/base/sdch_filter.cc index 158c4bb..9be9530 100644 --- a/net/base/sdch_filter.cc +++ b/net/base/sdch_filter.cc @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include <ctype.h> #include <algorithm> #include "base/file_util.h" @@ -15,6 +16,8 @@ SdchFilter::SdchFilter() : decoding_status_(DECODING_UNINITIALIZED), vcdiff_streaming_decoder_(NULL), + dictionary_hash_(), + dictionary_hash_is_plausible_(false), dictionary_(NULL), dest_buffer_excess_(), dest_buffer_excess_index_(0), @@ -23,20 +26,21 @@ SdchFilter::SdchFilter() } SdchFilter::~SdchFilter() { + static int filter_use_count = 0; + ++filter_use_count; + if (META_REFRESH_RECOVERY == decoding_status_) { + HISTOGRAM_COUNTS(L"Sdch.FilterUseBeforeDisabling", filter_use_count); + } + if (vcdiff_streaming_decoder_.get()) { if (!vcdiff_streaming_decoder_->FinishDecoding()) decoding_status_ = DECODING_ERROR; } - // TODO(jar): Use DHISTOGRAM when we turn sdch on by default. - if (decoding_status_ == DECODING_ERROR) { - HISTOGRAM_COUNTS(L"Sdch.Decoding Error bytes read", source_bytes_); - HISTOGRAM_COUNTS(L"Sdch.Decoding Error bytes output", output_bytes_); - } else { - if (decoding_status_ == DECODING_IN_PROGRESS) { - HISTOGRAM_COUNTS(L"Sdch.Bytes read", source_bytes_); - HISTOGRAM_COUNTS(L"Sdch.Bytes output", output_bytes_); - } - } + + // TODO(jar): Use UMA_HISTOGRAM when we turn sdch on by default. 
+ HISTOGRAM_COUNTS(L"Sdch.Bytes read", source_bytes_); + HISTOGRAM_COUNTS(L"Sdch.Bytes output", output_bytes_); + if (dictionary_) dictionary_->Release(); } @@ -50,6 +54,16 @@ bool SdchFilter::InitDecoding() { return true; } +static const char* kDecompressionErrorHtml = + "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>" + "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;" + "border-color:black;border-style:solid;text-align:left;font-family:arial;" + "font-size:10pt;foreground-color:black;background-color:white\">" + "An error occurred. This page will be reloaded shortly. " + "Or press the \"reload\" button now to reload it immediately." + "</div>"; + + Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, int* dest_len) { int available_space = *dest_len; @@ -58,20 +72,35 @@ Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, if (!dest_buffer || available_space <= 0) return FILTER_ERROR; + if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) { FilterStatus status = InitializeDictionary(); - if (DECODING_IN_PROGRESS != decoding_status_) { - DCHECK(status == FILTER_ERROR || status == FILTER_NEED_MORE_DATA); - return status; + if (FILTER_NEED_MORE_DATA == status) + return FILTER_NEED_MORE_DATA; + if (FILTER_ERROR == status) { + DCHECK(DECODING_ERROR == decoding_status_); + DCHECK(0 == dest_buffer_excess_index_); + DCHECK(dest_buffer_excess_.empty()); + if (!dictionary_hash_is_plausible_) { + SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH); + decoding_status_ = PASS_THROUGH; + dest_buffer_excess_ = dictionary_hash_; // Send what we scanned. 
+ } else { + SdchManager::BlacklistDomain(url()); + if (std::string::npos == mime_type().find_first_of("text/html")) { + SdchManager::SdchErrorRecovery(SdchManager::UNRECOVERABLE_ERROR); + return FILTER_ERROR; + } + SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY); + decoding_status_ = META_REFRESH_RECOVERY; + // Issue a meta redirect with SDCH disabled. + dest_buffer_excess_ = kDecompressionErrorHtml; + } + } else { + DCHECK(DECODING_IN_PROGRESS == decoding_status_); } } - if (decoding_status_ != DECODING_IN_PROGRESS) { - decoding_status_ = DECODING_ERROR; - return FILTER_ERROR; - } - - int amount = OutputBufferExcess(dest_buffer, available_space); *dest_len += amount; dest_buffer += amount; @@ -81,6 +110,23 @@ Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, if (available_space <= 0) return FILTER_OK; DCHECK(dest_buffer_excess_.empty()); + DCHECK(0 == dest_buffer_excess_index_); + + if (decoding_status_ != DECODING_IN_PROGRESS) { + if (META_REFRESH_RECOVERY == decoding_status_) { + // Absorb all input data. We've already output page reload HTML. + next_stream_data_ = NULL; + stream_data_len_ = 0; + return FILTER_NEED_MORE_DATA; + } + if (PASS_THROUGH == decoding_status_) { + return CopyOut(dest_buffer, dest_len); + } + DCHECK(false); + decoding_status_ = DECODING_ERROR; + return FILTER_ERROR; + } + if (!next_stream_data_ || stream_data_len_ <= 0) return FILTER_NEED_MORE_DATA; @@ -95,6 +141,7 @@ Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, if (!ret) { vcdiff_streaming_decoder_.reset(NULL); // Don't call it again. 
decoding_status_ = DECODING_ERROR; + SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR); return FILTER_ERROR; } @@ -127,17 +174,28 @@ Filter::FilterStatus SdchFilter::InitializeDictionary() { else next_stream_data_ = NULL; - if ('\0' != dictionary_hash_[kServerIdLength - 1] || - (kServerIdLength - 1) != strlen(dictionary_hash_.data())) { - decoding_status_ = DECODING_ERROR; - return FILTER_ERROR; // No dictionary hash. - } - dictionary_hash_.erase(kServerIdLength - 1); - DCHECK(!dictionary_); - SdchManager::Global()->GetVcdiffDictionary(dictionary_hash_, url(), - &dictionary_); + dictionary_hash_is_plausible_ = true; // Assume plausible, but check. + if ('\0' == dictionary_hash_[kServerIdLength - 1]) + SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_, 0, + kServerIdLength - 1), + url(), &dictionary_); + else + dictionary_hash_is_plausible_ = false; + if (!dictionary_) { + DCHECK(dictionary_hash_.size() == kServerIdLength); + for (size_t i = 0; i < kServerIdLength - 1; ++i) { + char base64_char = dictionary_hash_[i]; + if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) { + dictionary_hash_is_plausible_ = false; + break; + } + } + if (dictionary_hash_is_plausible_) + SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND); + else + SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED); decoding_status_ = DECODING_ERROR; return FILTER_ERROR; } diff --git a/net/base/sdch_filter.h b/net/base/sdch_filter.h index a13bd38..6bd0ed9 100644 --- a/net/base/sdch_filter.h +++ b/net/base/sdch_filter.h @@ -49,7 +49,9 @@ class SdchFilter : public Filter { DECODING_UNINITIALIZED, WAITING_FOR_DICTIONARY_SELECTION, DECODING_IN_PROGRESS, - DECODING_ERROR + DECODING_ERROR, + META_REFRESH_RECOVERY, // Decoding error being handled by a meta-refresh. + PASS_THROUGH, // Non-sdch content being passed without alteration. }; // Identify the suggested dictionary, and initialize underlying decompressor. 
@@ -70,9 +72,17 @@ class SdchFilter : public Filter { scoped_ptr<open_vcdiff::VCDiffStreamingDecoder> vcdiff_streaming_decoder_; // In case we need to assemble the hash piecemeal, we have a place to store - // a part of the hash until we "get all 8 bytes." + // a part of the hash until we "get all 8 bytes plus a null." std::string dictionary_hash_; + // After assembling an entire dictionary hash (the first 9 bytes of the + // sdch payload, we check to see if it is plausible, meaning it has a null + // termination, and has 8 characters that are possible in a net-safe base64 + // encoding. If the hash is not plausible, then the payload is probably not + // an SDCH encoded bundle, and various error recovery strategies can be + // attempted. + bool dictionary_hash_is_plausible_; + // We hold an in-memory copy of the dictionary during the entire decoding. // The char* data is embedded in a RefCounted dictionary_. SdchManager::Dictionary* dictionary_; diff --git a/net/base/sdch_filter_unitest.cc b/net/base/sdch_filter_unitest.cc index d7a5b88..0b0ba8c 100644 --- a/net/base/sdch_filter_unitest.cc +++ b/net/base/sdch_filter_unitest.cc @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +// TODO(jar): Change file name to *_unittest.cc (with double "t'). + #include <algorithm> #include <string> #include <vector> @@ -14,37 +16,55 @@ #include "testing/gtest/include/gtest/gtest.h" #include "third_party/zlib/zlib.h" +//------------------------------------------------------------------------------ // Provide sample data and compression results with a sample VCDIFF dictionary. -// Note an SDCH dictionary has extra meta-data before the VCDIFF text. -const char kTtestVcdiffDictionary[] = "DictionaryFor" +// Note an SDCH dictionary has extra meta-data before the VCDIFF dictionary. 
+static const char kTestVcdiffDictionary[] = "DictionaryFor" "SdchCompression1SdchCompression2SdchCompression3SdchCompression\n"; // Pre-compression test data. -const char kTestData[] = "TestData " +static const char kTestData[] = "TestData " "SdchCompression1SdchCompression2SdchCompression3SdchCompression\n"; // Note SDCH compressed data will include a reference to the SDCH dictionary. -const char kCompressedTestData[] = +static const char kCompressedTestData[] = "\326\303\304\0\0\001M\0\022I\0\t\003\001TestData \n\023\100\r"; -namespace { +//------------------------------------------------------------------------------ class SdchFilterTest : public testing::Test { protected: SdchFilterTest() - : test_vcdiff_dictionary_(kTtestVcdiffDictionary, - sizeof(kTtestVcdiffDictionary) - 1), - compressed_test_data_(kCompressedTestData, - sizeof(kCompressedTestData) - 1), + : test_vcdiff_dictionary_(kTestVcdiffDictionary, + sizeof(kTestVcdiffDictionary) - 1), + vcdiff_compressed_data_(kCompressedTestData, + sizeof(kCompressedTestData) - 1), expanded_(kTestData, sizeof(kTestData) - 1), sdch_manager_(new SdchManager) { + sdch_manager_->EnableSdchSupport(""); } + std::string NewSdchCompressedData(const std::string dictionary); + const std::string test_vcdiff_dictionary_; - const std::string compressed_test_data_; + const std::string vcdiff_compressed_data_; const std::string expanded_; // Desired final, decompressed data. scoped_ptr<SdchManager> sdch_manager_; // A singleton database. }; +std::string SdchFilterTest::NewSdchCompressedData(const std::string dictionary) { + std::string client_hash; + std::string server_hash; + SdchManager::GenerateHash(dictionary, &client_hash, &server_hash); + + // Build compressed data that refers to our dictionary. 
+ std::string compressed(server_hash); + compressed.append("\0", 1); + compressed.append(vcdiff_compressed_data_); + return compressed; +} + +//------------------------------------------------------------------------------ + TEST_F(SdchFilterTest, Hashing) { std::string client_hash, server_hash; @@ -95,18 +115,29 @@ static bool FilterTestData(const std::string& source, } while (1); } //------------------------------------------------------------------------------ +static std::string NewSdchDictionary(const std::string& domain) { + std::string dictionary; + if (!domain.empty()) { + dictionary.append("Domain: "); + dictionary.append(domain); + dictionary.append("\n"); + } + dictionary.append("\n"); + dictionary.append(kTestVcdiffDictionary, sizeof(kTestVcdiffDictionary) - 1); + return dictionary; +} +//------------------------------------------------------------------------------ -TEST_F(SdchFilterTest, BasicBadDicitonary) { - SdchManager::enable_sdch_support(""); - +TEST_F(SdchFilterTest, BasicBadDictionary) { std::vector<std::string> filters; filters.push_back("sdch"); - int kInputBufferSize(30); + const int kInputBufferSize(30); char output_buffer[20]; scoped_ptr<Filter> filter(Filter::Factory(filters, "missing-mime", kInputBufferSize)); - filter->SetURL(GURL("http://ignore.com")); + std::string url_string("http://ignore.com"); + filter->SetURL(GURL(url_string)); // With no input data, try to read output. @@ -154,42 +185,51 @@ TEST_F(SdchFilterTest, BasicBadDicitonary) { EXPECT_EQ(0, output_bytes_or_buffer_size); EXPECT_EQ(Filter::FILTER_ERROR, status); + + EXPECT_FALSE(SdchManager::Global()->IsInSupportedDomain(GURL(url_string))); + SdchManager::ClearBlacklistings(); + EXPECT_TRUE(SdchManager::Global()->IsInSupportedDomain(GURL(url_string))); } +TEST_F(SdchFilterTest, DictionaryAddOnce) { + // Construct a valid SDCH dictionary from a VCDIFF dictionary. 
+ const std::string kSampleDomain = "sdchtest.com"; + std::string dictionary(NewSdchDictionary(kSampleDomain)); -TEST_F(SdchFilterTest, BasicDictionary) { - SdchManager::enable_sdch_support(""); + std::string url_string = "http://" + kSampleDomain; + GURL url(url_string); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary, url)); - const std::string kSampleDomain = "sdchtest.com"; + // Check we can't add it twice. + EXPECT_FALSE(sdch_manager_->AddSdchDictionary(dictionary, url)); + + const std::string kSampleDomain2 = "sdchtest2.com"; + + // Construct a second SDCH dictionary from a VCDIFF dictionary. + std::string dictionary2(NewSdchDictionary(kSampleDomain2)); + std::string url_string2 = "http://" + kSampleDomain2; + GURL url2(url_string2); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary2, url2)); +} + +TEST_F(SdchFilterTest, BasicDictionary) { // Construct a valid SDCH dictionary from a VCDIFF dictionary. - std::string dictionary("Domain: "); - dictionary.append(kSampleDomain); - dictionary.append("\n\n"); - dictionary.append(test_vcdiff_dictionary_); - std::string client_hash; - std::string server_hash; - SdchManager::GenerateHash(dictionary, &client_hash, &server_hash); + const std::string kSampleDomain = "sdchtest.com"; + std::string dictionary(NewSdchDictionary(kSampleDomain)); + std::string url_string = "http://" + kSampleDomain; GURL url(url_string); - bool status = sdch_manager_->AddSdchDictionary(dictionary, url); - EXPECT_TRUE(status); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary, url)); - // Check we can't add it twice. - status = sdch_manager_->AddSdchDictionary(dictionary, url); - EXPECT_FALSE(status); // Already loaded. - - // Build compressed data that refers to our dictionary. 
- std::string compressed(server_hash); - compressed.append("\0", 1); - compressed.append(compressed_test_data_); + std::string compressed(NewSdchCompressedData(dictionary)); std::vector<std::string> filters; filters.push_back("sdch"); - // First try with a large buffer (larger than test input, or compressed data). - int kInputBufferSize(100); + // Decode with a large buffer (larger than test input, or compressed data). + const int kInputBufferSize(100); scoped_ptr<Filter> filter(Filter::Factory(filters, "missing-mime", kInputBufferSize)); filter->SetURL(url); @@ -197,66 +237,92 @@ TEST_F(SdchFilterTest, BasicDictionary) { size_t feed_block_size = 100; size_t output_block_size = 100; std::string output; - status = FilterTestData(compressed, feed_block_size, output_block_size, - filter.get(), &output); - EXPECT_TRUE(status); - EXPECT_TRUE(output == expanded_); + EXPECT_TRUE(FilterTestData(compressed, feed_block_size, output_block_size, + filter.get(), &output)); + EXPECT_EQ(output, expanded_); - // Now try with really small buffers (size 1) to check for edge effects. + // Decode with really small buffers (size 1) to check for edge effects. filter.reset((Filter::Factory(filters, "missing-mime", kInputBufferSize))); filter->SetURL(url); feed_block_size = 1; output_block_size = 1; output.clear(); - status = FilterTestData(compressed, feed_block_size, output_block_size, - filter.get(), &output); - EXPECT_TRUE(status); - EXPECT_TRUE(output == expanded_); + EXPECT_TRUE(FilterTestData(compressed, feed_block_size, output_block_size, + filter.get(), &output)); + EXPECT_EQ(output, expanded_); +} + +TEST_F(SdchFilterTest, CrossDomainDictionaryUse) { + // Construct a valid SDCH dictionary from a VCDIFF dictionary. 
+ const std::string kSampleDomain = "sdchtest.com"; + std::string dictionary(NewSdchDictionary(kSampleDomain)); + + std::string url_string = "http://" + kSampleDomain; + + GURL url(url_string); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary, url)); + + std::string compressed(NewSdchCompressedData(dictionary)); - // Now try with content arriving from the "wrong" domain. + std::vector<std::string> filters; + filters.push_back("sdch"); + const int kInputBufferSize(100); + + // Decode with content arriving from the "wrong" domain. // This tests CanSet() in the sdch_manager_-> - filter.reset((Filter::Factory(filters, "missing-mime", kInputBufferSize))); - filter->SetURL(GURL("http://www.wrongdomain.com")); + scoped_ptr<Filter> filter((Filter::Factory(filters, "missing-mime", + kInputBufferSize))); + GURL wrong_domain_url("http://www.wrongdomain.com"); + filter->SetURL(wrong_domain_url); - feed_block_size = 100; - output_block_size = 100; - output.clear(); - status = FilterTestData(compressed, feed_block_size, output_block_size, - filter.get(), &output); - EXPECT_FALSE(status); // Couldn't decode. + size_t feed_block_size = 100; + size_t output_block_size = 100; + std::string output; + EXPECT_FALSE(FilterTestData(compressed, feed_block_size, output_block_size, + filter.get(), &output)); EXPECT_EQ(output.size(), 0u); // No output written. + EXPECT_TRUE(SdchManager::Global()->IsInSupportedDomain(GURL(url_string))); + EXPECT_FALSE(SdchManager::Global()->IsInSupportedDomain(wrong_domain_url)); + SdchManager::ClearBlacklistings(); + EXPECT_TRUE(SdchManager::Global()->IsInSupportedDomain(wrong_domain_url)); +} + +TEST_F(SdchFilterTest, DictionaryPathValidation) { + // Construct a valid SDCH dictionary from a VCDIFF dictionary. + const std::string kSampleDomain = "sdchtest.com"; + std::string dictionary(NewSdchDictionary(kSampleDomain)); + + std::string url_string = "http://" + kSampleDomain; - // Now check that path restrictions on dictionary are being enforced. 
+ GURL url(url_string); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary, url)); - // Create a dictionary with a path restriction, by prefixing old dictionary. + // Create a dictionary with a path restriction, by prefixing dictionary. const std::string path("/special_path/bin"); std::string dictionary_with_path("Path: " + path + "\n"); dictionary_with_path.append(dictionary); - std::string pathed_client_hash; - std::string pathed_server_hash; - SdchManager::GenerateHash(dictionary_with_path, - &pathed_client_hash, &pathed_server_hash); - status = sdch_manager_->AddSdchDictionary(dictionary_with_path, url); - EXPECT_TRUE(status); - - // Build compressed data that refers to our dictionary - std::string compressed_for_path(pathed_server_hash); - compressed_for_path.append("\0", 1); - compressed_for_path.append(compressed_test_data_); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary_with_path, url)); + + std::string compressed_for_path(NewSdchCompressedData(dictionary_with_path)); + + std::vector<std::string> filters; + filters.push_back("sdch"); + const int kInputBufferSize(100); // Test decode the path data, arriving from a valid path. - filter.reset((Filter::Factory(filters, "missing-mime", kInputBufferSize))); + scoped_ptr<Filter> filter((Filter::Factory(filters, "missing-mime", + kInputBufferSize))); filter->SetURL(GURL(url_string + path)); - feed_block_size = 100; - output_block_size = 100; - output.clear(); - status = FilterTestData(compressed_for_path, feed_block_size, - output_block_size, filter.get(), &output); - EXPECT_TRUE(status); - EXPECT_TRUE(output == expanded_); + size_t feed_block_size = 100; + size_t output_block_size = 100; + std::string output; + + EXPECT_TRUE(FilterTestData(compressed_for_path, feed_block_size, + output_block_size, filter.get(), &output)); + EXPECT_EQ(output, expanded_); // Test decode the path data, arriving from a invalid path. 
filter.reset((Filter::Factory(filters, "missing-mime", kInputBufferSize))); @@ -265,41 +331,51 @@ TEST_F(SdchFilterTest, BasicDictionary) { feed_block_size = 100; output_block_size = 100; output.clear(); - status = FilterTestData(compressed_for_path, feed_block_size, - output_block_size, filter.get(), &output); - EXPECT_FALSE(status); // Couldn't decode. + EXPECT_FALSE(FilterTestData(compressed_for_path, feed_block_size, + output_block_size, filter.get(), &output)); EXPECT_EQ(output.size(), 0u); // No output written. + EXPECT_FALSE(SdchManager::Global()->IsInSupportedDomain(GURL(url_string))); + SdchManager::ClearBlacklistings(); + EXPECT_TRUE(SdchManager::Global()->IsInSupportedDomain(GURL(url_string))); +} + +TEST_F(SdchFilterTest, DictionaryPortValidation) { + // Construct a valid SDCH dictionary from a VCDIFF dictionary. + const std::string kSampleDomain = "sdchtest.com"; + std::string dictionary(NewSdchDictionary(kSampleDomain)); + + std::string url_string = "http://" + kSampleDomain; + + GURL url(url_string); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary, url)); + // Create a dictionary with a port restriction, by prefixing old dictionary. const std::string port("502"); std::string dictionary_with_port("Port: " + port + "\n"); dictionary_with_port.append("Port: 80\n"); // Add default port. 
dictionary_with_port.append(dictionary); - std::string ported_client_hash; - std::string ported_server_hash; - SdchManager::GenerateHash(dictionary_with_port, - &ported_client_hash, &ported_server_hash); - status = sdch_manager_->AddSdchDictionary(dictionary_with_port, - GURL(url_string + ":" + port)); - EXPECT_TRUE(status); - - // Build compressed data that refers to our dictionary - std::string compressed_for_port(ported_server_hash); - compressed_for_port.append("\0", 1); - compressed_for_port.append(compressed_test_data_); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary_with_port, + GURL(url_string + ":" + port))); + + std::string compressed_for_port(NewSdchCompressedData(dictionary_with_port)); + + std::vector<std::string> filters; + filters.push_back("sdch"); + const int kInputBufferSize(100); // Test decode the port data, arriving from a valid port. - filter.reset((Filter::Factory(filters, "missing-mime", kInputBufferSize))); + scoped_ptr<Filter> filter((Filter::Factory(filters, "missing-mime", + kInputBufferSize))); filter->SetURL(GURL(url_string + ":" + port)); - feed_block_size = 100; - output_block_size = 100; - output.clear(); - status = FilterTestData(compressed_for_port, feed_block_size, - output_block_size, filter.get(), &output); - EXPECT_TRUE(status); - EXPECT_TRUE(output == expanded_); + size_t feed_block_size = 100; + size_t output_block_size = 100; + std::string output; + EXPECT_TRUE(FilterTestData(compressed_for_port, feed_block_size, + output_block_size, filter.get(), &output)); + EXPECT_EQ(output, expanded_); // Test decode the port data, arriving from a valid (default) port. 
filter.reset((Filter::Factory(filters, "missing-mime", kInputBufferSize))); @@ -308,10 +384,9 @@ TEST_F(SdchFilterTest, BasicDictionary) { feed_block_size = 100; output_block_size = 100; output.clear(); - status = FilterTestData(compressed_for_port, feed_block_size, - output_block_size, filter.get(), &output); - EXPECT_TRUE(status); - EXPECT_TRUE(output == expanded_); + EXPECT_TRUE(FilterTestData(compressed_for_port, feed_block_size, + output_block_size, filter.get(), &output)); + EXPECT_EQ(output, expanded_); // Test decode the port data, arriving from a invalid port. filter.reset((Filter::Factory(filters, "missing-mime", kInputBufferSize))); @@ -320,46 +395,19 @@ TEST_F(SdchFilterTest, BasicDictionary) { feed_block_size = 100; output_block_size = 100; output.clear(); - status = FilterTestData(compressed_for_port, feed_block_size, - output_block_size, filter.get(), &output); - EXPECT_FALSE(status); // Couldn't decode. + EXPECT_FALSE(FilterTestData(compressed_for_port, feed_block_size, + output_block_size, filter.get(), &output)); EXPECT_EQ(output.size(), 0u); // No output written. -} - - -// Test that filters can be cascaded (chained) so that the output of one filter -// is processed by the next one. This is most critical for SDCH, which is -// routinely followed by gzip (during encoding). The filter we'll test for will -// do the gzip decoding first, and then decode the SDCH content. -TEST_F(SdchFilterTest, FilterChaining) { - SdchManager::enable_sdch_support(""); - - const std::string kSampleDomain = "sdchtest.com"; - - // Construct a valid SDCH dictionary from a VCDIFF dictionary. 
- std::string dictionary("Domain: "); - dictionary.append(kSampleDomain); - dictionary.append("\n\n"); - dictionary.append(test_vcdiff_dictionary_); - std::string client_hash; - std::string server_hash; - SdchManager::GenerateHash(dictionary, &client_hash, &server_hash); - std::string url_string = "http://" + kSampleDomain; - GURL url(url_string); - bool status = sdch_manager_->AddSdchDictionary(dictionary, url); - EXPECT_TRUE(status); - - // Check we can't add it twice. - status = sdch_manager_->AddSdchDictionary(dictionary, url); - EXPECT_FALSE(status); // Already loaded. + EXPECT_FALSE(SdchManager::Global()->IsInSupportedDomain(GURL(url_string))); + SdchManager::ClearBlacklistings(); + EXPECT_TRUE(SdchManager::Global()->IsInSupportedDomain(GURL(url_string))); +} - // Build compressed sdch encoded data that refers to our dictionary. - std::string sdch_compressed(server_hash); - sdch_compressed.append("\0", 1); - sdch_compressed.append(compressed_test_data_); +//------------------------------------------------------------------------------ +// Helper function to perform gzip compression of data. - // Use Gzip to compress the sdch sdch_compressed data. +static std::string gzip_compress(const std::string &input) { z_stream zlib_stream; memset(&zlib_stream, 0, sizeof(zlib_stream)); int code; @@ -373,8 +421,8 @@ TEST_F(SdchFilterTest, FilterChaining) { CHECK(code == Z_OK); // Fill in zlib control block - zlib_stream.next_in = bit_cast<Bytef*>(sdch_compressed.data()); - zlib_stream.avail_in = sdch_compressed.size(); + zlib_stream.next_in = bit_cast<Bytef*>(input.data()); + zlib_stream.avail_in = input.size(); // Assume we can compress into similar buffer (add 100 bytes to be sure). 
size_t gzip_compressed_length = zlib_stream.avail_in + 100; @@ -406,6 +454,30 @@ TEST_F(SdchFilterTest, FilterChaining) { code = MOZ_Z_deflate(&zlib_stream, Z_FINISH); gzip_compressed_length -= zlib_stream.avail_out; std::string compressed(gzip_compressed.get(), gzip_compressed_length); + MOZ_Z_deflateEnd(&zlib_stream); + return compressed; +} + +//------------------------------------------------------------------------------ + +// Test that filters can be cascaded (chained) so that the output of one filter +// is processed by the next one. This is most critical for SDCH, which is +// routinely followed by gzip (during encoding). The filter we'll test for will +// do the gzip decoding first, and then decode the SDCH content. +TEST_F(SdchFilterTest, FilterChaining) { + // Construct a valid SDCH dictionary from a VCDIFF dictionary. + const std::string kSampleDomain = "sdchtest.com"; + std::string dictionary(NewSdchDictionary(kSampleDomain)); + + std::string url_string = "http://" + kSampleDomain; + + GURL url(url_string); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary, url)); + + std::string sdch_compressed(NewSdchCompressedData(dictionary)); + + // Use Gzip to compress the sdch sdch_compressed data. + std::string gzip_compressed_sdch = gzip_compress(sdch_compressed); // Construct a chained filter. std::vector<std::string> filters; @@ -413,7 +485,7 @@ TEST_F(SdchFilterTest, FilterChaining) { filters.push_back("gzip"); // First try with a large buffer (larger than test input, or compressed data). 
- int kInputBufferSize(100); + const int kInputBufferSize(100); scoped_ptr<Filter> filter(Filter::Factory(filters, "missing-mime", kInputBufferSize)); filter->SetURL(url); @@ -427,10 +499,9 @@ TEST_F(SdchFilterTest, FilterChaining) { size_t feed_block_size = 100; size_t output_block_size = 100; std::string output; - status = FilterTestData(compressed, feed_block_size, output_block_size, - filter.get(), &output); - EXPECT_TRUE(status); - EXPECT_TRUE(output == expanded_); + EXPECT_TRUE(FilterTestData(gzip_compressed_sdch, feed_block_size, + output_block_size, filter.get(), &output)); + EXPECT_EQ(output, expanded_); // Next try with a tiny buffer to cover edge effects. filter.reset(Filter::Factory(filters, "missing-mime", kInputBufferSize)); @@ -439,12 +510,135 @@ TEST_F(SdchFilterTest, FilterChaining) { feed_block_size = 1; output_block_size = 1; output.clear(); - status = FilterTestData(compressed, feed_block_size, output_block_size, - filter.get(), &output); - EXPECT_TRUE(status); - EXPECT_TRUE(output == expanded_); + EXPECT_TRUE(FilterTestData(gzip_compressed_sdch, feed_block_size, + output_block_size, filter.get(), &output)); + EXPECT_EQ(output, expanded_); +} - MOZ_Z_deflateEnd(&zlib_stream); +TEST_F(SdchFilterTest, DefaultGzipIfSdch) { + // Construct a valid SDCH dictionary from a VCDIFF dictionary. + const std::string kSampleDomain = "sdchtest.com"; + std::string dictionary(NewSdchDictionary(kSampleDomain)); + + std::string url_string = "http://" + kSampleDomain; + + GURL url(url_string); + EXPECT_TRUE(sdch_manager_->AddSdchDictionary(dictionary, url)); + + std::string sdch_compressed(NewSdchCompressedData(dictionary)); + + // Use Gzip to compress the sdch sdch_compressed data. + std::string gzip_compressed_sdch = gzip_compress(sdch_compressed); + + // Only claim to have sdch content, but really usethe gzipped sdch content. + // System should automatically add the missing (optional) gzip. 
+ std::vector<std::string> filters; + filters.push_back("sdch"); + + // First try with a large buffer (larger than test input, or compressed data). + const int kInputBufferSize(100); + scoped_ptr<Filter> filter(Filter::Factory(filters, "missing-mime", + kInputBufferSize)); + filter->SetURL(url); + + // Verify that chained filter is waiting for data. + char tiny_output_buffer[10]; + int tiny_output_size = sizeof(tiny_output_buffer); + EXPECT_EQ(Filter::FILTER_NEED_MORE_DATA, + filter->ReadData(tiny_output_buffer, &tiny_output_size)); + + size_t feed_block_size = 100; + size_t output_block_size = 100; + std::string output; + EXPECT_TRUE(FilterTestData(gzip_compressed_sdch, feed_block_size, + output_block_size, filter.get(), &output)); + EXPECT_EQ(output, expanded_); + + // Next try with a tiny buffer to cover edge effects. + filter.reset(Filter::Factory(filters, "missing-mime", kInputBufferSize)); + filter->SetURL(url); + + feed_block_size = 1; + output_block_size = 1; + output.clear(); + EXPECT_TRUE(FilterTestData(gzip_compressed_sdch, feed_block_size, + output_block_size, filter.get(), &output)); + EXPECT_EQ(output, expanded_); +} + +TEST_F(SdchFilterTest, DomainSupported) { + GURL test_url("http://www.test.com"); + GURL google_url("http://www.google.com"); + + EXPECT_TRUE(SdchManager::sdch_enabled()); + EXPECT_TRUE(SdchManager::Global()->IsInSupportedDomain(test_url)); + sdch_manager_->EnableSdchSupport(".google.com"); + EXPECT_FALSE(SdchManager::Global()->IsInSupportedDomain(test_url)); + EXPECT_TRUE(SdchManager::Global()->IsInSupportedDomain(google_url)); } -}; // namespace anonymous +TEST_F(SdchFilterTest, DomainBlacklisting) { + GURL test_url("http://www.test.com"); + GURL google_url("http://www.google.com"); + + SdchManager::BlacklistDomain(test_url); + EXPECT_FALSE(SdchManager::Global()->IsInSupportedDomain(test_url)); + EXPECT_TRUE(SdchManager::Global()->IsInSupportedDomain(google_url)); + + SdchManager::BlacklistDomain(google_url); + 
EXPECT_FALSE(SdchManager::Global()->IsInSupportedDomain(test_url)); + EXPECT_FALSE(SdchManager::Global()->IsInSupportedDomain(google_url)); +} + + +// TODO(jar): move this sort of test into filter_unittest.cc, or +// url_request_http_job_unittest.cc if that is more applicable after refactoring +// to use array of enums rather than array of strings to express content +// encodings. +TEST_F(SdchFilterTest, ContentTypeId) { + // Check for basic translation of Content-Encoding, including case variations. + EXPECT_EQ(Filter::FILTER_TYPE_DEFLATE, + Filter::ConvertEncodingToType("deflate", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_DEFLATE, + Filter::ConvertEncodingToType("deflAte", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_GZIP, + Filter::ConvertEncodingToType("gzip", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_GZIP, + Filter::ConvertEncodingToType("GzIp", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_GZIP, + Filter::ConvertEncodingToType("x-gzip", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_GZIP, + Filter::ConvertEncodingToType("X-GzIp", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_BZIP2, + Filter::ConvertEncodingToType("bzip2", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_BZIP2, + Filter::ConvertEncodingToType("BZiP2", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_BZIP2, + Filter::ConvertEncodingToType("x-bzip2", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_BZIP2, + Filter::ConvertEncodingToType("X-BZiP2", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_SDCH, + Filter::ConvertEncodingToType("sdch", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_SDCH, + Filter::ConvertEncodingToType("sDcH", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_UNSUPPORTED, + Filter::ConvertEncodingToType("weird", "nothing")); + EXPECT_EQ(Filter::FILTER_TYPE_UNSUPPORTED, + Filter::ConvertEncodingToType("strange", "nothing")); + + // Check to see that apache bug (marking things as gzipped because of their + // on-disk file type) is ignored. 
+ EXPECT_EQ(Filter::FILTER_TYPE_UNSUPPORTED, + Filter::ConvertEncodingToType("gzip", "application/x-gzip")); + EXPECT_EQ(Filter::FILTER_TYPE_UNSUPPORTED, + Filter::ConvertEncodingToType("gzip", "application/gzip")); + EXPECT_EQ(Filter::FILTER_TYPE_UNSUPPORTED, + Filter::ConvertEncodingToType("gzip", "application/x-gunzip")); + + EXPECT_EQ(Filter::FILTER_TYPE_UNSUPPORTED, + Filter::ConvertEncodingToType("x-gzip", "application/x-gzip")); + EXPECT_EQ(Filter::FILTER_TYPE_UNSUPPORTED, + Filter::ConvertEncodingToType("x-gzip", "application/gzip")); + EXPECT_EQ(Filter::FILTER_TYPE_UNSUPPORTED, + Filter::ConvertEncodingToType("x-gzip", "application/x-gunzip")); +} diff --git a/net/base/sdch_manager.cc b/net/base/sdch_manager.cc index de1e0b0..5b86a650 100644 --- a/net/base/sdch_manager.cc +++ b/net/base/sdch_manager.cc @@ -21,6 +21,20 @@ SdchManager* SdchManager::Global() { return global_; } +// static +void SdchManager::SdchErrorRecovery(ProblemCodes problem) { + static LinearHistogram histogram(L"Sdch.ProblemCodes", MIN_PROBLEM_CODE, + MAX_PROBLEM_CODE - 1, MAX_PROBLEM_CODE); + // TODO(jar): Set UMA flag for uploading. + histogram.Add(problem); +} + +// static +void SdchManager::ClearBlacklistings() { + Global()->blacklisted_domains_.clear(); +} + + //------------------------------------------------------------------------------ SdchManager::SdchManager() : sdch_enabled_(false) { DCHECK(!global_); @@ -37,10 +51,33 @@ SdchManager::~SdchManager() { global_ = NULL; } +// static +bool SdchManager::BlacklistDomain(const GURL& url) { + if (!global_ ) + return false; + std::string domain(url.host()); + global_->blacklisted_domains_.insert(url.host()); + return true; +} + +void SdchManager::EnableSdchSupport(const std::string& domain) { + // We presume that there is a SDCH manager instance. 
+ global_->supported_domain_ = domain; + global_->sdch_enabled_ = true; +} + const bool SdchManager::IsInSupportedDomain(const GURL& url) const { - return sdch_enabled_ && - (supported_domain_.empty() || - url.DomainIs(supported_domain_.data(), supported_domain_.size())); + if (!sdch_enabled_ ) + return false; + if (!supported_domain_.empty() && + !url.DomainIs(supported_domain_.data(), supported_domain_.size())) + return false; // It is not the singular supported domain. + + if (blacklisted_domains_.empty()) + return true; + + std::string domain = StringToLowerASCII(url.host()); + return blacklisted_domains_.end() == blacklisted_domains_.find(domain); } void SdchManager::FetchDictionary(const GURL& referring_url, @@ -56,10 +93,14 @@ void SdchManager::FetchDictionary(const GURL& referring_url, */ // Item (1) above implies item (2). Spec should be updated. // I take "host name match" to be "is identical to" - if (referring_url.host() != dictionary_url.host()) + if (referring_url.host() != dictionary_url.host()) { + SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST); return; - if (referring_url.SchemeIs("https")) + } + if (referring_url.SchemeIs("https")) { + SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL); return; + } if (fetcher_.get()) fetcher_->Schedule(dictionary_url); } @@ -69,16 +110,20 @@ bool SdchManager::AddSdchDictionary(const std::string& dictionary_text, std::string client_hash; std::string server_hash; GenerateHash(dictionary_text, &client_hash, &server_hash); - if (dictionaries_.find(server_hash) != dictionaries_.end()) + if (dictionaries_.find(server_hash) != dictionaries_.end()) { + SdchErrorRecovery(DICTIONARY_ALREADY_LOADED); return false; // Already loaded. + } std::string domain, path; std::set<int> ports; Time expiration; size_t header_end = dictionary_text.find("\n\n"); - if (std::string::npos == header_end) + if (std::string::npos == header_end) { + SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER); return false; // Missing header. 
+ } size_t line_start = 0; // Start of line being parsed. while (1) { size_t line_end = dictionary_text.find('\n', line_start); @@ -86,8 +131,10 @@ bool SdchManager::AddSdchDictionary(const std::string& dictionary_text, DCHECK(line_end <= header_end); size_t colon_index = dictionary_text.find(':', line_start); - if (std::string::npos == colon_index) + if (std::string::npos == colon_index) { + SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON); return false; // Illegal line missing a colon. + } if (colon_index > line_end) break; @@ -124,7 +171,7 @@ bool SdchManager::AddSdchDictionary(const std::string& dictionary_text, if (!Dictionary::CanSet(domain, path, ports, dictionary_url)) return false; - DHISTOGRAM_COUNTS(L"Sdch.Dictionary size loaded", dictionary_text.size()); + HISTOGRAM_COUNTS(L"Sdch.Dictionary size loaded", dictionary_text.size()); DLOG(INFO) << "Loaded dictionary with client hash " << client_hash << " and server hash " << server_hash; Dictionary* dictionary = @@ -139,8 +186,10 @@ void SdchManager::GetVcdiffDictionary(const std::string& server_hash, const GURL& referring_url, Dictionary** dictionary) { *dictionary = NULL; DictionaryMap::iterator it = dictionaries_.find(server_hash); - if (it == dictionaries_.end()) + if (it == dictionaries_.end()) { + SdchErrorRecovery(DICTIONARY_NOT_FOUND_FOR_HASH); return; + } Dictionary* matching_dictionary = it->second; if (!matching_dictionary->CanUse(referring_url)) return; @@ -233,19 +282,27 @@ bool SdchManager::Dictionary::CanSet(const std::string& domain, 5. If the dictionary has a Port attribute and the referer URL's port was not in the list. */ - if (domain.empty()) + if (domain.empty()) { + SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER); return false; // Domain is required. 
- if (0 == - net::RegistryControlledDomainService::GetDomainAndRegistry(domain).size()) + } + if (net::RegistryControlledDomainService::GetDomainAndRegistry(domain).size() + == 0) { + SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN); return false; // domain was a TLD. - if (!Dictionary::DomainMatch(dictionary_url, domain)) + } + if (!Dictionary::DomainMatch(dictionary_url, domain)) { + SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL); return false; + } // TODO(jar): Enforce item 4 above. if (!ports.empty() - && 0 == ports.count(dictionary_url.EffectiveIntPort())) + && 0 == ports.count(dictionary_url.EffectiveIntPort())) { + SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL); return false; + } return true; } @@ -261,15 +318,23 @@ bool SdchManager::Dictionary::CanUse(const GURL referring_url) { 3. The request URL path-matches the path attribute of the dictionary. 4. The request is not an HTTPS request. */ - if (!DomainMatch(referring_url, domain_)) + if (!DomainMatch(referring_url, domain_)) { + SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN); return false; + } if (!ports_.empty() - && 0 == ports_.count(referring_url.EffectiveIntPort())) + && 0 == ports_.count(referring_url.EffectiveIntPort())) { + SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST); return false; - if (path_.size() && !PathMatch(referring_url.path(), path_)) + } + if (path_.size() && !PathMatch(referring_url.path(), path_)) { + SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH); return false; - if (referring_url.SchemeIsSecure()) + } + if (referring_url.SchemeIsSecure()) { + SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME); return false; + } return true; } diff --git a/net/base/sdch_manager.h b/net/base/sdch_manager.h index ae5f2d3..1d00163 100644 --- a/net/base/sdch_manager.h +++ b/net/base/sdch_manager.h @@ -51,6 +51,50 @@ class SdchFetcher { class SdchManager { public: + // A list of errors that appeared and were either resolved, or used to turn + // off 
sdch encoding. + enum ProblemCodes { + MIN_PROBLEM_CODE, + + // Content Decode problems. + ADDED_CONTENT_ENCODING, + FIXED_CONTENT_ENCODING, + FIXED_CONTENT_ENCODINGS, + + // Content decoding errors. + DECODE_HEADER_ERROR, + DECODE_BODY_ERROR, + + // Dictionary selection for use problems. + DICTIONARY_NOT_FOUND_FOR_HASH = 10, + DICTIONARY_FOUND_HAS_WRONG_DOMAIN, + DICTIONARY_FOUND_HAS_WRONG_PORT_LIST, + DICTIONARY_FOUND_HAS_WRONG_PATH, + DICTIONARY_FOUND_HAS_WRONG_SCHEME, + DICTIONARY_HASH_NOT_FOUND, + DICTIONARY_HASH_MALFORMED, + + // Decode recovery methods. + META_REFRESH_RECOVERY, + PASSING_THROUGH_NON_SDCH, + UNRECOVERABLE_ERROR, + + // Dictionary saving problems. + DICTIONARY_HAS_NO_HEADER = 20, + DICTIONARY_HEADER_LINE_MISSING_COLON, + DICTIONARY_MISSING_DOMAIN_SPECIFIER, + DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN, + DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL, + DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL, + + // Dictionary loading problems. + DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30, + DICTIONARY_SELECTED_FOR_SSL, + DICTIONARY_ALREADY_LOADED, + + MAX_PROBLEM_CODE // Used to bound histogram + }; + // There is one instance of |Dictionary| for each memory-cached SDCH // dictionary. class Dictionary : public base::RefCounted<Dictionary> { @@ -121,18 +165,31 @@ class SdchManager { // Provide access to the single instance of this class. static SdchManager* Global(); + // Record stats on various errors. + static void SdchErrorRecovery(ProblemCodes problem); + // Register a fetcher that this class can use to obtain dictionaries. void set_sdch_fetcher(SdchFetcher* fetcher) { fetcher_.reset(fetcher); } // If called with an empty string, advertise and support sdch on all domains. // If called with a specific string, advertise and support only the specified - // domain. - static void enable_sdch_support(const std::string& domain) { - // We presume that there is a SDCH manager instance. 
- global_->supported_domain_ = domain; - global_->sdch_enabled_ = true; - } + // domain. Function assumes the existence of a global SdchManager instance. + void EnableSdchSupport(const std::string& domain); + + static bool sdch_enabled() { return global_ && global_->sdch_enabled_; } + // Prevent further advertising of SDCH on this domain (if SDCH is enabled). + // Used when filter errors are found from a given domain, to prevent further + // use of SDCH on that domain. + static bool BlacklistDomain(const GURL& url); + + // For testing only, this function resets enabling of sdch, and clears the + // blacklist. + static void ClearBlacklistings(); + + // Check to see if SDCH is enabled (globally), and the given URL is in a + // supported domain (i.e., not blacklisted, and either the specific supported + // domain, or all domains were assumed supported). const bool IsInSupportedDomain(const GURL& url) const; // Schedule the URL fetching to load a dictionary. This will generally return @@ -177,7 +234,7 @@ class SdchManager { // The one global instance of that holds all the data. static SdchManager* global_; - // A simple implementatino of a RFC 3548 "URL safe" base64 encoder. + // A simple implementation of a RFC 3548 "URL safe" base64 encoder. static void UrlSafeBase64Encode(const std::string& input, std::string* output); DictionaryMap dictionaries_; @@ -192,6 +249,9 @@ class SdchManager { // domain is supported. std::string supported_domain_; + // List domains where decode failures have required disabling sdch. 
+ std::set<std::string> blacklisted_domains_; + + DISALLOW_COPY_AND_ASSIGN(SdchManager); }; diff --git a/net/url_request/url_request_http_job.cc b/net/url_request/url_request_http_job.cc index fe1332d..4539f13 100644 --- a/net/url_request/url_request_http_job.cc +++ b/net/url_request/url_request_http_job.cc @@ -173,11 +173,65 @@ bool URLRequestHttpJob::GetContentEncodings( void* iter = NULL; while (response_info_->headers->EnumerateHeader(&iter, "Content-Encoding", &encoding_type)) { - encoding_types->push_back(encoding_type); + encoding_types->push_back(StringToLowerASCII(encoding_type)); } + + // TODO(jar): Transition to returning enums, rather than strings, and perform + // all content encoding fixups here, rather than doing some in the + // Filter::Factory(). Note that enums generated can be more specific than mere + // restatement of strings. For example, rather than just having a GZIP + // encoding we can have a GZIP_OPTIONAL encoding to help with odd SDCH related + // fixups. + + // TODO(jar): Refactor code so that content-encoding error recovery is + // testable via unit tests. + + if (!IsSdchResponse()) + return !encoding_types->empty(); + + // If content encoding included SDCH, then everything is fine. + if (!encoding_types->empty() && ("sdch" == encoding_types->front())) + return !encoding_types->empty(); + + // SDCH "search results" protective hack: To make sure we don't break the only + // currently deployed SDCH enabled server, be VERY cautious about proxies that + // strip all content-encoding to not include sdch. IF we don't see content + // encodings that seem to match what we'd expect from a server that asked us + // to use a dictionary (and we advertised said dictionary in the GET), then + // we set the encoding to (try to) use SDCH to decode. Note that SDCH will + // degrade into a pass-through filter if it doesn't have a viable dictionary + // hash in its header. 
Also note that a solo "sdch" will implicitly create + // a "sdch,gzip" decoding filter, where the gzip portion will degrade to a + // pass through if a gzip header is not encountered. Hence we can replace + // "gzip" with "sdch" and "everything will work." + // The one failure mode comes when we advertise a dictionary, and the server + // tries to *send* a gzipped file (not gzip encode content), and then we could + // do a gzip decode :-(. Since current server support does not ever see such + // a transfer, we are safe (for now). + + std::string mime_type; + GetMimeType(&mime_type); + if (std::string::npos != mime_type.find_first_of("text/html")) { + // Suspicious case: Advertised dictionary, but server didn't use sdch, even + // though it is text_html content. + if (encoding_types->empty()) + SdchManager::SdchErrorRecovery(SdchManager::ADDED_CONTENT_ENCODING); + else if (encoding_types->size() == 1) + SdchManager::SdchErrorRecovery(SdchManager::FIXED_CONTENT_ENCODING); + else + SdchManager::SdchErrorRecovery(SdchManager::FIXED_CONTENT_ENCODINGS); + encoding_types->clear(); + encoding_types->push_back("sdch"); // Handle SDCH/GZIP-opt encoding. 
+ } + return !encoding_types->empty(); } +bool URLRequestHttpJob::IsSdchResponse() const { + return response_info_ && + (request_info_.load_flags & net::LOAD_SDCH_DICTIONARY_ADVERTISED); +} + bool URLRequestHttpJob::IsRedirectResponse(GURL* location, int* http_status_code) { if (!response_info_) @@ -516,9 +570,11 @@ void URLRequestHttpJob::AddExtraHeaders() { std::string avail_dictionaries; SdchManager::Global()->GetAvailDictionaryList(request_->url(), &avail_dictionaries); - if (!avail_dictionaries.empty()) + if (!avail_dictionaries.empty()) { request_info_.extra_headers += "Avail-Dictionary: " + avail_dictionaries + "\r\n"; + request_info_.load_flags |= net::LOAD_SDCH_DICTIONARY_ADVERTISED; + } scoped_ptr<FileVersionInfo> file_version_info( FileVersionInfo::CreateFileVersionInfoForCurrentModule()); diff --git a/net/url_request/url_request_http_job.h b/net/url_request/url_request_http_job.h index 908db2c..b984722 100644 --- a/net/url_request/url_request_http_job.h +++ b/net/url_request/url_request_http_job.h @@ -42,6 +42,7 @@ class URLRequestHttpJob : public URLRequestJob { virtual bool GetResponseCookies(std::vector<std::string>* cookies); virtual int GetResponseCode(); virtual bool GetContentEncodings(std::vector<std::string>* encoding_type); + virtual bool IsSdchResponse() const; virtual bool IsRedirectResponse(GURL* location, int* http_status_code); virtual bool IsSafeRedirect(const GURL& location); virtual bool NeedsAuth(); diff --git a/net/url_request/url_request_job.cc b/net/url_request/url_request_job.cc index 0501286..f4924bb 100644 --- a/net/url_request/url_request_job.cc +++ b/net/url_request/url_request_job.cc @@ -52,8 +52,10 @@ void URLRequestJob::SetupFilter() { std::string mime_type; GetMimeType(&mime_type); filter_.reset(Filter::Factory(encoding_types, mime_type, kFilterBufSize)); - if (filter_.get()) + if (filter_.get()) { filter_->SetURL(request_->url()); + filter_->SetMimeType(mime_type); + } } } diff --git a/net/url_request/url_request_job.h 
b/net/url_request/url_request_job.h index 1e4a089..2df62fd 100644 --- a/net/url_request/url_request_job.h +++ b/net/url_request/url_request_job.h @@ -123,6 +123,10 @@ class URLRequestJob : public base::RefCountedThreadSafe<URLRequestJob> { return false; } + // Find out if this is a response to a request that advertised an SDCH + // dictionary. Only makes sense for some types of requests. + virtual bool IsSdchResponse() const { return false; } + // Called to setup stream filter for this request. An example of filter is // content encoding/decoding. void SetupFilter(); |