author    | jar@google.com <jar@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-10-27 17:39:28 +0000
committer | jar@google.com <jar@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-10-27 17:39:28 +0000
commit    | 423041b0a7182411149472ae1e7fd87317173097 (patch)
tree      | 494092141aa32f87e6bf02159579ae590d0a3fbf /net/url_request
parent    | c05ef7e4fe5aaadb4193217209a11440bd4d2c27 (diff)
Clean up filter and content encoding handling
Centralize translation functions (text of "Content-Encoding" to enum)
in filter.cc
Centralize error recovery (for damaged content encoding headers) in filter.cc
Error recovery includes handling the loss of SDCH encoding headers, plus
handling of an Apache server bug where gzip files are tagged as also being
gzip encoded.
Centralize the logic and add a pile of unit tests for this filter code.
r=openvcdiff,huanr
Review URL: http://codereview.chromium.org/8018
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@4004 0039d316-1c4b-4281-b951-d872f2087c98
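For readers following the refactor: below is a minimal sketch of the kind of centralized translation this change moves into net/base/filter.cc. The Filter::ConvertEncodingToType name and the FILTER_TYPE_SDCH / FILTER_TYPE_GZIP enum values appear in the diff below; the other enum names and the matching logic here are assumptions for illustration, not the committed implementation.

```cpp
// Hypothetical sketch only: map a Content-Encoding token to a filter enum.
// FILTER_TYPE_DEFLATE, FILTER_TYPE_BZIP2, and FILTER_TYPE_UNSUPPORTED are
// assumed names; only FILTER_TYPE_SDCH and FILTER_TYPE_GZIP are visible in
// this change.
Filter::FilterType Filter::ConvertEncodingToType(
    const std::string& filter_type) {
  if (LowerCaseEqualsASCII(filter_type, "deflate"))
    return FILTER_TYPE_DEFLATE;
  if (LowerCaseEqualsASCII(filter_type, "gzip") ||
      LowerCaseEqualsASCII(filter_type, "x-gzip"))
    return FILTER_TYPE_GZIP;
  if (LowerCaseEqualsASCII(filter_type, "bzip2"))
    return FILTER_TYPE_BZIP2;
  if (LowerCaseEqualsASCII(filter_type, "sdch"))
    return FILTER_TYPE_SDCH;
  // Unrecognized encodings pass through unfiltered.
  return FILTER_TYPE_UNSUPPORTED;
}
```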
Diffstat (limited to 'net/url_request')
-rw-r--r-- | net/url_request/url_request_http_job.cc | 121
-rw-r--r-- | net/url_request/url_request_http_job.h | 3
-rw-r--r-- | net/url_request/url_request_job.cc | 8
-rw-r--r-- | net/url_request/url_request_job.h | 14
4 files changed, 55 insertions, 91 deletions
diff --git a/net/url_request/url_request_http_job.cc b/net/url_request/url_request_http_job.cc
index d6286af..49a8925 100644
--- a/net/url_request/url_request_http_job.cc
+++ b/net/url_request/url_request_http_job.cc
@@ -10,6 +10,7 @@
 #include "base/message_loop.h"
 #include "base/string_util.h"
 #include "net/base/cookie_monster.h"
+#include "net/base/filter.h"
 #include "net/base/load_flags.h"
 #include "net/base/net_errors.h"
 #include "net/base/net_util.h"
@@ -162,67 +163,24 @@ int URLRequestHttpJob::GetResponseCode() {
 }
 
 bool URLRequestHttpJob::GetContentEncodings(
-    std::vector<std::string>* encoding_types) {
+    std::vector<Filter::FilterType>* encoding_types) {
   DCHECK(transaction_.get());
-
   if (!response_info_)
     return false;
+  DCHECK(encoding_types->empty());
 
   std::string encoding_type;
   void* iter = NULL;
   while (response_info_->headers->EnumerateHeader(&iter, "Content-Encoding",
                                                   &encoding_type)) {
-    encoding_types->push_back(StringToLowerASCII(encoding_type));
+    encoding_types->push_back(Filter::ConvertEncodingToType(encoding_type));
   }
 
-  // TODO(jar): Transition to returning enums, rather than strings, and perform
-  // all content encoding fixups here, rather than doing some in the
-  // FilterFactor(). Note that enums generated can be more specific than mere
-  // restatement of strings. For example, rather than just having a GZIP
-  // encoding we can have a GZIP_OPTIONAL encoding to help with odd SDCH related
-  // fixups.
-
-  // TODO(jar): Refactor code so that content-encoding error recovery is
-  // testable via unit tests.
-
-  if (!IsSdchResponse())
-    return !encoding_types->empty();
-
-  // If content encoding included SDCH, then everything is fine.
-  if (!encoding_types->empty() && ("sdch" == encoding_types->front()))
-    return !encoding_types->empty();
-
-  // SDCH "search results" protective hack: To make sure we don't break the only
-  // currently deployed SDCH enabled server, be VERY cautious about proxies that
-  // strip all content-encoding to not include sdch. IF we don't see content
-  // encodings that seem to match what we'd expect from a server that asked us
-  // to use a dictionary (and we advertised said dictionary in the GET), then
-  // we set the encoding to (try to) use SDCH to decode. Note that SDCH will
-  // degrade into a pass-through filter if it doesn't have a viable dictionary
-  // hash in its header. Also note that a solo "sdch" will implicitly create
-  // a "sdch,gzip" decoding filter, where the gzip portion will degrade to a
-  // pass through if a gzip header is not encountered. Hence we can replace
-  // "gzip" with "sdch" and "everything will work."
-  // The one failure mode comes when we advertise a dictionary, and the server
-  // tries to *send* a gzipped file (not gzip encode content), and then we could
-  // do a gzip decode :-(. Since current server support does not ever see such
-  // a transfer, we are safe (for now).
-
-  std::string mime_type;
-  GetMimeType(&mime_type);
-  if (std::string::npos != mime_type.find_first_of("text/html")) {
-    // Suspicious case: Advertised dictionary, but server didn't use sdch, even
-    // though it is text_html content.
-    if (encoding_types->empty())
-      SdchManager::SdchErrorRecovery(SdchManager::ADDED_CONTENT_ENCODING);
-    else if (encoding_types->size() == 1)
-      SdchManager::SdchErrorRecovery(SdchManager::FIXED_CONTENT_ENCODING);
-    else
-      SdchManager::SdchErrorRecovery(SdchManager::FIXED_CONTENT_ENCODINGS);
-    encoding_types->clear();
-    encoding_types->push_back("sdch");  // Handle SDCH/GZIP-opt encoding.
+  if (!encoding_types->empty()) {
+    std::string mime_type;
+    GetMimeType(&mime_type);
+    Filter::FixupEncodingTypes(IsSdchResponse(), mime_type, encoding_types);
   }
-
   return !encoding_types->empty();
 }
 
@@ -534,6 +492,39 @@ void URLRequestHttpJob::StartTransaction() {
 }
 
 void URLRequestHttpJob::AddExtraHeaders() {
+  // Supply Accept-Encoding headers first so that it is more likely that they
+  // will be in the first transmitted packet. This can sometimes make it easier
+  // to filter and analyze the streams to assure that a proxy has not damaged
+  // these headers. Some proxies deliberately corrupt Accept-Encoding headers.
+  if (!SdchManager::Global() ||
+      !SdchManager::Global()->IsInSupportedDomain(request_->url())) {
+    // Tell the server what compression formats we support (other than SDCH).
+    request_info_.extra_headers += "Accept-Encoding: gzip,deflate,bzip2\r\n";
+  } else {
+    // Supply SDCH related headers, as well as accepting that encoding.
+    // Tell the server what compression formats we support.
+    request_info_.extra_headers += "Accept-Encoding: "
+        "gzip,deflate,bzip2,sdch\r\n";
+
+    // TODO(jar): See if it is worth optimizing away these bytes when the URL is
+    // probably an img or such. (and SDCH encoding is not likely).
+    std::string avail_dictionaries;
+    SdchManager::Global()->GetAvailDictionaryList(request_->url(),
+                                                  &avail_dictionaries);
+    if (!avail_dictionaries.empty()) {
+      request_info_.extra_headers += "Avail-Dictionary: "
+          + avail_dictionaries + "\r\n";
+      request_info_.load_flags |= net::LOAD_SDCH_DICTIONARY_ADVERTISED;
+    }
+
+    scoped_ptr<FileVersionInfo> file_version_info(
+        FileVersionInfo::CreateFileVersionInfoForCurrentModule());
+    request_info_.extra_headers += "X-SDCH: Chrome ";
+    request_info_.extra_headers +=
+        WideToASCII(file_version_info->product_version());
+    request_info_.extra_headers += "\r\n";
+  }
+
   URLRequestContext* context = request_->context();
   if (context) {
     // Add in the cookie header. TODO might we need more than one header?
@@ -553,36 +544,6 @@ void URLRequestHttpJob::AddExtraHeaders() {
     request_info_.extra_headers += "Accept-Charset: " +
         context->accept_charset() + "\r\n";
   }
-
-  if (!SdchManager::Global() ||
-      !SdchManager::Global()->IsInSupportedDomain(request_->url())) {
-    // Tell the server what compression formats we support (other than SDCH).
-    request_info_.extra_headers += "Accept-Encoding: gzip,deflate,bzip2\r\n";
-    return;
-  }
-
-  // Supply SDCH related headers, as well as accepting that encoding.
-
-  // TODO(jar): See if it is worth optimizing away these bytes when the URL is
-  // probably an img or such. (and SDCH encoding is not likely).
-  std::string avail_dictionaries;
-  SdchManager::Global()->GetAvailDictionaryList(request_->url(),
-                                                &avail_dictionaries);
-  if (!avail_dictionaries.empty()) {
-    request_info_.extra_headers += "Avail-Dictionary: "
-        + avail_dictionaries + "\r\n";
-    request_info_.load_flags |= net::LOAD_SDCH_DICTIONARY_ADVERTISED;
-  }
-
-  scoped_ptr<FileVersionInfo> file_version_info(
-      FileVersionInfo::CreateFileVersionInfoForCurrentModule());
-  request_info_.extra_headers += "X-SDCH: Chrome ";
-  request_info_.extra_headers +=
-      WideToASCII(file_version_info->product_version());
-  request_info_.extra_headers += "\r\n";
-
-  // Tell the server what compression formats we support.
-  request_info_.extra_headers += "Accept-Encoding: gzip,deflate,bzip2,sdch\r\n";
 }
 
 void URLRequestHttpJob::FetchResponseCookies() {
diff --git a/net/url_request/url_request_http_job.h b/net/url_request/url_request_http_job.h
index c8e3ba2..f36a072 100644
--- a/net/url_request/url_request_http_job.h
+++ b/net/url_request/url_request_http_job.h
@@ -41,7 +41,8 @@ class URLRequestHttpJob : public URLRequestJob {
   virtual void GetResponseInfo(net::HttpResponseInfo* info);
   virtual bool GetResponseCookies(std::vector<std::string>* cookies);
   virtual int GetResponseCode();
-  virtual bool GetContentEncodings(std::vector<std::string>* encoding_type);
+  virtual bool GetContentEncodings(
+      std::vector<Filter::FilterType>* encoding_type);
   virtual bool IsSdchResponse() const;
   virtual bool IsRedirectResponse(GURL* location, int* http_status_code);
   virtual bool IsSafeRedirect(const GURL& location);
diff --git a/net/url_request/url_request_job.cc b/net/url_request/url_request_job.cc
index f4924bb..10323f6 100644
--- a/net/url_request/url_request_job.cc
+++ b/net/url_request/url_request_job.cc
@@ -47,12 +47,12 @@ void URLRequestJob::DetachRequest() {
 }
 
 void URLRequestJob::SetupFilter() {
-  std::vector<std::string> encoding_types;
+  std::vector<Filter::FilterType> encoding_types;
   if (GetContentEncodings(&encoding_types)) {
-    std::string mime_type;
-    GetMimeType(&mime_type);
-    filter_.reset(Filter::Factory(encoding_types, mime_type, kFilterBufSize));
+    filter_.reset(Filter::Factory(encoding_types, kFilterBufSize));
     if (filter_.get()) {
+      std::string mime_type;
+      GetMimeType(&mime_type);
       filter_->SetURL(request_->url());
       filter_->SetMimeType(mime_type);
     }
diff --git a/net/url_request/url_request_job.h b/net/url_request/url_request_job.h
index 2df62fd..6b309be 100644
--- a/net/url_request/url_request_job.h
+++ b/net/url_request/url_request_job.h
@@ -114,12 +114,14 @@ class URLRequestJob : public base::RefCountedThreadSafe<URLRequestJob> {
   // some types of requests. Returns true on success. Calling this on a request
   // that doesn't have or specify an encoding type will return false.
   // Returns a array of strings showing the sequential encodings used on the
-  // content. For example, types[0] = "sdch" and types[1] = gzip, means the
-  // content was first encoded by sdch, and then encoded by gzip. To decode,
-  // a series of filters must be applied in the reverse order (in the above
-  // example, ungzip first, and then sdch expand).
-  // TODO(jar): Cleaner API would return an array of enums.
-  virtual bool GetContentEncodings(std::vector<std::string>* encoding_types) {
+  // content.
+  // For example, encoding_types[0] = FILTER_TYPE_SDCH and encoding_types[1] =
+  // FILTER_TYPE_GZIP, means the content was first encoded by sdch, and then
+  // result was encoded by gzip. To decode, a series of filters must be applied
+  // in the reverse order (in the above example, ungzip first, and then sdch
+  // expand).
+  virtual bool GetContentEncodings(
+      std::vector<Filter::FilterType>* encoding_types) {
     return false;
   }