summaryrefslogtreecommitdiffstats
path: root/net/url_request
diff options
context:
space:
mode:
authorjar@google.com <jar@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-27 17:39:28 +0000
committerjar@google.com <jar@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-27 17:39:28 +0000
commit423041b0a7182411149472ae1e7fd87317173097 (patch)
tree494092141aa32f87e6bf02159579ae590d0a3fbf /net/url_request
parentc05ef7e4fe5aaadb4193217209a11440bd4d2c27 (diff)
downloadchromium_src-423041b0a7182411149472ae1e7fd87317173097.zip
chromium_src-423041b0a7182411149472ae1e7fd87317173097.tar.gz
chromium_src-423041b0a7182411149472ae1e7fd87317173097.tar.bz2
Clean up filter and content encoding handling
Centralize translation functions (text of "Content-Encoding" to enum) in filter.cc. Centralize error recovery (for damaged content encoding headers) in filter.cc. Error recovery includes handling the loss of SDCH encoding headers, plus handling of an Apache server bug where gzip files are tagged as also being gzip encoded. Centralize and add a pile of unit tests to this filter code. r=openvcdiff,huanr Review URL: http://codereview.chromium.org/8018 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@4004 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/url_request')
-rw-r--r--net/url_request/url_request_http_job.cc121
-rw-r--r--net/url_request/url_request_http_job.h3
-rw-r--r--net/url_request/url_request_job.cc8
-rw-r--r--net/url_request/url_request_job.h14
4 files changed, 55 insertions, 91 deletions
diff --git a/net/url_request/url_request_http_job.cc b/net/url_request/url_request_http_job.cc
index d6286af..49a8925 100644
--- a/net/url_request/url_request_http_job.cc
+++ b/net/url_request/url_request_http_job.cc
@@ -10,6 +10,7 @@
#include "base/message_loop.h"
#include "base/string_util.h"
#include "net/base/cookie_monster.h"
+#include "net/base/filter.h"
#include "net/base/load_flags.h"
#include "net/base/net_errors.h"
#include "net/base/net_util.h"
@@ -162,67 +163,24 @@ int URLRequestHttpJob::GetResponseCode() {
}
bool URLRequestHttpJob::GetContentEncodings(
- std::vector<std::string>* encoding_types) {
+ std::vector<Filter::FilterType>* encoding_types) {
DCHECK(transaction_.get());
-
if (!response_info_)
return false;
+ DCHECK(encoding_types->empty());
std::string encoding_type;
void* iter = NULL;
while (response_info_->headers->EnumerateHeader(&iter, "Content-Encoding",
&encoding_type)) {
- encoding_types->push_back(StringToLowerASCII(encoding_type));
+ encoding_types->push_back(Filter::ConvertEncodingToType(encoding_type));
}
- // TODO(jar): Transition to returning enums, rather than strings, and perform
- // all content encoding fixups here, rather than doing some in the
- // FilterFactor(). Note that enums generated can be more specific than mere
- // restatement of strings. For example, rather than just having a GZIP
- // encoding we can have a GZIP_OPTIONAL encoding to help with odd SDCH related
- // fixups.
-
- // TODO(jar): Refactor code so that content-encoding error recovery is
- // testable via unit tests.
-
- if (!IsSdchResponse())
- return !encoding_types->empty();
-
- // If content encoding included SDCH, then everything is fine.
- if (!encoding_types->empty() && ("sdch" == encoding_types->front()))
- return !encoding_types->empty();
-
- // SDCH "search results" protective hack: To make sure we don't break the only
- // currently deployed SDCH enabled server, be VERY cautious about proxies that
- // strip all content-encoding to not include sdch. IF we don't see content
- // encodings that seem to match what we'd expect from a server that asked us
- // to use a dictionary (and we advertised said dictionary in the GET), then
- // we set the encoding to (try to) use SDCH to decode. Note that SDCH will
- // degrade into a pass-through filter if it doesn't have a viable dictionary
- // hash in its header. Also note that a solo "sdch" will implicitly create
- // a "sdch,gzip" decoding filter, where the gzip portion will degrade to a
- // pass through if a gzip header is not encountered. Hence we can replace
- // "gzip" with "sdch" and "everything will work."
- // The one failure mode comes when we advertise a dictionary, and the server
- // tries to *send* a gzipped file (not gzip encode content), and then we could
- // do a gzip decode :-(. Since current server support does not ever see such
- // a transfer, we are safe (for now).
-
- std::string mime_type;
- GetMimeType(&mime_type);
- if (std::string::npos != mime_type.find_first_of("text/html")) {
- // Suspicious case: Advertised dictionary, but server didn't use sdch, even
- // though it is text_html content.
- if (encoding_types->empty())
- SdchManager::SdchErrorRecovery(SdchManager::ADDED_CONTENT_ENCODING);
- else if (encoding_types->size() == 1)
- SdchManager::SdchErrorRecovery(SdchManager::FIXED_CONTENT_ENCODING);
- else
- SdchManager::SdchErrorRecovery(SdchManager::FIXED_CONTENT_ENCODINGS);
- encoding_types->clear();
- encoding_types->push_back("sdch"); // Handle SDCH/GZIP-opt encoding.
+ if (!encoding_types->empty()) {
+ std::string mime_type;
+ GetMimeType(&mime_type);
+ Filter::FixupEncodingTypes(IsSdchResponse(), mime_type, encoding_types);
}
-
return !encoding_types->empty();
}
@@ -534,6 +492,39 @@ void URLRequestHttpJob::StartTransaction() {
}
void URLRequestHttpJob::AddExtraHeaders() {
+ // Supply Accept-Encoding headers first so that it is more likely that they
+ // will be in the first transmitted packet. This can sometimes make it easier
+ // to filter and analyze the streams to assure that a proxy has not damaged
+ // these headers. Some proxies deliberately corrupt Accept-Encoding headers.
+ if (!SdchManager::Global() ||
+ !SdchManager::Global()->IsInSupportedDomain(request_->url())) {
+ // Tell the server what compression formats we support (other than SDCH).
+ request_info_.extra_headers += "Accept-Encoding: gzip,deflate,bzip2\r\n";
+ } else {
+ // Supply SDCH related headers, as well as accepting that encoding.
+ // Tell the server what compression formats we support.
+ request_info_.extra_headers += "Accept-Encoding: "
+ "gzip,deflate,bzip2,sdch\r\n";
+
+ // TODO(jar): See if it is worth optimizing away these bytes when the URL is
+ // probably an img or such. (and SDCH encoding is not likely).
+ std::string avail_dictionaries;
+ SdchManager::Global()->GetAvailDictionaryList(request_->url(),
+ &avail_dictionaries);
+ if (!avail_dictionaries.empty()) {
+ request_info_.extra_headers += "Avail-Dictionary: "
+ + avail_dictionaries + "\r\n";
+ request_info_.load_flags |= net::LOAD_SDCH_DICTIONARY_ADVERTISED;
+ }
+
+ scoped_ptr<FileVersionInfo> file_version_info(
+ FileVersionInfo::CreateFileVersionInfoForCurrentModule());
+ request_info_.extra_headers += "X-SDCH: Chrome ";
+ request_info_.extra_headers +=
+ WideToASCII(file_version_info->product_version());
+ request_info_.extra_headers += "\r\n";
+ }
+
URLRequestContext* context = request_->context();
if (context) {
// Add in the cookie header. TODO might we need more than one header?
@@ -553,36 +544,6 @@ void URLRequestHttpJob::AddExtraHeaders() {
request_info_.extra_headers += "Accept-Charset: " +
context->accept_charset() + "\r\n";
}
-
- if (!SdchManager::Global() ||
- !SdchManager::Global()->IsInSupportedDomain(request_->url())) {
- // Tell the server what compression formats we support (other than SDCH).
- request_info_.extra_headers += "Accept-Encoding: gzip,deflate,bzip2\r\n";
- return;
- }
-
- // Supply SDCH related headers, as well as accepting that encoding.
-
- // TODO(jar): See if it is worth optimizing away these bytes when the URL is
- // probably an img or such. (and SDCH encoding is not likely).
- std::string avail_dictionaries;
- SdchManager::Global()->GetAvailDictionaryList(request_->url(),
- &avail_dictionaries);
- if (!avail_dictionaries.empty()) {
- request_info_.extra_headers += "Avail-Dictionary: "
- + avail_dictionaries + "\r\n";
- request_info_.load_flags |= net::LOAD_SDCH_DICTIONARY_ADVERTISED;
- }
-
- scoped_ptr<FileVersionInfo> file_version_info(
- FileVersionInfo::CreateFileVersionInfoForCurrentModule());
- request_info_.extra_headers += "X-SDCH: Chrome ";
- request_info_.extra_headers +=
- WideToASCII(file_version_info->product_version());
- request_info_.extra_headers += "\r\n";
-
- // Tell the server what compression formats we support.
- request_info_.extra_headers += "Accept-Encoding: gzip,deflate,bzip2,sdch\r\n";
}
void URLRequestHttpJob::FetchResponseCookies() {
diff --git a/net/url_request/url_request_http_job.h b/net/url_request/url_request_http_job.h
index c8e3ba2..f36a072 100644
--- a/net/url_request/url_request_http_job.h
+++ b/net/url_request/url_request_http_job.h
@@ -41,7 +41,8 @@ class URLRequestHttpJob : public URLRequestJob {
virtual void GetResponseInfo(net::HttpResponseInfo* info);
virtual bool GetResponseCookies(std::vector<std::string>* cookies);
virtual int GetResponseCode();
- virtual bool GetContentEncodings(std::vector<std::string>* encoding_type);
+ virtual bool GetContentEncodings(
+ std::vector<Filter::FilterType>* encoding_type);
virtual bool IsSdchResponse() const;
virtual bool IsRedirectResponse(GURL* location, int* http_status_code);
virtual bool IsSafeRedirect(const GURL& location);
diff --git a/net/url_request/url_request_job.cc b/net/url_request/url_request_job.cc
index f4924bb..10323f6 100644
--- a/net/url_request/url_request_job.cc
+++ b/net/url_request/url_request_job.cc
@@ -47,12 +47,12 @@ void URLRequestJob::DetachRequest() {
}
void URLRequestJob::SetupFilter() {
- std::vector<std::string> encoding_types;
+ std::vector<Filter::FilterType> encoding_types;
if (GetContentEncodings(&encoding_types)) {
- std::string mime_type;
- GetMimeType(&mime_type);
- filter_.reset(Filter::Factory(encoding_types, mime_type, kFilterBufSize));
+ filter_.reset(Filter::Factory(encoding_types, kFilterBufSize));
if (filter_.get()) {
+ std::string mime_type;
+ GetMimeType(&mime_type);
filter_->SetURL(request_->url());
filter_->SetMimeType(mime_type);
}
diff --git a/net/url_request/url_request_job.h b/net/url_request/url_request_job.h
index 2df62fd..6b309be 100644
--- a/net/url_request/url_request_job.h
+++ b/net/url_request/url_request_job.h
@@ -114,12 +114,14 @@ class URLRequestJob : public base::RefCountedThreadSafe<URLRequestJob> {
// some types of requests. Returns true on success. Calling this on a request
// that doesn't have or specify an encoding type will return false.
// Returns a array of strings showing the sequential encodings used on the
- // content. For example, types[0] = "sdch" and types[1] = gzip, means the
- // content was first encoded by sdch, and then encoded by gzip. To decode,
- // a series of filters must be applied in the reverse order (in the above
- // example, ungzip first, and then sdch expand).
- // TODO(jar): Cleaner API would return an array of enums.
- virtual bool GetContentEncodings(std::vector<std::string>* encoding_types) {
+ // content.
+ // For example, encoding_types[0] = FILTER_TYPE_SDCH and encoding_types[1] =
+ // FILTER_TYPE_GZIP, means the content was first encoded by sdch, and then
+ // result was encoded by gzip. To decode, a series of filters must be applied
+ // in the reverse order (in the above example, ungzip first, and then sdch
+ // expand).
+ virtual bool GetContentEncodings(
+ std::vector<Filter::FilterType>* encoding_types) {
return false;
}