Provide error reporting and recovery for SDCH responses not market as HTML

Some AV software may actually strip content type as well as content encoding. As a result, when we propose a dicitonary (suggesting that this will probably be an SDCH encoded response) we need to be ready to "fix" the content-encoding string if it is not marked as gzip,sdch. Note that "fixups" put in tentative decodes, which degrade to pass-through filters if the content's header is not compatible with the decoding format. I also added a line of defensive coding in the dictionary fetcher, and cleaned up a line where I record stats after decoding sdch. bug=7679 r=openvcdiff,huanr Review URL: http://codereview.chromium.org/27016 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@10143 0039d316-1c4b-4281-b951-d872f2087c98
author: jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-02-21 06:11:45 +0000
committer: jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-02-21 06:11:45 +0000
commit: a41fae8db8109bfeed047015b77b0d9227118540 (patch)
tree: 62ffb92aedeb660c0def2f6d5e187ba8161291e6
parent: 46ec902d42373598dd4a1d30da48690109f6571c (diff)
download: chromium_src-a41fae8db8109bfeed047015b77b0d9227118540.zip
chromium_src-a41fae8db8109bfeed047015b77b0d9227118540.tar.gz
chromium_src-a41fae8db8109bfeed047015b77b0d9227118540.tar.bz2
5 files changed, 58 insertions, 20 deletions
diff --git a/net/base/filter.cc b/net/base/filter.cc
index 19b7876..730aef50 100644
--- a/net/base/filter.cc
+++ b/net/base/filter.cc
@@ -136,26 +136,40 @@ void Filter::FixupEncodingTypes(
   // tries to *send* a gzipped file (not gzip encode content), and then we could
   // do a gzip decode :-(.  Since current server support does not ever see such
   // a transfer, we are safe (for now).
-  if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {
-    // Suspicious case: Advertised dictionary, but server didn't use sdch, even
-    // though it is text_html content.
-    if (encoding_types->empty())
+  if (StartsWithASCII(mime_type, kTextHtml, false)) {
+    // Suspicious case: Advertised dictionary, but server didn't use sdch, and
+    // we're HTML tagged.
+    if (encoding_types->empty()) {
       SdchManager::SdchErrorRecovery(SdchManager::ADDED_CONTENT_ENCODING);
-    else if (1 == encoding_types->size())
+    } else if (1 == encoding_types->size()) {
       SdchManager::SdchErrorRecovery(SdchManager::FIXED_CONTENT_ENCODING);
-    else
+    } else {
       SdchManager::SdchErrorRecovery(SdchManager::FIXED_CONTENT_ENCODINGS);
-    encoding_types->clear();
-    encoding_types->push_back(FILTER_TYPE_SDCH_POSSIBLE);
-    encoding_types->push_back(FILTER_TYPE_GZIP_HELPING_SDCH);
-    return;
+    }
+  } else {
+    // Remarkable case!?!  We advertised an SDCH dictionary, content-encoding
+    // was not marked for SDCH processing: Why did the server suggest an SDCH
+    // dictionary in the first place??.  Also, the content isn't
+    // tagged as HTML, despite the fact that SDCH encoding is mostly likely for
+    // HTML: Did some anti-virus system strip this tag (sometimes they strip
+    // accept-encoding headers on the request)??  Does the content encoding not
+    // start with "text/html" for some other reason??  We'll report this as a
+    // fixup to a binary file, but it probably really is text/html (some how).
+    if (encoding_types->empty()) {
+      SdchManager::SdchErrorRecovery(
+          SdchManager::BINARY_ADDED_CONTENT_ENCODING);
+    } else if (1 == encoding_types->size()) {
+      SdchManager::SdchErrorRecovery(
+          SdchManager::BINARY_FIXED_CONTENT_ENCODING);
+    } else {
+      SdchManager::SdchErrorRecovery(
+          SdchManager::BINARY_FIXED_CONTENT_ENCODINGS);
+    }
   }
 
-  // It didn't have SDCH encoding... but it wasn't HTML... so maybe it really
-  // wasn't SDCH encoded.  It would be nice if we knew this, and didn't bother
-  // to propose a dictionary etc., but current SDCH spec does not provide a nice
-  // way for us to conclude that.  Perhaps in the future, this case will be much
-  // more rare.
+  encoding_types->clear();
+  encoding_types->push_back(FILTER_TYPE_SDCH_POSSIBLE);
+  encoding_types->push_back(FILTER_TYPE_GZIP_HELPING_SDCH);
   return;
 }
 
diff --git a/net/base/filter_unittest.cc b/net/base/filter_unittest.cc
index 0d306e1..c9e7308 100644
--- a/net/base/filter_unittest.cc
+++ b/net/base/filter_unittest.cc
@@ -126,8 +126,22 @@ TEST(FilterTest, MissingSdchEncoding) {
   EXPECT_EQ(Filter::FILTER_TYPE_SDCH_POSSIBLE, encoding_types[0]);
   EXPECT_EQ(Filter::FILTER_TYPE_GZIP_HELPING_SDCH, encoding_types[1]);
 
+  // Loss of encoding, but it was an SDCH response with a prefix that says it
+  // was an html type.  Note that it *should* be the case that a precise match
+  // with "text/html" we be collected by GetMimeType() and passed in, but we
+  // coded the fixup defensively (scanning for a prexif of "text/html", so this
+  // is an example which could survive such confusion in the caller.
+  encoding_types.clear();
+  Filter::FixupEncodingTypes(is_sdch_response, "text/html; charset=UTF-8",
+                             &encoding_types);
+  EXPECT_EQ(2U, encoding_types.size());
+  EXPECT_EQ(Filter::FILTER_TYPE_SDCH_POSSIBLE, encoding_types[0]);
+  EXPECT_EQ(Filter::FILTER_TYPE_GZIP_HELPING_SDCH, encoding_types[1]);
+
   // No encoding, but it was an SDCH response with non-html type.
   encoding_types.clear();
   Filter::FixupEncodingTypes(is_sdch_response, "other/mime", &encoding_types);
-  EXPECT_TRUE(encoding_types.empty());
+  EXPECT_EQ(2U, encoding_types.size());
+  EXPECT_EQ(Filter::FILTER_TYPE_SDCH_POSSIBLE, encoding_types[0]);
+  EXPECT_EQ(Filter::FILTER_TYPE_GZIP_HELPING_SDCH, encoding_types[1]);
 }
diff --git a/net/base/sdch_filter.cc b/net/base/sdch_filter.cc
index 39a1fe3..37d5871 100644
--- a/net/base/sdch_filter.cc
+++ b/net/base/sdch_filter.cc
@@ -47,8 +47,7 @@ SdchFilter::~SdchFilter() {
 
   if (!was_cached()
       && base::Time() != connect_time()
-      && read_times_.size() > 0
-      && base::Time() != read_times_.back()) {
+      && read_times_.size() > 0) {
     base::TimeDelta duration = read_times_.back() - connect_time();
     // We clip our logging at 10 minutes to prevent anamolous data from being
     // considered (per suggestion from Jake Brutlag).
diff --git a/net/base/sdch_manager.h b/net/base/sdch_manager.h
index d94ea7d..023175c 100644
--- a/net/base/sdch_manager.h
+++ b/net/base/sdch_manager.h
@@ -60,6 +60,10 @@ class SdchManager {
     ADDED_CONTENT_ENCODING = 1,
     FIXED_CONTENT_ENCODING = 2,
     FIXED_CONTENT_ENCODINGS = 3,
+    // Content encoding correction when we're not even tagged as HTML!?!
+    BINARY_ADDED_CONTENT_ENCODING = 4,
+    BINARY_FIXED_CONTENT_ENCODING = 5,
+    BINARY_FIXED_CONTENT_ENCODINGS = 6,
 
     // Content decoding errors.
     DECODE_HEADER_ERROR = 4,
diff --git a/net/url_request/url_request_http_job.cc b/net/url_request/url_request_http_job.cc
index 0f65534..32c6ed2 100644
--- a/net/url_request/url_request_http_job.cc
+++ b/net/url_request/url_request_http_job.cc
@@ -70,8 +70,15 @@ URLRequestHttpJob::~URLRequestHttpJob() {
     // Prior to reaching the destructor, request_ has been set to a NULL
     // pointer, so request_->url() is no longer valid in the destructor, and we
     // use an alternate copy |request_info_.url|.
-    SdchManager::Global()->FetchDictionary(request_info_.url,
-                                           sdch_dictionary_url_);
+    SdchManager* manager = SdchManager::Global();
+    // To be extra safe, since this is a "different time" from when we decided
+    // to get the dictionary, we'll validate that an SdchManager is available.
+    // At shutdown time, care is taken to be sure that we don't delete this
+    // globally useful instance "too soon," so this check is just defensive
+    // coding to assure that IF the system is shutting down, we don't have any
+    // problem if the manager was deleted ahead of time.
+    if (manager)  // Defensive programming.
+      manager->FetchDictionary(request_info_.url, sdch_dictionary_url_);
   }
 }
author	jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-02-21 06:11:45 +0000
committer	jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-02-21 06:11:45 +0000
commit	a41fae8db8109bfeed047015b77b0d9227118540 (patch)
tree	62ffb92aedeb660c0def2f6d5e187ba8161291e6
parent	46ec902d42373598dd4a1d30da48690109f6571c (diff)
download	chromium_src-a41fae8db8109bfeed047015b77b0d9227118540.zip chromium_src-a41fae8db8109bfeed047015b77b0d9227118540.tar.gz chromium_src-a41fae8db8109bfeed047015b77b0d9227118540.tar.bz2