| field | value | date |
|---|---|---|
| author | ericroman@google.com <ericroman@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-08-15 21:06:12 +0000 |
| committer | ericroman@google.com <ericroman@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-08-15 21:06:12 +0000 |
| commit | e05294e5feca88329e5d795bc927da82cd97656a (patch) | |
| tree | cae091cc7166bd3661c88872021f938657c1c699 /net | |
| parent | a88d601f7f632a21afe88359d503559fa20d9e40 (diff) | |
-Add error code for chunked encoding
-Add unit test for chunk-size > 2GB
-Allow trailing space (0x20)
-Document how other browsers parse the chunk-size
BUG=1326627
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@961 0039d316-1c4b-4281-b951-d872f2087c98
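The parsing policy this change adopts (strict hex digits, tolerate only trailing spaces, fail hard on chunk-sizes that overflow, such as the new larger-than-2GB test case) can be summarized in a short standalone sketch. This is not the committed ParseChunkSize(); the function name is hypothetical, and base::HexStringToInt / StringPiece are replaced by standard-library calls purely for illustration.

```cpp
// Hypothetical standalone sketch of the "^\X+[ ]*$" policy; not the
// committed HttpChunkedDecoder::ParseChunkSize().
#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <string>

bool ParseChunkSizeSketch(std::string s, int* out) {
  // Strip trailing spaces (0x20); yahoo.com sends e.g. "5 \r\n".
  while (!s.empty() && s.back() == ' ')
    s.pop_back();
  if (s.empty())
    return false;
  // Be stricter than strtoul(): reject leading "+", "-", "0x", and
  // whitespace by requiring every remaining character to be a hex digit.
  for (char c : s) {
    if (!std::isxdigit(static_cast<unsigned char>(c)))
      return false;
  }
  errno = 0;
  unsigned long long value = std::strtoull(s.c_str(), nullptr, 16);
  // Fail on overflow, e.g. the 20-digit chunk-size in the new unit test.
  if (errno == ERANGE || value > static_cast<unsigned long long>(INT_MAX))
    return false;
  *out = static_cast<int>(value);
  return true;
}
```

With this sketch, "5 " parses to 5, while "0x5", " 5", "+5", and "48469410265455838241" are rejected, mirroring the new tests.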
Diffstat (limited to 'net')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | net/base/net_error_list.h | 3 |
| -rw-r--r-- | net/http/http_chunked_decoder.cc | 33 |
| -rw-r--r-- | net/http/http_chunked_decoder_unittest.cc | 88 |
3 files changed, 117 insertions, 7 deletions
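Before the diff itself, a hedged sketch of how a caller observes the new error code, modeled on the unit-test use of HttpChunkedDecoder::FilterBuf() shown below; the DecodeAndCheck wrapper is hypothetical and assumes this revision's net/ headers.

```cpp
// Hypothetical caller-side sketch; mirrors the unit-test usage of
// HttpChunkedDecoder::FilterBuf() in the diff below.
#include <string>

#include "net/base/net_errors.h"
#include "net/http/http_chunked_decoder.h"

// Decodes |data| in place. On malformed chunked framing FilterBuf() now
// returns net::ERR_BAD_CHUNKED_ENCODING (-10) rather than the generic
// ERR_FAILED.
int DecodeAndCheck(HttpChunkedDecoder* decoder, std::string* data) {
  int n = decoder->FilterBuf(&(*data)[0], static_cast<int>(data->size()));
  if (n == net::ERR_BAD_CHUNKED_ENCODING) {
    // e.g. "0x5\r\n...", "+5\r\n...", or a chunk-size larger than 2^31-1.
    return n;
  }
  if (n >= 0)
    data->resize(n);  // Keep only the decoded data bytes.
  return n;
}
```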
diff --git a/net/base/net_error_list.h b/net/base/net_error_list.h
index 9a6c4f0..62696be 100644
--- a/net/base/net_error_list.h
+++ b/net/base/net_error_list.h
@@ -60,6 +60,9 @@ NET_ERROR(FILE_TOO_BIG, -8)
 // invalid assumption.
 NET_ERROR(UNEXPECTED, -9)
 
+// Error in chunked transfer encoding.
+NET_ERROR(BAD_CHUNKED_ENCODING, -10)
+
 // A connection was closed (corresponding to a TCP FIN).
 NET_ERROR(CONNECTION_CLOSED, -100)
diff --git a/net/http/http_chunked_decoder.cc b/net/http/http_chunked_decoder.cc
index 9c0bbcb..1afe0f2 100644
--- a/net/http/http_chunked_decoder.cc
+++ b/net/http/http_chunked_decoder.cc
@@ -116,7 +116,7 @@ int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
   } else if (chunk_terminator_remaining_) {
     if (buf_len) {
       DLOG(ERROR) << "chunk data not terminated properly";
-      return ERR_FAILED;
+      return ERR_BAD_CHUNKED_ENCODING;
     }
     chunk_terminator_remaining_ = false;
   } else if (buf_len) {
@@ -128,14 +128,14 @@ int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
       if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
         DLOG(ERROR) << "Failed parsing HEX from: " <<
             std::string(buf, buf_len);
-        return ERR_FAILED;
+        return ERR_BAD_CHUNKED_ENCODING;
       }
 
       if (chunk_remaining_ == 0)
         reached_last_chunk_ = true;
     } else {
       DLOG(ERROR) << "missing chunk-size";
-      return ERR_FAILED;
+      return ERR_BAD_CHUNKED_ENCODING;
     }
     line_buf_.clear();
   } else {
@@ -151,8 +151,33 @@ int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
   return bytes_consumed;
 }
 
-// static
+
+// While the HTTP 1.1 specification defines chunk-size as 1*HEX
+// some sites rely on more lenient parsing.
+// yahoo.com for example, includes trailing spaces (0x20).
+//
+// A comparison of browsers running on WindowsXP shows that
+// they will parse the following inputs (egrep syntax):
+//
+// Let \X be the character class for a hex digit: [0-9a-fA-F]
+//
+// RFC 2616: ^\X+$
+// IE7: ^\X+[^\X]*$
+// Safari 3.1: ^[\t\r ]*\X+[\t ]*$
+// Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
+// Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
+//
+// Our strategy is to be as strict as possible, while not breaking
+// known sites.
+//
+// Chromium: ^\X+[ ]*$
 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) {
+  DCHECK(len >= 0);
+
+  // Strip trailing spaces
+  while (len && start[len - 1] == ' ')
+    len--;
+
   // Be more restrictive than HexStringToInt;
   // don't allow inputs with leading "-", "+", "0x", "0X"
   if (StringPiece(start, len).find_first_not_of("0123456789abcdefABCDEF") !=
diff --git a/net/http/http_chunked_decoder_unittest.cc b/net/http/http_chunked_decoder_unittest.cc
index 36433c0..1c44b98 100644
--- a/net/http/http_chunked_decoder_unittest.cc
+++ b/net/http/http_chunked_decoder_unittest.cc
@@ -28,6 +28,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "base/basictypes.h"
+#include "net/base/net_errors.h"
 #include "net/http/http_chunked_decoder.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
@@ -64,6 +65,7 @@ void RunTestUntilFailure(const char* inputs[], size_t num_inputs, size_t fail_in
     std::string input = inputs[i];
     int n = decoder.FilterBuf(&input[0], static_cast<int>(input.size()));
     if (n < 0) {
+      EXPECT_EQ(net::ERR_BAD_CHUNKED_ENCODING, n);
       EXPECT_EQ(fail_index, i);
       return;
     }
@@ -114,6 +116,19 @@ TEST(HttpChunkedDecoderTest, Incremental) {
   RunTest(inputs, arraysize(inputs), "hello", true);
 }
 
+TEST(HttpChunkedDecoderTest, LF_InsteadOf_CRLF) {
+  // Compatibility: [RFC 2616 - Invalid]
+  // {Firefox3} - Valid
+  // {IE7, Safari3.1, Opera9.51} - Invalid
+  const char* inputs[] = {
+    "5\nhello\n",
+    "1\n \n",
+    "5\nworld\n",
+    "0\n\n"
+  };
+  RunTest(inputs, arraysize(inputs), "hello world", true);
+}
+
 TEST(HttpChunkedDecoderTest, Extensions) {
   const char* inputs[] = {
     "5;x=0\r\nhello\r\n",
@@ -142,24 +157,88 @@ TEST(HttpChunkedDecoderTest, TrailersUnfinished) {
   RunTest(inputs, arraysize(inputs), "hello", false);
 }
 
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_TooBig) {
+  const char* inputs[] = {
+    // This chunked body is not terminated.
+    // However we will fail decoding because the chunk-size
+    // number is larger than we can handle.
+    "48469410265455838241\r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
 TEST(HttpChunkedDecoderTest, InvalidChunkSize_0X) {
   const char* inputs[] = {
+    // Compatibility [RFC 2616 - Invalid]:
+    // {Safari3.1, IE7} - Invalid
+    // {Firefox3, Opera 9.51} - Valid
     "0x5\r\nhello\r\n",
     "0\r\n\r\n"
   };
   RunTestUntilFailure(inputs, arraysize(inputs), 0);
 }
 
-TEST(HttpChunkedDecoderTest, InvalidChunkSize_TrailingWhitespace) {
+TEST(HttpChunkedDecoderTest, ChunkSize_TrailingSpace) {
+  const char* inputs[] = {
+    // Compatibility [RFC 2616 - Invalid]:
+    // {IE7, Safari3.1, Firefox3, Opera 9.51} - Valid
+    //
+    // At least yahoo.com depends on this being valid.
+    "5 \r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTest(inputs, arraysize(inputs), "hello", true);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_TrailingTab) {
+  const char* inputs[] = {
+    // Compatibility [RFC 2616 - Invalid]:
+    // {IE7, Safari3.1, Firefox3, Opera 9.51} - Valid
+    "5\t\r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_TrailingFormFeed) {
+  const char* inputs[] = {
+    // Compatibility [RFC 2616 - Invalid]:
+    // {Safari3.1} - Invalid
+    // {IE7, Firefox3, Opera 9.51} - Valid
+    "5\f\r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_TrailingVerticalTab) {
+  const char* inputs[] = {
+    // Compatibility [RFC 2616 - Invalid]:
+    // {Safari 3.1} - Invalid
+    // {IE7, Firefox3, Opera 9.51} - Valid
+    "5\v\r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_TrailingNonHexDigit) {
   const char* inputs[] = {
-    "5 \r\nhello\r\n",
+    // Compatibility [RFC 2616 - Invalid]:
+    // {Safari 3.1} - Invalid
+    // {IE7, Firefox3, Opera 9.51} - Valid
+    "5H\r\nhello\r\n",
     "0\r\n\r\n"
   };
   RunTestUntilFailure(inputs, arraysize(inputs), 0);
 }
 
-TEST(HttpChunkedDecoderTest, InvalidChunkSize_LeadingWhitespace) {
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_LeadingSpace) {
   const char* inputs[] = {
+    // Compatibility [RFC 2616 - Invalid]:
+    // {IE7} - Invalid
+    // {Safari 3.1, Firefox3, Opera 9.51} - Valid
     " 5\r\nhello\r\n",
     "0\r\n\r\n"
   };
@@ -193,6 +272,9 @@ TEST(HttpChunkedDecoderTest, InvalidChunkSize_Negative) {
 
 TEST(HttpChunkedDecoderTest, InvalidChunkSize_Plus) {
   const char* inputs[] = {
+    // Compatibility [RFC 2616 - Invalid]:
+    // {IE7, Safari 3.1} - Invalid
+    // {Firefox3, Opera 9.51} - Valid
     "+5\r\nhello\r\n",
     "0\r\n\r\n"
   };