Address some issues I found in chunked decoder:

(1) stricter parsing of chunk-size (don't allow leading/trailing whitespace, redundant "+" sign, leading "0x") (2) check for negative chunk sizes, and fail early rather than hitting weird stuff (3) don't mutate the const char* returned by std::string::data() (4) fail if CRLF terminator is missing on chunk-data. (why the spec has this in first place seems unecessary, since the chunk-size already tells the story...) (5) don't allow empty CRLFs git-svn-id: svn://svn.chromium.org/chrome/trunk/src@853 0039d316-1c4b-4281-b951-d872f2087c98
author: ericroman@google.com <ericroman@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2008-08-14 02:44:45 +0000
committer: ericroman@google.com <ericroman@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2008-08-14 02:44:45 +0000
commit: d89222accb371e210c434b27787fc80c30fa758d (patch)
tree: 19c518623c133f3a70b7549017db7e3c31b929bc /net
parent: 293c0a7b4306a4cce26eb31b21587206c2d9a497 (diff)
download: chromium_src-d89222accb371e210c434b27787fc80c30fa758d.zip
chromium_src-d89222accb371e210c434b27787fc80c30fa758d.tar.gz
chromium_src-d89222accb371e210c434b27787fc80c30fa758d.tar.bz2
3 files changed, 153 insertions, 26 deletions
diff --git a/net/http/http_chunked_decoder.cc b/net/http/http_chunked_decoder.cc
index 059d792..9c0bbcb 100644
--- a/net/http/http_chunked_decoder.cc
+++ b/net/http/http_chunked_decoder.cc
@@ -41,12 +41,15 @@
 #include "net/http/http_chunked_decoder.h"
 
 #include "base/logging.h"
+#include "base/string_piece.h"
+#include "base/string_util.h"
 #include "net/base/net_errors.h"
 
 namespace net {
 
 HttpChunkedDecoder::HttpChunkedDecoder()
     : chunk_remaining_(0),
+      chunk_terminator_remaining_(false),
       reached_last_chunk_(false),
       reached_eof_(false) {
 }
@@ -63,66 +66,81 @@ int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
 
       result += num;
       buf += num;
+
+      // After each chunk's data there should be a CRLF
+      if (!chunk_remaining_)
+        chunk_terminator_remaining_ = true;
       continue;
     } else if (reached_eof_) {
       break;  // Done!
     }
 
-    // Returns bytes consumed or an error code.
-    int rv = ScanForChunkRemaining(buf, buf_len);
-    if (rv < 0)
-      return rv;
+    int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
+    if (bytes_consumed < 0)
+      return bytes_consumed; // Error
 
-    buf_len -= rv;
+    buf_len -= bytes_consumed;
     if (buf_len)
-      memmove(buf, buf + rv, buf_len);
+      memmove(buf, buf + bytes_consumed, buf_len);
   }
 
   return result;
 }
 
-int HttpChunkedDecoder::ScanForChunkRemaining(char* buf, int buf_len) {
+int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
   DCHECK(chunk_remaining_ == 0);
   DCHECK(buf_len > 0);
 
-  int result = 0;
+  int bytes_consumed = 0;
 
-  char *p = static_cast<char*>(memchr(buf, '\n', buf_len));
-  if (p) {
-    *p = 0;
-    if ((p > buf) && (*(p - 1) == '\r'))  // Eliminate a preceding CR.
-      *(p - 1) = 0;
-    result = static_cast<int>(p - buf) + 1;
+  size_t index_of_lf = StringPiece(buf, buf_len).find('\n');
+  if (index_of_lf != StringPiece::npos) {
+    buf_len = static_cast<int>(index_of_lf);
+    if (buf_len && buf[buf_len - 1] == '\r')  // Eliminate a preceding CR.
+      buf_len--;
+    bytes_consumed = static_cast<int>(index_of_lf) + 1;
 
     // Make buf point to the full line buffer to parse.
     if (!line_buf_.empty()) {
-      line_buf_.append(buf);
-      buf = const_cast<char*>(line_buf_.data());
+      line_buf_.append(buf, buf_len);
+      buf = line_buf_.data();
+      buf_len = static_cast<int>(line_buf_.size());
     }
 
     if (reached_last_chunk_) {
-      if (*buf) {
+      if (buf_len) {
         DLOG(INFO) << "ignoring http trailer";
       } else {
         reached_eof_ = true;
       }
-    } else if (*buf) {
+    } else if (chunk_terminator_remaining_) {
+       if (buf_len) {
+         DLOG(ERROR) << "chunk data not terminated properly";
+         return ERR_FAILED;
+       }
+       chunk_terminator_remaining_ = false;
+    } else if (buf_len) {
       // Ignore any chunk-extensions.
-      if ((p = strchr(buf, ';')) != NULL)
-        *p = 0;
+      size_t index_of_semicolon = StringPiece(buf, buf_len).find(';');
+      if (index_of_semicolon != StringPiece::npos)
+        buf_len = static_cast<int>(index_of_semicolon);
 
-      if (!sscanf_s(buf, "%x", &chunk_remaining_)) {
-        DLOG(ERROR) << "sscanf failed parsing HEX from: " << buf;
+      if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
+          DLOG(ERROR) << "Failed parsing HEX from: " <<
+              std::string(buf, buf_len);
         return ERR_FAILED;
       }
 
       if (chunk_remaining_ == 0)
         reached_last_chunk_ = true;
-    }
+    } else {
+      DLOG(ERROR) << "missing chunk-size";
+      return ERR_FAILED;
+    } 
     line_buf_.clear();
   } else {
     // Save the partial line; wait for more data.
-    result = buf_len;
+    bytes_consumed = buf_len;
 
     // Ignore a trailing CR
     if (buf[buf_len - 1] == '\r')
@@ -130,7 +148,17 @@ int HttpChunkedDecoder::ScanForChunkRemaining(char* buf, int buf_len) {
 
     line_buf_.append(buf, buf_len);
   }
-  return result;
+  return bytes_consumed;
+}
+
+// static
+bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) {
+  // Be more restrictive than HexStringToInt;
+  // don't allow inputs with leading "-", "+", "0x", "0X"
+  if (StringPiece(start, len).find_first_not_of("0123456789abcdefABCDEF")!=
+      StringPiece::npos)
+    return false;
+  return HexStringToInt(std::string(start, len), out);
 }
 
 }  // namespace net
diff --git a/net/http/http_chunked_decoder.h b/net/http/http_chunked_decoder.h
index 799ebfd..970d1d1 100644
--- a/net/http/http_chunked_decoder.h
+++ b/net/http/http_chunked_decoder.h
@@ -87,11 +87,19 @@ class HttpChunkedDecoder {
   // Scan |buf| for the next chunk delimiter.  This method returns the number
   // of bytes consumed from |buf|.  If found, |chunk_remaining_| holds the
   // value for the next chunk size.
-  int ScanForChunkRemaining(char* buf, int buf_len);
+  int ScanForChunkRemaining(const char* buf, int buf_len);
+
+  // Convert string |start| of length |len| to a numeric value.
+  // |start| is a string of type "chunk-size" (hex string).
+  // If the conversion succeeds, return true and place the result in |out|.
+  static bool ParseChunkSize(const char* start, int len, int* out); 
 
   // Indicates the number of bytes remaining for the current chunk.
   int chunk_remaining_;
 
+  // True if waiting for the terminal CRLF of a chunk's data.
+  bool chunk_terminator_remaining_;
+
   // A small buffer used to store a partial chunk marker.
   std::string line_buf_;
 
diff --git a/net/http/http_chunked_decoder_unittest.cc b/net/http/http_chunked_decoder_unittest.cc
index 377f1b1..36433c0 100644
--- a/net/http/http_chunked_decoder_unittest.cc
+++ b/net/http/http_chunked_decoder_unittest.cc
@@ -55,6 +55,22 @@ void RunTest(const char* inputs[], size_t num_inputs,
   EXPECT_TRUE(decoder.reached_eof() == expected_eof);
 }
 
+// Feed the inputs to the decoder, until it returns an error.
+void RunTestUntilFailure(const char* inputs[], size_t num_inputs, size_t fail_index) {
+  net::HttpChunkedDecoder decoder;
+  EXPECT_FALSE(decoder.reached_eof());
+
+  for (size_t i = 0; i < num_inputs; ++i) {
+    std::string input = inputs[i];
+    int n = decoder.FilterBuf(&input[0], static_cast<int>(input.size()));
+    if (n < 0) {
+      EXPECT_EQ(fail_index, i);
+      return;
+    }
+  }
+  FAIL(); // We should have failed on the i'th iteration of the loop.
+}
+
 }  // namespace
 
 TEST(HttpChunkedDecoderTest, Basic) {
@@ -116,3 +132,78 @@ TEST(HttpChunkedDecoderTest, Trailers) {
   };
   RunTest(inputs, arraysize(inputs), "hello", true);
 }
+
+TEST(HttpChunkedDecoderTest, TrailersUnfinished) {
+  const char* inputs[] = {
+    "5\r\nhello\r\n",
+    "0\r\n",
+    "Foo: 1\r\n"
+  };
+  RunTest(inputs, arraysize(inputs), "hello", false);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_0X) {
+  const char* inputs[] = {
+    "0x5\r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_TrailingWhitespace) {
+  const char* inputs[] = {
+    "5 \r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_LeadingWhitespace) {
+  const char* inputs[] = {
+    " 5\r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidLeadingSeparator) {
+  const char* inputs[] = {
+    "\r\n5\r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_NoSeparator) {
+  const char* inputs[] = {
+    "5\r\nhello",
+    "1\r\n \r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 1);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_Negative) {
+  const char* inputs[] = {
+    "8\r\n12345678\r\n-5\r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidChunkSize_Plus) {
+  const char* inputs[] = {
+    "+5\r\nhello\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 0);
+}
+
+TEST(HttpChunkedDecoderTest, InvalidConsecutiveCRLFs) {
+  const char* inputs[] = {
+    "5\r\nhello\r\n",
+    "\r\n\r\n\r\n\r\n",
+    "0\r\n\r\n"
+  };
+  RunTestUntilFailure(inputs, arraysize(inputs), 1);
+}
+\ No newline at end of file
author	ericroman@google.com <ericroman@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2008-08-14 02:44:45 +0000
committer	ericroman@google.com <ericroman@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2008-08-14 02:44:45 +0000
commit	d89222accb371e210c434b27787fc80c30fa758d (patch)
tree	19c518623c133f3a70b7549017db7e3c31b929bc /net
parent	293c0a7b4306a4cce26eb31b21587206c2d9a497 (diff)
download	chromium_src-d89222accb371e210c434b27787fc80c30fa758d.zip chromium_src-d89222accb371e210c434b27787fc80c30fa758d.tar.gz chromium_src-d89222accb371e210c434b27787fc80c30fa758d.tar.bz2