summaryrefslogtreecommitdiffstats
path: root/webkit/child/multipart_response_delegate.cc
diff options
context:
space:
mode:
Diffstat (limited to 'webkit/child/multipart_response_delegate.cc')
-rw-r--r--webkit/child/multipart_response_delegate.cc404
1 files changed, 404 insertions, 0 deletions
diff --git a/webkit/child/multipart_response_delegate.cc b/webkit/child/multipart_response_delegate.cc
new file mode 100644
index 0000000..06af916
--- /dev/null
+++ b/webkit/child/multipart_response_delegate.cc
@@ -0,0 +1,404 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "webkit/child/multipart_response_delegate.h"
+
+#include "base/logging.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_util.h"
+#include "net/base/net_util.h"
+#include "net/http/http_util.h"
+#include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"
+#include "third_party/WebKit/public/platform/WebString.h"
+#include "third_party/WebKit/public/platform/WebURL.h"
+#include "third_party/WebKit/public/platform/WebURLLoaderClient.h"
+
+using WebKit::WebHTTPHeaderVisitor;
+using WebKit::WebString;
+using WebKit::WebURLLoader;
+using WebKit::WebURLLoaderClient;
+using WebKit::WebURLResponse;
+
+namespace webkit_glue {
+
+namespace {
+
+// Response headers that are not copied from the original response when
+// generating a WebURLResponse for a MIME payload. Entries are written in
+// lower case because they are matched with LowerCaseEqualsASCII().
+// Both the strings and the table itself are immutable.
+const char* const kReplaceHeaders[] = {
+  "content-type",
+  "content-length",
+  "content-disposition",
+  "content-range",
+  "range",
+  "set-cookie"
+};
+
+// Header visitor that copies each header it is shown into a target
+// WebURLResponse, skipping the names listed in kReplaceHeaders.
+class HeaderCopier : public WebHTTPHeaderVisitor {
+ public:
+  // |response| is the destination that visitHeader() writes to. Only the
+  // pointer is stored. The constructor is explicit so that a bare
+  // WebURLResponse* never converts to a HeaderCopier implicitly.
+  explicit HeaderCopier(WebURLResponse* response)
+      : response_(response) {
+  }
+
+  // Copies the |name| / |value| pair into |response_| unless |name| matches
+  // one of the kReplaceHeaders entries, in which case it is dropped.
+  virtual void visitHeader(const WebString& name, const WebString& value) {
+    const std::string& name_utf8 = name.utf8();
+    for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
+      if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i]))
+        return;
+    }
+    response_->setHTTPHeaderField(name, value);
+  }
+
+ private:
+  // Destination response; stored as a raw pointer.
+  WebURLResponse* response_;
+};
+
+} // namespace
+
+// Records the client/loader pair and a copy of |response|, resets the parser
+// state, and normalizes |boundary| so that |boundary_| always starts with
+// "--".
+MultipartResponseDelegate::MultipartResponseDelegate(
+    WebURLLoaderClient* client,
+    WebURLLoader* loader,
+    const WebURLResponse& response,
+    const std::string& boundary)
+    : client_(client),
+      loader_(loader),
+      original_response_(response),
+      encoded_data_length_(0),
+      boundary_("--"),
+      first_received_data_(true),
+      processing_headers_(false),
+      stop_sending_(false),
+      has_sent_first_response_(false) {
+  // Some servers already include the leading "--" in the boundary they
+  // report (see bug 5786). Use such a boundary verbatim; otherwise tack the
+  // reported value onto the "--" that |boundary_| was initialized with.
+  const bool already_prefixed = StartsWithASCII(boundary, "--", true);
+  if (!already_prefixed)
+    boundary_ += boundary;
+  else
+    boundary_ = boundary;
+}
+
+// Feeds one chunk of the raw multipart body into the parser.
+//
+// |data| / |data_len| are appended to the internal buffer |data_|.
+// |encoded_data_length| is added to a running total that is handed to the
+// client with the next didReceiveData() call and then reset to zero.
+//
+// For every complete part found in the buffer, |client_| (when non-NULL)
+// receives the part's payload via didReceiveData(); the response for the
+// following part is emitted by ParseHeaders(). Data that might still contain
+// a truncated boundary is kept buffered for the next call.
+void MultipartResponseDelegate::OnReceivedData(const char* data,
+                                               int data_len,
+                                               int encoded_data_length) {
+  // stop_sending_ means that we've already received the final boundary token.
+  // The server should stop sending us data at this point, but if it does, we
+  // just throw it away.
+  if (stop_sending_)
+    return;
+
+  data_.append(data, data_len);
+  encoded_data_length_ += encoded_data_length;
+  if (first_received_data_) {
+    // Some servers don't send a boundary token before the first chunk of
+    // data.  We handle this case anyway (Gecko does too).
+    first_received_data_ = false;
+
+    // Eat leading \r\n
+    int pos = PushOverLine(data_, 0);
+    if (pos)
+      data_ = data_.substr(pos);
+
+    if (data_.length() < boundary_.length() + 2) {
+      // We don't have enough data yet to make a boundary token.  Just wait
+      // until the next chunk of data arrives.
+      // Re-arm the flag so this first-chunk handling runs again next time.
+      first_received_data_ = true;
+      return;
+    }
+
+    // The buffer does not begin with the boundary: synthesize one (plus a
+    // line break) in front of it so the loop below handles the data like any
+    // other part.
+    if (0 != data_.compare(0, boundary_.length(), boundary_)) {
+      data_ = boundary_ + "\n" + data_;
+    }
+  }
+  DCHECK(!first_received_data_);
+
+  // Headers
+  // processing_headers_ is set when an earlier call ran out of data in the
+  // middle of a part's header section.
+  if (processing_headers_) {
+    // Eat leading \r\n
+    int pos = PushOverLine(data_, 0);
+    if (pos)
+      data_ = data_.substr(pos);
+
+    if (ParseHeaders()) {
+      // Successfully parsed headers.
+      processing_headers_ = false;
+    } else {
+      // Get more data before trying again.
+      return;
+    }
+  }
+  DCHECK(!processing_headers_);
+
+  // Consume every complete boundary currently in the buffer. Each iteration
+  // flushes the payload preceding the boundary, drops the boundary line, and
+  // then parses the headers of the part that follows.
+  size_t boundary_pos;
+  while ((boundary_pos = FindBoundary()) != std::string::npos) {
+    if (client_) {
+      // Strip out trailing \n\r characters in the buffer preceding the
+      // boundary on the same lines as Firefox.
+      size_t data_length = boundary_pos;
+      if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') {
+        data_length--;
+        if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') {
+          data_length--;
+        }
+      }
+      if (data_length > 0) {
+        // Send the last data chunk.
+        client_->didReceiveData(loader_,
+                                data_.data(),
+                                static_cast<int>(data_length),
+                                encoded_data_length_);
+        encoded_data_length_ = 0;
+      }
+    }
+    // A '-' directly after the boundary is taken as the closing delimiter.
+    // Only that single character is examined.
+    size_t boundary_end_pos = boundary_pos + boundary_.length();
+    if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) {
+      // This was the last boundary so we can stop processing.
+      stop_sending_ = true;
+      data_.clear();
+      return;
+    }
+
+    // We can now throw out data up through the boundary
+    // (including the line break that terminates the boundary line, if any).
+    int offset = PushOverLine(data_, boundary_end_pos);
+    data_ = data_.substr(boundary_end_pos + offset);
+
+    // Ok, back to parsing headers
+    if (!ParseHeaders()) {
+      // Header section is incomplete; resume from the headers branch above
+      // when more data arrives.
+      processing_headers_ = true;
+      break;
+    }
+  }
+
+  // At this point, we should send over any data we have, but keep enough data
+  // buffered to handle a boundary that may have been truncated.
+  if (!processing_headers_ && data_.length() > boundary_.length()) {
+    // If the last character is a new line character, go ahead and just send
+    // everything we have buffered.  This matches an optimization in Gecko.
+    int send_length = data_.length() - boundary_.length();
+    if (data_[data_.length() - 1] == '\n')
+      send_length = data_.length();
+    if (client_)
+      client_->didReceiveData(loader_,
+                              data_.data(),
+                              send_length,
+                              encoded_data_length_);
+    // The sent prefix is dropped from the buffer even when there is no
+    // client to deliver it to.
+    data_ = data_.substr(send_length);
+    encoded_data_length_ = 0;
+  }
+}
+
+// Called when the load has finished. Flushes whatever payload is still
+// buffered to the client, unless the parser is in the middle of a header
+// section, the final boundary was already seen, or there is nothing (or
+// nobody) to send to.
+void MultipartResponseDelegate::OnCompletedRequest() {
+  if (processing_headers_ || data_.empty() || stop_sending_ || !client_)
+    return;
+
+  // Hand the remaining bytes to WebCore together with the encoded length
+  // accumulated since the last delivery, then reset that counter.
+  const int pending_length = static_cast<int>(data_.length());
+  client_->didReceiveData(loader_,
+                          data_.data(),
+                          pending_length,
+                          encoded_data_length_);
+  encoded_data_length_ = 0;
+}
+
+// Returns how many line-break characters (0, 1 or 2) start at |pos| in
+// |data|. A leading '\r' or '\n' counts as one; if the character right after
+// it is '\n', that one is counted as well. Returns 0 when |pos| is past the
+// end of |data| or does not point at a line break.
+int MultipartResponseDelegate::PushOverLine(const std::string& data,
+                                            size_t pos) {
+  const size_t size = data.length();
+  if (pos >= size)
+    return 0;
+
+  const char first = data[pos];
+  if (first != '\r' && first != '\n')
+    return 0;
+
+  const size_t next = pos + 1;
+  const bool followed_by_lf = next < size && data[next] == '\n';
+  return followed_by_lf ? 2 : 1;
+}
+
+// Tries to consume one part's header section from the front of |data_|.
+//
+// Returns false, leaving |data_| untouched, if the blank line terminating
+// the headers has not arrived yet. Otherwise removes the header bytes from
+// |data_|, builds a WebURLResponse for the part, passes it to |client_|
+// (when non-NULL) via didReceiveResponse(), and returns true.
+bool MultipartResponseDelegate::ParseHeaders() {
+  // Walk the buffer line by line, being liberal about line endings (LF or
+  // CRLF), until an empty line is found. |headers_end| is the offset just
+  // past that empty line's '\n'.
+  size_t headers_end = std::string::npos;
+  size_t line_begin = 0;
+  for (size_t newline = data_.find('\n');
+       newline != std::string::npos;
+       newline = data_.find('\n', line_begin)) {
+    // With CRLF the line's content stops one character before the '\n'.
+    const bool is_crlf = newline > line_begin && data_[newline - 1] == '\r';
+    const size_t line_end = is_crlf ? newline - 1 : newline;
+    if (line_end == line_begin) {
+      // A blank line, end of headers.
+      headers_end = newline + 1;
+      break;
+    }
+    line_begin = newline + 1;
+  }
+
+  // Truncated in the middle of a header, stop parsing.
+  if (headers_end == std::string::npos)
+    return false;
+
+  // Move the header bytes out of the buffer. The copy is prefixed with a
+  // newline before being handed to net::GetSpecificHeader().
+  std::string headers("\n");
+  headers.append(data_, 0, headers_end);
+  data_.erase(0, headers_end);
+
+  // Create a WebURLResponse based on the original set of headers + the
+  // replacement headers.  We only replace the same few headers that gecko
+  // does.  See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
+  const std::string content_type =
+      net::GetSpecificHeader(headers, "content-type");
+  std::string mime_type;
+  std::string charset;
+  bool has_charset = false;
+  net::HttpUtil::ParseContentType(content_type, &mime_type, &charset,
+                                  &has_charset, NULL);
+  WebURLResponse response(original_response_.url());
+  response.setMIMEType(WebString::fromUTF8(mime_type));
+  response.setTextEncodingName(WebString::fromUTF8(charset));
+
+  // First carry over every original header that is not in kReplaceHeaders...
+  HeaderCopier copier(&response);
+  original_response_.visitHTTPHeaderFields(&copier);
+
+  // ...then take the kReplaceHeaders entries from this part's own headers,
+  // ignoring the ones the part does not provide.
+  for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
+    const std::string header_name(kReplaceHeaders[i]);
+    const std::string header_value =
+        net::GetSpecificHeader(headers, header_name);
+    if (header_value.empty())
+      continue;
+    response.setHTTPHeaderField(WebString::fromUTF8(header_name),
+                                WebString::fromUTF8(header_value));
+  }
+
+  // To avoid recording every multipart load as a separate visit in
+  // the history database, we want to keep track of whether the response
+  // is part of a multipart payload.  We do want to record the first visit,
+  // so we only set isMultipartPayload to true after the first visit.
+  response.setIsMultipartPayload(has_sent_first_response_);
+  has_sent_first_response_ = true;
+
+  // Send the response!
+  if (client_)
+    client_->didReceiveResponse(loader_, response);
+
+  return true;
+}
+
+// Returns the offset of the first boundary in |data_|, or std::string::npos
+// if there is none.
+//
+// Boundaries are supposed to be preceded with --, but it looks like gecko
+// doesn't require the dashes to exist.  See nsMultiMixedConv::FindToken.
+// When the match is immediately preceded by "--", the returned offset is
+// moved back over those dashes and |boundary_| itself is extended by "--".
+size_t MultipartResponseDelegate::FindBoundary() {
+  size_t match_pos = data_.find(boundary_);
+  if (match_pos == std::string::npos || match_pos < 2)
+    return match_pos;
+
+  // Back up over -- for backwards compat
+  // TODO(tc): Don't we only want to do this once?  Gecko code doesn't seem
+  // to care.
+  const bool preceded_by_dashes =
+      data_[match_pos - 1] == '-' && data_[match_pos - 2] == '-';
+  if (preceded_by_dashes) {
+    match_pos -= 2;
+    boundary_.insert(0, "--");
+  }
+  return match_pos;
+}
+
+// Extracts the value of the "boundary=" parameter from the Content-Type
+// header of |response| into |*multipart_boundary|.
+//
+// The value runs up to the next ';' or the end of the header, and has any
+// surrounding double quotes trimmed. Returns false (without touching the
+// output) if the header contains no "boundary=".
+bool MultipartResponseDelegate::ReadMultipartBoundary(
+    const WebURLResponse& response,
+    std::string* multipart_boundary) {
+  const std::string content_type =
+      response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();
+
+  const size_t key_pos = content_type.find("boundary=");
+  if (key_pos == std::string::npos)
+    return false;
+
+  // The value starts right after the key and ends at the next parameter
+  // separator, or at the end of the header when there is none.
+  const size_t value_begin = key_pos + strlen("boundary=");
+  size_t value_end = content_type.find(';', value_begin);
+  if (value_end == std::string::npos)
+    value_end = content_type.length();
+
+  *multipart_boundary =
+      content_type.substr(value_begin, value_end - value_begin);
+
+  // The byte range response can have quoted boundary strings. This is legal
+  // as per MIME specifications. Individual data fragments however don't
+  // contain quoted boundary strings.
+  TrimString(*multipart_boundary, "\"", multipart_boundary);
+  return true;
+}
+
+// Parses a header of the form "<unit> <lower>-<upper>/<size>" from
+// |response| into the three output integers.
+//
+// The "Content-Range" header is used if present and non-empty, otherwise
+// "Range". Returns false if neither header has a value, if any of the ' ',
+// '-' or '/' separators is missing, or if one of the three fields does not
+// convert with base::StringToInt64(). The fields are converted in order and
+// conversion stops at the first failure.
+bool MultipartResponseDelegate::ReadContentRanges(
+    const WebURLResponse& response,
+    int64* content_range_lower_bound,
+    int64* content_range_upper_bound,
+    int64* content_range_instance_size) {
+  std::string content_range = response.httpHeaderField("Content-Range").utf8();
+  if (content_range.empty())
+    content_range = response.httpHeaderField("Range").utf8();
+
+  if (content_range.empty()) {
+    DLOG(WARNING) << "Failed to read content range from response.";
+    return false;
+  }
+
+  // Locate the three separators. Each search starts right after the previous
+  // separator.
+  const size_t space_pos = content_range.find(' ');
+  if (space_pos == std::string::npos)
+    return false;
+  const size_t lower_begin = space_pos + 1;
+
+  const size_t dash_pos = content_range.find('-', lower_begin);
+  if (dash_pos == std::string::npos)
+    return false;
+  const size_t upper_begin = dash_pos + 1;
+
+  const size_t slash_pos = content_range.find('/', upper_begin);
+  if (slash_pos == std::string::npos)
+    return false;
+  const size_t size_begin = slash_pos + 1;
+
+  // Slice out the text between the separators; the instance size runs to
+  // the end of the header value.
+  const std::string lower_text =
+      content_range.substr(lower_begin, dash_pos - lower_begin);
+  const std::string upper_text =
+      content_range.substr(upper_begin, slash_pos - upper_begin);
+  const std::string size_text = content_range.substr(size_begin);
+
+  return base::StringToInt64(lower_text, content_range_lower_bound) &&
+         base::StringToInt64(upper_text, content_range_upper_bound) &&
+         base::StringToInt64(size_text, content_range_instance_size);
+}
+
+} // namespace webkit_glue