diff options
Diffstat (limited to 'webkit/child/multipart_response_delegate.cc')
-rw-r--r-- | webkit/child/multipart_response_delegate.cc | 404 |
1 files changed, 404 insertions, 0 deletions
diff --git a/webkit/child/multipart_response_delegate.cc b/webkit/child/multipart_response_delegate.cc new file mode 100644 index 0000000..06af916 --- /dev/null +++ b/webkit/child/multipart_response_delegate.cc @@ -0,0 +1,404 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "webkit/child/multipart_response_delegate.h" + +#include "base/logging.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_util.h" +#include "net/base/net_util.h" +#include "net/http/http_util.h" +#include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h" +#include "third_party/WebKit/public/platform/WebString.h" +#include "third_party/WebKit/public/platform/WebURL.h" +#include "third_party/WebKit/public/platform/WebURLLoaderClient.h" + +using WebKit::WebHTTPHeaderVisitor; +using WebKit::WebString; +using WebKit::WebURLLoader; +using WebKit::WebURLLoaderClient; +using WebKit::WebURLResponse; + +namespace webkit_glue { + +namespace { + +// The list of response headers that we do not copy from the original +// response when generating a WebURLResponse for a MIME payload. +const char* kReplaceHeaders[] = { + "content-type", + "content-length", + "content-disposition", + "content-range", + "range", + "set-cookie" +}; + +class HeaderCopier : public WebHTTPHeaderVisitor { + public: + HeaderCopier(WebURLResponse* response) + : response_(response) { + } + virtual void visitHeader(const WebString& name, const WebString& value) { + const std::string& name_utf8 = name.utf8(); + for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) { + if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i])) + return; + } + response_->setHTTPHeaderField(name, value); + } + private: + WebURLResponse* response_; +}; + +} // namespace + +MultipartResponseDelegate::MultipartResponseDelegate( + WebURLLoaderClient* client, + WebURLLoader* loader, + const WebURLResponse& response, + const std::string& boundary) + : client_(client), + loader_(loader), + original_response_(response), + encoded_data_length_(0), + boundary_("--"), + first_received_data_(true), + processing_headers_(false), + stop_sending_(false), + has_sent_first_response_(false) { + // Some servers report a boundary prefixed with "--". See bug 5786. + if (StartsWithASCII(boundary, "--", true)) { + boundary_.assign(boundary); + } else { + boundary_.append(boundary); + } +} + +void MultipartResponseDelegate::OnReceivedData(const char* data, + int data_len, + int encoded_data_length) { + // stop_sending_ means that we've already received the final boundary token. + // The server should stop sending us data at this point, but if it does, we + // just throw it away. + if (stop_sending_) + return; + + data_.append(data, data_len); + encoded_data_length_ += encoded_data_length; + if (first_received_data_) { + // Some servers don't send a boundary token before the first chunk of + // data. We handle this case anyway (Gecko does too). + first_received_data_ = false; + + // Eat leading \r\n + int pos = PushOverLine(data_, 0); + if (pos) + data_ = data_.substr(pos); + + if (data_.length() < boundary_.length() + 2) { + // We don't have enough data yet to make a boundary token. Just wait + // until the next chunk of data arrives. + first_received_data_ = true; + return; + } + + if (0 != data_.compare(0, boundary_.length(), boundary_)) { + data_ = boundary_ + "\n" + data_; + } + } + DCHECK(!first_received_data_); + + // Headers + if (processing_headers_) { + // Eat leading \r\n + int pos = PushOverLine(data_, 0); + if (pos) + data_ = data_.substr(pos); + + if (ParseHeaders()) { + // Successfully parsed headers. + processing_headers_ = false; + } else { + // Get more data before trying again. + return; + } + } + DCHECK(!processing_headers_); + + size_t boundary_pos; + while ((boundary_pos = FindBoundary()) != std::string::npos) { + if (client_) { + // Strip out trailing \n\r characters in the buffer preceding the + // boundary on the same lines as Firefox. + size_t data_length = boundary_pos; + if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') { + data_length--; + if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') { + data_length--; + } + } + if (data_length > 0) { + // Send the last data chunk. + client_->didReceiveData(loader_, + data_.data(), + static_cast<int>(data_length), + encoded_data_length_); + encoded_data_length_ = 0; + } + } + size_t boundary_end_pos = boundary_pos + boundary_.length(); + if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) { + // This was the last boundary so we can stop processing. + stop_sending_ = true; + data_.clear(); + return; + } + + // We can now throw out data up through the boundary + int offset = PushOverLine(data_, boundary_end_pos); + data_ = data_.substr(boundary_end_pos + offset); + + // Ok, back to parsing headers + if (!ParseHeaders()) { + processing_headers_ = true; + break; + } + } + + // At this point, we should send over any data we have, but keep enough data + // buffered to handle a boundary that may have been truncated. + if (!processing_headers_ && data_.length() > boundary_.length()) { + // If the last character is a new line character, go ahead and just send + // everything we have buffered. This matches an optimization in Gecko. + int send_length = data_.length() - boundary_.length(); + if (data_[data_.length() - 1] == '\n') + send_length = data_.length(); + if (client_) + client_->didReceiveData(loader_, + data_.data(), + send_length, + encoded_data_length_); + data_ = data_.substr(send_length); + encoded_data_length_ = 0; + } +} + +void MultipartResponseDelegate::OnCompletedRequest() { + // If we have any pending data and we're not in a header, go ahead and send + // it to WebCore. + if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) { + client_->didReceiveData(loader_, + data_.data(), + static_cast<int>(data_.length()), + encoded_data_length_); + encoded_data_length_ = 0; + } +} + +int MultipartResponseDelegate::PushOverLine(const std::string& data, + size_t pos) { + int offset = 0; + if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) { + ++offset; + if (pos + 1 < data.length() && data[pos + 1] == '\n') + ++offset; + } + return offset; +} + +bool MultipartResponseDelegate::ParseHeaders() { + int line_feed_increment = 1; + + // Grab the headers being liberal about line endings. + size_t line_start_pos = 0; + size_t line_end_pos = data_.find('\n'); + while (line_end_pos != std::string::npos) { + // Handle CRLF + if (line_end_pos > line_start_pos && data_[line_end_pos - 1] == '\r') { + line_feed_increment = 2; + --line_end_pos; + } else { + line_feed_increment = 1; + } + if (line_start_pos == line_end_pos) { + // A blank line, end of headers + line_end_pos += line_feed_increment; + break; + } + // Find the next header line. + line_start_pos = line_end_pos + line_feed_increment; + line_end_pos = data_.find('\n', line_start_pos); + } + // Truncated in the middle of a header, stop parsing. + if (line_end_pos == std::string::npos) + return false; + + // Eat headers + std::string headers("\n"); + headers.append(data_, 0, line_end_pos); + data_ = data_.substr(line_end_pos); + + // Create a WebURLResponse based on the original set of headers + the + // replacement headers. We only replace the same few headers that gecko + // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp. + std::string content_type = net::GetSpecificHeader(headers, "content-type"); + std::string mime_type; + std::string charset; + bool has_charset = false; + net::HttpUtil::ParseContentType(content_type, &mime_type, &charset, + &has_charset, NULL); + WebURLResponse response(original_response_.url()); + response.setMIMEType(WebString::fromUTF8(mime_type)); + response.setTextEncodingName(WebString::fromUTF8(charset)); + + HeaderCopier copier(&response); + original_response_.visitHTTPHeaderFields(&copier); + + for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) { + std::string name(kReplaceHeaders[i]); + std::string value = net::GetSpecificHeader(headers, name); + if (!value.empty()) { + response.setHTTPHeaderField(WebString::fromUTF8(name), + WebString::fromUTF8(value)); + } + } + // To avoid recording every multipart load as a separate visit in + // the history database, we want to keep track of whether the response + // is part of a multipart payload. We do want to record the first visit, + // so we only set isMultipartPayload to true after the first visit. + response.setIsMultipartPayload(has_sent_first_response_); + has_sent_first_response_ = true; + // Send the response! + if (client_) + client_->didReceiveResponse(loader_, response); + + return true; +} + +// Boundaries are supposed to be preceeded with --, but it looks like gecko +// doesn't require the dashes to exist. See nsMultiMixedConv::FindToken. +size_t MultipartResponseDelegate::FindBoundary() { + size_t boundary_pos = data_.find(boundary_); + if (boundary_pos != std::string::npos) { + // Back up over -- for backwards compat + // TODO(tc): Don't we only want to do this once? Gecko code doesn't seem + // to care. + if (boundary_pos >= 2) { + if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) { + boundary_pos -= 2; + boundary_ = "--" + boundary_; + } + } + } + return boundary_pos; +} + +bool MultipartResponseDelegate::ReadMultipartBoundary( + const WebURLResponse& response, + std::string* multipart_boundary) { + std::string content_type = + response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8(); + + size_t boundary_start_offset = content_type.find("boundary="); + if (boundary_start_offset == std::string::npos) + return false; + + boundary_start_offset += strlen("boundary="); + + size_t boundary_end_offset = content_type.find(';', boundary_start_offset); + + if (boundary_end_offset == std::string::npos) + boundary_end_offset = content_type.length(); + + size_t boundary_length = boundary_end_offset - boundary_start_offset; + + *multipart_boundary = + content_type.substr(boundary_start_offset, boundary_length); + // The byte range response can have quoted boundary strings. This is legal + // as per MIME specifications. Individual data fragements however don't + // contain quoted boundary strings. + TrimString(*multipart_boundary, "\"", multipart_boundary); + return true; +} + +bool MultipartResponseDelegate::ReadContentRanges( + const WebURLResponse& response, + int64* content_range_lower_bound, + int64* content_range_upper_bound, + int64* content_range_instance_size) { + + std::string content_range = response.httpHeaderField("Content-Range").utf8(); + if (content_range.empty()) { + content_range = response.httpHeaderField("Range").utf8(); + } + + if (content_range.empty()) { + DLOG(WARNING) << "Failed to read content range from response."; + return false; + } + + size_t byte_range_lower_bound_start_offset = content_range.find(" "); + if (byte_range_lower_bound_start_offset == std::string::npos) { + return false; + } + + // Skip over the initial space. + byte_range_lower_bound_start_offset++; + + // Find the lower bound. + size_t byte_range_lower_bound_end_offset = + content_range.find("-", byte_range_lower_bound_start_offset); + if (byte_range_lower_bound_end_offset == std::string::npos) { + return false; + } + + size_t byte_range_lower_bound_characters = + byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset; + std::string byte_range_lower_bound = + content_range.substr(byte_range_lower_bound_start_offset, + byte_range_lower_bound_characters); + + // Find the upper bound. + size_t byte_range_upper_bound_start_offset = + byte_range_lower_bound_end_offset + 1; + + size_t byte_range_upper_bound_end_offset = + content_range.find("/", byte_range_upper_bound_start_offset); + if (byte_range_upper_bound_end_offset == std::string::npos) { + return false; + } + + size_t byte_range_upper_bound_characters = + byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset; + std::string byte_range_upper_bound = + content_range.substr(byte_range_upper_bound_start_offset, + byte_range_upper_bound_characters); + + // Find the instance size. + size_t byte_range_instance_size_start_offset = + byte_range_upper_bound_end_offset + 1; + + size_t byte_range_instance_size_end_offset = + content_range.length(); + + size_t byte_range_instance_size_characters = + byte_range_instance_size_end_offset - + byte_range_instance_size_start_offset; + std::string byte_range_instance_size = + content_range.substr(byte_range_instance_size_start_offset, + byte_range_instance_size_characters); + + if (!base::StringToInt64(byte_range_lower_bound, content_range_lower_bound)) + return false; + if (!base::StringToInt64(byte_range_upper_bound, content_range_upper_bound)) + return false; + if (!base::StringToInt64(byte_range_instance_size, + content_range_instance_size)) { + return false; + } + return true; +} + +} // namespace webkit_glue |