// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "webkit/glue/multipart_response_delegate.h" #include "base/logging.h" #include "base/string_number_conversions.h" #include "base/string_util.h" #include "net/base/net_util.h" #include "net/http/http_util.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebHTTPHeaderVisitor.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLLoaderClient.h" using WebKit::WebHTTPHeaderVisitor; using WebKit::WebString; using WebKit::WebURLLoader; using WebKit::WebURLLoaderClient; using WebKit::WebURLResponse; namespace webkit_glue { namespace { // The list of response headers that we do not copy from the original // response when generating a WebURLResponse for a MIME payload. const char* kReplaceHeaders[] = { "content-type", "content-length", "content-disposition", "content-range", "range", "set-cookie" }; class HeaderCopier : public WebHTTPHeaderVisitor { public: HeaderCopier(WebURLResponse* response) : response_(response) { } virtual void visitHeader(const WebString& name, const WebString& value) { const std::string& name_utf8 = name.utf8(); for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) { if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i])) return; } response_->setHTTPHeaderField(name, value); } private: WebURLResponse* response_; }; } // namespace MultipartResponseDelegate::MultipartResponseDelegate( WebURLLoaderClient* client, WebURLLoader* loader, const WebURLResponse& response, const std::string& boundary) : client_(client), loader_(loader), original_response_(response), raw_data_length_(0), boundary_("--"), first_received_data_(true), processing_headers_(false), stop_sending_(false), has_sent_first_response_(false) { // Some servers report a boundary prefixed with "--". See bug 5786. if (StartsWithASCII(boundary, "--", true)) { boundary_.assign(boundary); } else { boundary_.append(boundary); } } void MultipartResponseDelegate::OnReceivedData(const char* data, int data_len, int raw_data_length) { // stop_sending_ means that we've already received the final boundary token. // The server should stop sending us data at this point, but if it does, we // just throw it away. if (stop_sending_) return; data_.append(data, data_len); raw_data_length_ += raw_data_length; if (first_received_data_) { // Some servers don't send a boundary token before the first chunk of // data. We handle this case anyway (Gecko does too). first_received_data_ = false; // Eat leading \r\n int pos = PushOverLine(data_, 0); if (pos) data_ = data_.substr(pos); if (data_.length() < boundary_.length() + 2) { // We don't have enough data yet to make a boundary token. Just wait // until the next chunk of data arrives. first_received_data_ = true; return; } if (0 != data_.compare(0, boundary_.length(), boundary_)) { data_ = boundary_ + "\n" + data_; } } DCHECK(!first_received_data_); // Headers if (processing_headers_) { // Eat leading \r\n int pos = PushOverLine(data_, 0); if (pos) data_ = data_.substr(pos); if (ParseHeaders()) { // Successfully parsed headers. processing_headers_ = false; } else { // Get more data before trying again. return; } } DCHECK(!processing_headers_); size_t boundary_pos; while ((boundary_pos = FindBoundary()) != std::string::npos) { if (client_) { // Strip out trailing \n\r characters in the buffer preceding the // boundary on the same lines as Firefox. size_t data_length = boundary_pos; if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') { data_length--; if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') { data_length--; } } if (data_length > 0) { // Send the last data chunk. client_->didReceiveData(loader_, data_.data(), static_cast(data_length), raw_data_length_); raw_data_length_ = 0; } } size_t boundary_end_pos = boundary_pos + boundary_.length(); if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) { // This was the last boundary so we can stop processing. stop_sending_ = true; data_.clear(); return; } // We can now throw out data up through the boundary int offset = PushOverLine(data_, boundary_end_pos); data_ = data_.substr(boundary_end_pos + offset); // Ok, back to parsing headers if (!ParseHeaders()) { processing_headers_ = true; break; } } // At this point, we should send over any data we have, but keep enough data // buffered to handle a boundary that may have been truncated. if (!processing_headers_ && data_.length() > boundary_.length()) { // If the last character is a new line character, go ahead and just send // everything we have buffered. This matches an optimization in Gecko. int send_length = data_.length() - boundary_.length(); if (data_[data_.length() - 1] == '\n') send_length = data_.length(); if (client_) client_->didReceiveData(loader_, data_.data(), send_length, raw_data_length_); data_ = data_.substr(send_length); raw_data_length_ = 0; } } void MultipartResponseDelegate::OnCompletedRequest() { // If we have any pending data and we're not in a header, go ahead and send // it to WebCore. if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) { client_->didReceiveData(loader_, data_.data(), static_cast(data_.length()), raw_data_length_); raw_data_length_ = 0; } } int MultipartResponseDelegate::PushOverLine(const std::string& data, size_t pos) { int offset = 0; if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) { ++offset; if (pos + 1 < data.length() && data[pos + 1] == '\n') ++offset; } return offset; } bool MultipartResponseDelegate::ParseHeaders() { int line_feed_increment = 1; // Grab the headers being liberal about line endings. size_t line_start_pos = 0; size_t line_end_pos = data_.find('\n'); while (line_end_pos != std::string::npos) { // Handle CRLF if (line_end_pos > line_start_pos && data_[line_end_pos - 1] == '\r') { line_feed_increment = 2; --line_end_pos; } else { line_feed_increment = 1; } if (line_start_pos == line_end_pos) { // A blank line, end of headers line_end_pos += line_feed_increment; break; } // Find the next header line. line_start_pos = line_end_pos + line_feed_increment; line_end_pos = data_.find('\n', line_start_pos); } // Truncated in the middle of a header, stop parsing. if (line_end_pos == std::string::npos) return false; // Eat headers std::string headers("\n"); headers.append(data_, 0, line_end_pos); data_ = data_.substr(line_end_pos); // Create a WebURLResponse based on the original set of headers + the // replacement headers. We only replace the same few headers that gecko // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp. std::string content_type = net::GetSpecificHeader(headers, "content-type"); std::string mime_type; std::string charset; bool has_charset = false; net::HttpUtil::ParseContentType(content_type, &mime_type, &charset, &has_charset); WebURLResponse response(original_response_.url()); response.setMIMEType(WebString::fromUTF8(mime_type)); response.setTextEncodingName(WebString::fromUTF8(charset)); HeaderCopier copier(&response); original_response_.visitHTTPHeaderFields(&copier); for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) { std::string name(kReplaceHeaders[i]); std::string value = net::GetSpecificHeader(headers, name); if (!value.empty()) { response.setHTTPHeaderField(WebString::fromUTF8(name), WebString::fromUTF8(value)); } } // To avoid recording every multipart load as a separate visit in // the history database, we want to keep track of whether the response // is part of a multipart payload. We do want to record the first visit, // so we only set isMultipartPayload to true after the first visit. response.setIsMultipartPayload(has_sent_first_response_); has_sent_first_response_ = true; // Send the response! if (client_) client_->didReceiveResponse(loader_, response); return true; } // Boundaries are supposed to be preceeded with --, but it looks like gecko // doesn't require the dashes to exist. See nsMultiMixedConv::FindToken. size_t MultipartResponseDelegate::FindBoundary() { size_t boundary_pos = data_.find(boundary_); if (boundary_pos != std::string::npos) { // Back up over -- for backwards compat // TODO(tc): Don't we only want to do this once? Gecko code doesn't seem // to care. if (boundary_pos >= 2) { if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) { boundary_pos -= 2; boundary_ = "--" + boundary_; } } } return boundary_pos; } bool MultipartResponseDelegate::ReadMultipartBoundary( const WebURLResponse& response, std::string* multipart_boundary) { std::string content_type = response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8(); size_t boundary_start_offset = content_type.find("boundary="); if (boundary_start_offset == std::string::npos) return false; boundary_start_offset += strlen("boundary="); size_t boundary_end_offset = content_type.find(';', boundary_start_offset); if (boundary_end_offset == std::string::npos) boundary_end_offset = content_type.length(); size_t boundary_length = boundary_end_offset - boundary_start_offset; *multipart_boundary = content_type.substr(boundary_start_offset, boundary_length); // The byte range response can have quoted boundary strings. This is legal // as per MIME specifications. Individual data fragements however don't // contain quoted boundary strings. TrimString(*multipart_boundary, "\"", multipart_boundary); return true; } bool MultipartResponseDelegate::ReadContentRanges( const WebURLResponse& response, int* content_range_lower_bound, int* content_range_upper_bound, int* content_range_instance_size) { std::string content_range = response.httpHeaderField("Content-Range").utf8(); if (content_range.empty()) { content_range = response.httpHeaderField("Range").utf8(); } if (content_range.empty()) { DLOG(WARNING) << "Failed to read content range from response."; return false; } size_t byte_range_lower_bound_start_offset = content_range.find(" "); if (byte_range_lower_bound_start_offset == std::string::npos) { return false; } // Skip over the initial space. byte_range_lower_bound_start_offset++; // Find the lower bound. size_t byte_range_lower_bound_end_offset = content_range.find("-", byte_range_lower_bound_start_offset); if (byte_range_lower_bound_end_offset == std::string::npos) { return false; } size_t byte_range_lower_bound_characters = byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset; std::string byte_range_lower_bound = content_range.substr(byte_range_lower_bound_start_offset, byte_range_lower_bound_characters); // Find the upper bound. size_t byte_range_upper_bound_start_offset = byte_range_lower_bound_end_offset + 1; size_t byte_range_upper_bound_end_offset = content_range.find("/", byte_range_upper_bound_start_offset); if (byte_range_upper_bound_end_offset == std::string::npos) { return false; } size_t byte_range_upper_bound_characters = byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset; std::string byte_range_upper_bound = content_range.substr(byte_range_upper_bound_start_offset, byte_range_upper_bound_characters); // Find the instance size. size_t byte_range_instance_size_start_offset = byte_range_upper_bound_end_offset + 1; size_t byte_range_instance_size_end_offset = content_range.length(); size_t byte_range_instance_size_characters = byte_range_instance_size_end_offset - byte_range_instance_size_start_offset; std::string byte_range_instance_size = content_range.substr(byte_range_instance_size_start_offset, byte_range_instance_size_characters); if (!base::StringToInt(byte_range_lower_bound, content_range_lower_bound)) return false; if (!base::StringToInt(byte_range_upper_bound, content_range_upper_bound)) return false; if (!base::StringToInt(byte_range_instance_size, content_range_instance_size)) return false; return true; } } // namespace webkit_glue