summaryrefslogtreecommitdiffstats
path: root/pdf
diff options
context:
space:
mode:
authorspelchat <spelchat@chromium.org>2015-12-09 16:44:15 -0800
committerCommit bot <commit-bot@chromium.org>2015-12-10 00:45:12 +0000
commit3ba2a28104c4d6feef3efd71d9be73f085886f53 (patch)
tree2c05f055b78bed8cf7faf7ad5af3918595e74d46 /pdf
parent0eabbb80369d664feb34db3ea4e3f9247f535e95 (diff)
downloadchromium_src-3ba2a28104c4d6feef3efd71d9be73f085886f53.zip
chromium_src-3ba2a28104c4d6feef3efd71d9be73f085886f53.tar.gz
chromium_src-3ba2a28104c4d6feef3efd71d9be73f085886f53.tar.bz2
Minimize the number of range requests made by PDFium
Previously, Chrome would make a series of range requests for linearized PDFs, starting at 32 KB and slowly increasing. On high latency connections, this is considerably slower than using a single request. Now Chrome will make range requests as large as possible and cancel them if the renderer needs some data in a different place in the document. This significantly reduces the number of range requests performed by Chrome. BUG=460201,78264 Review URL: https://codereview.chromium.org/1506023002 Cr-Commit-Position: refs/heads/master@{#364235}
Diffstat (limited to 'pdf')
-rw-r--r--pdf/chunk_stream.cc14
-rw-r--r--pdf/chunk_stream.h11
-rw-r--r--pdf/document_loader.cc222
-rw-r--r--pdf/document_loader.h28
4 files changed, 150 insertions, 125 deletions
diff --git a/pdf/chunk_stream.cc b/pdf/chunk_stream.cc
index 7ac8f97..e580151 100644
--- a/pdf/chunk_stream.cc
+++ b/pdf/chunk_stream.cc
@@ -17,7 +17,7 @@
namespace chrome_pdf {
-ChunkStream::ChunkStream() {
+ChunkStream::ChunkStream() : stream_size_(0) {
}
ChunkStream::~ChunkStream() {
@@ -26,10 +26,12 @@ ChunkStream::~ChunkStream() {
void ChunkStream::Clear() {
chunks_.clear();
data_.clear();
+ stream_size_ = 0;
}
void ChunkStream::Preallocate(size_t stream_size) {
data_.reserve(stream_size);
+ stream_size_ = stream_size;
}
size_t ChunkStream::GetSize() {
@@ -150,7 +152,7 @@ size_t ChunkStream::GetFirstMissingByte() const {
return begin->first > 0 ? 0 : begin->second;
}
-size_t ChunkStream::GetLastByteBefore(size_t offset) const {
+size_t ChunkStream::GetFirstMissingByteInInterval(size_t offset) const {
if (chunks_.empty())
return 0;
std::map<size_t, size_t>::const_iterator it = chunks_.upper_bound(offset);
@@ -160,13 +162,13 @@ size_t ChunkStream::GetLastByteBefore(size_t offset) const {
return it->first + it->second;
}
-size_t ChunkStream::GetFirstByteAfter(size_t offset) const {
+size_t ChunkStream::GetLastMissingByteInInterval(size_t offset) const {
if (chunks_.empty())
- return 0;
+ return stream_size_ - 1;
std::map<size_t, size_t>::const_iterator it = chunks_.upper_bound(offset);
if (it == chunks_.end())
- return data_.size();
- return it->first;
+ return stream_size_ - 1;
+ return it->first - 1;
}
} // namespace chrome_pdf
diff --git a/pdf/chunk_stream.h b/pdf/chunk_stream.h
index fac1ec6..048f958 100644
--- a/pdf/chunk_stream.h
+++ b/pdf/chunk_stream.h
@@ -8,6 +8,7 @@
#include <stddef.h>
#include <map>
+#include <utility>
#include <vector>
namespace chrome_pdf {
@@ -33,16 +34,20 @@ class ChunkStream {
bool IsRangeAvailable(size_t offset, size_t size) const;
size_t GetFirstMissingByte() const;
- size_t GetLastByteBefore(size_t offset) const;
- size_t GetFirstByteAfter(size_t offset) const;
+ // Finds the first byte of the missing byte interval that offset belongs to.
+ size_t GetFirstMissingByteInInterval(size_t offset) const;
+ // Returns the last byte of the missing byte interval that offset belongs to.
+ size_t GetLastMissingByteInInterval(size_t offset) const;
private:
std::vector<unsigned char> data_;
// Pair, first - beginning of the chunk, second - size of the chunk.
std::map<size_t, size_t> chunks_;
+
+ size_t stream_size_;
};
}; // namespace chrome_pdf
-#endif
+#endif // PDF_CHUNK_STREAM_H_
diff --git a/pdf/document_loader.cc b/pdf/document_loader.cc
index 5bbed1a..89e7467 100644
--- a/pdf/document_loader.cc
+++ b/pdf/document_loader.cc
@@ -16,9 +16,6 @@ namespace chrome_pdf {
namespace {
-// Document below size will be downloaded in one chunk.
-const uint32_t kMinFileSize = 64 * 1024;
-
// If the headers have a byte-range response, writes the start and end
// positions and returns true if at least the start position was parsed.
// The end position will be set to 0 if it was not found or parsed from the
@@ -176,14 +173,18 @@ bool DocumentLoader::Init(const pp::URLLoader& loader,
}
void DocumentLoader::LoadPartialDocument() {
+ // The current request is a full request (not a range request) so it starts at
+ // 0 and ends at |document_size_|.
+ current_chunk_size_ = document_size_;
+ current_pos_ = 0;
+ current_request_offset_ = 0;
+ current_request_size_ = 0;
+ current_request_extended_size_ = document_size_;
+ request_pending_ = true;
+
partial_document_ = true;
- // Force the main request to be cancelled, since if we're a full-frame plugin
- // there could be other references to the loader.
- loader_.Close();
- loader_ = pp::URLLoader();
- // Download file header.
header_request_ = true;
- RequestData(0, std::min(GetRequestSize(), document_size_));
+ ReadMore();
}
void DocumentLoader::LoadFullDocument() {
@@ -212,12 +213,8 @@ uint32_t DocumentLoader::GetAvailableData() const {
}
void DocumentLoader::ClearPendingRequests() {
- // The first item in the queue is pending (need to keep it in the queue).
- if (pending_requests_.size() > 1) {
- // Remove all elements except the first one.
- pending_requests_.erase(++pending_requests_.begin(),
- pending_requests_.end());
- }
+ pending_requests_.erase(pending_requests_.begin(),
+ pending_requests_.end());
}
bool DocumentLoader::GetBlock(uint32_t position,
@@ -247,86 +244,74 @@ void DocumentLoader::RequestData(uint32_t position, uint32_t size) {
DownloadPendingRequests();
}
+void DocumentLoader::RemoveCompletedRanges() {
+ // Split every request that has been partially downloaded already into smaller
+ // requests.
+ std::vector<std::pair<size_t, size_t> > ranges;
+ auto it = pending_requests_.begin();
+ while (it != pending_requests_.end()) {
+ chunk_stream_.GetMissedRanges(it->first, it->second, &ranges);
+ pending_requests_.insert(it, ranges.begin(), ranges.end());
+ ranges.clear();
+ pending_requests_.erase(it++);
+ }
+}
+
void DocumentLoader::DownloadPendingRequests() {
- if (request_pending_ || pending_requests_.empty())
+ if (request_pending_)
return;
- // Remove already completed requests.
- // By design DownloadPendingRequests() should have at least 1 request in the
- // queue. ReadComplete() will remove the last pending comment from the queue.
- while (pending_requests_.size() > 1) {
- if (IsDataAvailable(pending_requests_.front().first,
- pending_requests_.front().second)) {
- pending_requests_.pop_front();
- } else {
- break;
+ uint32_t pos;
+ uint32_t size;
+ if (pending_requests_.empty()) {
+ // If the document is not complete and we have no outstanding requests,
+ // download what's left for as long as no other request gets added to
+ // |pending_requests_|.
+ pos = chunk_stream_.GetFirstMissingByte();
+ if (pos >= document_size_) {
+ // We're done downloading the document.
+ return;
}
- }
-
- uint32_t pos = pending_requests_.front().first;
- uint32_t size = pending_requests_.front().second;
- if (IsDataAvailable(pos, size)) {
- ReadComplete();
- return;
- }
+ // Start with size 0, we'll set |current_request_extended_size_| to > 0.
+ // This way this request will get cancelled as soon as the renderer wants
+ // another portion of the document.
+ size = 0;
+ } else {
+ RemoveCompletedRanges();
- // If current request has been partially downloaded already, split it into
- // a few smaller requests.
- std::vector<std::pair<size_t, size_t> > ranges;
- chunk_stream_.GetMissedRanges(pos, size, &ranges);
- if (!ranges.empty()) {
- pending_requests_.pop_front();
- pending_requests_.insert(pending_requests_.begin(),
- ranges.begin(), ranges.end());
pos = pending_requests_.front().first;
size = pending_requests_.front().second;
- }
-
- uint32_t cur_request_size = GetRequestSize();
- // If size is less than default request, try to expand download range for
- // more optimal download.
- if (size < cur_request_size && partial_document_) {
- // First, try to expand block towards the end of the file.
- uint32_t new_pos = pos;
- uint32_t new_size = cur_request_size;
- if (pos + new_size > document_size_)
- new_size = document_size_ - pos;
-
- std::vector<std::pair<size_t, size_t> > ranges;
- if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) {
- new_pos = ranges[0].first;
- new_size = ranges[0].second;
+ if (IsDataAvailable(pos, size)) {
+ ReadComplete();
+ return;
}
+ }
- // Second, try to expand block towards the beginning of the file.
- if (new_size < cur_request_size) {
- uint32_t block_end = new_pos + new_size;
- if (block_end > cur_request_size) {
- new_pos = block_end - cur_request_size;
- } else {
- new_pos = 0;
- }
- new_size = block_end - new_pos;
-
- if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) {
- new_pos = ranges.back().first;
- new_size = ranges.back().second;
- }
+ size_t last_byte_before = chunk_stream_.GetFirstMissingByteInInterval(pos);
+ if (size < kDefaultRequestSize) {
+ // Try to extend before pos, up to size |kDefaultRequestSize|.
+ if (pos + size - last_byte_before > kDefaultRequestSize) {
+ pos += size - kDefaultRequestSize;
+ size = kDefaultRequestSize;
+ } else {
+ size += pos - last_byte_before;
+ pos = last_byte_before;
}
- pos = new_pos;
- size = new_size;
}
-
- size_t last_byte_before = chunk_stream_.GetLastByteBefore(pos);
- size_t first_byte_after = chunk_stream_.GetFirstByteAfter(pos + size - 1);
- if (pos - last_byte_before < cur_request_size) {
- size = pos + size - last_byte_before;
+ if (pos - last_byte_before < kDefaultRequestSize) {
+ // Don't leave a gap smaller than |kDefaultRequestSize|.
+ size += pos - last_byte_before;
pos = last_byte_before;
}
- if ((pos + size < first_byte_after) &&
- (pos + size + cur_request_size >= first_byte_after))
- size = first_byte_after - pos;
+ current_request_offset_ = pos;
+ current_request_size_ = size;
+
+ // Extend the request until the next downloaded byte or the end of the
+ // document.
+ size_t last_missing_byte =
+ chunk_stream_.GetLastMissingByteInInterval(pos + size - 1);
+ current_request_extended_size_ = last_missing_byte - pos + 1;
request_pending_ = true;
@@ -335,7 +320,7 @@ void DocumentLoader::DownloadPendingRequests() {
loader_ = client_->CreateURLLoader();
pp::CompletionCallback callback =
loader_factory_.NewCallback(&DocumentLoader::DidOpen);
- pp::URLRequestInfo request = GetRequest(pos, size);
+ pp::URLRequestInfo request = GetRequest(pos, current_request_extended_size_);
requests_count_++;
int rv = loader_.Open(request, callback);
if (rv != PP_OK_COMPLETIONPENDING)
@@ -469,14 +454,51 @@ void DocumentLoader::DidRead(int32_t result) {
current_chunk_read_ += length;
client_->OnNewDataAvailable();
}
+
+ // Only call the renderer if we allow partial loading.
+ if (!partial_document_) {
+ ReadMore();
+ return;
+ }
+
+ UpdateRendering();
+ RemoveCompletedRanges();
+
+ if (!pending_requests_.empty()) {
+ // If there are pending requests and the current content we're downloading
+ // doesn't satisfy any of these requests, cancel the current request to
+ // fulfill those more important requests.
+ bool satisfying_pending_request =
+ SatisfyingRequest(current_request_offset_, current_request_size_);
+ for (const auto& pending_request : pending_requests_) {
+ if (SatisfyingRequest(pending_request.first, pending_request.second)) {
+ satisfying_pending_request = true;
+ break;
+ }
+ }
+ // Cancel the request as it's not satisfying any request from the
+ // renderer, unless the current request is finished in which case we let
+ // it finish cleanly.
+ if (!satisfying_pending_request &&
+ current_pos_ < current_request_offset_ +
+ current_request_extended_size_) {
+ loader_.Close();
+ }
+ }
+
ReadMore();
- } else if (result == PP_OK) {
+ } else if (result == PP_OK || result == PP_ERROR_ABORTED) {
ReadComplete();
} else {
NOTREACHED();
}
}
+bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const {
+ return offset <= current_pos_ + kDefaultRequestSize &&
+ current_pos_ < offset + size;
+}
+
void DocumentLoader::ReadComplete() {
if (!partial_document_) {
if (document_size_ == 0) {
@@ -497,46 +519,22 @@ void DocumentLoader::ReadComplete() {
}
request_pending_ = false;
- pending_requests_.pop_front();
-
- // If there are more pending request - continue downloading.
- if (!pending_requests_.empty()) {
- DownloadPendingRequests();
- return;
- }
if (IsDocumentComplete()) {
client_->OnDocumentComplete();
return;
}
+ UpdateRendering();
+ DownloadPendingRequests();
+}
+
+void DocumentLoader::UpdateRendering() {
if (header_request_)
client_->OnPartialDocumentLoaded();
else
client_->OnPendingRequestComplete();
header_request_ = false;
-
- // The OnPendingRequestComplete could have added more requests.
- if (!pending_requests_.empty()) {
- DownloadPendingRequests();
- } else {
- // Document is not complete and we have no outstanding requests.
- // Let's keep downloading PDF file in small chunks.
- uint32_t pos = chunk_stream_.GetFirstMissingByte();
- std::vector<std::pair<size_t, size_t> > ranges;
- chunk_stream_.GetMissedRanges(pos, GetRequestSize(), &ranges);
- DCHECK(!ranges.empty());
- RequestData(ranges[0].first, ranges[0].second);
- }
-}
-
-uint32_t DocumentLoader::GetRequestSize() const {
- // Document loading strategy:
- // For first 10 requests, we use 32k chunk sizes, for the next 10 requests we
- // double the size (64k), and so on, until we cap max request size at 2M for
- // 71 or more requests.
- uint32_t limited_count = std::min(std::max(requests_count_, 10u), 70u);
- return 32 * 1024 * (1 << ((limited_count - 1) / 10u));
}
} // namespace chrome_pdf
diff --git a/pdf/document_loader.h b/pdf/document_loader.h
index 4e734a0..7e175de 100644
--- a/pdf/document_loader.h
+++ b/pdf/document_loader.h
@@ -7,6 +7,7 @@
#include <list>
#include <string>
+#include <utility>
#include <vector>
#include "base/basictypes.h"
@@ -14,8 +15,6 @@
#include "ppapi/cpp/url_loader.h"
#include "ppapi/utility/completion_callback_factory.h"
-#define kDefaultRequestSize 32768u
-
namespace chrome_pdf {
class DocumentLoader {
@@ -81,12 +80,24 @@ class DocumentLoader {
void LoadFullDocument();
// Download pending requests.
void DownloadPendingRequests();
+ // Remove completed ranges.
+ void RemoveCompletedRanges();
+ // Returns true if we are already in progress satisfying the request, or just
+ // about ready to start. This helps us avoid expensive jumping around, and
+ // even worse leaving tiny gaps in the byte stream that might have to be
+ // filled later.
+ bool SatisfyingRequest(size_t pos, size_t size) const;
// Called when we complete server request and read all data from it.
void ReadComplete();
// Creates request to download size bytes of data starting from position.
pp::URLRequestInfo GetRequest(uint32_t position, uint32_t size) const;
- // Returns current request size in bytes.
- uint32_t GetRequestSize() const;
+ // Updates the rendering by the Client.
+ void UpdateRendering();
+
+ // Document below size will be downloaded in one chunk.
+ static const uint32_t kMinFileSize = 64 * 1024;
+ // Number was chosen in crbug.com/78264#c8
+ enum { kDefaultRequestSize = 65536 };
Client* client_;
std::string url_;
@@ -97,6 +108,15 @@ class DocumentLoader {
bool request_pending_;
typedef std::list<std::pair<size_t, size_t> > PendingRequests;
PendingRequests pending_requests_;
+ // The starting position of the HTTP request currently being processed.
+ size_t current_request_offset_;
+ // The size of the byte range the current HTTP request must download before
+ // being cancelled.
+ size_t current_request_size_;
+ // The actual byte range size of the current HTTP request. This may be larger
+ // than |current_request_size_| and the request may be cancelled before
+ // reaching |current_request_offset_| + |current_request_extended_size_|.
+ size_t current_request_extended_size_;
char buffer_[kDefaultRequestSize];
uint32_t current_pos_;
uint32_t current_chunk_size_;