-rw-r--r--  content/browser/renderer_host/duplicate_content_resource_handler.cc | 25
-rw-r--r--  content/browser/renderer_host/duplicate_content_resource_handler.h  |  4
2 files changed, 20 insertions, 9 deletions
diff --git a/content/browser/renderer_host/duplicate_content_resource_handler.cc b/content/browser/renderer_host/duplicate_content_resource_handler.cc
index 5760db4..c94f511 100644
--- a/content/browser/renderer_host/duplicate_content_resource_handler.cc
+++ b/content/browser/renderer_host/duplicate_content_resource_handler.cc
@@ -51,6 +51,11 @@ DuplicateContentResourceHandler::DuplicateContentResourceHandler(
       request_(request),
       pmurhash_ph1_(0),
       pmurhash_pcarry_(0) {
+  // Ignore everything that's not http/https. Specifically, exclude data and
+  // blob URLs which can be generated by content and cause the maintained sets
+  // to grow without bounds.
+  const GURL& url = request_->url();
+  track_request_ = url.SchemeIs("http") || url.SchemeIs("https");
 }
 
 DuplicateContentResourceHandler::~DuplicateContentResourceHandler() {
@@ -71,9 +76,11 @@ bool DuplicateContentResourceHandler::OnWillRead(int request_id,
 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id,
                                                       int bytes_read,
                                                       bool* defer) {
-  PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
-                     read_buffer_->data(), bytes_read);
-  bytes_read_ += bytes_read;
+  if (track_request_) {
+    PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
+                       read_buffer_->data(), bytes_read);
+    bytes_read_ += bytes_read;
+  }
   return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
 }
 
@@ -81,9 +88,10 @@ bool DuplicateContentResourceHandler::OnResponseCompleted(
     int request_id,
     const net::URLRequestStatus& status,
     const std::string& security_info) {
-
-  if (status.is_success())
-    RecordContentMetrics();
+  if (track_request_) {
+    if (status.is_success())
+      RecordContentMetrics();
+  }
   return next_handler_->OnResponseCompleted(request_id, status, security_info);
 }
 
@@ -98,12 +106,11 @@ void DuplicateContentResourceHandler::RecordContentMetrics() {
 
   // Combine the contents_hash with the url, so we can test if future content
   // identical resources have the same original url or not.
-  MH_UINT32 hashed_with_url;
   const std::string& url_spec = request_->url().spec();
   PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
                      url_spec.data(), url_spec.length());
-  hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_,
-                                      url_spec.length() + bytes_read_);
+  MH_UINT32 hashed_with_url = PMurHash32_Result(
+      pmurhash_ph1_, pmurhash_pcarry_, url_spec.length() + bytes_read_);
 
   DVLOG(4) << "url: " << url_spec;
   DVLOG(4) << "contents hash: " << contents_hash;
diff --git a/content/browser/renderer_host/duplicate_content_resource_handler.h b/content/browser/renderer_host/duplicate_content_resource_handler.h
index 8ca80e4b..ac30dfc 100644
--- a/content/browser/renderer_host/duplicate_content_resource_handler.h
+++ b/content/browser/renderer_host/duplicate_content_resource_handler.h
@@ -56,6 +56,10 @@ class DuplicateContentResourceHandler: public LayeredResourceHandler {
   MH_UINT32 pmurhash_ph1_;
   MH_UINT32 pmurhash_pcarry_;
 
+  // Used to track if it's a request we're interested in. If it's not, then we
+  // early out to avoid calculating the hash unnecessarily.
+  bool track_request_;
+
   DISALLOW_COPY_AND_ASSIGN(DuplicateContentResourceHandler);
 };
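
The following is a minimal standalone sketch of the gating idea this patch introduces, not Chromium code: only http/https responses are hashed, so data: and blob: URLs that pages can mint freely never feed the duplicate-content bookkeeping. The SchemeIs() stand-in and the ShouldTrackRequest() helper are hypothetical simplifications; the actual patch calls GURL::SchemeIs() on request_->url() in the handler's constructor.

// sketch.cc — illustrative only; build with any C++11 compiler.
#include <iostream>
#include <string>

// Hypothetical stand-in for GURL::SchemeIs(): exact scheme match up to the
// first colon ("http" does not match "https" or "blob:https://...").
bool SchemeIs(const std::string& url, const std::string& scheme) {
  const std::string prefix = scheme + ":";
  return url.compare(0, prefix.size(), prefix) == 0;
}

// Mirrors the patch's track_request_ assignment:
//   track_request_ = url.SchemeIs("http") || url.SchemeIs("https");
bool ShouldTrackRequest(const std::string& url) {
  return SchemeIs(url, "http") || SchemeIs(url, "https");
}

int main() {
  const std::string urls[] = {
      "https://example.com/script.js",
      "http://example.com/image.png",
      "data:text/plain;base64,aGVsbG8=",
      "blob:https://example.com/1234-5678",
  };
  for (const std::string& url : urls) {
    // Only the first two would have their bodies hashed by the handler.
    std::cout << (ShouldTrackRequest(url) ? "track: " : "skip:  ") << url
              << "\n";
  }
  return 0;
}

Running the sketch prints "track:" for the two http/https URLs and "skip:" for the data: and blob: URLs, which is exactly the set the handler's hashing and metrics recording are now limited to.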