summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- content/browser/renderer_host/duplicate_content_resource_handler.cc 25
-rw-r--r-- content/browser/renderer_host/duplicate_content_resource_handler.h 4
2 files changed, 20 insertions, 9 deletions
diff --git a/content/browser/renderer_host/duplicate_content_resource_handler.cc b/content/browser/renderer_host/duplicate_content_resource_handler.cc
index 5760db4..c94f511 100644
--- a/content/browser/renderer_host/duplicate_content_resource_handler.cc
+++ b/content/browser/renderer_host/duplicate_content_resource_handler.cc
@@ -51,6 +51,11 @@ DuplicateContentResourceHandler::DuplicateContentResourceHandler(
request_(request),
pmurhash_ph1_(0),
pmurhash_pcarry_(0) {
+ // Ignore everything that's not http/https. Specifically, exclude data and
+ // blob URLs which can be generated by content and cause the maintained sets
+ // to grow without bounds.
+ const GURL& url = request_->url();
+ track_request_ = url.SchemeIs("http") || url.SchemeIs("https");
}
DuplicateContentResourceHandler::~DuplicateContentResourceHandler() {
@@ -71,9 +76,11 @@ bool DuplicateContentResourceHandler::OnWillRead(int request_id,
bool DuplicateContentResourceHandler::OnReadCompleted(int request_id,
int bytes_read,
bool* defer) {
- PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
- read_buffer_->data(), bytes_read);
- bytes_read_ += bytes_read;
+ if (track_request_) {
+ PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
+ read_buffer_->data(), bytes_read);
+ bytes_read_ += bytes_read;
+ }
return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
}
@@ -81,9 +88,10 @@ bool DuplicateContentResourceHandler::OnResponseCompleted(
int request_id,
const net::URLRequestStatus& status,
const std::string& security_info) {
-
- if (status.is_success())
- RecordContentMetrics();
+ if (track_request_) {
+ if (status.is_success())
+ RecordContentMetrics();
+ }
return next_handler_->OnResponseCompleted(request_id, status, security_info);
}
@@ -98,12 +106,11 @@ void DuplicateContentResourceHandler::RecordContentMetrics() {
// Combine the contents_hash with the url, so we can test if future content
// identical resources have the same original url or not.
- MH_UINT32 hashed_with_url;
const std::string& url_spec = request_->url().spec();
PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
url_spec.data(), url_spec.length());
- hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_,
- url_spec.length() + bytes_read_);
+ MH_UINT32 hashed_with_url = PMurHash32_Result(
+ pmurhash_ph1_, pmurhash_pcarry_, url_spec.length() + bytes_read_);
DVLOG(4) << "url: " << url_spec;
DVLOG(4) << "contents hash: " << contents_hash;
diff --git a/content/browser/renderer_host/duplicate_content_resource_handler.h b/content/browser/renderer_host/duplicate_content_resource_handler.h
index 8ca80e4b..ac30dfc 100644
--- a/content/browser/renderer_host/duplicate_content_resource_handler.h
+++ b/content/browser/renderer_host/duplicate_content_resource_handler.h
@@ -56,6 +56,10 @@ class DuplicateContentResourceHandler: public LayeredResourceHandler {
MH_UINT32 pmurhash_ph1_;
MH_UINT32 pmurhash_pcarry_;
+ // Used to track if it's a request we're interested in. If it's not, then we
+ // early out to avoid calculating the hash unnecessarily.
+ bool track_request_;
+
DISALLOW_COPY_AND_ASSIGN(DuplicateContentResourceHandler);
};