-rw-r--r--  DEPS                                                                  2
-rw-r--r--  content/browser/DEPS                                                  1
-rw-r--r--  content/browser/renderer_host/duplicate_content_resource_handler.cc  134
-rw-r--r--  content/browser/renderer_host/duplicate_content_resource_handler.h   65
-rw-r--r--  content/browser/renderer_host/resource_dispatcher_host_impl.cc       11
-rw-r--r--  content/content_browser.gypi                                          3
-rw-r--r--  third_party/smhasher/README.chromium                                  2
-rw-r--r--  third_party/smhasher/smhasher.gyp                                     10
8 files changed, 225 insertions(+), 3 deletions(-)
diff --git a/DEPS b/DEPS
index 33364d0..6cf7884a 100644
--- a/DEPS
+++ b/DEPS
@@ -314,7 +314,7 @@ deps = {
(Var("googlecode_url") % "mozc") + "/trunk/src/chrome/chromeos/renderer@83",
"src/third_party/smhasher/src":
- (Var("googlecode_url") % "smhasher") + "/trunk@136",
+ (Var("googlecode_url") % "smhasher") + "/trunk@146",
"src/third_party/libphonenumber/src/phonenumbers":
(Var("googlecode_url") % "libphonenumber") +
diff --git a/content/browser/DEPS b/content/browser/DEPS
index 063fc46..ed0fea5 100644
--- a/content/browser/DEPS
+++ b/content/browser/DEPS
@@ -9,6 +9,7 @@ include_rules = [
# Other libraries.
"+third_party/iaccessible2",
"+third_party/isimpledom",
+ "+third_party/smhasher",
"+third_party/speex",
# DO NOT ADD ANY CHROME INCLUDES HERE!!!
diff --git a/content/browser/renderer_host/duplicate_content_resource_handler.cc b/content/browser/renderer_host/duplicate_content_resource_handler.cc
new file mode 100644
index 0000000..0c7a8ba
--- /dev/null
+++ b/content/browser/renderer_host/duplicate_content_resource_handler.cc
@@ -0,0 +1,134 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/browser/renderer_host/duplicate_content_resource_handler.h"
+
+#include <set>
+
+#include "base/lazy_instance.h"
+#include "base/logging.h"
+#include "base/metrics/histogram.h"
+#include "content/browser/renderer_host/resource_request_info_impl.h"
+#include "net/base/io_buffer.h"
+#include "net/url_request/url_request.h"
+
+namespace content {
+namespace {
+
+class GlobalDuplicateRecords {
+ public:
+ static GlobalDuplicateRecords* GetInstance() {
+ static base::LazyInstance<GlobalDuplicateRecords>::Leaky records = LAZY_INSTANCE_INITIALIZER;
+ return records.Pointer();
+ }
+
+ std::set<MH_UINT32>* content_matches() {
+ return &content_matches_;
+ }
+
+ std::set<MH_UINT32>* content_and_url_matches() {
+ return &content_and_url_matches_;
+ }
+
+ GlobalDuplicateRecords() {}
+ ~GlobalDuplicateRecords() {}
+
+ private:
+ std::set<MH_UINT32> content_matches_;
+ std::set<MH_UINT32> content_and_url_matches_;
+};
+
+} // namespace
+
+DuplicateContentResourceHandler::DuplicateContentResourceHandler(
+ scoped_ptr<ResourceHandler> next_handler,
+ ResourceType::Type resource_type,
+ net::URLRequest* request)
+ : LayeredResourceHandler(next_handler.Pass()),
+ resource_type_(resource_type),
+ bytes_read_(0),
+ request_(request),
+ pmurhash_ph1_(0),
+ pmurhash_pcarry_(0) {
+}
+
+DuplicateContentResourceHandler::~DuplicateContentResourceHandler() {
+}
+
+bool DuplicateContentResourceHandler::OnWillRead(int request_id,
+ net::IOBuffer** buf,
+ int* buf_size,
+ int min_size) {
+ DCHECK_EQ(-1, min_size);
+
+ if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size))
+ return false;
+ read_buffer_ = *buf;
+ return true;
+}
+
+bool DuplicateContentResourceHandler::OnReadCompleted(int request_id,
+ int bytes_read,
+ bool* defer) {
+ PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
+ read_buffer_->data(), bytes_read);
+ bytes_read_ += bytes_read;
+ return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
+}
+
+bool DuplicateContentResourceHandler::OnResponseCompleted(
+ int request_id,
+ const net::URLRequestStatus& status,
+ const std::string& security_info) {
+
+ if (status.is_success())
+ RecordContentMetrics();
+
+ return next_handler_->OnResponseCompleted(request_id, status, security_info);
+}
+
+void DuplicateContentResourceHandler::RecordContentMetrics() {
+ MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_,
+ pmurhash_pcarry_, bytes_read_);
+
+ // Extend the running hash with the url, so we can test whether future
+ // content-identical resources come from the same original url or not.
+ MH_UINT32 hashed_with_url;
+ const std::string& url_spec = request_->url().spec();
+ PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
+ url_spec.data(), url_spec.length());
+ hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_,
+ url_spec.length() + bytes_read_);
+
+ DVLOG(4) << "url: " << url_spec;
+ DVLOG(4) << "contents hash: " << contents_hash;
+ DVLOG(4) << "hash with url: " << hashed_with_url;
+
+ std::set<MH_UINT32>* content_matches =
+ GlobalDuplicateRecords::GetInstance()->content_matches();
+ std::set<MH_UINT32>* content_and_url_matches =
+ GlobalDuplicateRecords::GetInstance()->content_and_url_matches();
+
+ const bool did_match_contents = content_matches->count(contents_hash) > 0;
+ const bool did_match_contents_and_url =
+ content_and_url_matches->count(hashed_with_url) > 0;
+
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents);
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl",
+ did_match_contents && did_match_contents_and_url);
+ if (did_match_contents && !did_match_contents_and_url) {
+ content_and_url_matches->insert(hashed_with_url);
+ UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_,
+ 1, 0x7FFFFFFF, 50);
+ UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss",
+ resource_type_, ResourceType::LAST_TYPE);
+ }
+ content_matches->insert(contents_hash);
+ content_and_url_matches->insert(hashed_with_url);
+
+ bytes_read_ = 0;
+ read_buffer_ = NULL;
+}
+
+} // namespace content
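
A subtlety worth noting in RecordContentMetrics() above: PMurHash32_Result() takes
the running (h1, carry) state by value and leaves it intact, which is what lets the
handler finalize the content-only hash and then keep feeding the same state with the
url bytes to produce the combined hash. A minimal standalone sketch of that pattern
(HashPair is an illustrative helper, not part of this change):

  #include <string>
  #include "third_party/smhasher/src/PMurHash.h"

  // Computes the content-only hash and the content-plus-url hash the same way
  // DuplicateContentResourceHandler does: one running PMurHash state,
  // finalized twice.
  void HashPair(const std::string& body, const std::string& url,
                MH_UINT32* content_hash, MH_UINT32* content_and_url_hash) {
    MH_UINT32 h1 = 0;
    MH_UINT32 carry = 0;
    // In the handler, this call happens once per OnReadCompleted() chunk.
    PMurHash32_Process(&h1, &carry, body.data(),
                       static_cast<int>(body.size()));
    *content_hash = PMurHash32_Result(
        h1, carry, static_cast<MH_UINT32>(body.size()));
    // Result() did not consume the state, so extend it with the url bytes.
    PMurHash32_Process(&h1, &carry, url.data(),
                       static_cast<int>(url.size()));
    *content_and_url_hash = PMurHash32_Result(
        h1, carry, static_cast<MH_UINT32>(body.size() + url.size()));
  }
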
diff --git a/content/browser/renderer_host/duplicate_content_resource_handler.h b/content/browser/renderer_host/duplicate_content_resource_handler.h
new file mode 100644
index 0000000..8ca80e4b
--- /dev/null
+++ b/content/browser/renderer_host/duplicate_content_resource_handler.h
@@ -0,0 +1,65 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// DuplicateContentResourceHandler hashes each resource as it is read, both by
+// contents alone and by contents combined with url. It is an experiment meant
+// to simulate cache hits for both a url-based and a content-based cache.
+
+#ifndef CONTENT_BROWSER_RENDERER_HOST_DUPLICATE_CONTENT_RESOURCE_HANDLER_H_
+#define CONTENT_BROWSER_RENDERER_HOST_DUPLICATE_CONTENT_RESOURCE_HANDLER_H_
+
+#include <string>
+
+#include "base/memory/ref_counted.h"
+#include "content/browser/renderer_host/layered_resource_handler.h"
+#include "third_party/smhasher/src/PMurHash.h"
+#include "webkit/glue/resource_type.h"
+
+namespace net {
+class IOBuffer;
+class URLRequest;
+class URLRequestStatus;
+}
+
+namespace content {
+
+class DuplicateContentResourceHandler : public LayeredResourceHandler {
+ public:
+ DuplicateContentResourceHandler(scoped_ptr<ResourceHandler> next_handler,
+ ResourceType::Type resource_type,
+ net::URLRequest* request);
+ virtual ~DuplicateContentResourceHandler();
+
+ private:
+ // ResourceHandler implementation
+ virtual bool OnWillRead(int request_id,
+ net::IOBuffer** buf,
+ int* buf_size,
+ int min_size) OVERRIDE;
+ virtual bool OnReadCompleted(int request_id,
+ int bytes_read,
+ bool* defer) OVERRIDE;
+ virtual bool OnResponseCompleted(int request_id,
+ const net::URLRequestStatus& status,
+ const std::string& security_info) OVERRIDE;
+
+ void RecordContentMetrics();
+
+ ResourceType::Type resource_type_;
+ int bytes_read_;
+ scoped_refptr<net::IOBuffer> read_buffer_;
+ net::URLRequest* request_;
+
+ // Running state for the incremental hash (PMurHash), updated as each chunk
+ // of input is digested.
+ MH_UINT32 pmurhash_ph1_;
+ MH_UINT32 pmurhash_pcarry_;
+
+ DISALLOW_COPY_AND_ASSIGN(DuplicateContentResourceHandler);
+};
+
+} // namespace content
+
+#endif // CONTENT_BROWSER_RENDERER_HOST_DUPLICATE_CONTENT_RESOURCE_HANDLER_H_
+
diff --git a/content/browser/renderer_host/resource_dispatcher_host_impl.cc b/content/browser/renderer_host/resource_dispatcher_host_impl.cc
index a897353..052e5ce 100644
--- a/content/browser/renderer_host/resource_dispatcher_host_impl.cc
+++ b/content/browser/renderer_host/resource_dispatcher_host_impl.cc
@@ -35,6 +35,7 @@
#include "content/browser/renderer_host/async_resource_handler.h"
#include "content/browser/renderer_host/buffered_resource_handler.h"
#include "content/browser/renderer_host/cross_site_resource_handler.h"
+#include "content/browser/renderer_host/duplicate_content_resource_handler.h"
#include "content/browser/renderer_host/redirect_to_file_resource_handler.h"
#include "content/browser/renderer_host/render_view_host_delegate.h"
#include "content/browser/renderer_host/render_view_host_impl.h"
@@ -1042,6 +1043,16 @@ void ResourceDispatcherHostImpl::BeginRequest(
handler.reset(
new BufferedResourceHandler(handler.Pass(), this, request));
+ // This is an experiment that observes how many resources are duplicates,
+ // and how many of those duplicates come from the same url versus different
+ // urls, by recording both the hash of each resource's contents and the hash
+ // of its contents combined with its url.
+ // TODO(frankwang, gavinp): Clean up this experiment.
+ handler.reset(
+ new DuplicateContentResourceHandler(handler.Pass(),
+ request_data.resource_type,
+ request));
+
ScopedVector<ResourceThrottle> throttles;
if (delegate_) {
bool is_continuation_of_transferred_request =
diff --git a/content/content_browser.gypi b/content/content_browser.gypi
index 8b191c8..351143d 100644
--- a/content/content_browser.gypi
+++ b/content/content_browser.gypi
@@ -13,6 +13,7 @@
'../ppapi/ppapi_internal.gyp:ppapi_proxy',
'../skia/skia.gyp:skia',
'<(webkit_src_dir)/Source/WebKit/chromium/WebKit.gyp:webkit',
+ '../third_party/smhasher/smhasher.gyp:pmurhash',
'../third_party/zlib/zlib.gyp:zlib',
'../ui/surface/surface.gyp:surface',
'../ui/ui.gyp:ui',
@@ -522,6 +523,8 @@
'browser/renderer_host/dip_util.h',
'browser/renderer_host/doomed_resource_handler.cc',
'browser/renderer_host/doomed_resource_handler.h',
+ 'browser/renderer_host/duplicate_content_resource_handler.cc',
+ 'browser/renderer_host/duplicate_content_resource_handler.h',
'browser/renderer_host/file_utilities_message_filter.cc',
'browser/renderer_host/file_utilities_message_filter.h',
'browser/renderer_host/gamepad_browser_message_filter.cc',
diff --git a/third_party/smhasher/README.chromium b/third_party/smhasher/README.chromium
index dcf4bce..3c96573 100644
--- a/third_party/smhasher/README.chromium
+++ b/third_party/smhasher/README.chromium
@@ -1,7 +1,7 @@
Name: SMHasher
URL: http://code.google.com/p/smhasher/
Version: 0
-Revision: 136
+Revision: 146
License: MIT (SMHasher), Public Domain (MurmurHash)
License File: LICENSE
Security Critical: yes
diff --git a/third_party/smhasher/smhasher.gyp b/third_party/smhasher/smhasher.gyp
index fa59c2c..3f0cbc8 100644
--- a/third_party/smhasher/smhasher.gyp
+++ b/third_party/smhasher/smhasher.gyp
@@ -1,4 +1,4 @@
-# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -12,5 +12,13 @@
'src/MurmurHash3.cpp',
],
},
+ {
+ 'target_name': 'pmurhash',
+ 'type': 'static_library',
+ 'sources': [
+ 'src/PMurHash.c',
+ 'src/PMurHash.h',
+ ],
+ },
],
}