diff options
-rw-r--r-- | DEPS | 2
-rw-r--r-- | content/browser/DEPS | 1
-rw-r--r-- | content/browser/renderer_host/duplicate_content_resource_handler.cc | 134
-rw-r--r-- | content/browser/renderer_host/duplicate_content_resource_handler.h | 65
-rw-r--r-- | content/browser/renderer_host/resource_dispatcher_host_impl.cc | 11
-rw-r--r-- | content/content_browser.gypi | 3
-rw-r--r-- | third_party/smhasher/README.chromium | 2
-rw-r--r-- | third_party/smhasher/smhasher.gyp | 10
8 files changed, 225 insertions(+), 3 deletions(-)
@@ -314,7 +314,7 @@ deps = { (Var("googlecode_url") % "mozc") + "/trunk/src/chrome/chromeos/renderer@83", "src/third_party/smhasher/src": - (Var("googlecode_url") % "smhasher") + "/trunk@136", + (Var("googlecode_url") % "smhasher") + "/trunk@146", "src/third_party/libphonenumber/src/phonenumbers": (Var("googlecode_url") % "libphonenumber") + diff --git a/content/browser/DEPS b/content/browser/DEPS index 063fc46..ed0fea5 100644 --- a/content/browser/DEPS +++ b/content/browser/DEPS @@ -9,6 +9,7 @@ include_rules = [ # Other libraries. "+third_party/iaccessible2", "+third_party/isimpledom", + "+third_party/smhasher", "+third_party/speex", # DO NOT ADD ANY CHROME INCLUDES HERE!!! diff --git a/content/browser/renderer_host/duplicate_content_resource_handler.cc b/content/browser/renderer_host/duplicate_content_resource_handler.cc new file mode 100644 index 0000000..0c7a8ba --- /dev/null +++ b/content/browser/renderer_host/duplicate_content_resource_handler.cc @@ -0,0 +1,134 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "content/browser/renderer_host/duplicate_content_resource_handler.h" + +#include <set> + +#include "base/lazy_instance.h" +#include "base/logging.h" +#include "base/metrics/histogram.h" +#include "content/browser/renderer_host/resource_request_info_impl.h" +#include "net/base/io_buffer.h" +#include "net/url_request/url_request.h" + +namespace content { +namespace { + +class GlobalDuplicateRecords { + public: + static GlobalDuplicateRecords* GetInstance() { + static base::LazyInstance<GlobalDuplicateRecords>::Leaky records; + return records.Pointer(); + } + + std::set<MH_UINT32>* content_matches() { + return &content_matches_; + } + + std::set<MH_UINT32>* content_and_url_matches() { + return &content_and_url_matches_; + } + + GlobalDuplicateRecords() {} + ~GlobalDuplicateRecords() {} + + private: + std::set<MH_UINT32> content_matches_; + std::set<MH_UINT32> content_and_url_matches_; +}; + +} // namespace + +DuplicateContentResourceHandler::DuplicateContentResourceHandler( + scoped_ptr<ResourceHandler> next_handler, + ResourceType::Type resource_type, + net::URLRequest* request) + : LayeredResourceHandler(next_handler.Pass()), + resource_type_(resource_type), + bytes_read_(0), + request_(request), + pmurhash_ph1_(0), + pmurhash_pcarry_(0) { +} + +DuplicateContentResourceHandler::~DuplicateContentResourceHandler() { +} + +bool DuplicateContentResourceHandler::OnWillRead(int request_id, + net::IOBuffer** buf, + int* buf_size, + int min_size) { + DCHECK_EQ(-1, min_size); + + if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) + return false; + read_buffer_ = *buf; + return true; +} + +bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, + int bytes_read, + bool* defer) { + PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, + read_buffer_->data(), bytes_read); + bytes_read_ += bytes_read; + return next_handler_->OnReadCompleted(request_id, bytes_read, defer); +} + +bool DuplicateContentResourceHandler::OnResponseCompleted( 
+ int request_id, + const net::URLRequestStatus& status, + const std::string& security_info) { + + if (status.is_success()) + RecordContentMetrics(); + + return next_handler_->OnResponseCompleted(request_id, status, security_info); +} + +void DuplicateContentResourceHandler::RecordContentMetrics() { + MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, + pmurhash_pcarry_, bytes_read_); + + // Combine the contents_hash with the url, so we can test if future content + // identical resources have the same original url or not. + MH_UINT32 hashed_with_url; + const std::string& url_spec = request_->url().spec(); + PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, + url_spec.data(), url_spec.length()); + hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, + url_spec.length() + bytes_read_); + + DVLOG(4) << "url: " << url_spec; + DVLOG(4) << "contents hash: " << contents_hash; + DVLOG(4) << "hash with url: " << hashed_with_url; + + std::set<MH_UINT32>* content_matches = + GlobalDuplicateRecords::GetInstance()->content_matches(); + std::set<MH_UINT32>* content_and_url_matches = + GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); + + const bool did_match_contents = content_matches->count(contents_hash) > 0; + const bool did_match_contents_and_url = + content_and_url_matches->count(hashed_with_url) > 0; + + UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents); + UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", + did_match_contents && did_match_contents_and_url); + if (did_match_contents && !did_match_contents_and_url) { + content_and_url_matches->insert(hashed_with_url); + UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_, + 1, 0x7FFFFFFF, 50); + UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss", + resource_type_, ResourceType::LAST_TYPE); + } + content_matches->insert(contents_hash); + content_and_url_matches->insert(hashed_with_url); + + bytes_read_ = 0; + read_buffer_ = NULL; +} + +} // 
namespace content diff --git a/content/browser/renderer_host/duplicate_content_resource_handler.h b/content/browser/renderer_host/duplicate_content_resource_handler.h new file mode 100644 index 0000000..8ca80e4b --- /dev/null +++ b/content/browser/renderer_host/duplicate_content_resource_handler.h @@ -0,0 +1,65 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// DuplicateContentResourceHandler keeps a hash of resources seen and based on +// url. It is an experiment meant to simulate cache hits for both a url-based +// and a content-based cache. + +#ifndef CONTENT_BROWSER_RENDERER_HOST_DUPLICATE_CONTENT_RESOURCE_HANDLER_H_ +#define CONTENT_BROWSER_RENDERER_HOST_DUPLICATE_CONTENT_RESOURCE_HANDLER_H_ + +#include <string> + +#include "base/memory/ref_counted.h" +#include "content/browser/renderer_host/layered_resource_handler.h" +#include "third_party/smhasher/src/PMurHash.h" +#include "webkit/glue/resource_type.h" + +namespace net { +class IOBuffer; +class URLRequest; +class URLRequestStatus; +} + +namespace content { + +class DuplicateContentResourceHandler: public LayeredResourceHandler { + public: + DuplicateContentResourceHandler(scoped_ptr<ResourceHandler> next_handler, + ResourceType::Type resource_type, + net::URLRequest* request); + virtual ~DuplicateContentResourceHandler(); + + private: + // ResourceHandler implementation + virtual bool OnWillRead(int request_id, + net::IOBuffer** buf, + int* buf_size, + int min_size) OVERRIDE; + virtual bool OnReadCompleted(int request_id, + int bytes_read, + bool* defer) OVERRIDE; + virtual bool OnResponseCompleted(int request_id, + const net::URLRequestStatus& status, + const std::string& security_info) OVERRIDE; + + void RecordContentMetrics(); + + ResourceType::Type resource_type_; + int bytes_read_; + scoped_refptr<net::IOBuffer> read_buffer_; + net::URLRequest* request_; + + // These values 
are temporary values that are used in each digest of the + // inputs for the incremental hash (PMurHash). + MH_UINT32 pmurhash_ph1_; + MH_UINT32 pmurhash_pcarry_; + + DISALLOW_COPY_AND_ASSIGN(DuplicateContentResourceHandler); +}; + +} // namespace content + +#endif // CONTENT_BROWSER_RENDERER_HOST_DUPLICATE_CONTENT_RESOURCE_HANDLER_H_ + diff --git a/content/browser/renderer_host/resource_dispatcher_host_impl.cc b/content/browser/renderer_host/resource_dispatcher_host_impl.cc index a897353..052e5ce 100644 --- a/content/browser/renderer_host/resource_dispatcher_host_impl.cc +++ b/content/browser/renderer_host/resource_dispatcher_host_impl.cc @@ -35,6 +35,7 @@ #include "content/browser/renderer_host/async_resource_handler.h" #include "content/browser/renderer_host/buffered_resource_handler.h" #include "content/browser/renderer_host/cross_site_resource_handler.h" +#include "content/browser/renderer_host/duplicate_content_resource_handler.h" #include "content/browser/renderer_host/redirect_to_file_resource_handler.h" #include "content/browser/renderer_host/render_view_host_delegate.h" #include "content/browser/renderer_host/render_view_host_impl.h" @@ -1042,6 +1043,16 @@ void ResourceDispatcherHostImpl::BeginRequest( handler.reset( new BufferedResourceHandler(handler.Pass(), this, request)); + // This is an experiment that observes resources and observes how many are + // duplicated and how many of those duplicated resources are from the same + // and different URLs by storing the hash of the resource and the hash of the + // resource with the URL. + // TODO(frankwang, gavinp): Clean up this experiment. 
+ handler.reset( + new DuplicateContentResourceHandler(handler.Pass(), + request_data.resource_type, + request)); + ScopedVector<ResourceThrottle> throttles; if (delegate_) { bool is_continuation_of_transferred_request = diff --git a/content/content_browser.gypi b/content/content_browser.gypi index 8b191c8..351143d 100644 --- a/content/content_browser.gypi +++ b/content/content_browser.gypi @@ -13,6 +13,7 @@ '../ppapi/ppapi_internal.gyp:ppapi_proxy', '../skia/skia.gyp:skia', '<(webkit_src_dir)/Source/WebKit/chromium/WebKit.gyp:webkit', + '../third_party/smhasher/smhasher.gyp:pmurhash', '../third_party/zlib/zlib.gyp:zlib', '../ui/surface/surface.gyp:surface', '../ui/ui.gyp:ui', @@ -522,6 +523,8 @@ 'browser/renderer_host/dip_util.h', 'browser/renderer_host/doomed_resource_handler.cc', 'browser/renderer_host/doomed_resource_handler.h', + 'browser/renderer_host/duplicate_content_resource_handler.cc', + 'browser/renderer_host/duplicate_content_resource_handler.h', 'browser/renderer_host/file_utilities_message_filter.cc', 'browser/renderer_host/file_utilities_message_filter.h', 'browser/renderer_host/gamepad_browser_message_filter.cc', diff --git a/third_party/smhasher/README.chromium b/third_party/smhasher/README.chromium index dcf4bce..3c96573 100644 --- a/third_party/smhasher/README.chromium +++ b/third_party/smhasher/README.chromium @@ -1,7 +1,7 @@ Name: SMHasher URL: http://code.google.com/p/smhasher/ Version: 0 -Revision: 136 +Revision: 146 License: MIT (SMHasher), Public Domain (MurmurHash) License File: LICENSE Security Critical: yes diff --git a/third_party/smhasher/smhasher.gyp b/third_party/smhasher/smhasher.gyp index fa59c2c..3f0cbc8 100644 --- a/third_party/smhasher/smhasher.gyp +++ b/third_party/smhasher/smhasher.gyp @@ -1,4 +1,4 @@ -# Copyright (c) 2011 The Chromium Authors. All rights reserved. +# Copyright (c) 2012 The Chromium Authors. All rights reserved. 
# Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -12,5 +12,13 @@ 'src/MurmurHash3.cpp', ], }, + { + 'target_name': 'pmurhash', + 'type': 'static_library', + 'sources': [ + 'src/PMurHash.h', + 'src/PMurHash.c', + ], + }, ], } |