From 17e7b20bdf6906f4bdd574a9e0d4526456f3c301 Mon Sep 17 00:00:00 2001 From: "jar@chromium.org" Date: Fri, 13 Mar 2009 20:29:58 +0000 Subject: wtc: please look at URL related code, and hooks and nits you might have commented on before. huanr: please look at sdch_filter code. The intent was no semantic change, and only change in histograms and stats gathered. I wanted to be sure I had better stats on several failure cases, as the turn-around time of adding stats to instrument such cases after they surface is just too long. The big feature is the mechanism for getting the total number of bytes passed to a filter. We use the filter context to achieve this, and then the SDCH filter can calculate compression ratio (from pre-gunzip vs post SDCH decompress). The number of bytes read was also histogrammed in a number of error scenarios, to better diagnose what is going on when these cases arise (example: When some data is still buffered in the VCDIFF decoder). The sdch_filter destructor was getting long and hard to read with multiple if blocks, so I cleaned that up as well a bit (less indentation, and use of early returns). Nits not included in previous CL that earlier are listed as well. 
r=wtc,huanr Review URL: http://codereview.chromium.org/40319 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@11665 0039d316-1c4b-4281-b951-d872f2087c98 --- net/base/filter.cc | 55 ++------- net/base/filter.h | 83 ++++++-------- net/base/filter_unittest.cc | 49 +++++--- net/base/filter_unittest.h | 7 +- net/base/sdch_filter.cc | 178 ++++++++++++++++++----------- net/base/sdch_filter.h | 22 +++- net/base/sdch_filter_unittest.cc | 7 +- net/base/sdch_manager.h | 10 +- net/url_request/url_request_http_job.cc | 5 +- net/url_request/url_request_job.cc | 8 +- net/url_request/url_request_job.h | 8 +- net/url_request/url_request_job_metrics.cc | 5 +- net/url_request/url_request_job_metrics.h | 2 +- 13 files changed, 242 insertions(+), 197 deletions(-) (limited to 'net') diff --git a/net/base/filter.cc b/net/base/filter.cc index 06da53a..397a599 100644 --- a/net/base/filter.cc +++ b/net/base/filter.cc @@ -38,8 +38,8 @@ const char kTextHtml[] = "text/html"; Filter* Filter::Factory(const std::vector& filter_types, const FilterContext& filter_context) { - DCHECK(filter_context.GetInputStreambufferSize() > 0); - if (filter_types.empty() || filter_context.GetInputStreambufferSize() <= 0) + DCHECK(filter_context.GetInputStreamBufferSize() > 0); + if (filter_types.empty() || filter_context.GetInputStreamBufferSize() <= 0) return NULL; @@ -50,24 +50,6 @@ Filter* Filter::Factory(const std::vector& filter_types, if (!filter_list) return NULL; } - - // TODO(jar): These settings should go into the derived classes, on an as-needed basis. - std::string mime_type; - bool success = filter_context.GetMimeType(&mime_type); - DCHECK(success); - GURL gurl; - success = filter_context.GetURL(&gurl); - DCHECK(success); - base::Time request_time = filter_context.GetRequestTime(); - bool is_cached_content = filter_context.IsCachedContent(); - - filter_list->SetMimeType(mime_type); - filter_list->SetURL(gurl); - // Approximate connect time with request_time. 
If it is not cached, then - // this is a good approximation for when the first bytes went on the - // wire. - filter_list->SetConnectTime(request_time, is_cached_content); - return filter_list; } @@ -95,9 +77,11 @@ Filter::FilterType Filter::ConvertEncodingToType( // static void Filter::FixupEncodingTypes( - bool is_sdch_response, - const std::string& mime_type, + const FilterContext& filter_context, std::vector* encoding_types) { + std::string mime_type; + bool success = filter_context.GetMimeType(&mime_type); + DCHECK(success); if ((1 == encoding_types->size()) && (FILTER_TYPE_GZIP == encoding_types->front())) { @@ -111,7 +95,7 @@ void Filter::FixupEncodingTypes( encoding_types->clear(); } - if (!is_sdch_response) { + if (!filter_context.IsSdchResponse()) { if (1 < encoding_types->size()) { // Multiple filters were intended to only be used for SDCH (thus far!) SdchManager::SdchErrorRecovery( @@ -249,10 +233,6 @@ Filter::Filter(const FilterContext& filter_context) stream_buffer_size_(0), next_stream_data_(NULL), stream_data_len_(0), - url_(), - connect_time_(), - was_cached_(false), - mime_type_(), next_filter_(NULL), last_status_(FILTER_NEED_MORE_DATA), filter_context_(filter_context) { @@ -261,7 +241,7 @@ Filter::Filter(const FilterContext& filter_context) Filter::~Filter() {} bool Filter::InitBuffer() { - int buffer_size = filter_context_.GetInputStreambufferSize(); + int buffer_size = filter_context_.GetInputStreamBufferSize(); DCHECK(buffer_size > 0); if (buffer_size <= 0 || stream_buffer()) return false; @@ -365,22 +345,3 @@ bool Filter::FlushStreamBuffer(int stream_data_len) { stream_data_len_ = stream_data_len; return true; } - -void Filter::SetURL(const GURL& url) { - url_ = url; - if (next_filter_.get()) - next_filter_->SetURL(url); -} - -void Filter::SetMimeType(const std::string& mime_type) { - mime_type_ = mime_type; - if (next_filter_.get()) - next_filter_->SetMimeType(mime_type); -} - -void Filter::SetConnectTime(const base::Time& time, bool 
was_cached) { - connect_time_ = time; - was_cached_ = was_cached; - if (next_filter_.get()) - next_filter_->SetConnectTime(time, was_cached_); -} diff --git a/net/base/filter.h b/net/base/filter.h index 7599571..504093d 100644 --- a/net/base/filter.h +++ b/net/base/filter.h @@ -41,13 +41,18 @@ //------------------------------------------------------------------------------ // Define an interface class that allows access to contextual information -// supplied by the owner of this filter. +// supplied by the owner of this filter. In the case where there are a chain of +// filters, there is only one owner of all the chained filters, and that context +// is passed to the constructor of all those filters. To be clear, the context +// does NOT reflect the position in a chain, or the fact that there are prior +// or later filters in a chain. class FilterContext { public: - FilterContext() {}; - virtual ~FilterContext() {}; + virtual ~FilterContext() {} // What mime type was specified in the header for this data? + // Only makes senses for some types of contexts, and returns false + // when not applicable. virtual bool GetMimeType(std::string* mime_type) const = 0; // What URL was used to access this data? @@ -60,16 +65,19 @@ class FilterContext { // Is data supplied from cache, or fresh across the net? virtual bool IsCachedContent() const = 0; - // TODO(jar): We could use flags, defined by callers, rather than naming a - // protocol here in the base class. // Was this data flagged as a response to a request with an SDCH dictionary? virtual bool IsSdchResponse() const = 0; - // What is the desirable input buffer size for these filters? - virtual int GetInputStreambufferSize() const = 0; + // How many bytes were read from the net or cache so far (and potentially + // pushed into a filter for processing)? + virtual int64 GetByteReadCount() const = 0; - private: - DISALLOW_COPY_AND_ASSIGN(FilterContext); + // What is the desirable input buffer size for these filters? 
+ // This value is currently supplied by the context, and is constant for all + // filters, even when they are part of a chain of filters. (i.e., we currently + // don't change the input buffer sizes for a linked chain of filters, and the + // buffer size for input to all filters in a chain is this one constant). + virtual int GetInputStreamBufferSize() const = 0; }; //------------------------------------------------------------------------------ @@ -151,14 +159,6 @@ class Filter { // The function returns true if success, and false otherwise. bool FlushStreamBuffer(int stream_data_len); - void SetURL(const GURL& url); - const GURL& url() const { return url_; } - - void SetMimeType(const std::string& mime_type); - const std::string& mime_type() const { return mime_type_; } - - void SetConnectTime(const base::Time& time, bool was_cached); - // Translate the text of a filter name (from Content-Encoding header) into a // FilterType. static FilterType ConvertEncodingToType(const std::string& filter_type); @@ -170,8 +170,7 @@ class Filter { // encodings. These fixups require context, which includes whether this // response was made to an SDCH request (i.e., an available dictionary was // advertised in the GET), as well as the mime type of the content. - static void FixupEncodingTypes(bool is_sdch_response, - const std::string& mime_type, + static void FixupEncodingTypes(const FilterContext& filter_context, std::vector* encoding_types); protected: explicit Filter(const FilterContext& filter_context); @@ -193,25 +192,9 @@ class Filter { // Copy pre-filter data directly to destination buffer without decoding. FilterStatus CopyOut(char* dest_buffer, int* dest_len); - // Allocates and initializes stream_buffer_ based on filter_context_. - // Establishes a buffer large enough to handle the amount specified in - // filter_context_.GetInputStreambufferSize(). 
- bool InitBuffer(); - - // A factory helper for creating filters for within a chain of potentially - // multiple encodings. If a chain of filters is created, then this may be - // called multiple times during the filter creation process. In most simple - // cases, this is only called once. Returns NULL and cleans up (deleting - // filter_list) if a new filter can't be constructed. - static Filter* PrependNewFilter(FilterType type_id, - const FilterContext& filter_context, - Filter* filter_list); - FilterStatus last_status() const { return last_status_; } - base::Time connect_time() const { return connect_time_; } - - bool was_cached() const { return was_cached_; } + const FilterContext& filter_context() const { return filter_context_; } // Buffer to hold the data to be filtered (the input queue). scoped_refptr stream_buffer_; @@ -225,26 +208,24 @@ class Filter { // Total number of remaining chars in stream_buffer_ to be filtered. int stream_data_len_; - private: // TODO(jar): Make more data private by moving this up higher. - // The URL that is currently being filtered. - // This is used by SDCH filters which need to restrict use of a dictionary to - // a specific URL or path. - GURL url_; + private: + // A factory helper for creating filters for within a chain of potentially + // multiple encodings. If a chain of filters is created, then this may be + // called multiple times during the filter creation process. In most simple + // cases, this is only called once. Returns NULL and cleans up (deleting + // filter_list) if a new filter can't be constructed. + static Filter* PrependNewFilter(FilterType type_id, + const FilterContext& filter_context, + Filter* filter_list); - // To facilitate histogramming by individual filters, we store the connect - // time for the corresponding HTTP transaction, as well as whether this time - // was recalled from a cached entry. 
- base::Time connect_time_; - bool was_cached_; + // Allocates and initializes stream_buffer_ based on filter_context_. + // Establishes a buffer large enough to handle the amount specified in + // filter_context_.GetInputStreamBufferSize(). + bool InitBuffer(); // Helper function to empty our output into the next filter's input. void PushDataIntoNextFilter(); - // To facilitate error recovery in SDCH filters, allow filter to know if - // content is text/html by checking within this mime type (SDCH filter may - // do a meta-refresh via html). - std::string mime_type_; - // An optional filter to process output from this filter. scoped_ptr next_filter_; // Remember what status or local filter last returned so we can better handle diff --git a/net/base/filter_unittest.cc b/net/base/filter_unittest.cc index 1d6ae6e..999a1e6 100644 --- a/net/base/filter_unittest.cc +++ b/net/base/filter_unittest.cc @@ -3,6 +3,7 @@ // found in the LICENSE file. #include "net/base/filter.h" +#include "net/base/filter_unittest.h" #include "testing/gtest/include/gtest/gtest.h" class FilterTest : public testing::Test { @@ -42,8 +43,11 @@ TEST(FilterTest, ContentTypeId) { // Check various fixups that modify content encoding lists. TEST(FilterTest, ApacheGzip) { + const int kInputBufferSize(100); + MockFilterContext filter_context(kInputBufferSize); + filter_context.SetSdchResponse(false); + // Check that redundant gzip mime type removes only solo gzip encoding. - const bool is_sdch_response = false; const std::string kGzipMime1("application/x-gzip"); const std::string kGzipMime2("application/gzip"); const std::string kGzipMime3("application/x-gunzip"); @@ -52,45 +56,54 @@ TEST(FilterTest, ApacheGzip) { // First show it removes the gzip, given any gzip style mime type. 
encoding_types.clear(); encoding_types.push_back(Filter::FILTER_TYPE_GZIP); - Filter::FixupEncodingTypes(is_sdch_response, kGzipMime1, &encoding_types); + filter_context.SetMimeType(kGzipMime1); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_TRUE(encoding_types.empty()); encoding_types.clear(); encoding_types.push_back(Filter::FILTER_TYPE_GZIP); - Filter::FixupEncodingTypes(is_sdch_response, kGzipMime2, &encoding_types); + filter_context.SetMimeType(kGzipMime2); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_TRUE(encoding_types.empty()); encoding_types.clear(); encoding_types.push_back(Filter::FILTER_TYPE_GZIP); - Filter::FixupEncodingTypes(is_sdch_response, kGzipMime3, &encoding_types); + filter_context.SetMimeType(kGzipMime3); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_TRUE(encoding_types.empty()); // Check to be sure it doesn't remove everything when it has such a type. encoding_types.clear(); encoding_types.push_back(Filter::FILTER_TYPE_SDCH); - Filter::FixupEncodingTypes(is_sdch_response, kGzipMime1, &encoding_types); + filter_context.SetMimeType(kGzipMime1); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_EQ(1U, encoding_types.size()); EXPECT_EQ(Filter::FILTER_TYPE_SDCH, encoding_types.front()); // Check to be sure that gzip can survive with other mime types. encoding_types.clear(); encoding_types.push_back(Filter::FILTER_TYPE_GZIP); - Filter::FixupEncodingTypes(is_sdch_response, "other/mime", &encoding_types); + filter_context.SetMimeType("other/mime"); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_EQ(1U, encoding_types.size()); EXPECT_EQ(Filter::FILTER_TYPE_GZIP, encoding_types.front()); } TEST(FilterTest, SdchEncoding) { // Handle content encodings including SDCH. 
- const bool is_sdch_response = true; const std::string kTextHtmlMime("text/html"); + const int kInputBufferSize(100); + MockFilterContext filter_context(kInputBufferSize); + filter_context.SetSdchResponse(true); + std::vector encoding_types; // Check for most common encoding, and verify it survives unchanged. encoding_types.clear(); encoding_types.push_back(Filter::FILTER_TYPE_SDCH); encoding_types.push_back(Filter::FILTER_TYPE_GZIP); - Filter::FixupEncodingTypes(is_sdch_response, kTextHtmlMime, &encoding_types); + filter_context.SetMimeType(kTextHtmlMime); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_EQ(2U, encoding_types.size()); EXPECT_EQ(Filter::FILTER_TYPE_SDCH, encoding_types[0]); EXPECT_EQ(Filter::FILTER_TYPE_GZIP, encoding_types[1]); @@ -99,7 +112,8 @@ TEST(FilterTest, SdchEncoding) { encoding_types.clear(); encoding_types.push_back(Filter::FILTER_TYPE_SDCH); encoding_types.push_back(Filter::FILTER_TYPE_GZIP); - Filter::FixupEncodingTypes(is_sdch_response, "other/type", &encoding_types); + filter_context.SetMimeType("other/type"); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_EQ(2U, encoding_types.size()); EXPECT_EQ(Filter::FILTER_TYPE_SDCH, encoding_types[0]); EXPECT_EQ(Filter::FILTER_TYPE_GZIP, encoding_types[1]); @@ -107,7 +121,7 @@ TEST(FilterTest, SdchEncoding) { // Solo SDCH is extended to include optional gunzip. encoding_types.clear(); encoding_types.push_back(Filter::FILTER_TYPE_SDCH); - Filter::FixupEncodingTypes(is_sdch_response, "other/type", &encoding_types); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_EQ(2U, encoding_types.size()); EXPECT_EQ(Filter::FILTER_TYPE_SDCH, encoding_types[0]); EXPECT_EQ(Filter::FILTER_TYPE_GZIP_HELPING_SDCH, encoding_types[1]); @@ -115,13 +129,17 @@ TEST(FilterTest, SdchEncoding) { TEST(FilterTest, MissingSdchEncoding) { // Handle interesting case where entire SDCH encoding assertion "got lost." 
- const bool is_sdch_response = true; const std::string kTextHtmlMime("text/html"); + const int kInputBufferSize(100); + MockFilterContext filter_context(kInputBufferSize); + filter_context.SetSdchResponse(true); + std::vector encoding_types; // Loss of encoding, but it was an SDCH response with html type. encoding_types.clear(); - Filter::FixupEncodingTypes(is_sdch_response, kTextHtmlMime, &encoding_types); + filter_context.SetMimeType(kTextHtmlMime); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_EQ(2U, encoding_types.size()); EXPECT_EQ(Filter::FILTER_TYPE_SDCH_POSSIBLE, encoding_types[0]); EXPECT_EQ(Filter::FILTER_TYPE_GZIP_HELPING_SDCH, encoding_types[1]); @@ -132,15 +150,16 @@ TEST(FilterTest, MissingSdchEncoding) { // coded the fixup defensively (scanning for a prefix of "text/html", so this // is an example which could survive such confusion in the caller). encoding_types.clear(); - Filter::FixupEncodingTypes(is_sdch_response, "text/html; charset=UTF-8", - &encoding_types); + filter_context.SetMimeType("text/html; charset=UTF-8"); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_EQ(2U, encoding_types.size()); EXPECT_EQ(Filter::FILTER_TYPE_SDCH_POSSIBLE, encoding_types[0]); EXPECT_EQ(Filter::FILTER_TYPE_GZIP_HELPING_SDCH, encoding_types[1]); // No encoding, but it was an SDCH response with non-html type. 
encoding_types.clear(); - Filter::FixupEncodingTypes(is_sdch_response, "other/mime", &encoding_types); + filter_context.SetMimeType("other/mime"); + Filter::FixupEncodingTypes(filter_context, &encoding_types); EXPECT_EQ(2U, encoding_types.size()); EXPECT_EQ(Filter::FILTER_TYPE_SDCH_POSSIBLE, encoding_types[0]); EXPECT_EQ(Filter::FILTER_TYPE_GZIP_HELPING_SDCH, encoding_types[1]); diff --git a/net/base/filter_unittest.h b/net/base/filter_unittest.h index 17a09ad..cf0e13d 100644 --- a/net/base/filter_unittest.h +++ b/net/base/filter_unittest.h @@ -26,7 +26,7 @@ class MockFilterContext : public FilterContext { void SetRequestTime(const base::Time time) { request_time_ = time; } void SetCached(bool is_cached) { is_cached_content_ = is_cached; } void SetSdchResponse(bool is_sdch_response) { - is_sdch_response = is_sdch_response; + is_sdch_response_ = is_sdch_response; } virtual bool GetMimeType(std::string* mime_type) const { @@ -52,8 +52,11 @@ class MockFilterContext : public FilterContext { // Was this data flagged as a response to a request with an SDCH dictionary? virtual bool IsSdchResponse() const { return is_sdch_response_; } + // How many bytes were fed to filter(s) so far? + virtual int64 GetByteReadCount() const { return 0; } + // What is the desirable input buffer size for these filters? 
- virtual int GetInputStreambufferSize() const { return buffer_size_; } + virtual int GetInputStreamBufferSize() const { return buffer_size_; } private: diff --git a/net/base/sdch_filter.cc b/net/base/sdch_filter.cc index 5040395..fbe6a51 100644 --- a/net/base/sdch_filter.cc +++ b/net/base/sdch_filter.cc @@ -25,10 +25,19 @@ SdchFilter::SdchFilter(const FilterContext& filter_context) dest_buffer_excess_index_(0), source_bytes_(0), output_bytes_(0), - possible_pass_through_(false) { + possible_pass_through_(false), + connect_time_(filter_context.GetRequestTime()), + was_cached_(filter_context.IsCachedContent()) { + bool success = filter_context.GetMimeType(&mime_type_); + DCHECK(success); + success = filter_context.GetURL(&url_); + DCHECK(success); } SdchFilter::~SdchFilter() { + // All code here is for gathering stats, and can be removed when SDCH is + // considered stable. + static int filter_use_count = 0; ++filter_use_count; if (META_REFRESH_RECOVERY == decoding_status_) { @@ -42,77 +51,114 @@ SdchFilter::~SdchFilter() { // Make it possible for the user to hit reload, and get non-sdch content. // Note this will "wear off" quickly enough, and is just meant to assure // in some rare case that the user is not stuck. - SdchManager::BlacklistDomain(url()); + SdchManager::BlacklistDomain(url_); + UMA_HISTOGRAM_COUNTS("Sdch.PartialBytesIn", + static_cast(filter_context().GetByteReadCount())); + UMA_HISTOGRAM_COUNTS("Sdch.PartialVcdiffIn", source_bytes_); + UMA_HISTOGRAM_COUNTS("Sdch.PartialVcdiffOut", output_bytes_); } } if (!dest_buffer_excess_.empty()) { // Filter chaining error, or premature teardown. 
SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT); + UMA_HISTOGRAM_COUNTS("Sdch.UnflushedBytesIn", + static_cast(filter_context().GetByteReadCount())); + UMA_HISTOGRAM_COUNTS("Sdch.UnflushedVcdiffIn", source_bytes_); + UMA_HISTOGRAM_COUNTS("Sdch.UnflushedVcdiffOut", output_bytes_); } - if (was_cached()) { + if (was_cached_) { // Not a real error, but it is useful to have this tally. // TODO(jar): Remove this stat after SDCH stability is validated. SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED); - } else if (base::Time() == connect_time() - || read_times_.empty()) { + return; // We don't need timing stats, and we aready got ratios. + } + + if (base::Time() == connect_time_ || read_times_.empty()) { // Not a real error, but it is useful to have this tally. // TODO(jar): Remove this stat after SDCH stability is validated. SdchManager::SdchErrorRecovery(SdchManager::MISSING_TIME_STATS); - } else { - base::TimeDelta duration = read_times_.back() - connect_time(); - // We clip our logging at 10 minutes to prevent anamolous data from being - // considered (per suggestion from Jake Brutlag). 
- if (10 >= duration.InMinutes()) { - if (DECODING_IN_PROGRESS == decoding_status_) { - UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_Latency_F", duration, - base::TimeDelta::FromMilliseconds(20), - base::TimeDelta::FromMinutes(10), 100); - UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_1st_To_Last", - read_times_.back() - read_times_[0], - base::TimeDelta::FromMilliseconds(20), - base::TimeDelta::FromMinutes(10), 100); - if (read_times_.size() > 3) { - UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_3rd_To_4th", - read_times_[3] - read_times_[2], - base::TimeDelta::FromMilliseconds(10), - base::TimeDelta::FromSeconds(3), 100); - UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_2nd_To_3rd", - read_times_[2] - read_times_[1], - base::TimeDelta::FromMilliseconds(10), - base::TimeDelta::FromSeconds(3), 100); - } - UMA_HISTOGRAM_COUNTS_100("Sdch.Network_Decode_Reads", - read_times_.size()); - UMA_HISTOGRAM_COUNTS("Sdch.Network_Decode_Bytes_Read", output_bytes_); - } else if (PASS_THROUGH == decoding_status_) { - UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_Latency_F", - duration, - base::TimeDelta::FromMilliseconds(20), - base::TimeDelta::FromMinutes(10), 100); - UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_1st_To_Last", - read_times_.back() - read_times_[0], - base::TimeDelta::FromMilliseconds(20), - base::TimeDelta::FromMinutes(10), 100); - if (read_times_.size() > 3) { - UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_3rd_To_4th", - read_times_[3] - read_times_[2], - base::TimeDelta::FromMilliseconds(10), - base::TimeDelta::FromSeconds(3), 100); - UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_2nd_To_3rd", - read_times_[2] - read_times_[1], - base::TimeDelta::FromMilliseconds(10), - base::TimeDelta::FromSeconds(3), 100); - } - UMA_HISTOGRAM_COUNTS_100("Sdch.Network_Pass-through_Reads", - read_times_.size()); - } - } + UMA_HISTOGRAM_COUNTS("Sdch.MissingTimeBytesIn", + static_cast(filter_context().GetByteReadCount())); + 
UMA_HISTOGRAM_COUNTS("Sdch.MissingTimeVcdiffIn", source_bytes_); + return; + } + + base::TimeDelta duration = read_times_.back() - connect_time_; + // We clip our logging at 10 minutes to prevent anamolous data from being + // considered (per suggestion from Jake Brutlag). + if (10 < duration.InMinutes()) { + SdchManager::SdchErrorRecovery(SdchManager::OVER_10_MINUTES); + return; } - if (dictionary_) - dictionary_->Release(); + switch (decoding_status_) { + case DECODING_IN_PROGRESS: { + UMA_HISTOGRAM_PERCENTAGE("Sdch.Network_Decode_Ratio",static_cast( + (filter_context().GetByteReadCount() * 100) / output_bytes_)); + UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_Latency_F", duration, + base::TimeDelta::FromMilliseconds(20), + base::TimeDelta::FromMinutes(10), 100); + UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_1st_To_Last", + read_times_.back() - read_times_[0], + base::TimeDelta::FromMilliseconds(20), + base::TimeDelta::FromMinutes(10), 100); + if (read_times_.size() > 3) { + UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_3rd_To_4th", + read_times_[3] - read_times_[2], + base::TimeDelta::FromMilliseconds(10), + base::TimeDelta::FromSeconds(3), 100); + UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_2nd_To_3rd", + read_times_[2] - read_times_[1], + base::TimeDelta::FromMilliseconds(10), + base::TimeDelta::FromSeconds(3), 100); + } + UMA_HISTOGRAM_COUNTS_100("Sdch.Network_Decode_Reads", + read_times_.size()); + UMA_HISTOGRAM_COUNTS("Sdch.Network_Decode_Bytes_Read", output_bytes_); + return; + } + case PASS_THROUGH: { + UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_Latency_F", + duration, + base::TimeDelta::FromMilliseconds(20), + base::TimeDelta::FromMinutes(10), 100); + UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_1st_To_Last", + read_times_.back() - read_times_[0], + base::TimeDelta::FromMilliseconds(20), + base::TimeDelta::FromMinutes(10), 100); + if (read_times_.size() > 3) { + 
UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_3rd_To_4th", + read_times_[3] - read_times_[2], + base::TimeDelta::FromMilliseconds(10), + base::TimeDelta::FromSeconds(3), 100); + UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_2nd_To_3rd", + read_times_[2] - read_times_[1], + base::TimeDelta::FromMilliseconds(10), + base::TimeDelta::FromSeconds(3), 100); + } + UMA_HISTOGRAM_COUNTS_100("Sdch.Network_Pass-through_Reads", + read_times_.size()); + return; + } + case DECODING_UNINITIALIZED: { + SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED); + return; + } + case WAITING_FOR_DICTIONARY_SELECTION: { + SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY); + return; + } + case DECODING_ERROR: { + SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR); + return; + } + case META_REFRESH_RECOVERY: { + // Already accounted for when set. + return; + } + } // end of switch. } bool SdchFilter::InitDecoding(Filter::FilterType filter_type) { @@ -181,9 +227,9 @@ Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, // With very low probability, random garbage data looked like a // dictionary specifier (8 ASCII characters followed by a null), but // that is sufficiently unlikely that we ignore it. - if (std::string::npos == mime_type().find_first_of("text/html")) { - SdchManager::BlacklistDomainForever(url()); - if (was_cached()) + if (std::string::npos == mime_type_.find_first_of("text/html")) { + SdchManager::BlacklistDomainForever(url_); + if (was_cached_) SdchManager::SdchErrorRecovery( SdchManager::CACHED_META_REFRESH_UNSUPPORTED); else @@ -193,7 +239,7 @@ Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, } // HTML content means we can issue a meta-refresh, and get the content // again, perhaps without SDCH (to be safe). - if (was_cached()) { + if (was_cached_) { // Cached content is probably a startup tab, so we'll just get fresh // content and try again, without disabling sdch. 
SdchManager::SdchErrorRecovery( @@ -201,7 +247,7 @@ Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, } else { // Since it wasn't in the cache, we definately need at least some // period of blacklisting to get the correct content. - SdchManager::BlacklistDomain(url()); + SdchManager::BlacklistDomain(url_); SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY); } decoding_status_ = META_REFRESH_RECOVERY; @@ -290,16 +336,18 @@ Filter::FilterStatus SdchFilter::InitializeDictionary() { else next_stream_data_ = NULL; - DCHECK(!dictionary_); + DCHECK(!dictionary_.get()); dictionary_hash_is_plausible_ = true; // Assume plausible, but check. + + SdchManager::Dictionary* dictionary = NULL; if ('\0' == dictionary_hash_[kServerIdLength - 1]) SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_, 0, kServerIdLength - 1), - url(), &dictionary_); + url_, &dictionary); else dictionary_hash_is_plausible_ = false; - if (!dictionary_) { + if (!dictionary) { DCHECK(dictionary_hash_.size() == kServerIdLength); // Since dictionary was not found, check to see if hash was even plausible. for (size_t i = 0; i < kServerIdLength - 1; ++i) { @@ -316,7 +364,7 @@ Filter::FilterStatus SdchFilter::InitializeDictionary() { decoding_status_ = DECODING_ERROR; return FILTER_ERROR; } - dictionary_->AddRef(); + dictionary_ = dictionary; vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder); vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(), dictionary_->text().size()); diff --git a/net/base/sdch_filter.h b/net/base/sdch_filter.h index c8cf7f4..cbb3491 100644 --- a/net/base/sdch_filter.h +++ b/net/base/sdch_filter.h @@ -84,9 +84,10 @@ class SdchFilter : public Filter { // attempted. bool dictionary_hash_is_plausible_; - // We hold an in-memory copy of the dictionary during the entire decoding. - // The char* data is embedded in a RefCounted dictionary_. 
- SdchManager::Dictionary* dictionary_; + // We hold an in-memory copy of the dictionary during the entire decoding, as + // it is used directly by the VC-DIFF decoding system. + // That char* data is part of the dictionary_ we hold a reference to. + scoped_refptr dictionary_; // The decoder may demand a larger output buffer than the target of // ReadFilteredData so we buffer the excess output between calls. @@ -109,6 +110,21 @@ class SdchFilter : public Filter { // an applicable dictionary can't be found. bool possible_pass_through_; + // The URL that is currently being filtered. + // This is used to restrict use of a dictionary to a specific URL or path. + GURL url_; + + // To facilitate histogramming by individual filters, we store the connect + // time for the corresponding HTTP transaction, as well as whether this time + // was recalled from a cached entry. The time is approximate, and may be + // bogus if the data was gotten from cache (i.e., it may be LOOOONG ago). + const base::Time connect_time_; + const bool was_cached_; + + // To facilitate error recovery, allow filter to know if content is text/html + // by checking within this mime type (we may do a meta-refresh via html). + std::string mime_type_; + DISALLOW_COPY_AND_ASSIGN(SdchFilter); }; diff --git a/net/base/sdch_filter_unittest.cc b/net/base/sdch_filter_unittest.cc index 9f185cb..6f3ee62 100644 --- a/net/base/sdch_filter_unittest.cc +++ b/net/base/sdch_filter_unittest.cc @@ -747,11 +747,14 @@ TEST_F(SdchFilterTest, DefaultGzipIfSdch) { // System should automatically add the missing (optional) gzip. std::vector filter_types; filter_types.push_back(Filter::FILTER_TYPE_SDCH); - Filter::FixupEncodingTypes(true, "anything/mime", &filter_types); - // First try with a large buffer (larger than test input, or compressed data). 
const int kInputBufferSize(100); MockFilterContext filter_context(kInputBufferSize); + filter_context.SetMimeType("anything/mime"); + filter_context.SetSdchResponse(true); + Filter::FixupEncodingTypes(filter_context, &filter_types); + + // First try with a large buffer (larger than test input, or compressed data). filter_context.SetURL(url); scoped_ptr filter(Filter::Factory(filter_types, filter_context)); diff --git a/net/base/sdch_manager.h b/net/base/sdch_manager.h index e6ad7a0..3c585c3 100644 --- a/net/base/sdch_manager.h +++ b/net/base/sdch_manager.h @@ -119,7 +119,7 @@ class SdchManager { META_REFRESH_UNSUPPORTED = 74, // Unrecoverable error. CACHED_META_REFRESH_UNSUPPORTED = 75, // As above, but pulled from cache. PASSING_THROUGH_NON_SDCH = 76, // Non-html tagged as sdch but malformed. - INCOMPLETE_SDCH_CONTENT = 77, // Last window was not completely decoded. + INCOMPLETE_SDCH_CONTENT = 77, // Last window was not completely decoded. // Common decoded recovery methods. @@ -130,9 +130,13 @@ class SdchManager { // (i.e., be able to be sure all dictionary advertisements are accounted // for). - UNFLUSHED_CONTENT = 90, // Possible error in filter chaining. + UNFLUSHED_CONTENT = 90, // Possible error in filter chaining. MISSING_TIME_STATS = 91, // Should never happen. - CACHE_DECODED = 92, // Hence there are no timing stats recorded. + CACHE_DECODED = 92, // No timing stats recorded. + OVER_10_MINUTES = 93, // No timing stats will be recorded. + UNINITIALIZED = 94, // Filter never even got initialized. + PRIOR_TO_DICTIONARY = 95, // We hadn't even parsed a dictionary selector. + DECODE_ERROR = 96, // Something went wrong during decode. MAX_PROBLEM_CODE // Used to bound histogram. 
}; diff --git a/net/url_request/url_request_http_job.cc b/net/url_request/url_request_http_job.cc index 657af1b..cf380de 100644 --- a/net/url_request/url_request_http_job.cc +++ b/net/url_request/url_request_http_job.cc @@ -204,10 +204,7 @@ bool URLRequestHttpJob::GetContentEncodings( } if (!encoding_types->empty()) { - std::string mime_type; - GetMimeType(&mime_type); - // TODO(jar): Need to change this call to use the FilterContext interfaces. - Filter::FixupEncodingTypes(IsSdchResponse(), mime_type, encoding_types); + Filter::FixupEncodingTypes(*this, encoding_types); } return !encoding_types->empty(); } diff --git a/net/url_request/url_request_job.cc b/net/url_request/url_request_job.cc index 2873ae2..4075ede 100644 --- a/net/url_request/url_request_job.cc +++ b/net/url_request/url_request_job.cc @@ -28,7 +28,8 @@ URLRequestJob::URLRequestJob(URLRequest* request) read_buffer_(NULL), read_buffer_len_(0), has_handled_response_(false), - expected_content_size_(-1) { + expected_content_size_(-1), + filter_input_byte_count_(0) { is_profiling_ = request->enable_profiling(); if (is_profiling()) { metrics_.reset(new URLRequestJobMetrics()); @@ -86,6 +87,10 @@ void URLRequestJob::ContinueDespiteLastError() { NOTREACHED(); } +int64 URLRequestJob::GetByteReadCount() const { + return filter_input_byte_count_ ; +} + bool URLRequestJob::GetURL(GURL* gurl) const { if (!request_) return false; @@ -505,6 +510,7 @@ void URLRequestJob::RecordBytesRead(int bytes_read) { ++(metrics_->number_of_read_IO_); metrics_->total_bytes_read_ += bytes_read; } + filter_input_byte_count_ += bytes_read; g_url_request_job_tracker.OnBytesRead(this, bytes_read); } diff --git a/net/url_request/url_request_job.h b/net/url_request/url_request_job.h index 121519e..f99ffdf 100644 --- a/net/url_request/url_request_job.h +++ b/net/url_request/url_request_job.h @@ -195,10 +195,11 @@ class URLRequestJob : public base::RefCountedThreadSafe, // FilterContext methods: // These methods are not applicable to all 
connections. virtual bool GetMimeType(std::string* mime_type) const { return false; } + virtual int64 GetByteReadCount() const; virtual bool GetURL(GURL* gurl) const; virtual base::Time GetRequestTime() const; virtual bool IsCachedContent() const; - virtual int GetInputStreambufferSize() const { return kFilterBufSize; } + virtual int GetInputStreamBufferSize() const { return kFilterBufSize; } protected: // Notifies the job that headers have been received. @@ -315,6 +316,11 @@ class URLRequestJob : public base::RefCountedThreadSafe, // Expected content size int64 expected_content_size_; + // Total number of bytes read from network (or cache) and and typically handed + // to filter to process. Used to histogram compression ratios, and error + // recovery scenarios in filters. + int64 filter_input_byte_count_; + DISALLOW_COPY_AND_ASSIGN(URLRequestJob); }; diff --git a/net/url_request/url_request_job_metrics.cc b/net/url_request/url_request_job_metrics.cc index 69a0605..eea21fa 100644 --- a/net/url_request/url_request_job_metrics.cc +++ b/net/url_request/url_request_job_metrics.cc @@ -23,8 +23,9 @@ void URLRequestJobMetrics::AppendText(std::wstring* text) { TimeDelta elapsed = end_time_ - start_time_; StringAppendF(text, - L"; total bytes read = %d; read calls = %d; time = %lld ms;", - total_bytes_read_, number_of_read_IO_, elapsed.InMilliseconds()); + L"; total bytes read = %ld; read calls = %d; time = %lld ms;", + static_cast(total_bytes_read_), + number_of_read_IO_, elapsed.InMilliseconds()); if (success_) { text->append(L" success."); diff --git a/net/url_request/url_request_job_metrics.h b/net/url_request/url_request_job_metrics.h index 42108db..11ee288 100644 --- a/net/url_request/url_request_job_metrics.h +++ b/net/url_request/url_request_job_metrics.h @@ -34,7 +34,7 @@ class URLRequestJobMetrics { base::TimeTicks end_time_; // Total number of bytes the job reads from underline IO. 
- int total_bytes_read_; + int64 total_bytes_read_; // Number of IO read operations the job issues. int number_of_read_IO_; -- cgit v1.1