path: root/net/url_request
author     jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2009-04-30 23:06:01 +0000
committer  jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2009-04-30 23:06:01 +0000
commit     5b90b5d3473f3f846618092e79d4c5fcff7d3969 (patch)
tree       2d8c6e9118f8062399e6a89103e212d78479938e /net/url_request
parent     6165e2ab4af4c217e21c0ec167b7eced8fca491c (diff)
Hand craft an A/B test of SDCH compression
After we're sure we can do SDCH compression to a given URL, toss a 50-50 coin each time we have a chance to advertise SDCH, and either completely avoid advertisement, or advertise (including the dictionary). Histogram both compression download times, as well as the download times for the "completely avoid" case.

http://crbug.com/11236
bug=11236
r=wtc,openvcdiff
Review URL: http://codereview.chromium.org/100004

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15010 0039d316-1c4b-4281-b951-d872f2087c98
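For orientation, here is a minimal, illustrative sketch of the coin-flip decision described above. It mirrors the logic added to URLRequestHttpJob::AddExtraHeaders() in the diff below; the ChooseSdchExperimentArm() helper and the SdchExperimentArm enum are invented for illustration and are not part of this change.

// Illustrative sketch only (not part of this patch): how an eligible request
// is split 50-50 between the holdback (control) arm and the decode arm.
#include "base/rand_util.h"

enum SdchExperimentArm {
  SDCH_ARM_NOT_IN_EXPERIMENT,  // Not eligible; normal handling.
  SDCH_ARM_HOLDBACK,           // Eligible, but SDCH advertisement withheld.
  SDCH_ARM_DECODE              // Eligible; dictionary advertised as usual.
};

// The three inputs correspond to the checks made in AddExtraHeaders(): SDCH
// is supported for the domain, at least one dictionary is available, and
// SdchManager::Global()->AllowLatencyExperiment() returned true for the URL.
SdchExperimentArm ChooseSdchExperimentArm(bool advertise_sdch,
                                          bool has_dictionaries,
                                          bool latency_experiment_ok) {
  if (!advertise_sdch || !has_dictionaries || !latency_experiment_ok)
    return SDCH_ARM_NOT_IN_EXPERIMENT;
  // Fair coin toss: roughly half the eligible requests suppress the
  // advertisement (later recorded as SDCH_EXPERIMENT_HOLDBACK); the rest
  // advertise and are recorded as SDCH_EXPERIMENT_DECODE on destruction.
  return (base::RandDouble() < 0.5) ? SDCH_ARM_HOLDBACK : SDCH_ARM_DECODE;
}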
Diffstat (limited to 'net/url_request')
-rw-r--r--  net/url_request/url_request_http_job.cc |  62
-rw-r--r--  net/url_request/url_request_http_job.h  |  12
-rw-r--r--  net/url_request/url_request_job.cc      | 169
-rw-r--r--  net/url_request/url_request_job.h       |  54
4 files changed, 280 insertions(+), 17 deletions(-)
diff --git a/net/url_request/url_request_http_job.cc b/net/url_request/url_request_http_job.cc
index c0a2080..9c81e00 100644
--- a/net/url_request/url_request_http_job.cc
+++ b/net/url_request/url_request_http_job.cc
@@ -10,6 +10,7 @@
#include "base/file_util.h"
#include "base/file_version_info.h"
#include "base/message_loop.h"
+#include "base/rand_util.h"
#include "base/string_util.h"
#include "net/base/cookie_monster.h"
#include "net/base/filter.h"
@@ -63,10 +64,19 @@ URLRequestHttpJob::URLRequestHttpJob(URLRequest* request)
ALLOW_THIS_IN_INITIALIZER_LIST(
read_callback_(this, &URLRequestHttpJob::OnReadCompleted)),
read_in_progress_(false),
- context_(request->context()) {
+ context_(request->context()),
+ sdch_dictionary_advertised_(false),
+ sdch_test_activated_(false),
+ sdch_test_control_(false) {
}
URLRequestHttpJob::~URLRequestHttpJob() {
+ DCHECK(!sdch_test_control_ || !sdch_test_activated_);
+ if (sdch_test_control_)
+ RecordPacketStats(SDCH_EXPERIMENT_HOLDBACK);
+ if (sdch_test_activated_)
+ RecordPacketStats(SDCH_EXPERIMENT_DECODE);
+
if (sdch_dictionary_url_.is_valid()) {
// Prior to reaching the destructor, request_ has been set to a NULL
// pointer, so request_->url() is no longer valid in the destructor, and we
@@ -215,8 +225,7 @@ bool URLRequestHttpJob::GetContentEncodings(
}
bool URLRequestHttpJob::IsSdchResponse() const {
- return response_info_ &&
- (request_info_.load_flags & net::LOAD_SDCH_DICTIONARY_ADVERTISED);
+ return sdch_dictionary_advertised_;
}
bool URLRequestHttpJob::IsRedirectResponse(GURL* location,
@@ -546,29 +555,54 @@ void URLRequestHttpJob::StartTransaction() {
}
void URLRequestHttpJob::AddExtraHeaders() {
+ // TODO(jar): Consider optimizing away SDCH advertising bytes when the URL is
+ // probably an img or such (and SDCH encoding is not likely).
+ bool advertise_sdch = SdchManager::Global() &&
+ SdchManager::Global()->IsInSupportedDomain(request_->url());
+ std::string avail_dictionaries;
+ if (advertise_sdch) {
+ SdchManager::Global()->GetAvailDictionaryList(request_->url(),
+ &avail_dictionaries);
+
+ // The AllowLatencyExperiment() is only true if we've successfully done a
+ // full SDCH compression recently in this browser session for this host.
+ // Note that for this path, there might be no applicable dictionaries, and
+ // hence we can't participate in the experiment.
+ if (!avail_dictionaries.empty() &&
+ SdchManager::Global()->AllowLatencyExperiment(request_->url())) {
+ // We are participating in the test (or control), and hence we'll
+ // eventually record statistics via either SDCH_EXPERIMENT_DECODE or
+ // SDCH_EXPERIMENT_HOLDBACK, and we'll need some packet timing data.
+ EnablePacketCounting(kSdchPacketHistogramCount);
+ if (base::RandDouble() < .5) {
+ sdch_test_control_ = true;
+ advertise_sdch = false;
+ } else {
+ sdch_test_activated_ = true;
+ }
+ }
+ }
+
// Supply Accept-Encoding headers first so that it is more likely that they
// will be in the first transmitted packet. This can sometimes make it easier
// to filter and analyze the streams to assure that a proxy has not damaged
// these headers. Some proxies deliberately corrupt Accept-Encoding headers.
- if (!SdchManager::Global() ||
- !SdchManager::Global()->IsInSupportedDomain(request_->url())) {
+ if (!advertise_sdch) {
// Tell the server what compression formats we support (other than SDCH).
request_info_.extra_headers += "Accept-Encoding: gzip,deflate,bzip2\r\n";
} else {
- // Supply SDCH related headers, as well as accepting that encoding.
- // Tell the server what compression formats we support.
+ // Include SDCH in acceptable list.
request_info_.extra_headers += "Accept-Encoding: "
"gzip,deflate,bzip2,sdch\r\n";
-
- // TODO(jar): See if it is worth optimizing away these bytes when the URL is
- // probably an img or such. (and SDCH encoding is not likely).
- std::string avail_dictionaries;
- SdchManager::Global()->GetAvailDictionaryList(request_->url(),
- &avail_dictionaries);
if (!avail_dictionaries.empty()) {
request_info_.extra_headers += "Avail-Dictionary: "
+ avail_dictionaries + "\r\n";
- request_info_.load_flags |= net::LOAD_SDCH_DICTIONARY_ADVERTISED;
+ sdch_dictionary_advertised_ = true;
+ // Since we're tagging this transaction as advertising a dictionary, we'll
+ // definitely employ an SDCH filter (or tentative sdch filter) when we get
+ // a response. When done, we'll record histograms via SDCH_DECODE or
+ // SDCH_PASSTHROUGH. Hence we need to record packet arrival times.
+ EnablePacketCounting(kSdchPacketHistogramCount);
}
}
diff --git a/net/url_request/url_request_http_job.h b/net/url_request/url_request_http_job.h
index 19ecdf86..57764ef 100644
--- a/net/url_request/url_request_http_job.h
+++ b/net/url_request/url_request_http_job.h
@@ -93,6 +93,18 @@ class URLRequestHttpJob : public URLRequestJob {
// before us.
scoped_refptr<URLRequestContext> context_;
+ // Indicates whether an SDCH dictionary was advertised, and hence an SDCH
+ // compressed response is expected. We use this to help detect (accidental?)
+ // proxy corruption of a response, which sometimes marks SDCH content as
+ // having no content encoding <oops>.
+ bool sdch_dictionary_advertised_;
+
+ // For SDCH latency experiments, when we are able to do SDCH, we may enable
+ // either an SDCH latency test or a pass-through test (never both). The
+ // following booleans indicate which we decided on for this instance.
+ bool sdch_test_activated_; // Advertising a dictionary for sdch.
+ bool sdch_test_control_; // Not even advertising sdch in Accept-Encoding.
+
DISALLOW_COPY_AND_ASSIGN(URLRequestHttpJob);
};
diff --git a/net/url_request/url_request_job.cc b/net/url_request/url_request_job.cc
index 544913d..0b005a5 100644
--- a/net/url_request/url_request_job.cc
+++ b/net/url_request/url_request_job.cc
@@ -30,7 +30,11 @@ URLRequestJob::URLRequestJob(URLRequest* request)
read_buffer_len_(0),
has_handled_response_(false),
expected_content_size_(-1),
- filter_input_byte_count_(0) {
+ packet_timing_enabled_(false),
+ filter_input_byte_count_(0),
+ bytes_observed_in_packets_(0),
+ max_packets_timed_(0),
+ observed_packet_count_(0) {
load_flags_ = request_->load_flags();
is_profiling_ = request->enable_profiling();
if (is_profiling()) {
@@ -41,6 +45,10 @@ URLRequestJob::URLRequestJob(URLRequest* request)
}
URLRequestJob::~URLRequestJob() {
+ // Cause filter chain to be destroyed now, so that any histogram requests can
+ // be made before we are torn down.
+ filter_.reset(NULL);
+
g_url_request_job_tracker.RemoveJob(this);
}
@@ -94,7 +102,7 @@ void URLRequestJob::ContinueDespiteLastError() {
}
int64 URLRequestJob::GetByteReadCount() const {
- return filter_input_byte_count_ ;
+ return filter_input_byte_count_;
}
bool URLRequestJob::GetURL(GURL* gurl) const {
@@ -522,6 +530,7 @@ void URLRequestJob::RecordBytesRead(int bytes_read) {
metrics_->total_bytes_read_ += bytes_read;
}
filter_input_byte_count_ += bytes_read;
+ UpdatePacketReadTimes(); // Facilitate stats recording if it is active.
g_url_request_job_tracker.OnBytesRead(this, bytes_read);
}
@@ -537,3 +546,159 @@ void URLRequestJob::SetStatus(const URLRequestStatus &status) {
if (request_)
request_->set_status(status);
}
+
+void URLRequestJob::UpdatePacketReadTimes() {
+ if (!packet_timing_enabled_)
+ return;
+
+ if (filter_input_byte_count_ <= bytes_observed_in_packets_) {
+ DCHECK(filter_input_byte_count_ == bytes_observed_in_packets_);
+ return; // No new bytes have arrived.
+ }
+
+ if (!bytes_observed_in_packets_)
+ request_time_snapshot_ = GetRequestTime();
+
+ final_packet_time_ = base::Time::Now();
+ const size_t kTypicalPacketSize = 1430;
+ while (filter_input_byte_count_ > bytes_observed_in_packets_) {
+ ++observed_packet_count_;
+ if (max_packets_timed_ > packet_times_.size()) {
+ packet_times_.push_back(final_packet_time_);
+ DCHECK(static_cast<size_t>(observed_packet_count_) ==
+ packet_times_.size());
+ }
+ bytes_observed_in_packets_ += kTypicalPacketSize;
+ }
+ // Since packets may not be full, we'll remember the number of bytes we've
+ // accounted for in packets thus far.
+ bytes_observed_in_packets_ = filter_input_byte_count_;
+}
+
+void URLRequestJob::EnablePacketCounting(size_t max_packets_timed) {
+ if (max_packets_timed_ < max_packets_timed)
+ max_packets_timed_ = max_packets_timed;
+ packet_timing_enabled_ = true;
+}
+
+void URLRequestJob::RecordPacketStats(StatisticSelector statistic) const {
+ if (!packet_timing_enabled_ || (final_packet_time_ == base::Time()))
+ return;
+ base::TimeDelta duration = final_packet_time_ - request_time_snapshot_;
+ switch (statistic) {
+ case SDCH_DECODE: {
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_Latency_F_a", duration,
+ base::TimeDelta::FromMilliseconds(20),
+ base::TimeDelta::FromMinutes(10), 100);
+ UMA_HISTOGRAM_COUNTS_100("Sdch.Network_Decode_Packets_b",
+ static_cast<int>(observed_packet_count_));
+ UMA_HISTOGRAM_COUNTS("Sdch.Network_Decode_Bytes_Processed_a",
+ static_cast<int>(bytes_observed_in_packets_));
+ if (packet_times_.empty())
+ return;
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_1st_To_Last_a",
+ final_packet_time_ - packet_times_[0],
+ base::TimeDelta::FromMilliseconds(20),
+ base::TimeDelta::FromMinutes(10), 100);
+
+ DCHECK(max_packets_timed_ >= kSdchPacketHistogramCount);
+ DCHECK(kSdchPacketHistogramCount > 4);
+ if (packet_times_.size() <= 4)
+ return;
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_1st_To_2nd_c",
+ packet_times_[1] - packet_times_[0],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_2nd_To_3rd_c",
+ packet_times_[2] - packet_times_[1],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_3rd_To_4th_c",
+ packet_times_[3] - packet_times_[2],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Decode_4th_To_5th_c",
+ packet_times_[4] - packet_times_[3],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ return;
+ }
+ case SDCH_PASSTHROUGH: {
+ // Despite advertising a dictionary, we handled non-sdch compressed
+ // content.
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_Latency_F_a",
+ duration,
+ base::TimeDelta::FromMilliseconds(20),
+ base::TimeDelta::FromMinutes(10), 100);
+ UMA_HISTOGRAM_COUNTS_100("Sdch.Network_Pass-through_Packets_b",
+ observed_packet_count_);
+ if (packet_times_.empty())
+ return;
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_1st_To_Last_a",
+ final_packet_time_ - packet_times_[0],
+ base::TimeDelta::FromMilliseconds(20),
+ base::TimeDelta::FromMinutes(10), 100);
+ DCHECK(max_packets_timed_ >= kSdchPacketHistogramCount);
+ DCHECK(kSdchPacketHistogramCount > 4);
+ if (packet_times_.size() <= 4)
+ return;
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_1st_To_2nd_c",
+ packet_times_[1] - packet_times_[0],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_2nd_To_3rd_c",
+ packet_times_[2] - packet_times_[1],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_3rd_To_4th_c",
+ packet_times_[3] - packet_times_[2],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Network_Pass-through_4th_To_5th_c",
+ packet_times_[4] - packet_times_[3],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ return;
+ }
+
+ case SDCH_EXPERIMENT_DECODE: {
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Experiment_Decode",
+ duration,
+ base::TimeDelta::FromMilliseconds(20),
+ base::TimeDelta::FromMinutes(10), 100);
+ // We already provided interpacket histograms above in the SDCH_DECODE
+ // case, so we don't need them here.
+ return;
+ }
+ case SDCH_EXPERIMENT_HOLDBACK: {
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Experiment_Holdback",
+ duration,
+ base::TimeDelta::FromMilliseconds(20),
+ base::TimeDelta::FromMinutes(10), 100);
+ DCHECK(max_packets_timed_ >= kSdchPacketHistogramCount);
+ DCHECK(kSdchPacketHistogramCount > 4);
+ if (packet_times_.size() <= 4)
+ return;
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Experiment_Holdback_1st_To_2nd_c",
+ packet_times_[1] - packet_times_[0],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Experiment_Holdback_2nd_To_3rd_c",
+ packet_times_[2] - packet_times_[1],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Experiment_Holdback_3rd_To_4th_c",
+ packet_times_[3] - packet_times_[2],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ UMA_HISTOGRAM_CLIPPED_TIMES("Sdch.Experiment_Holdback_4th_To_5th_c",
+ packet_times_[4] - packet_times_[3],
+ base::TimeDelta::FromMilliseconds(1),
+ base::TimeDelta::FromSeconds(10), 100);
+ return;
+ }
+ default:
+ NOTREACHED();
+ return;
+ }
+}
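As an aside, here is a self-contained sketch of the packet-approximation accounting performed by UpdatePacketReadTimes() above. The helper function is hypothetical (the real code works on member state), but the arithmetic is the same: new filtered input bytes are attributed to approximate packets of kTypicalPacketSize (1430) bytes, and the running total is then snapped to the true byte count because the last packet is usually not full.

// Illustrative only: count the approximate packets implied by a new byte
// total, mirroring the loop in URLRequestJob::UpdatePacketReadTimes().
static const long long kTypicalPacketSize = 1430;  // Same constant as above.

int CountNewPackets(long long filter_input_byte_count,
                    long long* bytes_observed_in_packets) {
  int new_packets = 0;
  while (filter_input_byte_count > *bytes_observed_in_packets) {
    ++new_packets;
    *bytes_observed_in_packets += kTypicalPacketSize;
  }
  // The final packet is typically not full, so snap the accounting to the
  // true byte total before the next call.
  *bytes_observed_in_packets = filter_input_byte_count;
  return new_packets;
}

For example, if 3000 bytes have arrived and none were accounted for yet, the loop counts 3 approximate packets (two full 1430-byte packets plus one partial), then records 3000 as the bytes observed so far.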
diff --git a/net/url_request/url_request_job.h b/net/url_request/url_request_job.h
index 133af7e..5ecb6d0 100644
--- a/net/url_request/url_request_job.h
+++ b/net/url_request/url_request_job.h
@@ -10,6 +10,7 @@
#include "base/ref_counted.h"
#include "base/scoped_ptr.h"
+#include "base/time.h"
#include "net/base/filter.h"
#include "net/base/load_states.h"
@@ -31,6 +32,13 @@ class URLRequestJobMetrics;
class URLRequestJob : public base::RefCountedThreadSafe<URLRequestJob>,
public FilterContext {
public:
+ // When histogramming results related to SDCH and/or an SDCH latency test, the
+ // number of packets for which we need to record arrival times so as to
+ // calculate interpacket latencies. We currently are only looking at the
+ // first few packets, as we're monitoring the impact of the initial TCP
+ // congestion window on stalling of transmissions.
+ static const size_t kSdchPacketHistogramCount = 5;
+
explicit URLRequestJob(URLRequest* request);
virtual ~URLRequestJob();
@@ -201,6 +209,7 @@ class URLRequestJob : public base::RefCountedThreadSafe<URLRequestJob>,
virtual int64 GetByteReadCount() const;
virtual int GetResponseCode() const { return -1; }
virtual int GetInputStreamBufferSize() const { return kFilterBufSize; }
+ virtual void RecordPacketStats(StatisticSelector statistic) const;
protected:
// Notifies the job that headers have been received.
@@ -272,6 +281,10 @@ class URLRequestJob : public base::RefCountedThreadSafe<URLRequestJob>,
// Contains IO performance measurement when profiling is enabled.
scoped_ptr<URLRequestJobMetrics> metrics_;
+ // Facilitate histogramming by turning on packet counting.
+ // If called more than once, the largest value will be used.
+ void EnablePacketCounting(size_t max_packets_timed);
+
private:
// Size of filter input buffers used by this class.
static const int kFilterBufSize;
@@ -290,11 +303,13 @@ class URLRequestJob : public base::RefCountedThreadSafe<URLRequestJob>,
// have been read.
void RecordBytesRead(int bytes_read);
- private:
// Called to query whether there is data available in the filter to be read
// out.
bool FilterHasData();
+ // Record packet arrival times for possible use in histograms.
+ void UpdatePacketReadTimes();
+
// Indicates that the job is done producing data, either it has completed
// all the data or an error has been encountered. Set exclusively by
// NotifyDone so that it is kept in sync with the request.
@@ -324,11 +339,48 @@ class URLRequestJob : public base::RefCountedThreadSafe<URLRequestJob>,
// Expected content size
int64 expected_content_size_;
+ //----------------------------------------------------------------------------
+ // Data used for statistics gathering in some instances. This data is only
+ // used for histograms etc., and is not required. It is optionally gathered
+ // based on the settings of several control variables.
+
+ // Enable recording of packet arrival times for histogramming.
+ bool packet_timing_enabled_;
+
+ // TODO(jar): improve the quality of the gathered info by gathering most times
+ // at a lower point in the network stack, assuring we have actual packet
+ // boundaries, rather than approximations. Also note that input byte count
+ // as gathered here is post-SSL, and post-cache-fetch, and does not reflect
+ // true packet arrival times in such cases.
+
// Total number of bytes read from network (or cache) and typically handed
// to filter to process. Used to histogram compression ratios, and error
// recovery scenarios in filters.
int64 filter_input_byte_count_;
+ // The number of bytes that have been accounted for in packets (where some of
+ // those packets may possibly have had their time of arrival recorded).
+ int64 bytes_observed_in_packets_;
+
+ // Limit on the size of the array packet_times_. This can be set to
+ // zero, and then no packet times will be gathered.
+ size_t max_packets_timed_;
+
+ // Arrival times for some of the first few packets.
+ std::vector<base::Time> packet_times_;
+
+ // The request time may not be available when we are being destroyed, so we
+ // snapshot it early on.
+ base::Time request_time_snapshot_;
+
+ // Since we don't save all packet times in packet_times_, we save the
+ // last time for use in histograms.
+ base::Time final_packet_time_;
+
+ // The count of the number of packets, some of which may not have been timed.
+ // We're ignoring overflow, as 1430 x 2^31 is a LOT of bytes.
+ int observed_packet_count_;
+
DISALLOW_COPY_AND_ASSIGN(URLRequestJob);
};
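One closing note on kSdchPacketHistogramCount: with five recorded arrival times, RecordPacketStats() can emit at most four interpacket gaps, which is exactly the 1st_To_2nd through 4th_To_5th histograms above (hence the "packet_times_.size() <= 4" early returns). A minimal sketch of that computation, assuming the times have already been captured (the helper is hypothetical, not part of this change):

// Illustrative only: the four interpacket gaps histogrammed as
// "..._1st_To_2nd_c" through "..._4th_To_5th_c".
#include <vector>
#include "base/time.h"

std::vector<base::TimeDelta> InterpacketGaps(
    const std::vector<base::Time>& packet_times) {
  std::vector<base::TimeDelta> gaps;
  // Only the first kSdchPacketHistogramCount (5) arrivals matter to the
  // histograms, so at most four gaps are produced.
  for (size_t i = 1; i < packet_times.size() && i < 5; ++i)
    gaps.push_back(packet_times[i] - packet_times[i - 1]);
  return gaps;
}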