From a6c84f4dd8a40702b7f7e9edd8204bbdb4e52f8f Mon Sep 17 00:00:00 2001
From: "mef@chromium.org"
 <mef@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>
Date: Fri, 7 Jun 2013 20:39:38 +0000
Subject: Track DNS Server Success and Failure time and use it in next server
 selection.

BUG=110197

Review URL: https://chromiumcodereview.appspot.com/16357018

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@204915 0039d316-1c4b-4281-b951-d872f2087c98
---
 net/dns/dns_session.cc     | 150 +++++++++++++++++++++++++++++++++++++--------
 net/dns/dns_session.h      |  36 ++++++++---
 net/dns/dns_transaction.cc |  63 +++++++++----------
 3 files changed, 185 insertions(+), 64 deletions(-)

diff --git a/net/dns/dns_session.cc b/net/dns/dns_session.cc
index 6d8d0db..2e6d0c9 100644
--- a/net/dns/dns_session.cc
+++ b/net/dns/dns_session.cc
@@ -6,6 +6,7 @@
 
 #include "base/basictypes.h"
 #include "base/bind.h"
+#include "base/lazy_instance.h"
 #include "base/metrics/histogram.h"
 #include "base/metrics/sample_vector.h"
 #include "base/rand_util.h"
@@ -32,6 +33,40 @@ const size_t kRTTBucketCount = 100;
 const unsigned kRTOPercentile = 99;
 }  // namespace
 
+// Runtime statistics of DNS server.
+struct DnsSession::ServerStats {
+  ServerStats(base::TimeDelta rtt_estimate_param, RttBuckets* buckets)
+    : last_failure_count(0), rtt_estimate(rtt_estimate_param) {
+    rtt_histogram.reset(new base::SampleVector(buckets));
+  }
+
+  // Count of consecutive failures after last success.
+  int last_failure_count;
+
+  // Last time when server returned failure or timeout.
+  base::Time last_failure;
+  // Last time when server returned success.
+  base::Time last_success;
+
+  // Estimated RTT using moving average.
+  base::TimeDelta rtt_estimate;
+  // Estimated error in the above.
+  base::TimeDelta rtt_deviation;
+
+  // A histogram of observed RTT .
+  scoped_ptr<base::SampleVector> rtt_histogram;
+
+  DISALLOW_COPY_AND_ASSIGN(ServerStats);
+};
+
+// static
+base::LazyInstance<DnsSession::RttBuckets>::Leaky DnsSession::rtt_buckets_ =
+    LAZY_INSTANCE_INITIALIZER;
+
+DnsSession::RttBuckets::RttBuckets() : base::BucketRanges(kRTTBucketCount + 1) {
+  base::Histogram::InitializeBucketRanges(1, 5000, kRTTBucketCount, this);
+}
+
 DnsSession::SocketLease::SocketLease(scoped_refptr<DnsSession> session,
                                      unsigned server_index,
                                      scoped_ptr<DatagramClientSocket> socket)
@@ -49,33 +84,79 @@ DnsSession::DnsSession(const DnsConfig& config,
       socket_pool_(socket_pool.Pass()),
       rand_callback_(base::Bind(rand_int_callback, 0, kuint16max)),
       net_log_(net_log),
-      server_index_(0),
-      rtt_estimates_(config_.nameservers.size(), config_.timeout),
-      rtt_deviations_(config_.nameservers.size()),
-      rtt_buckets_(new base::BucketRanges(kRTTBucketCount + 1)) {
+      server_index_(0) {
   socket_pool_->Initialize(&config_.nameservers, net_log);
-
-  // TODO(mef): This could be done once per process lifetime.
-  base::Histogram::InitializeBucketRanges(1, 5000, kRTTBucketCount,
-                                          rtt_buckets_.get());
+  UMA_HISTOGRAM_CUSTOM_COUNTS(
+      "AsyncDNS.ServerCount", config_.nameservers.size(), 0, 10, 10);
   for (size_t i = 0; i < config_.nameservers.size(); ++i) {
-    rtt_histograms_.push_back(new base::SampleVector(rtt_buckets_.get()));
+    server_stats_.push_back(new ServerStats(config_.timeout,
+                                            rtt_buckets_.Pointer()));
   }
 }
 
-DnsSession::~DnsSession() {}
+DnsSession::~DnsSession() {
+  RecordServerStats();
+}
 
 int DnsSession::NextQueryId() const { return rand_callback_.Run(); }
 
-int DnsSession::NextFirstServerIndex() {
-  int index = server_index_;
+unsigned DnsSession::NextFirstServerIndex() {
+  unsigned index = NextGoodServerIndex(server_index_);
   if (config_.rotate)
     server_index_ = (server_index_ + 1) % config_.nameservers.size();
   return index;
 }
 
+unsigned DnsSession::NextGoodServerIndex(unsigned server_index) {
+  unsigned index = server_index;
+  base::Time oldest_server_failure(base::Time::Now());
+  unsigned oldest_server_failure_index = 0;
+
+  UMA_HISTOGRAM_BOOLEAN("AsyncDNS.ServerIsGood",
+                        server_stats_[server_index]->last_failure.is_null());
+
+  do {
+    base::Time cur_server_failure = server_stats_[index]->last_failure;
+    // If number of failures on this server doesn't exceed number of allowed
+    // attempts, return its index.
+    if (server_stats_[server_index]->last_failure_count < config_.attempts) {
+      return index;
+    }
+    // Track oldest failed server.
+    if (cur_server_failure < oldest_server_failure) {
+      oldest_server_failure = cur_server_failure;
+      oldest_server_failure_index = index;
+    }
+    index = (index + 1) % config_.nameservers.size();
+  } while (index != server_index);
+
+  // If we are here it means that there are no successful servers, so we have
+  // to use one that has failed oldest.
+  return oldest_server_failure_index;
+}
+
+void DnsSession::RecordServerFailure(unsigned server_index) {
+  UMA_HISTOGRAM_CUSTOM_COUNTS(
+      "AsyncDNS.ServerFailureIndex", server_index, 0, 10, 10);
+  ++(server_stats_[server_index]->last_failure_count);
+  server_stats_[server_index]->last_failure = base::Time::Now();
+}
+
+void DnsSession::RecordServerSuccess(unsigned server_index) {
+  if (server_stats_[server_index]->last_success.is_null()) {
+    UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresAfterNetworkChange",
+                           server_stats_[server_index]->last_failure_count);
+  } else {
+    UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresBeforeSuccess",
+                           server_stats_[server_index]->last_failure_count);
+  }
+  server_stats_[server_index]->last_failure_count = 0;
+  server_stats_[server_index]->last_failure = base::Time();
+  server_stats_[server_index]->last_success = base::Time::Now();
+}
+
 void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) {
-  DCHECK_LT(server_index, rtt_histograms_.size());
+  DCHECK_LT(server_index, server_stats_.size());
 
   // For measurement, assume it is the first attempt (no backoff).
   base::TimeDelta timeout_jacobson = NextTimeoutFromJacobson(server_index, 0);
@@ -90,8 +171,8 @@ void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) {
 
   // Jacobson/Karels algorithm for TCP.
   // Using parameters: alpha = 1/8, delta = 1/4, beta = 4
-  base::TimeDelta& estimate = rtt_estimates_[server_index];
-  base::TimeDelta& deviation = rtt_deviations_[server_index];
+  base::TimeDelta& estimate = server_stats_[server_index]->rtt_estimate;
+  base::TimeDelta& deviation = server_stats_[server_index]->rtt_deviation;
   base::TimeDelta current_error = rtt - estimate;
   estimate += current_error / 8;  // * alpha
   base::TimeDelta abs_error = base::TimeDelta::FromInternalValue(
@@ -99,7 +180,8 @@ void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) {
   deviation += (abs_error - deviation) / 4;  // * delta
 
   // Histogram-based method.
-  rtt_histograms_[server_index]->Accumulate(rtt.InMilliseconds(), 1);
+  server_stats_[server_index]->rtt_histogram
+      ->Accumulate(rtt.InMilliseconds(), 1);
 }
 
 void DnsSession::RecordLostPacket(unsigned server_index, int attempt) {
@@ -111,10 +193,28 @@ void DnsSession::RecordLostPacket(unsigned server_index, int attempt) {
   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentHistogram", timeout_histogram);
 }
 
+void DnsSession::RecordServerStats() {
+  for (size_t index = 0; index < server_stats_.size(); ++index) {
+    if (server_stats_[index]->last_failure_count) {
+      if (server_stats_[index]->last_success.is_null()) {
+        UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresWithoutSuccess",
+                             server_stats_[index]->last_failure_count);
+      } else {
+        UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresAfterSuccess",
+                             server_stats_[index]->last_failure_count);
+      }
+    }
+  }
+}
+
+
 base::TimeDelta DnsSession::NextTimeout(unsigned server_index, int attempt) {
-  DCHECK_LT(server_index, rtt_histograms_.size());
+  DCHECK_LT(server_index, server_stats_.size());
 
   base::TimeDelta timeout = config_.timeout;
+  // If this server has not responded successfully, then don't wait too long.
+  if (server_stats_[server_index]->last_success.is_null())
+    return timeout;
 
   // The timeout doubles every full round (each nameserver once).
   unsigned num_backoffs = attempt / config_.nameservers.size();
@@ -155,10 +255,10 @@ void DnsSession::FreeSocket(unsigned server_index,
 
 base::TimeDelta DnsSession::NextTimeoutFromJacobson(unsigned server_index,
                                                     int attempt) {
-  DCHECK_LT(server_index, rtt_estimates_.size());
+  DCHECK_LT(server_index, server_stats_.size());
 
-  base::TimeDelta timeout =
-      rtt_estimates_[server_index] + 4 * rtt_deviations_[server_index];
+  base::TimeDelta timeout = server_stats_[server_index]->rtt_estimate +
+                            4 * server_stats_[server_index]->rtt_deviation;
 
   timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
 
@@ -171,23 +271,25 @@ base::TimeDelta DnsSession::NextTimeoutFromJacobson(unsigned server_index,
 
 base::TimeDelta DnsSession::NextTimeoutFromHistogram(unsigned server_index,
                                                      int attempt) {
-  DCHECK_LT(server_index, rtt_histograms_.size());
+  DCHECK_LT(server_index, server_stats_.size());
 
   COMPILE_ASSERT(std::numeric_limits<base::HistogramBase::Count>::is_signed,
                  histogram_base_count_assumed_to_be_signed);
 
   // Use fixed percentile of observed samples.
-  const base::SampleVector& samples = *rtt_histograms_[server_index];
+  const base::SampleVector& samples =
+      *server_stats_[server_index]->rtt_histogram;
+
   base::HistogramBase::Count total = samples.TotalCount();
   base::HistogramBase::Count remaining_count = kRTOPercentile * total / 100;
   size_t index = 0;
-  while (remaining_count > 0 && index < rtt_buckets_->size()) {
+  while (remaining_count > 0 && index < rtt_buckets_.Get().size()) {
     remaining_count -= samples.GetCountAtIndex(index);
     ++index;
   }
 
   base::TimeDelta timeout =
-      base::TimeDelta::FromMilliseconds(rtt_buckets_->range(index));
+      base::TimeDelta::FromMilliseconds(rtt_buckets_.Get().range(index));
 
   timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
 
diff --git a/net/dns/dns_session.h b/net/dns/dns_session.h
index e35ddb0..bced1e6 100644
--- a/net/dns/dns_session.h
+++ b/net/dns/dns_session.h
@@ -7,9 +7,11 @@
 
 #include <vector>
 
+#include "base/lazy_instance.h"
 #include "base/memory/ref_counted.h"
 #include "base/memory/scoped_ptr.h"
 #include "base/memory/scoped_vector.h"
+#include "base/metrics/bucket_ranges.h"
 #include "base/time.h"
 #include "net/base/net_export.h"
 #include "net/base/rand_callback.h"
@@ -67,7 +69,19 @@ class NET_EXPORT_PRIVATE DnsSession
   int NextQueryId() const;
 
   // Return the index of the first configured server to use on first attempt.
-  int NextFirstServerIndex();
+  unsigned NextFirstServerIndex();
+
+  // Start with |server_index| and find the index of the next known good server
+  // to use on this attempt. Returns |server_index| if this server has no
+  // recorded failures, or if there are no other servers that have not failed
+  // or have failed longer time ago.
+  unsigned NextGoodServerIndex(unsigned server_index);
+
+  // Record that server failed to respond (due to SRV_FAIL or timeout).
+  void RecordServerFailure(unsigned server_index);
+
+  // Record that server responded successfully.
+  void RecordServerSuccess(unsigned server_index);
 
   // Record how long it took to receive a response from the server.
   void RecordRTT(unsigned server_index, base::TimeDelta rtt);
@@ -75,6 +89,9 @@ class NET_EXPORT_PRIVATE DnsSession
   // Record suspected loss of a packet for a specific server.
   void RecordLostPacket(unsigned server_index, int attempt);
 
+  // Record server stats before it is destroyed.
+  void RecordServerStats();
+
   // Return the timeout for the next query. |attempt| counts from 0 and is used
   // for exponential backoff.
   base::TimeDelta NextTimeout(unsigned server_index, int attempt);
@@ -111,15 +128,16 @@ class NET_EXPORT_PRIVATE DnsSession
   // Current index into |config_.nameservers| to begin resolution with.
   int server_index_;
 
-  // Estimated RTT for each name server using moving average.
-  std::vector<base::TimeDelta> rtt_estimates_;
-  // Estimated error in the above for each name server.
-  std::vector<base::TimeDelta> rtt_deviations_;
+  struct ServerStats;
+
+  // Track runtime statistics of each DNS server.
+  ScopedVector<ServerStats> server_stats_;
 
-  // Buckets shared for all |rtt_histograms_|.
-  scoped_ptr<base::BucketRanges> rtt_buckets_;
-  // A histogram of observed RTT for each name server.
-  ScopedVector<base::SampleVector> rtt_histograms_;
+  // Buckets shared for all |ServerStats::rtt_histogram|.
+  struct RttBuckets : public base::BucketRanges {
+    RttBuckets();
+  };
+  static base::LazyInstance<RttBuckets>::Leaky rtt_buckets_;
 
   DISALLOW_COPY_AND_ASSIGN(DnsSession);
 };
diff --git a/net/dns/dns_transaction.cc b/net/dns/dns_transaction.cc
index 8001b0a..fef5f26 100644
--- a/net/dns/dns_transaction.cc
+++ b/net/dns/dns_transaction.cc
@@ -73,7 +73,8 @@ Value* NetLogStartCallback(const std::string* hostname,
 // matches. Logging is done in the socket and in the outer DnsTransaction.
 class DnsAttempt {
  public:
-  DnsAttempt() : result_(ERR_FAILED) {}
+  explicit DnsAttempt(unsigned server_index)
+      : result_(ERR_FAILED), server_index_(server_index) {}
 
   virtual ~DnsAttempt() {}
   // Starts the attempt. Returns ERR_IO_PENDING if cannot complete synchronously
@@ -91,7 +92,7 @@ class DnsAttempt {
   virtual const BoundNetLog& GetSocketNetLog() const = 0;
 
   // Returns the index of the destination server within DnsConfig::nameservers.
-  virtual unsigned GetServerIndex() const = 0;
+  unsigned server_index() const { return server_index_; }
 
   // Returns a Value representing the received response, along with a reference
   // to the NetLog source source of the UDP socket used.  The request must have
@@ -124,17 +125,20 @@ class DnsAttempt {
  private:
   // Result of last operation.
   int result_;
+
+  const unsigned server_index_;
 };
 
 class DnsUDPAttempt : public DnsAttempt {
  public:
-  DnsUDPAttempt(scoped_ptr<DnsSession::SocketLease> socket_lease,
+  DnsUDPAttempt(unsigned server_index,
+                scoped_ptr<DnsSession::SocketLease> socket_lease,
                 scoped_ptr<DnsQuery> query)
-      : next_state_(STATE_NONE),
+      : DnsAttempt(server_index),
+        next_state_(STATE_NONE),
         received_malformed_response_(false),
         socket_lease_(socket_lease.Pass()),
-        query_(query.Pass()) {
-  }
+        query_(query.Pass()) {}
 
   // DnsAttempt:
   virtual int Start(const CompletionCallback& callback) OVERRIDE {
@@ -158,10 +162,6 @@ class DnsUDPAttempt : public DnsAttempt {
     return socket_lease_->socket()->NetLog();
   }
 
-  virtual unsigned GetServerIndex() const OVERRIDE {
-    return socket_lease_->server_index();
-  }
-
  private:
   enum State {
     STATE_SEND_QUERY,
@@ -298,13 +298,12 @@ class DnsTCPAttempt : public DnsAttempt {
   DnsTCPAttempt(unsigned server_index,
                 scoped_ptr<StreamSocket> socket,
                 scoped_ptr<DnsQuery> query)
-      : next_state_(STATE_NONE),
-        server_index_(server_index),
+      : DnsAttempt(server_index),
+        next_state_(STATE_NONE),
         socket_(socket.Pass()),
         query_(query.Pass()),
         length_buffer_(new IOBufferWithSize(sizeof(uint16))),
-        response_length_(0) {
-  }
+        response_length_(0) {}
 
   // DnsAttempt:
   virtual int Start(const CompletionCallback& callback) OVERRIDE {
@@ -334,10 +333,6 @@ class DnsTCPAttempt : public DnsAttempt {
     return socket_->NetLog();
   }
 
-  virtual unsigned GetServerIndex() const OVERRIDE {
-    return server_index_;
-  }
-
  private:
   enum State {
     STATE_CONNECT_COMPLETE,
@@ -497,7 +492,6 @@ class DnsTCPAttempt : public DnsAttempt {
   State next_state_;
   base::TimeTicks start_time_;
 
-  unsigned server_index_;
   scoped_ptr<StreamSocket> socket_;
   scoped_ptr<DnsQuery> query_;
   scoped_refptr<IOBufferWithSize> length_buffer_;
@@ -685,13 +679,16 @@ class DnsTransactionImpl : public DnsTransaction,
 
     unsigned server_index =
         (first_server_index_ + attempt_number) % config.nameservers.size();
+    // Skip over known failed servers.
+    server_index = session_->NextGoodServerIndex(server_index);
 
     scoped_ptr<DnsSession::SocketLease> lease =
         session_->AllocateSocket(server_index, net_log_.source());
 
     bool got_socket = !!lease.get();
 
-    DnsUDPAttempt* attempt = new DnsUDPAttempt(lease.Pass(), query.Pass());
+    DnsUDPAttempt* attempt =
+        new DnsUDPAttempt(server_index, lease.Pass(), query.Pass());
 
     attempts_.push_back(attempt);
 
@@ -718,7 +715,7 @@ class DnsTransactionImpl : public DnsTransaction,
     DCHECK(previous_attempt);
     DCHECK(!had_tcp_attempt_);
 
-    unsigned server_index = previous_attempt->GetServerIndex();
+    unsigned server_index = previous_attempt->server_index();
 
     scoped_ptr<StreamSocket> socket(
         session_->CreateTCPSocket(server_index, net_log_.source()));
@@ -774,7 +771,7 @@ class DnsTransactionImpl : public DnsTransaction,
     DCHECK_LT(attempt_number, attempts_.size());
     const DnsAttempt* attempt = attempts_[attempt_number];
     if (attempt->GetResponse()) {
-      session_->RecordRTT(attempt->GetServerIndex(),
+      session_->RecordRTT(attempt->server_index(),
                           base::TimeTicks::Now() - start);
     }
     OnAttemptComplete(attempt_number, rv);
@@ -805,15 +802,14 @@ class DnsTransactionImpl : public DnsTransaction,
       return;
 
     size_t num_servers = session_->config().nameservers.size();
+    std::vector<int> server_attempts(num_servers);
     for (size_t i = 0; i < first_completed; ++i) {
+      unsigned server_index = attempts_[i]->server_index();
+      int server_attempt = server_attempts[server_index]++;
       // Don't record lost packet unless attempt is in pending state.
       if (!attempts_[i]->is_pending())
         continue;
-      unsigned server_index = attempts_[i]->GetServerIndex();
-      // Servers are rotated in round robin, so server round can be calculated
-      // from attempt index.
-      size_t server_round = i / num_servers;
-      session_->RecordLostPacket(server_index, static_cast<int>(server_round));
+      session_->RecordLostPacket(server_index, server_attempt);
     }
   }
 
@@ -841,14 +837,16 @@ class DnsTransactionImpl : public DnsTransaction,
 
       switch (result.rv) {
         case OK:
-          net_log_.EndEventWithNetErrorCode(
-              NetLog::TYPE_DNS_TRANSACTION_QUERY, result.rv);
+          session_->RecordServerSuccess(result.attempt->server_index());
+          net_log_.EndEventWithNetErrorCode(NetLog::TYPE_DNS_TRANSACTION_QUERY,
+                                            result.rv);
           DCHECK(result.attempt);
           DCHECK(result.attempt->GetResponse());
           return result;
         case ERR_NAME_NOT_RESOLVED:
-          net_log_.EndEventWithNetErrorCode(
-              NetLog::TYPE_DNS_TRANSACTION_QUERY, result.rv);
+          session_->RecordServerSuccess(result.attempt->server_index());
+          net_log_.EndEventWithNetErrorCode(NetLog::TYPE_DNS_TRANSACTION_QUERY,
+                                            result.rv);
           // Try next suffix.
           qnames_.pop_front();
           if (qnames_.empty()) {
@@ -859,6 +857,8 @@ class DnsTransactionImpl : public DnsTransaction,
           break;
         case ERR_CONNECTION_REFUSED:
         case ERR_DNS_TIMED_OUT:
+          if (result.attempt)
+            session_->RecordServerFailure(result.attempt->server_index());
           if (MoreAttemptsAllowed()) {
             result = MakeAttempt();
           } else {
@@ -873,6 +873,7 @@ class DnsTransactionImpl : public DnsTransaction,
           DCHECK(result.attempt);
           if (result.attempt != attempts_.back()) {
             // This attempt already timed out. Ignore it.
+            session_->RecordServerFailure(result.attempt->server_index());
             return AttemptResult(ERR_IO_PENDING, NULL);
           }
           if (MoreAttemptsAllowed()) {
-- 
cgit v1.1