summaryrefslogtreecommitdiffstats
path: root/net/base/host_resolver_impl.cc
diff options
context:
space:
mode:
authorjar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-05-02 07:34:23 +0000
committerjar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-05-02 07:34:23 +0000
commite57ec4c1653c7f08cf6d92f3341a04f242885814 (patch)
treeaa13df6c8f9ce15e19193d09fa2128e367b3cfeb /net/base/host_resolver_impl.cc
parente1a2ffff2b61ff308ffa77e32c71e45be74ee8f3 (diff)
downloadchromium_src-e57ec4c1653c7f08cf6d92f3341a04f242885814.zip
chromium_src-e57ec4c1653c7f08cf6d92f3341a04f242885814.tar.gz
chromium_src-e57ec4c1653c7f08cf6d92f3341a04f242885814.tar.bz2
Revert 83641 - DNS Host resolver changes with retry logic. Fix for
bug Chromium cannot recover from a state when its DNS requests have been dropped. Whenever we try to resolve the host, we post a delayed task to check if host resolution (OnLookupComplete) is completed or not. If the original attempt hasn't completed, then we start another attempt to resolve for the same request. We take the results from the attempt that finishes first and leave all other attempts as orphaned. BUG=73327 TEST=dns host resolver tests R=eroman,jar Review URL: http://codereview.chromium.org/6782001 TBR=rtenneti@chromium.org Review URL: http://codereview.chromium.org/6902198 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@83710 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base/host_resolver_impl.cc')
-rw-r--r--net/base/host_resolver_impl.cc219
1 files changed, 44 insertions, 175 deletions
diff --git a/net/base/host_resolver_impl.cc b/net/base/host_resolver_impl.cc
index e73073f..b512561 100644
--- a/net/base/host_resolver_impl.cc
+++ b/net/base/host_resolver_impl.cc
@@ -24,7 +24,6 @@
#include "base/stl_util-inl.h"
#include "base/string_util.h"
#include "base/synchronization/lock.h"
-#include "base/task.h"
#include "base/threading/worker_pool.h"
#include "base/time.h"
#include "base/utf_string_conversions.h"
@@ -72,7 +71,6 @@ HostCache* CreateDefaultCache() {
} // anonymous namespace
-// static
HostResolver* CreateSystemHostResolver(size_t max_concurrent_resolves,
NetLog* net_log) {
// Maximum of 8 concurrent resolver threads.
@@ -354,10 +352,8 @@ class HostResolverImpl::Job
resolver_(resolver),
origin_loop_(MessageLoop::current()),
resolver_proc_(resolver->effective_resolver_proc()),
- unresponsive_delay_(resolver->unresponsive_delay()),
- attempt_number_(0),
- completed_attempt_number_(0),
- completed_attempt_error_(ERR_UNEXPECTED),
+ error_(OK),
+ os_error_(0),
had_non_speculative_request_(false),
net_log_(BoundNetLog::Make(net_log,
NetLog::SOURCE_HOST_RESOLVER_IMPL_JOB)) {
@@ -384,44 +380,23 @@ class HostResolverImpl::Job
// Called from origin loop.
void Start() {
- StartLookupAttempt();
- }
+ start_time_ = base::TimeTicks::Now();
- // Called from origin loop.
- void StartLookupAttempt() {
- base::TimeTicks start_time = base::TimeTicks::Now();
- ++attempt_number_;
- // Dispatch the lookup attempt to a worker thread.
- if (!base::WorkerPool::PostTask(
- FROM_HERE,
- NewRunnableMethod(this, &Job::DoLookup, start_time,
- attempt_number_),
- true)) {
+ // Dispatch the job to a worker thread.
+ if (!base::WorkerPool::PostTask(FROM_HERE,
+ NewRunnableMethod(this, &Job::DoLookup), true)) {
NOTREACHED();
// Since we could be running within Resolve() right now, we can't just
// call OnLookupComplete(). Instead we must wait until Resolve() has
// returned (IO_PENDING).
+ error_ = ERR_UNEXPECTED;
MessageLoop::current()->PostTask(
- FROM_HERE,
- NewRunnableMethod(this, &Job::OnLookupComplete, AddressList(),
- start_time, attempt_number_, ERR_UNEXPECTED, 0));
- return;
+ FROM_HERE, NewRunnableMethod(this, &Job::OnLookupComplete));
}
- // Post a task to check if we get the results within a given time.
- // OnCheckForComplete has the potential for starting a new attempt on a
- // different worker thread if none of our outstanding attempts have
- // completed yet.
- MessageLoop::current()->PostDelayedTask(
- FROM_HERE,
- NewRunnableMethod(this, &Job::OnCheckForComplete),
- unresponsive_delay_.InMilliseconds());
- }
-
- // Cancels the current job. The Job will be orphaned. Any outstanding resolve
- // attempts running on worker threads will continue running. Only once all the
- // attempts complete will the final reference to this Job be released.
- // Callable from origin thread.
+ }
+
+ // Cancels the current job. Callable from origin thread.
void Cancel() {
net_log_.AddEvent(NetLog::TYPE_CANCELLED, NULL);
@@ -455,11 +430,6 @@ class HostResolverImpl::Job
}
// Called from origin thread.
- bool was_completed() const {
- return completed_attempt_number_ > 0;
- }
-
- // Called from origin thread.
const Key& key() const {
return key_;
}
@@ -468,6 +438,10 @@ class HostResolverImpl::Job
return id_;
}
+ base::TimeTicks start_time() const {
+ return start_time_;
+ }
+
// Called from origin thread.
const RequestsList& requests() const {
return requests_;
@@ -496,100 +470,49 @@ class HostResolverImpl::Job
// WARNING: This code runs inside a worker pool. The shutdown code cannot
// wait for it to finish, so we must be very careful here about using other
// objects (like MessageLoops, Singletons, etc). During shutdown these objects
- // may no longer exist. Multiple DoLookups() could be running in parallel, so
- // any state inside of |this| must not mutate .
- void DoLookup(const base::TimeTicks& start_time,
- const uint32 attempt_number) {
- AddressList results;
- int os_error = 0;
+ // may no longer exist.
+ void DoLookup() {
// Running on the worker thread
- int error = ResolveAddrInfo(resolver_proc_,
- key_.hostname,
- key_.address_family,
- key_.host_resolver_flags,
- &results,
- &os_error);
+ error_ = ResolveAddrInfo(resolver_proc_,
+ key_.hostname,
+ key_.address_family,
+ key_.host_resolver_flags,
+ &results_,
+ &os_error_);
// The origin loop could go away while we are trying to post to it, so we
// need to call its PostTask method inside a lock. See ~HostResolver.
{
base::AutoLock locked(origin_loop_lock_);
if (origin_loop_) {
- origin_loop_->PostTask(
- FROM_HERE,
- NewRunnableMethod(this, &Job::OnLookupComplete, results, start_time,
- attempt_number, error, os_error));
+ origin_loop_->PostTask(FROM_HERE,
+ NewRunnableMethod(this, &Job::OnLookupComplete));
}
}
}
- // Callback to see if DoLookup has finished or not (runs on origin thread).
- void OnCheckForComplete() {
- if (was_cancelled() || was_completed())
- return;
-
- DCHECK(resolver_);
- base::TimeDelta unresponsive_delay =
- unresponsive_delay_ * resolver_->retry_factor();
- if (unresponsive_delay >= resolver_->maximum_unresponsive_delay())
- return;
-
- unresponsive_delay_ = unresponsive_delay;
- StartLookupAttempt();
- }
-
// Callback for when DoLookup() completes (runs on origin thread).
- void OnLookupComplete(const AddressList& results,
- const base::TimeTicks& start_time,
- const uint32 attempt_number,
- int error,
- const int os_error) {
+ void OnLookupComplete() {
// Should be running on origin loop.
// TODO(eroman): this is being hit by URLRequestTest.CancelTest*,
// because MessageLoop::current() == NULL.
//DCHECK_EQ(origin_loop_, MessageLoop::current());
- DCHECK(error || results.head());
-
- bool was_retry_attempt = attempt_number > 1;
-
- if (!was_cancelled()) {
- // If host is already resolved, then record data and return.
- if (was_completed()) {
- // If this is the first attempt that is finishing later, then record
- // data for the first attempt. Won't contaminate with retry attempt's
- // data.
- if (!was_retry_attempt)
- RecordPerformanceHistograms(start_time, error, os_error);
-
- RecordAttemptHistograms(start_time, attempt_number, error, os_error);
- return;
- }
-
- // Copy the results from the first worker thread that resolves the host.
- results_ = results;
- completed_attempt_number_ = attempt_number;
- completed_attempt_error_ = error;
- }
+ DCHECK(error_ || results_.head());
// Ideally the following code would be part of host_resolver_proc.cc,
// however it isn't safe to call NetworkChangeNotifier from worker
// threads. So we do it here on the IO thread instead.
- if (error != OK && NetworkChangeNotifier::IsOffline())
- error = ERR_INTERNET_DISCONNECTED;
-
- // We will record data for the first attempt. Don't contaminate with retry
- // attempt's data.
- if (!was_retry_attempt)
- RecordPerformanceHistograms(start_time, error, os_error);
+ if (error_ != OK && NetworkChangeNotifier::IsOffline())
+ error_ = ERR_INTERNET_DISCONNECTED;
- RecordAttemptHistograms(start_time, attempt_number, error, os_error);
+ RecordPerformanceHistograms();
if (was_cancelled())
return;
scoped_refptr<NetLog::EventParameters> params;
- if (error != OK) {
- params = new HostResolveFailedParams(error, os_error);
+ if (error_ != OK) {
+ params = new HostResolveFailedParams(error_, os_error_);
} else {
params = new AddressListNetLogParam(results_);
}
@@ -601,15 +524,13 @@ class HostResolverImpl::Job
DCHECK(!requests_.empty());
// Use the port number of the first request.
- if (error == OK)
+ if (error_ == OK)
results_.SetPort(requests_[0]->port());
- resolver_->OnJobComplete(this, error, os_error, results_);
+ resolver_->OnJobComplete(this, error_, os_error_, results_);
}
- void RecordPerformanceHistograms(const base::TimeTicks& start_time,
- const int error,
- const int os_error) const {
+ void RecordPerformanceHistograms() const {
enum Category { // Used in HISTOGRAM_ENUMERATION.
RESOLVE_SUCCESS,
RESOLVE_FAIL,
@@ -619,8 +540,8 @@ class HostResolverImpl::Job
};
int category = RESOLVE_MAX; // Illegal value for later DCHECK only.
- base::TimeDelta duration = base::TimeTicks::Now() - start_time;
- if (error == OK) {
+ base::TimeDelta duration = base::TimeTicks::Now() - start_time_;
+ if (error_ == OK) {
if (had_non_speculative_request_) {
category = RESOLVE_SUCCESS;
DNS_HISTOGRAM("DNS.ResolveSuccess", duration);
@@ -637,7 +558,7 @@ class HostResolverImpl::Job
DNS_HISTOGRAM("DNS.ResolveSpeculativeFail", duration);
}
UMA_HISTOGRAM_CUSTOM_ENUMERATION(kOSErrorsForGetAddrinfoHistogramName,
- std::abs(os_error),
+ std::abs(os_error_),
GetAllGetAddrinfoOSErrors());
}
DCHECK_LT(category, static_cast<int>(RESOLVE_MAX)); // Be sure it was set.
@@ -670,47 +591,7 @@ class HostResolverImpl::Job
}
}
- void RecordAttemptHistograms(const base::TimeTicks& start_time,
- const uint32 attempt_number,
- const int error,
- const int os_error) const {
- bool first_attempt_to_complete =
- completed_attempt_number_ == attempt_number;
-
- if (first_attempt_to_complete) {
- // If this was first attempt to complete, then record the resolution
- // status of the attempt.
- if (completed_attempt_error_ == OK) {
- UMA_HISTOGRAM_ENUMERATION(
- "DNS.AttemptFirstSuccess", attempt_number, 100);
- } else {
- UMA_HISTOGRAM_ENUMERATION(
- "DNS.AttemptFirstFailure", attempt_number, 100);
- }
- }
-
- if (error == OK)
- UMA_HISTOGRAM_ENUMERATION("DNS.AttemptSuccess", attempt_number, 100);
- else
- UMA_HISTOGRAM_ENUMERATION("DNS.AttemptFailure", attempt_number, 100);
-
- if (was_cancelled() || !first_attempt_to_complete) {
- // Count those attempts which completed after the job was already canceled
- // OR after the job was already completed by an earlier attempt (so in
- // effect).
- UMA_HISTOGRAM_ENUMERATION("DNS.AttemptDiscarded", attempt_number, 100);
-
- // Record if job is cancelled.
- if (was_cancelled())
- UMA_HISTOGRAM_ENUMERATION("DNS.AttemptCancelled", attempt_number, 100);
- }
- base::TimeDelta duration = base::TimeTicks::Now() - start_time;
- if (error == OK)
- DNS_HISTOGRAM("DNS.AttemptSuccessDuration", duration);
- else
- DNS_HISTOGRAM("DNS.AttemptFailDuration", duration);
- }
// Immutable. Can be read from either thread.
const int id_;
@@ -732,21 +613,9 @@ class HostResolverImpl::Job
// reference ensures that it remains valid until we are done.
scoped_refptr<HostResolverProc> resolver_proc_;
- // The amount of time after starting a resolution attempt until deciding to
- // retry.
- base::TimeDelta unresponsive_delay_;
-
- // Keeps track of the number of attempts we have made so far to resolve the
- // host. Whenever we start an attempt to resolve the host, we increase this
- // number.
- uint32 attempt_number_;
-
- // The index of the attempt which finished first (or 0 if the job is still in
- // progress).
- uint32 completed_attempt_number_;
-
- // The result (a net error code) from the first attempt to complete.
- int completed_attempt_error_;
+ // Assigned on the worker thread, read on the origin thread.
+ int error_;
+ int os_error_;
// True if a non-speculative request was ever attached to this job
// (regardless of whether or not it was later cancelled).
@@ -756,6 +625,9 @@ class HostResolverImpl::Job
AddressList results_;
+ // The time when the job was started.
+ base::TimeTicks start_time_;
+
BoundNetLog net_log_;
DISALLOW_COPY_AND_ASSIGN(Job);
@@ -1031,9 +903,6 @@ HostResolverImpl::HostResolverImpl(
NetLog* net_log)
: cache_(cache),
max_jobs_(max_jobs),
- unresponsive_delay_(base::TimeDelta::FromMilliseconds(6000)),
- retry_factor_(2),
- maximum_unresponsive_delay_(base::TimeDelta::FromMilliseconds(60000)),
next_request_id_(0),
next_job_id_(0),
resolver_proc_(resolver_proc),