diff options
Diffstat (limited to 'chrome/browser/net/predictor.cc')
-rw-r--r-- | chrome/browser/net/predictor.cc | 643 |
1 file changed, 643 insertions, 0 deletions
// Copyright (c) 2006-2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/net/predictor.h"

#include <algorithm>
#include <set>
#include <sstream>

#include "base/compiler_specific.h"
#include "base/histogram.h"
#include "base/stats_counters.h"
#include "base/string_util.h"
#include "base/time.h"
#include "chrome/browser/chrome_thread.h"
#include "chrome/browser/net/preconnect.h"
#include "net/base/address_list.h"
#include "net/base/completion_callback.h"
#include "net/base/host_port_pair.h"
#include "net/base/host_resolver.h"
#include "net/base/net_errors.h"
#include "net/base/net_log.h"

using base::TimeDelta;

namespace chrome_browser_net {

// A single in-flight speculative host resolution.  Instances are created by
// the Predictor, tracked in its pending_lookups_ set while asynchronous, and
// deleted by the Predictor when the lookup completes or at Shutdown().
class Predictor::LookupRequest {
 public:
  LookupRequest(Predictor* predictor,
                net::HostResolver* host_resolver,
                const GURL& url)
      : ALLOW_THIS_IN_INITIALIZER_LIST(
          net_callback_(this, &LookupRequest::OnLookupFinished)),
        predictor_(predictor),
        url_(url),
        resolver_(host_resolver) {
  }

  // Return underlying network resolver status.
  // net::OK ==> Host was found synchronously.
  // net:ERR_IO_PENDING ==> Network will callback later with result.
  // anything else ==> Host was not found synchronously.
  int Start() {
    net::HostResolver::RequestInfo resolve_info(url_.host(),
                                                url_.EffectiveIntPort());

    // Make a note that this is a speculative resolve request. This allows us
    // to separate it from real navigations in the observer's callback, and
    // lets the HostResolver know it can de-prioritize it.
    resolve_info.set_is_speculative(true);
    return resolver_.Resolve(
        resolve_info, &addresses_, &net_callback_, net::BoundNetLog());
  }

 private:
  // Forwards the resolver's verdict to the owning Predictor, which will
  // delete |this| (see Predictor::OnLookupFinished).
  void OnLookupFinished(int result) {
    predictor_->OnLookupFinished(this, url_, result == net::OK);
  }

  // HostResolver will call us using this callback when resolution is complete.
  net::CompletionCallbackImpl<LookupRequest> net_callback_;

  Predictor* predictor_;  // The predictor which started us.

  const GURL url_;  // Hostname to resolve.
  net::SingleRequestHostResolver resolver_;
  net::AddressList addresses_;

  DISALLOW_COPY_AND_ASSIGN(LookupRequest);
};

Predictor::Predictor(net::HostResolver* host_resolver,
                     base::TimeDelta max_dns_queue_delay,
                     size_t max_concurrent,
                     bool preconnect_enabled)
    : peak_pending_lookups_(0),
      shutdown_(false),
      max_concurrent_dns_lookups_(max_concurrent),
      max_dns_queue_delay_(max_dns_queue_delay),
      host_resolver_(host_resolver),
      preconnect_enabled_(preconnect_enabled) {
  // Tell the referrer bookkeeping whether preconnect (vs. plain DNS
  // prefetch) valuations should be accrued.
  Referrer::SetUsePreconnectValuations(preconnect_enabled);
}

Predictor::~Predictor() {
  // Shutdown() must run (on the IO thread) before destruction.
  DCHECK(shutdown_);
}

void Predictor::Shutdown() {
  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
  DCHECK(!shutdown_);
  shutdown_ = true;

  // Cancel any lookups still in flight by deleting their requests.
  std::set<LookupRequest*>::iterator it;
  for (it = pending_lookups_.begin(); it != pending_lookups_.end(); ++it)
    delete *it;
}

// Overloaded Resolve() to take a vector of names.
void Predictor::ResolveList(const UrlList& urls,
                            UrlInfo::ResolutionMotivation motivation) {
  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));

  for (UrlList::const_iterator it = urls.begin(); it < urls.end(); ++it) {
    AppendToResolutionQueue(*it, motivation);
  }
}

// Basic Resolve() takes an individual name, and adds it
// to the queue.
void Predictor::Resolve(const GURL& url,
                        UrlInfo::ResolutionMotivation motivation) {
  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
  if (!url.has_host())
    return;
  AppendToResolutionQueue(url, motivation);
}

// Called on a real navigation.  Credits a prior speculative resolution of
// navigation_info->url() (if any), and teaches the referrer graph that
// |referrer| led to this subresource.  Returns true only when the prefetch
// demonstrably helped (name found / nonexistence cached before navigation).
bool Predictor::AccruePrefetchBenefits(const GURL& referrer,
                                       UrlInfo* navigation_info) {
  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
  GURL url = navigation_info->url();
  Results::iterator it = results_.find(url);
  if (it == results_.end()) {
    // We never speculatively resolved this host in this session.
    // Use UMA histogram to quantify potential future gains here.
    UMA_HISTOGRAM_LONG_TIMES("DNS.UnexpectedResolutionL",
                             navigation_info->resolve_duration());
    navigation_info->DLogResultsStats("DNS UnexpectedResolution");

    LearnFromNavigation(referrer, navigation_info->url());
    return false;
  }
  UrlInfo& prefetched_host_info(it->second);

  // Sometimes a host is used as a subresource by several referrers, so it is
  // in our list, but was never motivated by a page-link-scan. In that case, it
  // really is an "unexpected" navigation, and we should tally it, and augment
  // our referrers_.
  bool referrer_based_prefetch = !prefetched_host_info.was_linked();
  if (referrer_based_prefetch) {
    // This wasn't the first time this host refered to *some* referrer.
    LearnFromNavigation(referrer, navigation_info->url());
  }

  DnsBenefit benefit = prefetched_host_info.AccruePrefetchBenefits(
      navigation_info);
  switch (benefit) {
    case PREFETCH_NAME_FOUND:
    case PREFETCH_NAME_NONEXISTANT:
      // The speculative lookup finished before the navigation needed it.
      dns_cache_hits_.push_back(*navigation_info);
      if (referrer_based_prefetch) {
        if (referrer.has_host()) {
          // Credit the saved latency to the referrer that motivated us.
          referrers_[referrer].AccrueValue(
              navigation_info->benefits_remaining(), url);
        }
      }
      return true;

    case PREFETCH_CACHE_EVICTION:
      // We resolved it, but the OS/network cache evicted it before use.
      cache_eviction_map_[url] = *navigation_info;
      return false;

    case PREFETCH_NO_BENEFIT:
      // Prefetch never hit the network. Name was pre-cached.
      return false;

    default:
      NOTREACHED();
      return false;
  }
}

// Records that a page at |referring_url| embedded a subresource at
// |target_url|, so future visits to the referrer can warm up the target.
void Predictor::LearnFromNavigation(const GURL& referring_url,
                                    const GURL& target_url) {
  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
  if (referring_url.has_host() &&
      referring_url != target_url) {
    // Referrer keys are stored canonicalized to scheme://host/ only.
    DCHECK(referring_url == referring_url.GetWithEmptyPath());
    referrers_[referring_url].SuggestHost(target_url);
  }
}

// Queues speculative DNS resolutions for every subresource host we have
// previously learned for |url|.
void Predictor::PredictSubresources(const GURL& url) {
  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
  Referrers::iterator it = referrers_.find(url);
  if (referrers_.end() == it)
    return;
  Referrer* referrer = &(it->second);
  referrer->IncrementUseCount();
  for (Referrer::iterator future_url = referrer->begin();
       future_url != referrer->end(); ++future_url) {
    UrlInfo* queued_info = AppendToResolutionQueue(
        future_url->first,
        UrlInfo::LEARNED_REFERAL_MOTIVATED);
    if (queued_info)
      queued_info->SetReferringHostname(url);
  }
}

// Like PredictSubresources(), but opens full (TCP/SSL) preconnections for
// subresources whose historical use rate justifies the cost.
void Predictor::PredictFrameSubresources(const GURL& url) {
  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
  DCHECK(url.GetWithEmptyPath() == url);
  Referrers::iterator it = referrers_.find(url);
  if (referrers_.end() == it)
    return;
  Referrer* referrer = &(it->second);
  referrer->IncrementUseCount();
  for (Referrer::iterator future_url = referrer->begin();
       future_url != referrer->end(); ++future_url) {
    if (future_url->second.IsPreconnectWorthDoing())
      Preconnect::PreconnectOnIOThread(future_url->first);
  }
}

// Provide sort order so all .com's are together, etc.
+struct RightToLeftStringSorter { + bool operator()(const net::HostPortPair& left, + const net::HostPortPair& right) const { + return string_compare(left.host, right.host); + } + bool operator()(const GURL& left, + const GURL& right) const { + return string_compare(left.host(), right.host()); + } + + static bool string_compare(const std::string& left_host, + const std::string right_host) { + if (left_host == right_host) return true; + size_t left_already_matched = left_host.size(); + size_t right_already_matched = right_host.size(); + + // Ensure both strings have characters. + if (!left_already_matched) return true; + if (!right_already_matched) return false; + + // Watch for trailing dot, so we'll always be safe to go one beyond dot. + if ('.' == left_host[left_already_matched - 1]) { + if ('.' != right_host[right_already_matched - 1]) + return true; + // Both have dots at end of string. + --left_already_matched; + --right_already_matched; + } else { + if ('.' == right_host[right_already_matched - 1]) + return false; + } + + while (1) { + if (!left_already_matched) return true; + if (!right_already_matched) return false; + + size_t left_length, right_length; + size_t left_start = left_host.find_last_of('.', left_already_matched - 1); + if (std::string::npos == left_start) { + left_length = left_already_matched; + left_already_matched = left_start = 0; + } else { + left_length = left_already_matched - left_start; + left_already_matched = left_start; + ++left_start; // Don't compare the dot. + } + size_t right_start = right_host.find_last_of('.', + right_already_matched - 1); + if (std::string::npos == right_start) { + right_length = right_already_matched; + right_already_matched = right_start = 0; + } else { + right_length = right_already_matched - right_start; + right_already_matched = right_start; + ++right_start; // Don't compare the dot. 
+ } + + int diff = left_host.compare(left_start, left_host.size(), + right_host, right_start, right_host.size()); + if (diff > 0) return false; + if (diff < 0) return true; + } + } +}; + +void Predictor::GetHtmlReferrerLists(std::string* output) { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + if (referrers_.empty()) + return; + + // TODO(jar): Remove any plausible JavaScript from names before displaying. + + typedef std::set<GURL, struct RightToLeftStringSorter> + SortedNames; + SortedNames sorted_names; + + for (Referrers::iterator it = referrers_.begin(); + referrers_.end() != it; ++it) + sorted_names.insert(it->first); + + output->append("<br><table border>"); + output->append( + "<tr><th>Host for Page</th>" + "<th>Page Load<br>Count</th>" + "<th>Subresource<br>Navigations</th>" + "<th>Subresource<br>PreConnects</th>" + "<th>Expected<br>Connects</th>" + "<th>DNS<br>Savings</th>" + "<th>Subresource Spec</th></tr>"); + + for (SortedNames::iterator it = sorted_names.begin(); + sorted_names.end() != it; ++it) { + Referrer* referrer = &(referrers_[*it]); + bool first_set_of_futures = true; + for (Referrer::iterator future_url = referrer->begin(); + future_url != referrer->end(); ++future_url) { + output->append("<tr align=right>"); + if (first_set_of_futures) + StringAppendF(output, "<td rowspan=%d>%s</td><td rowspan=%d>%d</td>", + static_cast<int>(referrer->size()), + it->spec().c_str(), + static_cast<int>(referrer->size()), + static_cast<int>(referrer->use_count())); + first_set_of_futures = false; + StringAppendF(output, + "<td>%d</td><td>%d</td><td>%2.3f</td><td>%dms</td><td>%s</td></tr>", + static_cast<int>(future_url->second.navigation_count()), + static_cast<int>(future_url->second.preconnection_count()), + static_cast<double>(future_url->second.subresource_use_rate()), + static_cast<int>(future_url->second.latency().InMilliseconds()), + future_url->first.spec().c_str()); + } + } + output->append("</table>"); +} + +void 
Predictor::GetHtmlInfo(std::string* output) { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + // Local lists for calling UrlInfo + UrlInfo::DnsInfoTable cache_hits; + UrlInfo::DnsInfoTable cache_evictions; + UrlInfo::DnsInfoTable name_not_found; + UrlInfo::DnsInfoTable network_hits; + UrlInfo::DnsInfoTable already_cached; + + // Get copies of all useful data. + typedef std::map<GURL, UrlInfo, RightToLeftStringSorter> + Snapshot; + Snapshot snapshot; + { + // UrlInfo supports value semantics, so we can do a shallow copy. + for (Results::iterator it(results_.begin()); it != results_.end(); it++) { + snapshot[it->first] = it->second; + } + for (Results::iterator it(cache_eviction_map_.begin()); + it != cache_eviction_map_.end(); + it++) { + cache_evictions.push_back(it->second); + } + // Reverse list as we copy cache hits, so that new hits are at the top. + size_t index = dns_cache_hits_.size(); + while (index > 0) { + index--; + cache_hits.push_back(dns_cache_hits_[index]); + } + } + + // Partition the UrlInfo's into categories. + for (Snapshot::iterator it(snapshot.begin()); it != snapshot.end(); it++) { + if (it->second.was_nonexistant()) { + name_not_found.push_back(it->second); + continue; + } + if (!it->second.was_found()) + continue; // Still being processed. + if (TimeDelta() != it->second.benefits_remaining()) { + network_hits.push_back(it->second); // With no benefit yet. + continue; + } + if (UrlInfo::kMaxNonNetworkDnsLookupDuration > + it->second.resolve_duration()) { + already_cached.push_back(it->second); + continue; + } + // Remaining case is where prefetch benefit was significant, and was used. + // Since we shot those cases as historical hits, we won't bother here. + } + + bool brief = false; +#ifdef NDEBUG + brief = true; +#endif // NDEBUG + + // Call for display of each table, along with title. 
+ UrlInfo::GetHtmlTable(cache_hits, + "Prefetching DNS records produced benefits for ", false, output); + UrlInfo::GetHtmlTable(cache_evictions, + "Cache evictions negated DNS prefetching benefits for ", brief, output); + UrlInfo::GetHtmlTable(network_hits, + "Prefetching DNS records was not yet beneficial for ", brief, output); + UrlInfo::GetHtmlTable(already_cached, + "Previously cached resolutions were found for ", brief, output); + UrlInfo::GetHtmlTable(name_not_found, + "Prefetching DNS records revealed non-existance for ", brief, output); +} + +UrlInfo* Predictor::AppendToResolutionQueue( + const GURL& url, + UrlInfo::ResolutionMotivation motivation) { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + DCHECK(url.has_host()); + + if (shutdown_) + return NULL; + + UrlInfo* info = &results_[url]; + info->SetUrl(url); // Initialize or DCHECK. + // TODO(jar): I need to discard names that have long since expired. + // Currently we only add to the domain map :-/ + + DCHECK(info->HasUrl(url)); + + if (!info->NeedsDnsUpdate()) { + info->DLogResultsStats("DNS PrefetchNotUpdated"); + return NULL; + } + + info->SetQueuedState(motivation); + work_queue_.Push(url, motivation); + StartSomeQueuedResolutions(); + return info; +} + +void Predictor::StartSomeQueuedResolutions() { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + + while (!work_queue_.IsEmpty() && + pending_lookups_.size() < max_concurrent_dns_lookups_) { + const GURL url(work_queue_.Pop()); + UrlInfo* info = &results_[url]; + DCHECK(info->HasUrl(url)); + info->SetAssignedState(); + + if (CongestionControlPerformed(info)) { + DCHECK(work_queue_.IsEmpty()); + return; + } + + LookupRequest* request = new LookupRequest(this, host_resolver_, url); + int status = request->Start(); + if (status == net::ERR_IO_PENDING) { + // Will complete asynchronously. 
+ pending_lookups_.insert(request); + peak_pending_lookups_ = std::max(peak_pending_lookups_, + pending_lookups_.size()); + } else { + // Completed synchronously (was already cached by HostResolver), or else + // there was (equivalently) some network error that prevents us from + // finding the name. Status net::OK means it was "found." + LookupFinished(request, url, status == net::OK); + delete request; + } + } +} + +bool Predictor::CongestionControlPerformed(UrlInfo* info) { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + // Note: queue_duration is ONLY valid after we go to assigned state. + if (info->queue_duration() < max_dns_queue_delay_) + return false; + // We need to discard all entries in our queue, as we're keeping them waiting + // too long. By doing this, we'll have a chance to quickly service urgent + // resolutions, and not have a bogged down system. + while (true) { + info->RemoveFromQueue(); + if (work_queue_.IsEmpty()) + break; + info = &results_[work_queue_.Pop()]; + info->SetAssignedState(); + } + return true; +} + +void Predictor::OnLookupFinished(LookupRequest* request, const GURL& url, + bool found) { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + + LookupFinished(request, url, found); + pending_lookups_.erase(request); + delete request; + + StartSomeQueuedResolutions(); +} + +void Predictor::LookupFinished(LookupRequest* request, const GURL& url, + bool found) { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + UrlInfo* info = &results_[url]; + DCHECK(info->HasUrl(url)); + if (info->is_marked_to_delete()) { + results_.erase(url); + } else { + if (found) + info->SetFoundState(); + else + info->SetNoSuchNameState(); + } +} + +void Predictor::DiscardAllResults() { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + // Delete anything listed so far in this session that shows in about:dns. 
+ cache_eviction_map_.clear(); + dns_cache_hits_.clear(); + referrers_.clear(); + + + // Try to delete anything in our work queue. + while (!work_queue_.IsEmpty()) { + // Emulate processing cycle as though host was not found. + GURL url = work_queue_.Pop(); + UrlInfo* info = &results_[url]; + DCHECK(info->HasUrl(url)); + info->SetAssignedState(); + info->SetNoSuchNameState(); + } + // Now every result_ is either resolved, or is being resolved + // (see LookupRequest). + + // Step through result_, recording names of all hosts that can't be erased. + // We can't erase anything being worked on. + Results assignees; + for (Results::iterator it = results_.begin(); results_.end() != it; ++it) { + GURL url(it->first); + UrlInfo* info = &it->second; + DCHECK(info->HasUrl(url)); + if (info->is_assigned()) { + info->SetPendingDeleteState(); + assignees[url] = *info; + } + } + DCHECK(assignees.size() <= max_concurrent_dns_lookups_); + results_.clear(); + // Put back in the names being worked on. + for (Results::iterator it = assignees.begin(); assignees.end() != it; ++it) { + DCHECK(it->second.is_marked_to_delete()); + results_[it->first] = it->second; + } +} + +void Predictor::TrimReferrers() { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + std::vector<GURL> urls; + for (Referrers::const_iterator it = referrers_.begin(); + it != referrers_.end(); ++it) + urls.push_back(it->first); + for (size_t i = 0; i < urls.size(); ++i) + if (!referrers_[urls[i]].Trim()) + referrers_.erase(urls[i]); +} + +void Predictor::SerializeReferrers(ListValue* referral_list) { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + referral_list->Clear(); + referral_list->Append(new FundamentalValue(DNS_REFERRER_VERSION)); + for (Referrers::const_iterator it = referrers_.begin(); + it != referrers_.end(); ++it) { + // Serialize the list of subresource names. + Value* subresource_list(it->second.Serialize()); + + // Create a list for each referer. 
+ ListValue* motivator(new ListValue); + motivator->Append(new StringValue(it->first.spec())); + motivator->Append(subresource_list); + + referral_list->Append(motivator); + } +} + +void Predictor::DeserializeReferrers(const ListValue& referral_list) { + DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); + int format_version = -1; + if (referral_list.GetSize() > 0 && + referral_list.GetInteger(0, &format_version) && + format_version == DNS_REFERRER_VERSION) { + for (size_t i = 1; i < referral_list.GetSize(); ++i) { + ListValue* motivator; + if (!referral_list.GetList(i, &motivator)) { + NOTREACHED(); + return; + } + std::string motivating_url_spec; + if (!motivator->GetString(0, &motivating_url_spec)) { + NOTREACHED(); + return; + } + + Value* subresource_list; + if (!motivator->Get(1, &subresource_list)) { + NOTREACHED(); + return; + } + + referrers_[GURL(motivating_url_spec)].Deserialize(*subresource_list); + } + } +} + + +//------------------------------------------------------------------------------ + +Predictor::HostNameQueue::HostNameQueue() { +} + +Predictor::HostNameQueue::~HostNameQueue() { +} + +void Predictor::HostNameQueue::Push(const GURL& url, + UrlInfo::ResolutionMotivation motivation) { + switch (motivation) { + case UrlInfo::STATIC_REFERAL_MOTIVATED: + case UrlInfo::LEARNED_REFERAL_MOTIVATED: + case UrlInfo::MOUSE_OVER_MOTIVATED: + rush_queue_.push(url); + break; + + default: + background_queue_.push(url); + break; + } +} + +bool Predictor::HostNameQueue::IsEmpty() const { + return rush_queue_.empty() && background_queue_.empty(); +} + +GURL Predictor::HostNameQueue::Pop() { + DCHECK(!IsEmpty()); + std::queue<GURL> *queue(rush_queue_.empty() ? &background_queue_ + : &rush_queue_); + GURL url(queue->front()); + queue->pop(); + return url; +} + +} // namespace chrome_browser_net |