diff options
author | twifkak <twifkak@chromium.org> | 2015-07-02 00:23:33 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-07-02 07:24:21 +0000 |
commit | cffae42a2d58588ca6cca5f40351b168fcdc10b0 (patch) | |
tree | 3f9bc3f678149a216c7ec96c49ea4474b85c6296 /components/history | |
parent | f50846371674560b369a8ee28d7d49afe1e0c8f1 (diff) | |
download | chromium_src-cffae42a2d58588ca6cca5f40351b168fcdc10b0.zip chromium_src-cffae42a2d58588ca6cca5f40351b168fcdc10b0.tar.gz chromium_src-cffae42a2d58588ca6cca5f40351b168fcdc10b0.tar.bz2 |
Add History.TopHostsVisitsByRank UMA metric.
Created in the style of TopSitesVisitsByRank
(https://codereview.chromium.org/16517002/), but instead of populating
the top hosts map at Init(), where it is potentially user-impacting
(blocking other history-needing features, such as autocomplete, from
running), it is lazily populated by TopHosts(). Callers of that method
should not expect any additional delay due to this addition.
Practically speaking, this metric will only report for members of the
Precache field trial, as that is the only client of TopHosts, currently.
This will only provide information on Android devices, but otherwise is not
expected to bias the metric significantly.
BUG=499532
Review URL: https://codereview.chromium.org/1179953005
Cr-Commit-Position: refs/heads/master@{#337185}
Diffstat (limited to 'components/history')
7 files changed, 106 insertions, 5 deletions
diff --git a/components/history/core/browser/history_backend.cc b/components/history/core/browser/history_backend.cc index fc347ba..0f6df3e 100644 --- a/components/history/core/browser/history_backend.cc +++ b/components/history/core/browser/history_backend.cc @@ -37,6 +37,7 @@ #include "components/history/core/browser/keyword_search_term.h" #include "components/history/core/browser/page_usage_data.h" #include "components/history/core/browser/typed_url_syncable_service.h" +#include "components/history/core/browser/url_utils.h" #include "components/history/core/browser/visit_filter.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "sql/error_delegate_util.h" @@ -89,6 +90,8 @@ const int kMaxRedirectCount = 32; // and is deleted. const int kExpireDaysThreshold = 90; +const int kMaxTopHostsForMetrics = 50; + // Converts from PageUsageData to MostVisitedURL. |redirects| is a // list of redirects for this URL. Empty list means no redirects. MostVisitedURL MakeMostVisitedURL(const PageUsageData& page_data, @@ -418,7 +421,13 @@ TopHostsList HistoryBackend::TopHosts(int num_hosts) const { if (!db_) return TopHostsList(); - return db_->TopHosts(num_hosts); + auto top_hosts = db_->TopHosts(num_hosts); + + host_ranks_.clear(); + int i = 0; + for (const auto& host_count : top_hosts) + host_ranks_[host_count.first] = i++; + return top_hosts; } void HistoryBackend::AddPage(const HistoryAddPageArgs& request) { @@ -733,6 +742,15 @@ void HistoryBackend::CloseAllDatabases() { } } +void HistoryBackend::RecordTopHostsMetrics(const GURL& url) { + auto it = host_ranks_.find(HostForTopHosts(url)); + int host_rank = it != host_ranks_.end() ? it->second : kMaxTopHostsForMetrics; + + // Convert index from 0-based to 1-based. + UMA_HISTOGRAM_ENUMERATION("History.TopHostsVisitsByRank", host_rank + 1, + kMaxTopHostsForMetrics + 2); +} + std::pair<URLID, VisitID> HistoryBackend::AddPageVisit( const GURL& url, Time time, @@ -753,6 +771,11 @@ std::pair<URLID, VisitID> HistoryBackend::AddPageVisit( transition_type == ui::PAGE_TRANSITION_KEYWORD_GENERATED)) typed_increment = 1; + if (!host_ranks_.empty() && visit_source == SOURCE_BROWSED && + (transition & ui::PAGE_TRANSITION_CHAIN_END)) { + RecordTopHostsMetrics(url); + } + // See if this URL is already in the DB. URLRow url_info(url); URLID url_id = db_->GetRowForURL(url, &url_info); diff --git a/components/history/core/browser/history_backend.h b/components/history/core/browser/history_backend.h index 3038475..86173fc 100644 --- a/components/history/core/browser/history_backend.h +++ b/components/history/core/browser/history_backend.h @@ -10,6 +10,7 @@ #include <utility> #include <vector> +#include "base/containers/hash_tables.h" #include "base/containers/mru_cache.h" #include "base/files/file_path.h" #include "base/gtest_prod_util.h" @@ -202,6 +203,9 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // Computes the |num_hosts| most-visited hostnames in the past 30 days. See // history_service.h for details. Returns an empty list if db_ is not // initialized. + // + // As a side effect, caches the list of top hosts for the purposes of + // generating internal metrics. TopHostsList TopHosts(int num_hosts) const; // Navigation ---------------------------------------------------------------- @@ -521,6 +525,7 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, TopHosts_OnlyLast30Days); FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, TopHosts_MaxNumHosts); FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, TopHosts_IgnoreUnusualURLs); + FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, RecordTopHostsMetrics); FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, UpdateVisitDuration); FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, ExpireHistoryForTimes); FRIEND_TEST_ALL_PREFIXES(HistoryBackendTest, DeleteFTSIndexDatabases); @@ -738,6 +743,8 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, const URLRows& rows, const std::set<GURL>& favicon_urls) override; + void RecordTopHostsMetrics(const GURL& url); + // Deleting all history ------------------------------------------------------ // Deletes all history. This is a special case of deleting that is separated @@ -836,6 +843,10 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>, // Listens for the system being under memory pressure. scoped_ptr<base::MemoryPressureListener> memory_pressure_listener_; + // Map from host to index in the TopHosts list. It is updated only by + // TopHosts(), so it's usually stale. + mutable base::hash_map<std::string, int> host_ranks_; + // List of observers base::ObserverList<HistoryBackendObserver> observers_; diff --git a/components/history/core/browser/history_backend_unittest.cc b/components/history/core/browser/history_backend_unittest.cc index c9aad33..ba8782c 100644 --- a/components/history/core/browser/history_backend_unittest.cc +++ b/components/history/core/browser/history_backend_unittest.cc @@ -19,11 +19,15 @@ #include "base/files/scoped_temp_dir.h" #include "base/memory/ref_counted.h" #include "base/memory/scoped_ptr.h" +#include "base/metrics/histogram_base.h" +#include "base/metrics/histogram_samples.h" +#include "base/metrics/statistics_recorder.h" #include "base/prefs/pref_service.h" #include "base/run_loop.h" #include "base/strings/string16.h" #include "base/strings/string_number_conversions.h" #include "base/strings/utf_string_conversions.h" +#include "base/test/histogram_tester.h" #include "base/thread_task_runner_handle.h" #include "components/favicon_base/favicon_usage_data.h" #include "components/history/core/browser/history_backend_client.h" @@ -53,6 +57,7 @@ namespace { using ::testing::ElementsAre; +using base::HistogramBase; const int kTinyEdgeSize = 10; const int kSmallEdgeSize = 16; @@ -3040,6 +3045,50 @@ TEST_F(HistoryBackendTest, TopHosts_IgnoreUnusualURLs) { EXPECT_THAT(backend_->TopHosts(5), ElementsAre(std::make_pair("cnn.com", 3))); } +TEST_F(HistoryBackendTest, RecordTopHostsMetrics) { + base::HistogramTester histogram; + + // Load initial URLs for the purpose of populating host_ranks_. + std::vector<GURL> urls; + urls.push_back(GURL("http://cnn.com/us")); + urls.push_back(GURL("http://cnn.com/intl")); + urls.push_back(GURL("http://dogtopia.com/")); + for (const auto& url : urls) { + backend_->AddPageVisit(url, base::Time::Now(), 0, ui::PAGE_TRANSITION_LINK, + history::SOURCE_BROWSED); + } + + // Compute host_ranks_ for RecordTopHostsMetrics. + EXPECT_THAT(backend_->TopHosts(3), + ElementsAre(std::make_pair("cnn.com", 2), + std::make_pair("dogtopia.com", 1))); + + // Load URLs to record top-hosts metrics for. + urls.clear(); + urls.push_back(GURL("http://cnn.com/us")); + urls.push_back(GURL("http://www.unipresse.com/")); + for (const auto& url : urls) { + backend_->AddPageVisit(url, base::Time::Now(), 0, + ui::PAGE_TRANSITION_CHAIN_END, + history::SOURCE_BROWSED); + } + + // Extract list of histogram samples. + std::vector<std::pair<HistogramBase::Sample, HistogramBase::Count>> samples; + scoped_ptr<base::HistogramSamples> snapshot = + histogram.GetHistogramSamplesSinceCreation( + "History.TopHostsVisitsByRank"); + for (auto it = snapshot->Iterator(); !it->Done(); it->Next()) { + HistogramBase::Sample sample; + HistogramBase::Count count; + it->Get(&sample, nullptr, &count); + samples.push_back(std::make_pair(sample, count)); + } + + EXPECT_THAT(samples, + ElementsAre(std::make_pair(1, 1), std::make_pair(51, 1))); +} + TEST_F(HistoryBackendTest, UpdateVisitDuration) { // This unit test will test adding and deleting visit details information. ASSERT_TRUE(backend_.get()); diff --git a/components/history/core/browser/history_database.cc b/components/history/core/browser/history_database.cc index 888a30a..0966d93 100644 --- a/components/history/core/browser/history_database.cc +++ b/components/history/core/browser/history_database.cc @@ -18,6 +18,7 @@ #include "base/rand_util.h" #include "base/strings/string_util.h" #include "base/time/time.h" +#include "components/history/core/browser/url_utils.h" #include "sql/statement.h" #include "sql/transaction.h" @@ -208,10 +209,7 @@ TopHostsList HistoryDatabase::TopHosts(int num_hosts) { continue; int64 visit_count = url_sql.ColumnInt64(1); - std::string host = url.host(); - if (base::StartsWithASCII(host, "www.", true)) - host.assign(host, 4, std::string::npos); - host_count[host] += visit_count; + host_count[HostForTopHosts(url)] += visit_count; // kMaxHostsInMemory is well above typical values for // History.MonthlyHostCount, but here to guard against unbounded memory diff --git a/components/history/core/browser/url_utils.cc b/components/history/core/browser/url_utils.cc index a993619..54dc5a9 100644 --- a/components/history/core/browser/url_utils.cc +++ b/components/history/core/browser/url_utils.cc @@ -6,6 +6,7 @@ #include <algorithm> +#include "base/strings/string_util.h" #include "url/gurl.h" namespace history { @@ -85,4 +86,12 @@ GURL ToggleHTTPAndHTTPS(const GURL& url) { return url.ReplaceComponents(replacement); } +std::string HostForTopHosts(const GURL& url) { + std::string host = url.host(); + base::StringToLowerASCII(&host); + if (base::StartsWith(host, "www.", base::CompareCase::SENSITIVE)) + host.assign(host, 4, std::string::npos); + return host; +} + } // namespace history diff --git a/components/history/core/browser/url_utils.h b/components/history/core/browser/url_utils.h index e60eb4f..ba4f4aa 100644 --- a/components/history/core/browser/url_utils.h +++ b/components/history/core/browser/url_utils.h @@ -41,6 +41,10 @@ bool IsPathPrefix(const std::string& p1, const std::string& p2); // If |url| is neither HTTP nor HTTPS, returns an empty URL. GURL ToggleHTTPAndHTTPS(const GURL& url); +// Returns the host of the given URL, canonicalized as it would be for +// HistoryService::TopHosts(). +std::string HostForTopHosts(const GURL& url); + } // namespace history #endif // COMPONENTS_HISTORY_CORE_BROWSER_URL_UTILS_H_ diff --git a/components/history/core/browser/url_utils_unittest.cc b/components/history/core/browser/url_utils_unittest.cc index 184bd66..77f8f1c 100644 --- a/components/history/core/browser/url_utils_unittest.cc +++ b/components/history/core/browser/url_utils_unittest.cc @@ -126,6 +126,13 @@ TEST(HistoryUrlUtilsTest, ToggleHTTPAndHTTPS) { ToggleHTTPAndHTTPS(GURL("ftp://www.google.com/"))); } +TEST(HistoryUrlUtilsTest, HostForTopHosts) { + EXPECT_EQ("foo.com", HostForTopHosts(GURL("https://foo.com/bar"))); + EXPECT_EQ("foo.com", HostForTopHosts(GURL("http://foo.com:999/bar"))); + EXPECT_EQ("foo.com", HostForTopHosts(GURL("http://www.foo.com/bar"))); + EXPECT_EQ("foo.com", HostForTopHosts(GURL("HtTP://WWw.FoO.cOM/BAR"))); +} + } // namespace } // namespace history |