diff options
author | sdefresne <sdefresne@chromium.org> | 2015-04-27 04:08:11 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-04-27 11:08:18 +0000 |
commit | 236ea69d09d08d1f1156b192e314831d4d9b3785 (patch) | |
tree | 4465025674a3eb66f50413c5f206a20f3d286130 /components | |
parent | 42bb56a59a89c56263310be7251aca3b6352d8ac (diff) | |
download | chromium_src-236ea69d09d08d1f1156b192e314831d4d9b3785.zip chromium_src-236ea69d09d08d1f1156b192e314831d4d9b3785.tar.gz chromium_src-236ea69d09d08d1f1156b192e314831d4d9b3785.tar.bz2 |
Move top_sites_impl.{cc,h} into history component
All problematic dependencies of TopSitesImpl have been removed, thus
move the file into //components/history/core/browser.
BUG=479174
Review URL: https://codereview.chromium.org/1099303003
Cr-Commit-Position: refs/heads/master@{#327026}
Diffstat (limited to 'components')
-rw-r--r-- | components/history.gypi | 5 | ||||
-rw-r--r-- | components/history/core/browser/BUILD.gn | 5 | ||||
-rw-r--r-- | components/history/core/browser/top_sites_impl.cc | 914 | ||||
-rw-r--r-- | components/history/core/browser/top_sites_impl.h | 320 |
4 files changed, 1244 insertions, 0 deletions
diff --git a/components/history.gypi b/components/history.gypi index e3d0605..6b21304 100644 --- a/components/history.gypi +++ b/components/history.gypi @@ -13,6 +13,8 @@ ], 'dependencies': [ '../base/base.gyp:base', + '../base/base.gyp:base_i18n', + '../base/base.gyp:base_prefs', '../google_apis/google_apis.gyp:google_apis', '../net/net.gyp:net', '../skia/skia.gyp:skia', @@ -23,6 +25,7 @@ '../ui/gfx/gfx.gyp:gfx', '../url/url.gyp:url_lib', 'favicon_base', + 'history_core_common', 'keyed_service_core', 'query_parser', 'signin_core_browser', @@ -80,6 +83,8 @@ 'history/core/browser/top_sites_cache.h', 'history/core/browser/top_sites_database.cc', 'history/core/browser/top_sites_database.h', + 'history/core/browser/top_sites_impl.cc', + 'history/core/browser/top_sites_impl.h', 'history/core/browser/top_sites_observer.h', 'history/core/browser/typed_url_syncable_service.cc', 'history/core/browser/typed_url_syncable_service.h', diff --git a/components/history/core/browser/BUILD.gn b/components/history/core/browser/BUILD.gn index b2ed448..d0f5008 100644 --- a/components/history/core/browser/BUILD.gn +++ b/components/history/core/browser/BUILD.gn @@ -55,6 +55,8 @@ static_library("browser") { "top_sites_cache.h", "top_sites_database.cc", "top_sites_database.h", + "top_sites_impl.cc", + "top_sites_impl.h", "top_sites_observer.h", "typed_url_syncable_service.cc", "typed_url_syncable_service.h", @@ -80,7 +82,10 @@ static_library("browser") { deps = [ "//base", + "//base:i18n", + "//base:prefs", "//components/favicon_base", + "//components/history/core/common", "//components/keyed_service/core", "//components/query_parser", "//components/signin/core/browser", diff --git a/components/history/core/browser/top_sites_impl.cc b/components/history/core/browser/top_sites_impl.cc new file mode 100644 index 0000000..c5b1873 --- /dev/null +++ b/components/history/core/browser/top_sites_impl.cc @@ -0,0 +1,914 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/history/core/browser/top_sites_impl.h" + +#include <algorithm> +#include <set> + +#include "base/bind.h" +#include "base/bind_helpers.h" +#include "base/logging.h" +#include "base/md5.h" +#include "base/memory/ref_counted_memory.h" +#include "base/message_loop/message_loop_proxy.h" +#include "base/metrics/histogram.h" +#include "base/prefs/pref_service.h" +#include "base/prefs/scoped_user_pref_update.h" +#include "base/single_thread_task_runner.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "base/task_runner.h" +#include "base/values.h" +#include "components/history/core/browser/history_backend.h" +#include "components/history/core/browser/history_db_task.h" +#include "components/history/core/browser/page_usage_data.h" +#include "components/history/core/browser/top_sites_cache.h" +#include "components/history/core/browser/url_utils.h" +#include "components/history/core/common/thumbnail_score.h" +#include "ui/base/l10n/l10n_util.h" +#include "ui/base/layout.h" +#include "ui/base/resource/resource_bundle.h" +#include "ui/gfx/image/image_util.h" + +using base::DictionaryValue; + +namespace history { +namespace { + +void RunOrPostGetMostVisitedURLsCallback( + base::TaskRunner* task_runner, + bool include_forced_urls, + const TopSitesImpl::GetMostVisitedURLsCallback& callback, + const MostVisitedURLList& all_urls, + const MostVisitedURLList& nonforced_urls) { + const MostVisitedURLList* urls = + include_forced_urls ? &all_urls : &nonforced_urls; + if (task_runner->RunsTasksOnCurrentThread()) + callback.Run(*urls); + else + task_runner->PostTask(FROM_HERE, base::Bind(callback, *urls)); +} + +// Compares two MostVisitedURL having a non-null |last_forced_time|. 
+bool ForcedURLComparator(const MostVisitedURL& first, + const MostVisitedURL& second) { + DCHECK(!first.last_forced_time.is_null() && + !second.last_forced_time.is_null()); + return first.last_forced_time < second.last_forced_time; +} + +// How many non-forced top sites to store in the cache. +const size_t kNonForcedTopSitesNumber = 20; + +// How many forced top sites to store in the cache. +const size_t kForcedTopSitesNumber = 20; + +// Max number of temporary images we'll cache. See comment above +// temp_images_ for details. +const size_t kMaxTempTopImages = 8; + +const int kDaysOfHistory = 90; +// Time from startup to first HistoryService query. +const int64 kUpdateIntervalSecs = 15; +// Intervals between requests to HistoryService. +const int64 kMinUpdateIntervalMinutes = 1; +const int64 kMaxUpdateIntervalMinutes = 60; + +// Use 100 quality (highest quality) because we're very sensitive to +// artifacts for these small sized, highly detailed images. +const int kTopSitesImageQuality = 100; + +} // namespace + +// Initially, histogram is not recorded. 
+bool TopSitesImpl::histogram_recorded_ = false; + +TopSitesImpl::TopSitesImpl(PrefService* pref_service, + HistoryService* history_service, + const char* blacklist_pref_name, + const PrepopulatedPageList& prepopulated_pages, + const CanAddURLToHistoryFn& can_add_url_to_history) + : backend_(nullptr), + cache_(new TopSitesCache()), + thread_safe_cache_(new TopSitesCache()), + last_num_urls_changed_(0), + prepopulated_pages_(prepopulated_pages), + pref_service_(pref_service), + blacklist_pref_name_(blacklist_pref_name), + history_service_(history_service), + can_add_url_to_history_(can_add_url_to_history), + loaded_(false), + history_service_observer_(this) { + DCHECK(pref_service_); + DCHECK(blacklist_pref_name_); + DCHECK(!can_add_url_to_history_.is_null()); +} + +void TopSitesImpl::Init( + const base::FilePath& db_name, + const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner) { + // Create the backend here, rather than in the constructor, so that + // unit tests that do not need the backend can run without a problem. + backend_ = new TopSitesBackend(db_task_runner); + backend_->Init(db_name); + backend_->GetMostVisitedThumbnails( + base::Bind(&TopSitesImpl::OnGotMostVisitedThumbnails, + base::Unretained(this)), + &cancelable_task_tracker_); +} + +bool TopSitesImpl::SetPageThumbnail(const GURL& url, + const gfx::Image& thumbnail, + const ThumbnailScore& score) { + DCHECK(thread_checker_.CalledOnValidThread()); + + if (!loaded_) { + // TODO(sky): I need to cache these and apply them after the load + // completes. + return false; + } + + bool add_temp_thumbnail = false; + if (!IsKnownURL(url)) { + if (!IsNonForcedFull()) { + add_temp_thumbnail = true; + } else { + return false; // This URL is not known to us. + } + } + + if (!can_add_url_to_history_.Run(url)) + return false; // It's not a real webpage. 
+ + scoped_refptr<base::RefCountedBytes> thumbnail_data; + if (!EncodeBitmap(thumbnail, &thumbnail_data)) + return false; + + if (add_temp_thumbnail) { + // Always remove the existing entry and then add it back. That way if we end + // up with too many temp thumbnails we'll prune the oldest first. + RemoveTemporaryThumbnailByURL(url); + AddTemporaryThumbnail(url, thumbnail_data.get(), score); + return true; + } + + return SetPageThumbnailEncoded(url, thumbnail_data.get(), score); +} + +bool TopSitesImpl::SetPageThumbnailToJPEGBytes( + const GURL& url, + const base::RefCountedMemory* memory, + const ThumbnailScore& score) { + DCHECK(thread_checker_.CalledOnValidThread()); + + if (!loaded_) { + // TODO(sky): I need to cache these and apply them after the load + // completes. + return false; + } + + bool add_temp_thumbnail = false; + if (!IsKnownURL(url)) { + if (!IsNonForcedFull()) { + add_temp_thumbnail = true; + } else { + return false; // This URL is not known to us. + } + } + + if (!can_add_url_to_history_.Run(url)) + return false; // It's not a real webpage. + + if (add_temp_thumbnail) { + // Always remove the existing entry and then add it back. That way if we end + // up with too many temp thumbnails we'll prune the oldest first. + RemoveTemporaryThumbnailByURL(url); + AddTemporaryThumbnail(url, memory, score); + return true; + } + + return SetPageThumbnailEncoded(url, memory, score); +} + +// WARNING: this function may be invoked on any thread. +void TopSitesImpl::GetMostVisitedURLs( + const GetMostVisitedURLsCallback& callback, + bool include_forced_urls) { + MostVisitedURLList filtered_urls; + { + base::AutoLock lock(lock_); + if (!loaded_) { + // A request came in before we finished loading. Store the callback and + // we'll run it on current thread when we finish loading. 
+ pending_callbacks_.push_back( + base::Bind(&RunOrPostGetMostVisitedURLsCallback, + base::MessageLoopProxy::current(), + include_forced_urls, + callback)); + return; + } + if (include_forced_urls) { + filtered_urls = thread_safe_cache_->top_sites(); + } else { + filtered_urls.assign(thread_safe_cache_->top_sites().begin() + + thread_safe_cache_->GetNumForcedURLs(), + thread_safe_cache_->top_sites().end()); + } + } + callback.Run(filtered_urls); +} + +bool TopSitesImpl::GetPageThumbnail( + const GURL& url, + bool prefix_match, + scoped_refptr<base::RefCountedMemory>* bytes) { + // WARNING: this may be invoked on any thread. + // Perform exact match. + { + base::AutoLock lock(lock_); + if (thread_safe_cache_->GetPageThumbnail(url, bytes)) + return true; + } + + // Resource bundle is thread safe. + for (const auto& prepopulated_page : prepopulated_pages_) { + if (url == prepopulated_page.most_visited.url) { + *bytes = + ResourceBundle::GetSharedInstance().LoadDataResourceBytesForScale( + prepopulated_page.thumbnail_id, ui::SCALE_FACTOR_100P); + return true; + } + } + + if (prefix_match) { + // If http or https, search with |url| first, then try the other one. + std::vector<GURL> url_list; + url_list.push_back(url); + if (url.SchemeIsHTTPOrHTTPS()) + url_list.push_back(ToggleHTTPAndHTTPS(url)); + + for (std::vector<GURL>::iterator it = url_list.begin(); + it != url_list.end(); ++it) { + base::AutoLock lock(lock_); + + GURL canonical_url; + // Test whether any stored URL is a prefix of |url|. + canonical_url = thread_safe_cache_->GetGeneralizedCanonicalURL(*it); + if (!canonical_url.is_empty() && + thread_safe_cache_->GetPageThumbnail(canonical_url, bytes)) { + return true; + } + } + } + + return false; +} + +bool TopSitesImpl::GetPageThumbnailScore(const GURL& url, + ThumbnailScore* score) { + // WARNING: this may be invoked on any thread. 
+ base::AutoLock lock(lock_); + return thread_safe_cache_->GetPageThumbnailScore(url, score); +} + +bool TopSitesImpl::GetTemporaryPageThumbnailScore(const GURL& url, + ThumbnailScore* score) { + for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end(); + ++i) { + if (i->first == url) { + *score = i->second.thumbnail_score; + return true; + } + } + return false; +} + + +// Returns the index of |url| in |urls|, or -1 if not found. +static int IndexOf(const MostVisitedURLList& urls, const GURL& url) { + for (size_t i = 0; i < urls.size(); i++) { + if (urls[i].url == url) + return i; + } + return -1; +} + +void TopSitesImpl::SyncWithHistory() { + DCHECK(thread_checker_.CalledOnValidThread()); + if (loaded_ && temp_images_.size()) { + // If we have temporary thumbnails it means there isn't much data, and most + // likely the user is first running Chrome. During this time we throttle + // updating from history by 30 seconds. If the user creates a new tab page + // during this window of time we force updating from history so that the new + // tab page isn't so far out of date. 
+ timer_.Stop(); + StartQueryForMostVisited(); + } +} + +bool TopSitesImpl::HasBlacklistedItems() const { + const base::DictionaryValue* blacklist = + pref_service_->GetDictionary(blacklist_pref_name_); + return blacklist && !blacklist->empty(); +} + +void TopSitesImpl::AddBlacklistedURL(const GURL& url) { + DCHECK(thread_checker_.CalledOnValidThread()); + + base::Value* dummy = base::Value::CreateNullValue(); + { + DictionaryPrefUpdate update(pref_service_, blacklist_pref_name_); + base::DictionaryValue* blacklist = update.Get(); + blacklist->SetWithoutPathExpansion(GetURLHash(url), dummy); + } + + ResetThreadSafeCache(); + NotifyTopSitesChanged(); +} + +void TopSitesImpl::RemoveBlacklistedURL(const GURL& url) { + DCHECK(thread_checker_.CalledOnValidThread()); + { + DictionaryPrefUpdate update(pref_service_, blacklist_pref_name_); + base::DictionaryValue* blacklist = update.Get(); + blacklist->RemoveWithoutPathExpansion(GetURLHash(url), nullptr); + } + ResetThreadSafeCache(); + NotifyTopSitesChanged(); +} + +bool TopSitesImpl::IsBlacklisted(const GURL& url) { + DCHECK(thread_checker_.CalledOnValidThread()); + const base::DictionaryValue* blacklist = + pref_service_->GetDictionary(blacklist_pref_name_); + return blacklist && blacklist->HasKey(GetURLHash(url)); +} + +void TopSitesImpl::ClearBlacklistedURLs() { + DCHECK(thread_checker_.CalledOnValidThread()); + { + DictionaryPrefUpdate update(pref_service_, blacklist_pref_name_); + base::DictionaryValue* blacklist = update.Get(); + blacklist->Clear(); + } + ResetThreadSafeCache(); + NotifyTopSitesChanged(); +} + +void TopSitesImpl::ShutdownOnUIThread() { + history_service_ = nullptr; + history_service_observer_.RemoveAll(); + // Cancel all requests so that the service doesn't callback to us after we've + // invoked Shutdown (this could happen if we have a pending request and + // Shutdown is invoked). 
+ cancelable_task_tracker_.TryCancelAll(); + if (backend_) + backend_->Shutdown(); +} + +// static +void TopSitesImpl::DiffMostVisited(const MostVisitedURLList& old_list, + const MostVisitedURLList& new_list, + TopSitesDelta* delta) { + + // Add all the old URLs for quick lookup. This maps URLs to the corresponding + // index in the input. + std::map<GURL, size_t> all_old_urls; + size_t num_old_forced = 0; + for (size_t i = 0; i < old_list.size(); i++) { + if (!old_list[i].last_forced_time.is_null()) + num_old_forced++; + DCHECK(old_list[i].last_forced_time.is_null() || i < num_old_forced) + << "Forced URLs must all appear before non-forced URLs."; + all_old_urls[old_list[i].url] = i; + } + + // Check all the URLs in the new set to see which ones are new or just moved. + // When we find a match in the old set, we'll reset its index to our special + // marker. This allows us to quickly identify the deleted ones in a later + // pass. + const size_t kAlreadyFoundMarker = static_cast<size_t>(-1); + int rank = -1; // Forced URLs have a rank of -1. + for (size_t i = 0; i < new_list.size(); i++) { + // Increase the rank if we're going through non-forced URLs. This works + // because non-forced URLs all come after forced URLs. + if (new_list[i].last_forced_time.is_null()) + rank++; + DCHECK(new_list[i].last_forced_time.is_null() == (rank != -1)) + << "Forced URLs must all appear before non-forced URLs."; + std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url); + if (found == all_old_urls.end()) { + MostVisitedURLWithRank added; + added.url = new_list[i]; + added.rank = rank; + delta->added.push_back(added); + } else { + DCHECK(found->second != kAlreadyFoundMarker) + << "Same URL appears twice in the new list."; + int old_rank = found->second >= num_old_forced ? 
+ found->second - num_old_forced : -1; + if (old_rank != rank || + old_list[found->second].last_forced_time != + new_list[i].last_forced_time) { + MostVisitedURLWithRank moved; + moved.url = new_list[i]; + moved.rank = rank; + delta->moved.push_back(moved); + } + found->second = kAlreadyFoundMarker; + } + } + + // Any member without the special marker in the all_old_urls list means that + // there wasn't a "new" URL that mapped to it, so it was deleted. + for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin(); + i != all_old_urls.end(); ++i) { + if (i->second != kAlreadyFoundMarker) + delta->deleted.push_back(old_list[i->second]); + } +} + +base::CancelableTaskTracker::TaskId TopSitesImpl::StartQueryForMostVisited() { + DCHECK(loaded_); + if (!history_service_) + return base::CancelableTaskTracker::kBadTaskId; + + return history_service_->QueryMostVisitedURLs( + num_results_to_request_from_history(), kDaysOfHistory, + base::Bind(&TopSitesImpl::OnTopSitesAvailableFromHistory, + base::Unretained(this)), + &cancelable_task_tracker_); +} + +bool TopSitesImpl::IsKnownURL(const GURL& url) { + return loaded_ && cache_->IsKnownURL(url); +} + +const std::string& TopSitesImpl::GetCanonicalURLString(const GURL& url) const { + return cache_->GetCanonicalURL(url).spec(); +} + +bool TopSitesImpl::IsNonForcedFull() { + return loaded_ && cache_->GetNumNonForcedURLs() >= kNonForcedTopSitesNumber; +} + +bool TopSitesImpl::IsForcedFull() { + return loaded_ && cache_->GetNumForcedURLs() >= kForcedTopSitesNumber; +} + +TopSitesImpl::~TopSitesImpl() { +} + +bool TopSitesImpl::SetPageThumbnailNoDB( + const GURL& url, + const base::RefCountedMemory* thumbnail_data, + const ThumbnailScore& score) { + // This should only be invoked when we know about the url. 
+ DCHECK(cache_->IsKnownURL(url)); + + const MostVisitedURL& most_visited = + cache_->top_sites()[cache_->GetURLIndex(url)]; + Images* image = cache_->GetImage(url); + + // When comparing the thumbnail scores, we need to take into account the + // redirect hops, which are not generated when the thumbnail is because the + // redirects weren't known. We fill that in here since we know the redirects. + ThumbnailScore new_score_with_redirects(score); + new_score_with_redirects.redirect_hops_from_dest = + GetRedirectDistanceForURL(most_visited, url); + + if (!ShouldReplaceThumbnailWith(image->thumbnail_score, + new_score_with_redirects) && + image->thumbnail.get()) + return false; // The one we already have is better. + + image->thumbnail = const_cast<base::RefCountedMemory*>(thumbnail_data); + image->thumbnail_score = new_score_with_redirects; + + ResetThreadSafeImageCache(); + return true; +} + +bool TopSitesImpl::SetPageThumbnailEncoded( + const GURL& url, + const base::RefCountedMemory* thumbnail, + const ThumbnailScore& score) { + if (!SetPageThumbnailNoDB(url, thumbnail, score)) + return false; + + // Update the database. + if (!cache_->IsKnownURL(url)) + return false; + + size_t index = cache_->GetURLIndex(url); + int url_rank = index - cache_->GetNumForcedURLs(); + const MostVisitedURL& most_visited = cache_->top_sites()[index]; + backend_->SetPageThumbnail(most_visited, + url_rank < 0 ? -1 : url_rank, + *(cache_->GetImage(most_visited.url))); + return true; +} + +// static +bool TopSitesImpl::EncodeBitmap(const gfx::Image& bitmap, + scoped_refptr<base::RefCountedBytes>* bytes) { + if (bitmap.IsEmpty()) + return false; + *bytes = new base::RefCountedBytes(); + std::vector<unsigned char> data; + if (!gfx::JPEG1xEncodedDataFromImage(bitmap, kTopSitesImageQuality, &data)) + return false; + + // As we're going to cache this data, make sure the vector is only as big as + // it needs to be, as JPEGCodec::Encode() over-allocates data.capacity(). 
+ // (In a C++0x future, we can just call shrink_to_fit() in Encode()) + (*bytes)->data() = data; + return true; +} + +void TopSitesImpl::RemoveTemporaryThumbnailByURL(const GURL& url) { + for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end(); + ++i) { + if (i->first == url) { + temp_images_.erase(i); + return; + } + } +} + +void TopSitesImpl::AddTemporaryThumbnail( + const GURL& url, + const base::RefCountedMemory* thumbnail, + const ThumbnailScore& score) { + if (temp_images_.size() == kMaxTempTopImages) + temp_images_.erase(temp_images_.begin()); + + TempImage image; + image.first = url; + image.second.thumbnail = const_cast<base::RefCountedMemory*>(thumbnail); + image.second.thumbnail_score = score; + temp_images_.push_back(image); +} + +void TopSitesImpl::TimerFired() { + StartQueryForMostVisited(); +} + +// static +int TopSitesImpl::GetRedirectDistanceForURL(const MostVisitedURL& most_visited, + const GURL& url) { + for (size_t i = 0; i < most_visited.redirects.size(); i++) { + if (most_visited.redirects[i] == url) + return static_cast<int>(most_visited.redirects.size() - i - 1); + } + NOTREACHED() << "URL should always be found."; + return 0; +} + +PrepopulatedPageList TopSitesImpl::GetPrepopulatedPages() { + return prepopulated_pages_; +} + +bool TopSitesImpl::loaded() const { + return loaded_; +} + +bool TopSitesImpl::AddForcedURL(const GURL& url, const base::Time& time) { + DCHECK(thread_checker_.CalledOnValidThread()); + size_t num_forced = cache_->GetNumForcedURLs(); + MostVisitedURLList new_list(cache_->top_sites()); + MostVisitedURL new_url; + + if (cache_->IsKnownURL(url)) { + size_t index = cache_->GetURLIndex(url); + // Do nothing if we currently have that URL as non-forced. + if (new_list[index].last_forced_time.is_null()) + return false; + + // Update the |last_forced_time| of the already existing URL. Delete it and + // reinsert it at the right location. 
+ new_url = new_list[index]; + new_list.erase(new_list.begin() + index); + num_forced--; + } else { + new_url.url = url; + new_url.redirects.push_back(url); + } + new_url.last_forced_time = time; + // Add forced URLs and sort. Added to the end of the list of forced URLs + // since this is almost always where it needs to go, unless the user's local + // clock is fiddled with. + MostVisitedURLList::iterator mid = new_list.begin() + num_forced; + new_list.insert(mid, new_url); + mid = new_list.begin() + num_forced; // Mid was invalidated. + std::inplace_merge(new_list.begin(), mid, mid + 1, ForcedURLComparator); + SetTopSites(new_list, CALL_LOCATION_FROM_OTHER_PLACES); + return true; +} + +void TopSitesImpl::OnNavigationCommitted(const GURL& url) { + DCHECK(thread_checker_.CalledOnValidThread()); + if (!loaded_ || IsNonForcedFull()) + return; + + if (!cache_->IsKnownURL(url) && can_add_url_to_history_.Run(url)) { + // To avoid slamming history we throttle requests when the url updates. To + // do otherwise negatively impacts perf tests. + RestartQueryForTopSitesTimer(GetUpdateDelay()); + } +} + +bool TopSitesImpl::AddPrepopulatedPages(MostVisitedURLList* urls, + size_t num_forced_urls) { + bool added = false; + for (const auto& prepopulated_page : prepopulated_pages_) { + if (urls->size() - num_forced_urls < kNonForcedTopSitesNumber && + IndexOf(*urls, prepopulated_page.most_visited.url) == -1) { + urls->push_back(prepopulated_page.most_visited); + added = true; + } + } + return added; +} + +size_t TopSitesImpl::MergeCachedForcedURLs(MostVisitedURLList* new_list) { + DCHECK(thread_checker_.CalledOnValidThread()); + // Add all the new URLs for quick lookup. Take that opportunity to count the + // number of forced URLs in |new_list|. 
+ std::set<GURL> all_new_urls; + size_t num_forced = 0; + for (size_t i = 0; i < new_list->size(); ++i) { + for (size_t j = 0; j < (*new_list)[i].redirects.size(); j++) { + all_new_urls.insert((*new_list)[i].redirects[j]); + } + if (!(*new_list)[i].last_forced_time.is_null()) + ++num_forced; + } + + // Keep the forced URLs from |cache_| that are not found in |new_list|. + MostVisitedURLList filtered_forced_urls; + for (size_t i = 0; i < cache_->GetNumForcedURLs(); ++i) { + if (all_new_urls.find(cache_->top_sites()[i].url) == all_new_urls.end()) + filtered_forced_urls.push_back(cache_->top_sites()[i]); + } + num_forced += filtered_forced_urls.size(); + + // Prepend forced URLs and sort in order of ascending |last_forced_time|. + new_list->insert(new_list->begin(), filtered_forced_urls.begin(), + filtered_forced_urls.end()); + std::inplace_merge( + new_list->begin(), new_list->begin() + filtered_forced_urls.size(), + new_list->begin() + num_forced, ForcedURLComparator); + + // Drop older forced URLs if the list overflows. Since forced URLs are always + // sorted in increasing order of |last_forced_time|, drop the first ones. + if (num_forced > kForcedTopSitesNumber) { + new_list->erase(new_list->begin(), + new_list->begin() + (num_forced - kForcedTopSitesNumber)); + num_forced = kForcedTopSitesNumber; + } + + return num_forced; +} + +void TopSitesImpl::ApplyBlacklist(const MostVisitedURLList& urls, + MostVisitedURLList* out) { + // Log the number of times ApplyBlacklist is called so we can compute the + // average number of blacklisted items per user. + const base::DictionaryValue* blacklist = + pref_service_->GetDictionary(blacklist_pref_name_); + UMA_HISTOGRAM_BOOLEAN("TopSites.NumberOfApplyBlacklist", true); + UMA_HISTOGRAM_COUNTS_100("TopSites.NumberOfBlacklistedItems", + (blacklist ? 
blacklist->size() : 0)); + size_t num_non_forced_urls = 0; + size_t num_forced_urls = 0; + for (size_t i = 0; i < urls.size(); ++i) { + if (!IsBlacklisted(urls[i].url)) { + if (urls[i].last_forced_time.is_null()) { + // Non-forced URL. + if (num_non_forced_urls >= kNonForcedTopSitesNumber) + continue; + num_non_forced_urls++; + } else { + // Forced URL. + if (num_forced_urls >= kForcedTopSitesNumber) + continue; + num_forced_urls++; + } + out->push_back(urls[i]); + } + } +} + +std::string TopSitesImpl::GetURLHash(const GURL& url) { + // We don't use canonical URLs here to be able to blacklist only one of + // the two 'duplicate' sites, e.g. 'gmail.com' and 'mail.google.com'. + return base::MD5String(url.spec()); +} + +base::TimeDelta TopSitesImpl::GetUpdateDelay() { + if (cache_->top_sites().size() <= prepopulated_pages_.size()) + return base::TimeDelta::FromSeconds(30); + + int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes; + int64 minutes = kMaxUpdateIntervalMinutes - + last_num_urls_changed_ * range / cache_->top_sites().size(); + return base::TimeDelta::FromMinutes(minutes); +} + +void TopSitesImpl::SetTopSites(const MostVisitedURLList& new_top_sites, + const CallLocation location) { + DCHECK(thread_checker_.CalledOnValidThread()); + + MostVisitedURLList top_sites(new_top_sites); + size_t num_forced_urls = MergeCachedForcedURLs(&top_sites); + AddPrepopulatedPages(&top_sites, num_forced_urls); + + TopSitesDelta delta; + DiffMostVisited(cache_->top_sites(), top_sites, &delta); + + TopSitesBackend::RecordHistogram record_or_not = + TopSitesBackend::RECORD_HISTOGRAM_NO; + + // Record the delta size into a histogram if this function is called from + // function OnGotMostVisitedThumbnails and no histogram value has been + // recorded before. 
+ if (location == CALL_LOCATION_FROM_ON_GOT_MOST_VISITED_THUMBNAILS && + !histogram_recorded_) { + size_t delta_size = + delta.deleted.size() + delta.added.size() + delta.moved.size(); + UMA_HISTOGRAM_COUNTS_100("History.FirstSetTopSitesDeltaSize", delta_size); + // Will be passed to TopSitesBackend to let it record the histogram too. + record_or_not = TopSitesBackend::RECORD_HISTOGRAM_YES; + // Change it to true so that the histogram will not be recorded any more. + histogram_recorded_ = true; + } + + if (!delta.deleted.empty() || !delta.added.empty() || !delta.moved.empty()) { + backend_->UpdateTopSites(delta, record_or_not); + } + + last_num_urls_changed_ = delta.added.size() + delta.moved.size(); + + // We always do the following steps (setting top sites in cache, and resetting + // thread safe cache ...) as this method is invoked during startup at which + // point the caches haven't been updated yet. + cache_->SetTopSites(top_sites); + + // See if we have any tmp thumbnails for the new sites. + if (!temp_images_.empty()) { + for (size_t i = 0; i < top_sites.size(); ++i) { + const MostVisitedURL& mv = top_sites[i]; + GURL canonical_url = cache_->GetCanonicalURL(mv.url); + // At the time we get the thumbnail redirects aren't known, so we have to + // iterate through all the images. + for (TempImages::iterator it = temp_images_.begin(); + it != temp_images_.end(); ++it) { + if (canonical_url == cache_->GetCanonicalURL(it->first)) { + SetPageThumbnailEncoded( + mv.url, it->second.thumbnail.get(), it->second.thumbnail_score); + temp_images_.erase(it); + break; + } + } + } + } + + if (top_sites.size() - num_forced_urls >= kNonForcedTopSitesNumber) + temp_images_.clear(); + + ResetThreadSafeCache(); + ResetThreadSafeImageCache(); + NotifyTopSitesChanged(); + + // Restart the timer that queries history for top sites. This is done to + // ensure we stay in sync with history. 
+ RestartQueryForTopSitesTimer(GetUpdateDelay()); +} + +int TopSitesImpl::num_results_to_request_from_history() const { + DCHECK(thread_checker_.CalledOnValidThread()); + + const base::DictionaryValue* blacklist = + pref_service_->GetDictionary(blacklist_pref_name_); + return kNonForcedTopSitesNumber + (blacklist ? blacklist->size() : 0); +} + +void TopSitesImpl::MoveStateToLoaded() { + DCHECK(thread_checker_.CalledOnValidThread()); + + MostVisitedURLList filtered_urls_all; + MostVisitedURLList filtered_urls_nonforced; + PendingCallbacks pending_callbacks; + { + base::AutoLock lock(lock_); + + if (loaded_) + return; // Don't do anything if we're already loaded. + loaded_ = true; + + // Now that we're loaded we can service the queued up callbacks. Copy them + // here and service them outside the lock. + if (!pending_callbacks_.empty()) { + // Build both the full list and the list without forced URLs; each pending + // callback uses the one matching its |include_forced_urls| argument. + filtered_urls_all = thread_safe_cache_->top_sites(); + filtered_urls_nonforced.assign(thread_safe_cache_->top_sites().begin() + + thread_safe_cache_->GetNumForcedURLs(), + thread_safe_cache_->top_sites().end()); + pending_callbacks.swap(pending_callbacks_); + } + } + + for (size_t i = 0; i < pending_callbacks.size(); i++) + pending_callbacks[i].Run(filtered_urls_all, filtered_urls_nonforced); + + if (history_service_) + history_service_observer_.Add(history_service_); + + NotifyTopSitesLoaded(); +} + +void TopSitesImpl::ResetThreadSafeCache() { + base::AutoLock lock(lock_); + MostVisitedURLList cached; + ApplyBlacklist(cache_->top_sites(), &cached); + thread_safe_cache_->SetTopSites(cached); +} + +void TopSitesImpl::ResetThreadSafeImageCache() { + base::AutoLock lock(lock_); + thread_safe_cache_->SetThumbnails(cache_->images()); +} + +void TopSitesImpl::RestartQueryForTopSitesTimer(base::TimeDelta delta) { + if (timer_.IsRunning() && ((timer_start_time_ + timer_.GetCurrentDelay()) < + (base::TimeTicks::Now() + delta))) { + 
return; + } + + timer_start_time_ = base::TimeTicks::Now(); + timer_.Stop(); + timer_.Start(FROM_HERE, delta, this, &TopSitesImpl::TimerFired); +} + +void TopSitesImpl::OnGotMostVisitedThumbnails( + const scoped_refptr<MostVisitedThumbnails>& thumbnails) { + DCHECK(thread_checker_.CalledOnValidThread()); + + // Set the top sites directly in the cache so that SetTopSites diffs + // correctly. + cache_->SetTopSites(thumbnails->most_visited); + SetTopSites(thumbnails->most_visited, + CALL_LOCATION_FROM_ON_GOT_MOST_VISITED_THUMBNAILS); + cache_->SetThumbnails(thumbnails->url_to_images_map); + + ResetThreadSafeImageCache(); + + MoveStateToLoaded(); + + // Start a timer that refreshes top sites from history. + RestartQueryForTopSitesTimer( + base::TimeDelta::FromSeconds(kUpdateIntervalSecs)); +} + +void TopSitesImpl::OnTopSitesAvailableFromHistory( + const MostVisitedURLList* pages) { + DCHECK(pages); + SetTopSites(*pages, CALL_LOCATION_FROM_OTHER_PLACES); +} + +void TopSitesImpl::OnURLsDeleted(HistoryService* history_service, + bool all_history, + bool expired, + const URLRows& deleted_rows, + const std::set<GURL>& favicon_urls) { + if (!loaded_) + return; + + if (all_history) { + SetTopSites(MostVisitedURLList(), CALL_LOCATION_FROM_OTHER_PLACES); + backend_->ResetDatabase(); + } else { + std::set<size_t> indices_to_delete; // Indices into top_sites_. 
+ for (const auto& row : deleted_rows) { + if (cache_->IsKnownURL(row.url())) + indices_to_delete.insert(cache_->GetURLIndex(row.url())); + } + + if (indices_to_delete.empty()) + return; + + MostVisitedURLList new_top_sites(cache_->top_sites()); + for (std::set<size_t>::reverse_iterator i = indices_to_delete.rbegin(); + i != indices_to_delete.rend(); i++) { + new_top_sites.erase(new_top_sites.begin() + *i); + } + SetTopSites(new_top_sites, CALL_LOCATION_FROM_OTHER_PLACES); + } + StartQueryForMostVisited(); +} + +} // namespace history diff --git a/components/history/core/browser/top_sites_impl.h b/components/history/core/browser/top_sites_impl.h new file mode 100644 index 0000000..e28ceb5 --- /dev/null +++ b/components/history/core/browser/top_sites_impl.h @@ -0,0 +1,320 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_HISTORY_CORE_BROWSER_TOP_SITES_IMPL_H_ +#define COMPONENTS_HISTORY_CORE_BROWSER_TOP_SITES_IMPL_H_ + +#include <list> +#include <set> +#include <string> +#include <utility> +#include <vector> + +#include "base/basictypes.h" +#include "base/callback.h" +#include "base/gtest_prod_util.h" +#include "base/memory/ref_counted.h" +#include "base/scoped_observer.h" +#include "base/synchronization/lock.h" +#include "base/task/cancelable_task_tracker.h" +#include "base/threading/thread_checker.h" +#include "base/time/time.h" +#include "base/timer/timer.h" +#include "components/history/core/browser/history_service.h" +#include "components/history/core/browser/history_service_observer.h" +#include "components/history/core/browser/history_types.h" +#include "components/history/core/browser/page_usage_data.h" +#include "components/history/core/browser/top_sites.h" +#include "components/history/core/browser/top_sites_backend.h" +#include "components/history/core/common/thumbnail_score.h" +#include 
"third_party/skia/include/core/SkColor.h" +#include "ui/gfx/image/image.h" +#include "url/gurl.h" + +class PrefService; + +namespace base { +class FilePath; +class RefCountedBytes; +class RefCountedMemory; +class SingleThreadTaskRunner; +} + +namespace history { + +class HistoryService; +class TopSitesCache; +class TopSitesImplTest; + +// This class allows requests for most visited urls and thumbnails on any +// thread. All other methods must be invoked on the UI thread. All mutations +// to internal state happen on the UI thread and are scheduled to update the +// db using TopSitesBackend. +class TopSitesImpl : public TopSites, public HistoryServiceObserver { + public: + // Called to check whether an URL can be added to the history. Must be + // callable multiple time and during the whole lifetime of TopSitesImpl. + using CanAddURLToHistoryFn = base::Callback<bool(const GURL&)>; + + TopSitesImpl(PrefService* pref_service, + HistoryService* history_service, + const char* blacklist_pref_name, + const PrepopulatedPageList& prepopulated_pages, + const CanAddURLToHistoryFn& can_add_url_to_history); + + // Initializes TopSitesImpl. + void Init(const base::FilePath& db_name, + const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner); + + // TopSites implementation. 
+ bool SetPageThumbnail(const GURL& url, + const gfx::Image& thumbnail, + const ThumbnailScore& score) override; + bool SetPageThumbnailToJPEGBytes(const GURL& url, + const base::RefCountedMemory* memory, + const ThumbnailScore& score) override; + void GetMostVisitedURLs(const GetMostVisitedURLsCallback& callback, + bool include_forced_urls) override; + bool GetPageThumbnail(const GURL& url, + bool prefix_match, + scoped_refptr<base::RefCountedMemory>* bytes) override; + bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) override; + bool GetTemporaryPageThumbnailScore(const GURL& url, + ThumbnailScore* score) override; + void SyncWithHistory() override; + bool HasBlacklistedItems() const override; + void AddBlacklistedURL(const GURL& url) override; + void RemoveBlacklistedURL(const GURL& url) override; + bool IsBlacklisted(const GURL& url) override; + void ClearBlacklistedURLs() override; + base::CancelableTaskTracker::TaskId StartQueryForMostVisited() override; + bool IsKnownURL(const GURL& url) override; + const std::string& GetCanonicalURLString(const GURL& url) const override; + bool IsNonForcedFull() override; + bool IsForcedFull() override; + PrepopulatedPageList GetPrepopulatedPages() override; + bool loaded() const override; + bool AddForcedURL(const GURL& url, const base::Time& time) override; + void OnNavigationCommitted(const GURL& url) override; + + // RefcountedKeyedService: + void ShutdownOnUIThread() override; + + protected: + ~TopSitesImpl() override; + + private: + // TODO(yiyaoliu): Remove the enums and related code when crbug/223430 is + // fixed. + // An enum representing different situations under which function + // SetTopSites can be initiated. + // This is needed because a histogram is used to record speed-related metrics + // when SetTopSites is initiated from OnGotMostVisitedThumbnails, which + // usually happens early and might affect Chrome startup speed.
+ enum CallLocation { + // SetTopSites is called from function OnGotMostVisitedThumbnails. + CALL_LOCATION_FROM_ON_GOT_MOST_VISITED_THUMBNAILS, + // All other situations. + CALL_LOCATION_FROM_OTHER_PLACES + }; + + friend class TopSitesImplTest; + FRIEND_TEST_ALL_PREFIXES(TopSitesImplTest, DiffMostVisited); + FRIEND_TEST_ALL_PREFIXES(TopSitesImplTest, DiffMostVisitedWithForced); + + // Run with the complete URL list first and the list without forced URLs + // second (see MoveStateToLoaded). + typedef base::Callback<void(const MostVisitedURLList&, + const MostVisitedURLList&)> PendingCallback; + + typedef std::pair<GURL, Images> TempImage; + typedef std::list<TempImage> TempImages; + typedef std::vector<PendingCallback> PendingCallbacks; + + // Generates the diff of things that happened between "old" and "new." + // + // This treats forced URLs separately from non-forced URLs. + // + // The URLs that are in "new" but not "old" will have their index into + // "new" put in |added_urls|. The non-forced URLs that are in "old" but not + // "new" will have their index into "old" put into |deleted_urls|. + // + // URLs appearing in both old and new lists but having different indices will + // have their index into "new" be put into |moved_urls|. + // + // NOTE(review): |added_urls|, |deleted_urls| and |moved_urls| are not + // parameters of this function; presumably they name parts of |delta| + // (TopSitesDelta) -- confirm. + static void DiffMostVisited(const MostVisitedURLList& old_list, + const MostVisitedURLList& new_list, + TopSitesDelta* delta); + + // Sets the thumbnail without writing to the database. Useful when + // reading last known top sites from the DB. + // Returns true if the thumbnail was set, false if the existing one is better. + bool SetPageThumbnailNoDB(const GURL& url, + const base::RefCountedMemory* thumbnail_data, + const ThumbnailScore& score); + + // A version of SetPageThumbnail that takes the thumbnail as + // base::RefCountedMemory, as returned by HistoryService. + bool SetPageThumbnailEncoded(const GURL& url, + const base::RefCountedMemory* thumbnail, + const ThumbnailScore& score); + + // Encodes the bitmap to bytes for storage to the db. Returns true if the + // bitmap was successfully encoded.
+ static bool EncodeBitmap(const gfx::Image& bitmap, + scoped_refptr<base::RefCountedBytes>* bytes); + + // Removes the cached thumbnail for |url|. Does nothing if |url| is not cached + // in |temp_images_|. + void RemoveTemporaryThumbnailByURL(const GURL& url); + + // Adds a thumbnail for an unknown url. See |temp_images_|. + void AddTemporaryThumbnail(const GURL& url, + const base::RefCountedMemory* thumbnail, + const ThumbnailScore& score); + + // Called by our timer. Starts the query for the most visited sites. + void TimerFired(); + + // Finds the given URL in the redirect chain for the given TopSite, and + // returns the distance from the destination in hops that the given URL is. + // The URL is assumed to be in the list. The destination is 0. + static int GetRedirectDistanceForURL(const MostVisitedURL& most_visited, + const GURL& url); + + // Add prepopulated pages: 'welcome to Chrome' and themes gallery to |urls|. + // Returns true if any pages were added. + bool AddPrepopulatedPages(MostVisitedURLList* urls, + size_t num_forced_urls); + + // Adds all the forced URLs from |cache_| into |new_list|, making sure not to + // add any URL that's already in |new_list|'s non-forced URLs. The forced URLs + // in |cache_| and |new_list| are assumed to appear at the front of the list + // and be sorted in increasing |last_forced_time|. This will still be true + // after the call. If the list of forced URLs overflows the older ones are + // dropped. Returns the number of forced URLs after the merge. + size_t MergeCachedForcedURLs(MostVisitedURLList* new_list); + + // Takes |urls|, produces its copy in |out| after removing blacklisted URLs. + // Also ensures we respect the maximum number of forced URLs and non-forced + // URLs. + void ApplyBlacklist(const MostVisitedURLList& urls, MostVisitedURLList* out); + + // Returns an MD5 hash of the URL. Hashing is required for blacklisted URLs.
+ std::string GetURLHash(const GURL& url); + + // Returns the delay until the next update of history is needed. + // Uses num_urls_changed (presumably |last_num_urls_changed_| -- TODO + // confirm). + base::TimeDelta GetUpdateDelay(); + + // Updates URLs in |cache_| and the db (in the background). + // The non-forced URLs in |new_top_sites| replace those in |cache_|. + // The forced URLs of |new_top_sites| are merged with those in |cache_|; + // if the list of forced URLs overflows, the oldest ones are dropped. + // All mutations to cache_ *must* go through this. Note that + // OnGotMostVisitedThumbnails seeds |cache_| directly before calling this. + // Should be called from the UI thread. + void SetTopSites(const MostVisitedURLList& new_top_sites, + const CallLocation location); + + // Returns the number of most visited results to request from history. This + // changes depending upon how many urls have been blacklisted. Should be + // called from the UI thread. + int num_results_to_request_from_history() const; + + // Invoked when transitioning to LOADED. Notifies any queued up callbacks. + // Does nothing if already loaded. Should be called from the UI thread. + void MoveStateToLoaded(); + + // Rebuilds the URL list of |thread_safe_cache_| from |cache_| with the + // blacklist applied. Acquires |lock_|. + void ResetThreadSafeCache(); + + // Copies the thumbnails of |cache_| into |thread_safe_cache_|. Acquires + // |lock_|. + void ResetThreadSafeImageCache(); + + // Restarts the timer with a delay of |delta|, unless it is already running + // and due to fire before then. + void RestartQueryForTopSitesTimer(base::TimeDelta delta); + + // Callback from TopSites with the top sites/thumbnails. Should be called + // from the UI thread. + void OnGotMostVisitedThumbnails( + const scoped_refptr<MostVisitedThumbnails>& thumbnails); + + // Called when history service returns a list of top URLs. |data| must not + // be null. + void OnTopSitesAvailableFromHistory(const MostVisitedURLList* data); + + // history::HistoryServiceObserver: + void OnURLsDeleted(HistoryService* history_service, + bool all_history, + bool expired, + const URLRows& deleted_rows, + const std::set<GURL>& favicon_urls) override; + + // Ensures that non thread-safe methods are called on the correct thread. + base::ThreadChecker thread_checker_; + + scoped_refptr<TopSitesBackend> backend_; + + // The top sites data.
+ scoped_ptr<TopSitesCache> cache_; + + // Copy of the top sites data that may be accessed on any thread (assuming + // you hold |lock_|). The data in |thread_safe_cache_| has blacklisted and + // pinned urls applied (|cache_| does not). + scoped_ptr<TopSitesCache> thread_safe_cache_; + + // Lock used to access |thread_safe_cache_|. MoveStateToLoaded also holds it + // while updating |loaded_| and swapping out |pending_callbacks_|. + mutable base::Lock lock_; + + // Task tracker for history and backend requests. + base::CancelableTaskTracker cancelable_task_tracker_; + + // Timer that asks history for the top sites. This is used to make sure our + // data stays in sync with history. + base::OneShotTimer<TopSitesImpl> timer_; + + // The time we started |timer_| at. Only valid if |timer_| is running. + base::TimeTicks timer_start_time_; + + // The number of URLs changed on the last update. + size_t last_num_urls_changed_; + + // The pending requests for the top sites list. Can only be non-empty at + // startup. After we read the top sites from the DB, we'll always have a + // cached list and be able to run callbacks immediately. + PendingCallbacks pending_callbacks_; + + // Stores thumbnails for unknown pages. When SetPageThumbnail is + // called, if we don't know about that URL yet and we don't have + // enough Top Sites (new profile), we store it until the next + // SetNonForcedTopSites call. + // NOTE(review): no method named SetNonForcedTopSites is declared in this + // class; presumably SetTopSites is meant -- confirm. + TempImages temp_images_; + + // List of prepopulated pages. + PrepopulatedPageList prepopulated_pages_; + + // PrefService holding the NTP URL blacklist dictionary. Must outlive + // TopSitesImpl. + PrefService* pref_service_; + + // Key for the NTP URL blacklist dictionary in PrefService. + const char* blacklist_pref_name_; + + // HistoryService that TopSitesImpl can query. May be null, but if defined it + // must outlive TopSitesImpl. + HistoryService* history_service_; + + // Can URL be added to the history? + CanAddURLToHistoryFn can_add_url_to_history_; + + // Are we loaded? + bool loaded_; + + // Have the SetTopSites execution time related histograms been recorded?
+ // The histogram should only be recorded once for each Chrome execution. + static bool histogram_recorded_; + + // Observes |history_service_|; registered in MoveStateToLoaded when + // |history_service_| is non-null. + ScopedObserver<HistoryService, HistoryServiceObserver> + history_service_observer_; + + DISALLOW_COPY_AND_ASSIGN(TopSitesImpl); +}; + +} // namespace history + +#endif // COMPONENTS_HISTORY_CORE_BROWSER_TOP_SITES_IMPL_H_ |