diff options
author | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-02 04:02:39 +0000 |
---|---|---|
committer | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-02 04:02:39 +0000 |
commit | d46bdac81462255501c76f5a7dcd15b736fa986f (patch) | |
tree | ef790bf7be1bdf3a490c0ec9fa2da599067626b6 | |
parent | f3ad027e3d26994292c5564b820cd91fee977ae8 (diff) | |
download | chromium_src-d46bdac81462255501c76f5a7dcd15b736fa986f.zip chromium_src-d46bdac81462255501c76f5a7dcd15b736fa986f.tar.gz chromium_src-d46bdac81462255501c76f5a7dcd15b736fa986f.tar.bz2 |
Add some initial code for the top sites service. This will be a replacement for
the thumbnail database, and will also replace the ThumbnailStore which was the
previous replacement we didn't ship. This component will be very much like the
ThumbnailStore wtih the addition of the actual most visited data (not just
thumbnails) and that it is threadsafe.
This class is designed to be called on any thread. When it is complete,
thumbnails will be added to it from the UI thread of the browser. Requests for
thumbnails and the most visited data can be serviced directly on the I/O thread
without going through the UI thread, and since the data is cached, the request
won't also have to go through the history thread.
The current state is that it cqan store and update the the most visited list and thumbnails. There are unit tests covering this behavior.
I also added support for redirect ranking to ThumbnailScore. This is to
duplicated the ranking function in history currently, where it prefers
thumbnails closer to the end of a redirect chain. Since we won't be using the
history service and are only storing thumbnails for the most visited items, we
have to track the redirect index ourselves.
BUG=none
TEST=covered by unit tests (hopefully!)
Review URL: http://codereview.chromium.org/251002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@27824 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/history/history_types.h | 11 | ||||
-rw-r--r-- | chrome/browser/history/top_sites.cc | 170 | ||||
-rw-r--r-- | chrome/browser/history/top_sites.h | 130 | ||||
-rw-r--r-- | chrome/browser/history/top_sites_unittest.cc | 187 | ||||
-rwxr-xr-x | chrome/chrome.gyp | 3 | ||||
-rw-r--r-- | chrome/common/thumbnail_score.cc | 50 | ||||
-rw-r--r-- | chrome/common/thumbnail_score.h | 21 | ||||
-rw-r--r-- | chrome/common/thumbnail_score_unittest.cc | 54 |
8 files changed, 605 insertions, 21 deletions
diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h index c967374..02023f1 100644 --- a/chrome/browser/history/history_types.h +++ b/chrome/browser/history/history_types.h @@ -518,6 +518,17 @@ struct KeywordSearchTermVisit { std::wstring term; }; +// MostVisitedURL -------------------------------------------------------------- + +// Holds the per-URL information of the most visited query. +struct MostVisitedURL { + GURL url; + GURL favicon_url; + std::wstring title; + + RedirectList redirects; +}; + } // history #endif // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H__ diff --git a/chrome/browser/history/top_sites.cc b/chrome/browser/history/top_sites.cc new file mode 100644 index 0000000..48a1751 --- /dev/null +++ b/chrome/browser/history/top_sites.cc @@ -0,0 +1,170 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/top_sites.h" + +#include "base/gfx/jpeg_codec.h" +#include "base/logging.h" +#include "third_party/skia/include/core/SkBitmap.h" + +namespace history { + +TopSites::TopSites() { +} + +TopSites::~TopSites() { +} + +bool TopSites::Init() { + // TODO(brettw) read the database here. + return true; +} + +bool TopSites::SetPageThumbnail(const GURL& url, + const SkBitmap& thumbnail, + const ThumbnailScore& score) { + AutoLock lock(lock_); + + std::map<GURL, size_t>::iterator found = canonical_urls_.find(url); + if (found == canonical_urls_.end()) + return false; // This URL is not known to us. + MostVisitedURL& most_visited = top_sites_[found->second]; + Images& image = top_images_[most_visited.url]; + + // When comparing the thumbnail scores, we need to take into account the + // redirect hops, which are not generated when the thumbnail is becuase the + // redirects weren't known. We fill that in here since we know the redirects. + ThumbnailScore new_score_with_redirects(score); + new_score_with_redirects.redirect_hops_from_dest = + GetRedirectDistanceForURL(most_visited, url); + + if (!ShouldReplaceThumbnailWith(image.thumbnail_score, + new_score_with_redirects)) + return false; // The one we already have is better. + + image.thumbnail = new RefCountedBytes; + SkAutoLockPixels thumbnail_lock(thumbnail); + bool encoded = JPEGCodec::Encode( + reinterpret_cast<unsigned char*>(thumbnail.getAddr32(0, 0)), + JPEGCodec::FORMAT_BGRA, thumbnail.width(), + thumbnail.height(), + static_cast<int>(thumbnail.rowBytes()), 90, + &image.thumbnail->data); + if (!encoded) + return false; + image.thumbnail_score = new_score_with_redirects; + + return true; +} + +void TopSites::StoreMostVisited(std::vector<MostVisitedURL>* most_visited) { + lock_.AssertAcquired(); + // TODO(brettw) filter for blacklist! + + if (!top_sites_.empty()) { + std::vector<size_t> added; + std::vector<size_t> deleted; + std::vector<size_t> moved; + DiffMostVisited(top_sites_, *most_visited, &added, &deleted, &moved); + + // Delete all the thumbnails associated with URLs that were deleted. + for (size_t i = 0; i < deleted.size(); i++) { + std::map<GURL, Images>::iterator found = + top_images_.find(top_sites_[deleted[i]].url); + if (found != top_images_.end()) + top_images_.erase(found); + } + + // TODO(brettw) write the new data to disk. + } + + // Take ownership of the most visited data. + top_sites_.clear(); + top_sites_.swap(*most_visited); + + // Save the redirect information for quickly mapping to the canonical URLs. + canonical_urls_.clear(); + for (size_t i = 0; i < top_sites_.size(); i++) + StoreRedirectChain(top_sites_[i].redirects, i); +} + +void TopSites::StoreRedirectChain(const RedirectList& redirects, + size_t destination) { + lock_.AssertAcquired(); + + if (redirects.empty()) { + NOTREACHED(); + return; + } + + // We shouldn't get any duplicates. + DCHECK(canonical_urls_.find(redirects[0]) == canonical_urls_.end()); + + // Map all the redirected URLs to the destination. + for (size_t i = 0; i < redirects.size(); i++) + canonical_urls_[redirects[i]] = destination; +} + +GURL TopSites::GetCanonicalURL(const GURL& url) const { + lock_.AssertAcquired(); + + std::map<GURL, size_t>::const_iterator found = canonical_urls_.find(url); + if (found == canonical_urls_.end()) + return GURL(); // Don't know anything about this URL. + return top_sites_[found->second].url; +} + +// static +int TopSites::GetRedirectDistanceForURL(const MostVisitedURL& most_visited, + const GURL& url) { + for (size_t i = 0; i < most_visited.redirects.size(); i++) { + if (most_visited.redirects[i] == url) + return static_cast<int>(most_visited.redirects.size() - i - 1); + } + NOTREACHED() << "URL should always be found."; + return 0; +} + +// static +void TopSites::DiffMostVisited(const std::vector<MostVisitedURL>& old_list, + const std::vector<MostVisitedURL>& new_list, + std::vector<size_t>* added_urls, + std::vector<size_t>* deleted_urls, + std::vector<size_t>* moved_urls) { + added_urls->clear(); + deleted_urls->clear(); + moved_urls->clear(); + + // Add all the old URLs for quick lookup. This maps URLs to the corresponding + // index in the input. + std::map<GURL, size_t> all_old_urls; + for (size_t i = 0; i < old_list.size(); i++) + all_old_urls[old_list[i].url] = i; + + // Check all the URLs in the new set to see which ones are new or just moved. + // When we find a match in the old set, we'll reset its index to our special + // marker. This allows us to quickly identify the deleted ones in a later + // pass. + const size_t kAlreadyFoundMarker = static_cast<size_t>(-1); + for (size_t i = 0; i < new_list.size(); i++) { + std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url); + if (found == all_old_urls.end()) { + added_urls->push_back(i); + } else { + if (found->second != i) + moved_urls->push_back(i); + found->second = kAlreadyFoundMarker; + } + } + + // Any member without the special marker in the all_old_urls list means that + // there wasn't a "new" URL that mapped to it, so it was deleted. + for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin(); + i != all_old_urls.end(); ++i) { + if (i->second != kAlreadyFoundMarker) + deleted_urls->push_back(i->second); + } +} + +} // namespace history diff --git a/chrome/browser/history/top_sites.h b/chrome/browser/history/top_sites.h new file mode 100644 index 0000000..93fad64 --- /dev/null +++ b/chrome/browser/history/top_sites.h @@ -0,0 +1,130 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_H_ +#define CHROME_BROWSER_HISTORY_TOP_SITES_H_ + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/lock.h" +#include "base/ref_counted.h" +#include "chrome/browser/history/history_types.h" +#include "chrome/common/ref_counted_util.h" +#include "chrome/common/thumbnail_score.h" +#include "googleurl/src/gurl.h" + +class SkBitmap; +struct ThumbnailScore; + +namespace history { + +class TopSitesBackend; +class TopSitesTest; + +// Stores the data for the top "most visited" sites. This includes a cache of +// the most visited data from history, as well as the corresponding thumbnails +// of those sites. +// +// This class IS threadsafe. It is designed to be used from the UI thread of +// the browser (where history requests must be kicked off and received from) +// and from the I/O thread (where new tab page requests come in). Handling the +// new tab page requests on the I/O thread without proxying to the UI thread is +// a nontrivial performance win, especially when the browser is starting and +// the UI thread is busy. +class TopSites : public base::RefCountedThreadSafe<TopSites> { + public: + TopSites(); + ~TopSites(); + + // Initializes this component, returning true on success. + bool Init(); + + // Sets the given thumbnail for the given URL. Returns true if the thumbnail + // was updated. False means either the URL wasn't known to us, or we felt + // that our current thumbnail was superior to the given one. + bool SetPageThumbnail(const GURL& url, + const SkBitmap& thumbnail, + const ThumbnailScore& score); + + //TODO(brettw) write this. + //bool GetPageThumbnail(const GURL& url, RefCountedBytes** data) const; + + private: + friend class TopSitesTest; + + struct Images { + scoped_refptr<RefCountedBytes> thumbnail; + ThumbnailScore thumbnail_score; + + // TODO(brettw) this will eventually store the favicon. + //scoped_refptr<RefCountedBytes> favicon; + }; + + // Saves the set of the top URLs visited by this user. The 0th item is the + // most popular. + // + // DANGER! This will clear all data from the input argument. + void StoreMostVisited(std::vector<MostVisitedURL>* most_visited); + + // Saves the given set of redirects. The redirects are in order of the + // given vector, so [0] -> [1] -> [2]. + void StoreRedirectChain(const RedirectList& redirects, + size_t destination); + + // Each item in the most visited view can redirect elsewhere. This returns + // the canonical URL one identifying the site if the given URL does appear + // in the "top sites" list. + // + // If the given URL is not in the top sites, this will return an empty GURL. + GURL GetCanonicalURL(const GURL& url) const; + + // Finds the given URL in the redirect chain for the given TopSite, and + // returns the distance from the destination in hops that the given URL is. + // The URL is assumed to be in the list. The destination is 0. + static int GetRedirectDistanceForURL(const MostVisitedURL& most_visited, + const GURL& url); + + // Generates the diff of things that happened between "old" and "new." + // + // The URLs that are in "new" but not "old" will be have their index into + // "new" put in |added_urls|. The URLs that are in "old" but not "new" will + // have their index into "old" put into |deleted_urls|. + // + // URLs appearing in both old and new lists but having different indices will + // have their index into "new" be put into |moved_urls|. + static void DiffMostVisited(const std::vector<MostVisitedURL>& old_list, + const std::vector<MostVisitedURL>& new_list, + std::vector<size_t>* added_urls, + std::vector<size_t>* deleted_urls, + std::vector<size_t>* moved_urls); + + mutable Lock lock_; + + // The cached version of the top sites. The 0th item in this vector is the + // #1 site. + std::vector<MostVisitedURL> top_sites_; + + // The images corresponding to the top_sites. This is indexed by the URL of + // the top site, so this doesn't have to be shuffled around when the ordering + // changes of the top sites. Some top_sites_ entries may not have images. + std::map<GURL, Images> top_images_; + + // Generated from the redirects to and from the most visited pages, this + // maps the redirects to the index into top_sites_ that contains it. + std::map<GURL, size_t> canonical_urls_; + + // TODO(brettw) use the blacklist. + //std::set<GURL> blacklist_; + + DISALLOW_COPY_AND_ASSIGN(TopSites); +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_TOP_SITES_H_ + diff --git a/chrome/browser/history/top_sites_unittest.cc b/chrome/browser/history/top_sites_unittest.cc new file mode 100644 index 0000000..676261a --- /dev/null +++ b/chrome/browser/history/top_sites_unittest.cc @@ -0,0 +1,187 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/top_sites.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/skia/include/core/SkBitmap.h" + +namespace history { + +class TopSitesTest : public testing::Test { + public: + TopSitesTest() : top_sites_(new TopSites) { + } + ~TopSitesTest() { + } + + TopSites& top_sites() { return *top_sites_; } + + virtual void SetUp() { + } + + virtual void TearDown() { + } + + // Wrappers that allow private TopSites functions to be called from the + // individual tests without making them all be friends. + GURL GetCanonicalURL(const GURL& url) const { + AutoLock lock(top_sites_->lock_); // The function asserts it's in the lock. + return top_sites_->GetCanonicalURL(url); + } + + void StoreMostVisited(std::vector<MostVisitedURL>* urls) { + AutoLock lock(top_sites_->lock_); // The function asserts it's in the lock. + top_sites_->StoreMostVisited(urls); + } + + static void DiffMostVisited(const std::vector<MostVisitedURL>& old_list, + const std::vector<MostVisitedURL>& new_list, + std::vector<size_t>* added_urls, + std::vector<size_t>* deleted_urls, + std::vector<size_t>* moved_urls) { + TopSites::DiffMostVisited(old_list, new_list, + added_urls, deleted_urls, moved_urls); + } + + private: + scoped_refptr<TopSites> top_sites_; + + DISALLOW_COPY_AND_ASSIGN(TopSitesTest); +}; + +// Helper function for appending a URL to a vector of "most visited" URLs, +// using the default values for everything but the URL. +static void AppendMostVisitedURL(std::vector<MostVisitedURL>* list, + const GURL& url) { + MostVisitedURL mv; + mv.url = url; + mv.redirects.push_back(url); + list->push_back(mv); +} + +// Same as AppendMostVisitedURL except that it adds a redirect from the first +// URL to the second. +static void AppendMostVisitedURLWithRedirect( + std::vector<MostVisitedURL>* list, + const GURL& redirect_source, const GURL& redirect_dest) { + MostVisitedURL mv; + mv.url = redirect_dest; + mv.redirects.push_back(redirect_source); + mv.redirects.push_back(redirect_dest); + list->push_back(mv); +} + +TEST_F(TopSitesTest, GetCanonicalURL) { + // Have two chains: + // google.com -> www.google.com + // news.google.com (no redirects) + GURL news("http://news.google.com/"); + GURL source("http://google.com/"); + GURL dest("http://www.google.com/"); + + std::vector<MostVisitedURL> most_visited; + AppendMostVisitedURLWithRedirect(&most_visited, source, dest); + AppendMostVisitedURL(&most_visited, news); + StoreMostVisited(&most_visited); + + // Random URLs not in the database shouldn't be reported as being in there. + GURL result = GetCanonicalURL(GURL("http://fark.com/")); + EXPECT_TRUE(result.is_empty()); + + // Easy case, there are no redirects and the exact URL is stored. + result = GetCanonicalURL(news); + EXPECT_EQ(news, result); + + // The URL in question is the source URL in a redirect list. + result = GetCanonicalURL(source); + EXPECT_EQ(dest, result); + + // The URL in question is the destination of a redirect. + result = GetCanonicalURL(dest); + EXPECT_EQ(dest, result); +} + +TEST_F(TopSitesTest, DiffMostVisited) { + GURL stays_the_same("http://staysthesame/"); + GURL gets_added_1("http://getsadded1/"); + GURL gets_added_2("http://getsadded2/"); + GURL gets_deleted_1("http://getsdeleted2/"); + GURL gets_moved_1("http://getsmoved1/"); + + std::vector<MostVisitedURL> old_list; + AppendMostVisitedURL(&old_list, stays_the_same); // 0 (unchanged) + AppendMostVisitedURL(&old_list, gets_deleted_1); // 1 (deleted) + AppendMostVisitedURL(&old_list, gets_moved_1); // 2 (moved to 3) + + std::vector<MostVisitedURL> new_list; + AppendMostVisitedURL(&new_list, stays_the_same); // 0 (unchanged) + AppendMostVisitedURL(&new_list, gets_added_1); // 1 (added) + AppendMostVisitedURL(&new_list, gets_added_2); // 2 (added) + AppendMostVisitedURL(&new_list, gets_moved_1); // 3 (moved from 2) + + std::vector<size_t> added; + std::vector<size_t> deleted; + std::vector<size_t> moved; + DiffMostVisited(old_list, new_list, &added, &deleted, &moved); + + ASSERT_EQ(2u, added.size()); + ASSERT_EQ(1u, deleted.size()); + ASSERT_EQ(1u, moved.size()); + + // There should be 2 URLs added, we don't assume what order they're in inside + // the result vector. + EXPECT_TRUE(added[0] == 1 || added[1] == 1); + EXPECT_TRUE(added[0] == 2 || added[1] == 2); + + EXPECT_EQ(1u, deleted[0]); + EXPECT_EQ(3u, moved[0]); +} + +TEST_F(TopSitesTest, SetPageThumbnail) { + GURL url1a("http://google.com/"); + GURL url1b("http://www.google.com/"); + GURL url2("http://images.google.com/"); + GURL nonexistant_url("http://news.google.com/"); + + std::vector<MostVisitedURL> list; + AppendMostVisitedURL(&list, url2); + + MostVisitedURL mv; + mv.url = url1b; + mv.redirects.push_back(url1a); + mv.redirects.push_back(url1b); + list.push_back(mv); + + // Save our most visited data containing that one site. + StoreMostVisited(&list); + + // Create a dummy thumbnail. + SkBitmap thumbnail; + thumbnail.setConfig(SkBitmap::kARGB_8888_Config, 4, 4); + thumbnail.allocPixels(); + + base::Time now = base::Time::Now(); + ThumbnailScore low_score(1.0, true, true, now); + ThumbnailScore medium_score(0.5, true, true, now); + ThumbnailScore high_score(0.0, true, true, now); + + // Setting the thumbnail for nonexistant pages should fail. + EXPECT_FALSE(top_sites().SetPageThumbnail(nonexistant_url, + thumbnail, medium_score)); + + // Setting the thumbnail for url2 should succeed, lower scores shouldn't + // replace it, higher scores should. + EXPECT_TRUE(top_sites().SetPageThumbnail(url2, thumbnail, medium_score)); + EXPECT_FALSE(top_sites().SetPageThumbnail(url2, thumbnail, low_score)); + EXPECT_TRUE(top_sites().SetPageThumbnail(url2, thumbnail, high_score)); + + // Set on the redirect source should succeed. It should be replacable by + // the same score on the redirect destination, which in turn should not + // be replaced by the source again. + EXPECT_TRUE(top_sites().SetPageThumbnail(url1a, thumbnail, medium_score)); + EXPECT_TRUE(top_sites().SetPageThumbnail(url1b, thumbnail, medium_score)); + EXPECT_FALSE(top_sites().SetPageThumbnail(url1a, thumbnail, medium_score)); +} + +} // namespace history diff --git a/chrome/chrome.gyp b/chrome/chrome.gyp index 1ac712d..cae4673 100755 --- a/chrome/chrome.gyp +++ b/chrome/chrome.gyp @@ -1528,6 +1528,7 @@ 'browser/history/text_database_manager.h', 'browser/history/thumbnail_database.cc', 'browser/history/thumbnail_database.h', + 'browser/history/top_sites.cc', 'browser/history/url_database.cc', 'browser/history/url_database.h', 'browser/history/visit_database.cc', @@ -4318,6 +4319,7 @@ 'browser/history/text_database_manager_unittest.cc', 'browser/history/text_database_unittest.cc', 'browser/history/thumbnail_database_unittest.cc', + 'browser/history/top_sites_unittest.cc', 'browser/thumbnail_store_unittest.cc', 'browser/history/url_database_unittest.cc', 'browser/history/visit_database_unittest.cc', @@ -4423,6 +4425,7 @@ 'common/pref_service_unittest.cc', 'common/property_bag_unittest.cc', 'common/resource_dispatcher_unittest.cc', + 'common/thumbnail_score_unittest.cc', 'common/time_format_unittest.cc', 'common/worker_thread_ticker_unittest.cc', 'common/zip_unittest.cc', diff --git a/chrome/common/thumbnail_score.cc b/chrome/common/thumbnail_score.cc index 339b7e0..dc7856d 100644 --- a/chrome/common/thumbnail_score.cc +++ b/chrome/common/thumbnail_score.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -35,14 +35,16 @@ ThumbnailScore::ThumbnailScore() : boring_score(1.0), good_clipping(false), at_top(false), - time_at_snapshot(Time::Now()) { + time_at_snapshot(Time::Now()), + redirect_hops_from_dest(0) { } ThumbnailScore::ThumbnailScore(double score, bool clipping, bool top) : boring_score(score), good_clipping(clipping), at_top(top), - time_at_snapshot(Time::Now()) { + time_at_snapshot(Time::Now()), + redirect_hops_from_dest(0) { } ThumbnailScore::ThumbnailScore(double score, bool clipping, bool top, @@ -50,7 +52,8 @@ ThumbnailScore::ThumbnailScore(double score, bool clipping, bool top, : boring_score(score), good_clipping(clipping), at_top(top), - time_at_snapshot(time) { + time_at_snapshot(time), + redirect_hops_from_dest(0) { } ThumbnailScore::~ThumbnailScore() { @@ -63,7 +66,8 @@ bool ThumbnailScore::Equals(const ThumbnailScore& rhs) const { return boring_score == rhs.boring_score && good_clipping == rhs.good_clipping && at_top == rhs.at_top && - time_at_snapshot.ToTimeT() == rhs.time_at_snapshot.ToTimeT(); + time_at_snapshot.ToTimeT() == rhs.time_at_snapshot.ToTimeT() && + redirect_hops_from_dest == rhs.redirect_hops_from_dest; } bool ShouldReplaceThumbnailWith(const ThumbnailScore& current, @@ -77,22 +81,32 @@ bool ShouldReplaceThumbnailWith(const ThumbnailScore& current, return replacement.boring_score < ThumbnailScore::kThumbnailMaximumBoringness; } else if (replacement_type == current_type) { - if (replacement.boring_score < current.boring_score) { - // If we have a thumbnail that's straight up less boring, use it. - return true; - } + // It's much easier to do the scaling below when we're dealing with "higher + // is better." Then we can decrease the score by dividing by a fraction. + const double kThumbnailMinimumInterestingness = + 1.0 - ThumbnailScore::kThumbnailMaximumBoringness; + double current_interesting_score = 1.0 - current.boring_score; + double replacement_interesting_score = 1.0 - replacement.boring_score; + + // Degrade the score of each thumbnail to account for how many redirects + // they are away from the destination. 1/(x+1) gives a scaling factor of + // one for x = 0, and asymptotically approaches 0 for larger values of x. + current_interesting_score *= + 1.0 / (current.redirect_hops_from_dest + 1); + replacement_interesting_score *= + 1.0 / (replacement.redirect_hops_from_dest + 1); - // Slowly degrade the boring score of the current thumbnail - // so we take thumbnails which are slightly less good: - TimeDelta since_last_thumbnail = + // Degrade the score and prefer the newer one based on how long apart the + // two thumbnails were taken. This means we'll eventually replace an old + // good one with a new worse one assuming enough time has passed. + TimeDelta time_between_thumbnails = replacement.time_at_snapshot - current.time_at_snapshot; - double degraded_boring_score = current.boring_score + - (since_last_thumbnail.InHours() * - ThumbnailScore::kThumbnailDegradePerHour); + current_interesting_score -= time_between_thumbnails.InHours() * + ThumbnailScore::kThumbnailDegradePerHour; - if (degraded_boring_score > ThumbnailScore::kThumbnailMaximumBoringness) - degraded_boring_score = ThumbnailScore::kThumbnailMaximumBoringness; - if (replacement.boring_score < degraded_boring_score) + if (current_interesting_score < kThumbnailMinimumInterestingness) + current_interesting_score = kThumbnailMinimumInterestingness; + if (replacement_interesting_score > current_interesting_score) return true; } diff --git a/chrome/common/thumbnail_score.h b/chrome/common/thumbnail_score.h index 530f87e..c1ebb81 100644 --- a/chrome/common/thumbnail_score.h +++ b/chrome/common/thumbnail_score.h @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -9,8 +9,9 @@ // A set of metadata about a Thumbnail. struct ThumbnailScore { - // Initializes the ThumbnailScore to the absolute worst possible - // values except for time, which is set to Now(). + // Initializes the ThumbnailScore to the absolute worst possible values + // except for time, which is set to Now(), and redirect_hops_from_dest which + // is set to 0. ThumbnailScore(); // Builds a ThumbnailScore with the passed in values, and sets the @@ -48,6 +49,20 @@ struct ThumbnailScore { // sure thumbnails are kept fresh. base::Time time_at_snapshot; + // The number of hops from the final destination page that this thumbnail was + // taken at. When a thumbnail is taken, this will always be the redirect + // destination (value of 0). + // + // For the most visited view, we'll sometimes get thumbnails for URLs in the + // middle of a redirect chain. In this case, the top sites component will set + // this value so the distance from the destination can be taken into account + // by the comparison function. + // + // If "http://google.com/" redirected to "http://www.google.com/", then + // a thumbnail for the first would have a redirect hop of 1, and the second + // would have a redirect hop of 0. + int redirect_hops_from_dest; + // How bad a thumbnail needs to be before we completely ignore it. static const double kThumbnailMaximumBoringness; diff --git a/chrome/common/thumbnail_score_unittest.cc b/chrome/common/thumbnail_score_unittest.cc new file mode 100644 index 0000000..14d79dd --- /dev/null +++ b/chrome/common/thumbnail_score_unittest.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/common/thumbnail_score.h" +#include "testing/gtest/include/gtest/gtest.h" + +// Tests that the different types of thumbnails are compared properly. +TEST(ThumbnailScoreTest, ShouldReplaceThumbnailWithType) { + base::Time now = base::Time::Now(); + + ThumbnailScore nothing_good(0.5, false, false, now); + ThumbnailScore not_at_top(0.5, false, true, now); + ThumbnailScore bad_clipping(0.5, true, false, now); + ThumbnailScore life_is_awesome(0.5, true, true, now); + + EXPECT_TRUE(ShouldReplaceThumbnailWith(nothing_good, not_at_top)); + EXPECT_TRUE(ShouldReplaceThumbnailWith(nothing_good, bad_clipping)); + EXPECT_TRUE(ShouldReplaceThumbnailWith(nothing_good, life_is_awesome)); + EXPECT_TRUE(ShouldReplaceThumbnailWith(not_at_top, bad_clipping)); + EXPECT_TRUE(ShouldReplaceThumbnailWith(not_at_top, life_is_awesome)); + EXPECT_TRUE(ShouldReplaceThumbnailWith(bad_clipping, life_is_awesome)); +} + +// Tests that we'll replace old thumbnails will crappier but newer ones. +TEST(ThumbnailScoreTest, ShouldReplaceThumbnailWithTime) { + // Use a really long time for the difference so we aren't sensitive to the + // degrading schedule. + base::Time now = base::Time::Now(); + base::Time last_year = now - base::TimeDelta::FromDays(365); + + ThumbnailScore oldie_but_goodie(0.1, true, true, last_year); + ThumbnailScore newie_but_crappie(0.9, true, true, now); + + EXPECT_TRUE(ShouldReplaceThumbnailWith(oldie_but_goodie, newie_but_crappie)); +} + +// Having many redirects should age the thumbnail. +TEST(ThumbnailScoreTest, RedirectCount) { + base::Time now = base::Time::Now(); + + ThumbnailScore no_redirects(0.5, true, true, now); + no_redirects.redirect_hops_from_dest = 0; + ThumbnailScore some_redirects(0.5, true, true, now); + some_redirects.redirect_hops_from_dest = 1; + + EXPECT_TRUE(ShouldReplaceThumbnailWith(some_redirects, no_redirects)); + + // This one has a lot of redirects but a better score. It should still be + // rejected. + ThumbnailScore lotsa_redirects(0.4, true, true, now); + lotsa_redirects.redirect_hops_from_dest = 4; + EXPECT_FALSE(ShouldReplaceThumbnailWith(no_redirects, lotsa_redirects)); +} |