author     boliu@chromium.org <boliu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2013-01-23 00:35:14 +0000
committer  boliu@chromium.org <boliu@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2013-01-23 00:35:14 +0000
commit     1f371fabe8d4a7f505fdb3f4cb358a56db35ba5a (patch)
tree       f9627beba52b585a72e7330d34cf34e6d691af7c /components/visitedlink
parent     053acbd58afeb44de07f152cfd281e3a63fe3f80 (diff)
Componentize visitedlinks to src/components/visitedlink
All visitedlink files moved to src/components/visitedlink/[browser|common|renderer|test]
Created static lib targets for browser, common, and renderer.
Test files are moved but still use the old test targets in chrome/.
Dcommitted because ScopedAllowIO was moved but still triggers a presubmit
error. Bots are mostly happy.
BUG=168716
Review URL: https://codereview.chromium.org/11825011
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@178176 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'components/visitedlink')
20 files changed, 3192 insertions, 0 deletions
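For orientation before the diff: the main embedder-facing API introduced by this change is VisitedLinkDelegate (added in browser/visitedlink_delegate.h below). The following is only an illustrative sketch of how an embedder outside this patch might implement it; the class name, namespace, and in-memory URL list are hypothetical and not part of the commit — only the VisitedLinkDelegate/URLEnumerator interface comes from the change.

```cpp
// Illustrative only: a minimal VisitedLinkDelegate implementation.
// "ExampleDelegate", the "example" namespace, and stored_urls_ are
// hypothetical; the interface itself is defined in this patch.
#include <vector>

#include "base/memory/ref_counted.h"
#include "components/visitedlink/browser/visitedlink_delegate.h"
#include "content/public/browser/browser_context.h"
#include "googleurl/src/gurl.h"

namespace example {

class ExampleDelegate : public components::VisitedLinkDelegate {
 public:
  // Treat two BrowserContexts as equivalent only if they are the same object.
  virtual bool AreEquivalentContexts(content::BrowserContext* context1,
                                     content::BrowserContext* context2) {
    return context1 == context2;
  }

  // Replay every persisted URL into the enumerator, then signal completion.
  // A real embedder would stream these from its history store; this sketch
  // just walks a plain in-memory list.
  virtual void RebuildTable(const scoped_refptr<URLEnumerator>& enumerator) {
    for (size_t i = 0; i < stored_urls_.size(); ++i)
      enumerator->OnURL(stored_urls_[i]);
    enumerator->OnComplete(true);  // All URLs delivered successfully.
  }

 private:
  std::vector<GURL> stored_urls_;
};

}  // namespace example
```

VisitedLinkMaster invokes RebuildTable when it cannot load its table from disk; as the header notes, the enumerator's methods may be called on any thread and the delegate is responsible for synchronizing those calls.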
diff --git a/components/visitedlink/OWNERS b/components/visitedlink/OWNERS
new file mode 100644
index 0000000..b221410
--- /dev/null
+++ b/components/visitedlink/OWNERS
@@ -0,0 +1,2 @@
+brettw@chromium.org
+sky@chromium.org
diff --git a/components/visitedlink/browser/DEPS b/components/visitedlink/browser/DEPS
new file mode 100644
index 0000000..1c35d9c
--- /dev/null
+++ b/components/visitedlink/browser/DEPS
@@ -0,0 +1,3 @@
+include_rules = [
+  "+content/public/browser",
+]
diff --git a/components/visitedlink/browser/visitedlink_delegate.h b/components/visitedlink/browser/visitedlink_delegate.h
new file mode 100644
index 0000000..700cff0
--- /dev/null
+++ b/components/visitedlink/browser/visitedlink_delegate.h
@@ -0,0 +1,56 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_DELEGATE_H_
+#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_DELEGATE_H_
+
+#include "base/memory/ref_counted.h"
+
+class GURL;
+
+namespace content {
+class BrowserContext;
+}  // namespace content
+
+namespace components {
+
+// Delegate class that clients of VisitedLinkMaster must implement.
+class VisitedLinkDelegate {
+ public:
+  // Returns true when the two BrowserContexts are equivalent.
+  virtual bool AreEquivalentContexts(content::BrowserContext* context1,
+                                     content::BrowserContext* context2) = 0;
+
+  // See RebuildTable.
+  class URLEnumerator : public base::RefCountedThreadSafe<URLEnumerator> {
+   public:
+    // Call this with each URL to rebuild the table.
+    virtual void OnURL(const GURL& url) = 0;
+
+    // This must be called by the delegate after RebuildTable is called.
+    // |success| indicates all URLs have been returned successfully. The
+    // URLEnumerator object cannot be used by the delegate after this call.
+    virtual void OnComplete(bool success) = 0;
+
+   protected:
+    virtual ~URLEnumerator() {}
+
+   private:
+    friend class base::RefCountedThreadSafe<URLEnumerator>;
+  };
+
+  // The delegate is responsible for persisting the list of visited URLs
+  // across browser runs. This is called by VisitedLinkMaster to repopulate
+  // its internal table. Note that methods on the enumerator can be called on
+  // any thread, but the delegate is responsible for synchronizing the calls.
+  virtual void RebuildTable(const scoped_refptr<URLEnumerator>& enumerator) = 0;
+
+ protected:
+
+  virtual ~VisitedLinkDelegate() {}
+};
+
+}  // namespace components
+
+#endif  // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_DELEGATE_H_
diff --git a/components/visitedlink/browser/visitedlink_event_listener.cc b/components/visitedlink/browser/visitedlink_event_listener.cc
new file mode 100644
index 0000000..c9e8a51
--- /dev/null
+++ b/components/visitedlink/browser/visitedlink_event_listener.cc
@@ -0,0 +1,220 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+ +#include "components/visitedlink/browser/visitedlink_event_listener.h" + +#include "base/shared_memory.h" +#include "components/visitedlink/browser/visitedlink_delegate.h" +#include "components/visitedlink/common/visitedlink_messages.h" +#include "content/public/browser/notification_service.h" +#include "content/public/browser/notification_types.h" +#include "content/public/browser/render_process_host.h" +#include "content/public/browser/render_widget_host.h" + +using base::Time; +using base::TimeDelta; +using content::RenderWidgetHost; + +namespace { + +// The amount of time we wait to accumulate visited link additions. +static const int kCommitIntervalMs = 100; + +// Size of the buffer after which individual link updates deemed not warranted +// and the overall update should be used instead. +static const unsigned kVisitedLinkBufferThreshold = 50; + +} // namespace + +namespace components { + +// This class manages buffering and sending visited link hashes (fingerprints) +// to renderer based on widget visibility. +// As opposed to the VisitedLinkEventListener, which coalesces to +// reduce the rate of messages being sent to render processes, this class +// ensures that the updates occur only when explicitly requested. This is +// used for RenderProcessHostImpl to only send Add/Reset link events to the +// renderers when their tabs are visible and the corresponding RenderViews are +// created. +class VisitedLinkUpdater { + public: + explicit VisitedLinkUpdater(int render_process_id) + : reset_needed_(false), render_process_id_(render_process_id) { + } + + // Informs the renderer about a new visited link table. + void SendVisitedLinkTable(base::SharedMemory* table_memory) { + content::RenderProcessHost* process = + content::RenderProcessHost::FromID(render_process_id_); + if (!process) + return; // Happens in tests + base::SharedMemoryHandle handle_for_process; + table_memory->ShareToProcess(process->GetHandle(), &handle_for_process); + if (base::SharedMemory::IsHandleValid(handle_for_process)) + process->Send(new ChromeViewMsg_VisitedLink_NewTable( + handle_for_process)); + } + + // Buffers |links| to update, but doesn't actually relay them. + void AddLinks(const VisitedLinkCommon::Fingerprints& links) { + if (reset_needed_) + return; + + if (pending_.size() + links.size() > kVisitedLinkBufferThreshold) { + // Once the threshold is reached, there's no need to store pending visited + // link updates -- we opt for resetting the state for all links. + AddReset(); + return; + } + + pending_.insert(pending_.end(), links.begin(), links.end()); + } + + // Tells the updater that sending individual link updates is no longer + // necessary and the visited state for all links should be reset. + void AddReset() { + reset_needed_ = true; + pending_.clear(); + } + + // Sends visited link update messages: a list of links whose visited state + // changed or reset of visited state for all links. 
+ void Update() { + content::RenderProcessHost* process = + content::RenderProcessHost::FromID(render_process_id_); + if (!process) + return; // Happens in tests + + if (!process->VisibleWidgetCount()) + return; + + if (reset_needed_) { + process->Send(new ChromeViewMsg_VisitedLink_Reset()); + reset_needed_ = false; + return; + } + + if (pending_.empty()) + return; + + process->Send(new ChromeViewMsg_VisitedLink_Add(pending_)); + + pending_.clear(); + } + + private: + bool reset_needed_; + int render_process_id_; + VisitedLinkCommon::Fingerprints pending_; +}; + +VisitedLinkEventListener::VisitedLinkEventListener( + VisitedLinkMaster* master, + content::BrowserContext* browser_context) + : master_(master), + browser_context_(browser_context) { + registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_CREATED, + content::NotificationService::AllBrowserContextsAndSources()); + registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_TERMINATED, + content::NotificationService::AllBrowserContextsAndSources()); + registrar_.Add(this, content::NOTIFICATION_RENDER_WIDGET_VISIBILITY_CHANGED, + content::NotificationService::AllBrowserContextsAndSources()); +} + +VisitedLinkEventListener::~VisitedLinkEventListener() { + if (!pending_visited_links_.empty()) + pending_visited_links_.clear(); +} + +void VisitedLinkEventListener::NewTable(base::SharedMemory* table_memory) { + if (!table_memory) + return; + + // Send to all RenderProcessHosts. + for (Updaters::iterator i = updaters_.begin(); i != updaters_.end(); ++i) { + // Make sure to not send to incognito renderers. + content::RenderProcessHost* process = + content::RenderProcessHost::FromID(i->first); + if (!process) + continue; + + i->second->SendVisitedLinkTable(table_memory); + } +} + +void VisitedLinkEventListener::Add(VisitedLinkMaster::Fingerprint fingerprint) { + pending_visited_links_.push_back(fingerprint); + + if (!coalesce_timer_.IsRunning()) { + coalesce_timer_.Start(FROM_HERE, + TimeDelta::FromMilliseconds(kCommitIntervalMs), this, + &VisitedLinkEventListener::CommitVisitedLinks); + } +} + +void VisitedLinkEventListener::Reset() { + pending_visited_links_.clear(); + coalesce_timer_.Stop(); + + for (Updaters::iterator i = updaters_.begin(); i != updaters_.end(); ++i) { + i->second->AddReset(); + i->second->Update(); + } +} + +void VisitedLinkEventListener::CommitVisitedLinks() { + // Send to all RenderProcessHosts. + for (Updaters::iterator i = updaters_.begin(); i != updaters_.end(); ++i) { + i->second->AddLinks(pending_visited_links_); + i->second->Update(); + } + + pending_visited_links_.clear(); +} + +void VisitedLinkEventListener::Observe( + int type, + const content::NotificationSource& source, + const content::NotificationDetails& details) { + switch (type) { + case content::NOTIFICATION_RENDERER_PROCESS_CREATED: { + content::RenderProcessHost* process = + content::Source<content::RenderProcessHost>(source).ptr(); + if (!master_->GetDelegate()->AreEquivalentContexts( + browser_context_, process->GetBrowserContext())) + return; + + // Happens on browser start up. 
+ if (!master_->shared_memory()) + return; + + updaters_[process->GetID()] = + make_linked_ptr(new VisitedLinkUpdater(process->GetID())); + updaters_[process->GetID()]->SendVisitedLinkTable( + master_->shared_memory()); + break; + } + case content::NOTIFICATION_RENDERER_PROCESS_TERMINATED: { + content::RenderProcessHost* process = + content::Source<content::RenderProcessHost>(source).ptr(); + if (updaters_.count(process->GetID())) { + updaters_.erase(process->GetID()); + } + break; + } + case content::NOTIFICATION_RENDER_WIDGET_VISIBILITY_CHANGED: { + RenderWidgetHost* widget = + content::Source<RenderWidgetHost>(source).ptr(); + int child_id = widget->GetProcess()->GetID(); + if (updaters_.count(child_id)) + updaters_[child_id]->Update(); + break; + } + default: + NOTREACHED(); + break; + } +} + +} // namespace components diff --git a/components/visitedlink/browser/visitedlink_event_listener.h b/components/visitedlink/browser/visitedlink_event_listener.h new file mode 100644 index 0000000..3efac37 --- /dev/null +++ b/components/visitedlink/browser/visitedlink_event_listener.h @@ -0,0 +1,72 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// VisitedLinkEventListener broadcasts link coloring database updates to all +// processes. It also coalesces the updates to avoid excessive broadcasting of +// messages to the renderers. + +#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_EVENT_LISTENER_H_ +#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_EVENT_LISTENER_H_ + +#include <map> + +#include "base/memory/linked_ptr.h" +#include "base/timer.h" +#include "components/visitedlink/browser/visitedlink_master.h" +#include "content/public/browser/notification_observer.h" +#include "content/public/browser/notification_registrar.h" + + +namespace base { +class SharedMemory; +} + +namespace content { +class BrowserContext; +} // namespace content + +namespace components { + +class VisitedLinkUpdater; + +class VisitedLinkEventListener : public VisitedLinkMaster::Listener, + public content::NotificationObserver { + public: + VisitedLinkEventListener(VisitedLinkMaster* master, + content::BrowserContext* browser_context); + virtual ~VisitedLinkEventListener(); + + virtual void NewTable(base::SharedMemory* table_memory) OVERRIDE; + virtual void Add(VisitedLinkMaster::Fingerprint fingerprint) OVERRIDE; + virtual void Reset() OVERRIDE; + + private: + void CommitVisitedLinks(); + + // content::NotificationObserver implementation. + virtual void Observe(int type, + const content::NotificationSource& source, + const content::NotificationDetails& details) OVERRIDE; + + base::OneShotTimer<VisitedLinkEventListener> coalesce_timer_; + VisitedLinkCommon::Fingerprints pending_visited_links_; + + content::NotificationRegistrar registrar_; + + // Map between renderer child ids and their VisitedLinkUpdater. + typedef std::map<int, linked_ptr<VisitedLinkUpdater> > Updaters; + Updaters updaters_; + + VisitedLinkMaster* master_; + + // Used to filter RENDERER_PROCESS_CREATED notifications to renderers that + // belong to this BrowserContext. 
+ content::BrowserContext* browser_context_; + + DISALLOW_COPY_AND_ASSIGN(VisitedLinkEventListener); +}; + +} // namespace components + +#endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_EVENT_LISTENER_H_ diff --git a/components/visitedlink/browser/visitedlink_master.cc b/components/visitedlink/browser/visitedlink_master.cc new file mode 100644 index 0000000..3205bcf --- /dev/null +++ b/components/visitedlink/browser/visitedlink_master.cc @@ -0,0 +1,969 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/visitedlink/browser/visitedlink_master.h" + +#if defined(OS_WIN) +#include <windows.h> +#include <io.h> +#include <shlobj.h> +#endif // defined(OS_WIN) +#include <stdio.h> + +#include <algorithm> + +#include "base/bind.h" +#include "base/bind_helpers.h" +#include "base/containers/stack_container.h" +#include "base/file_util.h" +#include "base/logging.h" +#include "base/message_loop.h" +#include "base/path_service.h" +#include "base/process_util.h" +#include "base/rand_util.h" +#include "base/string_util.h" +#include "base/threading/thread_restrictions.h" +#include "components/visitedlink/browser/visitedlink_delegate.h" +#include "components/visitedlink/browser/visitedlink_event_listener.h" +#include "content/public/browser/browser_context.h" +#include "content/public/browser/browser_thread.h" +#include "googleurl/src/gurl.h" + +namespace components { + +using content::BrowserThread; +using file_util::ScopedFILE; +using file_util::OpenFile; +using file_util::TruncateFile; + +const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; +const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; +const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; +const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; +const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; + +const int32 VisitedLinkMaster::kFileCurrentVersion = 3; + +// the signature at the beginning of the URL table = "VLnk" (visited links) +const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; +const size_t VisitedLinkMaster::kFileHeaderSize = + kFileHeaderSaltOffset + LINK_SALT_LENGTH; + +// This value should also be the same as the smallest size in the lookup +// table in NewTableSizeForCount (prime number). +const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; + +const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; + +namespace { + +// Fills the given salt structure with some quasi-random values +// It is not necessary to generate a cryptographically strong random string, +// only that it be reasonably different for different users. +void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { + DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; + uint64 randval = base::RandUint64(); + memcpy(salt, &randval, 8); +} + +// Opens file on a background thread to not block UI thread. +void AsyncOpen(FILE** file, const FilePath& filename) { + *file = OpenFile(filename, "wb+"); + DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value(); +} + +// Returns true if the write was complete. +static bool WriteToFile(FILE* file, + off_t offset, + const void* data, + size_t data_len) { + if (fseek(file, offset, SEEK_SET) != 0) + return false; // Don't write to an invalid part of the file. 
+ + size_t num_written = fwrite(data, 1, data_len, file); + + // The write may not make it to the kernel (stdlib may buffer the write) + // until the next fseek/fclose call. If we crash, it's easy for our used + // item count to be out of sync with the number of hashes we write. + // Protect against this by calling fflush. + int ret = fflush(file); + DCHECK_EQ(0, ret); + return num_written == data_len; +} + +// This task executes on a background thread and executes a write. This +// prevents us from blocking the UI thread doing I/O. Double pointer to FILE +// is used because file may still not be opened by the time of scheduling +// the task for execution. +void AsyncWrite(FILE** file, int32 offset, const std::string& data) { + if (*file) + WriteToFile(*file, offset, data.data(), data.size()); +} + +// Truncates the file to the current position asynchronously on a background +// thread. Double pointer to FILE is used because file may still not be opened +// by the time of scheduling the task for execution. +void AsyncTruncate(FILE** file) { + if (*file) + base::IgnoreResult(TruncateFile(*file)); +} + +// Closes the file on a background thread and releases memory used for storage +// of FILE* value. Double pointer to FILE is used because file may still not +// be opened by the time of scheduling the task for execution. +void AsyncClose(FILE** file) { + if (*file) + base::IgnoreResult(fclose(*file)); + free(file); +} + +} // namespace + +// TableBuilder --------------------------------------------------------------- + +// How rebuilding from history works +// --------------------------------- +// +// We mark that we're rebuilding from history by setting the table_builder_ +// member in VisitedLinkMaster to the TableBuilder we create. This builder +// will be called on the history thread by the history system for every URL +// in the database. +// +// The builder will store the fingerprints for those URLs, and then marshalls +// back to the main thread where the VisitedLinkMaster will be notified. The +// master then replaces its table with a new table containing the computed +// fingerprints. +// +// The builder must remain active while the history system is using it. +// Sometimes, the master will be deleted before the rebuild is complete, in +// which case it notifies the builder via DisownMaster(). The builder will +// delete itself once rebuilding is complete, and not execute any callback. +class VisitedLinkMaster::TableBuilder + : public VisitedLinkDelegate::URLEnumerator { + public: + TableBuilder(VisitedLinkMaster* master, + const uint8 salt[LINK_SALT_LENGTH]); + + // Called on the main thread when the master is being destroyed. This will + // prevent a crash when the query completes and the master is no longer + // around. We can not actually do anything but mark this fact, since the + // table will be being rebuilt simultaneously on the other thread. + void DisownMaster(); + + // VisitedLinkDelegate::URLEnumerator + virtual void OnURL(const GURL& url); + virtual void OnComplete(bool succeed); + + private: + virtual ~TableBuilder() {} + + // OnComplete mashals to this function on the main thread to do the + // notification. + void OnCompleteMainThread(); + + // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! + VisitedLinkMaster* master_; + + // Indicates whether the operation has failed or not. + bool success_; + + // Salt for this new table. + uint8 salt_[LINK_SALT_LENGTH]; + + // Stores the fingerprints we computed on the background thread. 
+ VisitedLinkCommon::Fingerprints fingerprints_; + + DISALLOW_COPY_AND_ASSIGN(TableBuilder); +}; + +// VisitedLinkMaster ---------------------------------------------------------- + +VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context, + VisitedLinkDelegate* delegate) + : browser_context_(browser_context), + delegate_(delegate), + listener_(new VisitedLinkEventListener( + ALLOW_THIS_IN_INITIALIZER_LIST(this), browser_context)) { + InitMembers(); +} + +VisitedLinkMaster::VisitedLinkMaster(Listener* listener, + VisitedLinkDelegate* delegate, + bool suppress_rebuild, + const FilePath& filename, + int32 default_table_size) + : browser_context_(NULL), + delegate_(delegate) { + listener_.reset(listener); + DCHECK(listener_.get()); + InitMembers(); + + database_name_override_ = filename; + table_size_override_ = default_table_size; + suppress_rebuild_ = suppress_rebuild; +} + +VisitedLinkMaster::~VisitedLinkMaster() { + if (table_builder_.get()) { + // Prevent the table builder from calling us back now that we're being + // destroyed. Note that we DON'T delete the object, since the history + // system is still writing into it. When that is complete, the table + // builder will destroy itself when it finds we are gone. + table_builder_->DisownMaster(); + } + FreeURLTable(); + // FreeURLTable() will schedule closing of the file and deletion of |file_|. + // So nothing should be done here. +} + +void VisitedLinkMaster::InitMembers() { + file_ = NULL; + shared_memory_ = NULL; + shared_memory_serial_ = 0; + used_items_ = 0; + table_size_override_ = 0; + suppress_rebuild_ = false; + sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken(); + +#ifndef NDEBUG + posted_asynchronous_operation_ = false; +#endif +} + +bool VisitedLinkMaster::Init() { + // We probably shouldn't be loading this from the UI thread, + // but it does need to happen early on in startup. + // http://code.google.com/p/chromium/issues/detail?id=24163 + base::ThreadRestrictions::ScopedAllowIO allow_io; + if (!InitFromFile()) + return InitFromScratch(suppress_rebuild_); + return true; +} + +VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { + // Extra check that we are not incognito. This should not happen. + // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is + // removed from BrowserContext. + if (browser_context_ && browser_context_->IsOffTheRecord()) { + NOTREACHED(); + return null_hash_; + } + + if (!url.is_valid()) + return null_hash_; // Don't add invalid URLs. + + Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), + url.spec().size(), + salt_); + if (table_builder_) { + // If we have a pending delete for this fingerprint, cancel it. + std::set<Fingerprint>::iterator found = + deleted_since_rebuild_.find(fingerprint); + if (found != deleted_since_rebuild_.end()) + deleted_since_rebuild_.erase(found); + + // A rebuild is in progress, save this addition in the temporary list so + // it can be added once rebuild is complete. + added_since_rebuild_.insert(fingerprint); + } + + // If the table is "full", we don't add URLs and just drop them on the floor. + // This can happen if we get thousands of new URLs and something causes + // the table resizing to fail. This check prevents a hang in that case. Note + // that this is *not* the resize limit, this is just a sanity check. + if (used_items_ / 8 > table_length_ / 10) + return null_hash_; // Table is more than 80% full. 
+ + return AddFingerprint(fingerprint, true); +} + +void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here, + const base::Closure& task) { + BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_, + from_here, task); +} + +void VisitedLinkMaster::AddURL(const GURL& url) { + Hash index = TryToAddURL(url); + if (!table_builder_ && index != null_hash_) { + // Not rebuilding, so we want to keep the file on disk up-to-date. + WriteUsedItemCountToFile(); + WriteHashRangeToFile(index, index); + ResizeTableIfNecessary(); + } +} + +void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { + for (std::vector<GURL>::const_iterator i = url.begin(); + i != url.end(); ++i) { + Hash index = TryToAddURL(*i); + if (!table_builder_ && index != null_hash_) + ResizeTableIfNecessary(); + } + + // Keeps the file on disk up-to-date. + if (!table_builder_) + WriteFullTable(); +} + +void VisitedLinkMaster::DeleteAllURLs() { + // Any pending modifications are invalid. + added_since_rebuild_.clear(); + deleted_since_rebuild_.clear(); + + // Clear the hash table. + used_items_ = 0; + memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); + + // Resize it if it is now too empty. Resize may write the new table out for + // us, otherwise, schedule writing the new table to disk ourselves. + if (!ResizeTableIfNecessary()) + WriteFullTable(); + + listener_->Reset(); +} + +VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() { + return delegate_; +} + +void VisitedLinkMaster::DeleteURLs(URLIterator* urls) { + if (!urls->HasNextURL()) + return; + + listener_->Reset(); + + if (table_builder_) { + // A rebuild is in progress, save this deletion in the temporary list so + // it can be added once rebuild is complete. + while (urls->HasNextURL()) { + const GURL& url(urls->NextURL()); + if (!url.is_valid()) + continue; + + Fingerprint fingerprint = + ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_); + deleted_since_rebuild_.insert(fingerprint); + + // If the URL was just added and now we're deleting it, it may be in the + // list of things added since the last rebuild. Delete it from that list. + std::set<Fingerprint>::iterator found = + added_since_rebuild_.find(fingerprint); + if (found != added_since_rebuild_.end()) + added_since_rebuild_.erase(found); + + // Delete the URLs from the in-memory table, but don't bother writing + // to disk since it will be replaced soon. + DeleteFingerprint(fingerprint, false); + } + return; + } + + // Compute the deleted URLs' fingerprints and delete them + std::set<Fingerprint> deleted_fingerprints; + while (urls->HasNextURL()) { + const GURL& url(urls->NextURL()); + if (!url.is_valid()) + continue; + deleted_fingerprints.insert( + ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_)); + } + DeleteFingerprintsFromCurrentTable(deleted_fingerprints); +} + +// See VisitedLinkCommon::IsVisited which should be in sync with this algorithm +VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( + Fingerprint fingerprint, + bool send_notifications) { + if (!hash_table_ || table_length_ == 0) { + NOTREACHED(); // Not initialized. + return null_hash_; + } + + Hash cur_hash = HashFingerprint(fingerprint); + Hash first_hash = cur_hash; + while (true) { + Fingerprint cur_fingerprint = FingerprintAt(cur_hash); + if (cur_fingerprint == fingerprint) + return null_hash_; // This fingerprint is already in there, do nothing. + + if (cur_fingerprint == null_fingerprint_) { + // End of probe sequence found, insert here. 
+ hash_table_[cur_hash] = fingerprint; + used_items_++; + // If allowed, notify listener that a new visited link was added. + if (send_notifications) + listener_->Add(fingerprint); + return cur_hash; + } + + // Advance in the probe sequence. + cur_hash = IncrementHash(cur_hash); + if (cur_hash == first_hash) { + // This means that we've wrapped around and are about to go into an + // infinite loop. Something was wrong with the hashtable resizing + // logic, so stop here. + NOTREACHED(); + return null_hash_; + } + } +} + +void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( + const std::set<Fingerprint>& fingerprints) { + bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); + + // Delete the URLs from the table. + for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); + i != fingerprints.end(); ++i) + DeleteFingerprint(*i, !bulk_write); + + // These deleted fingerprints may make us shrink the table. + if (ResizeTableIfNecessary()) + return; // The resize function wrote the new table to disk for us. + + // Nobody wrote this out for us, write the full file to disk. + if (bulk_write) + WriteFullTable(); +} + +bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, + bool update_file) { + if (!hash_table_ || table_length_ == 0) { + NOTREACHED(); // Not initialized. + return false; + } + if (!IsVisited(fingerprint)) + return false; // Not in the database to delete. + + // First update the header used count. + used_items_--; + if (update_file) + WriteUsedItemCountToFile(); + + Hash deleted_hash = HashFingerprint(fingerprint); + + // Find the range of "stuff" in the hash table that is adjacent to this + // fingerprint. These are things that could be affected by the change in + // the hash table. Since we use linear probing, anything after the deleted + // item up until an empty item could be affected. + Hash end_range = deleted_hash; + while (true) { + Hash next_hash = IncrementHash(end_range); + if (next_hash == deleted_hash) + break; // We wrapped around and the whole table is full. + if (!hash_table_[next_hash]) + break; // Found the last spot. + end_range = next_hash; + } + + // We could get all fancy and move the affected fingerprints around, but + // instead we just remove them all and re-add them (minus our deleted one). + // This will mean there's a small window of time where the affected links + // won't be marked visited. + base::StackVector<Fingerprint, 32> shuffled_fingerprints; + Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. + for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { + if (hash_table_[i] != fingerprint) { + // Don't save the one we're deleting! + shuffled_fingerprints->push_back(hash_table_[i]); + + // This will balance the increment of this value in AddFingerprint below + // so there is no net change. + used_items_--; + } + hash_table_[i] = null_fingerprint_; + } + + if (!shuffled_fingerprints->empty()) { + // Need to add the new items back. + for (size_t i = 0; i < shuffled_fingerprints->size(); i++) + AddFingerprint(shuffled_fingerprints[i], false); + } + + // Write the affected range to disk [deleted_hash, end_range]. + if (update_file) + WriteHashRangeToFile(deleted_hash, end_range); + + return true; +} + +void VisitedLinkMaster::WriteFullTable() { + // This function can get called when the file is open, for example, when we + // resize the table. We must handle this case and not try to reopen the file, + // since there may be write operations pending on the file I/O thread. 
+ // + // Note that once we start writing, we do not delete on error. This means + // there can be a partial file, but the short file will be detected next time + // we start, and will be replaced. + // + // This might possibly get corrupted if we crash in the middle of writing. + // We should pick up the most common types of these failures when we notice + // that the file size is different when we load it back in, and then we will + // regenerate the table. + if (!file_) { + file_ = static_cast<FILE**>(calloc(1, sizeof(*file_))); + FilePath filename; + GetDatabaseFileName(&filename); + PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename)); + } + + // Write the new header. + int32 header[4]; + header[0] = kFileSignature; + header[1] = kFileCurrentVersion; + header[2] = table_length_; + header[3] = used_items_; + WriteToFile(file_, 0, header, sizeof(header)); + WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); + + // Write the hash data. + WriteToFile(file_, kFileHeaderSize, + hash_table_, table_length_ * sizeof(Fingerprint)); + + // The hash table may have shrunk, so make sure this is the end. + PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_)); +} + +bool VisitedLinkMaster::InitFromFile() { + DCHECK(file_ == NULL); + + FilePath filename; + GetDatabaseFileName(&filename); + ScopedFILE file_closer(OpenFile(filename, "rb+")); + if (!file_closer.get()) + return false; + + int32 num_entries, used_count; + if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) + return false; // Header isn't valid. + + // Allocate and read the table. + if (!CreateURLTable(num_entries, false)) + return false; + if (!ReadFromFile(file_closer.get(), kFileHeaderSize, + hash_table_, num_entries * sizeof(Fingerprint))) { + FreeURLTable(); + return false; + } + used_items_ = used_count; + +#ifndef NDEBUG + DebugValidate(); +#endif + + file_ = static_cast<FILE**>(malloc(sizeof(*file_))); + *file_ = file_closer.release(); + return true; +} + +bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { + int32 table_size = kDefaultTableSize; + if (table_size_override_) + table_size = table_size_override_; + + // The salt must be generated before the table so that it can be copied to + // the shared memory. + GenerateSalt(salt_); + if (!CreateURLTable(table_size, true)) + return false; + +#ifndef NDEBUG + DebugValidate(); +#endif + + if (suppress_rebuild) { + // When we disallow rebuilds (normally just unit tests), just use the + // current empty table. + WriteFullTable(); + return true; + } + + // This will build the table from history. On the first run, history will + // be empty, so this will be correct. This will also write the new table + // to disk. We don't want to save explicitly here, since the rebuild may + // not complete, leaving us with an empty but valid visited link database. + // In the future, we won't know we need to try rebuilding again. + return RebuildTableFromDelegate(); +} + +bool VisitedLinkMaster::ReadFileHeader(FILE* file, + int32* num_entries, + int32* used_count, + uint8 salt[LINK_SALT_LENGTH]) { + // Get file size. + // Note that there is no need to seek back to the original location in the + // file since ReadFromFile() [which is the next call accessing the file] + // seeks before reading. 
+ if (fseek(file, 0, SEEK_END) == -1) + return false; + size_t file_size = ftell(file); + + if (file_size <= kFileHeaderSize) + return false; + + uint8 header[kFileHeaderSize]; + if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) + return false; + + // Verify the signature. + int32 signature; + memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); + if (signature != kFileSignature) + return false; + + // Verify the version is up-to-date. As with other read errors, a version + // mistmatch will trigger a rebuild of the database from history, which will + // have the effect of migrating the database. + int32 version; + memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); + if (version != kFileCurrentVersion) + return false; // Bad version. + + // Read the table size and make sure it matches the file size. + memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); + if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) + return false; // Bad size. + + // Read the used item count. + memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); + if (*used_count > *num_entries) + return false; // Bad used item count; + + // Read the salt. + memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); + + // This file looks OK from the header's perspective. + return true; +} + +bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) { + if (!database_name_override_.empty()) { + // use this filename, the directory must exist + *filename = database_name_override_; + return true; + } + + if (!browser_context_ || browser_context_->GetPath().empty()) + return false; + + FilePath profile_dir = browser_context_->GetPath(); + *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); + return true; +} + +// Initializes the shared memory structure. The salt should already be filled +// in so that it can be written to the shared memory +bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { + // The table is the size of the table followed by the entries. + uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); + + // Create the shared memory object. + shared_memory_ = new base::SharedMemory(); + if (!shared_memory_) + return false; + + if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { + delete shared_memory_; + shared_memory_ = NULL; + return false; + } + + if (init_to_empty) { + memset(shared_memory_->memory(), 0, alloc_size); + used_items_ = 0; + } + table_length_ = num_entries; + + // Save the header for other processes to read. + SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); + header->length = table_length_; + memcpy(header->salt, salt_, LINK_SALT_LENGTH); + + // Our table pointer is just the data immediately following the size. + hash_table_ = reinterpret_cast<Fingerprint*>( + static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); + + return true; +} + +bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { + base::SharedMemory *old_shared_memory = shared_memory_; + Fingerprint* old_hash_table = hash_table_; + int32 old_table_length = table_length_; + if (!CreateURLTable(num_entries, true)) { + // Try to put back the old state. 
+ shared_memory_ = old_shared_memory; + hash_table_ = old_hash_table; + table_length_ = old_table_length; + return false; + } + +#ifndef NDEBUG + DebugValidate(); +#endif + + return true; +} + +void VisitedLinkMaster::FreeURLTable() { + if (shared_memory_) { + delete shared_memory_; + shared_memory_ = NULL; + } + if (!file_) + return; + PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_)); + // AsyncClose() will close the file and free the memory pointed by |file_|. + file_ = NULL; +} + +bool VisitedLinkMaster::ResizeTableIfNecessary() { + DCHECK(table_length_ > 0) << "Must have a table"; + + // Load limits for good performance/space. We are pretty conservative about + // keeping the table not very full. This is because we use linear probing + // which increases the likelihood of clumps of entries which will reduce + // performance. + const float max_table_load = 0.5f; // Grow when we're > this full. + const float min_table_load = 0.2f; // Shrink when we're < this full. + + float load = ComputeTableLoad(); + if (load < max_table_load && + (table_length_ <= static_cast<float>(kDefaultTableSize) || + load > min_table_load)) + return false; + + // Table needs to grow or shrink. + int new_size = NewTableSizeForCount(used_items_); + DCHECK(new_size > used_items_); + DCHECK(load <= min_table_load || new_size > table_length_); + ResizeTable(new_size); + return true; +} + +void VisitedLinkMaster::ResizeTable(int32 new_size) { + DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); + shared_memory_serial_++; + +#ifndef NDEBUG + DebugValidate(); +#endif + + base::SharedMemory* old_shared_memory = shared_memory_; + Fingerprint* old_hash_table = hash_table_; + int32 old_table_length = table_length_; + if (!BeginReplaceURLTable(new_size)) + return; + + // Now we have two tables, our local copy which is the old one, and the new + // one loaded into this object where we need to copy the data. + for (int32 i = 0; i < old_table_length; i++) { + Fingerprint cur = old_hash_table[i]; + if (cur) + AddFingerprint(cur, false); + } + + // On error unmapping, just forget about it since we can't do anything + // else to release it. + delete old_shared_memory; + + // Send an update notification to all child processes so they read the new + // table. + listener_->NewTable(shared_memory_); + +#ifndef NDEBUG + DebugValidate(); +#endif + + // The new table needs to be written to disk. + WriteFullTable(); +} + +uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { + // These table sizes are selected to be the maximum prime number less than + // a "convenient" multiple of 1K. + static const int table_sizes[] = { + 16381, // 16K = 16384 <- don't shrink below this table size + // (should be == default_table_size) + 32767, // 32K = 32768 + 65521, // 64K = 65536 + 130051, // 128K = 131072 + 262127, // 256K = 262144 + 524269, // 512K = 524288 + 1048549, // 1M = 1048576 + 2097143, // 2M = 2097152 + 4194301, // 4M = 4194304 + 8388571, // 8M = 8388608 + 16777199, // 16M = 16777216 + 33554347}; // 32M = 33554432 + + // Try to leave the table 33% full. + int desired = item_count * 3; + + // Find the closest prime. + for (size_t i = 0; i < arraysize(table_sizes); i ++) { + if (table_sizes[i] > desired) + return table_sizes[i]; + } + + // Growing very big, just approximate a "good" number, not growing as much + // as normal. + return item_count * 2 - 1; +} + +// See the TableBuilder definition in the header file for how this works. 
+bool VisitedLinkMaster::RebuildTableFromDelegate() { + DCHECK(!table_builder_); + + // TODO(brettw) make sure we have reasonable salt! + table_builder_ = new TableBuilder(this, salt_); + delegate_->RebuildTable(table_builder_); + return true; +} + +// See the TableBuilder declaration above for how this works. +void VisitedLinkMaster::OnTableRebuildComplete( + bool success, + const std::vector<Fingerprint>& fingerprints) { + if (success) { + // Replace the old table with a new blank one. + shared_memory_serial_++; + + // We are responsible for freeing it AFTER it has been replaced if + // replacement succeeds. + base::SharedMemory* old_shared_memory = shared_memory_; + + int new_table_size = NewTableSizeForCount( + static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); + if (BeginReplaceURLTable(new_table_size)) { + // Free the old table. + delete old_shared_memory; + + // Add the stored fingerprints to the hash table. + for (size_t i = 0; i < fingerprints.size(); i++) + AddFingerprint(fingerprints[i], false); + + // Also add anything that was added while we were asynchronously + // generating the new table. + for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); + i != added_since_rebuild_.end(); ++i) + AddFingerprint(*i, false); + added_since_rebuild_.clear(); + + // Now handle deletions. + DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); + deleted_since_rebuild_.clear(); + + // Send an update notification to all child processes. + listener_->NewTable(shared_memory_); + + WriteFullTable(); + } + } + table_builder_ = NULL; // Will release our reference to the builder. + + // Notify the unit test that the rebuild is complete (will be NULL in prod.) + if (!rebuild_complete_task_.is_null()) { + rebuild_complete_task_.Run(); + rebuild_complete_task_.Reset(); + } +} + +void VisitedLinkMaster::WriteToFile(FILE** file, + off_t offset, + void* data, + int32 data_size) { +#ifndef NDEBUG + posted_asynchronous_operation_ = true; +#endif + PostIOTask(FROM_HERE, + base::Bind(&AsyncWrite, file, offset, + std::string(static_cast<const char*>(data), data_size))); +} + +void VisitedLinkMaster::WriteUsedItemCountToFile() { + if (!file_) + return; // See comment on the file_ variable for why this might happen. + WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); +} + +void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { + if (!file_) + return; // See comment on the file_ variable for why this might happen. + if (last_hash < first_hash) { + // Handle wraparound at 0. This first write is first_hash->EOF + WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, + &hash_table_[first_hash], + (table_length_ - first_hash + 1) * sizeof(Fingerprint)); + + // Now do 0->last_lash. + WriteToFile(file_, kFileHeaderSize, hash_table_, + (last_hash + 1) * sizeof(Fingerprint)); + } else { + // Normal case, just write the range. + WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, + &hash_table_[first_hash], + (last_hash - first_hash + 1) * sizeof(Fingerprint)); + } +} + +bool VisitedLinkMaster::ReadFromFile(FILE* file, + off_t offset, + void* data, + size_t data_size) { +#ifndef NDEBUG + // Since this function is synchronous, we require that no asynchronous + // operations could possibly be pending. 
+ DCHECK(!posted_asynchronous_operation_); +#endif + + if (fseek(file, offset, SEEK_SET) != 0) + return false; + + size_t num_read = fread(data, 1, data_size, file); + return num_read == data_size; +} + +// VisitedLinkTableBuilder ---------------------------------------------------- + +VisitedLinkMaster::TableBuilder::TableBuilder( + VisitedLinkMaster* master, + const uint8 salt[LINK_SALT_LENGTH]) + : master_(master), + success_(true) { + fingerprints_.reserve(4096); + memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8)); +} + +// TODO(brettw): Do we want to try to cancel the request if this happens? It +// could delay shutdown if there are a lot of URLs. +void VisitedLinkMaster::TableBuilder::DisownMaster() { + master_ = NULL; +} + +void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { + if (!url.is_empty()) { + fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( + url.spec().data(), url.spec().length(), salt_)); + } +} + +void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { + success_ = success; + DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; + + // Marshal to the main thread to notify the VisitedLinkMaster that the + // rebuild is complete. + BrowserThread::PostTask( + BrowserThread::UI, FROM_HERE, + base::Bind(&TableBuilder::OnCompleteMainThread, this)); +} + +void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { + if (master_) + master_->OnTableRebuildComplete(success_, fingerprints_); +} + +} // namespace components diff --git a/components/visitedlink/browser/visitedlink_master.h b/components/visitedlink/browser/visitedlink_master.h new file mode 100644 index 0000000..eb8262c --- /dev/null +++ b/components/visitedlink/browser/visitedlink_master.h @@ -0,0 +1,438 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ +#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ + +#if defined(OS_WIN) +#include <windows.h> +#endif +#include <set> +#include <vector> + +#include "base/callback.h" +#include "base/callback_forward.h" +#include "base/file_path.h" +#include "base/gtest_prod_util.h" +#include "base/shared_memory.h" +#include "base/threading/sequenced_worker_pool.h" +#include "components/visitedlink/common/visitedlink_common.h" + +#if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) +#include "base/logging.h" +#endif + +class GURL; + +namespace content { +class BrowserContext; +} // namespace content + +namespace components { + +class VisitedLinkDelegate; + +// Controls the link coloring database. The master controls all writing to the +// database as well as disk I/O. There should be only one master. +// +// This class will defer writing operations to the file thread. This means that +// class destruction, the file may still be open since operations are pending on +// another thread. +class VisitedLinkMaster : public VisitedLinkCommon { + public: + // Listens to the link coloring database events. The master is given this + // event as a constructor argument and dispatches events using it. + class Listener { + public: + virtual ~Listener() {} + + // Called when link coloring database has been created or replaced. The + // argument is the new table handle. + virtual void NewTable(base::SharedMemory*) = 0; + + // Called when new link has been added. The argument is the fingerprint + // (hash) of the link. 
+ virtual void Add(Fingerprint fingerprint) = 0; + + // Called when link coloring state has been reset. This may occur when + // entire or parts of history were deleted. + virtual void Reset() = 0; + }; + + VisitedLinkMaster(content::BrowserContext* browser_context, + VisitedLinkDelegate* delegate); + + // In unit test mode, we allow the caller to optionally specify the database + // filename so that it can be run from a unit test. The directory where this + // file resides must exist in this mode. You can also specify the default + // table size to test table resizing. If this parameter is 0, we will use the + // defaults. + // + // In the unit test mode, we also allow the caller to provide a history + // service pointer (the history service can't be fetched from the browser + // process when we're in unit test mode). This can be NULL to try to access + // the main version, which will probably fail (which can be good for testing + // this failure mode). + // + // When |suppress_rebuild| is set, we'll not attempt to load data from + // history if the file can't be loaded. This should generally be set for + // testing except when you want to test the rebuild process explicitly. + VisitedLinkMaster(Listener* listener, + VisitedLinkDelegate* delegate, + bool suppress_rebuild, + const FilePath& filename, + int32 default_table_size); + virtual ~VisitedLinkMaster(); + + // Must be called immediately after object creation. Nothing else will work + // until this is called. Returns true on success, false means that this + // object won't work. + bool Init(); + + base::SharedMemory* shared_memory() { return shared_memory_; } + + // Adds a URL to the table. + void AddURL(const GURL& url); + + // Adds a set of URLs to the table. + void AddURLs(const std::vector<GURL>& url); + + // See DeleteURLs. + class URLIterator { + public: + // HasNextURL must return true when this is called. Returns the next URL + // then advances the iterator. Note that the returned reference is only + // valid until the next call of NextURL. + virtual const GURL& NextURL() = 0; + + // Returns true if still has URLs to be iterated. + virtual bool HasNextURL() const = 0; + + protected: + virtual ~URLIterator() {} + }; + + // Deletes the specified URLs from |rows| from the table. + void DeleteURLs(URLIterator* iterator); + + // Clears the visited links table by deleting the file from disk. Used as + // part of history clearing. + void DeleteAllURLs(); + + // Returns the Delegate of this Master. + VisitedLinkDelegate* GetDelegate(); + +#if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST) + // This is a debugging function that can be called to double-check internal + // data structures. It will assert if the check fails. + void DebugValidate(); + + // Sets a task to execute when the next rebuild from history is complete. + // This is used by unit tests to wait for the rebuild to complete before + // they continue. The pointer will be owned by this object after the call. + void set_rebuild_complete_task(const base::Closure& task) { + DCHECK(rebuild_complete_task_.is_null()); + rebuild_complete_task_ = task; + } + + // returns the number of items in the table for testing verification + int32 GetUsedCount() const { + return used_items_; + } + + // Returns the listener. + VisitedLinkMaster::Listener* GetListener() const { + return listener_.get(); + } + + // Call to cause the entire database file to be re-written from scratch + // to disk. Used by the performance tester. 
+ void RewriteFile() { + WriteFullTable(); + } +#endif + + private: + FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete); + FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete); + FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport); + + // Object to rebuild the table on the history thread (see the .cc file). + class TableBuilder; + + // Byte offsets of values in the header. + static const int32 kFileHeaderSignatureOffset; + static const int32 kFileHeaderVersionOffset; + static const int32 kFileHeaderLengthOffset; + static const int32 kFileHeaderUsedOffset; + static const int32 kFileHeaderSaltOffset; + + // The signature at the beginning of a file. + static const int32 kFileSignature; + + // version of the file format this module currently uses + static const int32 kFileCurrentVersion; + + // Bytes in the file header, including the salt. + static const size_t kFileHeaderSize; + + // When creating a fresh new table, we use this many entries. + static const unsigned kDefaultTableSize; + + // When the user is deleting a boatload of URLs, we don't really want to do + // individual writes for each of them. When the count exceeds this threshold, + // we will write the whole table to disk at once instead of individual items. + static const size_t kBigDeleteThreshold; + + // Backend for the constructors initializing the members. + void InitMembers(); + + // If a rebuild is in progress, we save the URL in the temporary list. + // Otherwise, we add this to the table. Returns the index of the + // inserted fingerprint or null_hash_ on failure. + Hash TryToAddURL(const GURL& url); + + // File I/O functions + // ------------------ + + // Posts the given task to the blocking worker pool with our options. + void PostIOTask(const tracked_objects::Location& from_here, + const base::Closure& task); + + // Writes the entire table to disk. It will leave the table file open and + // the handle to it will be stored in file_. + void WriteFullTable(); + + // Try to load the table from the database file. If the file doesn't exist or + // is corrupt, this will return failure. + bool InitFromFile(); + + // Reads the header of the link coloring database from disk. Assumes the + // file pointer is at the beginning of the file and that there are no pending + // asynchronous I/O operations. + // + // Returns true on success and places the size of the table in num_entries + // and the number of nonzero fingerprints in used_count. This will fail if + // the version of the file is not the current version of the database. + bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count, + uint8 salt[LINK_SALT_LENGTH]); + + // Fills *filename with the name of the link database filename + bool GetDatabaseFileName(FilePath* filename); + + // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy + // the write to a background thread. + void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size); + + // Helper function to schedule and asynchronous write of the used count to + // disk (this is a common operation). + void WriteUsedItemCountToFile(); + + // Helper function to schedule an asynchronous write of the given range of + // hash functions to disk. The range is inclusive on both ends. The range can + // wrap around at 0 and this function will handle it. + void WriteHashRangeToFile(Hash first_hash, Hash last_hash); + + // Synchronous read from the file. Assumes there are no pending asynchronous + // I/O functions. Returns true if the entire buffer was successfully filled. 
+ bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size); + + // General table handling + // ---------------------- + + // Called to add a fingerprint to the table. If |send_notifications| is true + // and the item is added successfully, Listener::Add will be invoked. + // Returns the index of the inserted fingerprint or null_hash_ if there was a + // duplicate and this item was skippped. + Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications); + + // Deletes all fingerprints from the given vector from the current hash table + // and syncs it to disk if there are changes. This does not update the + // deleted_since_rebuild_ list, the caller must update this itself if there + // is an update pending. + void DeleteFingerprintsFromCurrentTable( + const std::set<Fingerprint>& fingerprints); + + // Removes the indicated fingerprint from the table. If the update_file flag + // is set, the changes will also be written to disk. Returns true if the + // fingerprint was deleted, false if it was not in the table to delete. + bool DeleteFingerprint(Fingerprint fingerprint, bool update_file); + + // Creates a new empty table, call if InitFromFile() fails. Normally, when + // |suppress_rebuild| is false, the table will be rebuilt from history, + // keeping us in sync. When |suppress_rebuild| is true, the new table will be + // empty and we will not consult history. This is used when clearing the + // database and for unit tests. + bool InitFromScratch(bool suppress_rebuild); + + // Allocates the Fingerprint structure and length. When init_to_empty is set, + // the table will be filled with 0s and used_items_ will be set to 0 as well. + // If the flag is not set, these things are untouched and it is the + // responsibility of the caller to fill them (like when we are reading from + // a file). + bool CreateURLTable(int32 num_entries, bool init_to_empty); + + // A wrapper for CreateURLTable, this will allocate a new table, initialized + // to empty. The caller is responsible for saving the shared memory pointer + // and handles before this call (they will be replaced with new ones) and + // releasing them later. This is designed for callers that make a new table + // and then copy values from the old table to the new one, then release the + // old table. + // + // Returns true on success. On failure, the old table will be restored. The + // caller should not attemp to release the pointer/handle in this case. + bool BeginReplaceURLTable(int32 num_entries); + + // unallocates the Fingerprint table + void FreeURLTable(); + + // For growing the table. ResizeTableIfNecessary will check to see if the + // table should be resized and calls ResizeTable if needed. Returns true if + // we decided to resize the table. + bool ResizeTableIfNecessary(); + + // Resizes the table (growing or shrinking) as necessary to accomodate the + // current count. + void ResizeTable(int32 new_size); + + // Returns the desired table size for |item_count| URLs. + uint32 NewTableSizeForCount(int32 item_count) const; + + // Computes the table load as fraction. For example, if 1/4 of the entries are + // full, this value will be 0.25 + float ComputeTableLoad() const { + return static_cast<float>(used_items_) / static_cast<float>(table_length_); + } + + // Initializes a rebuild of the visited link database based on the browser + // history. This will set table_builder_ while working, and there should not + // already be a rebuild in place when called. 
See the definition for more + // details on how this works. + // + // Returns true on success. Failure means we're not attempting to rebuild + // the database because something failed. + bool RebuildTableFromDelegate(); + + // Callback that the table rebuilder uses when the rebuild is complete. + // |success| is true if the fingerprint generation succeeded, in which case + // |fingerprints| will contain the computed fingerprints. On failure, there + // will be no fingerprints. + void OnTableRebuildComplete(bool success, + const std::vector<Fingerprint>& fingerprints); + + // Increases or decreases the given hash value by one, wrapping around as + // necessary. Used for probing. + inline Hash IncrementHash(Hash hash) { + if (hash >= table_length_ - 1) + return 0; // Wrap around. + return hash + 1; + } + inline Hash DecrementHash(Hash hash) { + if (hash <= 0) + return table_length_ - 1; // Wrap around. + return hash - 1; + } + +#ifndef NDEBUG + // Indicates whether any asynchronous operation has ever been completed. + // We do some synchronous reads that require that no asynchronous operations + // are pending, yet we don't track whether they have been completed. This + // flag is a sanity check that these reads only happen before any + // asynchronous writes have been fired. + bool posted_asynchronous_operation_; +#endif + + // Reference to the browser context that this object belongs to + // (it knows the path to where the data is stored) + content::BrowserContext* browser_context_; + + // Client owns the delegate and is responsible for it being valid through + // the life time this VisitedLinkMaster. + VisitedLinkDelegate* delegate_; + + // VisitedLinkEventListener to handle incoming events. + scoped_ptr<Listener> listener_; + + // Lazily initialized sequence token for posting file tasks. + base::SequencedWorkerPool::SequenceToken sequence_token_; + + // When non-NULL, indicates we are in database rebuild mode and points to + // the class collecting fingerprint information from the history system. + // The pointer is owned by this class, but it must remain valid while the + // history query is running. We must only delete it when the query is done. + scoped_refptr<TableBuilder> table_builder_; + + // Indicates URLs added and deleted since we started rebuilding the table. + std::set<Fingerprint> added_since_rebuild_; + std::set<Fingerprint> deleted_since_rebuild_; + + // TODO(brettw) Support deletion, we need to track whether anything was + // deleted during the rebuild here. Then we should delete any of these + // entries from the complete table later. + // std::vector<Fingerprint> removed_since_rebuild_; + + // The currently open file with the table in it. This may be NULL if we're + // rebuilding and haven't written a new version yet. Writing to the file may + // be safely ignored in this case. Also |file_| may be non-NULL but point to + // a NULL pointer. That would mean that opening of the file is already + // scheduled in a background thread and any writing to the file can also be + // scheduled to the background thread as it's guaranteed to be executed after + // the opening. + // The class owns both the |file_| pointer and the pointer pointed + // by |*file_|. + FILE** file_; + + // Shared memory consists of a SharedHeader followed by the table. + base::SharedMemory *shared_memory_; + + // When we generate new tables, we increment the serial number of the + // shared memory object. 
+ int32 shared_memory_serial_; + + // Number of non-empty items in the table, used to compute fullness. + int32 used_items_; + + // Testing values ----------------------------------------------------------- + // + // The following fields exist for testing purposes. They are not used in + // release builds. It'd be nice to eliminate them in release builds, but we + // don't want to change the signature of the object between the unit test and + // regular builds. Instead, we just have "default" values that these take + // in release builds that give "regular" behavior. + + // Overridden database file name for testing + FilePath database_name_override_; + + // When nonzero, overrides the table size for new databases for testing + int32 table_size_override_; + + // When set, indicates the task that should be run after the next rebuild from + // history is complete. + base::Closure rebuild_complete_task_; + + // Set to prevent us from attempting to rebuild the database from global + // history if we have an error opening the file. This is used for testing, + // will be false in production. + bool suppress_rebuild_; + + DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster); +}; + +// NOTE: These methods are defined inline here, so we can share the compilation +// of visitedlink_master.cc between the browser and the unit/perf tests. + +#if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) +inline void VisitedLinkMaster::DebugValidate() { + int32 used_count = 0; + for (int32 i = 0; i < table_length_; i++) { + if (hash_table_[i]) + used_count++; + } + DCHECK_EQ(used_count, used_items_); +} +#endif + +} // namespace components + +#endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ diff --git a/components/visitedlink/common/DEPS b/components/visitedlink/common/DEPS new file mode 100644 index 0000000..d5923ee --- /dev/null +++ b/components/visitedlink/common/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+content/public/common", +] diff --git a/components/visitedlink/common/OWNERS b/components/visitedlink/common/OWNERS new file mode 100644 index 0000000..0abb418 --- /dev/null +++ b/components/visitedlink/common/OWNERS @@ -0,0 +1,8 @@ +# Changes to IPC messages require a security review to avoid introducing +# new sandbox escapes. +per-file *_messages.h=set noparent +per-file *_messages.h=cdn@chromium.org +per-file *_messages.h=jln@chromium.org +per-file *_messages.h=jschuh@chromium.org +per-file *_messages.h=palmer@chromium.org +per-file *_messages.h=tsepez@chromium.org diff --git a/components/visitedlink/common/visitedlink_common.cc b/components/visitedlink/common/visitedlink_common.cc new file mode 100644 index 0000000..9ffc88b --- /dev/null +++ b/components/visitedlink/common/visitedlink_common.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/visitedlink/common/visitedlink_common.h" + +#include <string.h> // for memset() + +#include "base/logging.h" +#include "base/md5.h" +#include "googleurl/src/gurl.h" + +namespace components { + +const VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0; +const VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1; + +VisitedLinkCommon::VisitedLinkCommon() + : hash_table_(NULL), + table_length_(0) { + memset(salt_, 0, sizeof(salt_)); +} + +VisitedLinkCommon::~VisitedLinkCommon() { +} + +// FIXME: this uses linear probing, it should be replaced with quadratic +// probing or something better. See VisitedLinkMaster::AddFingerprint +bool VisitedLinkCommon::IsVisited(const char* canonical_url, + size_t url_len) const { + if (url_len == 0) + return false; + if (!hash_table_ || table_length_ == 0) + return false; + return IsVisited(ComputeURLFingerprint(canonical_url, url_len)); +} + +bool VisitedLinkCommon::IsVisited(const GURL& url) const { + return IsVisited(url.spec().data(), url.spec().size()); +} + +bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const { + // Go through the table until we find the item or an empty spot (meaning it + // wasn't found). This loop will terminate as long as the table isn't full, + // which should be enforced by AddFingerprint. + Hash first_hash = HashFingerprint(fingerprint); + Hash cur_hash = first_hash; + while (true) { + Fingerprint cur_fingerprint = FingerprintAt(cur_hash); + if (cur_fingerprint == null_fingerprint_) + return false; // End of probe sequence found. + if (cur_fingerprint == fingerprint) + return true; // Found a match. + + // This spot was taken, but not by the item we're looking for, search in + // the next position. + cur_hash++; + if (cur_hash == table_length_) + cur_hash = 0; + if (cur_hash == first_hash) { + // Wrapped around and didn't find an empty space, this means we're in an + // infinite loop because AddFingerprint didn't do its job resizing. + NOTREACHED(); + return false; + } + } +} + +// Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint, +// this is as random as any other subset of the MD5SUM. +// +// FIXME: this uses the MD5SUM of the 16-bit character version. For systems +// where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be +// compatable. We should define explicitly what should happen here across +// platforms, and convert if necessary (probably to UTF-16). + +// static +VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint( + const char* canonical_url, + size_t url_len, + const uint8 salt[LINK_SALT_LENGTH]) { + DCHECK(url_len > 0) << "Canonical URLs should not be empty"; + + base::MD5Context ctx; + base::MD5Init(&ctx); + base::MD5Update(&ctx, base::StringPiece(reinterpret_cast<const char*>(salt), + LINK_SALT_LENGTH)); + base::MD5Update(&ctx, base::StringPiece(canonical_url, url_len)); + + base::MD5Digest digest; + base::MD5Final(&digest, &ctx); + + // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that + // direct cast the alignment could be wrong, and we can't access a 64-bit int + // on arbitrary alignment on some processors. This reinterpret_casts it + // down to a char array of the same size as fingerprint, and then does the + // bit cast, which amounts to a memcpy. This does not handle endian issues. 
+ return bit_cast<Fingerprint, uint8[8]>( + *reinterpret_cast<uint8(*)[8]>(&digest.a)); +} + +} // namespace components diff --git a/components/visitedlink/common/visitedlink_common.h b/components/visitedlink/common/visitedlink_common.h new file mode 100644 index 0000000..0443085 --- /dev/null +++ b/components/visitedlink/common/visitedlink_common.h @@ -0,0 +1,138 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_ +#define COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_ + +#include <vector> + +#include "base/basictypes.h" + +class GURL; + +namespace components { + +// number of bytes in the salt +#define LINK_SALT_LENGTH 8 + +// A multiprocess-safe database of the visited links for the browser. There +// should be exactly one process that has write access (implemented by +// VisitedLinkMaster), while all other processes should be read-only +// (implemented by VisitedLinkSlave). These other processes add links by calling +// the writer process to add them for it. The writer may also notify the readers +// to replace their table when the table is resized. +// +// IPC is not implemented in these classes. This is done through callback +// functions supplied by the creator of these objects to allow more flexibility, +// especially for testing. +// +// This class defines the common base for these others. We implement accessors +// for looking things up in the hash table, and for computing hash values and +// fingerprints. Both the master and the slave inherit from this, and add their +// own code to set up and change these values as their design requires. The +// slave pretty much just sets up the shared memory and saves the pointer. The +// master does a lot of work to manage the table, reading and writing it to and +// from disk, and resizing it when it gets too full. +// +// To ask whether a page is in history, we compute a 64-bit fingerprint of the +// URL. This URL is hashed and we see if it is in the URL hashtable. If it is, +// we consider it visited. Otherwise, it is unvisited. Note that it is possible +// to get collisions, which is the penalty for not storing all URL strings in +// memory (which could get to be more than we want to have in memory). We use +// a salt value for the links on one computer so that an attacker can not +// manually create a link that causes a collision. +class VisitedLinkCommon { + public: + // A number that identifies the URL. + typedef uint64 Fingerprint; + typedef std::vector<Fingerprint> Fingerprints; + + // A hash value of a fingerprint + typedef int32 Hash; + + // A fingerprint or hash value that does not exist + static const Fingerprint null_fingerprint_; + static const Hash null_hash_; + + VisitedLinkCommon(); + virtual ~VisitedLinkCommon(); + + // Returns the fingerprint for the given URL. + Fingerprint ComputeURLFingerprint(const char* canonical_url, + size_t url_len) const { + return ComputeURLFingerprint(canonical_url, url_len, salt_); + } + + // Looks up the given key in the table. The fingerprint for the URL is + // computed if you call one with the string argument. Returns true if found. + // Does not modify the hastable. 
+ bool IsVisited(const char* canonical_url, size_t url_len) const; + bool IsVisited(const GURL& url) const; + bool IsVisited(Fingerprint fingerprint) const; + +#ifdef UNIT_TEST + // Returns statistics about DB usage + void GetUsageStatistics(int32* table_size, + VisitedLinkCommon::Fingerprint** fingerprints) { + *table_size = table_length_; + *fingerprints = hash_table_; + } +#endif + + protected: + // This structure is at the beginning of the shared memory so that the slaves + // can get stats on the table + struct SharedHeader { + // see goes into table_length_ + uint32 length; + + // goes into salt_ + uint8 salt[LINK_SALT_LENGTH]; + }; + + // Returns the fingerprint at the given index into the URL table. This + // function should be called instead of accessing the table directly to + // contain endian issues. + Fingerprint FingerprintAt(int32 table_offset) const { + if (!hash_table_) + return null_fingerprint_; + return hash_table_[table_offset]; + } + + // Computes the fingerprint of the given canonical URL. It is static so the + // same algorithm can be re-used by the table rebuilder, so you will have to + // pass the salt as a parameter. See the non-static version above if you + // want to use the current class' salt. + static Fingerprint ComputeURLFingerprint(const char* canonical_url, + size_t url_len, + const uint8 salt[LINK_SALT_LENGTH]); + + // Computes the hash value of the given fingerprint, this is used as a lookup + // into the hashtable. + static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) { + if (table_length == 0) + return null_hash_; + return static_cast<Hash>(fingerprint % table_length); + } + // Uses the current hashtable. + Hash HashFingerprint(Fingerprint fingerprint) const { + return HashFingerprint(fingerprint, table_length_); + } + + // pointer to the first item + VisitedLinkCommon::Fingerprint* hash_table_; + + // the number of items in the hash table + int32 table_length_; + + // salt used for each URL when computing the fingerprint + uint8 salt_[LINK_SALT_LENGTH]; + + private: + DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon); +}; + +} // namespace components + +#endif // COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_ diff --git a/components/visitedlink/common/visitedlink_message_generator.cc b/components/visitedlink/common/visitedlink_message_generator.cc new file mode 100644 index 0000000..29d3191 --- /dev/null +++ b/components/visitedlink/common/visitedlink_message_generator.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Get basic type definitions. +#define IPC_MESSAGE_IMPL +#include "components/visitedlink/common/visitedlink_message_generator.h" + +// Generate constructors. +#include "ipc/struct_constructor_macros.h" +#include "components/visitedlink/common/visitedlink_message_generator.h" + +// Generate destructors. +#include "ipc/struct_destructor_macros.h" +#include "components/visitedlink/common/visitedlink_message_generator.h" + +// Generate param traits write methods. +#include "ipc/param_traits_write_macros.h" +namespace IPC { +#include "components/visitedlink/common/visitedlink_message_generator.h" +} // namespace IPC + +// Generate param traits read methods. +#include "ipc/param_traits_read_macros.h" +namespace IPC { +#include "components/visitedlink/common/visitedlink_message_generator.h" +} // namespace IPC + +// Generate param traits log methods. 
+#include "ipc/param_traits_log_macros.h" +namespace IPC { +#include "components/visitedlink/common/visitedlink_message_generator.h" +} // namespace IPC + diff --git a/components/visitedlink/common/visitedlink_message_generator.h b/components/visitedlink/common/visitedlink_message_generator.h new file mode 100644 index 0000000..e4a496b --- /dev/null +++ b/components/visitedlink/common/visitedlink_message_generator.h @@ -0,0 +1,7 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Multiply-included file, no traditional include guard. + +#include "components/visitedlink/common/visitedlink_messages.h" diff --git a/components/visitedlink/common/visitedlink_messages.h b/components/visitedlink/common/visitedlink_messages.h new file mode 100644 index 0000000..ec557b7 --- /dev/null +++ b/components/visitedlink/common/visitedlink_messages.h @@ -0,0 +1,29 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Multiply-included file, no traditional include guard. +#include <vector> + +#include "base/basictypes.h" +#include "base/shared_memory.h" +#include "content/public/common/common_param_traits_macros.h" +#include "ipc/ipc_message_macros.h" + +#define IPC_MESSAGE_START VisitedLinkMsgStart + +// History system notification that the visited link database has been +// replaced. It has one SharedMemoryHandle argument consisting of the table +// handle. This handle is valid in the context of the renderer +IPC_MESSAGE_CONTROL1(ChromeViewMsg_VisitedLink_NewTable, + base::SharedMemoryHandle) + +// History system notification that a link has been added and the link +// coloring state for the given hash must be re-calculated. +IPC_MESSAGE_CONTROL1(ChromeViewMsg_VisitedLink_Add, std::vector<uint64>) + +// History system notification that one or more history items have been +// deleted, which at this point means that all link coloring state must be +// re-calculated. +IPC_MESSAGE_CONTROL0(ChromeViewMsg_VisitedLink_Reset) + diff --git a/components/visitedlink/renderer/DEPS b/components/visitedlink/renderer/DEPS new file mode 100644 index 0000000..49859da --- /dev/null +++ b/components/visitedlink/renderer/DEPS @@ -0,0 +1,6 @@ +include_rules = [ + "+content/public/renderer", + "+third_party/WebKit/Source/Platform/chromium/public", + "+third_party/WebKit/Source/WebKit/chromium/public", +] + diff --git a/components/visitedlink/renderer/visitedlink_slave.cc b/components/visitedlink/renderer/visitedlink_slave.cc new file mode 100644 index 0000000..c9d569a --- /dev/null +++ b/components/visitedlink/renderer/visitedlink_slave.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/visitedlink/renderer/visitedlink_slave.h" + +#include "base/logging.h" +#include "base/shared_memory.h" +#include "components/visitedlink/common/visitedlink_messages.h" +#include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" + +namespace components { + +using WebKit::WebView; + +VisitedLinkSlave::VisitedLinkSlave() : shared_memory_(NULL) { +} + +VisitedLinkSlave::~VisitedLinkSlave() { + FreeTable(); +} + +bool VisitedLinkSlave::OnControlMessageReceived(const IPC::Message& message) { + bool handled = true; + IPC_BEGIN_MESSAGE_MAP(VisitedLinkSlave, message) + IPC_MESSAGE_HANDLER(ChromeViewMsg_VisitedLink_NewTable, + OnUpdateVisitedLinks) + IPC_MESSAGE_HANDLER(ChromeViewMsg_VisitedLink_Add, OnAddVisitedLinks) + IPC_MESSAGE_HANDLER(ChromeViewMsg_VisitedLink_Reset, OnResetVisitedLinks) + IPC_MESSAGE_UNHANDLED(handled = false) + IPC_END_MESSAGE_MAP() + return handled; +} + +// This function's job is to initialize the table with the given +// shared memory handle. This memory is mapped into the process. +void VisitedLinkSlave::OnUpdateVisitedLinks(base::SharedMemoryHandle table) { + DCHECK(base::SharedMemory::IsHandleValid(table)) << "Bad table handle"; + // since this function may be called again to change the table, we may need + // to free old objects + FreeTable(); + DCHECK(shared_memory_ == NULL && hash_table_ == NULL); + + // create the shared memory object + shared_memory_ = new base::SharedMemory(table, true); + if (!shared_memory_) + return; + + // map the header into our process so we can see how long the rest is, + // and set the salt + if (!shared_memory_->Map(sizeof(SharedHeader))) + return; + SharedHeader* header = + static_cast<SharedHeader*>(shared_memory_->memory()); + DCHECK(header); + int32 table_len = header->length; + memcpy(salt_, header->salt, sizeof(salt_)); + shared_memory_->Unmap(); + + // now do the whole table because we know the length + if (!shared_memory_->Map(sizeof(SharedHeader) + + table_len * sizeof(Fingerprint))) { + shared_memory_->Close(); + return; + } + + // commit the data + DCHECK(shared_memory_->memory()); + hash_table_ = reinterpret_cast<Fingerprint*>( + static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); + table_length_ = table_len; +} + +void VisitedLinkSlave::OnAddVisitedLinks( + const VisitedLinkSlave::Fingerprints& fingerprints) { + for (size_t i = 0; i < fingerprints.size(); ++i) + WebView::updateVisitedLinkState(fingerprints[i]); +} + +void VisitedLinkSlave::OnResetVisitedLinks() { + WebView::resetVisitedLinkState(); +} + +void VisitedLinkSlave::FreeTable() { + if (shared_memory_) { + delete shared_memory_; + shared_memory_ = NULL; + } + hash_table_ = NULL; + table_length_ = 0; +} + +} // namespace components diff --git a/components/visitedlink/renderer/visitedlink_slave.h b/components/visitedlink/renderer/visitedlink_slave.h new file mode 100644 index 0000000..ffcc2f6 --- /dev/null +++ b/components/visitedlink/renderer/visitedlink_slave.h @@ -0,0 +1,41 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_VISITEDLINK_RENDERER_VISITEDLINK_SLAVE_H_ +#define COMPONENTS_VISITEDLINK_RENDERER_VISITEDLINK_SLAVE_H_ + +#include "base/compiler_specific.h" +#include "base/shared_memory.h" +#include "components/visitedlink/common/visitedlink_common.h" +#include "content/public/renderer/render_process_observer.h" + +namespace components { + +// Reads the link coloring database provided by the master. There can be any +// number of slaves reading the same database. +class VisitedLinkSlave : public VisitedLinkCommon, + public content::RenderProcessObserver { + public: + VisitedLinkSlave(); + virtual ~VisitedLinkSlave(); + + // RenderProcessObserver implementation. + virtual bool OnControlMessageReceived(const IPC::Message& message) OVERRIDE; + + // Message handlers. + void OnUpdateVisitedLinks(base::SharedMemoryHandle table); + void OnAddVisitedLinks(const VisitedLinkSlave::Fingerprints& fingerprints); + void OnResetVisitedLinks(); + private: + void FreeTable(); + + // shared memory consists of a SharedHeader followed by the table + base::SharedMemory* shared_memory_; + + DISALLOW_COPY_AND_ASSIGN(VisitedLinkSlave); +}; + +} // namespace components + +#endif // COMPONENTS_VISITEDLINK_RENDERER_VISITEDLINK_SLAVE_H_ diff --git a/components/visitedlink/test/DEPS b/components/visitedlink/test/DEPS new file mode 100644 index 0000000..0157a48 --- /dev/null +++ b/components/visitedlink/test/DEPS @@ -0,0 +1,6 @@ +include_rules = [ + "+content/public", + + # TODO(boliu): Remove this when we have our own component test harness. + "!chrome/test", +] diff --git a/components/visitedlink/test/visitedlink_perftest.cc b/components/visitedlink/test/visitedlink_perftest.cc new file mode 100644 index 0000000..68b250c --- /dev/null +++ b/components/visitedlink/test/visitedlink_perftest.cc @@ -0,0 +1,202 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <algorithm> +#include <string> +#include <vector> + +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/perftimer.h" +#include "base/shared_memory.h" +#include "base/stringprintf.h" +#include "base/test/test_file_util.h" +#include "components/visitedlink/browser/visitedlink_master.h" +#include "googleurl/src/gurl.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::TimeDelta; + +namespace components { + +namespace { + +// how we generate URLs, note that the two strings should be the same length +const int add_count = 10000; +const int load_test_add_count = 250000; +const char added_prefix[] = "http://www.google.com/stuff/something/foo?session=85025602345625&id=1345142319023&seq="; +const char unadded_prefix[] = "http://www.google.org/stuff/something/foo?session=39586739476365&id=2347624314402&seq="; + +// Returns a URL with the given prefix and index +GURL TestURL(const char* prefix, int i) { + return GURL(base::StringPrintf("%s%d", prefix, i)); +} + +// We have no slaves, so all methods on this listener are a no-ops. 
+class DummyVisitedLinkEventListener : public VisitedLinkMaster::Listener {
+ public:
+  DummyVisitedLinkEventListener() {}
+  virtual void NewTable(base::SharedMemory* table) {}
+  virtual void Add(VisitedLinkCommon::Fingerprint) {}
+  virtual void Reset() {}
+
+  static DummyVisitedLinkEventListener* GetInstance() {
+    static DummyVisitedLinkEventListener instance;
+    return &instance;
+  }
+};
+
+
+// This checks IsVisited for the URLs starting with the given prefix and
+// within the given range.
+void CheckVisited(VisitedLinkMaster& master, const char* prefix,
+                  int begin, int end) {
+  for (int i = begin; i < end; i++)
+    master.IsVisited(TestURL(prefix, i));
+}
+
+// Fills the master's table with URLs starting with the given prefix and
+// within the given range.
+void FillTable(VisitedLinkMaster& master, const char* prefix,
+               int begin, int end) {
+  for (int i = begin; i < end; i++)
+    master.AddURL(TestURL(prefix, i));
+}
+
+class VisitedLink : public testing::Test {
+ protected:
+  FilePath db_path_;
+  virtual void SetUp() {
+    ASSERT_TRUE(file_util::CreateTemporaryFile(&db_path_));
+  }
+  virtual void TearDown() {
+    file_util::Delete(db_path_, false);
+  }
+};
+
+}  // namespace
+
+// This test tests adding many things to a database, and how long it takes
+// to query the database with different numbers of things in it. The time
+// is the total time to do all the operations, and as such, it is only
+// useful for a regression test. If there is a regression, it might be
+// useful to make another set of tests to test these things in isolation.
+TEST_F(VisitedLink, TestAddAndQuery) {
+  // init
+  VisitedLinkMaster master(DummyVisitedLinkEventListener::GetInstance(),
+                           NULL, true, db_path_, 0);
+  ASSERT_TRUE(master.Init());
+
+  PerfTimeLogger timer("Visited_link_add_and_query");
+
+  // first check without anything in the table
+  CheckVisited(master, added_prefix, 0, add_count);
+
+  // now fill half the table
+  const int half_size = add_count / 2;
+  FillTable(master, added_prefix, 0, half_size);
+
+  // check the table again, half of these URLs will be visited, the other half
+  // will not
+  CheckVisited(master, added_prefix, 0, add_count);
+
+  // fill the rest of the table
+  FillTable(master, added_prefix, half_size, add_count);
+
+  // check URLs, doing half visited, half unvisited
+  CheckVisited(master, added_prefix, 0, add_count);
+  CheckVisited(master, unadded_prefix, 0, add_count);
+}
+
+// Tests how long it takes to write and read a large database to and from disk.
+TEST_F(VisitedLink, TestLoad) {
+  // create a big DB
+  {
+    PerfTimeLogger table_initialization_timer("Table_initialization");
+
+    VisitedLinkMaster master(DummyVisitedLinkEventListener::GetInstance(),
+                             NULL, true, db_path_, 0);
+
+    // time init with empty table
+    PerfTimeLogger initTimer("Empty_visited_link_init");
+    bool success = master.Init();
+    initTimer.Done();
+    ASSERT_TRUE(success);
+
+    // add a bunch of stuff
+    // TODO(maruel): This is very inefficient because the file gets rewritten
+    // many times and this is the actual bottleneck of this test. The file
+    // should only get written at the end of the FillTable call, not 4169(!)
+    // times.
+    FillTable(master, added_prefix, 0, load_test_add_count);
+
+    // time writing the file out
+    PerfTimeLogger flushTimer("Visited_link_database_flush");
+    master.RewriteFile();
+    // TODO(maruel): Without calling FlushFileBuffers(master.file_); you don't
+    // really know how much time it took to write the file.
+ flushTimer.Done(); + + table_initialization_timer.Done(); + } + + // test loading the DB back, we do this several times since the flushing is + // not very reliable. + const int load_count = 5; + std::vector<double> cold_load_times; + std::vector<double> hot_load_times; + for (int i = 0; i < load_count; i++) { + // make sure the file has to be re-loaded + file_util::EvictFileFromSystemCache(db_path_); + + // cold load (no OS cache, hopefully) + { + PerfTimer cold_timer; + + VisitedLinkMaster master(DummyVisitedLinkEventListener::GetInstance(), + NULL, + true, + db_path_, + 0); + bool success = master.Init(); + TimeDelta elapsed = cold_timer.Elapsed(); + ASSERT_TRUE(success); + + cold_load_times.push_back(elapsed.InMillisecondsF()); + } + + // hot load (with OS caching the file in memory) + { + PerfTimer hot_timer; + + VisitedLinkMaster master(DummyVisitedLinkEventListener::GetInstance(), + NULL, + true, + db_path_, + 0); + bool success = master.Init(); + TimeDelta elapsed = hot_timer.Elapsed(); + ASSERT_TRUE(success); + + hot_load_times.push_back(elapsed.InMillisecondsF()); + } + } + + // We discard the max and return the average time. + cold_load_times.erase(std::max_element(cold_load_times.begin(), + cold_load_times.end())); + hot_load_times.erase(std::max_element(hot_load_times.begin(), + hot_load_times.end())); + + double cold_sum = 0, hot_sum = 0; + for (int i = 0; i < static_cast<int>(cold_load_times.size()); i++) { + cold_sum += cold_load_times[i]; + hot_sum += hot_load_times[i]; + } + LogPerfResult("Visited_link_cold_load_time", + cold_sum / cold_load_times.size(), "ms"); + LogPerfResult("Visited_link_hot_load_time", + hot_sum / hot_load_times.size(), "ms"); +} + +} // namespace components diff --git a/components/visitedlink/test/visitedlink_unittest.cc b/components/visitedlink/test/visitedlink_unittest.cc new file mode 100644 index 0000000..b9f2a0c --- /dev/null +++ b/components/visitedlink/test/visitedlink_unittest.cc @@ -0,0 +1,763 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <cstdio> +#include <string> +#include <vector> + +#include "base/file_util.h" +#include "base/message_loop.h" +#include "base/path_service.h" +#include "base/process_util.h" +#include "base/shared_memory.h" +#include "base/string_util.h" +#include "base/time.h" +#include "chrome/test/base/chrome_render_view_host_test_harness.h" +#include "chrome/test/base/testing_profile.h" +#include "components/visitedlink/browser/visitedlink_delegate.h" +#include "components/visitedlink/browser/visitedlink_event_listener.h" +#include "components/visitedlink/browser/visitedlink_master.h" +#include "components/visitedlink/common/visitedlink_messages.h" +#include "components/visitedlink/renderer/visitedlink_slave.h" +#include "content/public/browser/notification_service.h" +#include "content/public/browser/notification_types.h" +#include "content/public/test/mock_render_process_host.h" +#include "content/public/test/test_browser_context.h" +#include "content/public/test/test_browser_thread.h" +#include "content/public/test/test_renderer_host.h" +#include "googleurl/src/gurl.h" +#include "testing/gtest/include/gtest/gtest.h" + +using content::BrowserThread; +using content::MockRenderProcessHost; +using content::RenderViewHostTester; + +namespace components { + +namespace { + +typedef std::vector<GURL> URLs; + +// a nice long URL that we can append numbers to to get new URLs +const char g_test_prefix[] = + "http://www.google.com/products/foo/index.html?id=45028640526508376&seq="; +const int g_test_count = 1000; + +// Returns a test URL for index |i| +GURL TestURL(int i) { + return GURL(StringPrintf("%s%d", g_test_prefix, i)); +} + +std::vector<VisitedLinkSlave*> g_slaves; + +class TestVisitedLinkDelegate : public VisitedLinkDelegate { + public: + virtual bool AreEquivalentContexts( + content::BrowserContext* context1, + content::BrowserContext* context2) OVERRIDE; + virtual void RebuildTable( + const scoped_refptr<URLEnumerator>& enumerator) OVERRIDE; + + void AddURLForRebuild(const GURL& url); + + private: + + URLs rebuild_urls_; +}; + +bool TestVisitedLinkDelegate::AreEquivalentContexts( + content::BrowserContext* context1, content::BrowserContext* context2) { + DCHECK_EQ(context1, context2); + return true; // Test only has one profile. 
+} + +void TestVisitedLinkDelegate::RebuildTable( + const scoped_refptr<URLEnumerator>& enumerator) { + for (URLs::const_iterator itr = rebuild_urls_.begin(); + itr != rebuild_urls_.end(); + ++itr) + enumerator->OnURL(*itr); + enumerator->OnComplete(true); +} + +void TestVisitedLinkDelegate::AddURLForRebuild(const GURL& url) { + rebuild_urls_.push_back(url); +} + +class TestURLIterator : public VisitedLinkMaster::URLIterator { + public: + explicit TestURLIterator(const URLs& urls); + + virtual const GURL& NextURL() OVERRIDE; + virtual bool HasNextURL() const OVERRIDE; + + private: + URLs::const_iterator iterator_; + URLs::const_iterator end_; +}; + +TestURLIterator::TestURLIterator(const URLs& urls) + : iterator_(urls.begin()), + end_(urls.end()) { +} + +const GURL& TestURLIterator::NextURL() { + return *(iterator_++); +} + +bool TestURLIterator::HasNextURL() const { + return iterator_ != end_; +} + +} // namespace + +class TrackingVisitedLinkEventListener : public VisitedLinkMaster::Listener { + public: + TrackingVisitedLinkEventListener() + : reset_count_(0), + add_count_(0) {} + + virtual void NewTable(base::SharedMemory* table) { + if (table) { + for (std::vector<VisitedLinkSlave>::size_type i = 0; + i < g_slaves.size(); i++) { + base::SharedMemoryHandle new_handle = base::SharedMemory::NULLHandle(); + table->ShareToProcess(base::GetCurrentProcessHandle(), &new_handle); + g_slaves[i]->OnUpdateVisitedLinks(new_handle); + } + } + } + virtual void Add(VisitedLinkCommon::Fingerprint) { add_count_++; } + virtual void Reset() { reset_count_++; } + + void SetUp() { + reset_count_ = 0; + add_count_ = 0; + } + + int reset_count() const { return reset_count_; } + int add_count() const { return add_count_; } + + private: + int reset_count_; + int add_count_; +}; + +class VisitedLinkTest : public testing::Test { + protected: + VisitedLinkTest() + : ui_thread_(BrowserThread::UI, &message_loop_), + file_thread_(BrowserThread::FILE, &message_loop_) {} + // Initializes the visited link objects. Pass in the size that you want a + // freshly created table to be. 0 means use the default. + // + // |suppress_rebuild| is set when we're not testing rebuilding, see + // the VisitedLinkMaster constructor. + bool InitVisited(int initial_size, bool suppress_rebuild) { + // Initialize the visited link system. + master_.reset(new VisitedLinkMaster(new TrackingVisitedLinkEventListener(), + &delegate_, + suppress_rebuild, visited_file_, + initial_size)); + return master_->Init(); + } + + // May be called multiple times (some tests will do this to clear things, + // and TearDown will do this to make sure eveything is shiny before quitting. + void ClearDB() { + if (master_.get()) + master_.reset(NULL); + + // Wait for all pending file I/O to be completed. + BrowserThread::GetBlockingPool()->FlushForTesting(); + } + + // Loads the database from disk and makes sure that the same URLs are present + // as were generated by TestIO_Create(). This also checks the URLs with a + // slave to make sure it reads the data properly. + void Reload() { + // Clean up after our caller, who may have left the database open. + ClearDB(); + + ASSERT_TRUE(InitVisited(0, true)); + master_->DebugValidate(); + + // check that the table has the proper number of entries + int used_count = master_->GetUsedCount(); + ASSERT_EQ(used_count, g_test_count); + + // Create a slave database. 
+ VisitedLinkSlave slave; + base::SharedMemoryHandle new_handle = base::SharedMemory::NULLHandle(); + master_->shared_memory()->ShareToProcess( + base::GetCurrentProcessHandle(), &new_handle); + slave.OnUpdateVisitedLinks(new_handle); + g_slaves.push_back(&slave); + + bool found; + for (int i = 0; i < g_test_count; i++) { + GURL cur = TestURL(i); + found = master_->IsVisited(cur); + EXPECT_TRUE(found) << "URL " << i << "not found in master."; + + found = slave.IsVisited(cur); + EXPECT_TRUE(found) << "URL " << i << "not found in slave."; + } + + // test some random URL so we know that it returns false sometimes too + found = master_->IsVisited(GURL("http://unfound.site/")); + ASSERT_FALSE(found); + found = slave.IsVisited(GURL("http://unfound.site/")); + ASSERT_FALSE(found); + + master_->DebugValidate(); + + g_slaves.clear(); + } + + // testing::Test + virtual void SetUp() { + ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); + + history_dir_ = temp_dir_.path().AppendASCII("VisitedLinkTest"); + ASSERT_TRUE(file_util::CreateDirectory(history_dir_)); + + visited_file_ = history_dir_.Append(FILE_PATH_LITERAL("VisitedLinks")); + } + + virtual void TearDown() { + ClearDB(); + } + + base::ScopedTempDir temp_dir_; + + MessageLoop message_loop_; + content::TestBrowserThread ui_thread_; + content::TestBrowserThread file_thread_; + + // Filenames for the services; + FilePath history_dir_; + FilePath visited_file_; + + scoped_ptr<VisitedLinkMaster> master_; + TestVisitedLinkDelegate delegate_; +}; + +// This test creates and reads some databases to make sure the data is +// preserved throughout those operations. +TEST_F(VisitedLinkTest, DatabaseIO) { + ASSERT_TRUE(InitVisited(0, true)); + + for (int i = 0; i < g_test_count; i++) + master_->AddURL(TestURL(i)); + + // Test that the database was written properly + Reload(); +} + +// Checks that we can delete things properly when there are collisions. +TEST_F(VisitedLinkTest, Delete) { + static const int32 kInitialSize = 17; + ASSERT_TRUE(InitVisited(kInitialSize, true)); + + // Add a cluster from 14-17 wrapping around to 0. These will all hash to the + // same value. + const VisitedLinkCommon::Fingerprint kFingerprint0 = kInitialSize * 0 + 14; + const VisitedLinkCommon::Fingerprint kFingerprint1 = kInitialSize * 1 + 14; + const VisitedLinkCommon::Fingerprint kFingerprint2 = kInitialSize * 2 + 14; + const VisitedLinkCommon::Fingerprint kFingerprint3 = kInitialSize * 3 + 14; + const VisitedLinkCommon::Fingerprint kFingerprint4 = kInitialSize * 4 + 14; + master_->AddFingerprint(kFingerprint0, false); // @14 + master_->AddFingerprint(kFingerprint1, false); // @15 + master_->AddFingerprint(kFingerprint2, false); // @16 + master_->AddFingerprint(kFingerprint3, false); // @0 + master_->AddFingerprint(kFingerprint4, false); // @1 + + // Deleting 14 should move the next value up one slot (we do not specify an + // order). + EXPECT_EQ(kFingerprint3, master_->hash_table_[0]); + master_->DeleteFingerprint(kFingerprint3, false); + VisitedLinkCommon::Fingerprint zero_fingerprint = 0; + EXPECT_EQ(zero_fingerprint, master_->hash_table_[1]); + EXPECT_NE(zero_fingerprint, master_->hash_table_[0]); + + // Deleting the other four should leave the table empty. 
+ master_->DeleteFingerprint(kFingerprint0, false); + master_->DeleteFingerprint(kFingerprint1, false); + master_->DeleteFingerprint(kFingerprint2, false); + master_->DeleteFingerprint(kFingerprint4, false); + + EXPECT_EQ(0, master_->used_items_); + for (int i = 0; i < kInitialSize; i++) + EXPECT_EQ(zero_fingerprint, master_->hash_table_[i]) << + "Hash table has values in it."; +} + +// When we delete more than kBigDeleteThreshold we trigger different behavior +// where the entire file is rewritten. +TEST_F(VisitedLinkTest, BigDelete) { + ASSERT_TRUE(InitVisited(16381, true)); + + // Add the base set of URLs that won't be deleted. + // Reload() will test for these. + for (int32 i = 0; i < g_test_count; i++) + master_->AddURL(TestURL(i)); + + // Add more URLs than necessary to trigger this case. + const int kTestDeleteCount = VisitedLinkMaster::kBigDeleteThreshold + 2; + URLs urls_to_delete; + for (int32 i = g_test_count; i < g_test_count + kTestDeleteCount; i++) { + GURL url(TestURL(i)); + master_->AddURL(url); + urls_to_delete.push_back(url); + } + + TestURLIterator iterator(urls_to_delete); + master_->DeleteURLs(&iterator); + master_->DebugValidate(); + + Reload(); +} + +TEST_F(VisitedLinkTest, DeleteAll) { + ASSERT_TRUE(InitVisited(0, true)); + + { + VisitedLinkSlave slave; + base::SharedMemoryHandle new_handle = base::SharedMemory::NULLHandle(); + master_->shared_memory()->ShareToProcess( + base::GetCurrentProcessHandle(), &new_handle); + slave.OnUpdateVisitedLinks(new_handle); + g_slaves.push_back(&slave); + + // Add the test URLs. + for (int i = 0; i < g_test_count; i++) { + master_->AddURL(TestURL(i)); + ASSERT_EQ(i + 1, master_->GetUsedCount()); + } + master_->DebugValidate(); + + // Make sure the slave picked up the adds. + for (int i = 0; i < g_test_count; i++) + EXPECT_TRUE(slave.IsVisited(TestURL(i))); + + // Clear the table and make sure the slave picked it up. + master_->DeleteAllURLs(); + EXPECT_EQ(0, master_->GetUsedCount()); + for (int i = 0; i < g_test_count; i++) { + EXPECT_FALSE(master_->IsVisited(TestURL(i))); + EXPECT_FALSE(slave.IsVisited(TestURL(i))); + } + + // Close the database. + g_slaves.clear(); + ClearDB(); + } + + // Reopen and validate. + ASSERT_TRUE(InitVisited(0, true)); + master_->DebugValidate(); + EXPECT_EQ(0, master_->GetUsedCount()); + for (int i = 0; i < g_test_count; i++) + EXPECT_FALSE(master_->IsVisited(TestURL(i))); +} + +// This tests that the master correctly resizes its tables when it gets too +// full, notifies its slaves of the change, and updates the disk. +TEST_F(VisitedLinkTest, Resizing) { + // Create a very small database. + const int32 initial_size = 17; + ASSERT_TRUE(InitVisited(initial_size, true)); + + // ...and a slave + VisitedLinkSlave slave; + base::SharedMemoryHandle new_handle = base::SharedMemory::NULLHandle(); + master_->shared_memory()->ShareToProcess( + base::GetCurrentProcessHandle(), &new_handle); + slave.OnUpdateVisitedLinks(new_handle); + g_slaves.push_back(&slave); + + int32 used_count = master_->GetUsedCount(); + ASSERT_EQ(used_count, 0); + + for (int i = 0; i < g_test_count; i++) { + master_->AddURL(TestURL(i)); + used_count = master_->GetUsedCount(); + ASSERT_EQ(i + 1, used_count); + } + + // Verify that the table got resized sufficiently. 
+ int32 table_size; + VisitedLinkCommon::Fingerprint* table; + master_->GetUsageStatistics(&table_size, &table); + used_count = master_->GetUsedCount(); + ASSERT_GT(table_size, used_count); + ASSERT_EQ(used_count, g_test_count) << + "table count doesn't match the # of things we added"; + + // Verify that the slave got the resize message and has the same + // table information. + int32 child_table_size; + VisitedLinkCommon::Fingerprint* child_table; + slave.GetUsageStatistics(&child_table_size, &child_table); + ASSERT_EQ(table_size, child_table_size); + for (int32 i = 0; i < table_size; i++) { + ASSERT_EQ(table[i], child_table[i]); + } + + master_->DebugValidate(); + g_slaves.clear(); + + // This tests that the file is written correctly by reading it in using + // a new database. + Reload(); +} + +// Tests that if the database doesn't exist, it will be rebuilt from history. +TEST_F(VisitedLinkTest, Rebuild) { + // Add half of our URLs to history. This needs to be done before we + // initialize the visited link DB. + int history_count = g_test_count / 2; + for (int i = 0; i < history_count; i++) + delegate_.AddURLForRebuild(TestURL(i)); + + // Initialize the visited link DB. Since the visited links file doesn't exist + // and we don't suppress history rebuilding, this will load from history. + ASSERT_TRUE(InitVisited(0, false)); + + // While the table is rebuilding, add the rest of the URLs to the visited + // link system. This isn't guaranteed to happen during the rebuild, so we + // can't be 100% sure we're testing the right thing, but in practice is. + // All the adds above will generally take some time queuing up on the + // history thread, and it will take a while to catch up to actually + // processing the rebuild that has queued behind it. We will generally + // finish adding all of the URLs before it has even found the first URL. + for (int i = history_count; i < g_test_count; i++) + master_->AddURL(TestURL(i)); + + // Add one more and then delete it. + master_->AddURL(TestURL(g_test_count)); + URLs urls_to_delete; + urls_to_delete.push_back(TestURL(g_test_count)); + TestURLIterator iterator(urls_to_delete); + master_->DeleteURLs(&iterator); + + // Wait for the rebuild to complete. The task will terminate the message + // loop when the rebuild is done. There's no chance that the rebuild will + // complete before we set the task because the rebuild completion message + // is posted to the message loop; until we Run() it, rebuild can not + // complete. + master_->set_rebuild_complete_task(MessageLoop::QuitClosure()); + MessageLoop::current()->Run(); + + // Test that all URLs were written to the database properly. + Reload(); + + // Make sure the extra one was *not* written (Reload won't test this). + EXPECT_FALSE(master_->IsVisited(TestURL(g_test_count))); +} + +// Test that importing a large number of URLs will work +TEST_F(VisitedLinkTest, BigImport) { + ASSERT_TRUE(InitVisited(0, false)); + + // Before the table rebuilds, add a large number of URLs + int total_count = VisitedLinkMaster::kDefaultTableSize + 10; + for (int i = 0; i < total_count; i++) + master_->AddURL(TestURL(i)); + + // Wait for the rebuild to complete. + master_->set_rebuild_complete_task(MessageLoop::QuitClosure()); + MessageLoop::current()->Run(); + + // Ensure that the right number of URLs are present + int used_count = master_->GetUsedCount(); + ASSERT_EQ(used_count, total_count); +} + +TEST_F(VisitedLinkTest, Listener) { + ASSERT_TRUE(InitVisited(0, true)); + + // Add test URLs. 
+ for (int i = 0; i < g_test_count; i++) { + master_->AddURL(TestURL(i)); + ASSERT_EQ(i + 1, master_->GetUsedCount()); + } + + // Delete an URL. + URLs urls_to_delete; + urls_to_delete.push_back(TestURL(0)); + TestURLIterator iterator(urls_to_delete); + master_->DeleteURLs(&iterator); + + // ... and all of the remaining ones. + master_->DeleteAllURLs(); + + TrackingVisitedLinkEventListener* listener = + static_cast<TrackingVisitedLinkEventListener*>(master_->GetListener()); + + // Verify that VisitedLinkMaster::Listener::Add was called for each added URL. + EXPECT_EQ(g_test_count, listener->add_count()); + // Verify that VisitedLinkMaster::Listener::Reset was called both when one and + // all URLs are deleted. + EXPECT_EQ(2, listener->reset_count()); +} + +// TODO(boliu): Inherit content::TestBrowserContext when componentized. +class VisitCountingProfile : public TestingProfile { + public: + VisitCountingProfile() + : add_count_(0), + add_event_count_(0), + reset_event_count_(0) {} + + void CountAddEvent(int by) { + add_count_ += by; + add_event_count_++; + } + + void CountResetEvent() { + reset_event_count_++; + } + + int add_count() const { return add_count_; } + int add_event_count() const { return add_event_count_; } + int reset_event_count() const { return reset_event_count_; } + + private: + int add_count_; + int add_event_count_; + int reset_event_count_; +}; + +// Stub out as little as possible, borrowing from RenderProcessHost. +class VisitRelayingRenderProcessHost : public MockRenderProcessHost { + public: + explicit VisitRelayingRenderProcessHost( + content::BrowserContext* browser_context) + : MockRenderProcessHost(browser_context), widgets_(0) { + content::NotificationService::current()->Notify( + content::NOTIFICATION_RENDERER_PROCESS_CREATED, + content::Source<RenderProcessHost>(this), + content::NotificationService::NoDetails()); + } + virtual ~VisitRelayingRenderProcessHost() { + content::NotificationService::current()->Notify( + content::NOTIFICATION_RENDERER_PROCESS_TERMINATED, + content::Source<content::RenderProcessHost>(this), + content::NotificationService::NoDetails()); + } + + virtual void WidgetRestored() OVERRIDE { widgets_++; } + virtual void WidgetHidden() OVERRIDE { widgets_--; } + virtual int VisibleWidgetCount() const OVERRIDE { return widgets_; } + + virtual bool Send(IPC::Message* msg) OVERRIDE { + VisitCountingProfile* counting_profile = + static_cast<VisitCountingProfile*>( + GetBrowserContext()); + + if (msg->type() == ChromeViewMsg_VisitedLink_Add::ID) { + PickleIterator iter(*msg); + std::vector<uint64> fingerprints; + CHECK(IPC::ReadParam(msg, &iter, &fingerprints)); + counting_profile->CountAddEvent(fingerprints.size()); + } else if (msg->type() == ChromeViewMsg_VisitedLink_Reset::ID) { + counting_profile->CountResetEvent(); + } + + delete msg; + return true; + } + + private: + int widgets_; + + DISALLOW_COPY_AND_ASSIGN(VisitRelayingRenderProcessHost); +}; + +class VisitedLinkRenderProcessHostFactory + : public content::RenderProcessHostFactory { + public: + VisitedLinkRenderProcessHostFactory() + : content::RenderProcessHostFactory() {} + virtual content::RenderProcessHost* CreateRenderProcessHost( + content::BrowserContext* browser_context) const OVERRIDE { + return new VisitRelayingRenderProcessHost(browser_context); + } + + private: + + DISALLOW_COPY_AND_ASSIGN(VisitedLinkRenderProcessHostFactory); +}; + +// TODO(boliu): Inherit content::RenderViewHostTestHarness when componentized. 
+class VisitedLinkEventsTest : public ChromeRenderViewHostTestHarness {
+ public:
+  VisitedLinkEventsTest()
+      : ui_thread_(BrowserThread::UI, &message_loop_),
+        file_thread_(BrowserThread::FILE, &message_loop_) {}
+  virtual ~VisitedLinkEventsTest() {}
+  virtual void SetUp() {
+    browser_context_.reset(new VisitCountingProfile());
+    master_.reset(new VisitedLinkMaster(profile(), &delegate_));
+    master_->Init();
+    SetRenderProcessHostFactory(&vc_rph_factory_);
+    content::RenderViewHostTestHarness::SetUp();
+  }
+
+  VisitCountingProfile* profile() const {
+    return static_cast<VisitCountingProfile*>(browser_context_.get());
+  }
+
+  VisitedLinkMaster* master() const {
+    return master_.get();
+  }
+
+  void WaitForCoalescense() {
+    // Let the timer fire.
+    MessageLoop::current()->PostDelayedTask(
+        FROM_HERE,
+        MessageLoop::QuitClosure(),
+        base::TimeDelta::FromMilliseconds(110));
+    MessageLoop::current()->Run();
+  }
+
+ protected:
+  VisitedLinkRenderProcessHostFactory vc_rph_factory_;
+
+ private:
+  TestVisitedLinkDelegate delegate_;
+  scoped_ptr<VisitedLinkMaster> master_;
+  content::TestBrowserThread ui_thread_;
+  content::TestBrowserThread file_thread_;
+
+  DISALLOW_COPY_AND_ASSIGN(VisitedLinkEventsTest);
+};
+
+TEST_F(VisitedLinkEventsTest, Coalescense) {
+  // Add a few URLs to the master.
+  master()->AddURL(GURL("http://acidtests.org/"));
+  master()->AddURL(GURL("http://google.com/"));
+  master()->AddURL(GURL("http://chromium.org/"));
+  // Just for kicks, add a duplicate URL. This shouldn't increase the resulting
+  // add count.
+  master()->AddURL(GURL("http://acidtests.org/"));
+
+  // Make sure that coalescing actually occurs. There should be no links or
+  // events received by the renderer.
+  EXPECT_EQ(0, profile()->add_count());
+  EXPECT_EQ(0, profile()->add_event_count());
+
+  WaitForCoalescense();
+
+  // We now should have 3 entries added in 1 event.
+  EXPECT_EQ(3, profile()->add_count());
+  EXPECT_EQ(1, profile()->add_event_count());
+
+  // Test whether the coalescing continues by adding a few more URLs.
+  master()->AddURL(GURL("http://google.com/chrome/"));
+  master()->AddURL(GURL("http://webkit.org/"));
+  master()->AddURL(GURL("http://acid3.acidtests.org/"));
+
+  WaitForCoalescense();
+
+  // We should have 6 entries added in 2 events.
+  EXPECT_EQ(6, profile()->add_count());
+  EXPECT_EQ(2, profile()->add_event_count());
+
+  // Test whether duplicate entries produce add events.
+  master()->AddURL(GURL("http://acidtests.org/"));
+
+  WaitForCoalescense();
+
+  // We should have no change in results.
+  EXPECT_EQ(6, profile()->add_count());
+  EXPECT_EQ(2, profile()->add_event_count());
+
+  // Ensure that the coalescing does not resume after resetting.
+  master()->AddURL(GURL("http://build.chromium.org/"));
+  master()->DeleteAllURLs();
+
+  WaitForCoalescense();
+
+  // We should have no change in results except for one new reset event.
+  EXPECT_EQ(6, profile()->add_count());
+  EXPECT_EQ(2, profile()->add_event_count());
+  EXPECT_EQ(1, profile()->reset_event_count());
+}
+
+TEST_F(VisitedLinkEventsTest, Basics) {
+  RenderViewHostTester::For(rvh())->CreateRenderView(string16(),
+                                                     MSG_ROUTING_NONE,
+                                                     -1);
+
+  // Add a few URLs.
+  master()->AddURL(GURL("http://acidtests.org/"));
+  master()->AddURL(GURL("http://google.com/"));
+  master()->AddURL(GURL("http://chromium.org/"));
+
+  WaitForCoalescense();
+
+  // We now should have 1 add event.
+ EXPECT_EQ(1, profile()->add_event_count()); + EXPECT_EQ(0, profile()->reset_event_count()); + + master()->DeleteAllURLs(); + + WaitForCoalescense(); + + // We should have no change in add results, plus one new reset event. + EXPECT_EQ(1, profile()->add_event_count()); + EXPECT_EQ(1, profile()->reset_event_count()); +} + +TEST_F(VisitedLinkEventsTest, TabVisibility) { + RenderViewHostTester::For(rvh())->CreateRenderView(string16(), + MSG_ROUTING_NONE, + -1); + + // Simulate tab becoming inactive. + RenderViewHostTester::For(rvh())->SimulateWasHidden(); + + // Add a few URLs. + master()->AddURL(GURL("http://acidtests.org/")); + master()->AddURL(GURL("http://google.com/")); + master()->AddURL(GURL("http://chromium.org/")); + + WaitForCoalescense(); + + // We shouldn't have any events. + EXPECT_EQ(0, profile()->add_event_count()); + EXPECT_EQ(0, profile()->reset_event_count()); + + // Simulate the tab becoming active. + RenderViewHostTester::For(rvh())->SimulateWasShown(); + + // We should now have 3 add events, still no reset events. + EXPECT_EQ(1, profile()->add_event_count()); + EXPECT_EQ(0, profile()->reset_event_count()); + + // Deactivate the tab again. + RenderViewHostTester::For(rvh())->SimulateWasHidden(); + + // Add a bunch of URLs (over 50) to exhaust the link event buffer. + for (int i = 0; i < 100; i++) + master()->AddURL(TestURL(i)); + + WaitForCoalescense(); + + // Again, no change in events until tab is active. + EXPECT_EQ(1, profile()->add_event_count()); + EXPECT_EQ(0, profile()->reset_event_count()); + + // Activate the tab. + RenderViewHostTester::For(rvh())->SimulateWasShown(); + + // We should have only one more reset event. + EXPECT_EQ(1, profile()->add_event_count()); + EXPECT_EQ(1, profile()->reset_event_count()); +} + +} // namespace components |
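For readers skimming the patch, here is a minimal standalone sketch of the lookup scheme the component implements (see the class comment in visitedlink_common.h): each URL is reduced to a 64-bit fingerprint and stored in an open-addressed table that is probed linearly until the fingerprint or an empty slot is found. This is illustrative only and not part of the commit; the class name SimpleVisitedSet and the std::hash stand-in are assumptions, whereas the real code fingerprints with the top 64 bits of MD5(salt + canonical URL) and resizes through VisitedLinkMaster well before the table fills.

// Illustrative sketch only -- not part of this commit.
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

class SimpleVisitedSet {  // hypothetical name, not a class in the component
 public:
  typedef uint64_t Fingerprint;

  explicit SimpleVisitedSet(size_t table_size)
      : table_(table_size, 0), used_(0) {}  // 0 marks an empty slot

  bool Add(const std::string& canonical_url) {
    // Keep at least one empty slot so every probe loop terminates; the real
    // master resizes the table long before it gets this full.
    if (used_ + 1 >= table_.size())
      return false;
    Fingerprint f = ComputeFingerprint(canonical_url);
    size_t slot = f % table_.size();
    while (table_[slot] != 0) {          // linear probing
      if (table_[slot] == f)
        return true;                     // duplicate, nothing to do
      slot = (slot + 1) % table_.size();
    }
    table_[slot] = f;
    ++used_;
    return true;
  }

  bool IsVisited(const std::string& canonical_url) const {
    Fingerprint f = ComputeFingerprint(canonical_url);
    size_t slot = f % table_.size();
    while (table_[slot] != 0) {          // stop at the first empty slot
      if (table_[slot] == f)
        return true;
      slot = (slot + 1) % table_.size();
    }
    return false;
  }

 private:
  static Fingerprint ComputeFingerprint(const std::string& url) {
    // Stand-in for "top 64 bits of MD5(salt + url)"; the |1 keeps the value
    // nonzero because 0 is reserved for empty slots (null_fingerprint_).
    return static_cast<Fingerprint>(std::hash<std::string>()(url)) | 1;
  }

  std::vector<Fingerprint> table_;
  size_t used_;
};

int main() {
  SimpleVisitedSet visited(17);
  visited.Add("http://www.google.com/");
  std::cout << visited.IsVisited("http://www.google.com/") << "\n";  // 1
  std::cout << visited.IsVisited("http://unfound.site/") << "\n";    // 0
  return 0;
}

The reliance on linear probing is also why deleting an entry is not a simple clear of one slot: the cluster of fingerprints that follows a deleted slot has to be reinserted so later probes still find them, which is what the VisitedLinkTest.Delete expectations above are exercising.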