| field | value | date |
|---|---|---|
| author | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 23:55:29 +0000 |
| committer | initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-07-26 23:55:29 +0000 |
| commit | 09911bf300f1a419907a9412154760efd0b7abc3 (patch) | |
| tree | f131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/visitedlink_master.cc | |
| parent | 586acc5fe142f498261f52c66862fa417c3d52d2 (diff) | |
Add chrome to the repository.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/visitedlink_master.cc')
| mode | file | insertions |
|---|---|---|
| -rw-r--r-- | chrome/browser/visitedlink_master.cc | 1047 |

1 file changed, 1047 insertions, 0 deletions
diff --git a/chrome/browser/visitedlink_master.cc b/chrome/browser/visitedlink_master.cc
new file mode 100644
index 0000000..1d066a9
--- /dev/null
+++ b/chrome/browser/visitedlink_master.cc
@@ -0,0 +1,1047 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <windows.h>
+#include <shlobj.h>
+#include <algorithm>
+
+#include "chrome/browser/visitedlink_master.h"
+
+#include "base/logging.h"
+#include "base/message_loop.h"
+#include "base/path_service.h"
+#include "base/stack_container.h"
+#include "base/string_util.h"
+#include "chrome/browser/browser_process.h"
+#include "chrome/browser/history/history.h"
+#include "chrome/browser/profile.h"
+#include "chrome/common/win_util.h"
+
+const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
+const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
+const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
+const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
+const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
+
+const int32 VisitedLinkMaster::kFileCurrentVersion = 2;
+
+// The signature at the beginning of the URL table = "VLnk" (visited links).
+const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
+const size_t VisitedLinkMaster::kFileHeaderSize =
+    kFileHeaderSaltOffset + LINK_SALT_LENGTH;
+
+// This value should also be the same as the smallest size in the lookup
+// table in NewTableSizeForCount (prime number).
+const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
+
+const int32 VisitedLinkMaster::kBigDeleteThreshold = 64;
+
+namespace {
+
+// Fills the given salt structure with some quasi-random values. We ask the
+// system to generate a UUID for us, and we use some of the bytes out of
+// that. It is not necessary to generate a cryptographically strong random
+// string, only that it be reasonably different for different users. Here,
+// we use every-other byte of the 16-byte UUID.
+void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
+  UUID uuid;
+  UuidCreate(&uuid);
+
+  DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
+  salt[0] = static_cast<uint8>(uuid.Data1 & 0xFF);
+  salt[1] = static_cast<uint8>((uuid.Data1 >> 8) & 0xFF);
+  salt[2] = static_cast<uint8>(uuid.Data2 & 0xFF);
+  salt[3] = static_cast<uint8>(uuid.Data3 & 0xFF);
+  salt[4] = uuid.Data4[0];
+  salt[5] = uuid.Data4[2];
+  salt[6] = uuid.Data4[4];
+  salt[7] = uuid.Data4[6];
+}
+
+// AsyncWriter ----------------------------------------------------------------
+
+// This task executes on a background thread and executes a write. This
+// prevents us from blocking the UI thread doing I/O.
+class AsyncWriter : public Task {
+ public:
+  AsyncWriter(HANDLE hfile, int32 offset, const void* data, int32 data_len)
+      : hfile_(hfile),
+        offset_(offset) {
+    data_->resize(data_len);
+    memcpy(&*data_->begin(), data, data_len);
+  }
+
+  virtual void Run() {
+    WriteToFile(hfile_, offset_,
+                &*data_->begin(), static_cast<int32>(data_->size()));
+  }
+
+  // Exposed as a static so it can be called directly from the Master to
+  // reduce the number of platform-specific I/O sites we have. Returns true if
+  // the write was complete.
+  static bool WriteToFile(HANDLE hfile,
+                          int32 offset,
+                          const void* data,
+                          int32 data_len) {
+    if (SetFilePointer(hfile, offset, NULL, FILE_BEGIN) ==
+        INVALID_SET_FILE_POINTER)
+      return false;  // Don't write to an invalid part of the file.
+
+    DWORD num_written;
+    return WriteFile(hfile, data, data_len, &num_written, NULL) &&
+           (num_written == data_len);
+  }
+
+ private:
+  // The data to write and where to write it.
+  HANDLE hfile_;
+  int32 offset_;  // Offset from the beginning of the file.
+
+  // Most writes are just a single fingerprint, so we reserve that much in this
+  // object to avoid mallocs in that case.
+  StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_;
+
+  DISALLOW_EVIL_CONSTRUCTORS(AsyncWriter);
+};
+
+// Used to asynchronously set the end of the file. This must be done on the
+// same thread as the writing to keep things synchronized.
+class AsyncSetEndOfFile : public Task {
+ public:
+  explicit AsyncSetEndOfFile(HANDLE hfile) : hfile_(hfile) {}
+
+  virtual void Run() {
+    SetEndOfFile(hfile_);
+  }
+
+ private:
+  HANDLE hfile_;
+  DISALLOW_EVIL_CONSTRUCTORS(AsyncSetEndOfFile);
+};
+
+// Used to asynchronously close a file. This must be done on the same thread as
+// the writing to keep things synchronized.
+class AsyncCloseHandle : public Task {
+ public:
+  explicit AsyncCloseHandle(HANDLE hfile) : hfile_(hfile) {}
+
+  virtual void Run() {
+    CloseHandle(hfile_);
+  }
+
+ private:
+  HANDLE hfile_;
+  DISALLOW_EVIL_CONSTRUCTORS(AsyncCloseHandle);
+};
+
+}  // namespace
+
+// TableBuilder ---------------------------------------------------------------
+
+// How rebuilding from history works
+// ---------------------------------
+//
+// We mark that we're rebuilding from history by setting the table_builder_
+// member in VisitedLinkMaster to the TableBuilder we create. This builder
+// will be called on the history thread by the history system for every URL
+// in the database.
+//
+// The builder will store the fingerprints for those URLs, and then marshal
+// back to the main thread where the VisitedLinkMaster will be notified. The
+// master then replaces its table with a new table containing the computed
+// fingerprints.
+//
+// The builder must remain active while the history system is using it.
+// Sometimes, the master will be deleted before the rebuild is complete, in
+// which case it notifies the builder via DisownMaster(). The builder will
+// delete itself once rebuilding is complete, and not execute any callback.
+class VisitedLinkMaster::TableBuilder : public HistoryService::URLEnumerator,
+                                        public base::RefCounted<TableBuilder> {
+ public:
+  TableBuilder(VisitedLinkMaster* master,
+               const uint8 salt[LINK_SALT_LENGTH]);
+
+  // Called on the main thread when the master is being destroyed. This will
+  // prevent a crash when the query completes and the master is no longer
+  // around. We cannot actually do anything but mark this fact, since the
+  // table will be being rebuilt simultaneously on the other thread.
+  void DisownMaster();
+
+  // HistoryService::URLEnumerator
+  virtual void OnURL(const GURL& url);
+  virtual void OnComplete(bool succeed);
+
+ private:
+  // OnComplete marshals to this function on the main thread to do the
+  // notification.
+  void OnCompleteMainThread();
+
+  // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
+  VisitedLinkMaster* master_;
+
+  // The thread the visited link master is on where we will notify it.
+  MessageLoop* main_message_loop_;
+
+  // Indicates whether the operation has failed or not.
+  bool success_;
+
+  // Salt for this new table.
+  uint8 salt_[LINK_SALT_LENGTH];
+
+  // Stores the fingerprints we computed on the background thread.
+  std::vector<VisitedLinkMaster::Fingerprint> fingerprints_;
+};
+
+// VisitedLinkMaster ----------------------------------------------------------
+
+VisitedLinkMaster::VisitedLinkMaster(Thread* file_thread,
+                                     PostNewTableEvent* poster,
+                                     Profile* profile) {
+  InitMembers(file_thread, poster, profile);
+}
+
+VisitedLinkMaster::VisitedLinkMaster(Thread* file_thread,
+                                     PostNewTableEvent* poster,
+                                     HistoryService* history_service,
+                                     bool suppress_rebuild,
+                                     const std::wstring& filename,
+                                     int32 default_table_size) {
+  InitMembers(file_thread, poster, NULL);
+
+  database_name_override_.assign(filename);
+  table_size_override_ = default_table_size;
+  history_service_override_ = history_service;
+  suppress_rebuild_ = suppress_rebuild;
+}
+
+VisitedLinkMaster::~VisitedLinkMaster() {
+  if (table_builder_.get()) {
+    // Prevent the table builder from calling us back now that we're being
+    // destroyed. Note that we DON'T delete the object, since the history
+    // system is still writing into it. When that is complete, the table
+    // builder will destroy itself when it finds we are gone.
+    table_builder_->DisownMaster();
+  }
+  FreeURLTable();
+}
+
+void VisitedLinkMaster::InitMembers(Thread* file_thread,
+                                    PostNewTableEvent* poster,
+                                    Profile* profile) {
+  if (file_thread)
+    file_thread_ = file_thread->message_loop();
+  else
+    file_thread_ = NULL;
+
+  post_new_table_event_ = poster;
+  file_ = NULL;
+  shared_memory_ = NULL;
+  shared_memory_serial_ = 0;
+  used_items_ = 0;
+  table_size_override_ = 0;
+  history_service_override_ = NULL;
+  suppress_rebuild_ = false;
+  profile_ = profile;
+
+#ifndef NDEBUG
+  posted_asynchronous_operation_ = false;
+#endif
+}
+
+// The shared memory name should be unique on the system and also needs to
+// change when we create a new table. The scheme we use includes the process
+// ID, an increasing serial number, and the profile ID.
+std::wstring VisitedLinkMaster::GetSharedMemoryName() const {
+  // When unit testing, there's no profile, so use an empty ID string.
+  std::wstring profile_id;
+  if (profile_)
+    profile_id = profile_->GetID().c_str();
+
+  return StringPrintf(L"GVisitedLinks_%lu_%lu_%s",
+                      GetCurrentProcessId(), shared_memory_serial_,
+                      profile_id.c_str());
+}
+
+bool VisitedLinkMaster::Init() {
+  if (!InitFromFile())
+    return InitFromScratch(suppress_rebuild_);
+  return true;
+}
+
+bool VisitedLinkMaster::ShareToProcess(ProcessHandle process,
+                                       SharedMemoryHandle* new_handle) {
+  if (shared_memory_)
+    return shared_memory_->ShareToProcess(process, new_handle);
+
+  NOTREACHED();
+  return false;
+}
+
+SharedMemoryHandle VisitedLinkMaster::GetSharedMemoryHandle() {
+  return shared_memory_->handle();
+}
+
+VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
+  // Extra check that we are not off the record. This should not happen.
+  if (profile_ && profile_->IsOffTheRecord()) {
+    NOTREACHED();
+    return null_hash_;
+  }
+
+  if (!url.is_valid())
+    return null_hash_;  // Don't add invalid URLs.
+
+  Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
+                                                  url.spec().size(),
+                                                  salt_);
+  if (table_builder_) {
+    // If we have a pending delete for this fingerprint, cancel it.
+    std::set<Fingerprint>::iterator found =
+        deleted_since_rebuild_.find(fingerprint);
+    if (found != deleted_since_rebuild_.end())
+      deleted_since_rebuild_.erase(found);
+
+    // A rebuild is in progress, save this addition in the temporary list so
+    // it can be added once rebuild is complete.
+    added_since_rebuild_.insert(fingerprint);
+  }
+
+  // If the table is "full", we don't add URLs and just drop them on the floor.
+  // This can happen if we get thousands of new URLs and something causes
+  // the table resizing to fail. This check prevents a hang in that case. Note
+  // that this is *not* the resize limit, this is just a sanity check.
+  if (used_items_ / 8 > table_length_ / 10)
+    return null_hash_;  // Table is more than 80% full.
+
+  return AddFingerprint(fingerprint);
+}
+
+void VisitedLinkMaster::AddURL(const GURL& url) {
+  Hash index = TryToAddURL(url);
+  if (!table_builder_ && index != null_hash_) {
+    // Not rebuilding, so we want to keep the file on disk up-to-date.
+    WriteUsedItemCountToFile();
+    WriteHashRangeToFile(index, index);
+    ResizeTableIfNecessary();
+  }
+}
+
+void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
+  for (std::vector<GURL>::const_iterator i = url.begin();
+       i != url.end(); ++i) {
+    Hash index = TryToAddURL(*i);
+    if (!table_builder_ && index != null_hash_)
+      ResizeTableIfNecessary();
+  }
+
+  // Keeps the file on disk up-to-date.
+  if (!table_builder_)
+    WriteFullTable();
+}
+
+void VisitedLinkMaster::DeleteAllURLs() {
+  // Any pending modifications are invalid.
+  added_since_rebuild_.clear();
+  deleted_since_rebuild_.clear();
+
+  // Clear the hash table.
+  used_items_ = 0;
+  memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
+
+  // Resize it if it is now too empty. Resize may write the new table out for
+  // us, otherwise, schedule writing the new table to disk ourselves.
+  if (!ResizeTableIfNecessary())
+    WriteFullTable();
+}
+
+void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) {
+  typedef std::set<GURL>::const_iterator SetIterator;
+
+  if (urls.empty())
+    return;
+
+  if (table_builder_) {
+    // A rebuild is in progress, save this deletion in the temporary list so
+    // it can be added once rebuild is complete.
+    for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
+      if (!i->is_valid())
+        continue;
+
+      Fingerprint fingerprint =
+          ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_);
+      deleted_since_rebuild_.insert(fingerprint);
+
+      // If the URL was just added and now we're deleting it, it may be in the
+      // list of things added since the last rebuild. Delete it from that list.
+      std::set<Fingerprint>::iterator found =
+          added_since_rebuild_.find(fingerprint);
+      if (found != added_since_rebuild_.end())
+        added_since_rebuild_.erase(found);
+
+      // Delete the URLs from the in-memory table, but don't bother writing
+      // to disk since it will be replaced soon.
+      DeleteFingerprint(fingerprint, false);
+    }
+    return;
+  }
+
+  // Compute the deleted URLs' fingerprints and delete them.
+  std::set<Fingerprint> deleted_fingerprints;
+  for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
+    if (!i->is_valid())
+      continue;
+    deleted_fingerprints.insert(
+        ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_));
+  }
+  DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
+}
+
+// See VisitedLinkCommon::IsVisited which should be in sync with this
+// algorithm.
+VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
+    Fingerprint fingerprint) {
+  if (!hash_table_ || table_length_ == 0) {
+    NOTREACHED();  // Not initialized.
+    return null_hash_;
+  }
+
+  Hash cur_hash = HashFingerprint(fingerprint);
+  Hash first_hash = cur_hash;
+  while (true) {
+    Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
+    if (cur_fingerprint == fingerprint)
+      return null_hash_;  // This fingerprint is already in there, do nothing.
+
+    if (cur_fingerprint == null_fingerprint_) {
+      // End of probe sequence found, insert here.
+      hash_table_[cur_hash] = fingerprint;
+      used_items_++;
+      return cur_hash;
+    }
+
+    // Advance in the probe sequence.
+    cur_hash = IncrementHash(cur_hash);
+    if (cur_hash == first_hash) {
+      // This means that we've wrapped around and are about to go into an
+      // infinite loop. Something was wrong with the hashtable resizing
+      // logic, so stop here.
+      NOTREACHED();
+      return null_hash_;
+    }
+  }
+}
+
+void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
+    const std::set<Fingerprint>& fingerprints) {
+  bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
+
+  // Delete the URLs from the table.
+  for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
+       i != fingerprints.end(); ++i)
+    DeleteFingerprint(*i, !bulk_write);
+
+  // These deleted fingerprints may make us shrink the table.
+  if (ResizeTableIfNecessary())
+    return;  // The resize function wrote the new table to disk for us.
+
+  // Nobody wrote this out for us, write the full file to disk.
+  if (bulk_write)
+    WriteFullTable();
+}
+
+bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
+                                          bool update_file) {
+  if (!hash_table_ || table_length_ == 0) {
+    NOTREACHED();  // Not initialized.
+    return false;
+  }
+  if (!IsVisited(fingerprint))
+    return false;  // Not in the database to delete.
+
+  // First update the header used count.
+  used_items_--;
+  if (update_file)
+    WriteUsedItemCountToFile();
+
+  Hash deleted_hash = HashFingerprint(fingerprint);
+
+  // Find the range of "stuff" in the hash table that is adjacent to this
+  // fingerprint. These are things that could be affected by the change in
+  // the hash table. Since we use linear probing, anything after the deleted
+  // item up until an empty item could be affected.
+  Hash end_range = deleted_hash;
+  while (true) {
+    Hash next_hash = IncrementHash(end_range);
+    if (next_hash == deleted_hash)
+      break;  // We wrapped around and the whole table is full.
+    if (!hash_table_[next_hash])
+      break;  // Found the last spot.
+    end_range = next_hash;
+  }
+
+  // We could get all fancy and move the affected fingerprints around, but
+  // instead we just remove them all and re-add them (minus our deleted one).
+  // This will mean there's a small window of time where the affected links
+  // won't be marked visited.
+  StackVector<Fingerprint, 32> shuffled_fingerprints;
+  Hash stop_loop = IncrementHash(end_range);  // The end range is inclusive.
+  for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
+    if (hash_table_[i] != fingerprint) {
+      // Don't save the one we're deleting!
+      shuffled_fingerprints->push_back(hash_table_[i]);
+
+      // This will balance the increment of this value in AddFingerprint below
+      // so there is no net change.
+      used_items_--;
+    }
+    hash_table_[i] = null_fingerprint_;
+  }
+
+  if (!shuffled_fingerprints->empty()) {
+    // Need to add the new items back.
+    for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
+      AddFingerprint(shuffled_fingerprints[i]);
+  }
+
+  // Write the affected range to disk [deleted_hash, end_range].
+  if (update_file)
+    WriteHashRangeToFile(deleted_hash, end_range);
+
+  return true;
+}
+
+bool VisitedLinkMaster::WriteFullTable() {
+  // This function can get called when the file is open, for example, when we
+  // resize the table. We must handle this case and not try to reopen the file,
+  // since there may be write operations pending on the file I/O thread.
+  //
+  // Note that once we start writing, we do not delete on error. This means
+  // there can be a partial file, but the short file will be detected next time
+  // we start, and will be replaced.
+  //
+  // This might possibly get corrupted if we crash in the middle of writing.
+  // We should pick up the most common types of these failures when we notice
+  // that the file size is different when we load it back in, and then we will
+  // regenerate the table.
+  win_util::ScopedHandle hfile_closer;  // Valid only when not open already.
+  HANDLE hfile;  // Always valid.
+  if (file_) {
+    hfile = file_;
+  } else {
+    std::wstring filename;
+    GetDatabaseFileName(&filename);
+    hfile_closer.Set(
+        CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, NULL,
+                   CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL));
+    if (!hfile_closer.IsValid())
+      return false;
+    hfile = hfile_closer;
+  }
+
+  // Write the new header.
+  int32 header[4];
+  header[0] = kFileSignature;
+  header[1] = kFileCurrentVersion;
+  header[2] = table_length_;
+  header[3] = used_items_;
+  WriteToFile(hfile, 0, header, sizeof(header));
+  WriteToFile(hfile, sizeof(header), salt_, LINK_SALT_LENGTH);
+
+  // Write the hash data.
+  WriteToFile(hfile, kFileHeaderSize,
+              hash_table_, table_length_ * sizeof(Fingerprint));
+
+  // The hash table may have shrunk, so make sure this is the end.
+  if (file_thread_) {
+    AsyncSetEndOfFile* setter = new AsyncSetEndOfFile(hfile);
+    file_thread_->PostTask(FROM_HERE, setter);
+  } else {
+    SetEndOfFile(hfile);
+  }
+
+  // Keep the file open so we can dynamically write changes to it. When the
+  // file was already open, hfile_closer is not valid, and file_ is already
+  // good.
+  if (hfile_closer.IsValid())
+    file_ = hfile_closer.Take();
+  return true;
+}
+
+bool VisitedLinkMaster::InitFromFile() {
+  DCHECK(file_ == NULL);
+
+  std::wstring filename;
+  GetDatabaseFileName(&filename);
+  win_util::ScopedHandle hfile(
+      CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, NULL,
+                 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL));
+  if (!hfile.IsValid())
+    return false;
+
+  int32 num_entries, used_count;
+  if (!ReadFileHeader(hfile, &num_entries, &used_count, salt_))
+    return false;  // Header isn't valid.
+
+  // Allocate and read the table.
+  if (!CreateURLTable(num_entries, false))
+    return false;
+  if (!ReadFromFile(hfile, kFileHeaderSize,
+                    hash_table_, num_entries * sizeof(Fingerprint))) {
+    FreeURLTable();
+    return false;
+  }
+  used_items_ = used_count;
+
+  file_ = hfile.Take();
+  return true;
+}
+
+bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
+  int32 table_size = kDefaultTableSize;
+  if (table_size_override_)
+    table_size = table_size_override_;
+
+  // The salt must be generated before the table so that it can be copied to
+  // the shared memory.
+  GenerateSalt(salt_);
+  if (!CreateURLTable(table_size, true))
+    return false;
+
+  if (suppress_rebuild) {
+    // When we disallow rebuilds (normally just unit tests), just use the
+    // current empty table.
+    return WriteFullTable();
+  }
+
+  // This will build the table from history. On the first run, history will
+  // be empty, so this will be correct. This will also write the new table
+  // to disk. We don't want to save explicitly here, since the rebuild may
+  // not complete, leaving us with an empty but valid visited link database.
+  // In the future, we won't know we need to try rebuilding again.
+  return RebuildTableFromHistory();
+}
+
+bool VisitedLinkMaster::ReadFileHeader(HANDLE hfile,
+                                       int32* num_entries,
+                                       int32* used_count,
+                                       uint8 salt[LINK_SALT_LENGTH]) {
+  int32 file_size = GetFileSize(hfile, NULL);
+  if (file_size <= kFileHeaderSize)
+    return false;
+
+  uint8 header[kFileHeaderSize];
+  if (!ReadFromFile(hfile, 0, &header, kFileHeaderSize))
+    return false;
+
+  // Verify the signature.
+  uint32 signature;
+  memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
+  if (signature != kFileSignature)
+    return false;
+
+  // Verify the version is up-to-date. As with other read errors, a version
+  // mismatch will trigger a rebuild of the database from history, which will
+  // have the effect of migrating the database.
+  int32 version;
+  memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
+  if (version != kFileCurrentVersion)
+    return false;  // Bad version.
+
+  // Read the table size and make sure it matches the file size.
+  memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
+  if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
+    return false;  // Bad size.
+
+  // Read the used item count.
+  memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
+  if (*used_count > *num_entries)
+    return false;  // Bad used item count.
+
+  // Read the salt.
+  memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
+
+  // This file looks OK from the header's perspective.
+  return true;
+}
+
+bool VisitedLinkMaster::GetDatabaseFileName(std::wstring* filename) {
+  if (!database_name_override_.empty()) {
+    // Use this filename; the directory must exist.
+    *filename = database_name_override_;
+    return true;
+  }
+
+  if (!profile_ || profile_->GetPath().empty())
+    return false;
+
+  *filename = profile_->GetPath();
+  filename->append(L"\\Visited Links");
+  return true;
+}
+
+// Initializes the shared memory structure. The salt should already be filled
+// in so that it can be written to the shared memory.
+bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
+  // The table is the size of the table followed by the entries.
+  int32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
+
+  // Create the shared memory object.
+  shared_memory_ = new SharedMemory();
+  if (!shared_memory_)
+    return false;
+
+  if (!shared_memory_->Create(GetSharedMemoryName().c_str(),
+                              false, false, alloc_size))
+    return false;
+
+  // Map into our process.
+  if (!shared_memory_->Map(alloc_size)) {
+    delete shared_memory_;
+    shared_memory_ = NULL;
+    return false;
+  }
+
+  if (init_to_empty) {
+    memset(shared_memory_->memory(), 0, alloc_size);
+    used_items_ = 0;
+  }
+  table_length_ = num_entries;
+
+  // Save the header for other processes to read.
+  SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
+  header->length = table_length_;
+  memcpy(header->salt, salt_, LINK_SALT_LENGTH);
+
+  // Our table pointer is just the data immediately following the size.
+  hash_table_ = reinterpret_cast<Fingerprint*>(
+      static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
+
+#ifndef NDEBUG
+  DebugValidate();
+#endif
+
+  return true;
+}
+
+bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
+  SharedMemory* old_shared_memory = shared_memory_;
+  Fingerprint* old_hash_table = hash_table_;
+  int32 old_table_length = table_length_;
+  if (!CreateURLTable(num_entries, true)) {
+    // Try to put back the old state.
+    shared_memory_ = old_shared_memory;
+    hash_table_ = old_hash_table;
+    table_length_ = old_table_length;
+    return false;
+  }
+  return true;
+}
+
+void VisitedLinkMaster::FreeURLTable() {
+  if (shared_memory_) {
+    delete shared_memory_;
+    shared_memory_ = NULL;
+  }
+  if (file_) {
+    if (file_thread_) {
+      AsyncCloseHandle* closer = new AsyncCloseHandle(file_);
+      file_thread_->PostTask(FROM_HERE, closer);
+    } else {
+      CloseHandle(file_);
+    }
+  }
+}
+
+bool VisitedLinkMaster::ResizeTableIfNecessary() {
+  DCHECK(table_length_ > 0) << "Must have a table";
+
+  // Load limits for good performance/space. We are pretty conservative about
+  // keeping the table not very full. This is because we use linear probing
+  // which increases the likelihood of clumps of entries which will reduce
+  // performance.
+  const float max_table_load = 0.5f;  // Grow when we're > this full.
+  const float min_table_load = 0.2f;  // Shrink when we're < this full.
+
+  float load = ComputeTableLoad();
+  if (load < max_table_load &&
+      (table_length_ <= kDefaultTableSize || load > min_table_load))
+    return false;
+
+  // Table needs to grow or shrink.
+  int new_size = NewTableSizeForCount(used_items_);
+  DCHECK(new_size > used_items_);
+  DCHECK(load <= min_table_load || new_size > table_length_);
+  ResizeTable(new_size);
+  return true;
+}
+
+void VisitedLinkMaster::ResizeTable(int32 new_size) {
+  DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
+  shared_memory_serial_++;
+
+#ifndef NDEBUG
+  DebugValidate();
+#endif
+
+  SharedMemory* old_shared_memory = shared_memory_;
+  Fingerprint* old_hash_table = hash_table_;
+  int32 old_table_length = table_length_;
+  if (!BeginReplaceURLTable(new_size))
+    return;
+
+  // Now we have two tables, our local copy which is the old one, and the new
+  // one loaded into this object where we need to copy the data.
+  for (int32 i = 0; i < old_table_length; i++) {
+    Fingerprint cur = old_hash_table[i];
+    if (cur)
+      AddFingerprint(cur);
+  }
+
+  // On error unmapping, just forget about it since we can't do anything
+  // else to release it.
+  delete old_shared_memory;
+
+  // Send an update notification to all child processes so they read the new
+  // table.
+  post_new_table_event_(shared_memory_);
+
+#ifndef NDEBUG
+  DebugValidate();
+#endif
+
+  // The new table needs to be written to disk.
+  WriteFullTable();
+}
+
+uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
+  // These table sizes are selected to be the maximum prime number less than
+  // a "convenient" multiple of 1K.
+  static const int table_sizes[] = {
+      16381,     // 16K = 16384 <- don't shrink below this table size
+                 //               (should be == default_table_size)
+      32767,     // 32K = 32768
+      65521,     // 64K = 65536
+      130051,    // 128K = 131072
+      262127,    // 256K = 262144
+      524269,    // 512K = 524288
+      1048549,   // 1M = 1048576
+      2097143,   // 2M = 2097152
+      4194301,   // 4M = 4194304
+      8388571,   // 8M = 8388608
+      16777199,  // 16M = 16777216
+      33554347}; // 32M = 33554432
+
+  // Try to leave the table 33% full.
+  int desired = item_count * 3;
+
+  // Find the closest prime.
+  for (int i = 0; i < arraysize(table_sizes); i++) {
+    if (table_sizes[i] > desired)
+      return table_sizes[i];
+  }
+
+  // Growing very big, just approximate a "good" number, not growing as much
+  // as normal.
+  return item_count * 2 - 1;
+}
+
+// See the TableBuilder definition in the header file for how this works.
+bool VisitedLinkMaster::RebuildTableFromHistory() {
+  DCHECK(!table_builder_);
+  if (table_builder_)
+    return false;
+
+  HistoryService* history_service = history_service_override_;
+  if (!history_service && profile_) {
+    history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
+  }
+
+  if (!history_service) {
+    DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't "
+                     "obtain a HistoryService.";
+    return false;
+  }
+
+  // TODO(brettw) make sure we have reasonable salt!
+  table_builder_ = new TableBuilder(this, salt_);
+
+  // Make sure the table builder stays live during the call, even if the
+  // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread.
+  table_builder_->AddRef();
+  history_service->IterateURLs(table_builder_);
+  return true;
+}
+
+// See the TableBuilder definition in the header file for how this works.
+void VisitedLinkMaster::OnTableRebuildComplete(
+    bool success,
+    const std::vector<Fingerprint>& fingerprints) {
+  if (success) {
+    // Replace the old table with a new blank one.
+    shared_memory_serial_++;
+
+    // We are responsible for freeing it AFTER it has been replaced if
+    // replacement succeeds.
+    SharedMemory* old_shared_memory = shared_memory_;
+
+    int new_table_size = NewTableSizeForCount(
+        static_cast<int>(fingerprints.size()));
+    if (BeginReplaceURLTable(new_table_size)) {
+      // Free the old table.
+      delete old_shared_memory;
+
+      // Add the stored fingerprints to the hash table.
+      for (size_t i = 0; i < fingerprints.size(); i++)
+        AddFingerprint(fingerprints[i]);
+
+      // Also add anything that was added while we were asynchronously
+      // generating the new table.
+      for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
+           i != added_since_rebuild_.end(); ++i)
+        AddFingerprint(*i);
+      added_since_rebuild_.clear();
+
+      // Now handle deletions.
+      DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
+      deleted_since_rebuild_.clear();
+
+      // Send an update notification to all child processes.
+      post_new_table_event_(shared_memory_);
+
+      WriteFullTable();
+    }
+  }
+  table_builder_ = NULL;  // Will release our reference to the builder.
+
+  // Notify the unit test that the rebuild is complete (will be NULL in prod).
+  if (rebuild_complete_task_.get()) {
+    rebuild_complete_task_->Run();
+    rebuild_complete_task_.reset(NULL);
+  }
+}
+
+void VisitedLinkMaster::WriteToFile(HANDLE hfile,
+                                    int32 offset,
+                                    void* data,
+                                    int32 data_size) {
+#ifndef NDEBUG
+  posted_asynchronous_operation_ = true;
+#endif
+
+  if (file_thread_) {
+    // Send the write to the other thread for execution to avoid blocking.
+    AsyncWriter* writer = new AsyncWriter(hfile, offset, data, data_size);
+    file_thread_->PostTask(FROM_HERE, writer);
+  } else {
+    // When there is no I/O thread, we are probably running in unit test mode,
+    // just do the write synchronously.
+    AsyncWriter::WriteToFile(hfile, offset, data, data_size);
+  }
+}
+
+void VisitedLinkMaster::WriteUsedItemCountToFile() {
+  WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
+}
+
+void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
+  if (last_hash < first_hash) {
+    // Handle wraparound at 0. This first write is first_hash->EOF.
+    WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
+                &hash_table_[first_hash],
+                (table_length_ - first_hash) * sizeof(Fingerprint));
+
+    // Now do 0->last_hash.
+    WriteToFile(file_, kFileHeaderSize, hash_table_,
+                (last_hash + 1) * sizeof(Fingerprint));
+  } else {
+    // Normal case, just write the range.
+    WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
+                &hash_table_[first_hash],
+                (last_hash - first_hash + 1) * sizeof(Fingerprint));
+  }
+}
+
+bool VisitedLinkMaster::ReadFromFile(HANDLE hfile,
+                                     int32 offset,
+                                     void* data,
+                                     int32 data_size) {
+#ifndef NDEBUG
+  // Since this function is synchronous, we require that no asynchronous
+  // operations could possibly be pending.
+  DCHECK(!posted_asynchronous_operation_);
+#endif
+
+  SetFilePointer(hfile, offset, NULL, FILE_BEGIN);
+
+  DWORD num_read;
+  return ReadFile(hfile, data, data_size, &num_read, NULL) &&
+         num_read == data_size;
+}
+
+// VisitedLinkTableBuilder ----------------------------------------------------
+
+VisitedLinkMaster::TableBuilder::TableBuilder(
+    VisitedLinkMaster* master,
+    const uint8 salt[LINK_SALT_LENGTH])
+    : master_(master),
+      success_(true),
+      main_message_loop_(MessageLoop::current()) {
+  fingerprints_.reserve(4096);
+  memcpy(salt_, salt, LINK_SALT_LENGTH);
+}
+
+// TODO(brettw): Do we want to try to cancel the request if this happens? It
+// could delay shutdown if there are a lot of URLs.
+void VisitedLinkMaster::TableBuilder::DisownMaster() {
+  master_ = NULL;
+}
+
+void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
+  if (!url.is_empty()) {
+    fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
+        url.spec().data(), url.spec().length(), salt_));
+  }
+}
+
+void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
+  success_ = success;
+  DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
+
+  // Marshal to the main thread to notify the VisitedLinkMaster that the
+  // rebuild is complete.
+  main_message_loop_->PostTask(FROM_HERE, NewRunnableMethod(this,
+      &TableBuilder::OnCompleteMainThread));
+}
+
+void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
+  if (master_)
+    master_->OnTableRebuildComplete(success_, fingerprints_);
+
+  // WILL (generally) DELETE THIS! This balances the AddRef in
+  // VisitedLinkMaster::RebuildTableFromHistory.
+  Release();
+}
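For reference, the `kFileHeader*` constants and `ReadFileHeader()` together imply a fixed 24-byte on-disk header followed directly by the fingerprint array. A minimal sketch of that layout as a plain struct (the struct and its member names are illustrative, not part of the Chromium tree):

```cpp
#include <cstddef>
#include <cstdint>

const size_t kLinkSaltLength = 8;  // LINK_SALT_LENGTH in the real code.

// Hypothetical mirror of the on-disk header written by WriteFullTable().
struct FileHeader {
  int32_t signature;              // offset 0:  0x6b6e4c56, "VLnk" in ASCII
  int32_t version;                // offset 4:  kFileCurrentVersion == 2
  int32_t length;                 // offset 8:  number of hash-table slots
  int32_t used;                   // offset 12: number of occupied slots
  uint8_t salt[kLinkSaltLength];  // offset 16: per-profile fingerprint salt
};
// length * sizeof(Fingerprint) bytes of table data follow at offset 24,
// which is kFileHeaderSize in the code above.
static_assert(offsetof(FileHeader, version) == 4, "version offset");
static_assert(offsetof(FileHeader, length) == 8, "length offset");
static_assert(offsetof(FileHeader, used) == 12, "used offset");
static_assert(offsetof(FileHeader, salt) == 16, "salt offset");
static_assert(sizeof(FileHeader) == 24, "header size");
```

This is why `ReadFileHeader()` can reject a file on size alone: a valid table is always exactly `kFileHeaderSize + num_entries * sizeof(Fingerprint)` bytes.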
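`AddFingerprint()`, `VisitedLinkCommon::IsVisited()`, and `DeleteFingerprint()` implement a classic open-addressing hash table with linear probing, where an empty (zero) slot terminates every probe sequence. A minimal model of the three operations, with invented names and without the full-table wraparound guard the real code carries:

```cpp
#include <cstdint>
#include <vector>

typedef uint64_t Fingerprint;  // Stand-in for VisitedLinkCommon::Fingerprint.

class ProbeTable {
 public:
  explicit ProbeTable(size_t size) : slots_(size, 0) {}

  // Mirrors AddFingerprint(): probe until the fingerprint or a hole is found.
  bool Add(Fingerprint f) {
    size_t h = Hash(f);
    while (slots_[h] != 0) {
      if (slots_[h] == f)
        return false;               // Already present; do nothing.
      h = (h + 1) % slots_.size();  // IncrementHash: advance, wrapping at 0.
    }
    slots_[h] = f;                  // End of probe sequence; insert here.
    return true;
  }

  // Mirrors IsVisited(): the first empty slot proves absence.
  bool Contains(Fingerprint f) const {
    for (size_t h = Hash(f); slots_[h] != 0; h = (h + 1) % slots_.size()) {
      if (slots_[h] == f)
        return true;
    }
    return false;
  }

  // Mirrors DeleteFingerprint(): simply zeroing a slot would break lookups
  // for later entries in the same probe cluster, so clear the whole run up
  // to the next hole and re-add the survivors.
  void Remove(Fingerprint f) {
    size_t h = Hash(f);
    while (slots_[h] != f) {
      if (slots_[h] == 0)
        return;                     // Not present; nothing to delete.
      h = (h + 1) % slots_.size();
    }
    std::vector<Fingerprint> survivors;
    for (size_t i = h; slots_[i] != 0; i = (i + 1) % slots_.size()) {
      if (slots_[i] != f)
        survivors.push_back(slots_[i]);
      slots_[i] = 0;
    }
    for (size_t i = 0; i < survivors.size(); ++i)
      Add(survivors[i]);            // Rebuild the cluster minus the victim.
  }

 private:
  size_t Hash(Fingerprint f) const {
    return static_cast<size_t>(f % slots_.size());
  }

  std::vector<Fingerprint> slots_;
};
```

The null fingerprint doubles as the empty-slot marker, which is why the real table can live in a single zero-initialized shared-memory block with no separate occupancy bits.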
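The growth policy in `ResizeTableIfNecessary()` and `NewTableSizeForCount()` keeps the load factor roughly between 20% and 50%, and always sizes the table to a precomputed prime so the modulo hash spreads well. A sketch of just the policy arithmetic, assuming the same constants as the patch:

```cpp
#include <cstddef>

const int kPrimeSizes[] = {16381, 32767, 65521, 130051, 262127, 524269,
                           1048549, 2097143, 4194301, 8388571, 16777199,
                           33554347};
const int kDefaultSize = 16381;  // kDefaultTableSize above.

// Mirrors NewTableSizeForCount(): pick the first prime that leaves the
// table about one-third full.
int NewSizeForCount(int item_count) {
  int desired = item_count * 3;
  for (size_t i = 0; i < sizeof(kPrimeSizes) / sizeof(kPrimeSizes[0]); ++i) {
    if (kPrimeSizes[i] > desired)
      return kPrimeSizes[i];
  }
  return item_count * 2 - 1;  // Past the list, grow more slowly.
}

// Mirrors the test in ResizeTableIfNecessary(): grow above 50% load;
// shrink below 20%, but never below the default size.
bool NeedsResize(int used, int length) {
  float load = static_cast<float>(used) / length;
  if (load >= 0.5f)
    return true;  // Linear probing degrades quickly past half full.
  return length > kDefaultSize && load <= 0.2f;
}
```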
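`AddURL()` and `DeleteFingerprint()` avoid rewriting the whole file by flushing only the slot range they touched via `WriteHashRangeToFile()`; a cluster that wraps past the end of the table becomes two writes. The offset arithmetic, sketched with a generic write callback in place of the AsyncWriter plumbing (the helper and its names are hypothetical):

```cpp
#include <cstdint>
#include <functional>

typedef uint64_t Fingerprint;
const uint32_t kHeaderBytes = 24;  // kFileHeaderSize above.

// Write the inclusive slot range [first, last], splitting when it wraps.
void WriteSlotRange(uint32_t first, uint32_t last, uint32_t table_len,
                    const std::function<void(uint32_t offset,
                                             uint32_t bytes)>& write) {
  const uint32_t kSlot = sizeof(Fingerprint);
  if (last < first) {
    // Wrapped at 0: tail of the table first, then the head.
    write(kHeaderBytes + first * kSlot, (table_len - first) * kSlot);
    write(kHeaderBytes, (last + 1) * kSlot);
  } else {
    write(kHeaderBytes + first * kSlot, (last - first + 1) * kSlot);
  }
}
```

A single-URL add degenerates to `WriteSlotRange(index, index, ...)`, the one-fingerprint case the `StackVector` inside AsyncWriter is sized for.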
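While a TableBuilder rebuild is in flight, `TryToAddURL()` and `DeleteURLs()` park changes in `added_since_rebuild_` / `deleted_since_rebuild_` and cancel opposing entries, and `OnTableRebuildComplete()` later replays both sets onto the fresh table. The bookkeeping reduces to a small invariant, sketched here with invented names:

```cpp
#include <cstdint>
#include <set>

typedef uint64_t Fingerprint;

// Invariant: a fingerprint is never in both sets at once.
struct RebuildBuffer {
  std::set<Fingerprint> added;
  std::set<Fingerprint> deleted;

  void OnAdd(Fingerprint f) {
    deleted.erase(f);  // An add cancels a pending delete of the same URL.
    added.insert(f);
  }

  void OnDelete(Fingerprint f) {
    added.erase(f);    // A delete cancels a pending add of the same URL.
    deleted.insert(f);
  }
  // When the rebuild finishes, the master adds everything in 'added' to the
  // new table and then removes everything in 'deleted'.
};
```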
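`GetSharedMemoryName()` has to produce a name that is unique across processes and across table generations, hence the PID + serial + profile-ID triple; `shared_memory_serial_` is incremented on every resize and rebuild, which forces a fresh mapping name for each new table. A rough equivalent of the `StringPrintf` call using standard `swprintf` (the helper itself is hypothetical; `%ls` is the portable wide-string conversion):

```cpp
#include <cwchar>

// E.g. pid=1234, serial=2, profile L"p1" -> L"GVisitedLinks_1234_2_p1".
void MakeSharedMemoryName(wchar_t* out, size_t out_len,
                          unsigned long pid, unsigned long serial,
                          const wchar_t* profile_id) {
  swprintf(out, out_len, L"GVisitedLinks_%lu_%lu_%ls", pid, serial,
           profile_id);
}
```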