diff options
author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-20 06:29:28 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-20 06:29:28 +0000 |
commit | 5332aa894ad01d22aeb01107db6d82ccee648604 (patch) | |
tree | 2c5186162c3a0cff848751dad8f0dba472ef5b21 /chrome/browser/safe_browsing/safe_browsing_store_file.cc | |
parent | 86fdd8723d9f3e185eb946781ed160d4ec122fff (diff) | |
download | chromium_src-5332aa894ad01d22aeb01107db6d82ccee648604.zip chromium_src-5332aa894ad01d22aeb01107db6d82ccee648604.tar.gz chromium_src-5332aa894ad01d22aeb01107db6d82ccee648604.tar.bz2 |
SafeBrowsingStore storage abstraction for SafeBrowsing database.
First bit of refactoring safe-browsing to use a flat file format.
SafeBrowsingStore implements just what is needed for
SafeBrowsingDatabase using straightforward read/modify/write code.
There will be a follow-on change to layer in on-the-fly format
migration and integrate with SafeBrowsingDatabase. This CL only adds
new classes and tests for same.
BUG=none
TEST=none
Review URL: http://codereview.chromium.org/545053
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@36615 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing/safe_browsing_store_file.cc')
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_store_file.cc | 553 |
1 files changed, 553 insertions, 0 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.cc b/chrome/browser/safe_browsing/safe_browsing_store_file.cc new file mode 100644 index 0000000..9fd1bd7 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.cc @@ -0,0 +1,553 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/safe_browsing_store_file.h" + +namespace { + +// NOTE(shess): kFileMagic should not be a byte-wise palindrome, so +// that byte-order changes force corruption. +const int32 kFileMagic = 0x600D71FE; +const int32 kFileVersion = 7; // SQLite storage was 6... +const size_t kFileHeaderSize = 8 * sizeof(int32); + +bool ReadInt32(FILE* fp, int32* value) { + DCHECK(value); + const size_t ret = fread(value, sizeof(*value), 1, fp); + return ret == 1; +} + +bool WriteInt32(FILE* fp, int32 value) { + const size_t ret = fwrite(&value, sizeof(value), 1, fp); + return ret == 1; +} + +bool ReadTime(FILE* fp, base::Time* value) { + DCHECK(value); + + int64 time_t; + const size_t ret = fread(&time_t, sizeof(time_t), 1, fp); + if (ret != 1) + return false; + *value = base::Time::FromTimeT(time_t); + return true; +} + +bool WriteTime(FILE* fp, const base::Time& value) { + const int64 time_t = value.ToTimeT(); + const size_t ret = fwrite(&time_t, sizeof(time_t), 1, fp); + return ret == 1; +} + +bool ReadHash(FILE* fp, SBFullHash* value) { + DCHECK(value); + const size_t ret = fread(&value->full_hash, sizeof(value->full_hash), + 1, fp); + return ret == 1; +} + +bool WriteHash(FILE* fp, SBFullHash value) { + const size_t ret = fwrite(&value.full_hash, sizeof(value.full_hash), + 1, fp); + return ret == 1; +} + +bool FileSeek(FILE* fp, size_t offset) { + int rv = fseek(fp, offset, SEEK_SET); + DCHECK_EQ(rv, 0); + return rv == 0; +} + +// Delete the chunks in |deleted| from |chunks|. 
void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
                         std::set<int32>* chunks) {
  for (std::set<int32>::iterator iter = chunks->begin();
       iter != chunks->end();) {
    // Advance |iter| before erasing |prev| — erasing a std::set element
    // only invalidates iterators to that element.
    std::set<int32>::iterator prev = iter++;
    if (deleted.count(*prev) > 0)
      chunks->erase(prev);
  }
}

}  // namespace

SafeBrowsingStoreFile::SafeBrowsingStoreFile()
    : chunks_written_(0),
      file_(NULL) {
}
SafeBrowsingStoreFile::~SafeBrowsingStoreFile() {
  Close();
}

// Delete the store's backing file and its temporary update file.
// Returns false if either file exists but could not be deleted.
bool SafeBrowsingStoreFile::Delete() {
  // The database should not be open at this point.  But, just in
  // case, close everything before deleting.
  if (!Close()) {
    NOTREACHED();
    return false;
  }

  if (!file_util::Delete(filename_, false) &&
      file_util::PathExists(filename_)) {
    NOTREACHED();
    return false;
  }

  // Also remove any leftover temporary file from an aborted update.
  const FilePath new_filename = TemporaryFileForFilename(filename_);
  if (!file_util::Delete(new_filename, false) &&
      file_util::PathExists(new_filename)) {
    NOTREACHED();
    return false;
  }

  return true;
}

// Record the backing filename and take ownership of the corruption
// callback (reset into a scoped member), which is run whenever the
// store detects on-disk corruption.
void SafeBrowsingStoreFile::Init(const FilePath& filename,
                                 Callback0::Type* corruption_callback) {
  filename_ = filename;
  corruption_callback_.reset(corruption_callback);
}

// Notify the owner that the database is corrupt.  Always returns
// false so call sites can |return OnCorruptDatabase();|.
bool SafeBrowsingStoreFile::OnCorruptDatabase() {
  if (corruption_callback_.get())
    corruption_callback_->Run();

  // Return false as a convenience to callers.
  return false;
}

// Drop all in-memory update state and close any open file handles.
bool SafeBrowsingStoreFile::Close() {
  ClearUpdateBuffers();

  // Make sure the files are closed.
  file_.reset();
  new_file_.reset();
  return true;
}

// Read |count| chunk ids from |fp| into |chunks|.
bool SafeBrowsingStoreFile::ReadChunksToSet(FILE* fp, std::set<int32>* chunks,
                                            int count) {
  DCHECK(fp);

  for (int i = 0; i < count; ++i) {
    int32 chunk_id;
    if (!ReadInt32(fp, &chunk_id))
      return false;
    chunks->insert(chunk_id);
  }
  return true;
}

// Write the chunk ids in |chunks| to |new_file_| in sorted order
// (std::set iteration order).
bool SafeBrowsingStoreFile::WriteChunksFromSet(const std::set<int32>& chunks) {
  DCHECK(new_file_.get());

  for (std::set<int32>::const_iterator iter = chunks.begin();
       iter != chunks.end(); ++iter) {
    if (!WriteInt32(new_file_.get(), *iter))
      return false;
  }
  return true;
}

// Read |count| add-prefix records (chunk id + prefix) from |fp|,
// appending to |add_prefixes|.  Records belonging to chunks in
// |add_del_cache_| (deleted during this update) are read but dropped.
bool SafeBrowsingStoreFile::ReadAddPrefixes(
    FILE* fp, std::vector<SBAddPrefix>* add_prefixes, int count) {
  DCHECK(fp && add_prefixes);

  add_prefixes->reserve(add_prefixes->size() + count);

  for (int32 i = 0; i < count; ++i) {
    int32 chunk_id;
    SBPrefix prefix;
    // The on-disk format stores the prefix as a raw int32.
    DCHECK_EQ(sizeof(int32), sizeof(prefix));

    if (!ReadInt32(fp, &chunk_id) || !ReadInt32(fp, &prefix))
      return false;

    if (add_del_cache_.count(chunk_id) > 0)
      continue;

    add_prefixes->push_back(SBAddPrefix(chunk_id, prefix));
  }

  return true;
}

// Write |add_prefixes| to |new_file_| as (chunk id, prefix) pairs.
bool SafeBrowsingStoreFile::WriteAddPrefixes(
    const std::vector<SBAddPrefix>& add_prefixes) {
  DCHECK(new_file_.get());

  for (std::vector<SBAddPrefix>::const_iterator iter = add_prefixes.begin();
       iter != add_prefixes.end(); ++iter) {
    DCHECK_EQ(sizeof(int32), sizeof(iter->prefix));
    if (!WriteInt32(new_file_.get(), iter->chunk_id) ||
        !WriteInt32(new_file_.get(), iter->prefix))
      return false;
  }
  return true;
}

// Read |count| sub-prefix records (sub chunk id, add chunk id, add
// prefix) from |fp|, appending to |sub_prefixes|.  Records from
// chunks in |sub_del_cache_| are read but dropped.
bool SafeBrowsingStoreFile::ReadSubPrefixes(
    FILE* fp, std::vector<SBSubPrefix>* sub_prefixes, int count) {
  DCHECK(fp && sub_prefixes);

  sub_prefixes->reserve(sub_prefixes->size() + count);

  for (int32 i = 0; i < count; ++i) {
    int32 chunk_id, add_chunk_id;
    SBPrefix add_prefix;
    DCHECK_EQ(sizeof(int32), sizeof(add_prefix));

    if (!ReadInt32(fp, &chunk_id) ||
        !ReadInt32(fp, &add_chunk_id) ||
        !ReadInt32(fp, &add_prefix))
      return false;

    if (sub_del_cache_.count(chunk_id) > 0)
      continue;

    sub_prefixes->push_back(SBSubPrefix(chunk_id, add_chunk_id, add_prefix));
  }

  return true;
}

// Write |sub_prefixes| to |new_file_| as (sub chunk id, add chunk id,
// add prefix) triples.
// NOTE(review): |sub_prefixes| is never modified — the parameter could
// be const ref, matching WriteAddPrefixes().
bool SafeBrowsingStoreFile::WriteSubPrefixes(
    std::vector<SBSubPrefix>& sub_prefixes) {
  DCHECK(new_file_.get());

  for (std::vector<SBSubPrefix>::const_iterator iter = sub_prefixes.begin();
       iter != sub_prefixes.end(); ++iter) {
    if (!WriteInt32(new_file_.get(), iter->chunk_id) ||
        !WriteInt32(new_file_.get(), iter->add_prefix.chunk_id) ||
        !WriteInt32(new_file_.get(), iter->add_prefix.prefix))
      return false;
  }
  return true;
}

// Read |count| add-full-hash records (chunk id, prefix, received
// time, full hash) from |fp|, appending to |add_hashes|.  Records
// from chunks in |add_del_cache_| are read but dropped.
bool SafeBrowsingStoreFile::ReadAddHashes(
    FILE* fp, std::vector<SBAddFullHash>* add_hashes, int count) {
  DCHECK(fp && add_hashes);

  add_hashes->reserve(add_hashes->size() + count);

  for (int i = 0; i < count; ++i) {
    int32 chunk_id;
    SBPrefix prefix;
    base::Time received;
    SBFullHash full_hash;
    DCHECK_EQ(sizeof(int32), sizeof(prefix));

    if (!ReadInt32(fp, &chunk_id) ||
        !ReadInt32(fp, &prefix) ||
        !ReadTime(fp, &received) ||
        !ReadHash(fp, &full_hash))
      return false;

    if (add_del_cache_.count(chunk_id) > 0)
      continue;

    add_hashes->push_back(SBAddFullHash(chunk_id, prefix, received, full_hash));
  }

  return true;
}

// Write |add_hashes| to |new_file_|, mirroring the ReadAddHashes()
// record layout.
bool SafeBrowsingStoreFile::WriteAddHashes(
    const std::vector<SBAddFullHash>& add_hashes) {
  DCHECK(new_file_.get());

  for (std::vector<SBAddFullHash>::const_iterator iter = add_hashes.begin();
       iter != add_hashes.end(); ++iter) {
    if (!WriteInt32(new_file_.get(), iter->add_prefix.chunk_id) ||
        !WriteInt32(new_file_.get(), iter->add_prefix.prefix) ||
        !WriteTime(new_file_.get(), iter->received) ||
        !WriteHash(new_file_.get(), iter->full_hash))
      return false;
  }
  return true;
}

// Read |count| sub-full-hash records (sub chunk id, add chunk id, add
// prefix, add full hash) from |fp|, appending to |sub_hashes|.
// Records from chunks in |sub_del_cache_| are read but dropped.
bool SafeBrowsingStoreFile::ReadSubHashes(
    FILE* fp, std::vector<SBSubFullHash>* sub_hashes, int count) {
  DCHECK(fp);

  sub_hashes->reserve(sub_hashes->size() + count);

  for (int i = 0; i < count; ++i) {
    int32 chunk_id;
    int32 add_chunk_id;
    SBPrefix add_prefix;
    SBFullHash add_full_hash;
    DCHECK_EQ(sizeof(int32), sizeof(add_prefix));

    if (!ReadInt32(fp, &chunk_id) ||
        !ReadInt32(fp, &add_chunk_id) ||
        !ReadInt32(fp, &add_prefix) ||
        !ReadHash(fp, &add_full_hash))
      return false;

    if (sub_del_cache_.count(chunk_id) > 0)
      continue;

    sub_hashes->push_back(
        SBSubFullHash(chunk_id, add_chunk_id, add_prefix, add_full_hash));
  }

  return true;
}

// Write |sub_hashes| to |new_file_|, mirroring the ReadSubHashes()
// record layout.
// NOTE(review): |sub_hashes| is never modified — could be const ref.
bool SafeBrowsingStoreFile::WriteSubHashes(
    std::vector<SBSubFullHash>& sub_hashes) {
  DCHECK(new_file_.get());

  for (std::vector<SBSubFullHash>::const_iterator iter = sub_hashes.begin();
       iter != sub_hashes.end(); ++iter) {
    if (!WriteInt32(new_file_.get(), iter->chunk_id) ||
        !WriteInt32(new_file_.get(), iter->add_prefix.chunk_id) ||
        !WriteInt32(new_file_.get(), iter->add_prefix.prefix) ||
        !WriteHash(new_file_.get(), iter->full_hash))
      return false;
  }
  return true;
}

// Open the store for an update: create the temporary scratch file,
// validate the existing store's header (magic/version), and load its
// add/sub chunk-id sets into the caches.  A missing store file is
// treated as an empty store; an unreadable or malformed one is
// reported via OnCorruptDatabase().
bool SafeBrowsingStoreFile::BeginUpdate() {
  DCHECK(!file_.get() && !new_file_.get());

  // Structures should all be clear unless something bad happened.
  DCHECK(add_chunks_cache_.empty());
  DCHECK(sub_chunks_cache_.empty());
  DCHECK(add_del_cache_.empty());
  DCHECK(sub_del_cache_.empty());
  DCHECK(add_prefixes_.empty());
  DCHECK(sub_prefixes_.empty());
  DCHECK(add_hashes_.empty());
  DCHECK(sub_hashes_.empty());
  DCHECK_EQ(chunks_written_, 0);

  const FilePath new_filename = TemporaryFileForFilename(filename_);
  file_util::ScopedFILE new_file(file_util::OpenFile(new_filename, "wb+"));
  if (new_file.get() == NULL)
    return false;

  file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
  empty_ = (file.get() == NULL);
  if (empty_) {
    // If the file exists but cannot be opened, try to delete it (not
    // deleting directly, the bloom filter needs to be deleted, too).
    if (file_util::PathExists(filename_))
      return OnCorruptDatabase();

    new_file_.swap(new_file);
    return true;
  }

  int32 magic, version;
  if (!ReadInt32(file.get(), &magic) || !ReadInt32(file.get(), &version))
    return OnCorruptDatabase();

  if (magic != kFileMagic || version != kFileVersion)
    return OnCorruptDatabase();

  int32 add_chunk_count, sub_chunk_count;
  if (!ReadInt32(file.get(), &add_chunk_count) ||
      !ReadInt32(file.get(), &sub_chunk_count))
    return OnCorruptDatabase();

  // Skip the remainder of the fixed-size header; the chunk-id lists
  // start immediately after it.
  if (!FileSeek(file.get(), kFileHeaderSize))
    return OnCorruptDatabase();

  if (!ReadChunksToSet(file.get(), &add_chunks_cache_, add_chunk_count) ||
      !ReadChunksToSet(file.get(), &sub_chunks_cache_, sub_chunk_count))
    return OnCorruptDatabase();

  file_.swap(file);
  new_file_.swap(new_file);
  return true;
}

// Flush the current chunk's buffered adds/subs to the temporary file
// as a counted record group, then clear the buffers.  A chunk with no
// data is a no-op.
// NOTE(review): assumes |new_file_| is open, i.e. BeginUpdate()
// succeeded — there is no DCHECK here, unlike the Write* helpers.
bool SafeBrowsingStoreFile::FinishChunk() {
  if (!add_prefixes_.size() && !sub_prefixes_.size() &&
      !add_hashes_.size() && !sub_hashes_.size())
    return true;

  // Per-chunk header: the four record counts, then the records.
  if (!WriteInt32(new_file_.get(), add_prefixes_.size()) ||
      !WriteInt32(new_file_.get(), sub_prefixes_.size()) ||
      !WriteInt32(new_file_.get(), add_hashes_.size()) ||
      !WriteInt32(new_file_.get(), sub_hashes_.size()))
    return false;

  if (!WriteAddPrefixes(add_prefixes_) ||
      !WriteSubPrefixes(sub_prefixes_) ||
      !WriteAddHashes(add_hashes_) ||
      !WriteSubHashes(sub_hashes_))
    return false;

  ++chunks_written_;

  // Clear everything to save memory.
  return ClearChunkBuffers();
}

// Merge the existing store, the chunks accumulated in the temporary
// file, and |pending_adds| into a single data set; apply sub/delete
// filtering; rewrite the temporary file in the final on-disk format;
// and atomically move it over the store file.  On success the merged
// add prefixes/hashes are swapped into the |*_result| out-params.
bool SafeBrowsingStoreFile::DoUpdate(
    const std::vector<SBAddFullHash>& pending_adds,
    std::vector<SBAddPrefix>* add_prefixes_result,
    std::vector<SBAddFullHash>* add_full_hashes_result) {
  DCHECK(file_.get() || empty_);
  DCHECK(new_file_.get());

  std::vector<SBAddPrefix> add_prefixes;
  std::vector<SBSubPrefix> sub_prefixes;
  std::vector<SBAddFullHash> add_full_hashes;
  std::vector<SBSubFullHash> sub_full_hashes;

  // Read |file_| into the vectors.
  if (!empty_) {
    DCHECK(file_.get());

    int32 magic, version;
    int32 add_chunk_count, sub_chunk_count;
    int32 add_prefix_count, sub_prefix_count;
    int32 add_hash_count, sub_hash_count;

    // Re-read the header from the top; BeginUpdate() left the file
    // positioned after the chunk-id lists.
    if (!FileSeek(file_.get(), 0))
      return OnCorruptDatabase();

    if (!ReadInt32(file_.get(), &magic) ||
        !ReadInt32(file_.get(), &version) ||
        !ReadInt32(file_.get(), &add_chunk_count) ||
        !ReadInt32(file_.get(), &sub_chunk_count) ||
        !ReadInt32(file_.get(), &add_prefix_count) ||
        !ReadInt32(file_.get(), &sub_prefix_count) ||
        !ReadInt32(file_.get(), &add_hash_count) ||
        !ReadInt32(file_.get(), &sub_hash_count))
      return OnCorruptDatabase();

    if (magic != kFileMagic || version != kFileVersion)
      return OnCorruptDatabase();

    // Skip over the chunk-id lists (already cached by BeginUpdate())
    // to the start of the prefix data.
    const size_t prefixes_offset = kFileHeaderSize +
        (add_chunk_count + sub_chunk_count) * sizeof(int32);
    if (!FileSeek(file_.get(), prefixes_offset))
      return OnCorruptDatabase();

    if (!ReadAddPrefixes(file_.get(), &add_prefixes, add_prefix_count) ||
        !ReadSubPrefixes(file_.get(), &sub_prefixes, sub_prefix_count) ||
        !ReadAddHashes(file_.get(), &add_full_hashes, add_hash_count) ||
        !ReadSubHashes(file_.get(), &sub_full_hashes, sub_hash_count))
      return OnCorruptDatabase();

    // Close the file so we can later rename over it.
    file_.reset();
  }
  DCHECK(!file_.get());

  // Rewind the temporary storage.
  if (!FileSeek(new_file_.get(), 0))
    return false;

  // Append the accumulated chunks onto the vectors from file_.
  for (int i = 0; i < chunks_written_; ++i) {
    int32 add_prefix_count, sub_prefix_count;
    int32 add_hash_count, sub_hash_count;

    // Per-chunk header written by FinishChunk().
    if (!ReadInt32(new_file_.get(), &add_prefix_count) ||
        !ReadInt32(new_file_.get(), &sub_prefix_count) ||
        !ReadInt32(new_file_.get(), &add_hash_count) ||
        !ReadInt32(new_file_.get(), &sub_hash_count))
      return false;

    // TODO(shess): If the vectors were kept sorted, then this code
    // could use std::inplace_merge() to merge everything together in
    // sorted order.  That might still be slower than just sorting at
    // the end if there were a large number of chunks.  In that case
    // some sort of recursive binary merge might be in order (merge
    // chunks pairwise, merge those chunks pairwise, and so on, then
    // merge the result with the main list).
    if (!ReadAddPrefixes(new_file_.get(), &add_prefixes, add_prefix_count) ||
        !ReadSubPrefixes(new_file_.get(), &sub_prefixes, sub_prefix_count) ||
        !ReadAddHashes(new_file_.get(), &add_full_hashes, add_hash_count) ||
        !ReadSubHashes(new_file_.get(), &sub_full_hashes, sub_hash_count))
      return false;
  }

  // Add the pending adds which haven't since been deleted.
  for (std::vector<SBAddFullHash>::const_iterator iter = pending_adds.begin();
       iter != pending_adds.end(); ++iter) {
    if (add_del_cache_.count(iter->add_prefix.chunk_id) == 0)
      add_full_hashes.push_back(*iter);
  }

  // Knock the subs from the adds.
  SBProcessSubs(&add_prefixes, &sub_prefixes,
                &add_full_hashes, &sub_full_hashes);

  // We no longer need to track deleted chunks.
  DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
  DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);

  // Write the new data to new_file_.
  // TODO(shess): If we receive a lot of subs relative to adds,
  // overwriting the temporary chunk data in new_file_ with the
  // permanent data could leave additional data at the end.  Won't
  // cause any problems, but does waste space.  There is no truncate()
  // for stdio.  Could use ftruncate() or re-open the file.  Or maybe
  // ignore it, since we'll likely rewrite soon enough.
  if (!FileSeek(new_file_.get(), 0))
    return false;

  if (!WriteInt32(new_file_.get(), kFileMagic) ||
      !WriteInt32(new_file_.get(), kFileVersion) ||
      !WriteInt32(new_file_.get(), add_chunks_cache_.size()) ||
      !WriteInt32(new_file_.get(), sub_chunks_cache_.size()) ||
      !WriteInt32(new_file_.get(), add_prefixes.size()) ||
      !WriteInt32(new_file_.get(), sub_prefixes.size()) ||
      !WriteInt32(new_file_.get(), add_full_hashes.size()) ||
      !WriteInt32(new_file_.get(), sub_full_hashes.size()))
    return false;

  if (!WriteChunksFromSet(add_chunks_cache_) ||
      !WriteChunksFromSet(sub_chunks_cache_) ||
      !WriteAddPrefixes(add_prefixes) ||
      !WriteSubPrefixes(sub_prefixes) ||
      !WriteAddHashes(add_full_hashes) ||
      !WriteSubHashes(sub_full_hashes))
    return false;

  // Close the file handle and swizzle the file into place.
  new_file_.reset();
  const FilePath new_filename = TemporaryFileForFilename(filename_);
  if (!file_util::Delete(filename_, false) ||
      !file_util::Move(new_filename, filename_))
    return false;

  // Pass the resulting data off to the caller.
  add_prefixes_result->swap(add_prefixes);
  add_full_hashes_result->swap(add_full_hashes);

  return true;
}

// Run DoUpdate(), cancelling (and cleaning up) on failure; on success
// close the store and reset all update state.
bool SafeBrowsingStoreFile::FinishUpdate(
    const std::vector<SBAddFullHash>& pending_adds,
    std::vector<SBAddPrefix>* add_prefixes_result,
    std::vector<SBAddFullHash>* add_full_hashes_result) {
  bool ret = DoUpdate(pending_adds,
                      add_prefixes_result, add_full_hashes_result);

  if (!ret) {
    CancelUpdate();
    return false;
  }

  DCHECK(!new_file_.get());
  DCHECK(!file_.get());

  return Close();
}

// Abandon an in-progress update, discarding buffered state and
// closing both file handles.
bool SafeBrowsingStoreFile::CancelUpdate() {
  return Close();
}