author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-20 06:29:28 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-20 06:29:28 +0000 |
commit | 5332aa894ad01d22aeb01107db6d82ccee648604 (patch) | |
tree | 2c5186162c3a0cff848751dad8f0dba472ef5b21 /chrome/browser | |
parent | 86fdd8723d9f3e185eb946781ed160d4ec122fff (diff) | |
SafeBrowsingStore storage abstraction for SafeBrowsing database.
First step in refactoring safe-browsing to use a flat file format.
SafeBrowsingStore implements just what SafeBrowsingDatabase needs,
using straightforward read/modify/write code.
A follow-on change will layer in on-the-fly format migration and
integrate with SafeBrowsingDatabase. This CL only adds the new
classes and their tests.
BUG=none
TEST=none
Review URL: http://codereview.chromium.org/545053
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@36615 0039d316-1c4b-4281-b951-d872f2087c98
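For orientation before the diff itself, here is a minimal sketch of how a caller might drive the new interface during one update cycle. The method names and signatures come from the SafeBrowsingStore/SafeBrowsingStoreFile classes added below; the file path, chunk id, and prefix value are made-up illustrative inputs, and error handling is reduced to early returns.

// Illustrative only: exercises the SafeBrowsingStoreFile added in this
// CL. The path, chunk id, and prefix value are hypothetical.
#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"

bool ExampleUpdate(const FilePath& path) {
  SafeBrowsingStoreFile store;
  store.Init(path, NULL);  // No corruption callback in this sketch.

  if (!store.BeginUpdate())
    return false;

  // One add chunk containing a single 32-bit prefix.
  store.SetAddChunk(1);
  if (!store.BeginChunk() ||
      !store.WriteAddPrefix(1, 0x31337100) ||
      !store.FinishChunk()) {
    store.CancelUpdate();
    return false;
  }

  // Commit. The netted-out adds come back for building the bloom filter.
  std::vector<SBAddFullHash> pending_adds;
  std::vector<SBAddPrefix> add_prefixes;
  std::vector<SBAddFullHash> add_full_hashes;
  return store.FinishUpdate(pending_adds, &add_prefixes, &add_full_hashes);
}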
Diffstat (limited to 'chrome/browser')
11 files changed, 2795 insertions, 0 deletions
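The heart of the new store is the add/sub netting performed by SBProcessSubs()/KnockoutSubs() in safe_browsing_store.cc below: both inputs are sorted by the same key and walked in parallel, so each matching add/sub pair can be dropped in a single linear pass. The following is a simplified standalone illustration of that pass, not the CL's code: plain ints stand in for the real prefix structs, and the bookkeeping of removed adds is omitted.

// Simplified sketch of the knockout idea, assuming both vectors are
// already sorted. Hypothetical types; see KnockoutSubs() for the real thing.
#include <vector>

void Knockout(std::vector<int>* adds, std::vector<int>* subs) {
  std::vector<int> kept_adds, kept_subs;
  std::vector<int>::const_iterator a = adds->begin();
  std::vector<int>::const_iterator s = subs->begin();
  while (a != adds->end() && s != subs->end()) {
    if (*a < *s) {
      kept_adds.push_back(*a++);  // Unmatched add is retained.
    } else if (*s < *a) {
      kept_subs.push_back(*s++);  // Unmatched sub is retained.
    } else {
      ++a;  // A matching pair knocks both items out.
      ++s;
    }
  }
  // Whatever is left on either side has no possible match.
  kept_adds.insert(kept_adds.end(), a, adds->end());
  kept_subs.insert(kept_subs.end(), s, subs->end());
  adds->swap(kept_adds);
  subs->swap(kept_subs);
}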
diff --git a/chrome/browser/safe_browsing/safe_browsing_store.cc b/chrome/browser/safe_browsing/safe_browsing_store.cc new file mode 100644 index 0000000..21fd37f --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store.cc @@ -0,0 +1,152 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/safe_browsing_store.h" + +namespace { + +// Find items matching between |subs| and |adds|, and remove them, +// recording the item from |adds| in |adds_removed|. To minimize +// copies, the inputs are processing in parallel, so |subs| and |adds| +// should be compatibly ordered (either by SBAddPrefixLess or +// SBAddPrefixHashLess). +// +// |predAS| provides add < sub, |predSA| provides sub < add, for the +// tightest compare appropriate (see calls in SBProcessSubs). +template <class S, class A, typename PredAS, typename PredSA> +void KnockoutSubs(std::vector<S>* subs, + std::vector<A>* adds, + PredAS predAS, PredSA predSA, + std::vector<A>* adds_removed) { + // Keep a pair of output iterators for writing kept items. Due to + // deletions, these may lag the main iterators. Using erase() on + // individual items would result in O(N^2) copies. Using std::list + // would work around that, at double or triple the memory cost. + typename std::vector<A>::iterator add_out = adds->begin(); + typename std::vector<S>::iterator sub_out = subs->begin(); + + // Current location in vectors. + // TODO(shess): I want these to be const_iterator, but then + // std::copy() gets confused. Could snag a const_iterator add_end, + // or write an inline std::copy(), but it seems like I'm doing + // something wrong. + typename std::vector<A>::iterator add_iter = adds->begin(); + typename std::vector<S>::iterator sub_iter = subs->begin(); + + while (add_iter != adds->end() && sub_iter != subs->end()) { + // If |*sub_iter| < |*add_iter|, retain the sub. + if (predSA(*sub_iter, *add_iter)) { + *sub_out = *sub_iter; + ++sub_out; + ++sub_iter; + + // If |*add_iter| < |*sub_iter|, retain the add. + } else if (predAS(*add_iter, *sub_iter)) { + *add_out = *add_iter; + ++add_out; + ++add_iter; + + // Record equal items and drop them. + } else { + adds_removed->push_back(*add_iter); + ++add_iter; + ++sub_iter; + } + } + + // Erase any leftover gap. + adds->erase(add_out, add_iter); + subs->erase(sub_out, sub_iter); +} + +// Remove items in |removes| from |full_hashes|. |full_hashes| and +// |removes| should be ordered by SBAddPrefix component. +template <class T> +void RemoveMatchingPrefixes(const std::vector<SBAddPrefix>& removes, + std::vector<T>* full_hashes) { + // This is basically an inline of std::set_difference(). + // Unfortunately, that algorithm requires that the two iterator + // pairs use the same value types. + + // Where to store kept items. + typename std::vector<T>::iterator out = full_hashes->begin(); + + typename std::vector<T>::iterator hash_iter = full_hashes->begin(); + std::vector<SBAddPrefix>::const_iterator remove_iter = removes.begin(); + + while (hash_iter != full_hashes->end() && remove_iter != removes.end()) { + // Keep items less than |*remove_iter|. + if (SBAddPrefixLess(*hash_iter, *remove_iter)) { + *out = *hash_iter; + ++out; + ++hash_iter; + + // No hit for |*remove_iter|, bump it forward. + } else if (SBAddPrefixLess(*remove_iter, *hash_iter)) { + ++remove_iter; + + // Drop equal items, there may be multiple hits. 
+ } else { + do { + ++hash_iter; + } while (hash_iter != full_hashes->end() && + !SBAddPrefixLess(*remove_iter, *hash_iter)); + ++remove_iter; + } + } + + // Erase any leftover gap. + full_hashes->erase(out, hash_iter); +} + +} // namespace + +void SBProcessSubs(std::vector<SBAddPrefix>* add_prefixes, + std::vector<SBSubPrefix>* sub_prefixes, + std::vector<SBAddFullHash>* add_full_hashes, + std::vector<SBSubFullHash>* sub_full_hashes) { + // It is possible to structure templates and template + // specializations such that the following calls work without having + // to qualify things. It becomes very arbitrary, though, and less + // clear how things are working. + + // Sort the inputs by the SBAddPrefix bits. + std::sort(add_prefixes->begin(), add_prefixes->end(), + SBAddPrefixLess<SBAddPrefix,SBAddPrefix>); + std::sort(sub_prefixes->begin(), sub_prefixes->end(), + SBAddPrefixLess<SBSubPrefix,SBSubPrefix>); + std::sort(add_full_hashes->begin(), add_full_hashes->end(), + SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>); + std::sort(sub_full_hashes->begin(), sub_full_hashes->end(), + SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>); + + // Factor out the prefix subs. + std::vector<SBAddPrefix> removed_adds; + KnockoutSubs(sub_prefixes, add_prefixes, + SBAddPrefixLess<SBAddPrefix,SBSubPrefix>, + SBAddPrefixLess<SBSubPrefix,SBAddPrefix>, + &removed_adds); + + // Remove the full-hashes corrosponding to the adds which + // KnockoutSubs() removed. Processing these w/in KnockoutSubs() + // would make the code more complicated, and they are very small + // relative to the prefix lists so the gain would be modest. + RemoveMatchingPrefixes(removed_adds, add_full_hashes); + RemoveMatchingPrefixes(removed_adds, sub_full_hashes); + + // TODO(shess): AFAICT this pass is not done on the trunk. I + // believe that's a bug, but it may not matter because full-hash + // subs almost never happen (I think you'd need multiple collisions + // where one of the sites stopped being flagged?). Enable this once + // everything is in. [if(0) instead of #ifdef 0 to make sure it + // compiles.] + if (0) { + // Factor out the full-hash subs. + std::vector<SBAddFullHash> removed_full_adds; + KnockoutSubs(sub_full_hashes, add_full_hashes, + SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>, + SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>, + &removed_full_adds); + } +} diff --git a/chrome/browser/safe_browsing/safe_browsing_store.h b/chrome/browser/safe_browsing/safe_browsing_store.h new file mode 100644 index 0000000..0d13e88 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store.h @@ -0,0 +1,211 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ +#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ + +#include <set> +#include <vector> + +#include "base/basictypes.h" +#include "base/file_path.h" +#include "base/task.h" +#include "base/time.h" +#include "chrome/browser/safe_browsing/safe_browsing_util.h" + +// SafeBrowsingStore provides a storage abstraction for the +// safe-browsing data used to build the bloom filter. The items +// stored are: +// The set of add and sub chunks seen. +// List of SBAddPrefix (chunk_id and SBPrefix). +// List of SBSubPrefix (chunk_id and the target SBAddPrefix). +// List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash). 
+// List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash). +// +// The store is geared towards updating the data, not runtime access +// to the data (that is handled by SafeBrowsingDatabase). Updates are +// handled similar to a SQL transaction cycle, with the new data being +// returned from FinishUpdate() (the COMMIT). Data is not persistent +// until FinishUpdate() returns successfully. +// +// FinishUpdate() also handles dropping items who's chunk has been +// deleted, and netting out the add/sub lists (when a sub matches an +// add, both are dropped). + +// GetAddChunkId(), GetAddPrefix() and GetFullHash() are exposed so +// that these items can be generically compared with each other by +// SBAddPrefixLess() and SBAddPrefixHashLess(). + +struct SBAddPrefix { + int32 chunk_id; + SBPrefix prefix; + + SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {} + + int32 GetAddChunkId() const { return chunk_id; } + SBPrefix GetAddPrefix() const { return prefix; } +}; + +struct SBSubPrefix { + int32 chunk_id; + SBAddPrefix add_prefix; + + SBSubPrefix(int32 id, int32 add_id, int prefix) + : chunk_id(id), add_prefix(add_id, prefix) {} + + int32 GetAddChunkId() const { return add_prefix.chunk_id; } + SBPrefix GetAddPrefix() const { return add_prefix.prefix; } +}; + +// TODO(shess): The full_hash includes the prefix, so the prefix could +// be dropped. But SBAddPrefix is convenient for comparing across +// different structs, and there aren't many full hashes. Hmm. +struct SBAddFullHash { + SBAddPrefix add_prefix; + base::Time received; + SBFullHash full_hash; + + SBAddFullHash(int32 id, SBPrefix p, base::Time r, SBFullHash h) + : add_prefix(id, p), received(r), full_hash(h) {} + + int32 GetAddChunkId() const { return add_prefix.chunk_id; } + SBPrefix GetAddPrefix() const { return add_prefix.prefix; } +}; + +struct SBSubFullHash { + int32 chunk_id; + SBAddPrefix add_prefix; + SBFullHash full_hash; + + SBSubFullHash(int32 id, int32 add_id, SBPrefix p, SBFullHash h) + : chunk_id(id), add_prefix(add_id, p), full_hash(h) {} + + int32 GetAddChunkId() const { return add_prefix.chunk_id; } + SBPrefix GetAddPrefix() const { return add_prefix.prefix; } +}; + +// Determine less-than based on add chunk and prefix. +template <class T, class U> +bool SBAddPrefixLess(const T& a, const U& b) { + if (a.GetAddChunkId() != b.GetAddChunkId()) + return a.GetAddChunkId() < b.GetAddChunkId(); + + return a.GetAddPrefix() < b.GetAddPrefix(); +} + +// Determine less-than based on add chunk, prefix, and full hash. +// Prefix can compare differently than hash due to byte ordering, +// so it must take precedence. +template <class T, class U> +bool SBAddPrefixHashLess(const T& a, const U& b) { + if (SBAddPrefixLess(a, b)) + return true; + + if (SBAddPrefixLess(b, a)) + return false; + + return memcmp(a.full_hash.full_hash, b.full_hash.full_hash, + sizeof(a.full_hash.full_hash)) < 0; +} + +// Process the lists for subs which knock out adds. For any item in +// |sub_prefixes| which has a match in |add_prefixes|, knock out the +// matched items from all vectors. +// +// TODO(shess): Since the prefixes are uniformly-distributed hashes, +// there aren't many ways to organize the inputs for efficient +// processing. For this reason, the vectors are sorted and processed +// in parallel. At this time this code does the sorting internally, +// but it might make sense to make sorting an API requirement so that +// the storage can optimize for it. 
+// +// TODO(shess): The original code did not process |sub_full_hashes| +// for matches in |add_full_hashes|, so this code doesn't, either. I +// think this is probably a bug. +void SBProcessSubs(std::vector<SBAddPrefix>* add_prefixes, + std::vector<SBSubPrefix>* sub_prefixes, + std::vector<SBAddFullHash>* add_full_hashes, + std::vector<SBSubFullHash>* sub_full_hashes); + +// TODO(shess): This uses int32 rather than int because it's writing +// specifically-sized items to files. SBPrefix should likewise be +// explicitly sized. + +// Abstract interface for storing data. +class SafeBrowsingStore { + public: + SafeBrowsingStore() {} + virtual ~SafeBrowsingStore() {} + + // Sets up the information for later use, but does not necessarily + // check whether the underlying file exists, or is valid. If + // |curruption_callback| is non-NULL it will be called if corruption + // is detected, which could happen as part of any call other than + // Delete(). The appropriate action is to use Delete() to clear the + // store. + virtual void Init(const FilePath& filename, + Callback0::Type* corruption_callback) = 0; + + // Deletes the files which back the store, returning true if + // successful. + virtual bool Delete() = 0; + + // Start an update. None of the following methods should be called + // unless this returns true. If this returns true, the update + // should be terminated by FinishUpdate() or CancelUpdate(). + virtual bool BeginUpdate() = 0; + + // Start a chunk of data. None of the methods through FinishChunk() + // should be called unless this returns true. + // TODO(shess): Would it make sense for this to accept |chunk_id|? + // Possibly not, because of possible confusion between sub_chunk_id + // and add_chunk_id. + virtual bool BeginChunk() = 0; + + virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0; + virtual bool WriteAddHash(int32 chunk_id, SBPrefix prefix, + base::Time receive_time, SBFullHash full_hash) = 0; + virtual bool WriteSubPrefix(int32 chunk_id, + int32 add_chunk_id, SBPrefix prefix) = 0; + virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id, + SBPrefix prefix, SBFullHash full_hash) = 0; + + // Collect the chunk data and preferrably store it on disk to + // release memory. Shoul not modify the data in-place. + virtual bool FinishChunk() = 0; + + // Track the chunks which have been seen. + virtual void SetAddChunk(int32 chunk_id) = 0; + virtual bool CheckAddChunk(int32 chunk_id) = 0; + virtual void GetAddChunks(std::vector<int32>* out) = 0; + virtual void SetSubChunk(int32 chunk_id) = 0; + virtual bool CheckSubChunk(int32 chunk_id) = 0; + virtual void GetSubChunks(std::vector<int32>* out) = 0; + + // Delete the indicated chunk_id. The chunk will continue to be + // visible until the end of the transaction. + virtual void DeleteAddChunk(int32 chunk_id) = 0; + virtual void DeleteSubChunk(int32 chunk_id) = 0; + + // Pass the collected chunks through SBPRocessSubs() and commit to + // permanent storage. The resulting add prefixes and hashes will be + // stored in |add_prefixes_result| and |add_full_hashes_result|. + // |pending_adds| is the set of full hashes which have been received + // since the previous update, and is provided as a convenience + // (could be written via WriteAddHash(), but that would flush the + // chunk to disk). 
+ virtual bool FinishUpdate( + const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result) = 0; + + // Cancel the update in process and remove any temporary disk + // storage, leaving the original data unmodified. + virtual bool CancelUpdate() = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore); +}; + +#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.cc b/chrome/browser/safe_browsing/safe_browsing_store_file.cc new file mode 100644 index 0000000..9fd1bd7 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.cc @@ -0,0 +1,553 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/safe_browsing_store_file.h" + +namespace { + +// NOTE(shess): kFileMagic should not be a byte-wise palindrome, so +// that byte-order changes force corruption. +const int32 kFileMagic = 0x600D71FE; +const int32 kFileVersion = 7; // SQLite storage was 6... +const size_t kFileHeaderSize = 8 * sizeof(int32); + +bool ReadInt32(FILE* fp, int32* value) { + DCHECK(value); + const size_t ret = fread(value, sizeof(*value), 1, fp); + return ret == 1; +} + +bool WriteInt32(FILE* fp, int32 value) { + const size_t ret = fwrite(&value, sizeof(value), 1, fp); + return ret == 1; +} + +bool ReadTime(FILE* fp, base::Time* value) { + DCHECK(value); + + int64 time_t; + const size_t ret = fread(&time_t, sizeof(time_t), 1, fp); + if (ret != 1) + return false; + *value = base::Time::FromTimeT(time_t); + return true; +} + +bool WriteTime(FILE* fp, const base::Time& value) { + const int64 time_t = value.ToTimeT(); + const size_t ret = fwrite(&time_t, sizeof(time_t), 1, fp); + return ret == 1; +} + +bool ReadHash(FILE* fp, SBFullHash* value) { + DCHECK(value); + const size_t ret = fread(&value->full_hash, sizeof(value->full_hash), + 1, fp); + return ret == 1; +} + +bool WriteHash(FILE* fp, SBFullHash value) { + const size_t ret = fwrite(&value.full_hash, sizeof(value.full_hash), + 1, fp); + return ret == 1; +} + +bool FileSeek(FILE* fp, size_t offset) { + int rv = fseek(fp, offset, SEEK_SET); + DCHECK_EQ(rv, 0); + return rv == 0; +} + +// Delete the chunks in |deleted| from |chunks|. +void DeleteChunksFromSet(const base::hash_set<int32>& deleted, + std::set<int32>* chunks) { + for (std::set<int32>::iterator iter = chunks->begin(); + iter != chunks->end();) { + std::set<int32>::iterator prev = iter++; + if (deleted.count(*prev) > 0) + chunks->erase(prev); + } +} + +} // namespace + +SafeBrowsingStoreFile::SafeBrowsingStoreFile() + : chunks_written_(0), + file_(NULL) { +} +SafeBrowsingStoreFile::~SafeBrowsingStoreFile() { + Close(); +} + +bool SafeBrowsingStoreFile::Delete() { + // The database should not be open at this point. But, just in + // case, close everything before deleting. 
+ if (!Close()) { + NOTREACHED(); + return false; + } + + if (!file_util::Delete(filename_, false) && + file_util::PathExists(filename_)) { + NOTREACHED(); + return false; + } + + const FilePath new_filename = TemporaryFileForFilename(filename_); + if (!file_util::Delete(new_filename, false) && + file_util::PathExists(new_filename)) { + NOTREACHED(); + return false; + } + + return true; +} + +void SafeBrowsingStoreFile::Init(const FilePath& filename, + Callback0::Type* corruption_callback) { + filename_ = filename; + corruption_callback_.reset(corruption_callback); +} + +bool SafeBrowsingStoreFile::OnCorruptDatabase() { + if (corruption_callback_.get()) + corruption_callback_->Run(); + + // Return false as a convenience to callers. + return false; +} + +bool SafeBrowsingStoreFile::Close() { + ClearUpdateBuffers(); + + // Make sure the files are closed. + file_.reset(); + new_file_.reset(); + return true; +} + +bool SafeBrowsingStoreFile::ReadChunksToSet(FILE* fp, std::set<int32>* chunks, + int count) { + DCHECK(fp); + + for (int i = 0; i < count; ++i) { + int32 chunk_id; + if (!ReadInt32(fp, &chunk_id)) + return false; + chunks->insert(chunk_id); + } + return true; +} + +bool SafeBrowsingStoreFile::WriteChunksFromSet(const std::set<int32>& chunks) { + DCHECK(new_file_.get()); + + for (std::set<int32>::const_iterator iter = chunks.begin(); + iter != chunks.end(); ++iter) { + if (!WriteInt32(new_file_.get(), *iter)) + return false; + } + return true; +} + +bool SafeBrowsingStoreFile::ReadAddPrefixes( + FILE* fp, std::vector<SBAddPrefix>* add_prefixes, int count) { + DCHECK(fp && add_prefixes); + + add_prefixes->reserve(add_prefixes->size() + count); + + for (int32 i = 0; i < count; ++i) { + int32 chunk_id; + SBPrefix prefix; + DCHECK_EQ(sizeof(int32), sizeof(prefix)); + + if (!ReadInt32(fp, &chunk_id) || !ReadInt32(fp, &prefix)) + return false; + + if (add_del_cache_.count(chunk_id) > 0) + continue; + + add_prefixes->push_back(SBAddPrefix(chunk_id, prefix)); + } + + return true; +} + +bool SafeBrowsingStoreFile::WriteAddPrefixes( + const std::vector<SBAddPrefix>& add_prefixes) { + DCHECK(new_file_.get()); + + for (std::vector<SBAddPrefix>::const_iterator iter = add_prefixes.begin(); + iter != add_prefixes.end(); ++iter) { + DCHECK_EQ(sizeof(int32), sizeof(iter->prefix)); + if (!WriteInt32(new_file_.get(), iter->chunk_id) || + !WriteInt32(new_file_.get(), iter->prefix)) + return false; + } + return true; +} + +bool SafeBrowsingStoreFile::ReadSubPrefixes( + FILE* fp, std::vector<SBSubPrefix>* sub_prefixes, int count) { + DCHECK(fp && sub_prefixes); + + sub_prefixes->reserve(sub_prefixes->size() + count); + + for (int32 i = 0; i < count; ++i) { + int32 chunk_id, add_chunk_id; + SBPrefix add_prefix; + DCHECK_EQ(sizeof(int32), sizeof(add_prefix)); + + if (!ReadInt32(fp, &chunk_id) || + !ReadInt32(fp, &add_chunk_id) || !ReadInt32(fp, &add_prefix)) + return false; + + if (sub_del_cache_.count(chunk_id) > 0) + continue; + + sub_prefixes->push_back(SBSubPrefix(chunk_id, add_chunk_id, add_prefix)); + } + + return true; +} + +bool SafeBrowsingStoreFile::WriteSubPrefixes( + std::vector<SBSubPrefix>& sub_prefixes) { + DCHECK(new_file_.get()); + + for (std::vector<SBSubPrefix>::const_iterator iter = sub_prefixes.begin(); + iter != sub_prefixes.end(); ++iter) { + if (!WriteInt32(new_file_.get(), iter->chunk_id) || + !WriteInt32(new_file_.get(), iter->add_prefix.chunk_id) || + !WriteInt32(new_file_.get(), iter->add_prefix.prefix)) + return false; + } + return true; +} + +bool 
SafeBrowsingStoreFile::ReadAddHashes( + FILE* fp, std::vector<SBAddFullHash>* add_hashes, int count) { + DCHECK(fp && add_hashes); + + add_hashes->reserve(add_hashes->size() + count); + + for (int i = 0; i < count; ++i) { + int32 chunk_id; + SBPrefix prefix; + base::Time received; + SBFullHash full_hash; + DCHECK_EQ(sizeof(int32), sizeof(prefix)); + + if (!ReadInt32(fp, &chunk_id) || + !ReadInt32(fp, &prefix) || + !ReadTime(fp, &received) || + !ReadHash(fp, &full_hash)) + return false; + + if (add_del_cache_.count(chunk_id) > 0) + continue; + + add_hashes->push_back(SBAddFullHash(chunk_id, prefix, received, full_hash)); + } + + return true; +} + +bool SafeBrowsingStoreFile::WriteAddHashes( + const std::vector<SBAddFullHash>& add_hashes) { + DCHECK(new_file_.get()); + + for (std::vector<SBAddFullHash>::const_iterator iter = add_hashes.begin(); + iter != add_hashes.end(); ++iter) { + if (!WriteInt32(new_file_.get(), iter->add_prefix.chunk_id) || + !WriteInt32(new_file_.get(), iter->add_prefix.prefix) || + !WriteTime(new_file_.get(), iter->received) || + !WriteHash(new_file_.get(), iter->full_hash)) + return false; + } + return true; +} + +bool SafeBrowsingStoreFile::ReadSubHashes( + FILE* fp, std::vector<SBSubFullHash>* sub_hashes, int count) { + DCHECK(fp); + + sub_hashes->reserve(sub_hashes->size() + count); + + for (int i = 0; i < count; ++i) { + int32 chunk_id; + int32 add_chunk_id; + SBPrefix add_prefix; + SBFullHash add_full_hash; + DCHECK_EQ(sizeof(int32), sizeof(add_prefix)); + + if (!ReadInt32(fp, &chunk_id) || + !ReadInt32(fp, &add_chunk_id) || + !ReadInt32(fp, &add_prefix) || + !ReadHash(fp, &add_full_hash)) + return false; + + if (sub_del_cache_.count(chunk_id) > 0) + continue; + + sub_hashes->push_back( + SBSubFullHash(chunk_id, add_chunk_id, add_prefix, add_full_hash)); + } + + return true; +} + +bool SafeBrowsingStoreFile::WriteSubHashes( + std::vector<SBSubFullHash>& sub_hashes) { + DCHECK(new_file_.get()); + + for (std::vector<SBSubFullHash>::const_iterator iter = sub_hashes.begin(); + iter != sub_hashes.end(); ++iter) { + if (!WriteInt32(new_file_.get(), iter->chunk_id) || + !WriteInt32(new_file_.get(), iter->add_prefix.chunk_id) || + !WriteInt32(new_file_.get(), iter->add_prefix.prefix) || + !WriteHash(new_file_.get(), iter->full_hash)) + return false; + } + return true; +} + +bool SafeBrowsingStoreFile::BeginUpdate() { + DCHECK(!file_.get() && !new_file_.get()); + + // Structures should all be clear unless something bad happened. + DCHECK(add_chunks_cache_.empty()); + DCHECK(sub_chunks_cache_.empty()); + DCHECK(add_del_cache_.empty()); + DCHECK(sub_del_cache_.empty()); + DCHECK(add_prefixes_.empty()); + DCHECK(sub_prefixes_.empty()); + DCHECK(add_hashes_.empty()); + DCHECK(sub_hashes_.empty()); + DCHECK_EQ(chunks_written_, 0); + + const FilePath new_filename = TemporaryFileForFilename(filename_); + file_util::ScopedFILE new_file(file_util::OpenFile(new_filename, "wb+")); + if (new_file.get() == NULL) + return false; + + file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb")); + empty_ = (file.get() == NULL); + if (empty_) { + // If the file exists but cannot be opened, try to delete it (not + // deleting directly, the bloom filter needs to be deleted, too). 
+ if (file_util::PathExists(filename_)) + return OnCorruptDatabase(); + + new_file_.swap(new_file); + return true; + } + + int32 magic, version; + if (!ReadInt32(file.get(), &magic) || !ReadInt32(file.get(), &version)) + return OnCorruptDatabase(); + + if (magic != kFileMagic || version != kFileVersion) + return OnCorruptDatabase(); + + int32 add_chunk_count, sub_chunk_count; + if (!ReadInt32(file.get(), &add_chunk_count) || + !ReadInt32(file.get(), &sub_chunk_count)) + return OnCorruptDatabase(); + + if (!FileSeek(file.get(), kFileHeaderSize)) + return OnCorruptDatabase(); + + if (!ReadChunksToSet(file.get(), &add_chunks_cache_, add_chunk_count) || + !ReadChunksToSet(file.get(), &sub_chunks_cache_, sub_chunk_count)) + return OnCorruptDatabase(); + + file_.swap(file); + new_file_.swap(new_file); + return true; +} + +bool SafeBrowsingStoreFile::FinishChunk() { + if (!add_prefixes_.size() && !sub_prefixes_.size() && + !add_hashes_.size() && !sub_hashes_.size()) + return true; + + if (!WriteInt32(new_file_.get(), add_prefixes_.size()) || + !WriteInt32(new_file_.get(), sub_prefixes_.size()) || + !WriteInt32(new_file_.get(), add_hashes_.size()) || + !WriteInt32(new_file_.get(), sub_hashes_.size())) + return false; + + if (!WriteAddPrefixes(add_prefixes_) || + !WriteSubPrefixes(sub_prefixes_) || + !WriteAddHashes(add_hashes_) || + !WriteSubHashes(sub_hashes_)) + return false; + + ++chunks_written_; + + // Clear everything to save memory. + return ClearChunkBuffers(); +} + +bool SafeBrowsingStoreFile::DoUpdate( + const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result) { + DCHECK(file_.get() || empty_); + DCHECK(new_file_.get()); + + std::vector<SBAddPrefix> add_prefixes; + std::vector<SBSubPrefix> sub_prefixes; + std::vector<SBAddFullHash> add_full_hashes; + std::vector<SBSubFullHash> sub_full_hashes; + + // Read |file_| into the vectors. + if (!empty_) { + DCHECK(file_.get()); + + int32 magic, version; + int32 add_chunk_count, sub_chunk_count; + int32 add_prefix_count, sub_prefix_count; + int32 add_hash_count, sub_hash_count; + + if (!FileSeek(file_.get(), 0)) + return OnCorruptDatabase(); + + if (!ReadInt32(file_.get(), &magic) || + !ReadInt32(file_.get(), &version) || + !ReadInt32(file_.get(), &add_chunk_count) || + !ReadInt32(file_.get(), &sub_chunk_count) || + !ReadInt32(file_.get(), &add_prefix_count) || + !ReadInt32(file_.get(), &sub_prefix_count) || + !ReadInt32(file_.get(), &add_hash_count) || + !ReadInt32(file_.get(), &sub_hash_count)) + return OnCorruptDatabase(); + + if (magic != kFileMagic || version != kFileVersion) + return OnCorruptDatabase(); + + const size_t prefixes_offset = kFileHeaderSize + + (add_chunk_count + sub_chunk_count) * sizeof(int32); + if (!FileSeek(file_.get(), prefixes_offset)) + return OnCorruptDatabase(); + + if (!ReadAddPrefixes(file_.get(), &add_prefixes, add_prefix_count) || + !ReadSubPrefixes(file_.get(), &sub_prefixes, sub_prefix_count) || + !ReadAddHashes(file_.get(), &add_full_hashes, add_hash_count) || + !ReadSubHashes(file_.get(), &sub_full_hashes, sub_hash_count)) + return OnCorruptDatabase(); + + // Close the file so we can later rename over it. + file_.reset(); + } + DCHECK(!file_.get()); + + // Rewind the temporary storage. + if (!FileSeek(new_file_.get(), 0)) + return false; + + // Append the accumulated chunks onto the vectors from file_. 
+ for (int i = 0; i < chunks_written_; ++i) { + int32 add_prefix_count, sub_prefix_count; + int32 add_hash_count, sub_hash_count; + + if (!ReadInt32(new_file_.get(), &add_prefix_count) || + !ReadInt32(new_file_.get(), &sub_prefix_count) || + !ReadInt32(new_file_.get(), &add_hash_count) || + !ReadInt32(new_file_.get(), &sub_hash_count)) + return false; + + // TODO(shess): If the vectors were kept sorted, then this code + // could use std::inplace_merge() to merge everything together in + // sorted order. That might still be slower than just sorting at + // the end if there were a large number of chunks. In that case + // some sort of recursive binary merge might be in order (merge + // chunks pairwise, merge those chunks pairwise, and so on, then + // merge the result with the main list). + if (!ReadAddPrefixes(new_file_.get(), &add_prefixes, add_prefix_count) || + !ReadSubPrefixes(new_file_.get(), &sub_prefixes, sub_prefix_count) || + !ReadAddHashes(new_file_.get(), &add_full_hashes, add_hash_count) || + !ReadSubHashes(new_file_.get(), &sub_full_hashes, sub_hash_count)) + return false; + } + + // Add the pending adds which haven't since been deleted. + for (std::vector<SBAddFullHash>::const_iterator iter = pending_adds.begin(); + iter != pending_adds.end(); ++iter) { + if (add_del_cache_.count(iter->add_prefix.chunk_id) == 0) + add_full_hashes.push_back(*iter); + } + + // Knock the subs from the adds. + SBProcessSubs(&add_prefixes, &sub_prefixes, + &add_full_hashes, &sub_full_hashes); + + // We no longer need to track deleted chunks. + DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_); + DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_); + + // Write the new data to new_file_. + // TODO(shess): If we receive a lot of subs relative to adds, + // overwriting the temporary chunk data in new_file_ with the + // permanent data could leave additional data at the end. Won't + // cause any problems, but does waste space. There is no truncate() + // for stdio. Could use ftruncate() or re-open the file. Or maybe + // ignore it, since we'll likely rewrite soon enough. + if (!FileSeek(new_file_.get(), 0)) + return false; + + if (!WriteInt32(new_file_.get(), kFileMagic) || + !WriteInt32(new_file_.get(), kFileVersion) || + !WriteInt32(new_file_.get(), add_chunks_cache_.size()) || + !WriteInt32(new_file_.get(), sub_chunks_cache_.size()) || + !WriteInt32(new_file_.get(), add_prefixes.size()) || + !WriteInt32(new_file_.get(), sub_prefixes.size()) || + !WriteInt32(new_file_.get(), add_full_hashes.size()) || + !WriteInt32(new_file_.get(), sub_full_hashes.size())) + return false; + + if (!WriteChunksFromSet(add_chunks_cache_) || + !WriteChunksFromSet(sub_chunks_cache_) || + !WriteAddPrefixes(add_prefixes) || + !WriteSubPrefixes(sub_prefixes) || + !WriteAddHashes(add_full_hashes) || + !WriteSubHashes(sub_full_hashes)) + return false; + + // Close the file handle and swizzle the file into place. + new_file_.reset(); + const FilePath new_filename = TemporaryFileForFilename(filename_); + if (!file_util::Delete(filename_, false) || + !file_util::Move(new_filename, filename_)) + return false; + + // Pass the resulting data off to the caller. 
+ add_prefixes_result->swap(add_prefixes); + add_full_hashes_result->swap(add_full_hashes); + + return true; +} + +bool SafeBrowsingStoreFile::FinishUpdate( + const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result) { + bool ret = DoUpdate(pending_adds, + add_prefixes_result, add_full_hashes_result); + + if (!ret) { + CancelUpdate(); + return false; + } + + DCHECK(!new_file_.get()); + DCHECK(!file_.get()); + + return Close(); +} + +bool SafeBrowsingStoreFile::CancelUpdate() { + return Close(); +} diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.h b/chrome/browser/safe_browsing/safe_browsing_store_file.h new file mode 100644 index 0000000..25f6d9c --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.h @@ -0,0 +1,292 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ +#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ + +#include <set> +#include <vector> + +#include "chrome/browser/safe_browsing/safe_browsing_store.h" + +#include "base/file_util.h" + +// Implement SafeBrowsingStore in terms of a flat file. The file +// format is pretty literal: +// +// int32 magic; // magic number "validating" file +// int32 version; // format version +// +// // Counts for the various data which follows the header. +// int32 add_chunk_count; // Chunks seen, including empties. +// int32 sub_chunk_count; // Ditto. +// int32 add_prefix_count; +// int32 sub_prefix_count; +// int32 add_hash_count; +// int32 sub_hash_count; +// +// array[add_chunk_count] { +// int32 chunk_id; +// } +// array[sub_chunk_count] { +// int32 chunk_id; +// } +// array[add_prefix_count] { +// int32 chunk_id; +// int32 prefix; +// } +// array[sub_prefix_count] { +// int32 chunk_id; +// int32 add_chunk_id; +// int32 add_prefix; +// } +// array[add_hash_count] { +// int32 chunk_id; +// // TODO(shess): This duplicates first four bytes of full_hash! +// int32 prefix; +// // From base::Time::ToTimeT(). +// // TODO(shess): an int32 probably has enough resolution. +// int64 received_time; +// char[32] full_hash; +// array[sub_hash_count] { +// int32 chunk_id; +// int32 add_chunk_id; +// int32 add_prefix; +// char[32] add_full_hash; +// } +// TODO(shess): Would a checksum be worthwhile? If so, check at open, +// or at commit? +// +// During the course of an update, uncommitted data is stored in a +// temporary file (which is later re-used to commit). This is an +// array of chunks, with the count kept in memory until the end of the +// transaction. 
The format of this file is like the main file, with +// the list of chunks seen omitted, as that data is tracked in-memory: +// +// array[] { +// int32 add_prefix_count; +// int32 sub_prefix_count; +// int32 add_hash_count; +// int32 sub_hash_count; +// array[add_prefix_count] { +// int32 chunk_id; +// int32 prefix; +// } +// array[sub_prefix_count] { +// int32 chunk_id; +// int32 add_chunk_id; +// int32 add_prefix; +// } +// array[add_hash_count] { +// int32 chunk_id; +// int32 prefix; +// int64 received_time; +// char[32] full_hash; +// array[sub_hash_count] { +// int32 chunk_id; +// int32 add_chunk_id; +// int32 add_prefix; +// char[32] add_full_hash; +// } +// } +// +// The overall transaction works like this: +// - Open the original file to get the chunks-seen data. +// - Open a temp file for storing new chunk info. +// - Write new chunks to the temp file. +// - When the transaction is finished: +// - Read the rest of the original file's data into buffers. +// - Rewind the temp file and merge the new data into buffers. +// - Process buffers for deletions and apply subs. +// - Rewind and write the buffers out to temp file. +// - Delete original file. +// - Rename temp file to original filename. +// +// TODO(shess): Does there need to be an fsync() before the rename? +// important_file_writer.h seems to think that +// http://valhenson.livejournal.com/37921.html means you don't, but I +// don't think it follows (and, besides, this needs to run on other +// operating systems). +// +// TODO(shess): Using a checksum to validate the file would allow +// correctness without fsync, at the cost of periodically needing to +// regenerate the database from scratch. + +// TODO(shess): Regeneration could be moderated by saving the previous +// file, if valid, as a checkpoint. During update, if the current +// file is found to be invalid, rollback to the checkpoint and run the +// updat forward from there. This would require that the current file +// be validated at BeginUpdate() rather than FinishUpdate(), because +// the chunks-seen data may have changed. [Does this have +// implications for the pending_hashes, which were generated while +// using a newer bloom filter?] + +class SafeBrowsingStoreFile : public SafeBrowsingStore { + public: + SafeBrowsingStoreFile(); + virtual ~SafeBrowsingStoreFile(); + + virtual void Init(const FilePath& filename, + Callback0::Type* corruption_callback); + + // Delete any on-disk files, including the permanent storage. 
+ virtual bool Delete(); + + virtual bool BeginChunk() { + return ClearChunkBuffers(); + } + virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) { + add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix)); + return true; + } + virtual bool WriteAddHash(int32 chunk_id, SBPrefix prefix, + base::Time receive_time, SBFullHash full_hash) { + add_hashes_.push_back( + SBAddFullHash(chunk_id, prefix, receive_time, full_hash)); + return true; + } + virtual bool WriteSubPrefix(int32 chunk_id, + int32 add_chunk_id, SBPrefix prefix) { + sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix)); + return true; + } + virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id, + SBPrefix prefix, SBFullHash full_hash) { + sub_hashes_.push_back( + SBSubFullHash(chunk_id, add_chunk_id, prefix, full_hash)); + return true; + } + virtual bool FinishChunk(); + + virtual bool BeginUpdate(); + virtual bool DoUpdate(const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result); + virtual bool FinishUpdate(const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result); + virtual bool CancelUpdate(); + + virtual void SetAddChunk(int32 chunk_id) { + add_chunks_cache_.insert(chunk_id); + } + virtual bool CheckAddChunk(int32 chunk_id) { + return add_chunks_cache_.count(chunk_id) > 0; + } + virtual void GetAddChunks(std::vector<int32>* out) { + out->clear(); + out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end()); + } + virtual void SetSubChunk(int32 chunk_id) { + sub_chunks_cache_.insert(chunk_id); + } + virtual bool CheckSubChunk(int32 chunk_id) { + return sub_chunks_cache_.count(chunk_id) > 0; + } + virtual void GetSubChunks(std::vector<int32>* out) { + out->clear(); + out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end()); + } + + virtual void DeleteAddChunk(int32 chunk_id) { + add_del_cache_.insert(chunk_id); + } + virtual void DeleteSubChunk(int32 chunk_id) { + sub_del_cache_.insert(chunk_id); + } + + // Returns the name of the temporary file used to buffer data for + // |filename|. Exported for unit tests. + static const FilePath TemporaryFileForFilename(const FilePath& filename) { + return FilePath(filename.value() + FILE_PATH_LITERAL("_new")); + } + + private: + // Close all files and clear all buffers. + bool Close(); + + // Helpers to read/write the various data sets. Excepting + // ReadChunksToSet(), which is called too early, the readers skip + // items from deleted chunks (listed in add_del_cache_ and + // sub_del_cache_). + bool ReadChunksToSet(FILE* fp, std::set<int32>* chunks, int count); + bool WriteChunksFromSet(const std::set<int32>& chunks); + bool ReadAddPrefixes(FILE* fp, + std::vector<SBAddPrefix>* add_prefixes, int count); + bool WriteAddPrefixes(const std::vector<SBAddPrefix>& add_prefixes); + bool ReadSubPrefixes(FILE* fp, + std::vector<SBSubPrefix>* sub_prefixes, int count); + bool WriteSubPrefixes(std::vector<SBSubPrefix>& sub_prefixes); + bool ReadAddHashes(FILE* fp, + std::vector<SBAddFullHash>* add_hashes, int count); + bool WriteAddHashes(const std::vector<SBAddFullHash>& add_hashes); + bool ReadSubHashes(FILE* fp, + std::vector<SBSubFullHash>* sub_hashes, int count); + bool WriteSubHashes(std::vector<SBSubFullHash>& sub_hashes); + + // Calls |corruption_callback_| if non-NULL, always returns false as + // a convenience to the caller. 
+ bool OnCorruptDatabase(); + + // Clear temporary buffers used to accumulate chunk data. + bool ClearChunkBuffers() { + // NOTE: .clear() doesn't release memory. + // TODO(shess): Figure out if this is overkill. Some amount of + // pre-reserved space is probably reasonable between each chunk + // collected. + std::vector<SBAddPrefix>().swap(add_prefixes_); + std::vector<SBSubPrefix>().swap(sub_prefixes_); + std::vector<SBAddFullHash>().swap(add_hashes_); + std::vector<SBSubFullHash>().swap(sub_hashes_); + return true; + } + + // Clear all buffers used during update. + void ClearUpdateBuffers() { + ClearChunkBuffers(); + chunks_written_ = 0; + std::set<int32>().swap(add_chunks_cache_); + std::set<int32>().swap(sub_chunks_cache_); + base::hash_set<int32>().swap(add_del_cache_); + base::hash_set<int32>().swap(sub_del_cache_); + } + + // Buffers for collecting data between BeginChunk() and + // FinishChunk(). + std::vector<SBAddPrefix> add_prefixes_; + std::vector<SBSubPrefix> sub_prefixes_; + std::vector<SBAddFullHash> add_hashes_; + std::vector<SBSubFullHash> sub_hashes_; + + // Count of chunks collected in |new_file_|. + int chunks_written_; + + // Name of the main database file. + FilePath filename_; + + // Handles to the main and scratch files. |empty_| is true if the + // main file didn't exist when the update was started. + file_util::ScopedFILE file_; + file_util::ScopedFILE new_file_; + bool empty_; + + // Cache of chunks which have been seen. Loaded from the database + // on BeginUpdate() so that it can be queried during the + // transaction. + std::set<int32> add_chunks_cache_; + std::set<int32> sub_chunks_cache_; + + // Cache the set of deleted chunks during a transaction, applied on + // FinishUpdate(). + // TODO(shess): If the set is small enough, hash_set<> might be + // slower than plain set<>. + base::hash_set<int32> add_del_cache_; + base::hash_set<int32> sub_del_cache_; + + scoped_ptr<Callback0::Type> corruption_callback_; + + DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreFile); +}; + +#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc new file mode 100644 index 0000000..383de55 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/safe_browsing_store_file.h" + +#include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h" +#include "chrome/test/file_test_utils.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" + +namespace { + +const FilePath::CharType kFolderPrefix[] = + FILE_PATH_LITERAL("SafeBrowsingTestStoreFile"); + +class SafeBrowsingStoreFileTest : public PlatformTest { + public: + virtual void SetUp() { + PlatformTest::SetUp(); + + FilePath temp_dir; + ASSERT_TRUE(file_util::CreateNewTempDirectory(kFolderPrefix, &temp_dir)); + + file_deleter_.reset(new FileAutoDeleter(temp_dir)); + + filename_ = temp_dir; + filename_.AppendASCII("SafeBrowsingTestStore"); + file_util::Delete(filename_, false); + + // Make sure an old temporary file isn't hanging around. 
+ const FilePath temp_file = + SafeBrowsingStoreFile::TemporaryFileForFilename(filename_); + file_util::Delete(temp_file, false); + + store_.reset(new SafeBrowsingStoreFile()); + store_->Init(filename_, NULL); + } + virtual void TearDown() { + store_->Delete(); + store_.reset(); + file_deleter_.reset(); + + PlatformTest::TearDown(); + } + + scoped_ptr<FileAutoDeleter> file_deleter_; + FilePath filename_; + scoped_ptr<SafeBrowsingStoreFile> store_; +}; + +TEST_STORE(SafeBrowsingStoreFileTest, store_.get(), filename_); + +// TODO(shess): Test corruption-handling? + +} // namespace diff --git a/chrome/browser/safe_browsing/safe_browsing_store_sqlite.cc b/chrome/browser/safe_browsing/safe_browsing_store_sqlite.cc new file mode 100644 index 0000000..a5e7df7 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_sqlite.cc @@ -0,0 +1,714 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h" + +#include "base/file_util.h" +#include "chrome/common/sqlite_compiled_statement.h" +#include "chrome/common/sqlite_utils.h" + +namespace { + +// Database version. If this is different than what's stored on disk, the +// database is reset. +const int kDatabaseVersion = 6; + +// Used for reading full hashes from the database. +SBFullHash ReadFullHash(SqliteCompiledStatement* statement, int column) { + std::vector<unsigned char> blob; + (*statement)->column_blob_as_vector(column, &blob); + + SBFullHash ret; + DCHECK_EQ(blob.size(), sizeof(ret)); + memcpy(ret.full_hash, &blob[0], sizeof(ret)); + return ret; +} + +void DeleteChunksFromSet(const base::hash_set<int32>& deleted, + std::set<int32>* chunks) { + for (std::set<int32>::iterator iter = chunks->begin(); + iter != chunks->end();) { + std::set<int32>::iterator prev = iter++; + if (deleted.count(*prev) > 0) + chunks->erase(prev); + } +} + +} // namespace + +SafeBrowsingStoreSqlite::SafeBrowsingStoreSqlite() + : db_(NULL), + statement_cache_(NULL), + insert_transaction_(NULL) { +} +SafeBrowsingStoreSqlite::~SafeBrowsingStoreSqlite() { + Close(); +} + +bool SafeBrowsingStoreSqlite::Delete() { + // The database should not be open at this point. TODO(shess): It + // can be open if corruption was detected while opening the + // database. Ick. + DCHECK(!db_); + + // The file must be closed, both so that the journal file is deleted + // by SQLite, and because open files cannot be deleted on Windows. + if (!Close()) { + NOTREACHED(); + return false; + } + + // Just in case, delete the journal file, because associating the + // wrong journal file with a database is very bad. 
+ const FilePath journal_file = JournalFileForFilename(filename_); + if (!file_util::Delete(journal_file, false) && + file_util::PathExists(journal_file)) { + NOTREACHED(); + return false; + } + + if (!file_util::Delete(filename_, false) && + file_util::PathExists(filename_)) { + NOTREACHED(); + return false; + } + + return true; +} + +void SafeBrowsingStoreSqlite::Init(const FilePath& filename, + Callback0::Type* corruption_callback) { + filename_ = filename; + corruption_callback_.reset(corruption_callback); +} + +bool SafeBrowsingStoreSqlite::OnCorruptDatabase() { + if (corruption_callback_.get()) + corruption_callback_->Run(); + return false; +} + +bool SafeBrowsingStoreSqlite::Open() { + if (db_) + return true; + + if (OpenSqliteDb(filename_, &db_) != SQLITE_OK) { + sqlite3_close(db_); + db_ = NULL; + return false; + } + + // Run the database in exclusive mode. Nobody else should be accessing the + // database while we're running, and this will give somewhat improved perf. + ExecSql("PRAGMA locking_mode = EXCLUSIVE"); + ExecSql("PRAGMA cache_size = 100"); + + statement_cache_.reset(new SqliteStatementCache(db_)); + + if (!DoesSqliteTableExist(db_, "add_prefix")) + return SetupDatabase(); + + return CheckCompatibleVersion(); +} + +bool SafeBrowsingStoreSqlite::ExecSql(const char* sql) { + DCHECK(db_); + + int rv = sqlite3_exec(db_, sql, NULL, NULL, NULL); + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK(rv == SQLITE_OK); + return true; +} + +bool SafeBrowsingStoreSqlite::Close() { + if (!db_) + return true; + + add_chunks_cache_.clear(); + sub_chunks_cache_.clear(); + + add_del_cache_.clear(); + sub_del_cache_.clear(); + + insert_transaction_.reset(); + statement_cache_.reset(); // Must free statements before closing DB. + bool result = sqlite3_close(db_) == SQLITE_OK; + db_ = NULL; + + return result; +} + +bool SafeBrowsingStoreSqlite::CreateTables() { + DCHECK(db_); + + // Store 32 bit add prefixes here. + if (!ExecSql("CREATE TABLE add_prefix (" + " chunk INTEGER," + " prefix INTEGER" + ")")) + return false; + + // Store 32 bit sub prefixes here. + if (!ExecSql("CREATE TABLE sub_prefix (" + " chunk INTEGER," + " add_chunk INTEGER," + " prefix INTEGER" + ")")) + return false; + + // Store 256 bit add full hashes (and GetHash results) here. + if (!ExecSql("CREATE TABLE add_full_hash (" + " chunk INTEGER," + " prefix INTEGER," + " receive_time INTEGER," + " full_hash BLOB" + ")")) + return false; + + // Store 256 bit sub full hashes here. + if (!ExecSql("CREATE TABLE sub_full_hash (" + " chunk INTEGER," + " add_chunk INTEGER," + " prefix INTEGER," + " full_hash BLOB" + ")")) + return false; + + // Store all the add and sub chunk numbers we receive. We cannot + // just rely on the prefix tables to generate these lists, since + // some chunks will have zero entries (and thus no prefixes), or + // potentially an add chunk can have all of its entries sub'd + // without receiving an AddDel, or a sub chunk might have been + // entirely consumed by adds. In these cases, we still have to + // report the chunk number but it will not have any prefixes in the + // prefix tables. + // + // TODO(paulg): Investigate storing the chunks as a string of + // ChunkRanges, one string for each of phish-add, phish-sub, + // malware-add, malware-sub. This might be better performance when + // the number of chunks is large, and is the natural format for the + // update request. 
+ if (!ExecSql("CREATE TABLE add_chunks (" + " chunk INTEGER PRIMARY KEY" + ")")) + return false; + + if (!ExecSql("CREATE TABLE sub_chunks (" + " chunk INTEGER PRIMARY KEY" + ")")) + return false; + + return true; +} + +bool SafeBrowsingStoreSqlite::SetupDatabase() { + DCHECK(db_); + + SQLTransaction transaction(db_); + if (transaction.Begin() != SQLITE_OK) { + NOTREACHED(); + return false; + } + + if (!CreateTables()) + return false; + + // PRAGMA does not support bind parameters... + const std::string version = + StringPrintf("PRAGMA user_version = %d", kDatabaseVersion); + if (!ExecSql(version.c_str())) + return false; + + if (transaction.Commit() != SQLITE_OK) + return false; + + return true; +} + +bool SafeBrowsingStoreSqlite::CheckCompatibleVersion() { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "PRAGMA user_version"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + int result = statement->step(); + if (result != SQLITE_ROW) + return false; + + return statement->column_int(0) == kDatabaseVersion; +} + +bool SafeBrowsingStoreSqlite::ReadAddChunks() { + DCHECK(db_); + + add_chunks_cache_.clear(); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT chunk FROM add_chunks"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while ((rv = statement->step()) == SQLITE_ROW) { + add_chunks_cache_.insert(statement->column_int(0)); + } + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK_EQ(rv, SQLITE_DONE); + return rv == SQLITE_DONE; +} + +bool SafeBrowsingStoreSqlite::WriteAddChunks() { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "INSERT INTO add_chunks (chunk) VALUES (?)"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + for (std::set<int32>::const_iterator iter = add_chunks_cache_.begin(); + iter != add_chunks_cache_.end(); ++iter) { + statement->bind_int(0, *iter); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK(rv == SQLITE_DONE); + statement->reset(); + } + return true; +} + +bool SafeBrowsingStoreSqlite::ReadSubChunks() { + DCHECK(db_); + + sub_chunks_cache_.clear(); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT chunk FROM sub_chunks"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while ((rv = statement->step()) == SQLITE_ROW) { + sub_chunks_cache_.insert(statement->column_int(0)); + } + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + return rv == SQLITE_DONE; +} + +bool SafeBrowsingStoreSqlite::WriteSubChunks() { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "INSERT INTO sub_chunks (chunk) VALUES (?)"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + for (std::set<int32>::const_iterator iter = sub_chunks_cache_.begin(); + iter != sub_chunks_cache_.end(); ++iter) { + statement->bind_int(0, *iter); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK(rv == SQLITE_DONE); + statement->reset(); + } + return true; +} + +bool SafeBrowsingStoreSqlite::ReadAddPrefixes( + std::vector<SBAddPrefix>* add_prefixes) { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT chunk, prefix FROM add_prefix"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while ((rv = statement->step()) == SQLITE_ROW) { + const int32 chunk_id = statement->column_int(0); + if 
(add_del_cache_.count(chunk_id) > 0) + continue; + + const SBPrefix prefix = statement->column_int(1); + add_prefixes->push_back(SBAddPrefix(chunk_id, prefix)); + } + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK_EQ(rv, SQLITE_DONE); + return true; +} + +bool SafeBrowsingStoreSqlite::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "INSERT INTO add_prefix " + "(chunk, prefix) VALUES (?,?)"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + statement->bind_int(0, chunk_id); + statement->bind_int(1, prefix); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK(rv == SQLITE_DONE); + return true; +} + +bool SafeBrowsingStoreSqlite::WriteAddPrefixes( + const std::vector<SBAddPrefix>& add_prefixes) { + DCHECK(db_); + + for (std::vector<SBAddPrefix>::const_iterator iter = add_prefixes.begin(); + iter != add_prefixes.end(); ++iter) { + if (!WriteAddPrefix(iter->chunk_id, iter->prefix)) + return false; + } + return true; +} + +bool SafeBrowsingStoreSqlite::ReadSubPrefixes( + std::vector<SBSubPrefix>* sub_prefixes) { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT chunk, add_chunk, prefix " + "FROM sub_prefix"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while ((rv = statement->step()) == SQLITE_ROW) { + const int32 chunk_id = statement->column_int(0); + if (sub_del_cache_.count(chunk_id) > 0) + continue; + + const int32 add_chunk_id = statement->column_int(1); + const SBPrefix add_prefix = statement->column_int(2); + sub_prefixes->push_back(SBSubPrefix(chunk_id, add_chunk_id, add_prefix)); + } + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK_EQ(rv, SQLITE_DONE); + return true; +} + +bool SafeBrowsingStoreSqlite::WriteSubPrefix( + int32 chunk_id, int32 add_chunk_id, SBPrefix prefix) { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "INSERT INTO sub_prefix " + "(chunk, add_chunk, prefix) VALUES (?,?, ?)"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + statement->bind_int(0, chunk_id); + statement->bind_int(1, add_chunk_id); + statement->bind_int(2, prefix); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK(rv == SQLITE_DONE); + return true; +} + +bool SafeBrowsingStoreSqlite::WriteSubPrefixes( + std::vector<SBSubPrefix>& sub_prefixes) { + DCHECK(db_); + + for (std::vector<SBSubPrefix>::const_iterator iter = sub_prefixes.begin(); + iter != sub_prefixes.end(); ++iter) { + const SBAddPrefix &add_prefix = iter->add_prefix; + if (!WriteSubPrefix(iter->chunk_id, add_prefix.chunk_id, add_prefix.prefix)) + return false; + } + return true; +} + +bool SafeBrowsingStoreSqlite::ReadAddHashes( + std::vector<SBAddFullHash>* add_hashes) { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT chunk, prefix, receive_time, full_hash " + "FROM add_full_hash"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while ((rv = statement->step()) == SQLITE_ROW) { + const int32 chunk_id = statement->column_int(0); + if (add_del_cache_.count(chunk_id) > 0) + continue; + + const SBPrefix prefix = statement->column_int(1); + const base::Time received = + base::Time::FromTimeT(statement->column_int64(2)); + const SBFullHash full_hash = ReadFullHash(&statement, 3); + add_hashes->push_back(SBAddFullHash(chunk_id, prefix, 
received, full_hash)); + } + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK_EQ(rv, SQLITE_DONE); + return true; +} + +bool SafeBrowsingStoreSqlite::WriteAddHash(int32 chunk_id, SBPrefix prefix, + base::Time receive_time, + SBFullHash full_hash) { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "INSERT INTO add_full_hash " + "(chunk, prefix, receive_time, full_hash) " + "VALUES (?,?, ?, ?)"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + statement->bind_int(0, chunk_id); + statement->bind_int(1, prefix); + statement->bind_int64(2, receive_time.ToTimeT()); + statement->bind_blob(3, full_hash.full_hash, sizeof(full_hash.full_hash)); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK(rv == SQLITE_DONE); + return true; +} + +bool SafeBrowsingStoreSqlite::WriteAddHashes( + const std::vector<SBAddFullHash>& add_hashes) { + DCHECK(db_); + + for (std::vector<SBAddFullHash>::const_iterator iter = add_hashes.begin(); + iter != add_hashes.end(); ++iter) { + const SBAddPrefix& add_prefix = iter->add_prefix; + if (!WriteAddHash(add_prefix.chunk_id, add_prefix.prefix, + iter->received, iter->full_hash)) + return false; + } + return true; +} + +bool SafeBrowsingStoreSqlite::ReadSubHashes( + std::vector<SBSubFullHash>* sub_hashes) { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "SELECT chunk, add_chunk, prefix, full_hash " + "FROM sub_full_hash"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + int rv; + while ((rv = statement->step()) == SQLITE_ROW) { + const int32 chunk_id = statement->column_int(0); + if (sub_del_cache_.count(chunk_id) > 0) + continue; + + const int32 add_chunk_id = statement->column_int(1); + const SBPrefix add_prefix = statement->column_int(2); + const SBFullHash full_hash = ReadFullHash(&statement, 3); + sub_hashes->push_back( + SBSubFullHash(chunk_id, add_chunk_id, add_prefix, full_hash)); + } + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK_EQ(rv, SQLITE_DONE); + return true; +} + +bool SafeBrowsingStoreSqlite::WriteSubHash( + int32 chunk_id, int32 add_chunk_id, SBPrefix prefix, SBFullHash full_hash) { + DCHECK(db_); + + SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_, + "INSERT INTO sub_full_hash " + "(chunk, add_chunk, prefix, full_hash) " + "VALUES (?,?,?,?)"); + if (!statement.is_valid()) { + NOTREACHED(); + return false; + } + + statement->bind_int(0, chunk_id); + statement->bind_int(1, add_chunk_id); + statement->bind_int(2, prefix); + statement->bind_blob(3, full_hash.full_hash, sizeof(full_hash.full_hash)); + int rv = statement->step(); + if (rv == SQLITE_CORRUPT) + return OnCorruptDatabase(); + DCHECK(rv == SQLITE_DONE); + return true; +} + +bool SafeBrowsingStoreSqlite::WriteSubHashes( + std::vector<SBSubFullHash>& sub_hashes) { + DCHECK(db_); + + for (std::vector<SBSubFullHash>::const_iterator iter = sub_hashes.begin(); + iter != sub_hashes.end(); ++iter) { + if (!WriteSubHash(iter->chunk_id, iter->add_prefix.chunk_id, + iter->add_prefix.prefix, iter->full_hash)) + return false; + } + return true; +} + +bool SafeBrowsingStoreSqlite::RenameTables() { + DCHECK(db_); + + if (!ExecSql("ALTER TABLE add_prefix RENAME TO add_prefix_old") || + !ExecSql("ALTER TABLE sub_prefix RENAME TO sub_prefix_old") || + !ExecSql("ALTER TABLE add_full_hash RENAME TO add_full_hash_old") || + !ExecSql("ALTER TABLE sub_full_hash RENAME TO sub_full_hash_old") || + !ExecSql("ALTER TABLE add_chunks RENAME TO 
add_chunks_old") || + !ExecSql("ALTER TABLE sub_chunks RENAME TO sub_chunks_old")) + return false; + + return CreateTables(); +} + +bool SafeBrowsingStoreSqlite::DeleteOldTables() { + DCHECK(db_); + + if (!ExecSql("DROP TABLE add_prefix_old") || + !ExecSql("DROP TABLE sub_prefix_old") || + !ExecSql("DROP TABLE add_full_hash_old") || + !ExecSql("DROP TABLE sub_full_hash_old") || + !ExecSql("DROP TABLE add_chunks_old") || + !ExecSql("DROP TABLE sub_chunks_old")) + return false; + + return true; +} + +bool SafeBrowsingStoreSqlite::BeginUpdate() { + DCHECK(!db_); + + if (!Open()) + return false; + + insert_transaction_.reset(new SQLTransaction(db_)); + if (insert_transaction_->Begin() != SQLITE_OK) { + DCHECK(false) << "Safe browsing store couldn't start transaction"; + Close(); + return false; + } + + if (!ReadAddChunks() || !ReadSubChunks()) + return false; + + return true; +} + +bool SafeBrowsingStoreSqlite::DoUpdate( + const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result) { + DCHECK(db_); + + std::vector<SBAddPrefix> add_prefixes; + std::vector<SBAddFullHash> add_full_hashes; + std::vector<SBSubPrefix> sub_prefixes; + std::vector<SBSubFullHash> sub_full_hashes; + + if (!ReadAddPrefixes(&add_prefixes) || + !ReadAddHashes(&add_full_hashes) || + !ReadSubPrefixes(&sub_prefixes) || + !ReadSubHashes(&sub_full_hashes)) + return false; + + // Add the pending adds which haven't since been deleted. + for (std::vector<SBAddFullHash>::const_iterator iter = pending_adds.begin(); + iter != pending_adds.end(); ++iter) { + if (add_del_cache_.count(iter->add_prefix.chunk_id) == 0) + add_full_hashes.push_back(*iter); + } + + SBProcessSubs(&add_prefixes, &sub_prefixes, + &add_full_hashes, &sub_full_hashes); + + // Move the existing tables aside and prepare to write fresh tables. + if (!RenameTables()) + return false; + + DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_); + DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_); + + if (!WriteAddChunks() || + !WriteSubChunks() || + !WriteAddPrefixes(add_prefixes) || + !WriteSubPrefixes(sub_prefixes) || + !WriteAddHashes(add_full_hashes) || + !WriteSubHashes(sub_full_hashes)) + return false; + + // Delete the old tables. + if (!DeleteOldTables()) + return false; + + // Commit all the changes to the database. + int rv = insert_transaction_->Commit(); + if (rv != SQLITE_OK) { + NOTREACHED() << "SafeBrowsing update transaction failed to commit."; + // UMA_HISTOGRAM_COUNTS("SB2.FailedUpdate", 1); + return false; + } + + add_prefixes_result->swap(add_prefixes); + add_full_hashes_result->swap(add_full_hashes); + + return true; +} + +bool SafeBrowsingStoreSqlite::FinishUpdate( + const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result) { + bool ret = DoUpdate(pending_adds, + add_prefixes_result, add_full_hashes_result); + + // Make sure everything is closed even if DoUpdate() fails. + if (!Close()) + return false; + + return ret; +} + +bool SafeBrowsingStoreSqlite::CancelUpdate() { + return Close(); +} diff --git a/chrome/browser/safe_browsing/safe_browsing_store_sqlite.h b/chrome/browser/safe_browsing/safe_browsing_store_sqlite.h new file mode 100644 index 0000000..272ab56 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_sqlite.h @@ -0,0 +1,168 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_SQLITE_H_ +#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_SQLITE_H_ + +#include <set> +#include <vector> + +#include "chrome/browser/safe_browsing/safe_browsing_store.h" +#include "testing/gtest/include/gtest/gtest_prod.h" + +struct sqlite3; +class SqliteCompiledStatement; +class SqliteStatementCache; +class SQLTransaction; + +class SafeBrowsingStoreSqlite : public SafeBrowsingStore { + public: + SafeBrowsingStoreSqlite(); + virtual ~SafeBrowsingStoreSqlite(); + + virtual bool Delete(); + + virtual void Init(const FilePath& filename, + Callback0::Type* corruption_callback); + + virtual bool BeginChunk() { + return true; + } + virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix); + virtual bool WriteAddHash(int32 chunk_id, SBPrefix prefix, + base::Time receive_time, SBFullHash full_hash); + virtual bool WriteSubPrefix(int32 chunk_id, + int32 add_chunk_id, SBPrefix prefix); + virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id, + SBPrefix prefix, SBFullHash full_hash); + virtual bool FinishChunk() { + return true; + } + + virtual bool BeginUpdate(); + // TODO(shess): Should not be public. + virtual bool DoUpdate(const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result); + virtual bool FinishUpdate(const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result); + virtual bool CancelUpdate(); + + virtual void SetAddChunk(int32 chunk_id) { + add_chunks_cache_.insert(chunk_id); + } + virtual bool CheckAddChunk(int32 chunk_id) { + return add_chunks_cache_.count(chunk_id) > 0; + } + virtual void GetAddChunks(std::vector<int32>* out) { + out->clear(); + out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end()); + } + + virtual void SetSubChunk(int32 chunk_id) { + sub_chunks_cache_.insert(chunk_id); + } + virtual bool CheckSubChunk(int32 chunk_id) { + return sub_chunks_cache_.count(chunk_id) > 0; + } + virtual void GetSubChunks(std::vector<int32>* out) { + out->clear(); + out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end()); + } + + virtual void DeleteAddChunk(int32 chunk_id) { + add_del_cache_.insert(chunk_id); + } + virtual void DeleteSubChunk(int32 chunk_id) { + sub_del_cache_.insert(chunk_id); + } + + // Returns the name of the SQLite journal file for |filename|. + // Exported for unit tests. + static const FilePath JournalFileForFilename(const FilePath& filename) { + return FilePath(filename.value() + FILE_PATH_LITERAL("-journal")); + } + + private: + // The following routines return true on success, or false on + // failure. Failure is presumed to be persistent, so the caller + // should stop trying and unwind the transaction. + // OnCorruptDatabase() is called if SQLite returns SQLITE_CORRUPT. + + // Open |db_| from |filename_|, creating if necessary. + bool Open(); + + // Close |db_|, rolling back any in-progress transaction. + bool Close(); + + // Execute all statements in sql, returning true if every one of + // them returns SQLITE_OK. + bool ExecSql(const char* sql); + + bool SetupDatabase(); + bool CheckCompatibleVersion(); + + bool CreateTables(); + bool RenameTables(); + bool DeleteOldTables(); + + // Read and write the chunks-seen data from |*_chunks_cache_|. 
+ // Chunk deletions are not accounted for. + bool ReadAddChunks(); + bool ReadSubChunks(); + bool WriteAddChunks(); + bool WriteSubChunks(); + + // Read the various types of data, skipping items which belong to + // deleted chunks. New data is appended to the vectors. + bool ReadAddPrefixes(std::vector<SBAddPrefix>* add_prefixes); + bool ReadSubPrefixes(std::vector<SBSubPrefix>* sub_prefixes); + bool ReadAddHashes(std::vector<SBAddFullHash>* add_hashes); + bool ReadSubHashes(std::vector<SBSubFullHash>* sub_hashes); + + // Write the various types of data. The existing data is not + // cleared. + bool WriteAddPrefixes(const std::vector<SBAddPrefix>& add_prefixes); + bool WriteSubPrefixes(std::vector<SBSubPrefix>& sub_prefixes); + bool WriteAddHashes(const std::vector<SBAddFullHash>& add_hashes); + bool WriteSubHashes(std::vector<SBSubFullHash>& sub_hashes); + + // Calls |corruption_callback_| if non-NULL, always returns false as + // a convenience to the caller. + bool OnCorruptDatabase(); + + // The database path from Init(). + FilePath filename_; + + // Between BeginUpdate() and FinishUpdate(), this will be the SQLite + // database connection. Otherwise NULL. + sqlite3 *db_; + + // Cache of compiled statements for |db_|. + // TODO(shess): Probably doesn't gain us much. + scoped_ptr<SqliteStatementCache> statement_cache_; + + // Transaction for protecting database integrity between + // BeginUpdate() and FinishUpdate(). + scoped_ptr<SQLTransaction> insert_transaction_; + + // The set of chunks which the store has seen. Elements are added + // by SetAddChunk() and SetSubChunk(), and deleted on write for + // chunks that have been deleted. + std::set<int32> add_chunks_cache_; + std::set<int32> sub_chunks_cache_; + + // Cache the DeleteAddChunk() and DeleteSubChunk() chunks for later + // use in FinishUpdate(). + base::hash_set<int32> add_del_cache_; + base::hash_set<int32> sub_del_cache_; + + // Called when SQLite returns SQLITE_CORRUPT. + scoped_ptr<Callback0::Type> corruption_callback_; + + DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreSqlite); +}; + +#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_SQLITE_H_ diff --git a/chrome/browser/safe_browsing/safe_browsing_store_sqlite_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_sqlite_unittest.cc new file mode 100644 index 0000000..0921ad1 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_sqlite_unittest.cc @@ -0,0 +1,53 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file.
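For orientation before the tests that follow: the SafeBrowsingStoreSqlite interface declared above is driven in two nested phases. BeginUpdate()/FinishUpdate() bracket a whole update, BeginChunk()/FinishChunk() bracket each downloaded chunk, and DeleteAddChunk()/DeleteSubChunk() record deletions that take effect when FinishUpdate() rewrites the tables. A minimal caller sketch, assuming a store that has already had Init() called and using illustrative chunk ids and prefix values (the helper function below is not part of this change):

  #include <vector>
  #include "chrome/browser/safe_browsing/safe_browsing_store.h"

  // Sketch only; |store| must already have had Init() called on it.
  void ApplyOneUpdateSketch(SafeBrowsingStore* store) {
    if (!store->BeginUpdate())
      return;

    // One downloaded add chunk containing a single prefix.
    store->BeginChunk();
    store->SetAddChunk(1);                  // Illustrative chunk id.
    store->WriteAddPrefix(1, 0x31323334);   // Illustrative prefix value.
    store->FinishChunk();

    // A server-requested deletion; it is applied when FinishUpdate()
    // rewrites the tables.
    store->DeleteAddChunk(7);               // Illustrative chunk id.

    std::vector<SBAddFullHash> pending_adds;     // Cached full-hash results.
    std::vector<SBAddPrefix> add_prefixes;       // Out: surviving add prefixes.
    std::vector<SBAddFullHash> add_full_hashes;  // Out: surviving full hashes.
    store->FinishUpdate(pending_adds, &add_prefixes, &add_full_hashes);
  }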
+ +#include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h" + +#include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h" +#include "chrome/test/file_test_utils.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" + +namespace { + +const FilePath::CharType kFolderPrefix[] = + FILE_PATH_LITERAL("SafeBrowsingTestStoreSqlite"); + +class SafeBrowsingStoreSqliteTest : public PlatformTest { + public: + virtual void SetUp() { + PlatformTest::SetUp(); + + FilePath temp_dir; + ASSERT_TRUE(file_util::CreateNewTempDirectory(kFolderPrefix, &temp_dir)); + + file_deleter_.reset(new FileAutoDeleter(temp_dir)); + + filename_ = temp_dir; + filename_ = filename_.AppendASCII("SafeBrowsingTestStore"); + file_util::Delete(filename_, false); + + const FilePath journal_file = + SafeBrowsingStoreSqlite::JournalFileForFilename(filename_); + file_util::Delete(journal_file, false); + + store_.reset(new SafeBrowsingStoreSqlite()); + store_->Init(filename_, NULL); + } + virtual void TearDown() { + store_->Delete(); + store_.reset(); + file_deleter_.reset(); + + PlatformTest::TearDown(); + } + + scoped_ptr<FileAutoDeleter> file_deleter_; + FilePath filename_; + scoped_ptr<SafeBrowsingStoreSqlite> store_; +}; + +TEST_STORE(SafeBrowsingStoreSqliteTest, store_.get(), filename_); + +} // namespace diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc new file mode 100644 index 0000000..35b9928 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc @@ -0,0 +1,188 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/safe_browsing_store.h" +#include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h" + +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +TEST(SafeBrowsingStoreTest, SBAddPrefixLess) { + // chunk_id then prefix. + EXPECT_TRUE(SBAddPrefixLess(SBAddPrefix(10, 1), SBAddPrefix(11, 1))); + EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(11, 1), SBAddPrefix(10, 1))); + EXPECT_TRUE(SBAddPrefixLess(SBAddPrefix(10, 1), SBAddPrefix(10, 2))); + EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(10, 2), SBAddPrefix(10, 1))); + + // Equal is not less. + EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(10, 1), SBAddPrefix(10, 1))); +} + +TEST(SafeBrowsingStoreTest, SBAddPrefixHashLess) { + // The first four bytes of SBFullHash can be read as an int32, which + // means that byte-ordering issues can come up. To test this, |one| + // and |two| differ in the prefix, while |one| and |onetwo| have the + // same prefix, but differ in the byte after the prefix. + SBFullHash one, onetwo, two; + memset(&one, 0, sizeof(one)); + memset(&onetwo, 0, sizeof(onetwo)); + memset(&two, 0, sizeof(two)); + one.prefix = 1; + one.full_hash[sizeof(int32)] = 1; + onetwo.prefix = 1; + onetwo.full_hash[sizeof(int32)] = 2; + two.prefix = 2; + + const base::Time now = base::Time::Now(); + + // add_id dominates. + EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(10, two.prefix, now, two), + SBAddFullHash(11, one.prefix, now, one))); + EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(11, two.prefix, now, two), + SBAddFullHash(10, one.prefix, now, one))); + + // After add_id, prefix.
+ EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(10, one.prefix, now, two), + SBAddFullHash(10, two.prefix, now, one))); + EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(10, two.prefix, now, one), + SBAddFullHash(10, one.prefix, now, two))); + + // After prefix, full hash. + EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(10, one.prefix, now, one), + SBAddFullHash(10, onetwo.prefix, + now, onetwo))); + EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(10, onetwo.prefix, + now, onetwo), + SBAddFullHash(10, one.prefix, now, one))); + + // Equal is not less-than. + EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(10, one.prefix, now, one), + SBAddFullHash(10, one.prefix, now, one))); +} + +TEST(SafeBrowsingStoreTest, SBSubPrefixLess) { + // add_id dominates. + EXPECT_TRUE(SBAddPrefixLess(SBSubPrefix(12, 10, 2), SBSubPrefix(9, 11, 1))); + EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 11, 2), SBSubPrefix(9, 10, 1))); + + // After add_id, prefix. + EXPECT_TRUE(SBAddPrefixLess(SBSubPrefix(12, 10, 1), SBSubPrefix(9, 10, 2))); + EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 10, 2), SBSubPrefix(9, 10, 1))); + + // Equal is not less-than. + EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 10, 1), SBSubPrefix(12, 10, 1))); + + // chunk_id doesn't matter. +} + +TEST(SafeBrowsingStoreTest, SBSubFullHashLess) { + SBFullHash one, onetwo, two; + memset(&one, 0, sizeof(one)); + memset(&onetwo, 0, sizeof(onetwo)); + memset(&two, 0, sizeof(two)); + one.prefix = 1; + one.full_hash[sizeof(int32)] = 1; + onetwo.prefix = 1; + onetwo.full_hash[sizeof(int32)] = 2; + two.prefix = 2; + + // add_id dominates. + EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 10, two.prefix, two), + SBSubFullHash(9, 11, one.prefix, one))); + EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 11, two.prefix, two), + SBSubFullHash(9, 10, one.prefix, one))); + + // After add_id, prefix. + EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 10, one.prefix, two), + SBSubFullHash(9, 10, two.prefix, one))); + EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 10, two.prefix, one), + SBSubFullHash(9, 10, one.prefix, two))); + + // After prefix, full_hash. + EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 10, one.prefix, one), + SBSubFullHash(9, 10, onetwo.prefix, onetwo))); + EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 10, onetwo.prefix, onetwo), + SBSubFullHash(9, 10, one.prefix, one))); + + // Equal is not less-than. + EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 10, one.prefix, one), + SBSubFullHash(9, 10, one.prefix, one))); +} + +TEST(SafeBrowsingStoreTest, SBProcessSubs) { + std::vector<SBAddPrefix> add_prefixes; + std::vector<SBAddFullHash> add_hashes; + std::vector<SBSubPrefix> sub_prefixes; + std::vector<SBSubFullHash> sub_hashes; + + // SBProcessSubs does a lot of iteration, run through empty just to + // make sure degenerate cases work. + SBProcessSubs(&add_prefixes, &sub_prefixes, &add_hashes, &sub_hashes); + EXPECT_TRUE(add_prefixes.empty()); + EXPECT_TRUE(sub_prefixes.empty()); + EXPECT_TRUE(add_hashes.empty()); + EXPECT_TRUE(sub_hashes.empty()); + + const base::Time kNow = base::Time::Now(); + const SBFullHash kHash1(SBFullHashFromString("one")); + const SBFullHash kHash2(SBFullHashFromString("two")); + const SBFullHash kHash3(SBFullHashFromString("three")); + const int kAddChunk1 = 1; // Use different chunk numbers just in case. + const int kSubChunk1 = 2; + + // Construct some full hashes which share prefix with another. 
+ SBFullHash kHash1mod1 = kHash1; + kHash1mod1.full_hash[sizeof(kHash1mod1.full_hash) - 1] ++; + SBFullHash kHash1mod2 = kHash1mod1; + kHash1mod2.full_hash[sizeof(kHash1mod2.full_hash) - 1] ++; + SBFullHash kHash1mod3 = kHash1mod2; + kHash1mod3.full_hash[sizeof(kHash1mod3.full_hash) - 1] ++; + + // An add with prefix and a couple hashes, plus a sub for the prefix + // and a couple sub hashes. The sub should knock all of them out. + add_prefixes.push_back(SBAddPrefix(kAddChunk1, kHash1.prefix)); + add_hashes.push_back( + SBAddFullHash(kAddChunk1, kHash1.prefix, kNow, kHash1)); + add_hashes.push_back( + SBAddFullHash(kAddChunk1, kHash1mod1.prefix, kNow, kHash1mod1)); + sub_prefixes.push_back(SBSubPrefix(kSubChunk1, kAddChunk1, kHash1.prefix)); + sub_hashes.push_back( + SBSubFullHash(kSubChunk1, kAddChunk1, kHash1mod2.prefix, kHash1mod2)); + sub_hashes.push_back( + SBSubFullHash(kSubChunk1, kAddChunk1, kHash1mod3.prefix, kHash1mod3)); + + // An add with no corresponding sub. Both items should be retained. + add_hashes.push_back(SBAddFullHash(kAddChunk1, kHash2.prefix, kNow, kHash2)); + add_prefixes.push_back(SBAddPrefix(kAddChunk1, kHash2.prefix)); + + // A sub with no corresponding add. Both items should be retained. + sub_hashes.push_back( + SBSubFullHash(kSubChunk1, kAddChunk1, kHash3.prefix, kHash3)); + sub_prefixes.push_back(SBSubPrefix(kSubChunk1, kAddChunk1, kHash3.prefix)); + + SBProcessSubs(&add_prefixes, &sub_prefixes, &add_hashes, &sub_hashes); + + EXPECT_EQ(1U, add_prefixes.size()); + EXPECT_EQ(kAddChunk1, add_prefixes[0].chunk_id); + EXPECT_EQ(kHash2.prefix, add_prefixes[0].prefix); + + EXPECT_EQ(1U, add_hashes.size()); + EXPECT_EQ(kAddChunk1, add_hashes[0].add_prefix.chunk_id); + EXPECT_EQ(kHash2.prefix, add_hashes[0].add_prefix.prefix); + EXPECT_TRUE(SBFullHashEq(kHash2, add_hashes[0].full_hash)); + + EXPECT_EQ(1U, sub_prefixes.size()); + EXPECT_EQ(kSubChunk1, sub_prefixes[0].chunk_id); + EXPECT_EQ(kAddChunk1, sub_prefixes[0].add_prefix.chunk_id); + EXPECT_EQ(kHash3.prefix, sub_prefixes[0].add_prefix.prefix); + + EXPECT_EQ(1U, sub_hashes.size()); + EXPECT_EQ(kSubChunk1, sub_hashes[0].chunk_id); + EXPECT_EQ(kAddChunk1, sub_hashes[0].add_prefix.chunk_id); + EXPECT_EQ(kHash3.prefix, sub_hashes[0].add_prefix.prefix); + EXPECT_TRUE(SBFullHashEq(kHash3, sub_hashes[0].full_hash)); +} + +} // namespace diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.cc b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.cc new file mode 100644 index 0000000..9a69472 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.cc @@ -0,0 +1,340 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h" + +#include "base/file_util.h" + +namespace { + +const int kAddChunk1 = 1; +const int kAddChunk2 = 3; +const int kAddChunk3 = 5; +const int kAddChunk4 = 7; +// Disjoint chunk numbers for subs to flush out typos. 
+const int kSubChunk1 = 2; +const int kSubChunk2 = 4; +const int kSubChunk3 = 6; + +const SBFullHash kHash1 = SBFullHashFromString("one"); +const SBFullHash kHash2 = SBFullHashFromString("two"); +const SBFullHash kHash3 = SBFullHashFromString("three"); +const SBFullHash kHash4 = SBFullHashFromString("four"); +const SBFullHash kHash5 = SBFullHashFromString("five"); + +} // namespace + +void SafeBrowsingStoreTestEmpty(SafeBrowsingStore* store) { + EXPECT_TRUE(store->BeginUpdate()); + + std::vector<int> chunks; + store->GetAddChunks(&chunks); + EXPECT_TRUE(chunks.empty()); + store->GetSubChunks(&chunks); + EXPECT_TRUE(chunks.empty()); + + // Shouldn't see anything, but anything is a big set to test. + EXPECT_FALSE(store->CheckAddChunk(0)); + EXPECT_FALSE(store->CheckAddChunk(1)); + EXPECT_FALSE(store->CheckAddChunk(-1)); + + EXPECT_FALSE(store->CheckSubChunk(0)); + EXPECT_FALSE(store->CheckSubChunk(1)); + EXPECT_FALSE(store->CheckSubChunk(-1)); + + std::vector<SBAddFullHash> pending_adds; + std::vector<SBAddPrefix> add_prefixes_result; + std::vector<SBAddFullHash> add_full_hashes_result; + + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + EXPECT_TRUE(add_prefixes_result.empty()); + EXPECT_TRUE(add_full_hashes_result.empty()); +} + +void SafeBrowsingStoreTestStorePrefix(SafeBrowsingStore* store) { + EXPECT_TRUE(store->BeginUpdate()); + + const base::Time now = base::Time::Now(); + + EXPECT_TRUE(store->BeginChunk()); + store->SetAddChunk(kAddChunk1); + EXPECT_TRUE(store->CheckAddChunk(kAddChunk1)); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash1.prefix)); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash2.prefix)); + EXPECT_TRUE(store->WriteAddHash(kAddChunk1, kHash2.prefix, now, kHash2)); + + store->SetSubChunk(kSubChunk1); + EXPECT_TRUE(store->CheckSubChunk(kSubChunk1)); + EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash3.prefix)); + EXPECT_TRUE(store->WriteSubHash(kSubChunk1, + kAddChunk3, kHash3.prefix, kHash3)); + EXPECT_TRUE(store->FinishChunk()); + + // Chunk numbers shouldn't leak over. + EXPECT_FALSE(store->CheckAddChunk(kSubChunk1)); + EXPECT_FALSE(store->CheckAddChunk(kAddChunk3)); + EXPECT_FALSE(store->CheckSubChunk(kAddChunk1)); + + std::vector<int> chunks; + store->GetAddChunks(&chunks); + ASSERT_EQ(1U, chunks.size()); + EXPECT_EQ(kAddChunk1, chunks[0]); + + store->GetSubChunks(&chunks); + ASSERT_EQ(1U, chunks.size()); + EXPECT_EQ(kSubChunk1, chunks[0]); + + std::vector<SBAddFullHash> pending_adds; + std::vector<SBAddPrefix> add_prefixes_result; + std::vector<SBAddFullHash> add_full_hashes_result; + + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + + ASSERT_EQ(2U, add_prefixes_result.size()); + EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); + EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); + EXPECT_EQ(kAddChunk1, add_prefixes_result[1].chunk_id); + EXPECT_EQ(kHash2.prefix, add_prefixes_result[1].prefix); + + ASSERT_EQ(1U, add_full_hashes_result.size()); + EXPECT_EQ(kAddChunk1, add_full_hashes_result[0].add_prefix.chunk_id); + EXPECT_EQ(kHash2.prefix, add_full_hashes_result[0].add_prefix.prefix); + EXPECT_EQ(add_full_hashes_result[0].add_prefix.prefix, + add_full_hashes_result[0].full_hash.prefix); + // EXPECT_TRUE(add_full_hashes_result[0].received == now)? 
+ EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received.ToTimeT()); + EXPECT_TRUE(SBFullHashEq(kHash2, add_full_hashes_result[0].full_hash)); + + add_prefixes_result.clear(); + add_full_hashes_result.clear(); + + EXPECT_TRUE(store->BeginUpdate()); + + // Still has the chunks expected in the next update. + store->GetAddChunks(&chunks); + ASSERT_EQ(1U, chunks.size()); + EXPECT_EQ(kAddChunk1, chunks[0]); + + store->GetSubChunks(&chunks); + ASSERT_EQ(1U, chunks.size()); + EXPECT_EQ(kSubChunk1, chunks[0]); + + EXPECT_TRUE(store->CheckAddChunk(kAddChunk1)); + EXPECT_TRUE(store->CheckSubChunk(kSubChunk1)); + + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + + // Still has the expected contents. + ASSERT_EQ(2U, add_prefixes_result.size()); + EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); + EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); + EXPECT_EQ(kAddChunk1, add_prefixes_result[1].chunk_id); + EXPECT_EQ(kHash2.prefix, add_prefixes_result[1].prefix); + + ASSERT_EQ(1U, add_full_hashes_result.size()); + EXPECT_EQ(kAddChunk1, add_full_hashes_result[0].add_prefix.chunk_id); + EXPECT_EQ(kHash2.prefix, add_full_hashes_result[0].add_prefix.prefix); + EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received.ToTimeT()); + EXPECT_TRUE(SBFullHashEq(kHash2, add_full_hashes_result[0].full_hash)); +} + +void SafeBrowsingStoreTestSubKnockout(SafeBrowsingStore* store) { + EXPECT_TRUE(store->BeginUpdate()); + + const base::Time now = base::Time::Now(); + + EXPECT_TRUE(store->BeginChunk()); + store->SetAddChunk(kAddChunk1); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash1.prefix)); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash2.prefix)); + EXPECT_TRUE(store->WriteAddHash(kAddChunk1, kHash2.prefix, now, kHash2)); + + store->SetSubChunk(kSubChunk1); + EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash3.prefix)); + EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk1, kHash2.prefix)); + EXPECT_TRUE(store->FinishChunk()); + + std::vector<SBAddFullHash> pending_adds; + std::vector<SBAddPrefix> add_prefixes_result; + std::vector<SBAddFullHash> add_full_hashes_result; + + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + + // Knocked out the chunk expected. + ASSERT_EQ(1U, add_prefixes_result.size()); + EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); + EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); + EXPECT_TRUE(add_full_hashes_result.empty()); + + add_prefixes_result.clear(); + + EXPECT_TRUE(store->BeginUpdate()); + + // This add should be knocked out by an existing sub. + EXPECT_TRUE(store->BeginChunk()); + store->SetAddChunk(kAddChunk3); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk3, kHash3.prefix)); + EXPECT_TRUE(store->FinishChunk()); + + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + EXPECT_EQ(1U, add_prefixes_result.size()); + EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); + EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); + EXPECT_TRUE(add_full_hashes_result.empty()); + + add_prefixes_result.clear(); + + EXPECT_TRUE(store->BeginUpdate()); + + // But by here the sub should be gone, so it should stick this time. 
+ EXPECT_TRUE(store->BeginChunk()); + store->SetAddChunk(kAddChunk3); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk3, kHash3.prefix)); + EXPECT_TRUE(store->FinishChunk()); + + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + ASSERT_EQ(2U, add_prefixes_result.size()); + EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); + EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); + EXPECT_EQ(kAddChunk3, add_prefixes_result[1].chunk_id); + EXPECT_EQ(kHash3.prefix, add_prefixes_result[1].prefix); + EXPECT_TRUE(add_full_hashes_result.empty()); +} + +void SafeBrowsingStoreTestDeleteChunks(SafeBrowsingStore* store) { + EXPECT_TRUE(store->BeginUpdate()); + + const base::Time now = base::Time::Now(); + + // A chunk which will be deleted. + EXPECT_FALSE(store->CheckAddChunk(kAddChunk1)); + store->SetAddChunk(kAddChunk1); + EXPECT_TRUE(store->BeginChunk()); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash1.prefix)); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash2.prefix)); + EXPECT_TRUE(store->WriteAddHash(kAddChunk1, kHash2.prefix, now, kHash2)); + EXPECT_TRUE(store->FinishChunk()); + + // Another which won't. + EXPECT_FALSE(store->CheckAddChunk(kAddChunk2)); + store->SetAddChunk(kAddChunk2); + EXPECT_TRUE(store->BeginChunk()); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk2, kHash3.prefix)); + EXPECT_TRUE(store->WriteAddHash(kAddChunk2, kHash3.prefix, now, kHash3)); + EXPECT_TRUE(store->FinishChunk()); + + // A sub chunk to delete. + EXPECT_FALSE(store->CheckSubChunk(kSubChunk1)); + store->SetSubChunk(kSubChunk1); + EXPECT_TRUE(store->BeginChunk()); + EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash4.prefix)); + EXPECT_TRUE(store->WriteSubHash(kSubChunk1, + kAddChunk3, kHash4.prefix, kHash4)); + EXPECT_TRUE(store->FinishChunk()); + + // A sub chunk to keep. + EXPECT_FALSE(store->CheckSubChunk(kSubChunk2)); + store->SetSubChunk(kSubChunk2); + EXPECT_TRUE(store->BeginChunk()); + EXPECT_TRUE(store->WriteSubPrefix(kSubChunk2, kAddChunk4, kHash5.prefix)); + EXPECT_TRUE(store->WriteSubHash(kSubChunk2, + kAddChunk4, kHash5.prefix, kHash5)); + EXPECT_TRUE(store->FinishChunk()); + + store->DeleteAddChunk(kAddChunk1); + store->DeleteSubChunk(kSubChunk1); + + // Not actually deleted until finish. + EXPECT_TRUE(store->CheckAddChunk(kAddChunk1)); + EXPECT_TRUE(store->CheckAddChunk(kAddChunk2)); + EXPECT_TRUE(store->CheckSubChunk(kSubChunk1)); + EXPECT_TRUE(store->CheckSubChunk(kSubChunk2)); + + std::vector<SBAddFullHash> pending_adds; + std::vector<SBAddPrefix> add_prefixes_result; + std::vector<SBAddFullHash> add_full_hashes_result; + + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + + EXPECT_EQ(1U, add_prefixes_result.size()); + EXPECT_EQ(kAddChunk2, add_prefixes_result[0].chunk_id); + EXPECT_EQ(kHash3.prefix, add_prefixes_result[0].prefix); + EXPECT_EQ(1U, add_full_hashes_result.size()); + EXPECT_EQ(kAddChunk2, add_full_hashes_result[0].add_prefix.chunk_id); + EXPECT_EQ(kHash3.prefix, add_full_hashes_result[0].add_prefix.prefix); + EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received.ToTimeT()); + EXPECT_TRUE(SBFullHashEq(kHash3, add_full_hashes_result[0].full_hash)); + + // Expected chunks are there in another update. 
+ EXPECT_TRUE(store->BeginUpdate()); + EXPECT_FALSE(store->CheckAddChunk(kAddChunk1)); + EXPECT_TRUE(store->CheckAddChunk(kAddChunk2)); + EXPECT_FALSE(store->CheckSubChunk(kSubChunk1)); + EXPECT_TRUE(store->CheckSubChunk(kSubChunk2)); + + // Delete them, too. + store->DeleteAddChunk(kAddChunk2); + store->DeleteSubChunk(kSubChunk2); + + add_prefixes_result.clear(); + add_full_hashes_result.clear(); + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + + // Expect no more chunks. + EXPECT_TRUE(store->BeginUpdate()); + EXPECT_FALSE(store->CheckAddChunk(kAddChunk1)); + EXPECT_FALSE(store->CheckAddChunk(kAddChunk2)); + EXPECT_FALSE(store->CheckSubChunk(kSubChunk1)); + EXPECT_FALSE(store->CheckSubChunk(kSubChunk2)); + add_prefixes_result.clear(); + add_full_hashes_result.clear(); + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + EXPECT_TRUE(add_prefixes_result.empty()); + EXPECT_TRUE(add_full_hashes_result.empty()); +} + +void SafeBrowsingStoreTestDelete(SafeBrowsingStore* store, + const FilePath& filename) { + EXPECT_TRUE(store->BeginUpdate()); + + EXPECT_TRUE(store->BeginChunk()); + store->SetAddChunk(kAddChunk1); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash1.prefix)); + EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash2.prefix)); + + store->SetSubChunk(kSubChunk1); + EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash3.prefix)); + EXPECT_TRUE(store->FinishChunk()); + + std::vector<SBAddFullHash> pending_adds; + std::vector<SBAddPrefix> add_prefixes_result; + std::vector<SBAddFullHash> add_full_hashes_result; + + EXPECT_TRUE(store->FinishUpdate(pending_adds, + &add_prefixes_result, + &add_full_hashes_result)); + + EXPECT_TRUE(file_util::PathExists(filename)); + + EXPECT_TRUE(store->Delete()); + EXPECT_FALSE(file_util::PathExists(filename)); +} diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h new file mode 100644 index 0000000..ea61b8f --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h @@ -0,0 +1,68 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_UNITTEST_HELPER_H_ +#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_UNITTEST_HELPER_H_ + +#include "chrome/browser/safe_browsing/safe_browsing_store.h" + +#include "base/sha2.h" +#include "testing/gtest/include/gtest/gtest.h" + +// Helper code for testing that a SafeBrowsingStore implementation +// works to spec. + +// Helper to make it easy to initialize SBFullHash constants. +inline const SBFullHash SBFullHashFromString(const char* str) { + SBFullHash h; + base::SHA256HashString(str, &h.full_hash, sizeof(h.full_hash)); + return h; +} + +// TODO(shess): There's an == operator defined in +// safe_browsing_utils.h, but using it gives me the heebie-jeebies. +inline bool SBFullHashEq(const SBFullHash& a, const SBFullHash& b) { + return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)); +} + +// Test that the empty store looks empty. +void SafeBrowsingStoreTestEmpty(SafeBrowsingStore* store); + +// Write some prefix data to the store and verify that it looks like +// it is still there after the transaction completes. 
+void SafeBrowsingStoreTestStorePrefix(SafeBrowsingStore* store); + +// Test that subs knockout adds. +void SafeBrowsingStoreTestSubKnockout(SafeBrowsingStore* store); + +// Test that deletes delete the chunk's data. +void SafeBrowsingStoreTestDeleteChunks(SafeBrowsingStore* store); + +// Test that deleting the store deletes the store. +void SafeBrowsingStoreTestDelete(SafeBrowsingStore* store, + const FilePath& filename); + +// Wrap all the tests up for implementation subclasses. +// |test_fixture| is the class that would be passed to TEST_F(), +// |instance_name| is the name of the SafeBrowsingStore instance +// within the class, as a pointer, and |filename| is that store's +// filename, for the Delete() test. +#define TEST_STORE(test_fixture, instance_name, filename) \ + TEST_F(test_fixture, Empty) { \ + SafeBrowsingStoreTestEmpty(instance_name); \ + } \ + TEST_F(test_fixture, StorePrefix) { \ + SafeBrowsingStoreTestStorePrefix(instance_name); \ + } \ + TEST_F(test_fixture, SubKnockout) { \ + SafeBrowsingStoreTestSubKnockout(instance_name); \ + } \ + TEST_F(test_fixture, DeleteChunks) { \ + SafeBrowsingStoreTestDeleteChunks(instance_name); \ + } \ + TEST_F(test_fixture, Delete) { \ + SafeBrowsingStoreTestDelete(instance_name, filename); \ + } + +#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_UNITTEST_HELPER_H_ |
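To make the macro contract above concrete, this is roughly what a single TEST_STORE() invocation expands to, using the sqlite fixture from the test file earlier in this change (illustrative expansion only, not additional code in the commit):

  // TEST_STORE(SafeBrowsingStoreSqliteTest, store_.get(), filename_) is
  // approximately equivalent to writing out five TEST_F() cases by hand:
  TEST_F(SafeBrowsingStoreSqliteTest, Empty) {
    SafeBrowsingStoreTestEmpty(store_.get());
  }
  TEST_F(SafeBrowsingStoreSqliteTest, StorePrefix) {
    SafeBrowsingStoreTestStorePrefix(store_.get());
  }
  TEST_F(SafeBrowsingStoreSqliteTest, SubKnockout) {
    SafeBrowsingStoreTestSubKnockout(store_.get());
  }
  TEST_F(SafeBrowsingStoreSqliteTest, DeleteChunks) {
    SafeBrowsingStoreTestDeleteChunks(store_.get());
  }
  TEST_F(SafeBrowsingStoreSqliteTest, Delete) {
    SafeBrowsingStoreTestDelete(store_.get(), filename_);
  }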