diff options
author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-20 06:29:28 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-20 06:29:28 +0000 |
commit | 5332aa894ad01d22aeb01107db6d82ccee648604 (patch) | |
tree | 2c5186162c3a0cff848751dad8f0dba472ef5b21 /chrome/browser/safe_browsing/safe_browsing_store.h | |
parent | 86fdd8723d9f3e185eb946781ed160d4ec122fff (diff) | |
download | chromium_src-5332aa894ad01d22aeb01107db6d82ccee648604.zip chromium_src-5332aa894ad01d22aeb01107db6d82ccee648604.tar.gz chromium_src-5332aa894ad01d22aeb01107db6d82ccee648604.tar.bz2 |
SafeBrowsingStore storage abstraction for SafeBrowsing database.
First bit of refactoring safe-browsing to use a flat file format.
SafeBrowsingStore implements just what is needed for
SafeBrowsingDatabase using straight-forward read/modify/write code.
There will be a follow-on change to layer in on-the-fly format
migration and integrate with SafeBrowsingDatabase. This CL only adds
new classes and tests for same.
BUG=none
TEST=none
Review URL: http://codereview.chromium.org/545053
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@36615 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing/safe_browsing_store.h')
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_store.h | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_store.h b/chrome/browser/safe_browsing/safe_browsing_store.h new file mode 100644 index 0000000..0d13e88 --- /dev/null +++ b/chrome/browser/safe_browsing/safe_browsing_store.h @@ -0,0 +1,211 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ +#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ + +#include <set> +#include <vector> + +#include "base/basictypes.h" +#include "base/file_path.h" +#include "base/task.h" +#include "base/time.h" +#include "chrome/browser/safe_browsing/safe_browsing_util.h" + +// SafeBrowsingStore provides a storage abstraction for the +// safe-browsing data used to build the bloom filter. The items +// stored are: +// The set of add and sub chunks seen. +// List of SBAddPrefix (chunk_id and SBPrefix). +// List of SBSubPrefix (chunk_id and the target SBAddPrefix). +// List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash). +// List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash). +// +// The store is geared towards updating the data, not runtime access +// to the data (that is handled by SafeBrowsingDatabase). Updates are +// handled similar to a SQL transaction cycle, with the new data being +// returned from FinishUpdate() (the COMMIT). Data is not persistent +// until FinishUpdate() returns successfully. +// +// FinishUpdate() also handles dropping items whose chunk has been +// deleted, and netting out the add/sub lists (when a sub matches an +// add, both are dropped). + +// GetAddChunkId(), GetAddPrefix() and the |full_hash| member are +// exposed so that these items can be generically compared with each +// other by SBAddPrefixLess() and SBAddPrefixHashLess(). 
+ +struct SBAddPrefix { + int32 chunk_id; + SBPrefix prefix; + + SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {} + + int32 GetAddChunkId() const { return chunk_id; } + SBPrefix GetAddPrefix() const { return prefix; } +}; + +struct SBSubPrefix { + int32 chunk_id; + SBAddPrefix add_prefix; + + SBSubPrefix(int32 id, int32 add_id, int prefix) + : chunk_id(id), add_prefix(add_id, prefix) {} + + int32 GetAddChunkId() const { return add_prefix.chunk_id; } + SBPrefix GetAddPrefix() const { return add_prefix.prefix; } +}; + +// TODO(shess): The full_hash includes the prefix, so the prefix could +// be dropped. But SBAddPrefix is convenient for comparing across +// different structs, and there aren't many full hashes. Hmm. +struct SBAddFullHash { + SBAddPrefix add_prefix; + base::Time received; + SBFullHash full_hash; + + SBAddFullHash(int32 id, SBPrefix p, base::Time r, SBFullHash h) + : add_prefix(id, p), received(r), full_hash(h) {} + + int32 GetAddChunkId() const { return add_prefix.chunk_id; } + SBPrefix GetAddPrefix() const { return add_prefix.prefix; } +}; + +struct SBSubFullHash { + int32 chunk_id; + SBAddPrefix add_prefix; + SBFullHash full_hash; + + SBSubFullHash(int32 id, int32 add_id, SBPrefix p, SBFullHash h) + : chunk_id(id), add_prefix(add_id, p), full_hash(h) {} + + int32 GetAddChunkId() const { return add_prefix.chunk_id; } + SBPrefix GetAddPrefix() const { return add_prefix.prefix; } +}; + +// Determine less-than based on add chunk and prefix. +template <class T, class U> +bool SBAddPrefixLess(const T& a, const U& b) { + if (a.GetAddChunkId() != b.GetAddChunkId()) + return a.GetAddChunkId() < b.GetAddChunkId(); + + return a.GetAddPrefix() < b.GetAddPrefix(); +} + +// Determine less-than based on add chunk, prefix, and full hash. +// Prefix can compare differently than hash due to byte ordering, +// so it must take precedence. 
+template <class T, class U> +bool SBAddPrefixHashLess(const T& a, const U& b) { + if (SBAddPrefixLess(a, b)) + return true; + + if (SBAddPrefixLess(b, a)) + return false; + + return memcmp(a.full_hash.full_hash, b.full_hash.full_hash, + sizeof(a.full_hash.full_hash)) < 0; +} + +// Process the lists for subs which knock out adds. For any item in +// |sub_prefixes| which has a match in |add_prefixes|, knock out the +// matched items from all vectors. +// +// TODO(shess): Since the prefixes are uniformly-distributed hashes, +// there aren't many ways to organize the inputs for efficient +// processing. For this reason, the vectors are sorted and processed +// in parallel. At this time this code does the sorting internally, +// but it might make sense to make sorting an API requirement so that +// the storage can optimize for it. +// +// TODO(shess): The original code did not process |sub_full_hashes| +// for matches in |add_full_hashes|, so this code doesn't, either. I +// think this is probably a bug. +void SBProcessSubs(std::vector<SBAddPrefix>* add_prefixes, + std::vector<SBSubPrefix>* sub_prefixes, + std::vector<SBAddFullHash>* add_full_hashes, + std::vector<SBSubFullHash>* sub_full_hashes); + +// TODO(shess): This uses int32 rather than int because it's writing +// specifically-sized items to files. SBPrefix should likewise be +// explicitly sized. + +// Abstract interface for storing data. +class SafeBrowsingStore { + public: + SafeBrowsingStore() {} + virtual ~SafeBrowsingStore() {} + + // Sets up the information for later use, but does not necessarily + // check whether the underlying file exists, or is valid. If + // |curruption_callback| is non-NULL it will be called if corruption + // is detected, which could happen as part of any call other than + // Delete(). The appropriate action is to use Delete() to clear the + // store. 
+ virtual void Init(const FilePath& filename, + Callback0::Type* corruption_callback) = 0; + + // Deletes the files which back the store, returning true if + // successful. + virtual bool Delete() = 0; + + // Start an update. None of the following methods should be called + // unless this returns true. If this returns true, the update + // should be terminated by FinishUpdate() or CancelUpdate(). + virtual bool BeginUpdate() = 0; + + // Start a chunk of data. None of the methods through FinishChunk() + // should be called unless this returns true. + // TODO(shess): Would it make sense for this to accept |chunk_id|? + // Possibly not, because of possible confusion between sub_chunk_id + // and add_chunk_id. + virtual bool BeginChunk() = 0; + + virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0; + virtual bool WriteAddHash(int32 chunk_id, SBPrefix prefix, + base::Time receive_time, SBFullHash full_hash) = 0; + virtual bool WriteSubPrefix(int32 chunk_id, + int32 add_chunk_id, SBPrefix prefix) = 0; + virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id, + SBPrefix prefix, SBFullHash full_hash) = 0; + + // Collect the chunk data and preferrably store it on disk to + // release memory. Shoul not modify the data in-place. + virtual bool FinishChunk() = 0; + + // Track the chunks which have been seen. + virtual void SetAddChunk(int32 chunk_id) = 0; + virtual bool CheckAddChunk(int32 chunk_id) = 0; + virtual void GetAddChunks(std::vector<int32>* out) = 0; + virtual void SetSubChunk(int32 chunk_id) = 0; + virtual bool CheckSubChunk(int32 chunk_id) = 0; + virtual void GetSubChunks(std::vector<int32>* out) = 0; + + // Delete the indicated chunk_id. The chunk will continue to be + // visible until the end of the transaction. + virtual void DeleteAddChunk(int32 chunk_id) = 0; + virtual void DeleteSubChunk(int32 chunk_id) = 0; + + // Pass the collected chunks through SBPRocessSubs() and commit to + // permanent storage. 
The resulting add prefixes and hashes will be + // stored in |add_prefixes_result| and |add_full_hashes_result|. + // |pending_adds| is the set of full hashes which have been received + // since the previous update, and is provided as a convenience + // (could be written via WriteAddHash(), but that would flush the + // chunk to disk). + virtual bool FinishUpdate( + const std::vector<SBAddFullHash>& pending_adds, + std::vector<SBAddPrefix>* add_prefixes_result, + std::vector<SBAddFullHash>* add_full_hashes_result) = 0; + + // Cancel the update in process and remove any temporary disk + // storage, leaving the original data unmodified. + virtual bool CancelUpdate() = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore); +}; + +#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ |