summaryrefslogtreecommitdiffstats
path: root/chrome/browser
diff options
context:
space:
mode:
authorshess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-01-20 06:29:28 +0000
committershess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-01-20 06:29:28 +0000
commit5332aa894ad01d22aeb01107db6d82ccee648604 (patch)
tree2c5186162c3a0cff848751dad8f0dba472ef5b21 /chrome/browser
parent86fdd8723d9f3e185eb946781ed160d4ec122fff (diff)
downloadchromium_src-5332aa894ad01d22aeb01107db6d82ccee648604.zip
chromium_src-5332aa894ad01d22aeb01107db6d82ccee648604.tar.gz
chromium_src-5332aa894ad01d22aeb01107db6d82ccee648604.tar.bz2
SafeBrowsingStore storage abstraction for SafeBrowsing database.
First bit of refactoring safe-browsing to use a flat file format. SafeBrowsingStore implements just what is needed for SafeBrowsingDatabase using straight-forward read/modify/write code. There will be a follow-on change to layer in on-the-fly format migration and integrate with SafeBrowsingDatabase. This CL only adds new classes and tests for same. BUG=none TEST=none Review URL: http://codereview.chromium.org/545053 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@36615 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser')
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store.cc152
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store.h211
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store_file.cc553
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store_file.h292
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc56
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store_sqlite.cc714
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store_sqlite.h168
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store_sqlite_unittest.cc53
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store_unittest.cc188
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.cc340
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h68
11 files changed, 2795 insertions, 0 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_store.cc b/chrome/browser/safe_browsing/safe_browsing_store.cc
new file mode 100644
index 0000000..21fd37f
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store.cc
@@ -0,0 +1,152 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/safe_browsing/safe_browsing_store.h"
+
+namespace {
+
+// Find items matching between |subs| and |adds|, and remove them,
+// recording the item from |adds| in |adds_removed|. To minimize
+// copies, the inputs are processed in parallel, so |subs| and |adds|
+// should be compatibly ordered (either by SBAddPrefixLess or
+// SBAddPrefixHashLess).
+//
+// |predAS| provides add < sub, |predSA| provides sub < add, for the
+// tightest compare appropriate (see calls in SBProcessSubs).
+template <class S, class A, typename PredAS, typename PredSA>
+void KnockoutSubs(std::vector<S>* subs,
+ std::vector<A>* adds,
+ PredAS predAS, PredSA predSA,
+ std::vector<A>* adds_removed) {
+ // Keep a pair of output iterators for writing kept items. Due to
+ // deletions, these may lag the main iterators. Using erase() on
+ // individual items would result in O(N^2) copies. Using std::list
+ // would work around that, at double or triple the memory cost.
+ typename std::vector<A>::iterator add_out = adds->begin();
+ typename std::vector<S>::iterator sub_out = subs->begin();
+
+ // Current location in vectors.
+ // TODO(shess): I want these to be const_iterator, but then
+ // std::copy() gets confused. Could snag a const_iterator add_end,
+ // or write an inline std::copy(), but it seems like I'm doing
+ // something wrong.
+ typename std::vector<A>::iterator add_iter = adds->begin();
+ typename std::vector<S>::iterator sub_iter = subs->begin();
+
+ while (add_iter != adds->end() && sub_iter != subs->end()) {
+ // If |*sub_iter| < |*add_iter|, retain the sub.
+ if (predSA(*sub_iter, *add_iter)) {
+ *sub_out = *sub_iter;
+ ++sub_out;
+ ++sub_iter;
+
+ // If |*add_iter| < |*sub_iter|, retain the add.
+ } else if (predAS(*add_iter, *sub_iter)) {
+ *add_out = *add_iter;
+ ++add_out;
+ ++add_iter;
+
+ // Record equal items and drop them.
+ } else {
+ adds_removed->push_back(*add_iter);
+ ++add_iter;
+ ++sub_iter;
+ }
+ }
+
+ // Erase any leftover gap.
+ adds->erase(add_out, add_iter);
+ subs->erase(sub_out, sub_iter);
+}
+
+// Remove items in |removes| from |full_hashes|. |full_hashes| and
+// |removes| should be ordered by SBAddPrefix component.
+template <class T>
+void RemoveMatchingPrefixes(const std::vector<SBAddPrefix>& removes,
+ std::vector<T>* full_hashes) {
+ // This is basically an inline of std::set_difference().
+ // Unfortunately, that algorithm requires that the two iterator
+ // pairs use the same value types.
+
+ // Where to store kept items.
+ typename std::vector<T>::iterator out = full_hashes->begin();
+
+ typename std::vector<T>::iterator hash_iter = full_hashes->begin();
+ std::vector<SBAddPrefix>::const_iterator remove_iter = removes.begin();
+
+ while (hash_iter != full_hashes->end() && remove_iter != removes.end()) {
+ // Keep items less than |*remove_iter|.
+ if (SBAddPrefixLess(*hash_iter, *remove_iter)) {
+ *out = *hash_iter;
+ ++out;
+ ++hash_iter;
+
+ // No hit for |*remove_iter|, bump it forward.
+ } else if (SBAddPrefixLess(*remove_iter, *hash_iter)) {
+ ++remove_iter;
+
+ // Drop equal items, there may be multiple hits.
+ } else {
+ do {
+ ++hash_iter;
+ } while (hash_iter != full_hashes->end() &&
+ !SBAddPrefixLess(*remove_iter, *hash_iter));
+ ++remove_iter;
+ }
+ }
+
+ // Erase any leftover gap.
+ full_hashes->erase(out, hash_iter);
+}
+
+} // namespace
+
+void SBProcessSubs(std::vector<SBAddPrefix>* add_prefixes,
+ std::vector<SBSubPrefix>* sub_prefixes,
+ std::vector<SBAddFullHash>* add_full_hashes,
+ std::vector<SBSubFullHash>* sub_full_hashes) {
+ // It is possible to structure templates and template
+ // specializations such that the following calls work without having
+ // to qualify things. It becomes very arbitrary, though, and less
+ // clear how things are working.
+
+ // Sort the inputs by the SBAddPrefix bits.
+ std::sort(add_prefixes->begin(), add_prefixes->end(),
+ SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
+ std::sort(sub_prefixes->begin(), sub_prefixes->end(),
+ SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
+ std::sort(add_full_hashes->begin(), add_full_hashes->end(),
+ SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
+ std::sort(sub_full_hashes->begin(), sub_full_hashes->end(),
+ SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>);
+
+ // Factor out the prefix subs.
+ std::vector<SBAddPrefix> removed_adds;
+ KnockoutSubs(sub_prefixes, add_prefixes,
+ SBAddPrefixLess<SBAddPrefix,SBSubPrefix>,
+ SBAddPrefixLess<SBSubPrefix,SBAddPrefix>,
+ &removed_adds);
+
+  // Remove the full-hashes corresponding to the adds which
+ // KnockoutSubs() removed. Processing these w/in KnockoutSubs()
+ // would make the code more complicated, and they are very small
+ // relative to the prefix lists so the gain would be modest.
+ RemoveMatchingPrefixes(removed_adds, add_full_hashes);
+ RemoveMatchingPrefixes(removed_adds, sub_full_hashes);
+
+ // TODO(shess): AFAICT this pass is not done on the trunk. I
+ // believe that's a bug, but it may not matter because full-hash
+ // subs almost never happen (I think you'd need multiple collisions
+ // where one of the sites stopped being flagged?). Enable this once
+ // everything is in. [if(0) instead of #ifdef 0 to make sure it
+ // compiles.]
+ if (0) {
+ // Factor out the full-hash subs.
+ std::vector<SBAddFullHash> removed_full_adds;
+ KnockoutSubs(sub_full_hashes, add_full_hashes,
+ SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>,
+ SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>,
+ &removed_full_adds);
+ }
+}
diff --git a/chrome/browser/safe_browsing/safe_browsing_store.h b/chrome/browser/safe_browsing/safe_browsing_store.h
new file mode 100644
index 0000000..0d13e88
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store.h
@@ -0,0 +1,211 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
+#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
+
+#include <set>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/file_path.h"
+#include "base/task.h"
+#include "base/time.h"
+#include "chrome/browser/safe_browsing/safe_browsing_util.h"
+
+// SafeBrowsingStore provides a storage abstraction for the
+// safe-browsing data used to build the bloom filter. The items
+// stored are:
+// The set of add and sub chunks seen.
+// List of SBAddPrefix (chunk_id and SBPrefix).
+// List of SBSubPrefix (chunk_id and the target SBAddPrefix).
+// List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash).
+// List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash).
+//
+// The store is geared towards updating the data, not runtime access
+// to the data (that is handled by SafeBrowsingDatabase). Updates are
+// handled similar to a SQL transaction cycle, with the new data being
+// returned from FinishUpdate() (the COMMIT). Data is not persistent
+// until FinishUpdate() returns successfully.
+//
+// FinishUpdate() also handles dropping items whose chunk has been
+// deleted, and netting out the add/sub lists (when a sub matches an
+// add, both are dropped).
+
+// GetAddChunkId() and GetAddPrefix() are exposed so that these
+// items can be generically compared with each other by
+// SBAddPrefixLess() and SBAddPrefixHashLess().
+
+struct SBAddPrefix {
+ int32 chunk_id;
+ SBPrefix prefix;
+
+ SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {}
+
+ int32 GetAddChunkId() const { return chunk_id; }
+ SBPrefix GetAddPrefix() const { return prefix; }
+};
+
+struct SBSubPrefix {
+ int32 chunk_id;
+ SBAddPrefix add_prefix;
+
+ SBSubPrefix(int32 id, int32 add_id, int prefix)
+ : chunk_id(id), add_prefix(add_id, prefix) {}
+
+ int32 GetAddChunkId() const { return add_prefix.chunk_id; }
+ SBPrefix GetAddPrefix() const { return add_prefix.prefix; }
+};
+
+// TODO(shess): The full_hash includes the prefix, so the prefix could
+// be dropped. But SBAddPrefix is convenient for comparing across
+// different structs, and there aren't many full hashes. Hmm.
+struct SBAddFullHash {
+ SBAddPrefix add_prefix;
+ base::Time received;
+ SBFullHash full_hash;
+
+ SBAddFullHash(int32 id, SBPrefix p, base::Time r, SBFullHash h)
+ : add_prefix(id, p), received(r), full_hash(h) {}
+
+ int32 GetAddChunkId() const { return add_prefix.chunk_id; }
+ SBPrefix GetAddPrefix() const { return add_prefix.prefix; }
+};
+
+struct SBSubFullHash {
+ int32 chunk_id;
+ SBAddPrefix add_prefix;
+ SBFullHash full_hash;
+
+ SBSubFullHash(int32 id, int32 add_id, SBPrefix p, SBFullHash h)
+ : chunk_id(id), add_prefix(add_id, p), full_hash(h) {}
+
+ int32 GetAddChunkId() const { return add_prefix.chunk_id; }
+ SBPrefix GetAddPrefix() const { return add_prefix.prefix; }
+};
+
+// Determine less-than based on add chunk and prefix.
+template <class T, class U>
+bool SBAddPrefixLess(const T& a, const U& b) {
+ if (a.GetAddChunkId() != b.GetAddChunkId())
+ return a.GetAddChunkId() < b.GetAddChunkId();
+
+ return a.GetAddPrefix() < b.GetAddPrefix();
+}
+
+// Determine less-than based on add chunk, prefix, and full hash.
+// Prefix can compare differently than hash due to byte ordering,
+// so it must take precedence.
+template <class T, class U>
+bool SBAddPrefixHashLess(const T& a, const U& b) {
+ if (SBAddPrefixLess(a, b))
+ return true;
+
+ if (SBAddPrefixLess(b, a))
+ return false;
+
+ return memcmp(a.full_hash.full_hash, b.full_hash.full_hash,
+ sizeof(a.full_hash.full_hash)) < 0;
+}
+
+// Process the lists for subs which knock out adds. For any item in
+// |sub_prefixes| which has a match in |add_prefixes|, knock out the
+// matched items from all vectors.
+//
+// TODO(shess): Since the prefixes are uniformly-distributed hashes,
+// there aren't many ways to organize the inputs for efficient
+// processing. For this reason, the vectors are sorted and processed
+// in parallel. At this time this code does the sorting internally,
+// but it might make sense to make sorting an API requirement so that
+// the storage can optimize for it.
+//
+// TODO(shess): The original code did not process |sub_full_hashes|
+// for matches in |add_full_hashes|, so this code doesn't, either. I
+// think this is probably a bug.
+void SBProcessSubs(std::vector<SBAddPrefix>* add_prefixes,
+ std::vector<SBSubPrefix>* sub_prefixes,
+ std::vector<SBAddFullHash>* add_full_hashes,
+ std::vector<SBSubFullHash>* sub_full_hashes);
+
+// TODO(shess): This uses int32 rather than int because it's writing
+// specifically-sized items to files. SBPrefix should likewise be
+// explicitly sized.
+
+// Abstract interface for storing data.
+class SafeBrowsingStore {
+ public:
+ SafeBrowsingStore() {}
+ virtual ~SafeBrowsingStore() {}
+
+ // Sets up the information for later use, but does not necessarily
+ // check whether the underlying file exists, or is valid. If
+  // |corruption_callback| is non-NULL it will be called if corruption
+ // is detected, which could happen as part of any call other than
+ // Delete(). The appropriate action is to use Delete() to clear the
+ // store.
+ virtual void Init(const FilePath& filename,
+ Callback0::Type* corruption_callback) = 0;
+
+ // Deletes the files which back the store, returning true if
+ // successful.
+ virtual bool Delete() = 0;
+
+ // Start an update. None of the following methods should be called
+ // unless this returns true. If this returns true, the update
+ // should be terminated by FinishUpdate() or CancelUpdate().
+ virtual bool BeginUpdate() = 0;
+
+ // Start a chunk of data. None of the methods through FinishChunk()
+ // should be called unless this returns true.
+ // TODO(shess): Would it make sense for this to accept |chunk_id|?
+ // Possibly not, because of possible confusion between sub_chunk_id
+ // and add_chunk_id.
+ virtual bool BeginChunk() = 0;
+
+ virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0;
+ virtual bool WriteAddHash(int32 chunk_id, SBPrefix prefix,
+ base::Time receive_time, SBFullHash full_hash) = 0;
+ virtual bool WriteSubPrefix(int32 chunk_id,
+ int32 add_chunk_id, SBPrefix prefix) = 0;
+ virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id,
+ SBPrefix prefix, SBFullHash full_hash) = 0;
+
+  // Collect the chunk data and preferably store it on disk to
+  // release memory.  Should not modify the data in-place.
+ virtual bool FinishChunk() = 0;
+
+ // Track the chunks which have been seen.
+ virtual void SetAddChunk(int32 chunk_id) = 0;
+ virtual bool CheckAddChunk(int32 chunk_id) = 0;
+ virtual void GetAddChunks(std::vector<int32>* out) = 0;
+ virtual void SetSubChunk(int32 chunk_id) = 0;
+ virtual bool CheckSubChunk(int32 chunk_id) = 0;
+ virtual void GetSubChunks(std::vector<int32>* out) = 0;
+
+ // Delete the indicated chunk_id. The chunk will continue to be
+ // visible until the end of the transaction.
+ virtual void DeleteAddChunk(int32 chunk_id) = 0;
+ virtual void DeleteSubChunk(int32 chunk_id) = 0;
+
+  // Pass the collected chunks through SBProcessSubs() and commit to
+ // permanent storage. The resulting add prefixes and hashes will be
+ // stored in |add_prefixes_result| and |add_full_hashes_result|.
+ // |pending_adds| is the set of full hashes which have been received
+ // since the previous update, and is provided as a convenience
+ // (could be written via WriteAddHash(), but that would flush the
+ // chunk to disk).
+ virtual bool FinishUpdate(
+ const std::vector<SBAddFullHash>& pending_adds,
+ std::vector<SBAddPrefix>* add_prefixes_result,
+ std::vector<SBAddFullHash>* add_full_hashes_result) = 0;
+
+ // Cancel the update in process and remove any temporary disk
+ // storage, leaving the original data unmodified.
+ virtual bool CancelUpdate() = 0;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore);
+};
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.cc b/chrome/browser/safe_browsing/safe_browsing_store_file.cc
new file mode 100644
index 0000000..9fd1bd7
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store_file.cc
@@ -0,0 +1,553 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
+
+namespace {
+
+// NOTE(shess): kFileMagic should not be a byte-wise palindrome, so
+// that byte-order changes force corruption.
+const int32 kFileMagic = 0x600D71FE;
+const int32 kFileVersion = 7; // SQLite storage was 6...
+const size_t kFileHeaderSize = 8 * sizeof(int32);
+
+bool ReadInt32(FILE* fp, int32* value) {
+ DCHECK(value);
+ const size_t ret = fread(value, sizeof(*value), 1, fp);
+ return ret == 1;
+}
+
+bool WriteInt32(FILE* fp, int32 value) {
+ const size_t ret = fwrite(&value, sizeof(value), 1, fp);
+ return ret == 1;
+}
+
+bool ReadTime(FILE* fp, base::Time* value) {
+ DCHECK(value);
+
+ int64 time_t;
+ const size_t ret = fread(&time_t, sizeof(time_t), 1, fp);
+ if (ret != 1)
+ return false;
+ *value = base::Time::FromTimeT(time_t);
+ return true;
+}
+
+bool WriteTime(FILE* fp, const base::Time& value) {
+ const int64 time_t = value.ToTimeT();
+ const size_t ret = fwrite(&time_t, sizeof(time_t), 1, fp);
+ return ret == 1;
+}
+
+bool ReadHash(FILE* fp, SBFullHash* value) {
+ DCHECK(value);
+ const size_t ret = fread(&value->full_hash, sizeof(value->full_hash),
+ 1, fp);
+ return ret == 1;
+}
+
+bool WriteHash(FILE* fp, SBFullHash value) {
+ const size_t ret = fwrite(&value.full_hash, sizeof(value.full_hash),
+ 1, fp);
+ return ret == 1;
+}
+
+bool FileSeek(FILE* fp, size_t offset) {
+ int rv = fseek(fp, offset, SEEK_SET);
+ DCHECK_EQ(rv, 0);
+ return rv == 0;
+}
+
+// Delete the chunks in |deleted| from |chunks|.
+void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
+ std::set<int32>* chunks) {
+ for (std::set<int32>::iterator iter = chunks->begin();
+ iter != chunks->end();) {
+ std::set<int32>::iterator prev = iter++;
+ if (deleted.count(*prev) > 0)
+ chunks->erase(prev);
+ }
+}
+
+} // namespace
+
+SafeBrowsingStoreFile::SafeBrowsingStoreFile()
+ : chunks_written_(0),
+ file_(NULL) {
+}
+SafeBrowsingStoreFile::~SafeBrowsingStoreFile() {
+ Close();
+}
+
+bool SafeBrowsingStoreFile::Delete() {
+ // The database should not be open at this point. But, just in
+ // case, close everything before deleting.
+ if (!Close()) {
+ NOTREACHED();
+ return false;
+ }
+
+ if (!file_util::Delete(filename_, false) &&
+ file_util::PathExists(filename_)) {
+ NOTREACHED();
+ return false;
+ }
+
+ const FilePath new_filename = TemporaryFileForFilename(filename_);
+ if (!file_util::Delete(new_filename, false) &&
+ file_util::PathExists(new_filename)) {
+ NOTREACHED();
+ return false;
+ }
+
+ return true;
+}
+
+void SafeBrowsingStoreFile::Init(const FilePath& filename,
+ Callback0::Type* corruption_callback) {
+ filename_ = filename;
+ corruption_callback_.reset(corruption_callback);
+}
+
+bool SafeBrowsingStoreFile::OnCorruptDatabase() {
+ if (corruption_callback_.get())
+ corruption_callback_->Run();
+
+ // Return false as a convenience to callers.
+ return false;
+}
+
+bool SafeBrowsingStoreFile::Close() {
+ ClearUpdateBuffers();
+
+ // Make sure the files are closed.
+ file_.reset();
+ new_file_.reset();
+ return true;
+}
+
+bool SafeBrowsingStoreFile::ReadChunksToSet(FILE* fp, std::set<int32>* chunks,
+ int count) {
+ DCHECK(fp);
+
+ for (int i = 0; i < count; ++i) {
+ int32 chunk_id;
+ if (!ReadInt32(fp, &chunk_id))
+ return false;
+ chunks->insert(chunk_id);
+ }
+ return true;
+}
+
+bool SafeBrowsingStoreFile::WriteChunksFromSet(const std::set<int32>& chunks) {
+ DCHECK(new_file_.get());
+
+ for (std::set<int32>::const_iterator iter = chunks.begin();
+ iter != chunks.end(); ++iter) {
+ if (!WriteInt32(new_file_.get(), *iter))
+ return false;
+ }
+ return true;
+}
+
+bool SafeBrowsingStoreFile::ReadAddPrefixes(
+ FILE* fp, std::vector<SBAddPrefix>* add_prefixes, int count) {
+ DCHECK(fp && add_prefixes);
+
+ add_prefixes->reserve(add_prefixes->size() + count);
+
+ for (int32 i = 0; i < count; ++i) {
+ int32 chunk_id;
+ SBPrefix prefix;
+ DCHECK_EQ(sizeof(int32), sizeof(prefix));
+
+ if (!ReadInt32(fp, &chunk_id) || !ReadInt32(fp, &prefix))
+ return false;
+
+ if (add_del_cache_.count(chunk_id) > 0)
+ continue;
+
+ add_prefixes->push_back(SBAddPrefix(chunk_id, prefix));
+ }
+
+ return true;
+}
+
+bool SafeBrowsingStoreFile::WriteAddPrefixes(
+ const std::vector<SBAddPrefix>& add_prefixes) {
+ DCHECK(new_file_.get());
+
+ for (std::vector<SBAddPrefix>::const_iterator iter = add_prefixes.begin();
+ iter != add_prefixes.end(); ++iter) {
+ DCHECK_EQ(sizeof(int32), sizeof(iter->prefix));
+ if (!WriteInt32(new_file_.get(), iter->chunk_id) ||
+ !WriteInt32(new_file_.get(), iter->prefix))
+ return false;
+ }
+ return true;
+}
+
+bool SafeBrowsingStoreFile::ReadSubPrefixes(
+ FILE* fp, std::vector<SBSubPrefix>* sub_prefixes, int count) {
+ DCHECK(fp && sub_prefixes);
+
+ sub_prefixes->reserve(sub_prefixes->size() + count);
+
+ for (int32 i = 0; i < count; ++i) {
+ int32 chunk_id, add_chunk_id;
+ SBPrefix add_prefix;
+ DCHECK_EQ(sizeof(int32), sizeof(add_prefix));
+
+ if (!ReadInt32(fp, &chunk_id) ||
+ !ReadInt32(fp, &add_chunk_id) || !ReadInt32(fp, &add_prefix))
+ return false;
+
+ if (sub_del_cache_.count(chunk_id) > 0)
+ continue;
+
+ sub_prefixes->push_back(SBSubPrefix(chunk_id, add_chunk_id, add_prefix));
+ }
+
+ return true;
+}
+
+bool SafeBrowsingStoreFile::WriteSubPrefixes(
+ std::vector<SBSubPrefix>& sub_prefixes) {
+ DCHECK(new_file_.get());
+
+ for (std::vector<SBSubPrefix>::const_iterator iter = sub_prefixes.begin();
+ iter != sub_prefixes.end(); ++iter) {
+ if (!WriteInt32(new_file_.get(), iter->chunk_id) ||
+ !WriteInt32(new_file_.get(), iter->add_prefix.chunk_id) ||
+ !WriteInt32(new_file_.get(), iter->add_prefix.prefix))
+ return false;
+ }
+ return true;
+}
+
+bool SafeBrowsingStoreFile::ReadAddHashes(
+ FILE* fp, std::vector<SBAddFullHash>* add_hashes, int count) {
+ DCHECK(fp && add_hashes);
+
+ add_hashes->reserve(add_hashes->size() + count);
+
+ for (int i = 0; i < count; ++i) {
+ int32 chunk_id;
+ SBPrefix prefix;
+ base::Time received;
+ SBFullHash full_hash;
+ DCHECK_EQ(sizeof(int32), sizeof(prefix));
+
+ if (!ReadInt32(fp, &chunk_id) ||
+ !ReadInt32(fp, &prefix) ||
+ !ReadTime(fp, &received) ||
+ !ReadHash(fp, &full_hash))
+ return false;
+
+ if (add_del_cache_.count(chunk_id) > 0)
+ continue;
+
+ add_hashes->push_back(SBAddFullHash(chunk_id, prefix, received, full_hash));
+ }
+
+ return true;
+}
+
+bool SafeBrowsingStoreFile::WriteAddHashes(
+ const std::vector<SBAddFullHash>& add_hashes) {
+ DCHECK(new_file_.get());
+
+ for (std::vector<SBAddFullHash>::const_iterator iter = add_hashes.begin();
+ iter != add_hashes.end(); ++iter) {
+ if (!WriteInt32(new_file_.get(), iter->add_prefix.chunk_id) ||
+ !WriteInt32(new_file_.get(), iter->add_prefix.prefix) ||
+ !WriteTime(new_file_.get(), iter->received) ||
+ !WriteHash(new_file_.get(), iter->full_hash))
+ return false;
+ }
+ return true;
+}
+
+bool SafeBrowsingStoreFile::ReadSubHashes(
+ FILE* fp, std::vector<SBSubFullHash>* sub_hashes, int count) {
+ DCHECK(fp);
+
+ sub_hashes->reserve(sub_hashes->size() + count);
+
+ for (int i = 0; i < count; ++i) {
+ int32 chunk_id;
+ int32 add_chunk_id;
+ SBPrefix add_prefix;
+ SBFullHash add_full_hash;
+ DCHECK_EQ(sizeof(int32), sizeof(add_prefix));
+
+ if (!ReadInt32(fp, &chunk_id) ||
+ !ReadInt32(fp, &add_chunk_id) ||
+ !ReadInt32(fp, &add_prefix) ||
+ !ReadHash(fp, &add_full_hash))
+ return false;
+
+ if (sub_del_cache_.count(chunk_id) > 0)
+ continue;
+
+ sub_hashes->push_back(
+ SBSubFullHash(chunk_id, add_chunk_id, add_prefix, add_full_hash));
+ }
+
+ return true;
+}
+
+bool SafeBrowsingStoreFile::WriteSubHashes(
+ std::vector<SBSubFullHash>& sub_hashes) {
+ DCHECK(new_file_.get());
+
+ for (std::vector<SBSubFullHash>::const_iterator iter = sub_hashes.begin();
+ iter != sub_hashes.end(); ++iter) {
+ if (!WriteInt32(new_file_.get(), iter->chunk_id) ||
+ !WriteInt32(new_file_.get(), iter->add_prefix.chunk_id) ||
+ !WriteInt32(new_file_.get(), iter->add_prefix.prefix) ||
+ !WriteHash(new_file_.get(), iter->full_hash))
+ return false;
+ }
+ return true;
+}
+
+bool SafeBrowsingStoreFile::BeginUpdate() {
+ DCHECK(!file_.get() && !new_file_.get());
+
+ // Structures should all be clear unless something bad happened.
+ DCHECK(add_chunks_cache_.empty());
+ DCHECK(sub_chunks_cache_.empty());
+ DCHECK(add_del_cache_.empty());
+ DCHECK(sub_del_cache_.empty());
+ DCHECK(add_prefixes_.empty());
+ DCHECK(sub_prefixes_.empty());
+ DCHECK(add_hashes_.empty());
+ DCHECK(sub_hashes_.empty());
+ DCHECK_EQ(chunks_written_, 0);
+
+ const FilePath new_filename = TemporaryFileForFilename(filename_);
+ file_util::ScopedFILE new_file(file_util::OpenFile(new_filename, "wb+"));
+ if (new_file.get() == NULL)
+ return false;
+
+ file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
+ empty_ = (file.get() == NULL);
+ if (empty_) {
+ // If the file exists but cannot be opened, try to delete it (not
+ // deleting directly, the bloom filter needs to be deleted, too).
+ if (file_util::PathExists(filename_))
+ return OnCorruptDatabase();
+
+ new_file_.swap(new_file);
+ return true;
+ }
+
+ int32 magic, version;
+ if (!ReadInt32(file.get(), &magic) || !ReadInt32(file.get(), &version))
+ return OnCorruptDatabase();
+
+ if (magic != kFileMagic || version != kFileVersion)
+ return OnCorruptDatabase();
+
+ int32 add_chunk_count, sub_chunk_count;
+ if (!ReadInt32(file.get(), &add_chunk_count) ||
+ !ReadInt32(file.get(), &sub_chunk_count))
+ return OnCorruptDatabase();
+
+ if (!FileSeek(file.get(), kFileHeaderSize))
+ return OnCorruptDatabase();
+
+ if (!ReadChunksToSet(file.get(), &add_chunks_cache_, add_chunk_count) ||
+ !ReadChunksToSet(file.get(), &sub_chunks_cache_, sub_chunk_count))
+ return OnCorruptDatabase();
+
+ file_.swap(file);
+ new_file_.swap(new_file);
+ return true;
+}
+
+bool SafeBrowsingStoreFile::FinishChunk() {
+ if (!add_prefixes_.size() && !sub_prefixes_.size() &&
+ !add_hashes_.size() && !sub_hashes_.size())
+ return true;
+
+ if (!WriteInt32(new_file_.get(), add_prefixes_.size()) ||
+ !WriteInt32(new_file_.get(), sub_prefixes_.size()) ||
+ !WriteInt32(new_file_.get(), add_hashes_.size()) ||
+ !WriteInt32(new_file_.get(), sub_hashes_.size()))
+ return false;
+
+ if (!WriteAddPrefixes(add_prefixes_) ||
+ !WriteSubPrefixes(sub_prefixes_) ||
+ !WriteAddHashes(add_hashes_) ||
+ !WriteSubHashes(sub_hashes_))
+ return false;
+
+ ++chunks_written_;
+
+ // Clear everything to save memory.
+ return ClearChunkBuffers();
+}
+
+bool SafeBrowsingStoreFile::DoUpdate(
+ const std::vector<SBAddFullHash>& pending_adds,
+ std::vector<SBAddPrefix>* add_prefixes_result,
+ std::vector<SBAddFullHash>* add_full_hashes_result) {
+ DCHECK(file_.get() || empty_);
+ DCHECK(new_file_.get());
+
+ std::vector<SBAddPrefix> add_prefixes;
+ std::vector<SBSubPrefix> sub_prefixes;
+ std::vector<SBAddFullHash> add_full_hashes;
+ std::vector<SBSubFullHash> sub_full_hashes;
+
+ // Read |file_| into the vectors.
+ if (!empty_) {
+ DCHECK(file_.get());
+
+ int32 magic, version;
+ int32 add_chunk_count, sub_chunk_count;
+ int32 add_prefix_count, sub_prefix_count;
+ int32 add_hash_count, sub_hash_count;
+
+ if (!FileSeek(file_.get(), 0))
+ return OnCorruptDatabase();
+
+ if (!ReadInt32(file_.get(), &magic) ||
+ !ReadInt32(file_.get(), &version) ||
+ !ReadInt32(file_.get(), &add_chunk_count) ||
+ !ReadInt32(file_.get(), &sub_chunk_count) ||
+ !ReadInt32(file_.get(), &add_prefix_count) ||
+ !ReadInt32(file_.get(), &sub_prefix_count) ||
+ !ReadInt32(file_.get(), &add_hash_count) ||
+ !ReadInt32(file_.get(), &sub_hash_count))
+ return OnCorruptDatabase();
+
+ if (magic != kFileMagic || version != kFileVersion)
+ return OnCorruptDatabase();
+
+ const size_t prefixes_offset = kFileHeaderSize +
+ (add_chunk_count + sub_chunk_count) * sizeof(int32);
+ if (!FileSeek(file_.get(), prefixes_offset))
+ return OnCorruptDatabase();
+
+ if (!ReadAddPrefixes(file_.get(), &add_prefixes, add_prefix_count) ||
+ !ReadSubPrefixes(file_.get(), &sub_prefixes, sub_prefix_count) ||
+ !ReadAddHashes(file_.get(), &add_full_hashes, add_hash_count) ||
+ !ReadSubHashes(file_.get(), &sub_full_hashes, sub_hash_count))
+ return OnCorruptDatabase();
+
+ // Close the file so we can later rename over it.
+ file_.reset();
+ }
+ DCHECK(!file_.get());
+
+ // Rewind the temporary storage.
+ if (!FileSeek(new_file_.get(), 0))
+ return false;
+
+ // Append the accumulated chunks onto the vectors from file_.
+ for (int i = 0; i < chunks_written_; ++i) {
+ int32 add_prefix_count, sub_prefix_count;
+ int32 add_hash_count, sub_hash_count;
+
+ if (!ReadInt32(new_file_.get(), &add_prefix_count) ||
+ !ReadInt32(new_file_.get(), &sub_prefix_count) ||
+ !ReadInt32(new_file_.get(), &add_hash_count) ||
+ !ReadInt32(new_file_.get(), &sub_hash_count))
+ return false;
+
+ // TODO(shess): If the vectors were kept sorted, then this code
+ // could use std::inplace_merge() to merge everything together in
+ // sorted order. That might still be slower than just sorting at
+ // the end if there were a large number of chunks. In that case
+ // some sort of recursive binary merge might be in order (merge
+ // chunks pairwise, merge those chunks pairwise, and so on, then
+ // merge the result with the main list).
+ if (!ReadAddPrefixes(new_file_.get(), &add_prefixes, add_prefix_count) ||
+ !ReadSubPrefixes(new_file_.get(), &sub_prefixes, sub_prefix_count) ||
+ !ReadAddHashes(new_file_.get(), &add_full_hashes, add_hash_count) ||
+ !ReadSubHashes(new_file_.get(), &sub_full_hashes, sub_hash_count))
+ return false;
+ }
+
+ // Add the pending adds which haven't since been deleted.
+ for (std::vector<SBAddFullHash>::const_iterator iter = pending_adds.begin();
+ iter != pending_adds.end(); ++iter) {
+ if (add_del_cache_.count(iter->add_prefix.chunk_id) == 0)
+ add_full_hashes.push_back(*iter);
+ }
+
+ // Knock the subs from the adds.
+ SBProcessSubs(&add_prefixes, &sub_prefixes,
+ &add_full_hashes, &sub_full_hashes);
+
+ // We no longer need to track deleted chunks.
+ DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
+ DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);
+
+ // Write the new data to new_file_.
+ // TODO(shess): If we receive a lot of subs relative to adds,
+ // overwriting the temporary chunk data in new_file_ with the
+ // permanent data could leave additional data at the end. Won't
+ // cause any problems, but does waste space. There is no truncate()
+ // for stdio. Could use ftruncate() or re-open the file. Or maybe
+ // ignore it, since we'll likely rewrite soon enough.
+ if (!FileSeek(new_file_.get(), 0))
+ return false;
+
+ if (!WriteInt32(new_file_.get(), kFileMagic) ||
+ !WriteInt32(new_file_.get(), kFileVersion) ||
+ !WriteInt32(new_file_.get(), add_chunks_cache_.size()) ||
+ !WriteInt32(new_file_.get(), sub_chunks_cache_.size()) ||
+ !WriteInt32(new_file_.get(), add_prefixes.size()) ||
+ !WriteInt32(new_file_.get(), sub_prefixes.size()) ||
+ !WriteInt32(new_file_.get(), add_full_hashes.size()) ||
+ !WriteInt32(new_file_.get(), sub_full_hashes.size()))
+ return false;
+
+ if (!WriteChunksFromSet(add_chunks_cache_) ||
+ !WriteChunksFromSet(sub_chunks_cache_) ||
+ !WriteAddPrefixes(add_prefixes) ||
+ !WriteSubPrefixes(sub_prefixes) ||
+ !WriteAddHashes(add_full_hashes) ||
+ !WriteSubHashes(sub_full_hashes))
+ return false;
+
+ // Close the file handle and swizzle the file into place.
+ new_file_.reset();
+ const FilePath new_filename = TemporaryFileForFilename(filename_);
+ if (!file_util::Delete(filename_, false) ||
+ !file_util::Move(new_filename, filename_))
+ return false;
+
+ // Pass the resulting data off to the caller.
+ add_prefixes_result->swap(add_prefixes);
+ add_full_hashes_result->swap(add_full_hashes);
+
+ return true;
+}
+
+bool SafeBrowsingStoreFile::FinishUpdate(
+ const std::vector<SBAddFullHash>& pending_adds,
+ std::vector<SBAddPrefix>* add_prefixes_result,
+ std::vector<SBAddFullHash>* add_full_hashes_result) {
+ bool ret = DoUpdate(pending_adds,
+ add_prefixes_result, add_full_hashes_result);
+
+ if (!ret) {
+ CancelUpdate();
+ return false;
+ }
+
+ DCHECK(!new_file_.get());
+ DCHECK(!file_.get());
+
+ return Close();
+}
+
+bool SafeBrowsingStoreFile::CancelUpdate() {
+ return Close();
+}
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.h b/chrome/browser/safe_browsing/safe_browsing_store_file.h
new file mode 100644
index 0000000..25f6d9c
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store_file.h
@@ -0,0 +1,292 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_
+#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_
+
+#include <set>
+#include <vector>
+
+#include "chrome/browser/safe_browsing/safe_browsing_store.h"
+
+#include "base/file_util.h"
+
+// Implement SafeBrowsingStore in terms of a flat file. The file
+// format is pretty literal:
+//
+// int32 magic; // magic number "validating" file
+// int32 version; // format version
+//
+// // Counts for the various data which follows the header.
+// int32 add_chunk_count; // Chunks seen, including empties.
+// int32 sub_chunk_count; // Ditto.
+// int32 add_prefix_count;
+// int32 sub_prefix_count;
+// int32 add_hash_count;
+// int32 sub_hash_count;
+//
+// array[add_chunk_count] {
+// int32 chunk_id;
+// }
+// array[sub_chunk_count] {
+// int32 chunk_id;
+// }
+// array[add_prefix_count] {
+// int32 chunk_id;
+// int32 prefix;
+// }
+// array[sub_prefix_count] {
+// int32 chunk_id;
+// int32 add_chunk_id;
+// int32 add_prefix;
+// }
+// array[add_hash_count] {
+// int32 chunk_id;
+// // TODO(shess): This duplicates first four bytes of full_hash!
+// int32 prefix;
+// // From base::Time::ToTimeT().
+// // TODO(shess): an int32 probably has enough resolution.
+// int64 received_time;
+// char[32] full_hash;
+// }
+// array[sub_hash_count] {
+// int32 chunk_id;
+// int32 add_chunk_id;
+// int32 add_prefix;
+// char[32] add_full_hash;
+// }
+// TODO(shess): Would a checksum be worthwhile? If so, check at open,
+// or at commit?
+//
+// During the course of an update, uncommitted data is stored in a
+// temporary file (which is later re-used to commit). This is an
+// array of chunks, with the count kept in memory until the end of the
+// transaction. The format of this file is like the main file, with
+// the list of chunks seen omitted, as that data is tracked in-memory:
+//
+// array[] {
+// int32 add_prefix_count;
+// int32 sub_prefix_count;
+// int32 add_hash_count;
+// int32 sub_hash_count;
+// array[add_prefix_count] {
+// int32 chunk_id;
+// int32 prefix;
+// }
+// array[sub_prefix_count] {
+// int32 chunk_id;
+// int32 add_chunk_id;
+// int32 add_prefix;
+// }
+// array[add_hash_count] {
+// int32 chunk_id;
+// int32 prefix;
+// int64 received_time;
+//     char[32] full_hash;
+//   }
+//   array[sub_hash_count] {
+// int32 chunk_id;
+// int32 add_chunk_id;
+// int32 add_prefix;
+// char[32] add_full_hash;
+// }
+// }
+//
+// The overall transaction works like this:
+// - Open the original file to get the chunks-seen data.
+// - Open a temp file for storing new chunk info.
+// - Write new chunks to the temp file.
+// - When the transaction is finished:
+// - Read the rest of the original file's data into buffers.
+// - Rewind the temp file and merge the new data into buffers.
+// - Process buffers for deletions and apply subs.
+// - Rewind and write the buffers out to temp file.
+// - Delete original file.
+// - Rename temp file to original filename.
+//
+// TODO(shess): Does there need to be an fsync() before the rename?
+// important_file_writer.h seems to think that
+// http://valhenson.livejournal.com/37921.html means you don't, but I
+// don't think it follows (and, besides, this needs to run on other
+// operating systems).
+//
+// TODO(shess): Using a checksum to validate the file would allow
+// correctness without fsync, at the cost of periodically needing to
+// regenerate the database from scratch.
+
+// TODO(shess): Regeneration could be moderated by saving the previous
+// file, if valid, as a checkpoint. During update, if the current
+// file is found to be invalid, rollback to the checkpoint and run the
+// update forward from there. This would require that the current file
+// be validated at BeginUpdate() rather than FinishUpdate(), because
+// the chunks-seen data may have changed. [Does this have
+// implications for the pending_hashes, which were generated while
+// using a newer bloom filter?]
+
+// Flat-file implementation of SafeBrowsingStore. Chunk data is
+// collected in the in-memory buffers below between BeginChunk() and
+// FinishChunk(); per the comment above, uncommitted data is staged in a
+// temporary file which FinishUpdate() rewrites and renames into place.
+class SafeBrowsingStoreFile : public SafeBrowsingStore {
+ public:
+  SafeBrowsingStoreFile();
+  virtual ~SafeBrowsingStoreFile();
+
+  // Configure the store's backing file and corruption callback.
+  virtual void Init(const FilePath& filename,
+                    Callback0::Type* corruption_callback);
+
+  // Delete any on-disk files, including the permanent storage.
+  virtual bool Delete();
+
+  // Reset the chunk buffers so a fresh chunk can be collected.
+  virtual bool BeginChunk() {
+    return ClearChunkBuffers();
+  }
+  // The Write*() calls below only buffer in memory; they cannot fail.
+  virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) {
+    add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix));
+    return true;
+  }
+  virtual bool WriteAddHash(int32 chunk_id, SBPrefix prefix,
+                            base::Time receive_time, SBFullHash full_hash) {
+    add_hashes_.push_back(
+        SBAddFullHash(chunk_id, prefix, receive_time, full_hash));
+    return true;
+  }
+  virtual bool WriteSubPrefix(int32 chunk_id,
+                              int32 add_chunk_id, SBPrefix prefix) {
+    sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix));
+    return true;
+  }
+  virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id,
+                            SBPrefix prefix, SBFullHash full_hash) {
+    sub_hashes_.push_back(
+        SBSubFullHash(chunk_id, add_chunk_id, prefix, full_hash));
+    return true;
+  }
+  // Flush the buffered chunk data (presumably into |new_file_|, which
+  // |chunks_written_| counts -- implementation in the .cc file).
+  virtual bool FinishChunk();
+
+  virtual bool BeginUpdate();
+  virtual bool DoUpdate(const std::vector<SBAddFullHash>& pending_adds,
+                        std::vector<SBAddPrefix>* add_prefixes_result,
+                        std::vector<SBAddFullHash>* add_full_hashes_result);
+  virtual bool FinishUpdate(const std::vector<SBAddFullHash>& pending_adds,
+                            std::vector<SBAddPrefix>* add_prefixes_result,
+                            std::vector<SBAddFullHash>* add_full_hashes_result);
+  virtual bool CancelUpdate();
+
+  // Chunk-seen bookkeeping operates purely on the in-memory caches,
+  // which BeginUpdate() loads from the file.
+  virtual void SetAddChunk(int32 chunk_id) {
+    add_chunks_cache_.insert(chunk_id);
+  }
+  virtual bool CheckAddChunk(int32 chunk_id) {
+    return add_chunks_cache_.count(chunk_id) > 0;
+  }
+  virtual void GetAddChunks(std::vector<int32>* out) {
+    out->clear();
+    out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end());
+  }
+  virtual void SetSubChunk(int32 chunk_id) {
+    sub_chunks_cache_.insert(chunk_id);
+  }
+  virtual bool CheckSubChunk(int32 chunk_id) {
+    return sub_chunks_cache_.count(chunk_id) > 0;
+  }
+  virtual void GetSubChunks(std::vector<int32>* out) {
+    out->clear();
+    out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end());
+  }
+
+  // Deletions are only recorded here; they are applied on FinishUpdate().
+  virtual void DeleteAddChunk(int32 chunk_id) {
+    add_del_cache_.insert(chunk_id);
+  }
+  virtual void DeleteSubChunk(int32 chunk_id) {
+    sub_del_cache_.insert(chunk_id);
+  }
+
+  // Returns the name of the temporary file used to buffer data for
+  // |filename|. Exported for unit tests.
+  static const FilePath TemporaryFileForFilename(const FilePath& filename) {
+    return FilePath(filename.value() + FILE_PATH_LITERAL("_new"));
+  }
+
+ private:
+  // Close all files and clear all buffers.
+  bool Close();
+
+  // Helpers to read/write the various data sets. Excepting
+  // ReadChunksToSet(), which is called too early, the readers skip
+  // items from deleted chunks (listed in add_del_cache_ and
+  // sub_del_cache_).
+  // NOTE(review): WriteSubPrefixes()/WriteSubHashes() take non-const
+  // references; they look like they could be const& -- confirm against
+  // the .cc before tightening (the SQLite store mirrors this signature).
+  bool ReadChunksToSet(FILE* fp, std::set<int32>* chunks, int count);
+  bool WriteChunksFromSet(const std::set<int32>& chunks);
+  bool ReadAddPrefixes(FILE* fp,
+                       std::vector<SBAddPrefix>* add_prefixes, int count);
+  bool WriteAddPrefixes(const std::vector<SBAddPrefix>& add_prefixes);
+  bool ReadSubPrefixes(FILE* fp,
+                       std::vector<SBSubPrefix>* sub_prefixes, int count);
+  bool WriteSubPrefixes(std::vector<SBSubPrefix>& sub_prefixes);
+  bool ReadAddHashes(FILE* fp,
+                     std::vector<SBAddFullHash>* add_hashes, int count);
+  bool WriteAddHashes(const std::vector<SBAddFullHash>& add_hashes);
+  bool ReadSubHashes(FILE* fp,
+                     std::vector<SBSubFullHash>* sub_hashes, int count);
+  bool WriteSubHashes(std::vector<SBSubFullHash>& sub_hashes);
+
+  // Calls |corruption_callback_| if non-NULL, always returns false as
+  // a convenience to the caller.
+  bool OnCorruptDatabase();
+
+  // Clear temporary buffers used to accumulate chunk data.
+  bool ClearChunkBuffers() {
+    // NOTE: .clear() doesn't release memory.
+    // TODO(shess): Figure out if this is overkill. Some amount of
+    // pre-reserved space is probably reasonable between each chunk
+    // collected.
+    std::vector<SBAddPrefix>().swap(add_prefixes_);
+    std::vector<SBSubPrefix>().swap(sub_prefixes_);
+    std::vector<SBAddFullHash>().swap(add_hashes_);
+    std::vector<SBSubFullHash>().swap(sub_hashes_);
+    return true;
+  }
+
+  // Clear all buffers used during update.
+  void ClearUpdateBuffers() {
+    ClearChunkBuffers();
+    chunks_written_ = 0;
+    std::set<int32>().swap(add_chunks_cache_);
+    std::set<int32>().swap(sub_chunks_cache_);
+    base::hash_set<int32>().swap(add_del_cache_);
+    base::hash_set<int32>().swap(sub_del_cache_);
+  }
+
+  // Buffers for collecting data between BeginChunk() and
+  // FinishChunk().
+  std::vector<SBAddPrefix> add_prefixes_;
+  std::vector<SBSubPrefix> sub_prefixes_;
+  std::vector<SBAddFullHash> add_hashes_;
+  std::vector<SBSubFullHash> sub_hashes_;
+
+  // Count of chunks collected in |new_file_|.
+  int chunks_written_;
+
+  // Name of the main database file.
+  FilePath filename_;
+
+  // Handles to the main and scratch files. |empty_| is true if the
+  // main file didn't exist when the update was started.
+  file_util::ScopedFILE file_;
+  file_util::ScopedFILE new_file_;
+  bool empty_;
+
+  // Cache of chunks which have been seen. Loaded from the database
+  // on BeginUpdate() so that it can be queried during the
+  // transaction.
+  std::set<int32> add_chunks_cache_;
+  std::set<int32> sub_chunks_cache_;
+
+  // Cache the set of deleted chunks during a transaction, applied on
+  // FinishUpdate().
+  // TODO(shess): If the set is small enough, hash_set<> might be
+  // slower than plain set<>.
+  base::hash_set<int32> add_del_cache_;
+  base::hash_set<int32> sub_del_cache_;
+
+  scoped_ptr<Callback0::Type> corruption_callback_;
+
+  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreFile);
+};
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc
new file mode 100644
index 0000000..383de55
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc
@@ -0,0 +1,56 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
+
+#include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h"
+#include "chrome/test/file_test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/platform_test.h"
+
+namespace {
+
+const FilePath::CharType kFolderPrefix[] =
+ FILE_PATH_LITERAL("SafeBrowsingTestStoreFile");
+
+// Test fixture: creates a scratch directory containing a fresh store
+// file for each test, and removes both afterwards.
+class SafeBrowsingStoreFileTest : public PlatformTest {
+ public:
+  virtual void SetUp() {
+    PlatformTest::SetUp();
+
+    FilePath temp_dir;
+    ASSERT_TRUE(file_util::CreateNewTempDirectory(kFolderPrefix, &temp_dir));
+
+    file_deleter_.reset(new FileAutoDeleter(temp_dir));
+
+    // BUG FIX: FilePath::AppendASCII() is const and returns the joined
+    // path; the previous code discarded the result, leaving |filename_|
+    // pointing at the directory itself.
+    filename_ = temp_dir.AppendASCII("SafeBrowsingTestStore");
+    file_util::Delete(filename_, false);
+
+    // Make sure an old temporary file isn't hanging around.
+    const FilePath temp_file =
+        SafeBrowsingStoreFile::TemporaryFileForFilename(filename_);
+    file_util::Delete(temp_file, false);
+
+    store_.reset(new SafeBrowsingStoreFile());
+    store_->Init(filename_, NULL);
+  }
+  virtual void TearDown() {
+    store_->Delete();
+    store_.reset();
+    file_deleter_.reset();
+
+    PlatformTest::TearDown();
+  }
+
+  // Removes the scratch directory when the test finishes.
+  scoped_ptr<FileAutoDeleter> file_deleter_;
+  // Full path of the store under test, inside the scratch directory.
+  FilePath filename_;
+  scoped_ptr<SafeBrowsingStoreFile> store_;
+};
+
+TEST_STORE(SafeBrowsingStoreFileTest, store_.get(), filename_);
+
+// TODO(shess): Test corruption-handling?
+
+} // namespace
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_sqlite.cc b/chrome/browser/safe_browsing/safe_browsing_store_sqlite.cc
new file mode 100644
index 0000000..a5e7df7
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store_sqlite.cc
@@ -0,0 +1,714 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h"
+
+#include "base/file_util.h"
+#include "chrome/common/sqlite_compiled_statement.h"
+#include "chrome/common/sqlite_utils.h"
+
+namespace {
+
+// Database version. If this is different than what's stored on disk, the
+// database is reset.
+const int kDatabaseVersion = 6;
+
+// Used for reading full hashes from the database.
+// NOTE(review): the size check is debug-only; assumes writers always
+// bound exactly sizeof(SBFullHash) bytes -- confirm before trusting
+// untrusted database content here.
+SBFullHash ReadFullHash(SqliteCompiledStatement* statement, int column) {
+  std::vector<unsigned char> blob;
+  (*statement)->column_blob_as_vector(column, &blob);
+
+  SBFullHash ret;
+  DCHECK_EQ(blob.size(), sizeof(ret));
+  memcpy(ret.full_hash, &blob[0], sizeof(ret));
+  return ret;
+}
+
+// Erase from |chunks| every chunk id present in |deleted|.
+void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
+                         std::set<int32>* chunks) {
+  for (std::set<int32>::iterator iter = chunks->begin();
+       iter != chunks->end();) {
+    // Advance before any erase so the iterator stays valid.
+    std::set<int32>::iterator prev = iter++;
+    if (deleted.count(*prev) > 0)
+      chunks->erase(prev);
+  }
+}
+
+}  // namespace
+
+// All handles start NULL; the database is opened later via Open().
+SafeBrowsingStoreSqlite::SafeBrowsingStoreSqlite()
+    : db_(NULL),
+      statement_cache_(NULL),
+      insert_transaction_(NULL) {
+}
+// Release the transaction, statement cache, and database handle.
+SafeBrowsingStoreSqlite::~SafeBrowsingStoreSqlite() {
+  Close();
+}
+
+// Remove the database and its journal from disk. Deletion of a missing
+// file is treated as success (PathExists() re-check).
+bool SafeBrowsingStoreSqlite::Delete() {
+  // The database should not be open at this point. TODO(shess): It
+  // can be open if corruption was detected while opening the
+  // database. Ick.
+  DCHECK(!db_);
+
+  // The file must be closed, both so that the journal file is deleted
+  // by SQLite, and because open files cannot be deleted on Windows.
+  if (!Close()) {
+    NOTREACHED();
+    return false;
+  }
+
+  // Just in case, delete the journal file, because associating the
+  // wrong journal file with a database is very bad.
+  const FilePath journal_file = JournalFileForFilename(filename_);
+  if (!file_util::Delete(journal_file, false) &&
+      file_util::PathExists(journal_file)) {
+    NOTREACHED();
+    return false;
+  }
+
+  if (!file_util::Delete(filename_, false) &&
+      file_util::PathExists(filename_)) {
+    NOTREACHED();
+    return false;
+  }
+
+  return true;
+}
+
+// Record the backing file and take ownership of |corruption_callback|.
+// No file I/O happens here; Open() does the work.
+void SafeBrowsingStoreSqlite::Init(const FilePath& filename,
+                                   Callback0::Type* corruption_callback) {
+  filename_ = filename;
+  corruption_callback_.reset(corruption_callback);
+}
+
+// Notify the client of corruption. Always returns false so callers can
+// write "return OnCorruptDatabase();" on their error paths.
+bool SafeBrowsingStoreSqlite::OnCorruptDatabase() {
+  if (corruption_callback_.get())
+    corruption_callback_->Run();
+  return false;
+}
+
+// Open (or create) the database, idempotently. A fresh database gets
+// its schema from SetupDatabase(); an existing one is version-checked.
+bool SafeBrowsingStoreSqlite::Open() {
+  if (db_)
+    return true;
+
+  if (OpenSqliteDb(filename_, &db_) != SQLITE_OK) {
+    sqlite3_close(db_);
+    db_ = NULL;
+    return false;
+  }
+
+  // Run the database in exclusive mode. Nobody else should be accessing the
+  // database while we're running, and this will give somewhat improved perf.
+  ExecSql("PRAGMA locking_mode = EXCLUSIVE");
+  ExecSql("PRAGMA cache_size = 100");
+
+  statement_cache_.reset(new SqliteStatementCache(db_));
+
+  // Absence of the add_prefix table is the marker for a fresh database.
+  if (!DoesSqliteTableExist(db_, "add_prefix"))
+    return SetupDatabase();
+
+  return CheckCompatibleVersion();
+}
+
+// Execute |sql|, reporting corruption via OnCorruptDatabase(). Note
+// that any non-corrupt error is only flagged in debug builds; release
+// builds return true regardless.
+bool SafeBrowsingStoreSqlite::ExecSql(const char* sql) {
+  DCHECK(db_);
+
+  int rv = sqlite3_exec(db_, sql, NULL, NULL, NULL);
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK(rv == SQLITE_OK);
+  return true;
+}
+
+// Drop all in-memory caches and close the database handle. Safe to
+// call when already closed.
+bool SafeBrowsingStoreSqlite::Close() {
+  if (!db_)
+    return true;
+
+  add_chunks_cache_.clear();
+  sub_chunks_cache_.clear();
+
+  add_del_cache_.clear();
+  sub_del_cache_.clear();
+
+  // An un-committed transaction rolls back when released here.
+  insert_transaction_.reset();
+  statement_cache_.reset();  // Must free statements before closing DB.
+  bool result = sqlite3_close(db_) == SQLITE_OK;
+  db_ = NULL;
+
+  return result;
+}
+
+// Create the six tables backing the store: flat integer tuples keyed by
+// chunk id for prefixes, 32-byte blobs for full hashes, plus the two
+// chunk-number tables. The chunk tables exist because the prefix tables
+// cannot reproduce the chunk list: a chunk may legitimately have zero
+// prefixes (empty chunks, adds fully sub'd without an AddDel, subs
+// entirely consumed by adds), yet its id must still be reported.
+//
+// TODO(paulg): Investigate storing the chunks as a string of
+// ChunkRanges, one string for each of phish-add, phish-sub,
+// malware-add, malware-sub. This might be better performance when
+// the number of chunks is large, and is the natural format for the
+// update request.
+bool SafeBrowsingStoreSqlite::CreateTables() {
+  DCHECK(db_);
+
+  const char* kCreateTableSql[] = {
+    "CREATE TABLE add_prefix ("
+    " chunk INTEGER,"
+    " prefix INTEGER"
+    ")",
+
+    "CREATE TABLE sub_prefix ("
+    " chunk INTEGER,"
+    " add_chunk INTEGER,"
+    " prefix INTEGER"
+    ")",
+
+    "CREATE TABLE add_full_hash ("
+    " chunk INTEGER,"
+    " prefix INTEGER,"
+    " receive_time INTEGER,"
+    " full_hash BLOB"
+    ")",
+
+    "CREATE TABLE sub_full_hash ("
+    " chunk INTEGER,"
+    " add_chunk INTEGER,"
+    " prefix INTEGER,"
+    " full_hash BLOB"
+    ")",
+
+    "CREATE TABLE add_chunks ("
+    " chunk INTEGER PRIMARY KEY"
+    ")",
+
+    "CREATE TABLE sub_chunks ("
+    " chunk INTEGER PRIMARY KEY"
+    ")",
+  };
+
+  for (size_t i = 0;
+       i < sizeof(kCreateTableSql) / sizeof(kCreateTableSql[0]); ++i) {
+    if (!ExecSql(kCreateTableSql[i]))
+      return false;
+  }
+
+  return true;
+}
+
+// Create the schema and stamp the version, all inside one transaction.
+bool SafeBrowsingStoreSqlite::SetupDatabase() {
+  DCHECK(db_);
+
+  SQLTransaction transaction(db_);
+  if (transaction.Begin() != SQLITE_OK) {
+    NOTREACHED();
+    return false;
+  }
+
+  if (!CreateTables())
+    return false;
+
+  // PRAGMA does not support bind parameters...
+  const std::string version =
+      StringPrintf("PRAGMA user_version = %d", kDatabaseVersion);
+  if (!ExecSql(version.c_str()))
+    return false;
+
+  if (transaction.Commit() != SQLITE_OK)
+    return false;
+
+  return true;
+}
+
+// Returns true iff the on-disk user_version matches kDatabaseVersion.
+bool SafeBrowsingStoreSqlite::CheckCompatibleVersion() {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "PRAGMA user_version");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  int result = statement->step();
+  if (result != SQLITE_ROW)
+    return false;
+
+  return statement->column_int(0) == kDatabaseVersion;
+}
+
+// Load the list of add chunk ids into |add_chunks_cache_|.
+bool SafeBrowsingStoreSqlite::ReadAddChunks() {
+  DCHECK(db_);
+
+  add_chunks_cache_.clear();
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "SELECT chunk FROM add_chunks");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  int rv;
+  while ((rv = statement->step()) == SQLITE_ROW) {
+    add_chunks_cache_.insert(statement->column_int(0));
+  }
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK_EQ(rv, SQLITE_DONE);
+  return rv == SQLITE_DONE;
+}
+
+// Insert every cached add chunk id into the add_chunks table.
+bool SafeBrowsingStoreSqlite::WriteAddChunks() {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "INSERT INTO add_chunks (chunk) VALUES (?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  for (std::set<int32>::const_iterator iter = add_chunks_cache_.begin();
+       iter != add_chunks_cache_.end(); ++iter) {
+    statement->bind_int(0, *iter);
+    int rv = statement->step();
+    if (rv == SQLITE_CORRUPT)
+      return OnCorruptDatabase();
+    DCHECK(rv == SQLITE_DONE);
+    statement->reset();
+  }
+  return true;
+}
+
+// Load the list of sub chunk ids into |sub_chunks_cache_|.
+bool SafeBrowsingStoreSqlite::ReadSubChunks() {
+  DCHECK(db_);
+
+  sub_chunks_cache_.clear();
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "SELECT chunk FROM sub_chunks");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  int rv;
+  while ((rv = statement->step()) == SQLITE_ROW) {
+    sub_chunks_cache_.insert(statement->column_int(0));
+  }
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  // CONSISTENCY FIX: match ReadAddChunks(), which flags unexpected
+  // terminal codes in debug builds; release behavior is unchanged.
+  DCHECK_EQ(rv, SQLITE_DONE);
+  return rv == SQLITE_DONE;
+}
+
+// Insert every cached sub chunk id into the sub_chunks table.
+bool SafeBrowsingStoreSqlite::WriteSubChunks() {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "INSERT INTO sub_chunks (chunk) VALUES (?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  for (std::set<int32>::const_iterator iter = sub_chunks_cache_.begin();
+       iter != sub_chunks_cache_.end(); ++iter) {
+    statement->bind_int(0, *iter);
+    int rv = statement->step();
+    if (rv == SQLITE_CORRUPT)
+      return OnCorruptDatabase();
+    DCHECK(rv == SQLITE_DONE);
+    statement->reset();
+  }
+  return true;
+}
+
+// Append all add prefixes to |add_prefixes|, skipping rows whose chunk
+// is marked deleted in |add_del_cache_|.
+bool SafeBrowsingStoreSqlite::ReadAddPrefixes(
+    std::vector<SBAddPrefix>* add_prefixes) {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "SELECT chunk, prefix FROM add_prefix");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  int rv;
+  while ((rv = statement->step()) == SQLITE_ROW) {
+    const int32 chunk_id = statement->column_int(0);
+    if (add_del_cache_.count(chunk_id) > 0)
+      continue;
+
+    const SBPrefix prefix = statement->column_int(1);
+    add_prefixes->push_back(SBAddPrefix(chunk_id, prefix));
+  }
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK_EQ(rv, SQLITE_DONE);
+  return true;
+}
+
+// Insert a single (chunk, prefix) row into add_prefix.
+bool SafeBrowsingStoreSqlite::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "INSERT INTO add_prefix "
+                          "(chunk, prefix) VALUES (?,?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  statement->bind_int(0, chunk_id);
+  statement->bind_int(1, prefix);
+  int rv = statement->step();
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK(rv == SQLITE_DONE);
+  return true;
+}
+
+// Bulk-insert |add_prefixes|, stopping at the first failure.
+bool SafeBrowsingStoreSqlite::WriteAddPrefixes(
+    const std::vector<SBAddPrefix>& add_prefixes) {
+  DCHECK(db_);
+
+  for (std::vector<SBAddPrefix>::const_iterator iter = add_prefixes.begin();
+       iter != add_prefixes.end(); ++iter) {
+    if (!WriteAddPrefix(iter->chunk_id, iter->prefix))
+      return false;
+  }
+  return true;
+}
+
+// Append all sub prefixes to |sub_prefixes|, skipping rows whose chunk
+// is marked deleted in |sub_del_cache_|.
+bool SafeBrowsingStoreSqlite::ReadSubPrefixes(
+    std::vector<SBSubPrefix>* sub_prefixes) {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "SELECT chunk, add_chunk, prefix "
+                          "FROM sub_prefix");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  int rv;
+  while ((rv = statement->step()) == SQLITE_ROW) {
+    const int32 chunk_id = statement->column_int(0);
+    if (sub_del_cache_.count(chunk_id) > 0)
+      continue;
+
+    const int32 add_chunk_id = statement->column_int(1);
+    const SBPrefix add_prefix = statement->column_int(2);
+    sub_prefixes->push_back(SBSubPrefix(chunk_id, add_chunk_id, add_prefix));
+  }
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK_EQ(rv, SQLITE_DONE);
+  return true;
+}
+
+// Insert a single (chunk, add_chunk, prefix) row into sub_prefix.
+bool SafeBrowsingStoreSqlite::WriteSubPrefix(
+    int32 chunk_id, int32 add_chunk_id, SBPrefix prefix) {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "INSERT INTO sub_prefix "
+                          "(chunk, add_chunk, prefix) VALUES (?,?, ?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  statement->bind_int(0, chunk_id);
+  statement->bind_int(1, add_chunk_id);
+  statement->bind_int(2, prefix);
+  int rv = statement->step();
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK(rv == SQLITE_DONE);
+  return true;
+}
+
+// Bulk-insert |sub_prefixes|, stopping at the first failure.
+// NOTE(review): |sub_prefixes| is never modified; the parameter could be
+// const& -- would require matching the declaration in the header.
+bool SafeBrowsingStoreSqlite::WriteSubPrefixes(
+    std::vector<SBSubPrefix>& sub_prefixes) {
+  DCHECK(db_);
+
+  for (std::vector<SBSubPrefix>::const_iterator iter = sub_prefixes.begin();
+       iter != sub_prefixes.end(); ++iter) {
+    const SBAddPrefix &add_prefix = iter->add_prefix;
+    if (!WriteSubPrefix(iter->chunk_id, add_prefix.chunk_id, add_prefix.prefix))
+      return false;
+  }
+  return true;
+}
+
+// Append all add full hashes to |add_hashes|, skipping rows whose chunk
+// is marked deleted in |add_del_cache_|.
+bool SafeBrowsingStoreSqlite::ReadAddHashes(
+    std::vector<SBAddFullHash>* add_hashes) {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "SELECT chunk, prefix, receive_time, full_hash "
+                          "FROM add_full_hash");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  int rv;
+  while ((rv = statement->step()) == SQLITE_ROW) {
+    const int32 chunk_id = statement->column_int(0);
+    if (add_del_cache_.count(chunk_id) > 0)
+      continue;
+
+    const SBPrefix prefix = statement->column_int(1);
+    // receive_time is stored as a time_t (see WriteAddHash()).
+    const base::Time received =
+        base::Time::FromTimeT(statement->column_int64(2));
+    const SBFullHash full_hash = ReadFullHash(&statement, 3);
+    add_hashes->push_back(SBAddFullHash(chunk_id, prefix, received, full_hash));
+  }
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK_EQ(rv, SQLITE_DONE);
+  return true;
+}
+
+// Insert a single row into add_full_hash; the hash is bound as a
+// 32-byte blob.
+bool SafeBrowsingStoreSqlite::WriteAddHash(int32 chunk_id, SBPrefix prefix,
+                                           base::Time receive_time,
+                                           SBFullHash full_hash) {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "INSERT INTO add_full_hash "
+                          "(chunk, prefix, receive_time, full_hash) "
+                          "VALUES (?,?, ?, ?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  statement->bind_int(0, chunk_id);
+  statement->bind_int(1, prefix);
+  statement->bind_int64(2, receive_time.ToTimeT());
+  statement->bind_blob(3, full_hash.full_hash, sizeof(full_hash.full_hash));
+  int rv = statement->step();
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK(rv == SQLITE_DONE);
+  return true;
+}
+
+// Bulk-insert |add_hashes|, stopping at the first failure.
+bool SafeBrowsingStoreSqlite::WriteAddHashes(
+    const std::vector<SBAddFullHash>& add_hashes) {
+  DCHECK(db_);
+
+  for (std::vector<SBAddFullHash>::const_iterator iter = add_hashes.begin();
+       iter != add_hashes.end(); ++iter) {
+    const SBAddPrefix& add_prefix = iter->add_prefix;
+    if (!WriteAddHash(add_prefix.chunk_id, add_prefix.prefix,
+                      iter->received, iter->full_hash))
+      return false;
+  }
+  return true;
+}
+
+// Append all sub full hashes to |sub_hashes|, skipping rows whose chunk
+// is marked deleted in |sub_del_cache_|.
+bool SafeBrowsingStoreSqlite::ReadSubHashes(
+    std::vector<SBSubFullHash>* sub_hashes) {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "SELECT chunk, add_chunk, prefix, full_hash "
+                          "FROM sub_full_hash");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  int rv;
+  while ((rv = statement->step()) == SQLITE_ROW) {
+    const int32 chunk_id = statement->column_int(0);
+    if (sub_del_cache_.count(chunk_id) > 0)
+      continue;
+
+    const int32 add_chunk_id = statement->column_int(1);
+    const SBPrefix add_prefix = statement->column_int(2);
+    const SBFullHash full_hash = ReadFullHash(&statement, 3);
+    sub_hashes->push_back(
+        SBSubFullHash(chunk_id, add_chunk_id, add_prefix, full_hash));
+  }
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK_EQ(rv, SQLITE_DONE);
+  return true;
+}
+
+// Insert a single row into sub_full_hash; the hash is bound as a
+// 32-byte blob.
+bool SafeBrowsingStoreSqlite::WriteSubHash(
+    int32 chunk_id, int32 add_chunk_id, SBPrefix prefix, SBFullHash full_hash) {
+  DCHECK(db_);
+
+  SQLITE_UNIQUE_STATEMENT(statement, *statement_cache_,
+                          "INSERT INTO sub_full_hash "
+                          "(chunk, add_chunk, prefix, full_hash) "
+                          "VALUES (?,?,?,?)");
+  if (!statement.is_valid()) {
+    NOTREACHED();
+    return false;
+  }
+
+  statement->bind_int(0, chunk_id);
+  statement->bind_int(1, add_chunk_id);
+  statement->bind_int(2, prefix);
+  statement->bind_blob(3, full_hash.full_hash, sizeof(full_hash.full_hash));
+  int rv = statement->step();
+  if (rv == SQLITE_CORRUPT)
+    return OnCorruptDatabase();
+  DCHECK(rv == SQLITE_DONE);
+  return true;
+}
+
+// Bulk-insert |sub_hashes|, stopping at the first failure.
+// NOTE(review): |sub_hashes| is never modified; could be const& (header
+// declaration would need to change in step).
+bool SafeBrowsingStoreSqlite::WriteSubHashes(
+    std::vector<SBSubFullHash>& sub_hashes) {
+  DCHECK(db_);
+
+  for (std::vector<SBSubFullHash>::const_iterator iter = sub_hashes.begin();
+       iter != sub_hashes.end(); ++iter) {
+    if (!WriteSubHash(iter->chunk_id, iter->add_prefix.chunk_id,
+                      iter->add_prefix.prefix, iter->full_hash))
+      return false;
+  }
+  return true;
+}
+
+// Move all six live tables to *_old and create fresh empty tables, so
+// the update can be written out cleanly; DeleteOldTables() drops the
+// renamed copies afterwards.
+bool SafeBrowsingStoreSqlite::RenameTables() {
+  DCHECK(db_);
+
+  if (!ExecSql("ALTER TABLE add_prefix RENAME TO add_prefix_old") ||
+      !ExecSql("ALTER TABLE sub_prefix RENAME TO sub_prefix_old") ||
+      !ExecSql("ALTER TABLE add_full_hash RENAME TO add_full_hash_old") ||
+      !ExecSql("ALTER TABLE sub_full_hash RENAME TO sub_full_hash_old") ||
+      !ExecSql("ALTER TABLE add_chunks RENAME TO add_chunks_old") ||
+      !ExecSql("ALTER TABLE sub_chunks RENAME TO sub_chunks_old"))
+    return false;
+
+  return CreateTables();
+}
+
+// Drop the *_old tables left behind by RenameTables().
+bool SafeBrowsingStoreSqlite::DeleteOldTables() {
+  DCHECK(db_);
+
+  if (!ExecSql("DROP TABLE add_prefix_old") ||
+      !ExecSql("DROP TABLE sub_prefix_old") ||
+      !ExecSql("DROP TABLE add_full_hash_old") ||
+      !ExecSql("DROP TABLE sub_full_hash_old") ||
+      !ExecSql("DROP TABLE add_chunks_old") ||
+      !ExecSql("DROP TABLE sub_chunks_old"))
+    return false;
+
+  return true;
+}
+
+// Open the database, start the update transaction, and load the
+// chunks-seen caches so they can be queried during the transaction.
+bool SafeBrowsingStoreSqlite::BeginUpdate() {
+  DCHECK(!db_);
+
+  if (!Open())
+    return false;
+
+  insert_transaction_.reset(new SQLTransaction(db_));
+  if (insert_transaction_->Begin() != SQLITE_OK) {
+    DCHECK(false) << "Safe browsing store couldn't start transaction";
+    Close();
+    return false;
+  }
+
+  // NOTE(review): on chunk-read failure the database is left open with
+  // an active transaction; presumably the caller responds with
+  // CancelUpdate(), which closes and rolls back -- confirm callers.
+  if (!ReadAddChunks() || !ReadSubChunks())
+    return false;
+
+  return true;
+}
+
+// Apply the update: read everything into memory, fold in |pending_adds|,
+// resolve subs against adds (SBProcessSubs), then rewrite all tables via
+// rename/write/drop inside the transaction begun in BeginUpdate(). On
+// success the surviving add data is handed back through the out-params.
+bool SafeBrowsingStoreSqlite::DoUpdate(
+    const std::vector<SBAddFullHash>& pending_adds,
+    std::vector<SBAddPrefix>* add_prefixes_result,
+    std::vector<SBAddFullHash>* add_full_hashes_result) {
+  DCHECK(db_);
+
+  std::vector<SBAddPrefix> add_prefixes;
+  std::vector<SBAddFullHash> add_full_hashes;
+  std::vector<SBSubPrefix> sub_prefixes;
+  std::vector<SBSubFullHash> sub_full_hashes;
+
+  // The readers skip items from deleted chunks (the *_del_cache_ sets).
+  if (!ReadAddPrefixes(&add_prefixes) ||
+      !ReadAddHashes(&add_full_hashes) ||
+      !ReadSubPrefixes(&sub_prefixes) ||
+      !ReadSubHashes(&sub_full_hashes))
+    return false;
+
+  // Add the pending adds which haven't since been deleted.
+  for (std::vector<SBAddFullHash>::const_iterator iter = pending_adds.begin();
+       iter != pending_adds.end(); ++iter) {
+    if (add_del_cache_.count(iter->add_prefix.chunk_id) == 0)
+      add_full_hashes.push_back(*iter);
+  }
+
+  SBProcessSubs(&add_prefixes, &sub_prefixes,
+                &add_full_hashes, &sub_full_hashes);
+
+  // Move the existing tables aside and prepare to write fresh tables.
+  if (!RenameTables())
+    return false;
+
+  // Apply the recorded chunk deletions to the chunks-seen caches before
+  // persisting them.
+  DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
+  DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);
+
+  if (!WriteAddChunks() ||
+      !WriteSubChunks() ||
+      !WriteAddPrefixes(add_prefixes) ||
+      !WriteSubPrefixes(sub_prefixes) ||
+      !WriteAddHashes(add_full_hashes) ||
+      !WriteSubHashes(sub_full_hashes))
+    return false;
+
+  // Delete the old tables.
+  if (!DeleteOldTables())
+    return false;
+
+  // Commit all the changes to the database.
+  int rv = insert_transaction_->Commit();
+  if (rv != SQLITE_OK) {
+    NOTREACHED() << "SafeBrowsing update transaction failed to commit.";
+    // UMA_HISTOGRAM_COUNTS("SB2.FailedUpdate", 1);
+    return false;
+  }
+
+  // Pass the resulting data off to the caller.
+  add_prefixes_result->swap(add_prefixes);
+  add_full_hashes_result->swap(add_full_hashes);
+
+  return true;
+}
+
+// Run the update, then close unconditionally. Close() releases the
+// un-committed transaction on the failure path, rolling it back.
+bool SafeBrowsingStoreSqlite::FinishUpdate(
+    const std::vector<SBAddFullHash>& pending_adds,
+    std::vector<SBAddPrefix>* add_prefixes_result,
+    std::vector<SBAddFullHash>* add_full_hashes_result) {
+  bool ret = DoUpdate(pending_adds,
+                      add_prefixes_result, add_full_hashes_result);
+
+  // Make sure everything is closed even if DoUpdate() fails.
+  if (!Close())
+    return false;
+
+  return ret;
+}
+
+// Abandon the update; Close() drops caches and rolls back via the
+// released transaction.
+bool SafeBrowsingStoreSqlite::CancelUpdate() {
+  return Close();
+}
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_sqlite.h b/chrome/browser/safe_browsing/safe_browsing_store_sqlite.h
new file mode 100644
index 0000000..272ab56
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store_sqlite.h
@@ -0,0 +1,168 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_SQLITE_H_
+#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_SQLITE_H_
+
+#include <set>
+#include <vector>
+
+#include "chrome/browser/safe_browsing/safe_browsing_store.h"
+#include "testing/gtest/include/gtest/gtest_prod.h"
+
+struct sqlite3;
+class SqliteCompiledStatement;
+class SqliteStatementCache;
+class SQLTransaction;
+
+// SafeBrowsingStore implementation backed by a SQLite database.  The
+// sets of chunk ids seen and chunk ids to delete are cached in memory
+// and applied to the database when FinishUpdate() commits the update
+// transaction.
+class SafeBrowsingStoreSqlite : public SafeBrowsingStore {
+ public:
+  SafeBrowsingStoreSqlite();
+  virtual ~SafeBrowsingStoreSqlite();
+
+  virtual bool Delete();
+
+  virtual void Init(const FilePath& filename,
+                    Callback0::Type* corruption_callback);
+
+  // Chunk bracketing is not needed by this store, so BeginChunk() and
+  // FinishChunk() are trivial successes.
+  virtual bool BeginChunk() {
+    return true;
+  }
+  virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix);
+  virtual bool WriteAddHash(int32 chunk_id, SBPrefix prefix,
+                            base::Time receive_time, SBFullHash full_hash);
+  virtual bool WriteSubPrefix(int32 chunk_id,
+                              int32 add_chunk_id, SBPrefix prefix);
+  virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id,
+                            SBPrefix prefix, SBFullHash full_hash);
+  virtual bool FinishChunk() {
+    return true;
+  }
+
+  virtual bool BeginUpdate();
+  // TODO(shess): Should not be public.
+  virtual bool DoUpdate(const std::vector<SBAddFullHash>& pending_adds,
+                        std::vector<SBAddPrefix>* add_prefixes_result,
+                        std::vector<SBAddFullHash>* add_full_hashes_result);
+  virtual bool FinishUpdate(const std::vector<SBAddFullHash>& pending_adds,
+                            std::vector<SBAddPrefix>* add_prefixes_result,
+                            std::vector<SBAddFullHash>* add_full_hashes_result);
+  virtual bool CancelUpdate();
+
+  virtual void SetAddChunk(int32 chunk_id) {
+    add_chunks_cache_.insert(chunk_id);
+  }
+  virtual bool CheckAddChunk(int32 chunk_id) {
+    return add_chunks_cache_.count(chunk_id) > 0;
+  }
+  virtual void GetAddChunks(std::vector<int32>* out) {
+    out->clear();
+    out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end());
+  }
+
+  virtual void SetSubChunk(int32 chunk_id) {
+    sub_chunks_cache_.insert(chunk_id);
+  }
+  virtual bool CheckSubChunk(int32 chunk_id) {
+    return sub_chunks_cache_.count(chunk_id) > 0;
+  }
+  virtual void GetSubChunks(std::vector<int32>* out) {
+    out->clear();
+    out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end());
+  }
+
+  // Deletions are only recorded here; they take effect during the next
+  // FinishUpdate().
+  virtual void DeleteAddChunk(int32 chunk_id) {
+    add_del_cache_.insert(chunk_id);
+  }
+  virtual void DeleteSubChunk(int32 chunk_id) {
+    sub_del_cache_.insert(chunk_id);
+  }
+
+  // Returns the name of the SQLite journal file for |filename|.
+  // Exported for unit tests.
+  static const FilePath JournalFileForFilename(const FilePath& filename) {
+    return FilePath(filename.value() + FILE_PATH_LITERAL("-journal"));
+  }
+
+ private:
+  // The following routines return true on success, or false on
+  // failure.  Failure is presumed to be persistent, so the caller
+  // should stop trying and unwind the transaction.
+  // OnCorruptDatabase() is called if SQLite returns SQLITE_CORRUPT.
+
+  // Open |db_| from |filename_|, creating if necessary.
+  bool Open();
+
+  // Close |db_|, rolling back any in-progress transaction.
+  bool Close();
+
+  // Execute all statements in sql, returning true if every one of
+  // them returns SQLITE_OK.
+  bool ExecSql(const char* sql);
+
+  bool SetupDatabase();
+  bool CheckCompatibleVersion();
+
+  bool CreateTables();
+  bool RenameTables();
+  bool DeleteOldTables();
+
+  // Read and write the chunks-seen data from |*_chunks_cache_|.
+  // Chunk deletions are not accounted for.
+  bool ReadAddChunks();
+  bool ReadSubChunks();
+  bool WriteAddChunks();
+  bool WriteSubChunks();
+
+  // Read the various types of data, skipping items which belong to
+  // deleted chunks.  New data is appended to the vectors.
+  bool ReadAddPrefixes(std::vector<SBAddPrefix>* add_prefixes);
+  bool ReadSubPrefixes(std::vector<SBSubPrefix>* sub_prefixes);
+  bool ReadAddHashes(std::vector<SBAddFullHash>* add_hashes);
+  bool ReadSubHashes(std::vector<SBSubFullHash>* sub_hashes);
+
+  // Write the various types of data.  The existing data is not
+  // cleared.
+  // NOTE(review): the sub variants take non-const references, unlike
+  // the add variants -- presumably an oversight; confirm against the
+  // implementation before tightening to const.
+  bool WriteAddPrefixes(const std::vector<SBAddPrefix>& add_prefixes);
+  bool WriteSubPrefixes(std::vector<SBSubPrefix>& sub_prefixes);
+  bool WriteAddHashes(const std::vector<SBAddFullHash>& add_hashes);
+  bool WriteSubHashes(std::vector<SBSubFullHash>& sub_hashes);
+
+  // Calls |corruption_callback_| if non-NULL, always returns false as
+  // a convenience to the caller.
+  bool OnCorruptDatabase();
+
+  // The database path from Init().
+  FilePath filename_;
+
+  // Between BeginUpdate() and FinishUpdate(), this will be the SQLite
+  // database connection.  Otherwise NULL.
+  sqlite3 *db_;
+
+  // Cache of compiled statements for |db_|.
+  // TODO(shess): Probably doesn't gain us much.
+  scoped_ptr<SqliteStatementCache> statement_cache_;
+
+  // Transaction for protecting database integrity between
+  // BeginUpdate() and FinishUpdate().
+  scoped_ptr<SQLTransaction> insert_transaction_;
+
+  // The set of chunks which the store has seen.  Elements are added
+  // by SetAddChunk() and SetSubChunk(), and deleted on write for
+  // chunks that have been deleted.
+  std::set<int32> add_chunks_cache_;
+  std::set<int32> sub_chunks_cache_;
+
+  // Cache the DeletedAddChunk() and DeleteSubChunk() chunks for later
+  // use in FinishUpdate().
+  base::hash_set<int32> add_del_cache_;
+  base::hash_set<int32> sub_del_cache_;
+
+  // Called when SQLite returns SQLITE_CORRUPT.
+  scoped_ptr<Callback0::Type> corruption_callback_;
+
+  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreSqlite);
+};
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_SQLITE_H_
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_sqlite_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_sqlite_unittest.cc
new file mode 100644
index 0000000..0921ad1
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store_sqlite_unittest.cc
@@ -0,0 +1,53 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h"
+
+#include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h"
+#include "chrome/test/file_test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/platform_test.h"
+
+namespace {
+
+const FilePath::CharType kFolderPrefix[] =
+ FILE_PATH_LITERAL("SafeBrowsingTestStoreSqlite");
+
+// Fixture providing a SafeBrowsingStoreSqlite instance backed by a
+// file in a fresh temporary directory.
+class SafeBrowsingStoreSqliteTest : public PlatformTest {
+ public:
+  virtual void SetUp() {
+    PlatformTest::SetUp();
+
+    FilePath temp_dir;
+    ASSERT_TRUE(file_util::CreateNewTempDirectory(kFolderPrefix, &temp_dir));
+
+    // Recursively deletes the temp directory when the test is done.
+    file_deleter_.reset(new FileAutoDeleter(temp_dir));
+
+    // FilePath::AppendASCII() is const and returns the combined path;
+    // the original code discarded its result, leaving |filename_|
+    // equal to |temp_dir| itself.
+    filename_ = temp_dir.AppendASCII("SafeBrowsingTestStore");
+    file_util::Delete(filename_, false);
+
+    // Make sure a stale journal cannot pair up with the fresh store.
+    const FilePath journal_file =
+        SafeBrowsingStoreSqlite::JournalFileForFilename(filename_);
+    file_util::Delete(journal_file, false);
+
+    store_.reset(new SafeBrowsingStoreSqlite());
+    store_->Init(filename_, NULL);
+  }
+  virtual void TearDown() {
+    store_->Delete();
+    store_.reset();
+    file_deleter_.reset();
+
+    PlatformTest::TearDown();
+  }
+
+  // Owns the temporary directory which holds the store.
+  scoped_ptr<FileAutoDeleter> file_deleter_;
+  // Full path of the store file under the temp directory.
+  FilePath filename_;
+  scoped_ptr<SafeBrowsingStoreSqlite> store_;
+};
+
+TEST_STORE(SafeBrowsingStoreSqliteTest, store_.get(), filename_);
+
+} // namespace
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc
new file mode 100644
index 0000000..35b9928
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc
@@ -0,0 +1,188 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/safe_browsing/safe_browsing_store.h"
+#include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+// SBAddPrefixLess() must order adds by chunk_id first, then prefix,
+// and must be a strict ordering (equal elements are not less).
+TEST(SafeBrowsingStoreTest, SBAddPrefixLess) {
+  // chunk_id then prefix.
+  EXPECT_TRUE(SBAddPrefixLess(SBAddPrefix(10, 1), SBAddPrefix(11, 1)));
+  EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(11, 1), SBAddPrefix(10, 1)));
+  EXPECT_TRUE(SBAddPrefixLess(SBAddPrefix(10, 1), SBAddPrefix(10, 2)));
+  EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(10, 2), SBAddPrefix(10, 1)));
+
+  // Equal is not less.
+  EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(10, 1), SBAddPrefix(10, 1)));
+}
+
+// SBAddPrefixHashLess() must order full-hash adds by chunk id, then
+// prefix, then the remaining full-hash bytes.
+TEST(SafeBrowsingStoreTest, SBAddPrefixHashLess) {
+  // The first four bytes of SBFullHash can be read as an int32, which
+  // means that byte-ordering issues can come up.  To test this, |one|
+  // and |two| differ in the prefix, while |one| and |onetwo| have the
+  // same prefix, but differ in the byte after the prefix.
+  SBFullHash one, onetwo, two;
+  memset(&one, 0, sizeof(one));
+  memset(&onetwo, 0, sizeof(onetwo));
+  memset(&two, 0, sizeof(two));
+  one.prefix = 1;
+  one.full_hash[sizeof(int32)] = 1;
+  onetwo.prefix = 1;
+  onetwo.full_hash[sizeof(int32)] = 2;
+  two.prefix = 2;
+
+  const base::Time now = base::Time::Now();
+
+  // add_id dominates.
+  EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(10, two.prefix, now, two),
+                                  SBAddFullHash(11, one.prefix, now, one)));
+  EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(11, two.prefix, now, two),
+                                   SBAddFullHash(10, one.prefix, now, one)));
+
+  // After add_id, prefix.
+  EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(10, one.prefix, now, two),
+                                  SBAddFullHash(10, two.prefix, now, one)));
+  EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(10, two.prefix, now, one),
+                                   SBAddFullHash(10, one.prefix, now, two)));
+
+  // After prefix, full hash.
+  EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(10, one.prefix, now, one),
+                                  SBAddFullHash(10, onetwo.prefix,
+                                                now, onetwo)));
+  EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(10, onetwo.prefix,
+                                                 now, onetwo),
+                                   SBAddFullHash(10, one.prefix, now, one)));
+
+  // Equal is not less-than.
+  EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(10, one.prefix, now, one),
+                                   SBAddFullHash(10, one.prefix, now, one)));
+}
+
+// For subs, SBAddPrefixLess() compares the targeted add data (add
+// chunk id, then prefix); the sub's own chunk_id (first constructor
+// argument) must not affect the ordering.
+TEST(SafeBrowsingStoreTest, SBSubPrefixLess) {
+  // add_id dominates.
+  EXPECT_TRUE(SBAddPrefixLess(SBSubPrefix(12, 10, 2), SBSubPrefix(9, 11, 1)));
+  EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 11, 2), SBSubPrefix(9, 10, 1)));
+
+  // After add_id, prefix.
+  EXPECT_TRUE(SBAddPrefixLess(SBSubPrefix(12, 10, 1), SBSubPrefix(9, 10, 2)));
+  EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 10, 2), SBSubPrefix(9, 10, 1)));
+
+  // Equal is not less-than.
+  EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 10, 1), SBSubPrefix(12, 10, 1)));
+
+  // chunk_id doesn't matter -- every pair above differs in it, yet the
+  // results track only the add-side fields.
+}
+
+// Same ordering checks as SBAddPrefixHashLess above, but for subs:
+// add chunk id, then prefix, then full hash; the sub's own chunk_id
+// is ignored throughout.
+TEST(SafeBrowsingStoreTest, SBSubFullHashLess) {
+  // |one| and |onetwo| share a prefix but differ past it; |two| has a
+  // different prefix (see SBAddPrefixHashLess for the rationale).
+  SBFullHash one, onetwo, two;
+  memset(&one, 0, sizeof(one));
+  memset(&onetwo, 0, sizeof(onetwo));
+  memset(&two, 0, sizeof(two));
+  one.prefix = 1;
+  one.full_hash[sizeof(int32)] = 1;
+  onetwo.prefix = 1;
+  onetwo.full_hash[sizeof(int32)] = 2;
+  two.prefix = 2;
+
+  // add_id dominates.
+  EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 10, two.prefix, two),
+                                  SBSubFullHash(9, 11, one.prefix, one)));
+  EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 11, two.prefix, two),
+                                   SBSubFullHash(9, 10, one.prefix, one)));
+
+  // After add_id, prefix.
+  EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 10, one.prefix, two),
+                                  SBSubFullHash(9, 10, two.prefix, one)));
+  EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 10, two.prefix, one),
+                                   SBSubFullHash(9, 10, one.prefix, two)));
+
+  // After prefix, full_hash.
+  EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 10, one.prefix, one),
+                                  SBSubFullHash(9, 10, onetwo.prefix, onetwo)));
+  EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 10, onetwo.prefix, onetwo),
+                                   SBSubFullHash(9, 10, one.prefix, one)));
+
+  // Equal is not less-than.
+  EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 10, one.prefix, one),
+                                   SBSubFullHash(9, 10, one.prefix, one)));
+}
+
+// End-to-end check of SBProcessSubs(): subs knock out matching adds
+// (by prefix or by full hash), matched pairs are consumed, and
+// unmatched entries on either side survive.
+TEST(SafeBrowsingStoreTest, SBProcessSubs) {
+  std::vector<SBAddPrefix> add_prefixes;
+  std::vector<SBAddFullHash> add_hashes;
+  std::vector<SBSubPrefix> sub_prefixes;
+  std::vector<SBSubFullHash> sub_hashes;
+
+  // SBProcessSubs does a lot of iteration, run through empty just to
+  // make sure degenerate cases work.
+  SBProcessSubs(&add_prefixes, &sub_prefixes, &add_hashes, &sub_hashes);
+  EXPECT_TRUE(add_prefixes.empty());
+  EXPECT_TRUE(sub_prefixes.empty());
+  EXPECT_TRUE(add_hashes.empty());
+  EXPECT_TRUE(sub_hashes.empty());
+
+  const base::Time kNow = base::Time::Now();
+  const SBFullHash kHash1(SBFullHashFromString("one"));
+  const SBFullHash kHash2(SBFullHashFromString("two"));
+  const SBFullHash kHash3(SBFullHashFromString("three"));
+  const int kAddChunk1 = 1;  // Use different chunk numbers just in case.
+  const int kSubChunk1 = 2;
+
+  // Construct some full hashes which share prefix with another.
+  SBFullHash kHash1mod1 = kHash1;
+  kHash1mod1.full_hash[sizeof(kHash1mod1.full_hash) - 1] ++;
+  SBFullHash kHash1mod2 = kHash1mod1;
+  kHash1mod2.full_hash[sizeof(kHash1mod2.full_hash) - 1] ++;
+  SBFullHash kHash1mod3 = kHash1mod2;
+  kHash1mod3.full_hash[sizeof(kHash1mod3.full_hash) - 1] ++;
+
+  // An add with prefix and a couple hashes, plus a sub for the prefix
+  // and a couple sub hashes. The sub should knock all of them out.
+  add_prefixes.push_back(SBAddPrefix(kAddChunk1, kHash1.prefix));
+  add_hashes.push_back(
+      SBAddFullHash(kAddChunk1, kHash1.prefix, kNow, kHash1));
+  add_hashes.push_back(
+      SBAddFullHash(kAddChunk1, kHash1mod1.prefix, kNow, kHash1mod1));
+  sub_prefixes.push_back(SBSubPrefix(kSubChunk1, kAddChunk1, kHash1.prefix));
+  sub_hashes.push_back(
+      SBSubFullHash(kSubChunk1, kAddChunk1, kHash1mod2.prefix, kHash1mod2));
+  sub_hashes.push_back(
+      SBSubFullHash(kSubChunk1, kAddChunk1, kHash1mod3.prefix, kHash1mod3));
+
+  // An add with no corresponding sub. Both items should be retained.
+  add_hashes.push_back(SBAddFullHash(kAddChunk1, kHash2.prefix, kNow, kHash2));
+  add_prefixes.push_back(SBAddPrefix(kAddChunk1, kHash2.prefix));
+
+  // A sub with no corresponding add. Both items should be retained.
+  sub_hashes.push_back(
+      SBSubFullHash(kSubChunk1, kAddChunk1, kHash3.prefix, kHash3));
+  sub_prefixes.push_back(SBSubPrefix(kSubChunk1, kAddChunk1, kHash3.prefix));
+
+  SBProcessSubs(&add_prefixes, &sub_prefixes, &add_hashes, &sub_hashes);
+
+  // Only the kHash2 add (no matching sub) survives.
+  EXPECT_EQ(1U, add_prefixes.size());
+  EXPECT_EQ(kAddChunk1, add_prefixes[0].chunk_id);
+  EXPECT_EQ(kHash2.prefix, add_prefixes[0].prefix);
+
+  EXPECT_EQ(1U, add_hashes.size());
+  EXPECT_EQ(kAddChunk1, add_hashes[0].add_prefix.chunk_id);
+  EXPECT_EQ(kHash2.prefix, add_hashes[0].add_prefix.prefix);
+  EXPECT_TRUE(SBFullHashEq(kHash2, add_hashes[0].full_hash));
+
+  // Only the kHash3 sub (no matching add) survives.
+  EXPECT_EQ(1U, sub_prefixes.size());
+  EXPECT_EQ(kSubChunk1, sub_prefixes[0].chunk_id);
+  EXPECT_EQ(kAddChunk1, sub_prefixes[0].add_prefix.chunk_id);
+  EXPECT_EQ(kHash3.prefix, sub_prefixes[0].add_prefix.prefix);
+
+  EXPECT_EQ(1U, sub_hashes.size());
+  EXPECT_EQ(kSubChunk1, sub_hashes[0].chunk_id);
+  EXPECT_EQ(kAddChunk1, sub_hashes[0].add_prefix.chunk_id);
+  EXPECT_EQ(kHash3.prefix, sub_hashes[0].add_prefix.prefix);
+  EXPECT_TRUE(SBFullHashEq(kHash3, sub_hashes[0].full_hash));
+}
+
+} // namespace
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.cc b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.cc
new file mode 100644
index 0000000..9a69472
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.cc
@@ -0,0 +1,340 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h"
+
+#include "base/file_util.h"
+
+namespace {
+
+// Chunk ids and hashes shared by all the store tests below.
+const int kAddChunk1 = 1;
+const int kAddChunk2 = 3;
+const int kAddChunk3 = 5;
+const int kAddChunk4 = 7;
+// Disjoint chunk numbers for subs to flush out typos.
+const int kSubChunk1 = 2;
+const int kSubChunk2 = 4;
+const int kSubChunk3 = 6;
+
+const SBFullHash kHash1 = SBFullHashFromString("one");
+const SBFullHash kHash2 = SBFullHashFromString("two");
+const SBFullHash kHash3 = SBFullHashFromString("three");
+const SBFullHash kHash4 = SBFullHashFromString("four");
+const SBFullHash kHash5 = SBFullHashFromString("five");
+
+}  // namespace
+
+// A freshly-created store must report no chunks, and a no-op update
+// cycle must produce empty results.
+void SafeBrowsingStoreTestEmpty(SafeBrowsingStore* store) {
+  EXPECT_TRUE(store->BeginUpdate());
+
+  std::vector<int> chunks;
+  store->GetAddChunks(&chunks);
+  EXPECT_TRUE(chunks.empty());
+  store->GetSubChunks(&chunks);
+  EXPECT_TRUE(chunks.empty());
+
+  // Shouldn't see anything, but anything is a big set to test.
+  EXPECT_FALSE(store->CheckAddChunk(0));
+  EXPECT_FALSE(store->CheckAddChunk(1));
+  EXPECT_FALSE(store->CheckAddChunk(-1));
+
+  EXPECT_FALSE(store->CheckSubChunk(0));
+  EXPECT_FALSE(store->CheckSubChunk(1));
+  EXPECT_FALSE(store->CheckSubChunk(-1));
+
+  std::vector<SBAddFullHash> pending_adds;
+  std::vector<SBAddPrefix> add_prefixes_result;
+  std::vector<SBAddFullHash> add_full_hashes_result;
+
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+  EXPECT_TRUE(add_prefixes_result.empty());
+  EXPECT_TRUE(add_full_hashes_result.empty());
+}
+
+// Write prefix and hash data in one update, then verify it is all
+// returned from FinishUpdate() and survives a second update cycle.
+void SafeBrowsingStoreTestStorePrefix(SafeBrowsingStore* store) {
+  EXPECT_TRUE(store->BeginUpdate());
+
+  const base::Time now = base::Time::Now();
+
+  EXPECT_TRUE(store->BeginChunk());
+  store->SetAddChunk(kAddChunk1);
+  EXPECT_TRUE(store->CheckAddChunk(kAddChunk1));
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash1.prefix));
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash2.prefix));
+  EXPECT_TRUE(store->WriteAddHash(kAddChunk1, kHash2.prefix, now, kHash2));
+
+  store->SetSubChunk(kSubChunk1);
+  EXPECT_TRUE(store->CheckSubChunk(kSubChunk1));
+  EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash3.prefix));
+  EXPECT_TRUE(store->WriteSubHash(kSubChunk1,
+                                  kAddChunk3, kHash3.prefix, kHash3));
+  EXPECT_TRUE(store->FinishChunk());
+
+  // Chunk numbers shouldn't leak over.
+  EXPECT_FALSE(store->CheckAddChunk(kSubChunk1));
+  EXPECT_FALSE(store->CheckAddChunk(kAddChunk3));
+  EXPECT_FALSE(store->CheckSubChunk(kAddChunk1));
+
+  std::vector<int> chunks;
+  store->GetAddChunks(&chunks);
+  ASSERT_EQ(1U, chunks.size());
+  EXPECT_EQ(kAddChunk1, chunks[0]);
+
+  store->GetSubChunks(&chunks);
+  ASSERT_EQ(1U, chunks.size());
+  EXPECT_EQ(kSubChunk1, chunks[0]);
+
+  std::vector<SBAddFullHash> pending_adds;
+  std::vector<SBAddPrefix> add_prefixes_result;
+  std::vector<SBAddFullHash> add_full_hashes_result;
+
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+
+  ASSERT_EQ(2U, add_prefixes_result.size());
+  EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id);
+  EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix);
+  EXPECT_EQ(kAddChunk1, add_prefixes_result[1].chunk_id);
+  EXPECT_EQ(kHash2.prefix, add_prefixes_result[1].prefix);
+
+  ASSERT_EQ(1U, add_full_hashes_result.size());
+  EXPECT_EQ(kAddChunk1, add_full_hashes_result[0].add_prefix.chunk_id);
+  EXPECT_EQ(kHash2.prefix, add_full_hashes_result[0].add_prefix.prefix);
+  EXPECT_EQ(add_full_hashes_result[0].add_prefix.prefix,
+            add_full_hashes_result[0].full_hash.prefix);
+  // EXPECT_TRUE(add_full_hashes_result[0].received == now)?
+  // Compare at time_t granularity -- the store may round-trip the
+  // time through a coarser representation.
+  EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received.ToTimeT());
+  EXPECT_TRUE(SBFullHashEq(kHash2, add_full_hashes_result[0].full_hash));
+
+  add_prefixes_result.clear();
+  add_full_hashes_result.clear();
+
+  // Second update cycle: the data written above must persist.
+  EXPECT_TRUE(store->BeginUpdate());
+
+  // Still has the chunks expected in the next update.
+  store->GetAddChunks(&chunks);
+  ASSERT_EQ(1U, chunks.size());
+  EXPECT_EQ(kAddChunk1, chunks[0]);
+
+  store->GetSubChunks(&chunks);
+  ASSERT_EQ(1U, chunks.size());
+  EXPECT_EQ(kSubChunk1, chunks[0]);
+
+  EXPECT_TRUE(store->CheckAddChunk(kAddChunk1));
+  EXPECT_TRUE(store->CheckSubChunk(kSubChunk1));
+
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+
+  // Still has the expected contents.
+  ASSERT_EQ(2U, add_prefixes_result.size());
+  EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id);
+  EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix);
+  EXPECT_EQ(kAddChunk1, add_prefixes_result[1].chunk_id);
+  EXPECT_EQ(kHash2.prefix, add_prefixes_result[1].prefix);
+
+  ASSERT_EQ(1U, add_full_hashes_result.size());
+  EXPECT_EQ(kAddChunk1, add_full_hashes_result[0].add_prefix.chunk_id);
+  EXPECT_EQ(kHash2.prefix, add_full_hashes_result[0].add_prefix.prefix);
+  EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received.ToTimeT());
+  EXPECT_TRUE(SBFullHashEq(kHash2, add_full_hashes_result[0].full_hash));
+}
+
+// Subs must knock out matching adds: within the same update, against
+// an add arriving in a later update, and a matched sub must be
+// consumed so the same add sticks the update after that.
+void SafeBrowsingStoreTestSubKnockout(SafeBrowsingStore* store) {
+  EXPECT_TRUE(store->BeginUpdate());
+
+  const base::Time now = base::Time::Now();
+
+  EXPECT_TRUE(store->BeginChunk());
+  store->SetAddChunk(kAddChunk1);
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash1.prefix));
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash2.prefix));
+  EXPECT_TRUE(store->WriteAddHash(kAddChunk1, kHash2.prefix, now, kHash2));
+
+  // One sub targets the kHash2 add above, the other targets an add
+  // which has not arrived yet (kAddChunk3/kHash3).
+  store->SetSubChunk(kSubChunk1);
+  EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash3.prefix));
+  EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk1, kHash2.prefix));
+  EXPECT_TRUE(store->FinishChunk());
+
+  std::vector<SBAddFullHash> pending_adds;
+  std::vector<SBAddPrefix> add_prefixes_result;
+  std::vector<SBAddFullHash> add_full_hashes_result;
+
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+
+  // Knocked out the chunk expected.
+  ASSERT_EQ(1U, add_prefixes_result.size());
+  EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id);
+  EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix);
+  EXPECT_TRUE(add_full_hashes_result.empty());
+
+  add_prefixes_result.clear();
+
+  EXPECT_TRUE(store->BeginUpdate());
+
+  // This add should be knocked out by an existing sub.
+  EXPECT_TRUE(store->BeginChunk());
+  store->SetAddChunk(kAddChunk3);
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk3, kHash3.prefix));
+  EXPECT_TRUE(store->FinishChunk());
+
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+  EXPECT_EQ(1U, add_prefixes_result.size());
+  EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id);
+  EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix);
+  EXPECT_TRUE(add_full_hashes_result.empty());
+
+  add_prefixes_result.clear();
+
+  EXPECT_TRUE(store->BeginUpdate());
+
+  // But by here the sub should be gone, so it should stick this time.
+  EXPECT_TRUE(store->BeginChunk());
+  store->SetAddChunk(kAddChunk3);
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk3, kHash3.prefix));
+  EXPECT_TRUE(store->FinishChunk());
+
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+  ASSERT_EQ(2U, add_prefixes_result.size());
+  EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id);
+  EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix);
+  EXPECT_EQ(kAddChunk3, add_prefixes_result[1].chunk_id);
+  EXPECT_EQ(kHash3.prefix, add_prefixes_result[1].prefix);
+  EXPECT_TRUE(add_full_hashes_result.empty());
+}
+
+// DeleteAddChunk()/DeleteSubChunk() remove a chunk's data, but only
+// when FinishUpdate() runs; until then the chunk is still visible.
+void SafeBrowsingStoreTestDeleteChunks(SafeBrowsingStore* store) {
+  EXPECT_TRUE(store->BeginUpdate());
+
+  const base::Time now = base::Time::Now();
+
+  // A chunk which will be deleted.
+  EXPECT_FALSE(store->CheckAddChunk(kAddChunk1));
+  store->SetAddChunk(kAddChunk1);
+  EXPECT_TRUE(store->BeginChunk());
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash1.prefix));
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash2.prefix));
+  EXPECT_TRUE(store->WriteAddHash(kAddChunk1, kHash2.prefix, now, kHash2));
+  EXPECT_TRUE(store->FinishChunk());
+
+  // Another which won't.
+  EXPECT_FALSE(store->CheckAddChunk(kAddChunk2));
+  store->SetAddChunk(kAddChunk2);
+  EXPECT_TRUE(store->BeginChunk());
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk2, kHash3.prefix));
+  EXPECT_TRUE(store->WriteAddHash(kAddChunk2, kHash3.prefix, now, kHash3));
+  EXPECT_TRUE(store->FinishChunk());
+
+  // A sub chunk to delete.
+  EXPECT_FALSE(store->CheckSubChunk(kSubChunk1));
+  store->SetSubChunk(kSubChunk1);
+  EXPECT_TRUE(store->BeginChunk());
+  EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash4.prefix));
+  EXPECT_TRUE(store->WriteSubHash(kSubChunk1,
+                                  kAddChunk3, kHash4.prefix, kHash4));
+  EXPECT_TRUE(store->FinishChunk());
+
+  // A sub chunk to keep.
+  EXPECT_FALSE(store->CheckSubChunk(kSubChunk2));
+  store->SetSubChunk(kSubChunk2);
+  EXPECT_TRUE(store->BeginChunk());
+  EXPECT_TRUE(store->WriteSubPrefix(kSubChunk2, kAddChunk4, kHash5.prefix));
+  EXPECT_TRUE(store->WriteSubHash(kSubChunk2,
+                                  kAddChunk4, kHash5.prefix, kHash5));
+  EXPECT_TRUE(store->FinishChunk());
+
+  store->DeleteAddChunk(kAddChunk1);
+  store->DeleteSubChunk(kSubChunk1);
+
+  // Not actually deleted until finish.
+  EXPECT_TRUE(store->CheckAddChunk(kAddChunk1));
+  EXPECT_TRUE(store->CheckAddChunk(kAddChunk2));
+  EXPECT_TRUE(store->CheckSubChunk(kSubChunk1));
+  EXPECT_TRUE(store->CheckSubChunk(kSubChunk2));
+
+  std::vector<SBAddFullHash> pending_adds;
+  std::vector<SBAddPrefix> add_prefixes_result;
+  std::vector<SBAddFullHash> add_full_hashes_result;
+
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+
+  // Only the kAddChunk2 data survives the deletion.
+  EXPECT_EQ(1U, add_prefixes_result.size());
+  EXPECT_EQ(kAddChunk2, add_prefixes_result[0].chunk_id);
+  EXPECT_EQ(kHash3.prefix, add_prefixes_result[0].prefix);
+  EXPECT_EQ(1U, add_full_hashes_result.size());
+  EXPECT_EQ(kAddChunk2, add_full_hashes_result[0].add_prefix.chunk_id);
+  EXPECT_EQ(kHash3.prefix, add_full_hashes_result[0].add_prefix.prefix);
+  EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received.ToTimeT());
+  EXPECT_TRUE(SBFullHashEq(kHash3, add_full_hashes_result[0].full_hash));
+
+  // Expected chunks are there in another update.
+  EXPECT_TRUE(store->BeginUpdate());
+  EXPECT_FALSE(store->CheckAddChunk(kAddChunk1));
+  EXPECT_TRUE(store->CheckAddChunk(kAddChunk2));
+  EXPECT_FALSE(store->CheckSubChunk(kSubChunk1));
+  EXPECT_TRUE(store->CheckSubChunk(kSubChunk2));
+
+  // Delete them, too.
+  store->DeleteAddChunk(kAddChunk2);
+  store->DeleteSubChunk(kSubChunk2);
+
+  add_prefixes_result.clear();
+  add_full_hashes_result.clear();
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+
+  // Expect no more chunks.
+  EXPECT_TRUE(store->BeginUpdate());
+  EXPECT_FALSE(store->CheckAddChunk(kAddChunk1));
+  EXPECT_FALSE(store->CheckAddChunk(kAddChunk2));
+  EXPECT_FALSE(store->CheckSubChunk(kSubChunk1));
+  EXPECT_FALSE(store->CheckSubChunk(kSubChunk2));
+  add_prefixes_result.clear();
+  add_full_hashes_result.clear();
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+  EXPECT_TRUE(add_prefixes_result.empty());
+  EXPECT_TRUE(add_full_hashes_result.empty());
+}
+
+// After a completed update the backing file must exist on disk, and
+// store->Delete() must remove it.
+void SafeBrowsingStoreTestDelete(SafeBrowsingStore* store,
+                                 const FilePath& filename) {
+  EXPECT_TRUE(store->BeginUpdate());
+
+  EXPECT_TRUE(store->BeginChunk());
+  store->SetAddChunk(kAddChunk1);
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash1.prefix));
+  EXPECT_TRUE(store->WriteAddPrefix(kAddChunk1, kHash2.prefix));
+
+  store->SetSubChunk(kSubChunk1);
+  EXPECT_TRUE(store->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash3.prefix));
+  EXPECT_TRUE(store->FinishChunk());
+
+  std::vector<SBAddFullHash> pending_adds;
+  std::vector<SBAddPrefix> add_prefixes_result;
+  std::vector<SBAddFullHash> add_full_hashes_result;
+
+  EXPECT_TRUE(store->FinishUpdate(pending_adds,
+                                  &add_prefixes_result,
+                                  &add_full_hashes_result));
+
+  EXPECT_TRUE(file_util::PathExists(filename));
+
+  EXPECT_TRUE(store->Delete());
+  EXPECT_FALSE(file_util::PathExists(filename));
+}
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h
new file mode 100644
index 0000000..ea61b8f
--- /dev/null
+++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest_helper.h
@@ -0,0 +1,68 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_UNITTEST_HELPER_H_
+#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_UNITTEST_HELPER_H_
+
+#include "chrome/browser/safe_browsing/safe_browsing_store.h"
+
+#include "base/sha2.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+// Helper code for testing that a SafeBrowsingStore implementation
+// works to spec.
+
+// Helper to make it easy to initialize SBFullHash constants: fills
+// the hash storage with the SHA-256 of |str|.
+inline const SBFullHash SBFullHashFromString(const char* str) {
+  SBFullHash h;
+  base::SHA256HashString(str, &h.full_hash, sizeof(h.full_hash));
+  return h;
+}
+
+// Byte-wise equality over the full_hash arrays.
+// TODO(shess): There's an == operator defined in
+// safe_browsing_utils.h, but using it gives me the heebie-jeebies.
+inline bool SBFullHashEq(const SBFullHash& a, const SBFullHash& b) {
+  return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash));
+}
+
+// Test that the empty store looks empty.
+void SafeBrowsingStoreTestEmpty(SafeBrowsingStore* store);
+
+// Write some prefix data to the store and verify that it looks like
+// it is still there after the transaction completes.
+void SafeBrowsingStoreTestStorePrefix(SafeBrowsingStore* store);
+
+// Test that subs knockout adds.
+void SafeBrowsingStoreTestSubKnockout(SafeBrowsingStore* store);
+
+// Test that deletes delete the chunk's data.
+void SafeBrowsingStoreTestDeleteChunks(SafeBrowsingStore* store);
+
+// Test that deleting the store deletes the store.
+void SafeBrowsingStoreTestDelete(SafeBrowsingStore* store,
+ const FilePath& filename);
+
+// Wrap all the tests up for implementation subclasses.
+// |test_fixture| is the class that would be passed to TEST_F(),
+// |instance_name| is the name of the SafeBrowsingStore instance
+// within the class, as a pointer, and |filename| is that store's
+// filename, for the Delete() test.
+// Invoked once per store implementation (see the
+// safe_browsing_store_*_unittest.cc files); expands to plain TEST_F
+// definitions.
+#define TEST_STORE(test_fixture, instance_name, filename) \
+  TEST_F(test_fixture, Empty) { \
+    SafeBrowsingStoreTestEmpty(instance_name); \
+  } \
+  TEST_F(test_fixture, StorePrefix) { \
+    SafeBrowsingStoreTestStorePrefix(instance_name); \
+  } \
+  TEST_F(test_fixture, SubKnockout) { \
+    SafeBrowsingStoreTestSubKnockout(instance_name); \
+  } \
+  TEST_F(test_fixture, DeleteChunks) { \
+    SafeBrowsingStoreTestDeleteChunks(instance_name); \
+  } \
+  TEST_F(test_fixture, Delete) { \
+    SafeBrowsingStoreTestDelete(instance_name, filename); \
+  }
+
+#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_UNITTEST_HELPER_H_