author     shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2014-03-27 05:27:17 +0000
committer  shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>   2014-03-27 05:27:17 +0000
commit     d3dd0715afc50f2e999f5a28478e783519170a29 (patch)
tree       ac638131304a9a2524d10e08dee9b7929e55b239 /chrome/browser/safe_browsing
parent     f42a5a827ca1c591f1f9e918fce5ce6d485d0e3d (diff)
download   chromium_src-d3dd0715afc50f2e999f5a28478e783519170a29.zip
           chromium_src-d3dd0715afc50f2e999f5a28478e783519170a29.tar.gz
           chromium_src-d3dd0715afc50f2e999f5a28478e783519170a29.tar.bz2
Refactor safe-browsing file storage to allow incremental processing.
Previously the file was read into memory, processed, and written out to a new
file. Modify the format to shard the data by prefix, so that only a portion
needs to be read into memory at a given time. The sharding is dynamic based on
target memory footprint (currently 100k), so small files have fewer shards than
large files.

Additionally, add a checksum over the header and chunk table-of-contents area
to prevent corruption from causing broken update requests.

BUG=120219,351448

Review URL: https://codereview.chromium.org/208583005

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@259791 0039d316-1c4b-4281-b951-d872f2087c98
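The dynamic sharding decision can be pictured with a rough sketch (illustrative only, not code from this change; ChooseOutStride is a hypothetical wrapper, while the constants mirror kUpdateStorageBytes and kMinShardStride introduced in safe_browsing_store_file.cc):

// Halve the shard stride (and the estimated per-shard size) until one shard
// of data fits the ~100 KiB update budget, but never shard finer than the
// minimum stride.
uint64 ChooseOutStride(int64 estimated_file_bytes) {
  const int64 kUpdateStorageBytes = 100 * 1024;  // target per-shard footprint
  const uint32 kMinShardStride = 1 << 24;        // lower bound on stride
  uint64 out_stride = GG_LONGLONG(1) << 32;      // start with a single shard
  int64 shard_size = estimated_file_bytes;
  while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) {
    out_stride >>= 1;  // twice as many shards ...
    shard_size >>= 1;  // ... each covering half the data
  }
  // Stored as uint32 in the header, so a full-space stride wraps to 0, which
  // readers interpret as "entire prefix space in one shard".
  return out_stride;
}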
Diffstat (limited to 'chrome/browser/safe_browsing')
-rw-r--r--  chrome/browser/safe_browsing/safe_browsing_store.cc                 29
-rw-r--r--  chrome/browser/safe_browsing/safe_browsing_store.h                   13
-rw-r--r--  chrome/browser/safe_browsing/safe_browsing_store_file.cc           900
-rw-r--r--  chrome/browser/safe_browsing/safe_browsing_store_file.h             88
-rw-r--r--  chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc  250
-rw-r--r--  chrome/browser/safe_browsing/safe_browsing_store_unittest.cc        27
6 files changed, 1015 insertions, 292 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_store.cc b/chrome/browser/safe_browsing/safe_browsing_store.cc
index 2638e58..a942b9a 100644
--- a/chrome/browser/safe_browsing/safe_browsing_store.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_store.cc
@@ -10,6 +10,17 @@
namespace {
+// Return |true| if the range is sorted by the given comparator.
+template <typename CTI, typename LESS>
+bool sorted(CTI beg, CTI end, LESS less) {
+ while ((end - beg) > 2) {
+ CTI n = beg++;
+ if (less(*beg, *n))
+ return false;
+ }
+ return true;
+}
+
// Find items matching between |subs| and |adds|, and remove them,
// recording the item from |adds| in |adds_removed|. To minimize
// copies, the inputs are processing in parallel, so |subs| and |adds|
@@ -133,15 +144,15 @@ void SBProcessSubs(SBAddPrefixes* add_prefixes,
// to qualify things. It becomes very arbitrary, though, and less
// clear how things are working.
- // Sort the inputs by the SBAddPrefix bits.
- std::sort(add_prefixes->begin(), add_prefixes->end(),
- SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
- std::sort(sub_prefixes->begin(), sub_prefixes->end(),
- SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
- std::sort(add_full_hashes->begin(), add_full_hashes->end(),
- SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
- std::sort(sub_full_hashes->begin(), sub_full_hashes->end(),
- SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>);
+ // Make sure things are sorted appropriately.
+ DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(),
+ SBAddPrefixLess<SBAddPrefix,SBAddPrefix>));
+ DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(),
+ SBAddPrefixLess<SBSubPrefix,SBSubPrefix>));
+ DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(),
+ SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>));
+ DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(),
+ SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>));
// Factor out the prefix subs.
SBAddPrefixes removed_adds;
diff --git a/chrome/browser/safe_browsing/safe_browsing_store.h b/chrome/browser/safe_browsing/safe_browsing_store.h
index 76cf28a..b381743 100644
--- a/chrome/browser/safe_browsing/safe_browsing_store.h
+++ b/chrome/browser/safe_browsing/safe_browsing_store.h
@@ -53,6 +53,8 @@ struct SBAddPrefix {
SBPrefix GetAddPrefix() const { return prefix; }
};
+// TODO(shess): Measure the performance impact of switching this back to
+// std::vector<> once the v8 file format dominates. Also SBSubPrefixes.
typedef std::deque<SBAddPrefix> SBAddPrefixes;
struct SBSubPrefix {
@@ -134,12 +136,7 @@ bool SBAddPrefixHashLess(const T& a, const U& b) {
// matched items from all vectors. Additionally remove items from
// deleted chunks.
//
-// TODO(shess): Since the prefixes are uniformly-distributed hashes,
-// there aren't many ways to organize the inputs for efficient
-// processing. For this reason, the vectors are sorted and processed
-// in parallel. At this time this code does the sorting internally,
-// but it might make sense to make sorting an API requirement so that
-// the storage can optimize for it.
+// The inputs must be sorted by SBAddPrefixLess or SBAddPrefixHashLess.
void SBProcessSubs(SBAddPrefixes* add_prefixes,
SBSubPrefixes* sub_prefixes,
std::vector<SBAddFullHash>* add_full_hashes,
@@ -147,10 +144,6 @@ void SBProcessSubs(SBAddPrefixes* add_prefixes,
const base::hash_set<int32>& add_chunks_deleted,
const base::hash_set<int32>& sub_chunks_deleted);
-// TODO(shess): This uses int32 rather than int because it's writing
-// specifically-sized items to files. SBPrefix should likewise be
-// explicitly sized.
-
// Abstract interface for storing data.
class SafeBrowsingStore {
public:
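A minimal caller-side sketch of the new sorted-input contract (the comparators and containers are the ones declared in this header; the surrounding variables are assumed to exist, mirroring the sorts added to safe_browsing_store_unittest.cc further down):

// SBProcessSubs() no longer sorts its inputs; callers sort with the same
// comparators that the new DCHECKs verify, then hand the containers over.
std::sort(add_prefixes.begin(), add_prefixes.end(),
          SBAddPrefixLess<SBAddPrefix, SBAddPrefix>);
std::sort(sub_prefixes.begin(), sub_prefixes.end(),
          SBAddPrefixLess<SBSubPrefix, SBSubPrefix>);
std::sort(add_full_hashes.begin(), add_full_hashes.end(),
          SBAddPrefixHashLess<SBAddFullHash, SBAddFullHash>);
std::sort(sub_full_hashes.begin(), sub_full_hashes.end(),
          SBAddPrefixHashLess<SBSubFullHash, SBSubFullHash>);
SBProcessSubs(&add_prefixes, &sub_prefixes, &add_full_hashes, &sub_full_hashes,
              add_chunks_deleted, sub_chunks_deleted);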
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.cc b/chrome/browser/safe_browsing/safe_browsing_store_file.cc
index a879096..b58d961 100644
--- a/chrome/browser/safe_browsing/safe_browsing_store_file.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_store_file.cc
@@ -14,22 +14,68 @@ namespace {
// NOTE(shess): kFileMagic should not be a byte-wise palindrome, so
// that byte-order changes force corruption.
const int32 kFileMagic = 0x600D71FE;
-const int32 kFileVersion = 7; // SQLite storage was 6...
+
+// Version history:
+// Version 6: aad08754/r2814 by erikkay@google.com on 2008-10-02 (sqlite)
+// Version 7: 6afe28a5/r37435 by shess@chromium.org on 2010-01-28
+// Version 8: ????????/r????? by shess@chromium.org on 2014-03-??
+const int32 kFileVersion = 8;
+
+// ReadAndVerifyHeader() returns this in case of error.
+const int32 kInvalidVersion = -1;
// Header at the front of the main database file.
-struct FileHeader {
+struct FileHeaderV7 {
int32 magic, version;
uint32 add_chunk_count, sub_chunk_count;
uint32 add_prefix_count, sub_prefix_count;
uint32 add_hash_count, sub_hash_count;
};
+// Starting with version 8, the storage is sorted and can be sharded to allow
+// updates to be done with lower memory requirements. Newly written files will
+// be sharded to need less than this amount of memory during update. Larger
+// values are preferred to minimize looping overhead during processing.
+const int64 kUpdateStorageBytes = 100 * 1024;
+
+// Prevent excessive sharding by setting a lower limit on the shard stride.
+// Smaller values should work fine, but very small values will probably lead to
+// poor performance. Shard stride is indirectly related to
+// |kUpdateStorageBytes|, setting that very small will bump against this.
+const uint32 kMinShardStride = 1 << 24;
+
+// Strides over the entire SBPrefix space.
+const uint64 kMaxShardStride = GG_LONGLONG(1u) << 32;
+
+// Maximum SBPrefix value.
+const SBPrefix kMaxSBPrefix = ~0;
+
+// Header at the front of the main database file.
+struct FileHeaderV8 {
+ int32 magic, version;
+ uint32 add_chunk_count, sub_chunk_count;
+ uint32 shard_stride;
+ // TODO(shess): Is this where 64-bit will bite me? Perhaps write a
+ // specialized read/write?
+};
+
+union FileHeader {
+ struct FileHeaderV7 v7;
+ struct FileHeaderV8 v8;
+};
+
// Header for each chunk in the chunk-accumulation file.
struct ChunkHeader {
uint32 add_prefix_count, sub_prefix_count;
uint32 add_hash_count, sub_hash_count;
};
+// Header for each shard of data in the main database file.
+struct ShardHeader {
+ uint32 add_prefix_count, sub_prefix_count;
+ uint32 add_hash_count, sub_hash_count;
+};
+
// Rewind the file. Using fseek(2) because rewind(3) errors are
// weird.
bool FileRewind(FILE* fp) {
@@ -38,18 +84,6 @@ bool FileRewind(FILE* fp) {
return rv == 0;
}
-// Move file read pointer forward by |bytes| relative to current position.
-bool FileSkip(size_t bytes, FILE* fp) {
- // Although fseek takes negative values, for this case, we only want
- // to skip forward.
- DCHECK(static_cast<long>(bytes) >= 0);
- if (static_cast<long>(bytes) < 0)
- return false;
- int rv = fseek(fp, static_cast<long>(bytes), SEEK_CUR);
- DCHECK_EQ(rv, 0);
- return rv == 0;
-}
-
// Read from |fp| into |item|, and fold the input data into the
// checksum in |context|, if non-NULL. Return true on success.
template <class T>
@@ -104,22 +138,26 @@ bool ReadToContainer(CT* values, size_t count, FILE* fp,
return true;
}
-// Write all of |values| to |fp|, and fold the data into the checksum
-// in |context|, if non-NULL. Returns true on succsess.
-template <typename CT>
-bool WriteContainer(const CT& values, FILE* fp,
- base::MD5Context* context) {
- if (values.empty())
- return true;
-
- for (typename CT::const_iterator iter = values.begin();
- iter != values.end(); ++iter) {
+// Write values between |beg| and |end| to |fp|, and fold the data into the
+// checksum in |context|, if non-NULL. Returns true if all items successful.
+template <typename CTI>
+bool WriteRange(const CTI& beg, const CTI& end,
+ FILE* fp, base::MD5Context* context) {
+ for (CTI iter = beg; iter != end; ++iter) {
if (!WriteItem(*iter, fp, context))
return false;
}
return true;
}
+// Write all of |values| to |fp|, and fold the data into the checksum
+// in |context|, if non-NULL. Returns true if all items successful.
+template <typename CT>
+bool WriteContainer(const CT& values, FILE* fp,
+ base::MD5Context* context) {
+ return WriteRange(values.begin(), values.end(), fp, context);
+}
+
// Delete the chunks in |deleted| from |chunks|.
void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
std::set<int32>* chunks) {
@@ -131,16 +169,37 @@ void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
}
}
+// base::MD5Final() modifies |context| in generating |digest|. This wrapper
+// generates an intermediate digest without modifying the context.
+void MD5IntermediateDigest(base::MD5Digest* digest, base::MD5Context* context) {
+ base::MD5Context temp_context;
+ memcpy(&temp_context, context, sizeof(temp_context));
+ base::MD5Final(digest, &temp_context);
+}
+
+bool ReadAndVerifyChecksum(FILE* fp, base::MD5Context* context) {
+ base::MD5Digest calculated_digest;
+ MD5IntermediateDigest(&calculated_digest, context);
+
+ base::MD5Digest file_digest;
+ if (!ReadItem(&file_digest, fp, context))
+ return false;
+
+ return memcmp(&file_digest, &calculated_digest, sizeof(file_digest)) == 0;
+}
+
// Sanity-check the header against the file's size to make sure our
// vectors aren't gigantic. This doubles as a cheap way to detect
// corruption without having to checksum the entire file.
-bool FileHeaderSanityCheck(const base::FilePath& filename,
- const FileHeader& header) {
+bool FileHeaderV7SanityCheck(const base::FilePath& filename,
+ const FileHeaderV7& header) {
+ DCHECK_EQ(header.version, 7);
+
int64 size = 0;
if (!base::GetFileSize(filename, &size))
return false;
- int64 expected_size = sizeof(FileHeader);
+ int64 expected_size = sizeof(FileHeaderV7);
expected_size += header.add_chunk_count * sizeof(int32);
expected_size += header.sub_chunk_count * sizeof(int32);
expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
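A small usage sketch of MD5IntermediateDigest() from the hunk above (base::MD5Update() is assumed to be the underlying fold operation that ReadItem()/WriteItem() perform through their |context| argument; the byte strings are placeholders):

// Assumes base/md5.h and <string>.
base::MD5Context context;
base::MD5Init(&context);

const std::string header_bytes = "header plus chunk TOC";  // placeholder data
base::MD5Update(&context, header_bytes);

// Finalizing a *copy* of the context yields a digest of everything folded in
// so far, while |context| itself stays live for the rest of the file.
base::MD5Digest header_digest;
MD5IntermediateDigest(&header_digest, &context);

const std::string payload_bytes = "shard headers and prefixes";  // placeholder
base::MD5Update(&context, payload_bytes);

base::MD5Digest file_digest;
base::MD5Final(&file_digest, &context);  // overall checksum at end of file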
@@ -154,21 +213,387 @@ bool FileHeaderSanityCheck(const base::FilePath& filename,
return true;
}
-// This a helper function that reads header to |header|. Returns true if the
-// magic number is correct and santiy check passes.
-bool ReadAndVerifyHeader(const base::FilePath& filename,
- FILE* fp,
- FileHeader* header,
- base::MD5Context* context) {
- if (!ReadItem(header, fp, context))
+// Helper function to read the file header and chunk TOC. Rewinds |fp| and
+// initializes |context|. The header is left in |header|, with the version
+// returned. kInvalidVersion is returned for sanity check or checksum failure.
+int ReadAndVerifyHeader(const base::FilePath& filename,
+ FileHeader* header,
+ std::set<int32>* add_chunks,
+ std::set<int32>* sub_chunks,
+ FILE* fp,
+ base::MD5Context* context) {
+ DCHECK(header);
+ DCHECK(add_chunks);
+ DCHECK(sub_chunks);
+ DCHECK(fp);
+ DCHECK(context);
+
+ int version = kInvalidVersion;
+
+ base::MD5Init(context);
+ if (!FileRewind(fp))
+ return kInvalidVersion;
+ if (!ReadItem(&header->v8, fp, context))
+ return kInvalidVersion;
+ if (header->v8.magic != kFileMagic)
+ return kInvalidVersion;
+
+ size_t add_chunks_count = 0;
+ size_t sub_chunks_count = 0;
+
+ if (header->v8.version == 7) {
+ version = 7;
+
+ // Reset the context and re-read the v7 header.
+ base::MD5Init(context);
+ if (!FileRewind(fp))
+ return kInvalidVersion;
+ if (!ReadItem(&header->v7, fp, context))
+ return kInvalidVersion;
+ if (header->v7.magic != kFileMagic || header->v7.version != 7)
+ return kInvalidVersion;
+ if (!FileHeaderV7SanityCheck(filename, header->v7))
+ return kInvalidVersion;
+
+ add_chunks_count = header->v7.add_chunk_count;
+ sub_chunks_count = header->v7.sub_chunk_count;
+ } else if (header->v8.version == kFileVersion) {
+ version = 8;
+ add_chunks_count = header->v8.add_chunk_count;
+ sub_chunks_count = header->v8.sub_chunk_count;
+ } else {
+ return kInvalidVersion;
+ }
+
+ if (!ReadToContainer(add_chunks, add_chunks_count, fp, context) ||
+ !ReadToContainer(sub_chunks, sub_chunks_count, fp, context)) {
+ return kInvalidVersion;
+ }
+
+ // v8 includes a checksum to validate the header.
+ if (version > 7 && !ReadAndVerifyChecksum(fp, context))
+ return kInvalidVersion;
+
+ return version;
+}
+
+// Helper function to write out the initial header and chunks-contained data.
+// Rewinds |fp|, initializes |context|, then writes a file header and
+// |add_chunks| and |sub_chunks|.
+bool WriteHeader(uint32 out_stride,
+ const std::set<int32>& add_chunks,
+ const std::set<int32>& sub_chunks,
+ FILE* fp,
+ base::MD5Context* context) {
+ if (!FileRewind(fp))
+ return false;
+
+ base::MD5Init(context);
+ FileHeaderV8 header;
+ header.magic = kFileMagic;
+ header.version = kFileVersion;
+ header.add_chunk_count = add_chunks.size();
+ header.sub_chunk_count = sub_chunks.size();
+ header.shard_stride = out_stride;
+ if (!WriteItem(header, fp, context))
return false;
- if (header->magic != kFileMagic || header->version != kFileVersion)
+
+ if (!WriteContainer(add_chunks, fp, context) ||
+ !WriteContainer(sub_chunks, fp, context))
return false;
- if (!FileHeaderSanityCheck(filename, *header))
+
+ // Write out the header digest.
+ base::MD5Digest header_digest;
+ MD5IntermediateDigest(&header_digest, context);
+ if (!WriteItem(header_digest, fp, context))
return false;
+
+ return true;
+}
+
+// Return |true| if the range is sorted by the given comparator.
+template <typename CTI, typename LESS>
+bool sorted(CTI beg, CTI end, LESS less) {
+ while ((end - beg) > 2) {
+ CTI n = beg++;
+ DCHECK(!less(*beg, *n));
+ if (less(*beg, *n))
+ return false;
+ }
return true;
}
+// Merge |beg|..|end| into |container|. Both should be sorted by the given
+// comparator, and the range iterators should not be derived from |container|.
+// Differs from std::inplace_merge() in that additional memory is not required
+// for linear performance.
+template <typename CT, typename CTI, typename COMP>
+void container_merge(CT* container, CTI beg, CTI end, const COMP& less) {
+ DCHECK(sorted(container->begin(), container->end(), less));
+ DCHECK(sorted(beg, end, less));
+
+ // Size the container to fit the results.
+ const size_t c_size = container->size();
+ container->resize(c_size + (end - beg));
+
+ // |c_end| points to the original endpoint, while |c_out| points to the
+ // endpoint that will scan from end to beginning while merging.
+ typename CT::iterator c_end = container->begin() + c_size;
+ typename CT::iterator c_out = container->end();
+
+ // While both inputs have data, move the greater to |c_out|.
+ while (c_end != container->begin() && end != beg) {
+ if (less(*(c_end - 1), *(end - 1))) {
+ *(--c_out) = *(--end);
+ } else {
+ *(--c_out) = *(--c_end);
+ }
+ }
+
+ // Copy any data remaining in the new range.
+ if (end != beg) {
+ // The original container data has been fully shifted.
+ DCHECK(c_end == container->begin());
+
+ // There is exactly the correct amount of space left.
+ DCHECK_EQ(c_out - c_end, end - beg);
+
+ std::copy(beg, end, container->begin());
+ }
+
+ DCHECK(sorted(container->begin(), container->end(), less));
+}
+
+// Collection of iterators used while stepping through StateInternal (see
+// below).
+class StateInternalPos {
+ public:
+ StateInternalPos(SBAddPrefixes::iterator add_prefixes_iter,
+ SBSubPrefixes::iterator sub_prefixes_iter,
+ std::vector<SBAddFullHash>::iterator add_hashes_iter,
+ std::vector<SBSubFullHash>::iterator sub_hashes_iter)
+ : add_prefixes_iter_(add_prefixes_iter),
+ sub_prefixes_iter_(sub_prefixes_iter),
+ add_hashes_iter_(add_hashes_iter),
+ sub_hashes_iter_(sub_hashes_iter) {
+ }
+
+ SBAddPrefixes::iterator add_prefixes_iter_;
+ SBSubPrefixes::iterator sub_prefixes_iter_;
+ std::vector<SBAddFullHash>::iterator add_hashes_iter_;
+ std::vector<SBSubFullHash>::iterator sub_hashes_iter_;
+};
+
+// Helper to find the next shard boundary.
+template <class T>
+bool prefix_bounder(SBPrefix val, const T& elt) {
+ return val < elt.GetAddPrefix();
+}
+
+// Container for partial database state. Includes add/sub prefixes/hashes, plus
+// aggregate operations on same.
+class StateInternal {
+ public:
+ explicit StateInternal(const std::vector<SBAddFullHash>& pending_adds)
+ : add_full_hashes_(pending_adds.begin(), pending_adds.end()) {
+ }
+
+ StateInternal() {}
+
+ // Append indicated amount of data from |fp|.
+ bool AppendData(size_t add_prefix_count, size_t sub_prefix_count,
+ size_t add_hash_count, size_t sub_hash_count,
+ FILE* fp, base::MD5Context* context) {
+ return
+ ReadToContainer(&add_prefixes_, add_prefix_count, fp, context) &&
+ ReadToContainer(&sub_prefixes_, sub_prefix_count, fp, context) &&
+ ReadToContainer(&add_full_hashes_, add_hash_count, fp, context) &&
+ ReadToContainer(&sub_full_hashes_, sub_hash_count, fp, context);
+ }
+
+ void ClearData() {
+ add_prefixes_.clear();
+ sub_prefixes_.clear();
+ add_full_hashes_.clear();
+ sub_full_hashes_.clear();
+ }
+
+ // Merge data from |beg|..|end| into receiver's state, then process the state.
+ // The current state and the range given should correspond to the same sorted
+ // shard of data from different sources. |add_del_cache| and |sub_del_cache|
+ // indicate the chunk ids which should be deleted during processing (see
+ // SBProcessSubs).
+ void MergeDataAndProcess(const StateInternalPos& beg,
+ const StateInternalPos& end,
+ const base::hash_set<int32>& add_del_cache,
+ const base::hash_set<int32>& sub_del_cache) {
+ container_merge(&add_prefixes_,
+ beg.add_prefixes_iter_,
+ end.add_prefixes_iter_,
+ SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
+
+ container_merge(&sub_prefixes_,
+ beg.sub_prefixes_iter_,
+ end.sub_prefixes_iter_,
+ SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
+
+ container_merge(&add_full_hashes_,
+ beg.add_hashes_iter_,
+ end.add_hashes_iter_,
+ SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
+
+ container_merge(&sub_full_hashes_,
+ beg.sub_hashes_iter_,
+ end.sub_hashes_iter_,
+ SBAddPrefixHashLess<SBSubFullHash, SBSubFullHash>);
+
+ SBProcessSubs(&add_prefixes_, &sub_prefixes_,
+ &add_full_hashes_, &sub_full_hashes_,
+ add_del_cache, sub_del_cache);
+ }
+
+ // Sort the data appropriately for the sharding, merging, and processing
+ // operations.
+ void SortData() {
+ std::sort(add_prefixes_.begin(), add_prefixes_.end(),
+ SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
+ std::sort(sub_prefixes_.begin(), sub_prefixes_.end(),
+ SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
+ std::sort(add_full_hashes_.begin(), add_full_hashes_.end(),
+ SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
+ std::sort(sub_full_hashes_.begin(), sub_full_hashes_.end(),
+ SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>);
+ }
+
+ // Iterator from the beginning of the state's data.
+ StateInternalPos StateBegin() {
+ return StateInternalPos(add_prefixes_.begin(),
+ sub_prefixes_.begin(),
+ add_full_hashes_.begin(),
+ sub_full_hashes_.begin());
+ }
+
+ // An iterator pointing just after the last possible element of the shard
+ // indicated by |shard_max|. Used to step through the state by shard.
+ // TODO(shess): Verify whether binary search really improves over linear.
+ // Merging or writing will immediately touch all of these elements.
+ StateInternalPos ShardEnd(const StateInternalPos& beg, SBPrefix shard_max) {
+ return StateInternalPos(
+ std::upper_bound(beg.add_prefixes_iter_, add_prefixes_.end(),
+ shard_max, prefix_bounder<SBAddPrefix>),
+ std::upper_bound(beg.sub_prefixes_iter_, sub_prefixes_.end(),
+ shard_max, prefix_bounder<SBSubPrefix>),
+ std::upper_bound(beg.add_hashes_iter_, add_full_hashes_.end(),
+ shard_max, prefix_bounder<SBAddFullHash>),
+ std::upper_bound(beg.sub_hashes_iter_, sub_full_hashes_.end(),
+ shard_max, prefix_bounder<SBSubFullHash>));
+ }
+
+ // Write a shard header and data for the shard starting at |beg| and ending at
+ // the element before |end|.
+ bool WriteShard(const StateInternalPos& beg, const StateInternalPos& end,
+ FILE* fp, base::MD5Context* context) {
+ ShardHeader shard_header;
+ shard_header.add_prefix_count =
+ end.add_prefixes_iter_ - beg.add_prefixes_iter_;
+ shard_header.sub_prefix_count =
+ end.sub_prefixes_iter_ - beg.sub_prefixes_iter_;
+ shard_header.add_hash_count =
+ end.add_hashes_iter_ - beg.add_hashes_iter_;
+ shard_header.sub_hash_count =
+ end.sub_hashes_iter_ - beg.sub_hashes_iter_;
+
+ return
+ WriteItem(shard_header, fp, context) &&
+ WriteRange(beg.add_prefixes_iter_, end.add_prefixes_iter_,
+ fp, context) &&
+ WriteRange(beg.sub_prefixes_iter_, end.sub_prefixes_iter_,
+ fp, context) &&
+ WriteRange(beg.add_hashes_iter_, end.add_hashes_iter_,
+ fp, context) &&
+ WriteRange(beg.sub_hashes_iter_, end.sub_hashes_iter_,
+ fp, context);
+ }
+
+ SBAddPrefixes add_prefixes_;
+ SBSubPrefixes sub_prefixes_;
+ std::vector<SBAddFullHash> add_full_hashes_;
+ std::vector<SBSubFullHash> sub_full_hashes_;
+};
+
+// True if |val| is an even power of two.
+template <typename T>
+bool IsPowerOfTwo(const T& val) {
+ return val && (val & (val - 1)) == 0;
+}
+
+// Helper to read the entire database state, used by GetAddPrefixes() and
+// GetAddFullHashes(). Those functions are generally used only for smaller
+// files. Returns false in case of errors reading the data.
+bool ReadDbStateHelper(const base::FilePath& filename,
+ StateInternal* db_state) {
+ file_util::ScopedFILE file(base::OpenFile(filename, "rb"));
+ if (file.get() == NULL)
+ return false;
+
+ std::set<int32> add_chunks;
+ std::set<int32> sub_chunks;
+
+ base::MD5Context context;
+ FileHeader header;
+ const int version =
+ ReadAndVerifyHeader(filename, &header, &add_chunks, &sub_chunks,
+ file.get(), &context);
+ if (version == kInvalidVersion)
+ return false;
+
+ if (version == 7) {
+ if (!db_state->AppendData(header.v7.add_prefix_count,
+ header.v7.sub_prefix_count,
+ header.v7.add_hash_count,
+ header.v7.sub_hash_count,
+ file.get(), &context)) {
+ return false;
+ }
+
+ // v7 data was not stored sorted.
+ db_state->SortData();
+ } else {
+ // Read until the shard start overflows, always at least one pass.
+ uint64 in_min = 0;
+ uint64 in_stride = header.v8.shard_stride;
+ if (!in_stride)
+ in_stride = kMaxShardStride;
+ if (!IsPowerOfTwo(in_stride))
+ return false;
+
+ do {
+ ShardHeader shard_header;
+ if (!ReadItem(&shard_header, file.get(), &context))
+ return false;
+
+ if (!db_state->AppendData(shard_header.add_prefix_count,
+ shard_header.sub_prefix_count,
+ shard_header.add_hash_count,
+ shard_header.sub_hash_count,
+ file.get(), &context)) {
+ return false;
+ }
+
+ in_min += in_stride;
+ } while (in_min <= kMaxSBPrefix);
+ }
+
+ if (!ReadAndVerifyChecksum(file.get(), &context))
+ return false;
+
+ int64 size = 0;
+ if (!base::GetFileSize(filename, &size))
+ return false;
+
+ return static_cast<int64>(ftell(file.get())) == size;
+}
+
} // namespace
// static
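How container_merge() above is meant to be driven can be seen in a small sketch (the container contents are made up; the call shape matches MergeDataAndProcess()):

// Both inputs are sorted by the same comparator.  The update range is merged
// into the deque from the back, so no scratch buffer is needed and the result
// stays sorted, ready for SBProcessSubs().
SBAddPrefixes db_prefixes;      // hypothetical: prefixes from one file shard
SBAddPrefixes update_prefixes;  // hypothetical: matching slice of update data
container_merge(&db_prefixes,
                update_prefixes.begin(), update_prefixes.end(),
                SBAddPrefixLess<SBAddPrefix, SBAddPrefix>);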
@@ -258,15 +683,7 @@ bool SafeBrowsingStoreFile::CheckValidity() {
bytes_left -= c;
}
- // Calculate the digest to this point.
- base::MD5Digest calculated_digest;
- base::MD5Final(&calculated_digest, &context);
-
- // Read the stored digest and verify it.
- base::MD5Digest file_digest;
- if (!ReadItem(&file_digest, file_.get(), NULL))
- return OnCorruptDatabase();
- if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) {
+ if (!ReadAndVerifyChecksum(file_.get(), &context)) {
RecordFormatEvent(FORMAT_EVENT_VALIDITY_CHECKSUM_FAILURE);
return OnCorruptDatabase();
}
@@ -293,48 +710,29 @@ bool SafeBrowsingStoreFile::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) {
bool SafeBrowsingStoreFile::GetAddPrefixes(SBAddPrefixes* add_prefixes) {
add_prefixes->clear();
+ if (!base::PathExists(filename_))
+ return true;
- base::ScopedFILE file(base::OpenFile(filename_, "rb"));
- if (file.get() == NULL) return false;
-
- FileHeader header;
- if (!ReadAndVerifyHeader(filename_, file.get(), &header, NULL))
+ StateInternal db_state;
+ if (!ReadDbStateHelper(filename_, &db_state))
return OnCorruptDatabase();
- size_t add_prefix_offset = header.add_chunk_count * sizeof(int32) +
- header.sub_chunk_count * sizeof(int32);
- if (!FileSkip(add_prefix_offset, file.get()))
- return false;
-
- if (!ReadToContainer(add_prefixes, header.add_prefix_count, file.get(), NULL))
- return false;
-
+ add_prefixes->swap(db_state.add_prefixes_);
return true;
}
bool SafeBrowsingStoreFile::GetAddFullHashes(
std::vector<SBAddFullHash>* add_full_hashes) {
add_full_hashes->clear();
+ if (!base::PathExists(filename_))
+ return true;
- base::ScopedFILE file(base::OpenFile(filename_, "rb"));
- if (file.get() == NULL) return false;
-
- FileHeader header;
- if (!ReadAndVerifyHeader(filename_, file.get(), &header, NULL))
+ StateInternal db_state;
+ if (!ReadDbStateHelper(filename_, &db_state))
return OnCorruptDatabase();
- size_t offset =
- header.add_chunk_count * sizeof(int32) +
- header.sub_chunk_count * sizeof(int32) +
- header.add_prefix_count * sizeof(SBAddPrefix) +
- header.sub_prefix_count * sizeof(SBSubPrefix);
- if (!FileSkip(offset, file.get()))
- return false;
-
- return ReadToContainer(add_full_hashes,
- header.add_hash_count,
- file.get(),
- NULL);
+ add_full_hashes->swap(db_state.add_full_hashes_);
+ return true;
}
bool SafeBrowsingStoreFile::WriteAddHash(int32 chunk_id,
@@ -415,15 +813,26 @@ bool SafeBrowsingStoreFile::BeginUpdate() {
return true;
}
+ base::MD5Context context;
FileHeader header;
- if (!ReadItem(&header, file.get(), NULL))
- return OnCorruptDatabase();
-
- if (header.magic != kFileMagic || header.version != kFileVersion) {
- if (!strcmp(reinterpret_cast<char*>(&header.magic), "SQLite format 3")) {
- RecordFormatEvent(FORMAT_EVENT_FOUND_SQLITE);
- } else {
- RecordFormatEvent(FORMAT_EVENT_FOUND_UNKNOWN);
+ const int version =
+ ReadAndVerifyHeader(filename_, &header,
+ &add_chunks_cache_, &sub_chunks_cache_,
+ file.get(), &context);
+ if (version == kInvalidVersion) {
+ FileHeaderV8 retry_header;
+ if (FileRewind(file.get()) && ReadItem(&retry_header, file.get(), NULL) &&
+ (retry_header.magic != kFileMagic ||
+ (retry_header.version != 8 && retry_header.version != 7))) {
+ // TODO(shess): Think on whether these histograms are generating any
+ // actionable data. I kid you not, SQLITE happens many thousands of times
+ // per day, UNKNOWN about 3x higher than that.
+ if (!strcmp(reinterpret_cast<char*>(&retry_header.magic),
+ "SQLite format 3")) {
+ RecordFormatEvent(FORMAT_EVENT_FOUND_SQLITE);
+ } else {
+ RecordFormatEvent(FORMAT_EVENT_FOUND_UNKNOWN);
+ }
}
// Close the file so that it can be deleted.
@@ -432,20 +841,6 @@ bool SafeBrowsingStoreFile::BeginUpdate() {
return OnCorruptDatabase();
}
- // TODO(shess): Under POSIX it is possible that this could size a
- // file different from the file which was opened.
- if (!FileHeaderSanityCheck(filename_, header))
- return OnCorruptDatabase();
-
- // Pull in the chunks-seen data for purposes of implementing
- // |GetAddChunks()| and |GetSubChunks()|. This data is sent up to
- // the server at the beginning of an update.
- if (!ReadToContainer(&add_chunks_cache_, header.add_chunk_count,
- file.get(), NULL) ||
- !ReadToContainer(&sub_chunks_cache_, header.sub_chunk_count,
- file.get(), NULL))
- return OnCorruptDatabase();
-
file_.swap(file);
new_file_.swap(new_file);
return true;
@@ -485,80 +880,25 @@ bool SafeBrowsingStoreFile::DoUpdate(
CHECK(builder);
CHECK(add_full_hashes_result);
- SBAddPrefixes add_prefixes;
- SBSubPrefixes sub_prefixes;
- std::vector<SBAddFullHash> add_full_hashes;
- std::vector<SBSubFullHash> sub_full_hashes;
-
- // Read original data into the vectors.
- if (!empty_) {
- DCHECK(file_.get());
-
- if (!FileRewind(file_.get()))
- return OnCorruptDatabase();
-
- base::MD5Context context;
- base::MD5Init(&context);
-
- // Read the file header and make sure it looks right.
- FileHeader header;
- if (!ReadAndVerifyHeader(filename_, file_.get(), &header, &context))
- return OnCorruptDatabase();
-
- // Re-read the chunks-seen data to get to the later data in the
- // file and calculate the checksum. No new elements should be
- // added to the sets.
- if (!ReadToContainer(&add_chunks_cache_, header.add_chunk_count,
- file_.get(), &context) ||
- !ReadToContainer(&sub_chunks_cache_, header.sub_chunk_count,
- file_.get(), &context))
- return OnCorruptDatabase();
-
- if (!ReadToContainer(&add_prefixes, header.add_prefix_count,
- file_.get(), &context) ||
- !ReadToContainer(&sub_prefixes, header.sub_prefix_count,
- file_.get(), &context) ||
- !ReadToContainer(&add_full_hashes, header.add_hash_count,
- file_.get(), &context) ||
- !ReadToContainer(&sub_full_hashes, header.sub_hash_count,
- file_.get(), &context))
- return OnCorruptDatabase();
-
- // Calculate the digest to this point.
- base::MD5Digest calculated_digest;
- base::MD5Final(&calculated_digest, &context);
-
- // Read the stored checksum and verify it.
- base::MD5Digest file_digest;
- if (!ReadItem(&file_digest, file_.get(), NULL))
- return OnCorruptDatabase();
-
- if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) {
- RecordFormatEvent(FORMAT_EVENT_UPDATE_CHECKSUM_FAILURE);
- return OnCorruptDatabase();
- }
-
- // Close the file so we can later rename over it.
- file_.reset();
- }
- DCHECK(!file_.get());
-
// Rewind the temporary storage.
if (!FileRewind(new_file_.get()))
return false;
// Get chunk file's size for validating counts.
- int64 size = 0;
- if (!base::GetFileSize(TemporaryFileForFilename(filename_), &size))
+ int64 update_size = 0;
+ if (!base::GetFileSize(TemporaryFileForFilename(filename_), &update_size))
return OnCorruptDatabase();
// Track update size to answer questions at http://crbug.com/72216 .
// Log small updates as 1k so that the 0 (underflow) bucket can be
// used for "empty" in SafeBrowsingDatabase.
UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes",
- std::max(static_cast<int>(size / 1024), 1));
+ std::max(static_cast<int>(update_size / 1024), 1));
- // Append the accumulated chunks onto the vectors read from |file_|.
+ // Chunk updates to integrate.
+ StateInternal new_state(pending_adds);
+
+ // Read update chunks.
for (int i = 0; i < chunks_written_; ++i) {
ChunkHeader header;
@@ -576,73 +916,202 @@ bool SafeBrowsingStoreFile::DoUpdate(
expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
expected_size += header.add_hash_count * sizeof(SBAddFullHash);
expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
- if (expected_size > size)
+ if (expected_size > update_size)
return false;
- // TODO(shess): If the vectors were kept sorted, then this code
- // could use std::inplace_merge() to merge everything together in
- // sorted order. That might still be slower than just sorting at
- // the end if there were a large number of chunks. In that case
- // some sort of recursive binary merge might be in order (merge
- // chunks pairwise, merge those chunks pairwise, and so on, then
- // merge the result with the main list).
- if (!ReadToContainer(&add_prefixes, header.add_prefix_count,
- new_file_.get(), NULL) ||
- !ReadToContainer(&sub_prefixes, header.sub_prefix_count,
- new_file_.get(), NULL) ||
- !ReadToContainer(&add_full_hashes, header.add_hash_count,
- new_file_.get(), NULL) ||
- !ReadToContainer(&sub_full_hashes, header.sub_hash_count,
- new_file_.get(), NULL))
+ if (!new_state.AppendData(header.add_prefix_count, header.sub_prefix_count,
+ header.add_hash_count, header.sub_hash_count,
+ new_file_.get(), NULL)) {
return false;
+ }
}
- // Append items from |pending_adds|.
- add_full_hashes.insert(add_full_hashes.end(),
- pending_adds.begin(), pending_adds.end());
+ // The state was accumulated by chunk, sort by prefix.
+ new_state.SortData();
+
+ // These strides control how much data is loaded into memory per pass.
+ // Strides must be an even power of two. |in_stride| will be derived from the
+ // input file. |out_stride| will be derived from an estimate of the resulting
+ // file's size. |process_stride| will be the max of both.
+ uint64 in_stride = kMaxShardStride;
+ uint64 out_stride = kMaxShardStride;
+ uint64 process_stride = 0;
+
+ // The header info is only used later if |!empty_|. The v8 read loop only
+ // needs |in_stride|, while v7 needs to refer to header information.
+ base::MD5Context in_context;
+ int version = kInvalidVersion;
+ FileHeader header;
- // Knock the subs from the adds and process deleted chunks.
- SBProcessSubs(&add_prefixes, &sub_prefixes,
- &add_full_hashes, &sub_full_hashes,
- add_del_cache_, sub_del_cache_);
+ if (!empty_) {
+ DCHECK(file_.get());
+
+ version = ReadAndVerifyHeader(filename_, &header,
+ &add_chunks_cache_, &sub_chunks_cache_,
+ file_.get(), &in_context);
+ if (version == kInvalidVersion)
+ return OnCorruptDatabase();
+
+ if (version == 8 && header.v8.shard_stride)
+ in_stride = header.v8.shard_stride;
+
+ // The header checksum should have prevented this case, but the code will be
+ // broken if this is not correct.
+ if (!IsPowerOfTwo(in_stride))
+ return OnCorruptDatabase();
+ }
// We no longer need to track deleted chunks.
DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);
- // Write the new data to new_file_.
- if (!FileRewind(new_file_.get()))
- return false;
+ // Calculate |out_stride| to break the file down into reasonable shards.
+ {
+ int64 original_size = 0;
+ if (!empty_ && !base::GetFileSize(filename_, &original_size))
+ return OnCorruptDatabase();
- base::MD5Context context;
- base::MD5Init(&context);
+ // Approximate the final size as everything. Subs and deletes will reduce
+ // the size, but modest over-sharding won't hurt much.
+ int64 shard_size = original_size + update_size;
- // Write a file header.
- FileHeader header;
- header.magic = kFileMagic;
- header.version = kFileVersion;
- header.add_chunk_count = add_chunks_cache_.size();
- header.sub_chunk_count = sub_chunks_cache_.size();
- header.add_prefix_count = add_prefixes.size();
- header.sub_prefix_count = sub_prefixes.size();
- header.add_hash_count = add_full_hashes.size();
- header.sub_hash_count = sub_full_hashes.size();
- if (!WriteItem(header, new_file_.get(), &context))
- return false;
+ // Keep splitting until a single stride of data fits the target.
+ size_t shifts = 0;
+ while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) {
+ out_stride >>= 1;
+ shard_size >>= 1;
+ ++shifts;
+ }
+ UMA_HISTOGRAM_COUNTS("SB2.OutShardShifts", shifts);
+
+ DCHECK(IsPowerOfTwo(out_stride));
+ }
- // Write all the chunk data.
- if (!WriteContainer(add_chunks_cache_, new_file_.get(), &context) ||
- !WriteContainer(sub_chunks_cache_, new_file_.get(), &context) ||
- !WriteContainer(add_prefixes, new_file_.get(), &context) ||
- !WriteContainer(sub_prefixes, new_file_.get(), &context) ||
- !WriteContainer(add_full_hashes, new_file_.get(), &context) ||
- !WriteContainer(sub_full_hashes, new_file_.get(), &context))
+ // Outer loop strides by the max of the input stride (to read integral shards)
+ // and the output stride (to write integral shards).
+ process_stride = std::max(in_stride, out_stride);
+ DCHECK(IsPowerOfTwo(process_stride));
+ DCHECK_EQ(0u, process_stride % in_stride);
+ DCHECK_EQ(0u, process_stride % out_stride);
+
+ // Start writing the new data to |new_file_|.
+ base::MD5Context out_context;
+ if (!WriteHeader(out_stride, add_chunks_cache_, sub_chunks_cache_,
+ new_file_.get(), &out_context)) {
return false;
+ }
+
+ // Start at the beginning of the SBPrefix space.
+ uint64 in_min = 0;
+ uint64 out_min = 0;
+ uint64 process_min = 0;
+
+ // Start at the beginning of the updates.
+ StateInternalPos new_pos = new_state.StateBegin();
+
+ // Re-usable container for shard processing.
+ StateInternal db_state;
+
+ // Track aggregate counts for histograms.
+ size_t add_prefix_count = 0;
+ size_t sub_prefix_count = 0;
+
+ do {
+ // Maximum element in the current shard.
+ SBPrefix process_max =
+ static_cast<SBPrefix>(process_min + process_stride - 1);
+ DCHECK_GT(process_max, process_min);
+
+ // Drop the data from previous pass.
+ db_state.ClearData();
+
+ // Fill the processing shard with one or more input shards.
+ if (!empty_) {
+ if (version == 7) {
+ // Treat v7 as a single-shard file.
+ DCHECK_EQ(in_min, 0u);
+ DCHECK_EQ(in_stride, kMaxShardStride);
+ DCHECK_EQ(process_stride, kMaxShardStride);
+ if (!db_state.AppendData(header.v7.add_prefix_count,
+ header.v7.sub_prefix_count,
+ header.v7.add_hash_count,
+ header.v7.sub_hash_count,
+ file_.get(), &in_context))
+ return OnCorruptDatabase();
+
+ // v7 data is not sorted correctly.
+ db_state.SortData();
+ } else {
+ do {
+ ShardHeader shard_header;
+ if (!ReadItem(&shard_header, file_.get(), &in_context))
+ return OnCorruptDatabase();
+
+ if (!db_state.AppendData(shard_header.add_prefix_count,
+ shard_header.sub_prefix_count,
+ shard_header.add_hash_count,
+ shard_header.sub_hash_count,
+ file_.get(), &in_context))
+ return OnCorruptDatabase();
+
+ in_min += in_stride;
+ } while (in_min <= kMaxSBPrefix && in_min < process_max);
+ }
+ }
- // Write the checksum at the end.
- base::MD5Digest digest;
- base::MD5Final(&digest, &context);
- if (!WriteItem(digest, new_file_.get(), NULL))
+ // Shard the update data to match the database data, then merge the update
+ // data and process the results.
+ {
+ StateInternalPos new_end = new_state.ShardEnd(new_pos, process_max);
+ db_state.MergeDataAndProcess(new_pos, new_end,
+ add_del_cache_, sub_del_cache_);
+ new_pos = new_end;
+ }
+
+ // Collect the processed data for return to caller.
+ for (size_t i = 0; i < db_state.add_prefixes_.size(); ++i) {
+ builder->AddPrefix(db_state.add_prefixes_[i].prefix);
+ }
+ add_full_hashes_result->insert(add_full_hashes_result->end(),
+ db_state.add_full_hashes_.begin(),
+ db_state.add_full_hashes_.end());
+ add_prefix_count += db_state.add_prefixes_.size();
+ sub_prefix_count += db_state.sub_prefixes_.size();
+
+ // Write one or more shards of processed output.
+ StateInternalPos out_pos = db_state.StateBegin();
+ do {
+ SBPrefix out_max = static_cast<SBPrefix>(out_min + out_stride - 1);
+ DCHECK_GT(out_max, out_min);
+
+ StateInternalPos out_end = db_state.ShardEnd(out_pos, out_max);
+ if (!db_state.WriteShard(out_pos, out_end, new_file_.get(), &out_context))
+ return false;
+ out_pos = out_end;
+
+ out_min += out_stride;
+ } while (out_min == static_cast<SBPrefix>(out_min) &&
+ out_min < process_max);
+
+ process_min += process_stride;
+ } while (process_min <= kMaxSBPrefix);
+
+ // Verify the overall checksum.
+ if (!empty_) {
+ if (!ReadAndVerifyChecksum(file_.get(), &in_context))
+ return OnCorruptDatabase();
+
+ // TODO(shess): Verify EOF?
+
+ // Close the input file so the new file can be renamed over it.
+ file_.reset();
+ }
+ DCHECK(!file_.get());
+
+ // Write the overall checksum.
+ base::MD5Digest out_digest;
+ base::MD5Final(&out_digest, &out_context);
+ if (!WriteItem(out_digest, new_file_.get(), NULL))
return false;
// Trim any excess left over from the temporary chunk data.
@@ -660,15 +1129,8 @@ bool SafeBrowsingStoreFile::DoUpdate(
return false;
// Record counts before swapping to caller.
- UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_prefixes.size());
- UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", sub_prefixes.size());
-
- // Pass the resulting data off to the caller.
- for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
- iter != add_prefixes.end(); ++iter) {
- builder->AddPrefix(iter->prefix);
- }
- add_full_hashes_result->swap(add_full_hashes);
+ UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_prefix_count);
+ UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", sub_prefix_count);
return true;
}
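A worked example of the stride arithmetic in DoUpdate() above (the numbers are illustrative only):

//   in_stride      = 1 << 32   (a v7 store, or a v8 header with shard_stride == 0)
//   original_size + update_size ~= 600 KiB
//   out_stride halves until a shard fits kUpdateStorageBytes (100 KiB):
//     600K -> 300K -> 150K -> 75K, so out_stride = 1 << 29 (8 output shards)
//   process_stride = max(in_stride, out_stride) = 1 << 32
// Result: a single processing pass reads the whole old store and writes eight
// output shards; the next update can then stream the file shard by shard
// instead of loading everything at once.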
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.h b/chrome/browser/safe_browsing/safe_browsing_store_file.h
index 6b821b0..c8a2429 100644
--- a/chrome/browser/safe_browsing/safe_browsing_store_file.h
+++ b/chrome/browser/safe_browsing/safe_browsing_store_file.h
@@ -21,38 +21,60 @@
// int32 version; // format version
//
// // Counts for the various data which follows the header.
-// uint32 add_chunk_count; // Chunks seen, including empties.
-// uint32 sub_chunk_count; // Ditto.
-// uint32 add_prefix_count;
-// uint32 sub_prefix_count;
-// uint32 add_hash_count;
-// uint32 sub_hash_count;
-//
+// uint32 add_chunk_count; // Chunks seen, including empties.
+// uint32 sub_chunk_count; // Ditto.
+// uint32 shard_stride; // SBPrefix space covered per shard.
+// // 0==entire space in one shard.
+// // Sorted by chunk_id.
// array[add_chunk_count] {
// int32 chunk_id;
// }
+// // Sorted by chunk_id.
// array[sub_chunk_count] {
// int32 chunk_id;
// }
-// array[add_prefix_count] {
-// int32 chunk_id;
-// uint32 prefix;
-// }
-// array[sub_prefix_count] {
-// int32 chunk_id;
-// int32 add_chunk_id;
-// uint32 add_prefix;
-// }
-// array[add_hash_count] {
-// int32 chunk_id;
-// int32 received_time; // From base::Time::ToTimeT().
-// char[32] full_hash;
-// array[sub_hash_count] {
-// int32 chunk_id;
-// int32 add_chunk_id;
-// char[32] add_full_hash;
+// MD5Digest header_checksum; // Checksum over preceding data.
+//
+// // Sorted by prefix, then add chunk_id, then hash, both within shards and
+// // overall.
+// array[from 0 to wraparound to 0 by shard_stride] {
+// uint32 add_prefix_count;
+// uint32 sub_prefix_count;
+// uint32 add_hash_count;
+// uint32 sub_hash_count;
+// array[add_prefix_count] {
+// int32 chunk_id;
+// uint32 prefix;
+// }
+// array[sub_prefix_count] {
+// int32 chunk_id;
+// int32 add_chunk_id;
+// uint32 add_prefix;
+// }
+// array[add_hash_count] {
+// int32 chunk_id;
+// int32 received_time; // From base::Time::ToTimeT().
+// char[32] full_hash;
+// }
+// array[sub_hash_count] {
+// int32 chunk_id;
+// int32 add_chunk_id;
+// char[32] add_full_hash;
+// }
// }
-// MD5Digest checksum; // Checksum over preceeding data.
+// MD5Digest checksum; // Checksum over entire file.
+//
+// The checksums are used to allow writing the file without doing an expensive
+// fsync(). Since the data can be re-fetched, failing the checksum is not
+// catastrophic. Histograms indicate that file corruption here is pretty
+// uncommon.
+//
+// The |header_checksum| is present to guarantee valid header and chunk data for
+// updates. Only that part of the file needs to be read to post the update.
+//
+// |shard_stride| breaks the file into approximately-equal portions, allowing
+// updates to stream from one file to another with modest memory usage. It is
+// dynamic to adjust to different file sizes without adding excessive overhead.
//
// During the course of an update, uncommitted data is stored in a
// temporary file (which is later re-used to commit). This is an
@@ -91,19 +113,15 @@
// - Open a temp file for storing new chunk info.
// - Write new chunks to the temp file.
// - When the transaction is finished:
-// - Read the rest of the original file's data into buffers.
-// - Rewind the temp file and merge the new data into buffers.
-// - Process buffers for deletions and apply subs.
-// - Rewind and write the buffers out to temp file.
+// - Read the update data from the temp file into memory.
+// - Overwrite the temp file with new header data.
+// - Until done:
+// - Read shards of the original file's data into memory.
+// - Merge from the update data.
+// - Write shards to the temp file.
// - Delete original file.
// - Rename temp file to original filename.
-// TODO(shess): By using a checksum, this code can avoid doing an
-// fsync(), at the possible cost of more frequently retrieving the
-// full dataset. Measure how often this occurs, and if it occurs too
-// often, consider retaining the last known-good file for recovery
-// purposes, rather than deleting it.
-
class SafeBrowsingStoreFile : public SafeBrowsingStore {
public:
SafeBrowsingStoreFile();
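The "from 0 to wraparound to 0 by shard_stride" phrasing in the format comment above corresponds to a loop like the following sketch (ShardHeader, kMaxShardStride and kMaxSBPrefix are from safe_browsing_store_file.cc; the 64-bit cursor matters, because with 32-bit arithmetic the final increment would wrap to 0 and the loop would never terminate):

uint32 shard_stride = 0;  // value read from FileHeaderV8; 0 means one shard
uint64 in_stride = shard_stride ? shard_stride : kMaxShardStride;
uint64 in_min = 0;
do {
  // Read one ShardHeader and its four arrays here (see ReadDbStateHelper()).
  in_min += in_stride;
} while (in_min <= kMaxSBPrefix);  // stops once the cursor passes the 32-bit range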
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc
index 4f52241..c29fa60 100644
--- a/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc
@@ -28,6 +28,9 @@ const SBFullHash kHash3 = SBFullHashForString("three");
const SBFullHash kHash4 = SBFullHashForString("four");
const SBFullHash kHash5 = SBFullHashForString("five");
+const SBPrefix kMinSBPrefix = 0u;
+const SBPrefix kMaxSBPrefix = ~kMinSBPrefix;
+
class SafeBrowsingStoreFileTest : public PlatformTest {
public:
virtual void SetUp() {
@@ -58,7 +61,7 @@ class SafeBrowsingStoreFileTest : public PlatformTest {
// Populate the store with some testing data.
void PopulateStore(const base::Time& now) {
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
EXPECT_TRUE(store_->BeginChunk());
store_->SetAddChunk(kAddChunk1);
@@ -85,7 +88,17 @@ class SafeBrowsingStoreFileTest : public PlatformTest {
EXPECT_TRUE(store_->FinishUpdate(pending_adds,
&builder,
&add_full_hashes_result));
-}
+ }
+
+ // Manually read the shard stride info from the file.
+ uint32 ReadStride() {
+ base::ScopedFILE file(base::OpenFile(filename_, "rb"));
+ const long kOffset = 4 * sizeof(uint32);
+ EXPECT_EQ(fseek(file.get(), kOffset, SEEK_SET), 0);
+ uint32 shard_stride = 0;
+ EXPECT_EQ(fread(&shard_stride, sizeof(shard_stride), 1, file.get()), 1U);
+ return shard_stride;
+ }
base::ScopedTempDir temp_dir_;
base::FilePath filename_;
@@ -95,7 +108,7 @@ class SafeBrowsingStoreFileTest : public PlatformTest {
// Test that the empty store looks empty.
TEST_F(SafeBrowsingStoreFileTest, Empty) {
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
std::vector<int> chunks;
store_->GetAddChunks(&chunks);
@@ -132,7 +145,7 @@ TEST_F(SafeBrowsingStoreFileTest, StorePrefix) {
const base::Time now = base::Time::Now();
PopulateStore(now);
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
std::vector<int> chunks;
store_->GetAddChunks(&chunks);
@@ -164,7 +177,7 @@ TEST_F(SafeBrowsingStoreFileTest, StorePrefix) {
EXPECT_TRUE(SBFullHashEqual(kHash2, add_full_hashes_result[0].full_hash));
}
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
// Still has the chunks expected in the next update.
store_->GetAddChunks(&chunks);
@@ -200,9 +213,63 @@ TEST_F(SafeBrowsingStoreFileTest, StorePrefix) {
}
}
+// Verify that the min and max prefixes are stored and operated on.
+TEST_F(SafeBrowsingStoreFileTest, PrefixMinMax) {
+ const base::Time now = base::Time::Now();
+ PopulateStore(now);
+
+ ASSERT_TRUE(store_->BeginUpdate());
+
+ EXPECT_TRUE(store_->BeginChunk());
+ store_->SetAddChunk(kAddChunk2);
+ EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk2, kMinSBPrefix));
+ EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk2, kMaxSBPrefix));
+ EXPECT_TRUE(store_->FinishChunk());
+
+ {
+ std::vector<SBAddFullHash> pending_adds;
+ safe_browsing::PrefixSetBuilder builder;
+ std::vector<SBAddFullHash> add_full_hashes_result;
+ EXPECT_TRUE(store_->FinishUpdate(pending_adds,
+ &builder,
+ &add_full_hashes_result));
+
+ std::vector<SBPrefix> prefixes_result;
+ builder.GetPrefixSet()->GetPrefixes(&prefixes_result);
+ ASSERT_EQ(4U, prefixes_result.size());
+ EXPECT_EQ(kMinSBPrefix, prefixes_result[0]);
+ EXPECT_EQ(kHash1.prefix, prefixes_result[1]);
+ EXPECT_EQ(kHash2.prefix, prefixes_result[2]);
+ EXPECT_EQ(kMaxSBPrefix, prefixes_result[3]);
+ }
+
+ ASSERT_TRUE(store_->BeginUpdate());
+
+ EXPECT_TRUE(store_->BeginChunk());
+ store_->SetAddChunk(kSubChunk2);
+ EXPECT_TRUE(store_->WriteSubPrefix(kSubChunk2, kAddChunk2, kMinSBPrefix));
+ EXPECT_TRUE(store_->WriteSubPrefix(kSubChunk2, kAddChunk2, kMaxSBPrefix));
+ EXPECT_TRUE(store_->FinishChunk());
+
+ {
+ std::vector<SBAddFullHash> pending_adds;
+ safe_browsing::PrefixSetBuilder builder;
+ std::vector<SBAddFullHash> add_full_hashes_result;
+ EXPECT_TRUE(store_->FinishUpdate(pending_adds,
+ &builder,
+ &add_full_hashes_result));
+
+ std::vector<SBPrefix> prefixes_result;
+ builder.GetPrefixSet()->GetPrefixes(&prefixes_result);
+ ASSERT_EQ(2U, prefixes_result.size());
+ EXPECT_EQ(kHash1.prefix, prefixes_result[0]);
+ EXPECT_EQ(kHash2.prefix, prefixes_result[1]);
+ }
+}
+
// Test that subs knockout adds.
TEST_F(SafeBrowsingStoreFileTest, SubKnockout) {
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
const base::Time now = base::Time::Now();
@@ -233,7 +300,7 @@ TEST_F(SafeBrowsingStoreFileTest, SubKnockout) {
EXPECT_TRUE(add_full_hashes_result.empty());
}
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
// This add should be knocked out by an existing sub.
EXPECT_TRUE(store_->BeginChunk());
@@ -256,7 +323,7 @@ TEST_F(SafeBrowsingStoreFileTest, SubKnockout) {
EXPECT_TRUE(add_full_hashes_result.empty());
}
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
// But by here the sub should be gone, so it should stick this time.
EXPECT_TRUE(store_->BeginChunk());
@@ -283,7 +350,7 @@ TEST_F(SafeBrowsingStoreFileTest, SubKnockout) {
// Test that deletes delete the chunk's data.
TEST_F(SafeBrowsingStoreFileTest, DeleteChunks) {
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
const base::Time now = base::Time::Now();
@@ -349,7 +416,7 @@ TEST_F(SafeBrowsingStoreFileTest, DeleteChunks) {
}
// Expected chunks are there in another update.
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
EXPECT_FALSE(store_->CheckAddChunk(kAddChunk1));
EXPECT_TRUE(store_->CheckAddChunk(kAddChunk2));
EXPECT_FALSE(store_->CheckSubChunk(kSubChunk1));
@@ -369,7 +436,7 @@ TEST_F(SafeBrowsingStoreFileTest, DeleteChunks) {
}
// Expect no more chunks.
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
EXPECT_FALSE(store_->CheckAddChunk(kAddChunk1));
EXPECT_FALSE(store_->CheckAddChunk(kAddChunk2));
EXPECT_FALSE(store_->CheckSubChunk(kSubChunk1));
@@ -413,7 +480,7 @@ TEST_F(SafeBrowsingStoreFileTest, DeleteTemp) {
EXPECT_FALSE(base::PathExists(temp_file));
// Starting a transaction creates a temporary file.
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
EXPECT_TRUE(base::PathExists(temp_file));
// Pull the rug out from under the existing store, simulating a
@@ -440,7 +507,7 @@ TEST_F(SafeBrowsingStoreFileTest, DetectsCorruption) {
std::vector<SBPrefix> orig_prefixes;
std::vector<SBAddFullHash> orig_hashes;
safe_browsing::PrefixSetBuilder builder;
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
EXPECT_TRUE(store_->FinishUpdate(pending_adds, &builder, &orig_hashes));
builder.GetPrefixSet()->GetPrefixes(&orig_prefixes);
EXPECT_GT(orig_prefixes.size(), 0U);
@@ -466,7 +533,7 @@ TEST_F(SafeBrowsingStoreFileTest, DetectsCorruption) {
{
std::vector<SBAddFullHash> pending_adds;
safe_browsing::PrefixSetBuilder builder;
- EXPECT_TRUE(store_->BeginUpdate());
+ ASSERT_TRUE(store_->BeginUpdate());
EXPECT_FALSE(store_->FinishUpdate(pending_adds, &builder, &add_hashes));
EXPECT_TRUE(corruption_detected_);
}
@@ -505,14 +572,13 @@ TEST_F(SafeBrowsingStoreFileTest, CheckValidity) {
EXPECT_TRUE(store_->CancelUpdate());
}
-// Corrupt the payload.
-TEST_F(SafeBrowsingStoreFileTest, CheckValidityPayload) {
+// Corrupt the header.
+TEST_F(SafeBrowsingStoreFileTest, CheckValidityHeader) {
PopulateStore(base::Time::Now());
EXPECT_TRUE(base::PathExists(filename_));
- // 37 is the most random prime number. It's also past the header,
- // as corrupting the header would fail BeginUpdate() in which case
- // CheckValidity() cannot be called.
+ // 37 is the most random prime number. It's also past the initial header
+ // struct, somewhere in the chunk list.
const size_t kOffset = 37;
{
@@ -520,6 +586,25 @@ TEST_F(SafeBrowsingStoreFileTest, CheckValidityPayload) {
EXPECT_EQ(0, fseek(file.get(), kOffset, SEEK_SET));
EXPECT_GE(fputs("hello", file.get()), 0);
}
+ ASSERT_FALSE(store_->BeginUpdate());
+ EXPECT_TRUE(corruption_detected_);
+}
+
+// Corrupt the prefix payload.
+TEST_F(SafeBrowsingStoreFileTest, CheckValidityPayload) {
+ PopulateStore(base::Time::Now());
+ EXPECT_TRUE(base::PathExists(filename_));
+
+ // 137 is the second most random prime number. It's also past the header and
+ // chunk-id area. Corrupting the header would fail BeginUpdate() in which
+ // case CheckValidity() cannot be called.
+ const size_t kOffset = 137;
+
+ {
+ base::ScopedFILE file(base::OpenFile(filename_, "rb+"));
+ EXPECT_EQ(0, fseek(file.get(), kOffset, SEEK_SET));
+ EXPECT_GE(fputs("hello", file.get()), 0);
+ }
ASSERT_TRUE(store_->BeginUpdate());
EXPECT_FALSE(corruption_detected_);
EXPECT_FALSE(store_->CheckValidity());
@@ -547,4 +632,131 @@ TEST_F(SafeBrowsingStoreFileTest, CheckValidityChecksum) {
EXPECT_TRUE(store_->CancelUpdate());
}
+TEST_F(SafeBrowsingStoreFileTest, GetAddPrefixesAndHashes) {
+ ASSERT_TRUE(store_->BeginUpdate());
+
+ const base::Time now = base::Time::Now();
+
+ EXPECT_TRUE(store_->BeginChunk());
+ store_->SetAddChunk(kAddChunk1);
+ EXPECT_TRUE(store_->CheckAddChunk(kAddChunk1));
+ EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk1, kHash1.prefix));
+ EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk1, kHash2.prefix));
+ EXPECT_TRUE(store_->WriteAddHash(kAddChunk1, now, kHash2));
+
+ store_->SetSubChunk(kSubChunk1);
+ EXPECT_TRUE(store_->CheckSubChunk(kSubChunk1));
+ EXPECT_TRUE(store_->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash3.prefix));
+ EXPECT_TRUE(store_->WriteSubHash(kSubChunk1, kAddChunk3, kHash3));
+ EXPECT_TRUE(store_->FinishChunk());
+
+ // Chunk numbers shouldn't leak over.
+ EXPECT_FALSE(store_->CheckAddChunk(kSubChunk1));
+ EXPECT_FALSE(store_->CheckAddChunk(kAddChunk3));
+ EXPECT_FALSE(store_->CheckSubChunk(kAddChunk1));
+
+ std::vector<int> chunks;
+ store_->GetAddChunks(&chunks);
+ ASSERT_EQ(1U, chunks.size());
+ EXPECT_EQ(kAddChunk1, chunks[0]);
+
+ store_->GetSubChunks(&chunks);
+ ASSERT_EQ(1U, chunks.size());
+ EXPECT_EQ(kSubChunk1, chunks[0]);
+
+ safe_browsing::PrefixSetBuilder builder;
+ std::vector<SBAddFullHash> add_full_hashes_result;
+ EXPECT_TRUE(store_->FinishUpdate(std::vector<SBAddFullHash>(),
+ &builder,
+ &add_full_hashes_result));
+
+ SBAddPrefixes add_prefixes;
+ EXPECT_TRUE(store_->GetAddPrefixes(&add_prefixes));
+ ASSERT_EQ(2U, add_prefixes.size());
+ EXPECT_EQ(kAddChunk1, add_prefixes[0].chunk_id);
+ EXPECT_EQ(kHash1.prefix, add_prefixes[0].prefix);
+ EXPECT_EQ(kAddChunk1, add_prefixes[1].chunk_id);
+ EXPECT_EQ(kHash2.prefix, add_prefixes[1].prefix);
+
+ std::vector<SBAddFullHash> add_hashes;
+ EXPECT_TRUE(store_->GetAddFullHashes(&add_hashes));
+ ASSERT_EQ(1U, add_hashes.size());
+ EXPECT_EQ(kAddChunk1, add_hashes[0].chunk_id);
+ EXPECT_TRUE(SBFullHashEqual(kHash2, add_hashes[0].full_hash));
+}
+
+// Test that the database handles resharding correctly, both when growing and
+// when shrinking.
+TEST_F(SafeBrowsingStoreFileTest, Resharding) {
+ // Loop through multiple stride boundaries (1<<32, 1<<31, 1<<30, 1<<29).
+ const uint32 kTargetStride = 1 << 29;
+
+ // Each chunk will require 8 bytes per prefix, plus 4 bytes for chunk
+ // information. It should be less than |kTargetFootprint| in the
+ // implementation, but high enough to keep the number of rewrites modest (to
+ // keep the test fast).
+ const size_t kPrefixesPerChunk = 10000;
+
+ uint32 shard_stride = 0;
+ int chunk_id = 1;
+
+ // Add a series of chunks, tracking that the stride size changes in a
+ // direction appropriate to increasing file size.
+ do {
+ ASSERT_TRUE(store_->BeginUpdate());
+
+ EXPECT_TRUE(store_->BeginChunk());
+ store_->SetAddChunk(chunk_id);
+ EXPECT_TRUE(store_->CheckAddChunk(chunk_id));
+ for (size_t i = 0; i < kPrefixesPerChunk; ++i) {
+ EXPECT_TRUE(store_->WriteAddPrefix(chunk_id, static_cast<SBPrefix>(i)));
+ }
+ EXPECT_TRUE(store_->FinishChunk());
+
+ std::vector<SBAddFullHash> pending_adds;
+ safe_browsing::PrefixSetBuilder builder;
+ std::vector<SBAddFullHash> add_full_hashes_result;
+ EXPECT_TRUE(store_->FinishUpdate(pending_adds,
+ &builder,
+ &add_full_hashes_result));
+
+ SBAddPrefixes add_prefixes;
+ EXPECT_TRUE(store_->GetAddPrefixes(&add_prefixes));
+ ASSERT_EQ(chunk_id * kPrefixesPerChunk, add_prefixes.size());
+
+ // New stride should be the same, or shifted one right.
+ const uint32 new_shard_stride = ReadStride();
+ EXPECT_TRUE((new_shard_stride == shard_stride) ||
+ ((new_shard_stride << 1) == shard_stride));
+ shard_stride = new_shard_stride;
+ ++chunk_id;
+ } while (!shard_stride || shard_stride > kTargetStride);
+
+ // Guard against writing too many chunks. If this gets too big, adjust
+ // |kPrefixesPerChunk|.
+ EXPECT_LT(chunk_id, 20);
+
+ // Remove each chunk and check that the stride goes back to 0.
+ while (--chunk_id) {
+ ASSERT_TRUE(store_->BeginUpdate());
+ EXPECT_TRUE(store_->CheckAddChunk(chunk_id));
+ EXPECT_FALSE(store_->CheckAddChunk(chunk_id + 1));
+ store_->DeleteAddChunk(chunk_id);
+
+ std::vector<SBAddFullHash> pending_adds;
+ safe_browsing::PrefixSetBuilder builder;
+ std::vector<SBAddFullHash> add_full_hashes_result;
+ EXPECT_TRUE(store_->FinishUpdate(pending_adds,
+ &builder,
+ &add_full_hashes_result));
+
+ // New stride should be the same, or shifted one left.
+ const uint32 new_shard_stride = ReadStride();
+ EXPECT_TRUE((new_shard_stride == shard_stride) ||
+ (new_shard_stride == (shard_stride << 1)));
+ shard_stride = new_shard_stride;
+ }
+ EXPECT_EQ(0u, shard_stride);
+}
+
} // namespace
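The hard-coded offset in ReadStride() above can be cross-checked against the v8 header layout (a sketch; FileHeaderV8 is file-local to safe_browsing_store_file.cc, so FileHeaderV8Mirror is a hypothetical copy made only for the check):

// magic, version, add_chunk_count and sub_chunk_count each take 4 bytes, so
// shard_stride begins 16 bytes into the file -- the 4 * sizeof(uint32) that
// ReadStride() seeks to.
struct FileHeaderV8Mirror {
  int32 magic, version;
  uint32 add_chunk_count, sub_chunk_count;
  uint32 shard_stride;
};
COMPILE_ASSERT(offsetof(FileHeaderV8Mirror, shard_stride) == 4 * sizeof(uint32),
               shard_stride_offset_mismatch);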
diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc
index 199138e..420ed1c 100644
--- a/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc
@@ -178,6 +178,15 @@ TEST(SafeBrowsingStoreTest, SBProcessSubsKnockout) {
add_hashes.push_back(SBAddFullHash(kAddChunk1, kNow, kHash4mod));
sub_hashes.push_back(SBSubFullHash(kSubChunk1, kAddChunk1, kHash4mod));
+ std::sort(add_prefixes.begin(), add_prefixes.end(),
+ SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
+ std::sort(sub_prefixes.begin(), sub_prefixes.end(),
+ SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
+ std::sort(add_hashes.begin(), add_hashes.end(),
+ SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
+ std::sort(sub_hashes.begin(), sub_hashes.end(),
+ SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>);
+
const base::hash_set<int32> no_deletions;
SBProcessSubs(&add_prefixes, &sub_prefixes, &add_hashes, &sub_hashes,
no_deletions, no_deletions);
@@ -249,6 +258,15 @@ TEST(SafeBrowsingStoreTest, SBProcessSubsDeleteChunk) {
sub_hashes.push_back(SBSubFullHash(kSubChunk1, kAddChunk1, kHash3));
sub_prefixes.push_back(SBSubPrefix(kSubChunk1, kAddChunk1, kHash3.prefix));
+ std::sort(add_prefixes.begin(), add_prefixes.end(),
+ SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
+ std::sort(sub_prefixes.begin(), sub_prefixes.end(),
+ SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
+ std::sort(add_hashes.begin(), add_hashes.end(),
+ SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
+ std::sort(sub_hashes.begin(), sub_hashes.end(),
+ SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>);
+
const base::hash_set<int32> no_deletions;
base::hash_set<int32> add_deletions;
add_deletions.insert(kAddChunk1);
@@ -268,6 +286,15 @@ TEST(SafeBrowsingStoreTest, SBProcessSubsDeleteChunk) {
EXPECT_EQ(kAddChunk1, sub_hashes[0].add_chunk_id);
EXPECT_TRUE(SBFullHashEqual(kHash3, sub_hashes[0].full_hash));
+ std::sort(add_prefixes.begin(), add_prefixes.end(),
+ SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
+ std::sort(sub_prefixes.begin(), sub_prefixes.end(),
+ SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
+ std::sort(add_hashes.begin(), add_hashes.end(),
+ SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
+ std::sort(sub_hashes.begin(), sub_hashes.end(),
+ SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>);
+
base::hash_set<int32> sub_deletions;
sub_deletions.insert(kSubChunk1);
SBProcessSubs(&add_prefixes, &sub_prefixes, &add_hashes, &sub_hashes,