author     shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2014-03-27 05:27:17 +0000
committer  shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>  2014-03-27 05:27:17 +0000
commit     d3dd0715afc50f2e999f5a28478e783519170a29 (patch)
tree       ac638131304a9a2524d10e08dee9b7929e55b239 /chrome/browser/safe_browsing
parent     f42a5a827ca1c591f1f9e918fce5ce6d485d0e3d (diff)
Refactor safe-browsing file storage to allow incremental processing.
Previously the entire file was read into memory, processed, and written
out to a new file. Modify the format to shard the data by prefix, so that
only a portion of the data needs to be held in memory at any given time.
The sharding is dynamic, based on a target memory footprint (currently
100 KB), so small files have fewer shards than large files.
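
As an illustrative sketch only (not part of this change), the stride selection
can be reduced to a few lines. The constants and the halving behavior follow
kUpdateStorageBytes, kMinShardStride, and kMaxShardStride in the new
safe_browsing_store_file.cc; the standalone function name and form here are
hypothetical:

```cpp
#include <stdint.h>

// Sketch of choosing the output shard stride from the estimated file size.
// Each shard should need less than ~100 KB of memory during an update, and
// the stride never drops below 1<<24 to avoid excessive sharding.
const int64_t kUpdateStorageBytes = 100 * 1024;
const uint32_t kMinShardStride = 1 << 24;
const uint64_t kMaxShardStride = 1ULL << 32;  // Entire SBPrefix space.

uint64_t ComputeOutStride(int64_t estimated_file_size) {
  uint64_t out_stride = kMaxShardStride;  // Start with a single shard.
  int64_t shard_size = estimated_file_size;
  // Halve the prefix range covered per shard until one shard's worth of
  // data is expected to fit the target footprint.
  while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) {
    out_stride >>= 1;
    shard_size >>= 1;
  }
  return out_stride;
}
```

Keeping the stride a power of two lets shard boundaries stay aligned when an
input file written with one stride is re-written with another.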
Additionally, add a checksum over the header and chunk table-of-contents
area to prevent corruption from causing broken update requests.
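
The header checksum relies on digesting the running MD5 context without
finalizing it, so the same context can keep accumulating the shard data for
the whole-file checksum. A minimal sketch of that helper (it mirrors
MD5IntermediateDigest() added in safe_browsing_store_file.cc and assumes
Chromium's base/md5.h):

```cpp
#include <string.h>

#include "base/md5.h"

// Digest everything folded into |context| so far, without finalizing
// |context| itself: base::MD5Final() is destructive, so run it on a copy.
void IntermediateDigest(base::MD5Digest* digest,
                        const base::MD5Context& context) {
  base::MD5Context temp_context;
  memcpy(&temp_context, &context, sizeof(temp_context));
  base::MD5Final(digest, &temp_context);
}
```

The writer emits this intermediate digest immediately after the header and the
chunk table-of-contents, keeps folding the shard data into the same context,
and writes the full-file digest at the end; the reader verifies both digests
the same way.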
BUG=120219,351448
Review URL: https://codereview.chromium.org/208583005
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@259791 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing')
6 files changed, 1015 insertions, 292 deletions
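
For orientation before the diff, a hedged, standalone sketch of how a v8 store
is then walked shard by shard during reads and updates (simplified from
ReadDbStateHelper() and the DoUpdate() loop below; the |read_shard| callback is
a hypothetical stand-in for reading one ShardHeader plus its prefix and hash
arrays):

```cpp
#include <stdint.h>

#include <functional>

typedef uint32_t SBPrefix;
const uint64_t kMaxShardStride = 1ULL << 32;
const SBPrefix kMaxSBPrefix = ~0u;

// Walk shards covering [in_min, in_min + in_stride) until the 32-bit prefix
// space wraps around. A shard_stride of 0 in the header means the entire
// prefix space is stored as a single shard.
bool WalkShards(uint64_t shard_stride,
                const std::function<bool(uint64_t min, uint64_t stride)>&
                    read_shard) {
  uint64_t in_stride = shard_stride ? shard_stride : kMaxShardStride;
  uint64_t in_min = 0;
  do {
    if (!read_shard(in_min, in_stride))
      return false;
    in_min += in_stride;
  } while (in_min <= kMaxSBPrefix);  // Stop once past the last prefix.
  return true;
}
```

Small files use a stride of 0 (one shard for the whole space), so they avoid
per-shard overhead while large files are split into multiple passes.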
diff --git a/chrome/browser/safe_browsing/safe_browsing_store.cc b/chrome/browser/safe_browsing/safe_browsing_store.cc index 2638e58..a942b9a 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store.cc +++ b/chrome/browser/safe_browsing/safe_browsing_store.cc @@ -10,6 +10,17 @@ namespace { +// Return |true| if the range is sorted by the given comparator. +template <typename CTI, typename LESS> +bool sorted(CTI beg, CTI end, LESS less) { + while ((end - beg) > 2) { + CTI n = beg++; + if (less(*beg, *n)) + return false; + } + return true; +} + // Find items matching between |subs| and |adds|, and remove them, // recording the item from |adds| in |adds_removed|. To minimize // copies, the inputs are processing in parallel, so |subs| and |adds| @@ -133,15 +144,15 @@ void SBProcessSubs(SBAddPrefixes* add_prefixes, // to qualify things. It becomes very arbitrary, though, and less // clear how things are working. - // Sort the inputs by the SBAddPrefix bits. - std::sort(add_prefixes->begin(), add_prefixes->end(), - SBAddPrefixLess<SBAddPrefix,SBAddPrefix>); - std::sort(sub_prefixes->begin(), sub_prefixes->end(), - SBAddPrefixLess<SBSubPrefix,SBSubPrefix>); - std::sort(add_full_hashes->begin(), add_full_hashes->end(), - SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>); - std::sort(sub_full_hashes->begin(), sub_full_hashes->end(), - SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>); + // Make sure things are sorted appropriately. + DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(), + SBAddPrefixLess<SBAddPrefix,SBAddPrefix>)); + DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(), + SBAddPrefixLess<SBSubPrefix,SBSubPrefix>)); + DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(), + SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>)); + DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(), + SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>)); // Factor out the prefix subs. SBAddPrefixes removed_adds; diff --git a/chrome/browser/safe_browsing/safe_browsing_store.h b/chrome/browser/safe_browsing/safe_browsing_store.h index 76cf28a..b381743 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store.h +++ b/chrome/browser/safe_browsing/safe_browsing_store.h @@ -53,6 +53,8 @@ struct SBAddPrefix { SBPrefix GetAddPrefix() const { return prefix; } }; +// TODO(shess): Measure the performance impact of switching this back to +// std::vector<> once the v8 file format dominates. Also SBSubPrefixes. typedef std::deque<SBAddPrefix> SBAddPrefixes; struct SBSubPrefix { @@ -134,12 +136,7 @@ bool SBAddPrefixHashLess(const T& a, const U& b) { // matched items from all vectors. Additionally remove items from // deleted chunks. // -// TODO(shess): Since the prefixes are uniformly-distributed hashes, -// there aren't many ways to organize the inputs for efficient -// processing. For this reason, the vectors are sorted and processed -// in parallel. At this time this code does the sorting internally, -// but it might make sense to make sorting an API requirement so that -// the storage can optimize for it. +// The inputs must be sorted by SBAddPrefixLess or SBAddPrefixHashLess. void SBProcessSubs(SBAddPrefixes* add_prefixes, SBSubPrefixes* sub_prefixes, std::vector<SBAddFullHash>* add_full_hashes, @@ -147,10 +144,6 @@ void SBProcessSubs(SBAddPrefixes* add_prefixes, const base::hash_set<int32>& add_chunks_deleted, const base::hash_set<int32>& sub_chunks_deleted); -// TODO(shess): This uses int32 rather than int because it's writing -// specifically-sized items to files. 
SBPrefix should likewise be -// explicitly sized. - // Abstract interface for storing data. class SafeBrowsingStore { public: diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.cc b/chrome/browser/safe_browsing/safe_browsing_store_file.cc index a879096..b58d961 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_file.cc +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.cc @@ -14,22 +14,68 @@ namespace { // NOTE(shess): kFileMagic should not be a byte-wise palindrome, so // that byte-order changes force corruption. const int32 kFileMagic = 0x600D71FE; -const int32 kFileVersion = 7; // SQLite storage was 6... + +// Version history: +// Version 6: aad08754/r2814 by erikkay@google.com on 2008-10-02 (sqlite) +// Version 7: 6afe28a5/r37435 by shess@chromium.org on 2010-01-28 +// Version 8: ????????/r????? by shess@chromium.org on 2014-03-?? +const int32 kFileVersion = 8; + +// ReadAndVerifyHeader() returns this in case of error. +const int32 kInvalidVersion = -1; // Header at the front of the main database file. -struct FileHeader { +struct FileHeaderV7 { int32 magic, version; uint32 add_chunk_count, sub_chunk_count; uint32 add_prefix_count, sub_prefix_count; uint32 add_hash_count, sub_hash_count; }; +// Starting with version 8, the storage is sorted and can be sharded to allow +// updates to be done with lower memory requirements. Newly written files will +// be sharded to need less than this amount of memory during update. Larger +// values are preferred to minimize looping overhead during processing. +const int64 kUpdateStorageBytes = 100 * 1024; + +// Prevent excessive sharding by setting a lower limit on the shard stride. +// Smaller values should work fine, but very small values will probably lead to +// poor performance. Shard stride is indirectly related to +// |kUpdateStorageBytes|, setting that very small will bump against this. +const uint32 kMinShardStride = 1 << 24; + +// Strides over the entire SBPrefix space. +const uint64 kMaxShardStride = GG_LONGLONG(1u) << 32; + +// Maximum SBPrefix value. +const SBPrefix kMaxSBPrefix = ~0; + +// Header at the front of the main database file. +struct FileHeaderV8 { + int32 magic, version; + uint32 add_chunk_count, sub_chunk_count; + uint32 shard_stride; + // TODO(shess): Is this where 64-bit will bite me? Perhaps write a + // specialized read/write? +}; + +union FileHeader { + struct FileHeaderV7 v7; + struct FileHeaderV8 v8; +}; + // Header for each chunk in the chunk-accumulation file. struct ChunkHeader { uint32 add_prefix_count, sub_prefix_count; uint32 add_hash_count, sub_hash_count; }; +// Header for each shard of data in the main database file. +struct ShardHeader { + uint32 add_prefix_count, sub_prefix_count; + uint32 add_hash_count, sub_hash_count; +}; + // Rewind the file. Using fseek(2) because rewind(3) errors are // weird. bool FileRewind(FILE* fp) { @@ -38,18 +84,6 @@ bool FileRewind(FILE* fp) { return rv == 0; } -// Move file read pointer forward by |bytes| relative to current position. -bool FileSkip(size_t bytes, FILE* fp) { - // Although fseek takes negative values, for this case, we only want - // to skip forward. - DCHECK(static_cast<long>(bytes) >= 0); - if (static_cast<long>(bytes) < 0) - return false; - int rv = fseek(fp, static_cast<long>(bytes), SEEK_CUR); - DCHECK_EQ(rv, 0); - return rv == 0; -} - // Read from |fp| into |item|, and fold the input data into the // checksum in |context|, if non-NULL. Return true on success. 
template <class T> @@ -104,22 +138,26 @@ bool ReadToContainer(CT* values, size_t count, FILE* fp, return true; } -// Write all of |values| to |fp|, and fold the data into the checksum -// in |context|, if non-NULL. Returns true on succsess. -template <typename CT> -bool WriteContainer(const CT& values, FILE* fp, - base::MD5Context* context) { - if (values.empty()) - return true; - - for (typename CT::const_iterator iter = values.begin(); - iter != values.end(); ++iter) { +// Write values between |beg| and |end| to |fp|, and fold the data into the +// checksum in |context|, if non-NULL. Returns true if all items successful. +template <typename CTI> +bool WriteRange(const CTI& beg, const CTI& end, + FILE* fp, base::MD5Context* context) { + for (CTI iter = beg; iter != end; ++iter) { if (!WriteItem(*iter, fp, context)) return false; } return true; } +// Write all of |values| to |fp|, and fold the data into the checksum +// in |context|, if non-NULL. Returns true if all items successful. +template <typename CT> +bool WriteContainer(const CT& values, FILE* fp, + base::MD5Context* context) { + return WriteRange(values.begin(), values.end(), fp, context); +} + // Delete the chunks in |deleted| from |chunks|. void DeleteChunksFromSet(const base::hash_set<int32>& deleted, std::set<int32>* chunks) { @@ -131,16 +169,37 @@ void DeleteChunksFromSet(const base::hash_set<int32>& deleted, } } +// base::MD5Final() modifies |context| in generating |digest|. This wrapper +// generates an intermediate digest without modifying the context. +void MD5IntermediateDigest(base::MD5Digest* digest, base::MD5Context* context) { + base::MD5Context temp_context; + memcpy(&temp_context, context, sizeof(temp_context)); + base::MD5Final(digest, &temp_context); +} + +bool ReadAndVerifyChecksum(FILE* fp, base::MD5Context* context) { + base::MD5Digest calculated_digest; + MD5IntermediateDigest(&calculated_digest, context); + + base::MD5Digest file_digest; + if (!ReadItem(&file_digest, fp, context)) + return false; + + return memcmp(&file_digest, &calculated_digest, sizeof(file_digest)) == 0; +} + // Sanity-check the header against the file's size to make sure our // vectors aren't gigantic. This doubles as a cheap way to detect // corruption without having to checksum the entire file. -bool FileHeaderSanityCheck(const base::FilePath& filename, - const FileHeader& header) { +bool FileHeaderV7SanityCheck(const base::FilePath& filename, + const FileHeaderV7& header) { + DCHECK_EQ(header.version, 7); + int64 size = 0; if (!base::GetFileSize(filename, &size)) return false; - int64 expected_size = sizeof(FileHeader); + int64 expected_size = sizeof(FileHeaderV7); expected_size += header.add_chunk_count * sizeof(int32); expected_size += header.sub_chunk_count * sizeof(int32); expected_size += header.add_prefix_count * sizeof(SBAddPrefix); @@ -154,21 +213,387 @@ bool FileHeaderSanityCheck(const base::FilePath& filename, return true; } -// This a helper function that reads header to |header|. Returns true if the -// magic number is correct and santiy check passes. -bool ReadAndVerifyHeader(const base::FilePath& filename, - FILE* fp, - FileHeader* header, - base::MD5Context* context) { - if (!ReadItem(header, fp, context)) +// Helper function to read the file header and chunk TOC. Rewinds |fp| and +// initializes |context|. The header is left in |header|, with the version +// returned. kInvalidVersion is returned for sanity check or checksum failure. 
+int ReadAndVerifyHeader(const base::FilePath& filename, + FileHeader* header, + std::set<int32>* add_chunks, + std::set<int32>* sub_chunks, + FILE* fp, + base::MD5Context* context) { + DCHECK(header); + DCHECK(add_chunks); + DCHECK(sub_chunks); + DCHECK(fp); + DCHECK(context); + + int version = kInvalidVersion; + + base::MD5Init(context); + if (!FileRewind(fp)) + return kInvalidVersion; + if (!ReadItem(&header->v8, fp, context)) + return kInvalidVersion; + if (header->v8.magic != kFileMagic) + return kInvalidVersion; + + size_t add_chunks_count = 0; + size_t sub_chunks_count = 0; + + if (header->v8.version == 7) { + version = 7; + + // Reset the context and re-read the v7 header. + base::MD5Init(context); + if (!FileRewind(fp)) + return kInvalidVersion; + if (!ReadItem(&header->v7, fp, context)) + return kInvalidVersion; + if (header->v7.magic != kFileMagic || header->v7.version != 7) + return kInvalidVersion; + if (!FileHeaderV7SanityCheck(filename, header->v7)) + return kInvalidVersion; + + add_chunks_count = header->v7.add_chunk_count; + sub_chunks_count = header->v7.sub_chunk_count; + } else if (header->v8.version == kFileVersion) { + version = 8; + add_chunks_count = header->v8.add_chunk_count; + sub_chunks_count = header->v8.sub_chunk_count; + } else { + return kInvalidVersion; + } + + if (!ReadToContainer(add_chunks, add_chunks_count, fp, context) || + !ReadToContainer(sub_chunks, sub_chunks_count, fp, context)) { + return kInvalidVersion; + } + + // v8 includes a checksum to validate the header. + if (version > 7 && !ReadAndVerifyChecksum(fp, context)) + return kInvalidVersion; + + return version; +} + +// Helper function to write out the initial header and chunks-contained data. +// Rewinds |fp|, initializes |context|, then writes a file header and +// |add_chunks| and |sub_chunks|. +bool WriteHeader(uint32 out_stride, + const std::set<int32>& add_chunks, + const std::set<int32>& sub_chunks, + FILE* fp, + base::MD5Context* context) { + if (!FileRewind(fp)) + return false; + + base::MD5Init(context); + FileHeaderV8 header; + header.magic = kFileMagic; + header.version = kFileVersion; + header.add_chunk_count = add_chunks.size(); + header.sub_chunk_count = sub_chunks.size(); + header.shard_stride = out_stride; + if (!WriteItem(header, fp, context)) return false; - if (header->magic != kFileMagic || header->version != kFileVersion) + + if (!WriteContainer(add_chunks, fp, context) || + !WriteContainer(sub_chunks, fp, context)) return false; - if (!FileHeaderSanityCheck(filename, *header)) + + // Write out the header digest. + base::MD5Digest header_digest; + MD5IntermediateDigest(&header_digest, context); + if (!WriteItem(header_digest, fp, context)) return false; + + return true; +} + +// Return |true| if the range is sorted by the given comparator. +template <typename CTI, typename LESS> +bool sorted(CTI beg, CTI end, LESS less) { + while ((end - beg) > 2) { + CTI n = beg++; + DCHECK(!less(*beg, *n)); + if (less(*beg, *n)) + return false; + } return true; } +// Merge |beg|..|end| into |container|. Both should be sorted by the given +// comparator, and the range iterators should not be derived from |container|. +// Differs from std::inplace_merge() in that additional memory is not required +// for linear performance. +template <typename CT, typename CTI, typename COMP> +void container_merge(CT* container, CTI beg, CTI end, const COMP& less) { + DCHECK(sorted(container->begin(), container->end(), less)); + DCHECK(sorted(beg, end, less)); + + // Size the container to fit the results. 
+ const size_t c_size = container->size(); + container->resize(c_size + (end - beg)); + + // |c_end| points to the original endpoint, while |c_out| points to the + // endpoint that will scan from end to beginning while merging. + typename CT::iterator c_end = container->begin() + c_size; + typename CT::iterator c_out = container->end(); + + // While both inputs have data, move the greater to |c_out|. + while (c_end != container->begin() && end != beg) { + if (less(*(c_end - 1), *(end - 1))) { + *(--c_out) = *(--end); + } else { + *(--c_out) = *(--c_end); + } + } + + // Copy any data remaining in the new range. + if (end != beg) { + // The original container data has been fully shifted. + DCHECK(c_end == container->begin()); + + // There is exactly the correct amount of space left. + DCHECK_EQ(c_out - c_end, end - beg); + + std::copy(beg, end, container->begin()); + } + + DCHECK(sorted(container->begin(), container->end(), less)); +} + +// Collection of iterators used while stepping through StateInternal (see +// below). +class StateInternalPos { + public: + StateInternalPos(SBAddPrefixes::iterator add_prefixes_iter, + SBSubPrefixes::iterator sub_prefixes_iter, + std::vector<SBAddFullHash>::iterator add_hashes_iter, + std::vector<SBSubFullHash>::iterator sub_hashes_iter) + : add_prefixes_iter_(add_prefixes_iter), + sub_prefixes_iter_(sub_prefixes_iter), + add_hashes_iter_(add_hashes_iter), + sub_hashes_iter_(sub_hashes_iter) { + } + + SBAddPrefixes::iterator add_prefixes_iter_; + SBSubPrefixes::iterator sub_prefixes_iter_; + std::vector<SBAddFullHash>::iterator add_hashes_iter_; + std::vector<SBSubFullHash>::iterator sub_hashes_iter_; +}; + +// Helper to find the next shard boundary. +template <class T> +bool prefix_bounder(SBPrefix val, const T& elt) { + return val < elt.GetAddPrefix(); +} + +// Container for partial database state. Includes add/sub prefixes/hashes, plus +// aggregate operations on same. +class StateInternal { + public: + explicit StateInternal(const std::vector<SBAddFullHash>& pending_adds) + : add_full_hashes_(pending_adds.begin(), pending_adds.end()) { + } + + StateInternal() {} + + // Append indicated amount of data from |fp|. + bool AppendData(size_t add_prefix_count, size_t sub_prefix_count, + size_t add_hash_count, size_t sub_hash_count, + FILE* fp, base::MD5Context* context) { + return + ReadToContainer(&add_prefixes_, add_prefix_count, fp, context) && + ReadToContainer(&sub_prefixes_, sub_prefix_count, fp, context) && + ReadToContainer(&add_full_hashes_, add_hash_count, fp, context) && + ReadToContainer(&sub_full_hashes_, sub_hash_count, fp, context); + } + + void ClearData() { + add_prefixes_.clear(); + sub_prefixes_.clear(); + add_full_hashes_.clear(); + sub_full_hashes_.clear(); + } + + // Merge data from |beg|..|end| into receiver's state, then process the state. + // The current state and the range given should corrospond to the same sorted + // shard of data from different sources. |add_del_cache| and |sub_del_cache| + // indicate the chunk ids which should be deleted during processing (see + // SBProcessSubs). 
+ void MergeDataAndProcess(const StateInternalPos& beg, + const StateInternalPos& end, + const base::hash_set<int32>& add_del_cache, + const base::hash_set<int32>& sub_del_cache) { + container_merge(&add_prefixes_, + beg.add_prefixes_iter_, + end.add_prefixes_iter_, + SBAddPrefixLess<SBAddPrefix,SBAddPrefix>); + + container_merge(&sub_prefixes_, + beg.sub_prefixes_iter_, + end.sub_prefixes_iter_, + SBAddPrefixLess<SBSubPrefix,SBSubPrefix>); + + container_merge(&add_full_hashes_, + beg.add_hashes_iter_, + end.add_hashes_iter_, + SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>); + + container_merge(&sub_full_hashes_, + beg.sub_hashes_iter_, + end.sub_hashes_iter_, + SBAddPrefixHashLess<SBSubFullHash, SBSubFullHash>); + + SBProcessSubs(&add_prefixes_, &sub_prefixes_, + &add_full_hashes_, &sub_full_hashes_, + add_del_cache, sub_del_cache); + } + + // Sort the data appropriately for the sharding, merging, and processing + // operations. + void SortData() { + std::sort(add_prefixes_.begin(), add_prefixes_.end(), + SBAddPrefixLess<SBAddPrefix,SBAddPrefix>); + std::sort(sub_prefixes_.begin(), sub_prefixes_.end(), + SBAddPrefixLess<SBSubPrefix,SBSubPrefix>); + std::sort(add_full_hashes_.begin(), add_full_hashes_.end(), + SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>); + std::sort(sub_full_hashes_.begin(), sub_full_hashes_.end(), + SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>); + } + + // Iterator from the beginning of the state's data. + StateInternalPos StateBegin() { + return StateInternalPos(add_prefixes_.begin(), + sub_prefixes_.begin(), + add_full_hashes_.begin(), + sub_full_hashes_.begin()); + } + + // An iterator pointing just after the last possible element of the shard + // indicated by |shard_max|. Used to step through the state by shard. + // TODO(shess): Verify whether binary search really improves over linear. + // Merging or writing will immediately touch all of these elements. + StateInternalPos ShardEnd(const StateInternalPos& beg, SBPrefix shard_max) { + return StateInternalPos( + std::upper_bound(beg.add_prefixes_iter_, add_prefixes_.end(), + shard_max, prefix_bounder<SBAddPrefix>), + std::upper_bound(beg.sub_prefixes_iter_, sub_prefixes_.end(), + shard_max, prefix_bounder<SBSubPrefix>), + std::upper_bound(beg.add_hashes_iter_, add_full_hashes_.end(), + shard_max, prefix_bounder<SBAddFullHash>), + std::upper_bound(beg.sub_hashes_iter_, sub_full_hashes_.end(), + shard_max, prefix_bounder<SBSubFullHash>)); + } + + // Write a shard header and data for the shard starting at |beg| and ending at + // the element before |end|. 
+ bool WriteShard(const StateInternalPos& beg, const StateInternalPos& end, + FILE* fp, base::MD5Context* context) { + ShardHeader shard_header; + shard_header.add_prefix_count = + end.add_prefixes_iter_ - beg.add_prefixes_iter_; + shard_header.sub_prefix_count = + end.sub_prefixes_iter_ - beg.sub_prefixes_iter_; + shard_header.add_hash_count = + end.add_hashes_iter_ - beg.add_hashes_iter_; + shard_header.sub_hash_count = + end.sub_hashes_iter_ - beg.sub_hashes_iter_; + + return + WriteItem(shard_header, fp, context) && + WriteRange(beg.add_prefixes_iter_, end.add_prefixes_iter_, + fp, context) && + WriteRange(beg.sub_prefixes_iter_, end.sub_prefixes_iter_, + fp, context) && + WriteRange(beg.add_hashes_iter_, end.add_hashes_iter_, + fp, context) && + WriteRange(beg.sub_hashes_iter_, end.sub_hashes_iter_, + fp, context); + } + + SBAddPrefixes add_prefixes_; + SBSubPrefixes sub_prefixes_; + std::vector<SBAddFullHash> add_full_hashes_; + std::vector<SBSubFullHash> sub_full_hashes_; +}; + +// True if |val| is an even power of two. +template <typename T> +bool IsPowerOfTwo(const T& val) { + return val && (val & (val - 1)) == 0; +} + +// Helper to read the entire database state, used by GetAddPrefixes() and +// GetAddFullHashes(). Those functions are generally used only for smaller +// files. Returns false in case of errors reading the data. +bool ReadDbStateHelper(const base::FilePath& filename, + StateInternal* db_state) { + file_util::ScopedFILE file(base::OpenFile(filename, "rb")); + if (file.get() == NULL) + return false; + + std::set<int32> add_chunks; + std::set<int32> sub_chunks; + + base::MD5Context context; + FileHeader header; + const int version = + ReadAndVerifyHeader(filename, &header, &add_chunks, &sub_chunks, + file.get(), &context); + if (version == kInvalidVersion) + return false; + + if (version == 7) { + if (!db_state->AppendData(header.v7.add_prefix_count, + header.v7.sub_prefix_count, + header.v7.add_hash_count, + header.v7.sub_hash_count, + file.get(), &context)) { + return false; + } + + // v7 data was not stored sorted. + db_state->SortData(); + } else { + // Read until the shard start overflows, always at least one pass. + uint64 in_min = 0; + uint64 in_stride = header.v8.shard_stride; + if (!in_stride) + in_stride = kMaxShardStride; + if (!IsPowerOfTwo(in_stride)) + return false; + + do { + ShardHeader shard_header; + if (!ReadItem(&shard_header, file.get(), &context)) + return false; + + if (!db_state->AppendData(shard_header.add_prefix_count, + shard_header.sub_prefix_count, + shard_header.add_hash_count, + shard_header.sub_hash_count, + file.get(), &context)) { + return false; + } + + in_min += in_stride; + } while (in_min <= kMaxSBPrefix); + } + + if (!ReadAndVerifyChecksum(file.get(), &context)) + return false; + + int64 size = 0; + if (!base::GetFileSize(filename, &size)) + return false; + + return static_cast<int64>(ftell(file.get())) == size; +} + } // namespace // static @@ -258,15 +683,7 @@ bool SafeBrowsingStoreFile::CheckValidity() { bytes_left -= c; } - // Calculate the digest to this point. - base::MD5Digest calculated_digest; - base::MD5Final(&calculated_digest, &context); - - // Read the stored digest and verify it. 
- base::MD5Digest file_digest; - if (!ReadItem(&file_digest, file_.get(), NULL)) - return OnCorruptDatabase(); - if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) { + if (!ReadAndVerifyChecksum(file_.get(), &context)) { RecordFormatEvent(FORMAT_EVENT_VALIDITY_CHECKSUM_FAILURE); return OnCorruptDatabase(); } @@ -293,48 +710,29 @@ bool SafeBrowsingStoreFile::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) { bool SafeBrowsingStoreFile::GetAddPrefixes(SBAddPrefixes* add_prefixes) { add_prefixes->clear(); + if (!base::PathExists(filename_)) + return true; - base::ScopedFILE file(base::OpenFile(filename_, "rb")); - if (file.get() == NULL) return false; - - FileHeader header; - if (!ReadAndVerifyHeader(filename_, file.get(), &header, NULL)) + StateInternal db_state; + if (!ReadDbStateHelper(filename_, &db_state)) return OnCorruptDatabase(); - size_t add_prefix_offset = header.add_chunk_count * sizeof(int32) + - header.sub_chunk_count * sizeof(int32); - if (!FileSkip(add_prefix_offset, file.get())) - return false; - - if (!ReadToContainer(add_prefixes, header.add_prefix_count, file.get(), NULL)) - return false; - + add_prefixes->swap(db_state.add_prefixes_); return true; } bool SafeBrowsingStoreFile::GetAddFullHashes( std::vector<SBAddFullHash>* add_full_hashes) { add_full_hashes->clear(); + if (!base::PathExists(filename_)) + return true; - base::ScopedFILE file(base::OpenFile(filename_, "rb")); - if (file.get() == NULL) return false; - - FileHeader header; - if (!ReadAndVerifyHeader(filename_, file.get(), &header, NULL)) + StateInternal db_state; + if (!ReadDbStateHelper(filename_, &db_state)) return OnCorruptDatabase(); - size_t offset = - header.add_chunk_count * sizeof(int32) + - header.sub_chunk_count * sizeof(int32) + - header.add_prefix_count * sizeof(SBAddPrefix) + - header.sub_prefix_count * sizeof(SBSubPrefix); - if (!FileSkip(offset, file.get())) - return false; - - return ReadToContainer(add_full_hashes, - header.add_hash_count, - file.get(), - NULL); + add_full_hashes->swap(db_state.add_full_hashes_); + return true; } bool SafeBrowsingStoreFile::WriteAddHash(int32 chunk_id, @@ -415,15 +813,26 @@ bool SafeBrowsingStoreFile::BeginUpdate() { return true; } + base::MD5Context context; FileHeader header; - if (!ReadItem(&header, file.get(), NULL)) - return OnCorruptDatabase(); - - if (header.magic != kFileMagic || header.version != kFileVersion) { - if (!strcmp(reinterpret_cast<char*>(&header.magic), "SQLite format 3")) { - RecordFormatEvent(FORMAT_EVENT_FOUND_SQLITE); - } else { - RecordFormatEvent(FORMAT_EVENT_FOUND_UNKNOWN); + const int version = + ReadAndVerifyHeader(filename_, &header, + &add_chunks_cache_, &sub_chunks_cache_, + file.get(), &context); + if (version == kInvalidVersion) { + FileHeaderV8 retry_header; + if (FileRewind(file.get()) && ReadItem(&retry_header, file.get(), NULL) && + (retry_header.magic != kFileMagic || + (retry_header.version != 8 && retry_header.version != 7))) { + // TODO(shess): Think on whether these histograms are generating any + // actionable data. I kid you not, SQLITE happens many thousands of times + // per day, UNKNOWN about 3x higher than that. + if (!strcmp(reinterpret_cast<char*>(&retry_header.magic), + "SQLite format 3")) { + RecordFormatEvent(FORMAT_EVENT_FOUND_SQLITE); + } else { + RecordFormatEvent(FORMAT_EVENT_FOUND_UNKNOWN); + } } // Close the file so that it can be deleted. 
@@ -432,20 +841,6 @@ bool SafeBrowsingStoreFile::BeginUpdate() { return OnCorruptDatabase(); } - // TODO(shess): Under POSIX it is possible that this could size a - // file different from the file which was opened. - if (!FileHeaderSanityCheck(filename_, header)) - return OnCorruptDatabase(); - - // Pull in the chunks-seen data for purposes of implementing - // |GetAddChunks()| and |GetSubChunks()|. This data is sent up to - // the server at the beginning of an update. - if (!ReadToContainer(&add_chunks_cache_, header.add_chunk_count, - file.get(), NULL) || - !ReadToContainer(&sub_chunks_cache_, header.sub_chunk_count, - file.get(), NULL)) - return OnCorruptDatabase(); - file_.swap(file); new_file_.swap(new_file); return true; @@ -485,80 +880,25 @@ bool SafeBrowsingStoreFile::DoUpdate( CHECK(builder); CHECK(add_full_hashes_result); - SBAddPrefixes add_prefixes; - SBSubPrefixes sub_prefixes; - std::vector<SBAddFullHash> add_full_hashes; - std::vector<SBSubFullHash> sub_full_hashes; - - // Read original data into the vectors. - if (!empty_) { - DCHECK(file_.get()); - - if (!FileRewind(file_.get())) - return OnCorruptDatabase(); - - base::MD5Context context; - base::MD5Init(&context); - - // Read the file header and make sure it looks right. - FileHeader header; - if (!ReadAndVerifyHeader(filename_, file_.get(), &header, &context)) - return OnCorruptDatabase(); - - // Re-read the chunks-seen data to get to the later data in the - // file and calculate the checksum. No new elements should be - // added to the sets. - if (!ReadToContainer(&add_chunks_cache_, header.add_chunk_count, - file_.get(), &context) || - !ReadToContainer(&sub_chunks_cache_, header.sub_chunk_count, - file_.get(), &context)) - return OnCorruptDatabase(); - - if (!ReadToContainer(&add_prefixes, header.add_prefix_count, - file_.get(), &context) || - !ReadToContainer(&sub_prefixes, header.sub_prefix_count, - file_.get(), &context) || - !ReadToContainer(&add_full_hashes, header.add_hash_count, - file_.get(), &context) || - !ReadToContainer(&sub_full_hashes, header.sub_hash_count, - file_.get(), &context)) - return OnCorruptDatabase(); - - // Calculate the digest to this point. - base::MD5Digest calculated_digest; - base::MD5Final(&calculated_digest, &context); - - // Read the stored checksum and verify it. - base::MD5Digest file_digest; - if (!ReadItem(&file_digest, file_.get(), NULL)) - return OnCorruptDatabase(); - - if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) { - RecordFormatEvent(FORMAT_EVENT_UPDATE_CHECKSUM_FAILURE); - return OnCorruptDatabase(); - } - - // Close the file so we can later rename over it. - file_.reset(); - } - DCHECK(!file_.get()); - // Rewind the temporary storage. if (!FileRewind(new_file_.get())) return false; // Get chunk file's size for validating counts. - int64 size = 0; - if (!base::GetFileSize(TemporaryFileForFilename(filename_), &size)) + int64 update_size = 0; + if (!base::GetFileSize(TemporaryFileForFilename(filename_), &update_size)) return OnCorruptDatabase(); // Track update size to answer questions at http://crbug.com/72216 . // Log small updates as 1k so that the 0 (underflow) bucket can be // used for "empty" in SafeBrowsingDatabase. UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", - std::max(static_cast<int>(size / 1024), 1)); + std::max(static_cast<int>(update_size / 1024), 1)); - // Append the accumulated chunks onto the vectors read from |file_|. + // Chunk updates to integrate. + StateInternal new_state(pending_adds); + + // Read update chunks. 
for (int i = 0; i < chunks_written_; ++i) { ChunkHeader header; @@ -576,73 +916,202 @@ bool SafeBrowsingStoreFile::DoUpdate( expected_size += header.sub_prefix_count * sizeof(SBSubPrefix); expected_size += header.add_hash_count * sizeof(SBAddFullHash); expected_size += header.sub_hash_count * sizeof(SBSubFullHash); - if (expected_size > size) + if (expected_size > update_size) return false; - // TODO(shess): If the vectors were kept sorted, then this code - // could use std::inplace_merge() to merge everything together in - // sorted order. That might still be slower than just sorting at - // the end if there were a large number of chunks. In that case - // some sort of recursive binary merge might be in order (merge - // chunks pairwise, merge those chunks pairwise, and so on, then - // merge the result with the main list). - if (!ReadToContainer(&add_prefixes, header.add_prefix_count, - new_file_.get(), NULL) || - !ReadToContainer(&sub_prefixes, header.sub_prefix_count, - new_file_.get(), NULL) || - !ReadToContainer(&add_full_hashes, header.add_hash_count, - new_file_.get(), NULL) || - !ReadToContainer(&sub_full_hashes, header.sub_hash_count, - new_file_.get(), NULL)) + if (!new_state.AppendData(header.add_prefix_count, header.sub_prefix_count, + header.add_hash_count, header.sub_hash_count, + new_file_.get(), NULL)) { return false; + } } - // Append items from |pending_adds|. - add_full_hashes.insert(add_full_hashes.end(), - pending_adds.begin(), pending_adds.end()); + // The state was accumulated by chunk, sort by prefix. + new_state.SortData(); + + // These strides control how much data is loaded into memory per pass. + // Strides must be an even power of two. |in_stride| will be derived from the + // input file. |out_stride| will be derived from an estimate of the resulting + // file's size. |process_stride| will be the max of both. + uint64 in_stride = kMaxShardStride; + uint64 out_stride = kMaxShardStride; + uint64 process_stride = 0; + + // The header info is only used later if |!empty_|. The v8 read loop only + // needs |in_stride|, while v7 needs to refer to header information. + base::MD5Context in_context; + int version = kInvalidVersion; + FileHeader header; - // Knock the subs from the adds and process deleted chunks. - SBProcessSubs(&add_prefixes, &sub_prefixes, - &add_full_hashes, &sub_full_hashes, - add_del_cache_, sub_del_cache_); + if (!empty_) { + DCHECK(file_.get()); + + version = ReadAndVerifyHeader(filename_, &header, + &add_chunks_cache_, &sub_chunks_cache_, + file_.get(), &in_context); + if (version == kInvalidVersion) + return OnCorruptDatabase(); + + if (version == 8 && header.v8.shard_stride) + in_stride = header.v8.shard_stride; + + // The header checksum should have prevented this case, but the code will be + // broken if this is not correct. + if (!IsPowerOfTwo(in_stride)) + return OnCorruptDatabase(); + } // We no longer need to track deleted chunks. DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_); DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_); - // Write the new data to new_file_. - if (!FileRewind(new_file_.get())) - return false; + // Calculate |out_stride| to break the file down into reasonable shards. + { + int64 original_size = 0; + if (!empty_ && !base::GetFileSize(filename_, &original_size)) + return OnCorruptDatabase(); - base::MD5Context context; - base::MD5Init(&context); + // Approximate the final size as everything. Subs and deletes will reduce + // the size, but modest over-sharding won't hurt much. 
+ int64 shard_size = original_size + update_size; - // Write a file header. - FileHeader header; - header.magic = kFileMagic; - header.version = kFileVersion; - header.add_chunk_count = add_chunks_cache_.size(); - header.sub_chunk_count = sub_chunks_cache_.size(); - header.add_prefix_count = add_prefixes.size(); - header.sub_prefix_count = sub_prefixes.size(); - header.add_hash_count = add_full_hashes.size(); - header.sub_hash_count = sub_full_hashes.size(); - if (!WriteItem(header, new_file_.get(), &context)) - return false; + // Keep splitting until a single stride of data fits the target. + size_t shifts = 0; + while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) { + out_stride >>= 1; + shard_size >>= 1; + ++shifts; + } + UMA_HISTOGRAM_COUNTS("SB2.OutShardShifts", shifts); + + DCHECK(IsPowerOfTwo(out_stride)); + } - // Write all the chunk data. - if (!WriteContainer(add_chunks_cache_, new_file_.get(), &context) || - !WriteContainer(sub_chunks_cache_, new_file_.get(), &context) || - !WriteContainer(add_prefixes, new_file_.get(), &context) || - !WriteContainer(sub_prefixes, new_file_.get(), &context) || - !WriteContainer(add_full_hashes, new_file_.get(), &context) || - !WriteContainer(sub_full_hashes, new_file_.get(), &context)) + // Outer loop strides by the max of the input stride (to read integral shards) + // and the output stride (to write integral shards). + process_stride = std::max(in_stride, out_stride); + DCHECK(IsPowerOfTwo(process_stride)); + DCHECK_EQ(0u, process_stride % in_stride); + DCHECK_EQ(0u, process_stride % out_stride); + + // Start writing the new data to |new_file_|. + base::MD5Context out_context; + if (!WriteHeader(out_stride, add_chunks_cache_, sub_chunks_cache_, + new_file_.get(), &out_context)) { return false; + } + + // Start at the beginning of the SBPrefix space. + uint64 in_min = 0; + uint64 out_min = 0; + uint64 process_min = 0; + + // Start at the beginning of the updates. + StateInternalPos new_pos = new_state.StateBegin(); + + // Re-usable container for shard processing. + StateInternal db_state; + + // Track aggregate counts for histograms. + size_t add_prefix_count = 0; + size_t sub_prefix_count = 0; + + do { + // Maximum element in the current shard. + SBPrefix process_max = + static_cast<SBPrefix>(process_min + process_stride - 1); + DCHECK_GT(process_max, process_min); + + // Drop the data from previous pass. + db_state.ClearData(); + + // Fill the processing shard with one or more input shards. + if (!empty_) { + if (version == 7) { + // Treat v7 as a single-shard file. + DCHECK_EQ(in_min, 0u); + DCHECK_EQ(in_stride, kMaxShardStride); + DCHECK_EQ(process_stride, kMaxShardStride); + if (!db_state.AppendData(header.v7.add_prefix_count, + header.v7.sub_prefix_count, + header.v7.add_hash_count, + header.v7.sub_hash_count, + file_.get(), &in_context)) + return OnCorruptDatabase(); + + // v7 data is not sorted correctly. + db_state.SortData(); + } else { + do { + ShardHeader shard_header; + if (!ReadItem(&shard_header, file_.get(), &in_context)) + return OnCorruptDatabase(); + + if (!db_state.AppendData(shard_header.add_prefix_count, + shard_header.sub_prefix_count, + shard_header.add_hash_count, + shard_header.sub_hash_count, + file_.get(), &in_context)) + return OnCorruptDatabase(); + + in_min += in_stride; + } while (in_min <= kMaxSBPrefix && in_min < process_max); + } + } - // Write the checksum at the end. 
- base::MD5Digest digest; - base::MD5Final(&digest, &context); - if (!WriteItem(digest, new_file_.get(), NULL)) + // Shard the update data to match the database data, then merge the update + // data and process the results. + { + StateInternalPos new_end = new_state.ShardEnd(new_pos, process_max); + db_state.MergeDataAndProcess(new_pos, new_end, + add_del_cache_, sub_del_cache_); + new_pos = new_end; + } + + // Collect the processed data for return to caller. + for (size_t i = 0; i < db_state.add_prefixes_.size(); ++i) { + builder->AddPrefix(db_state.add_prefixes_[i].prefix); + } + add_full_hashes_result->insert(add_full_hashes_result->end(), + db_state.add_full_hashes_.begin(), + db_state.add_full_hashes_.end()); + add_prefix_count += db_state.add_prefixes_.size(); + sub_prefix_count += db_state.sub_prefixes_.size(); + + // Write one or more shards of processed output. + StateInternalPos out_pos = db_state.StateBegin(); + do { + SBPrefix out_max = static_cast<SBPrefix>(out_min + out_stride - 1); + DCHECK_GT(out_max, out_min); + + StateInternalPos out_end = db_state.ShardEnd(out_pos, out_max); + if (!db_state.WriteShard(out_pos, out_end, new_file_.get(), &out_context)) + return false; + out_pos = out_end; + + out_min += out_stride; + } while (out_min == static_cast<SBPrefix>(out_min) && + out_min < process_max); + + process_min += process_stride; + } while (process_min <= kMaxSBPrefix); + + // Verify the overall checksum. + if (!empty_) { + if (!ReadAndVerifyChecksum(file_.get(), &in_context)) + return OnCorruptDatabase(); + + // TODO(shess): Verify EOF? + + // Close the input file so the new file can be renamed over it. + file_.reset(); + } + DCHECK(!file_.get()); + + // Write the overall checksum. + base::MD5Digest out_digest; + base::MD5Final(&out_digest, &out_context); + if (!WriteItem(out_digest, new_file_.get(), NULL)) return false; // Trim any excess left over from the temporary chunk data. @@ -660,15 +1129,8 @@ bool SafeBrowsingStoreFile::DoUpdate( return false; // Record counts before swapping to caller. - UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_prefixes.size()); - UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", sub_prefixes.size()); - - // Pass the resulting data off to the caller. - for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); - iter != add_prefixes.end(); ++iter) { - builder->AddPrefix(iter->prefix); - } - add_full_hashes_result->swap(add_full_hashes); + UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_prefix_count); + UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", sub_prefix_count); return true; } diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.h b/chrome/browser/safe_browsing/safe_browsing_store_file.h index 6b821b0..c8a2429 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_file.h +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.h @@ -21,38 +21,60 @@ // int32 version; // format version // // // Counts for the various data which follows the header. -// uint32 add_chunk_count; // Chunks seen, including empties. -// uint32 sub_chunk_count; // Ditto. -// uint32 add_prefix_count; -// uint32 sub_prefix_count; -// uint32 add_hash_count; -// uint32 sub_hash_count; -// +// uint32 add_chunk_count; // Chunks seen, including empties. +// uint32 sub_chunk_count; // Ditto. +// uint32 shard_stride; // SBPrefix space covered per shard. +// // 0==entire space in one shard. +// // Sorted by chunk_id. // array[add_chunk_count] { // int32 chunk_id; // } +// // Sorted by chunk_id. 
// array[sub_chunk_count] { // int32 chunk_id; // } -// array[add_prefix_count] { -// int32 chunk_id; -// uint32 prefix; -// } -// array[sub_prefix_count] { -// int32 chunk_id; -// int32 add_chunk_id; -// uint32 add_prefix; -// } -// array[add_hash_count] { -// int32 chunk_id; -// int32 received_time; // From base::Time::ToTimeT(). -// char[32] full_hash; -// array[sub_hash_count] { -// int32 chunk_id; -// int32 add_chunk_id; -// char[32] add_full_hash; +// MD5Digest header_checksum; // Checksum over preceeding data. +// +// // Sorted by prefix, then add chunk_id, then hash, both within shards and +// // overall. +// array[from 0 to wraparound to 0 by shard_stride] { +// uint32 add_prefix_count; +// uint32 sub_prefix_count; +// uint32 add_hash_count; +// uint32 sub_hash_count; +// array[add_prefix_count] { +// int32 chunk_id; +// uint32 prefix; +// } +// array[sub_prefix_count] { +// int32 chunk_id; +// int32 add_chunk_id; +// uint32 add_prefix; +// } +// array[add_hash_count] { +// int32 chunk_id; +// int32 received_time; // From base::Time::ToTimeT(). +// char[32] full_hash; +// } +// array[sub_hash_count] { +// int32 chunk_id; +// int32 add_chunk_id; +// char[32] add_full_hash; +// } // } -// MD5Digest checksum; // Checksum over preceeding data. +// MD5Digest checksum; // Checksum over entire file. +// +// The checksums are used to allow writing the file without doing an expensive +// fsync(). Since the data can be re-fetched, failing the checksum is not +// catastrophic. Histograms indicate that file corruption here is pretty +// uncommon. +// +// The |header_checksum| is present to guarantee valid header and chunk data for +// updates. Only that part of the file needs to be read to post the update. +// +// |shard_stride| breaks the file into approximately-equal portions, allowing +// updates to stream from one file to another with modest memory usage. It is +// dynamic to adjust to different file sizes without adding excessive overhead. // // During the course of an update, uncommitted data is stored in a // temporary file (which is later re-used to commit). This is an @@ -91,19 +113,15 @@ // - Open a temp file for storing new chunk info. // - Write new chunks to the temp file. // - When the transaction is finished: -// - Read the rest of the original file's data into buffers. -// - Rewind the temp file and merge the new data into buffers. -// - Process buffers for deletions and apply subs. -// - Rewind and write the buffers out to temp file. +// - Read the update data from the temp file into memory. +// - Overwrite the temp file with new header data. +// - Until done: +// - Read shards of the original file's data into memory. +// - Merge from the update data. +// - Write shards to the temp file. // - Delete original file. // - Rename temp file to original filename. -// TODO(shess): By using a checksum, this code can avoid doing an -// fsync(), at the possible cost of more frequently retrieving the -// full dataset. Measure how often this occurs, and if it occurs too -// often, consider retaining the last known-good file for recovery -// purposes, rather than deleting it. 
- class SafeBrowsingStoreFile : public SafeBrowsingStore { public: SafeBrowsingStoreFile(); diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc index 4f52241..c29fa60 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc @@ -28,6 +28,9 @@ const SBFullHash kHash3 = SBFullHashForString("three"); const SBFullHash kHash4 = SBFullHashForString("four"); const SBFullHash kHash5 = SBFullHashForString("five"); +const SBPrefix kMinSBPrefix = 0u; +const SBPrefix kMaxSBPrefix = ~kMinSBPrefix; + class SafeBrowsingStoreFileTest : public PlatformTest { public: virtual void SetUp() { @@ -58,7 +61,7 @@ class SafeBrowsingStoreFileTest : public PlatformTest { // Populate the store with some testing data. void PopulateStore(const base::Time& now) { - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); EXPECT_TRUE(store_->BeginChunk()); store_->SetAddChunk(kAddChunk1); @@ -85,7 +88,17 @@ class SafeBrowsingStoreFileTest : public PlatformTest { EXPECT_TRUE(store_->FinishUpdate(pending_adds, &builder, &add_full_hashes_result)); -} + } + + // Manually read the shard stride info from the file. + uint32 ReadStride() { + base::ScopedFILE file(base::OpenFile(filename_, "rb")); + const long kOffset = 4 * sizeof(uint32); + EXPECT_EQ(fseek(file.get(), kOffset, SEEK_SET), 0); + uint32 shard_stride = 0; + EXPECT_EQ(fread(&shard_stride, sizeof(shard_stride), 1, file.get()), 1U); + return shard_stride; + } base::ScopedTempDir temp_dir_; base::FilePath filename_; @@ -95,7 +108,7 @@ class SafeBrowsingStoreFileTest : public PlatformTest { // Test that the empty store looks empty. TEST_F(SafeBrowsingStoreFileTest, Empty) { - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); std::vector<int> chunks; store_->GetAddChunks(&chunks); @@ -132,7 +145,7 @@ TEST_F(SafeBrowsingStoreFileTest, StorePrefix) { const base::Time now = base::Time::Now(); PopulateStore(now); - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); std::vector<int> chunks; store_->GetAddChunks(&chunks); @@ -164,7 +177,7 @@ TEST_F(SafeBrowsingStoreFileTest, StorePrefix) { EXPECT_TRUE(SBFullHashEqual(kHash2, add_full_hashes_result[0].full_hash)); } - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); // Still has the chunks expected in the next update. store_->GetAddChunks(&chunks); @@ -200,9 +213,63 @@ TEST_F(SafeBrowsingStoreFileTest, StorePrefix) { } } +// Verify that the min and max prefixes are stored and operated on. 
+TEST_F(SafeBrowsingStoreFileTest, PrefixMinMax) { + const base::Time now = base::Time::Now(); + PopulateStore(now); + + ASSERT_TRUE(store_->BeginUpdate()); + + EXPECT_TRUE(store_->BeginChunk()); + store_->SetAddChunk(kAddChunk2); + EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk2, kMinSBPrefix)); + EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk2, kMaxSBPrefix)); + EXPECT_TRUE(store_->FinishChunk()); + + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); + + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + ASSERT_EQ(4U, prefixes_result.size()); + EXPECT_EQ(kMinSBPrefix, prefixes_result[0]); + EXPECT_EQ(kHash1.prefix, prefixes_result[1]); + EXPECT_EQ(kHash2.prefix, prefixes_result[2]); + EXPECT_EQ(kMaxSBPrefix, prefixes_result[3]); + } + + ASSERT_TRUE(store_->BeginUpdate()); + + EXPECT_TRUE(store_->BeginChunk()); + store_->SetAddChunk(kSubChunk2); + EXPECT_TRUE(store_->WriteSubPrefix(kSubChunk2, kAddChunk2, kMinSBPrefix)); + EXPECT_TRUE(store_->WriteSubPrefix(kSubChunk2, kAddChunk2, kMaxSBPrefix)); + EXPECT_TRUE(store_->FinishChunk()); + + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); + + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + ASSERT_EQ(2U, prefixes_result.size()); + EXPECT_EQ(kHash1.prefix, prefixes_result[0]); + EXPECT_EQ(kHash2.prefix, prefixes_result[1]); + } +} + // Test that subs knockout adds. TEST_F(SafeBrowsingStoreFileTest, SubKnockout) { - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); const base::Time now = base::Time::Now(); @@ -233,7 +300,7 @@ TEST_F(SafeBrowsingStoreFileTest, SubKnockout) { EXPECT_TRUE(add_full_hashes_result.empty()); } - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); // This add should be knocked out by an existing sub. EXPECT_TRUE(store_->BeginChunk()); @@ -256,7 +323,7 @@ TEST_F(SafeBrowsingStoreFileTest, SubKnockout) { EXPECT_TRUE(add_full_hashes_result.empty()); } - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); // But by here the sub should be gone, so it should stick this time. EXPECT_TRUE(store_->BeginChunk()); @@ -283,7 +350,7 @@ TEST_F(SafeBrowsingStoreFileTest, SubKnockout) { // Test that deletes delete the chunk's data. TEST_F(SafeBrowsingStoreFileTest, DeleteChunks) { - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); const base::Time now = base::Time::Now(); @@ -349,7 +416,7 @@ TEST_F(SafeBrowsingStoreFileTest, DeleteChunks) { } // Expected chunks are there in another update. - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); EXPECT_FALSE(store_->CheckAddChunk(kAddChunk1)); EXPECT_TRUE(store_->CheckAddChunk(kAddChunk2)); EXPECT_FALSE(store_->CheckSubChunk(kSubChunk1)); @@ -369,7 +436,7 @@ TEST_F(SafeBrowsingStoreFileTest, DeleteChunks) { } // Expect no more chunks. 
- EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); EXPECT_FALSE(store_->CheckAddChunk(kAddChunk1)); EXPECT_FALSE(store_->CheckAddChunk(kAddChunk2)); EXPECT_FALSE(store_->CheckSubChunk(kSubChunk1)); @@ -413,7 +480,7 @@ TEST_F(SafeBrowsingStoreFileTest, DeleteTemp) { EXPECT_FALSE(base::PathExists(temp_file)); // Starting a transaction creates a temporary file. - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); EXPECT_TRUE(base::PathExists(temp_file)); // Pull the rug out from under the existing store, simulating a @@ -440,7 +507,7 @@ TEST_F(SafeBrowsingStoreFileTest, DetectsCorruption) { std::vector<SBPrefix> orig_prefixes; std::vector<SBAddFullHash> orig_hashes; safe_browsing::PrefixSetBuilder builder; - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); EXPECT_TRUE(store_->FinishUpdate(pending_adds, &builder, &orig_hashes)); builder.GetPrefixSet()->GetPrefixes(&orig_prefixes); EXPECT_GT(orig_prefixes.size(), 0U); @@ -466,7 +533,7 @@ TEST_F(SafeBrowsingStoreFileTest, DetectsCorruption) { { std::vector<SBAddFullHash> pending_adds; safe_browsing::PrefixSetBuilder builder; - EXPECT_TRUE(store_->BeginUpdate()); + ASSERT_TRUE(store_->BeginUpdate()); EXPECT_FALSE(store_->FinishUpdate(pending_adds, &builder, &add_hashes)); EXPECT_TRUE(corruption_detected_); } @@ -505,14 +572,13 @@ TEST_F(SafeBrowsingStoreFileTest, CheckValidity) { EXPECT_TRUE(store_->CancelUpdate()); } -// Corrupt the payload. -TEST_F(SafeBrowsingStoreFileTest, CheckValidityPayload) { +// Corrupt the header. +TEST_F(SafeBrowsingStoreFileTest, CheckValidityHeader) { PopulateStore(base::Time::Now()); EXPECT_TRUE(base::PathExists(filename_)); - // 37 is the most random prime number. It's also past the header, - // as corrupting the header would fail BeginUpdate() in which case - // CheckValidity() cannot be called. + // 37 is the most random prime number. It's also past the initial header + // struct, somewhere in the chunk list. const size_t kOffset = 37; { @@ -520,6 +586,25 @@ TEST_F(SafeBrowsingStoreFileTest, CheckValidityPayload) { EXPECT_EQ(0, fseek(file.get(), kOffset, SEEK_SET)); EXPECT_GE(fputs("hello", file.get()), 0); } + ASSERT_FALSE(store_->BeginUpdate()); + EXPECT_TRUE(corruption_detected_); +} + +// Corrupt the prefix payload. +TEST_F(SafeBrowsingStoreFileTest, CheckValidityPayload) { + PopulateStore(base::Time::Now()); + EXPECT_TRUE(base::PathExists(filename_)); + + // 137 is the second most random prime number. It's also past the header and + // chunk-id area. Corrupting the header would fail BeginUpdate() in which + // case CheckValidity() cannot be called. 
+ const size_t kOffset = 137; + + { + base::ScopedFILE file(base::OpenFile(filename_, "rb+")); + EXPECT_EQ(0, fseek(file.get(), kOffset, SEEK_SET)); + EXPECT_GE(fputs("hello", file.get()), 0); + } ASSERT_TRUE(store_->BeginUpdate()); EXPECT_FALSE(corruption_detected_); EXPECT_FALSE(store_->CheckValidity()); @@ -547,4 +632,131 @@ TEST_F(SafeBrowsingStoreFileTest, CheckValidityChecksum) { EXPECT_TRUE(store_->CancelUpdate()); } +TEST_F(SafeBrowsingStoreFileTest, GetAddPrefixesAndHashes) { + ASSERT_TRUE(store_->BeginUpdate()); + + const base::Time now = base::Time::Now(); + + EXPECT_TRUE(store_->BeginChunk()); + store_->SetAddChunk(kAddChunk1); + EXPECT_TRUE(store_->CheckAddChunk(kAddChunk1)); + EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk1, kHash1.prefix)); + EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk1, kHash2.prefix)); + EXPECT_TRUE(store_->WriteAddHash(kAddChunk1, now, kHash2)); + + store_->SetSubChunk(kSubChunk1); + EXPECT_TRUE(store_->CheckSubChunk(kSubChunk1)); + EXPECT_TRUE(store_->WriteSubPrefix(kSubChunk1, kAddChunk3, kHash3.prefix)); + EXPECT_TRUE(store_->WriteSubHash(kSubChunk1, kAddChunk3, kHash3)); + EXPECT_TRUE(store_->FinishChunk()); + + // Chunk numbers shouldn't leak over. + EXPECT_FALSE(store_->CheckAddChunk(kSubChunk1)); + EXPECT_FALSE(store_->CheckAddChunk(kAddChunk3)); + EXPECT_FALSE(store_->CheckSubChunk(kAddChunk1)); + + std::vector<int> chunks; + store_->GetAddChunks(&chunks); + ASSERT_EQ(1U, chunks.size()); + EXPECT_EQ(kAddChunk1, chunks[0]); + + store_->GetSubChunks(&chunks); + ASSERT_EQ(1U, chunks.size()); + EXPECT_EQ(kSubChunk1, chunks[0]); + + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(std::vector<SBAddFullHash>(), + &builder, + &add_full_hashes_result)); + + SBAddPrefixes add_prefixes; + EXPECT_TRUE(store_->GetAddPrefixes(&add_prefixes)); + ASSERT_EQ(2U, add_prefixes.size()); + EXPECT_EQ(kAddChunk1, add_prefixes[0].chunk_id); + EXPECT_EQ(kHash1.prefix, add_prefixes[0].prefix); + EXPECT_EQ(kAddChunk1, add_prefixes[1].chunk_id); + EXPECT_EQ(kHash2.prefix, add_prefixes[1].prefix); + + std::vector<SBAddFullHash> add_hashes; + EXPECT_TRUE(store_->GetAddFullHashes(&add_hashes)); + ASSERT_EQ(1U, add_hashes.size()); + EXPECT_EQ(kAddChunk1, add_hashes[0].chunk_id); + EXPECT_TRUE(SBFullHashEqual(kHash2, add_hashes[0].full_hash)); +} + +// Test that the database handles resharding correctly, both when growing and +// which shrinking. +TEST_F(SafeBrowsingStoreFileTest, Resharding) { + // Loop through multiple stride boundaries (1<<32, 1<<31, 1<<30, 1<<29). + const uint32 kTargetStride = 1 << 29; + + // Each chunk will require 8 bytes per prefix, plus 4 bytes for chunk + // information. It should be less than |kTargetFootprint| in the + // implementation, but high enough to keep the number of rewrites modest (to + // keep the test fast). + const size_t kPrefixesPerChunk = 10000; + + uint32 shard_stride = 0; + int chunk_id = 1; + + // Add a series of chunks, tracking that the stride size changes in a + // direction appropriate to increasing file size. 
+ do { + ASSERT_TRUE(store_->BeginUpdate()); + + EXPECT_TRUE(store_->BeginChunk()); + store_->SetAddChunk(chunk_id); + EXPECT_TRUE(store_->CheckAddChunk(chunk_id)); + for (size_t i = 0; i < kPrefixesPerChunk; ++i) { + EXPECT_TRUE(store_->WriteAddPrefix(chunk_id, static_cast<SBPrefix>(i))); + } + EXPECT_TRUE(store_->FinishChunk()); + + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); + + SBAddPrefixes add_prefixes; + EXPECT_TRUE(store_->GetAddPrefixes(&add_prefixes)); + ASSERT_EQ(chunk_id * kPrefixesPerChunk, add_prefixes.size()); + + // New stride should be the same, or shifted one right. + const uint32 new_shard_stride = ReadStride(); + EXPECT_TRUE((new_shard_stride == shard_stride) || + ((new_shard_stride << 1) == shard_stride)); + shard_stride = new_shard_stride; + ++chunk_id; + } while (!shard_stride || shard_stride > kTargetStride); + + // Guard against writing too many chunks. If this gets too big, adjust + // |kPrefixesPerChunk|. + EXPECT_LT(chunk_id, 20); + + // Remove each chunk and check that the stride goes back to 0. + while (--chunk_id) { + ASSERT_TRUE(store_->BeginUpdate()); + EXPECT_TRUE(store_->CheckAddChunk(chunk_id)); + EXPECT_FALSE(store_->CheckAddChunk(chunk_id + 1)); + store_->DeleteAddChunk(chunk_id); + + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); + + // New stride should be the same, or shifted one left. + const uint32 new_shard_stride = ReadStride(); + EXPECT_TRUE((new_shard_stride == shard_stride) || + (new_shard_stride == (shard_stride << 1))); + shard_stride = new_shard_stride; + } + EXPECT_EQ(0u, shard_stride); +} + } // namespace diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc index 199138e..420ed1c 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc @@ -178,6 +178,15 @@ TEST(SafeBrowsingStoreTest, SBProcessSubsKnockout) { add_hashes.push_back(SBAddFullHash(kAddChunk1, kNow, kHash4mod)); sub_hashes.push_back(SBSubFullHash(kSubChunk1, kAddChunk1, kHash4mod)); + std::sort(add_prefixes.begin(), add_prefixes.end(), + SBAddPrefixLess<SBAddPrefix,SBAddPrefix>); + std::sort(sub_prefixes.begin(), sub_prefixes.end(), + SBAddPrefixLess<SBSubPrefix,SBSubPrefix>); + std::sort(add_hashes.begin(), add_hashes.end(), + SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>); + std::sort(sub_hashes.begin(), sub_hashes.end(), + SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>); + const base::hash_set<int32> no_deletions; SBProcessSubs(&add_prefixes, &sub_prefixes, &add_hashes, &sub_hashes, no_deletions, no_deletions); @@ -249,6 +258,15 @@ TEST(SafeBrowsingStoreTest, SBProcessSubsDeleteChunk) { sub_hashes.push_back(SBSubFullHash(kSubChunk1, kAddChunk1, kHash3)); sub_prefixes.push_back(SBSubPrefix(kSubChunk1, kAddChunk1, kHash3.prefix)); + std::sort(add_prefixes.begin(), add_prefixes.end(), + SBAddPrefixLess<SBAddPrefix,SBAddPrefix>); + std::sort(sub_prefixes.begin(), sub_prefixes.end(), + SBAddPrefixLess<SBSubPrefix,SBSubPrefix>); + std::sort(add_hashes.begin(), add_hashes.end(), + SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>); + 
std::sort(sub_hashes.begin(), sub_hashes.end(), + SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>); + const base::hash_set<int32> no_deletions; base::hash_set<int32> add_deletions; add_deletions.insert(kAddChunk1); @@ -268,6 +286,15 @@ TEST(SafeBrowsingStoreTest, SBProcessSubsDeleteChunk) { EXPECT_EQ(kAddChunk1, sub_hashes[0].add_chunk_id); EXPECT_TRUE(SBFullHashEqual(kHash3, sub_hashes[0].full_hash)); + std::sort(add_prefixes.begin(), add_prefixes.end(), + SBAddPrefixLess<SBAddPrefix,SBAddPrefix>); + std::sort(sub_prefixes.begin(), sub_prefixes.end(), + SBAddPrefixLess<SBSubPrefix,SBSubPrefix>); + std::sort(add_hashes.begin(), add_hashes.end(), + SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>); + std::sort(sub_hashes.begin(), sub_hashes.end(), + SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>); + base::hash_set<int32> sub_deletions; sub_deletions.insert(kSubChunk1); SBProcessSubs(&add_prefixes, &sub_prefixes, &add_hashes, &sub_hashes, |