diff options
author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-03-18 20:41:06 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-03-18 20:41:06 +0000 |
commit | 2b7dea8f59e2517df1cd8bfba3dd90ce260dafc3 (patch) | |
tree | 90ae9fe84421fc07b14a3bec97c7edf178592824 | |
parent | 139c51038197965b0e010a1b4496829a6828310f (diff) | |
download | chromium_src-2b7dea8f59e2517df1cd8bfba3dd90ce260dafc3.zip chromium_src-2b7dea8f59e2517df1cd8bfba3dd90ce260dafc3.tar.gz chromium_src-2b7dea8f59e2517df1cd8bfba3dd90ce260dafc3.tar.bz2 |
Incremental PrefixSet builder for Safe Browsing store.
Incremental update of the database file implies incrementally building
the PrefixSet. Refactor the code to that end, taking advantage of the
change to get rid of the copy converting from SBAddPrefix to SBPrefix.
Change SBAddPrefix ordering to prefix-primary to remove the need to re-sort
things and get some bake time on the reordering.
BUG=351448
Review URL: https://codereview.chromium.org/200633002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@257735 0039d316-1c4b-4281-b951-d872f2087c98
9 files changed, 452 insertions, 314 deletions
diff --git a/chrome/browser/safe_browsing/prefix_set.cc b/chrome/browser/safe_browsing/prefix_set.cc index 006f17c..e830660 100644 --- a/chrome/browser/safe_browsing/prefix_set.cc +++ b/chrome/browser/safe_browsing/prefix_set.cc @@ -37,6 +37,41 @@ typedef struct { uint32 deltas_size; } FileHeader; +// Common std::vector<> implementations add capacity by multiplying from the +// current size (usually either by 2 or 1.5) to satisfy push_back() running in +// amortized constant time. This is not necessary for insert() at end(), but +// AFAICT it seems true for some implementations. SBPrefix values should +// uniformly cover the 32-bit space, so the final size can be estimated given a +// subset of the input. +// +// |kEstimateThreshold| is when estimates start converging. Results are strong +// starting around 1<<27. 1<<30 is chosen to prevent the degenerate case of +// resizing capacity from >50% to 100%. +// +// TODO(shess): I'm sure there is math in the world to describe good settings +// for estimating the size of a uniformly-distributed set of integers from a +// sorted subset. I do not have such math in me, so I assumed that my current +// organic database of prefixes was scale-free, and wrote a script to see how +// often given slop values would always suffice for given strides. At 1<<30, +// .5% slop was sufficient to cover all cases (though the code below uses 1%). +// +// TODO(shess): A smaller threshold uses less transient space in reallocation. +// 1<<30 uses between 125% and 150%, 1<<29 between 112% and 125%, etc. The cost +// is that a smaller threshold needs more slop (locked down for the long term). +// 1<<29 worked well with 1%, 1<<27 worked well with 2%. +const SBPrefix kEstimateThreshold = 1 << 30; +size_t EstimateFinalCount(SBPrefix current_prefix, size_t current_count) { + // estimated_count / current_count == estimated_max / current_prefix + // For large input sets, estimated_max of 2^32 is close enough. + const size_t estimated_prefix_count = static_cast<size_t>( + (static_cast<uint64>(current_count) << 32) / current_prefix); + + // The estimate has an error bar, if the final total is below the estimate, no + // harm, but if it is above a capacity resize will happen at nearly 100%. Add + // some slop to make sure all cases are covered. + return estimated_prefix_count + estimated_prefix_count / 100; +} + } // namespace namespace safe_browsing { @@ -47,56 +82,7 @@ bool PrefixSet::PrefixLess(const IndexPair& a, const IndexPair& b) { return a.first < b.first; } -PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) { - if (sorted_prefixes.size()) { - // Estimate the resulting vector sizes. There will be strictly - // more than |min_runs| entries in |index_|, but there generally - // aren't many forced breaks. - const size_t min_runs = sorted_prefixes.size() / kMaxRun; - index_.reserve(min_runs); - deltas_.reserve(sorted_prefixes.size() - min_runs); - - // Lead with the first prefix. - SBPrefix prev_prefix = sorted_prefixes[0]; - size_t run_length = 0; - index_.push_back(std::make_pair(prev_prefix, deltas_.size())); - - for (size_t i = 1; i < sorted_prefixes.size(); ++i) { - // Skip duplicates. - if (sorted_prefixes[i] == prev_prefix) - continue; - - // Calculate the delta. |unsigned| is mandatory, because the - // sorted_prefixes could be more than INT_MAX apart. - DCHECK_GT(sorted_prefixes[i], prev_prefix); - const unsigned delta = sorted_prefixes[i] - prev_prefix; - const uint16 delta16 = static_cast<uint16>(delta); - - // New index ref if the delta doesn't fit, or if too many - // consecutive deltas have been encoded. - if (delta != static_cast<unsigned>(delta16) || run_length >= kMaxRun) { - index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size())); - run_length = 0; - } else { - // Continue the run of deltas. - deltas_.push_back(delta16); - DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta); - ++run_length; - } - - prev_prefix = sorted_prefixes[i]; - } - - // Send up some memory-usage stats. Bits because fractional bytes - // are weird. - const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT + - deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT; - const size_t unique_prefixes = index_.size() + deltas_.size(); - static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT; - UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix", - bits_used / unique_prefixes, - kMaxBitsPerPrefix); - } +PrefixSet::PrefixSet() { } PrefixSet::PrefixSet(IndexVector* index, std::vector<uint16>* deltas) { @@ -158,29 +144,29 @@ void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) const { } // static -PrefixSet* PrefixSet::LoadFile(const base::FilePath& filter_name) { +scoped_ptr<PrefixSet> PrefixSet::LoadFile(const base::FilePath& filter_name) { int64 size_64; if (!base::GetFileSize(filter_name, &size_64)) - return NULL; + return scoped_ptr<PrefixSet>(); using base::MD5Digest; if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest))) - return NULL; + return scoped_ptr<PrefixSet>(); base::ScopedFILE file(base::OpenFile(filter_name, "rb")); if (!file.get()) - return NULL; + return scoped_ptr<PrefixSet>(); FileHeader header; size_t read = fread(&header, sizeof(header), 1, file.get()); if (read != 1) - return NULL; + return scoped_ptr<PrefixSet>(); // TODO(shess): Version 1 and 2 use the same file structure, with version 1 // data using a signed sort. For M-35, the data is re-sorted before return. // After M-35, just drop v1 support. <http://crbug.com/346405> if (header.magic != kMagic || (header.version != kVersion && header.version != 1)) { - return NULL; + return scoped_ptr<PrefixSet>(); } IndexVector index; @@ -193,7 +179,7 @@ PrefixSet* PrefixSet::LoadFile(const base::FilePath& filter_name) { const size_t expected_bytes = sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest); if (static_cast<int64>(expected_bytes) != size_64) - return NULL; + return scoped_ptr<PrefixSet>(); // The file looks valid, start building the digest. base::MD5Context context; @@ -208,7 +194,7 @@ PrefixSet* PrefixSet::LoadFile(const base::FilePath& filter_name) { index.resize(header.index_size); read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); if (read != index.size()) - return NULL; + return scoped_ptr<PrefixSet>(); base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&(index[0])), index_bytes)); @@ -219,7 +205,7 @@ PrefixSet* PrefixSet::LoadFile(const base::FilePath& filter_name) { deltas.resize(header.deltas_size); read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); if (read != deltas.size()) - return NULL; + return scoped_ptr<PrefixSet>(); base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&(deltas[0])), deltas_bytes)); @@ -231,21 +217,21 @@ PrefixSet* PrefixSet::LoadFile(const base::FilePath& filter_name) { base::MD5Digest file_digest; read = fread(&file_digest, sizeof(file_digest), 1, file.get()); if (read != 1) - return NULL; + return scoped_ptr<PrefixSet>(); if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) - return NULL; + return scoped_ptr<PrefixSet>(); // For version 1, fetch the prefixes and re-sort. if (header.version == 1) { std::vector<SBPrefix> prefixes; PrefixSet(&index, &deltas).GetPrefixes(&prefixes); std::sort(prefixes.begin(), prefixes.end()); - return new PrefixSet(prefixes); + return PrefixSetBuilder(prefixes).GetPrefixSet().Pass(); } // Steals contents of |index| and |deltas| via swap(). - return new PrefixSet(&index, &deltas); + return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas)); } bool PrefixSet::WriteFile(const base::FilePath& filter_name) const { @@ -315,4 +301,97 @@ bool PrefixSet::WriteFile(const base::FilePath& filter_name) const { return true; } +void PrefixSet::AddRun(SBPrefix index_prefix, + const uint16* run_begin, const uint16* run_end) { + // Preempt organic capacity decisions for |delta_| once strong estimates can + // be made. + if (index_prefix > kEstimateThreshold && + deltas_.capacity() < deltas_.size() + (run_end - run_begin)) { + deltas_.reserve(EstimateFinalCount(index_prefix, deltas_.size())); + } + + index_.push_back(std::make_pair(index_prefix, deltas_.size())); + deltas_.insert(deltas_.end(), run_begin, run_end); +} + +PrefixSetBuilder::PrefixSetBuilder() + : prefix_set_(new PrefixSet()) { +} + +PrefixSetBuilder::PrefixSetBuilder(const std::vector<SBPrefix>& prefixes) + : prefix_set_(new PrefixSet()) { + for (size_t i = 0; i < prefixes.size(); ++i) { + AddPrefix(prefixes[i]); + } +} + +PrefixSetBuilder::~PrefixSetBuilder() { +} + +scoped_ptr<PrefixSet> PrefixSetBuilder::GetPrefixSet() { + DCHECK(prefix_set_.get()); + + // Flush runs until buffered data is gone. + while (!buffer_.empty()) { + EmitRun(); + } + + // Precisely size |index_| for read-only. It's 50k-60k, so minor savings, but + // they're almost free. + PrefixSet::IndexVector(prefix_set_->index_).swap(prefix_set_->index_); + + return prefix_set_.Pass(); +} + + +void PrefixSetBuilder::EmitRun() { + DCHECK(prefix_set_.get()); + + SBPrefix prev_prefix = buffer_[0]; + uint16 run[PrefixSet::kMaxRun]; + size_t run_pos = 0; + + size_t i; + for (i = 1; i < buffer_.size() && run_pos < PrefixSet::kMaxRun; ++i) { + // Calculate the delta. |unsigned| is mandatory, because the + // sorted_prefixes could be more than INT_MAX apart. + DCHECK_GT(buffer_[i], prev_prefix); + const unsigned delta = buffer_[i] - prev_prefix; + const uint16 delta16 = static_cast<uint16>(delta); + + // Break the run if the delta doesn't fit. + if (delta != static_cast<unsigned>(delta16)) + break; + + // Continue the run of deltas. + run[run_pos++] = delta16; + DCHECK_EQ(static_cast<unsigned>(run[run_pos - 1]), delta); + + prev_prefix = buffer_[i]; + } + prefix_set_->AddRun(buffer_[0], run, run + run_pos); + buffer_.erase(buffer_.begin(), buffer_.begin() + i); +} + +void PrefixSetBuilder::AddPrefix(SBPrefix prefix) { + DCHECK(prefix_set_.get()); + + if (buffer_.empty()) { + DCHECK(prefix_set_->index_.empty()); + DCHECK(prefix_set_->deltas_.empty()); + } else { + // Drop duplicates. + if (buffer_.back() == prefix) + return; + + DCHECK_LT(buffer_.back(), prefix); + } + buffer_.push_back(prefix); + + // Flush buffer when a run can be constructed. +1 for the index item, and +1 + // to leave at least one item in the buffer for dropping duplicates. + if (buffer_.size() > PrefixSet::kMaxRun + 2) + EmitRun(); +} + } // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/prefix_set.h b/chrome/browser/safe_browsing/prefix_set.h index 997d6f3..7d02473 100644 --- a/chrome/browser/safe_browsing/prefix_set.h +++ b/chrome/browser/safe_browsing/prefix_set.h @@ -51,6 +51,7 @@ #include <vector> +#include "base/memory/scoped_ptr.h" #include "chrome/browser/safe_browsing/safe_browsing_util.h" namespace base { @@ -61,14 +62,13 @@ namespace safe_browsing { class PrefixSet { public: - explicit PrefixSet(const std::vector<SBPrefix>& sorted_prefixes); ~PrefixSet(); // |true| if |prefix| was in |prefixes| passed to the constructor. bool Exists(SBPrefix prefix) const; // Persist the set on disk. - static PrefixSet* LoadFile(const base::FilePath& filter_name); + static scoped_ptr<PrefixSet> LoadFile(const base::FilePath& filter_name); bool WriteFile(const base::FilePath& filter_name) const; // Regenerate the vector of prefixes passed to the constructor into @@ -76,6 +76,8 @@ class PrefixSet { void GetPrefixes(std::vector<SBPrefix>* prefixes) const; private: + friend class PrefixSetBuilder; + // Maximum number of consecutive deltas to encode before generating // a new index entry. This helps keep the worst-case performance // for |Exists()| under control. @@ -86,6 +88,14 @@ class PrefixSet { typedef std::vector<IndexPair> IndexVector; static bool PrefixLess(const IndexPair& a, const IndexPair& b); + // Helper to let |PrefixSetBuilder| add a run of data. |index_prefix| is + // added to |index_|, with the other elements added into |deltas_|. + void AddRun(SBPrefix index_prefix, + const uint16* run_begin, const uint16* run_end); + + // Used by |PrefixSetBuilder|. + PrefixSet(); + // Helper for |LoadFile()|. Steals the contents of |index| and // |deltas| using |swap()|. PrefixSet(IndexVector* index, std::vector<uint16>* deltas); @@ -104,6 +114,35 @@ class PrefixSet { DISALLOW_COPY_AND_ASSIGN(PrefixSet); }; +// Helper to incrementally build a PrefixSet from a stream of sorted prefixes. +class PrefixSetBuilder { + public: + PrefixSetBuilder(); + ~PrefixSetBuilder(); + + // Helper for unit tests and format conversion. + explicit PrefixSetBuilder(const std::vector<SBPrefix>& prefixes); + + // Add a prefix to the set. Prefixes must arrive in ascending order. + // Duplicate prefixes are dropped. + void AddPrefix(SBPrefix prefix); + + // Flush any buffered prefixes, and return the final PrefixSet instance. + // Any call other than the destructor is illegal after this call. + scoped_ptr<PrefixSet> GetPrefixSet(); + + private: + // Encode a run of deltas for |AddRun()|. The run is broken by a too-large + // delta, or kMaxRun, whichever comes first. + void EmitRun(); + + // Buffers prefixes until enough are avaliable to emit a run. + std::vector<SBPrefix> buffer_; + + // The PrefixSet being built. + scoped_ptr<PrefixSet> prefix_set_; +}; + } // namespace safe_browsing #endif // CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ diff --git a/chrome/browser/safe_browsing/prefix_set_unittest.cc b/chrome/browser/safe_browsing/prefix_set_unittest.cc index 36e670e..27b9ff5 100644 --- a/chrome/browser/safe_browsing/prefix_set_unittest.cc +++ b/chrome/browser/safe_browsing/prefix_set_unittest.cc @@ -95,8 +95,8 @@ class PrefixSetTest : public PlatformTest { base::FilePath filename = temp_dir_.path().AppendASCII("PrefixSetTest"); - safe_browsing::PrefixSet prefix_set(shared_prefixes_); - if (!prefix_set.WriteFile(filename)) + safe_browsing::PrefixSetBuilder builder(shared_prefixes_); + if (!builder.GetPrefixSet()->WriteFile(filename)) return false; *filenamep = filename; @@ -175,31 +175,33 @@ std::vector<SBPrefix> PrefixSetTest::shared_prefixes_; // Test that a small sparse random input works. TEST_F(PrefixSetTest, Baseline) { - safe_browsing::PrefixSet prefix_set(shared_prefixes_); - CheckPrefixes(prefix_set, shared_prefixes_); + safe_browsing::PrefixSetBuilder builder(shared_prefixes_); + CheckPrefixes(*builder.GetPrefixSet(), shared_prefixes_); } // Test that the empty set doesn't appear to have anything in it. TEST_F(PrefixSetTest, Empty) { const std::vector<SBPrefix> empty; - safe_browsing::PrefixSet prefix_set(empty); + safe_browsing::PrefixSetBuilder builder(empty); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = builder.GetPrefixSet(); for (size_t i = 0; i < shared_prefixes_.size(); ++i) { - EXPECT_FALSE(prefix_set.Exists(shared_prefixes_[i])); + EXPECT_FALSE(prefix_set->Exists(shared_prefixes_[i])); } } // Single-element set should work fine. TEST_F(PrefixSetTest, OneElement) { - const std::vector<SBPrefix> prefixes(100, 0); - safe_browsing::PrefixSet prefix_set(prefixes); - EXPECT_FALSE(prefix_set.Exists(-1)); - EXPECT_TRUE(prefix_set.Exists(prefixes[0])); - EXPECT_FALSE(prefix_set.Exists(1)); + const std::vector<SBPrefix> prefixes(100, 0u); + safe_browsing::PrefixSetBuilder builder(prefixes); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = builder.GetPrefixSet(); + EXPECT_FALSE(prefix_set->Exists(static_cast<SBPrefix>(-1))); + EXPECT_TRUE(prefix_set->Exists(prefixes[0])); + EXPECT_FALSE(prefix_set->Exists(1u)); // Check that |GetPrefixes()| returns the same set of prefixes as // was passed in. std::vector<SBPrefix> prefixes_copy; - prefix_set.GetPrefixes(&prefixes_copy); + prefix_set->GetPrefixes(&prefixes_copy); EXPECT_EQ(1U, prefixes_copy.size()); EXPECT_EQ(prefixes[0], prefixes_copy[0]); } @@ -220,12 +222,13 @@ TEST_F(PrefixSetTest, IntMinMax) { prefixes.push_back(0xFFFFFFFF); std::sort(prefixes.begin(), prefixes.end()); - safe_browsing::PrefixSet prefix_set(prefixes); + safe_browsing::PrefixSetBuilder builder(prefixes); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = builder.GetPrefixSet(); // Check that |GetPrefixes()| returns the same set of prefixes as // was passed in. std::vector<SBPrefix> prefixes_copy; - prefix_set.GetPrefixes(&prefixes_copy); + prefix_set->GetPrefixes(&prefixes_copy); ASSERT_EQ(prefixes_copy.size(), prefixes.size()); EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(), prefixes_copy.begin())); @@ -242,12 +245,13 @@ TEST_F(PrefixSetTest, AllBig) { } std::sort(prefixes.begin(), prefixes.end()); - safe_browsing::PrefixSet prefix_set(prefixes); + safe_browsing::PrefixSetBuilder builder(prefixes); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = builder.GetPrefixSet(); // Check that |GetPrefixes()| returns the same set of prefixes as // was passed in. std::vector<SBPrefix> prefixes_copy; - prefix_set.GetPrefixes(&prefixes_copy); + prefix_set->GetPrefixes(&prefixes_copy); prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); EXPECT_EQ(prefixes_copy.size(), prefixes.size()); EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(), @@ -296,29 +300,30 @@ TEST_F(PrefixSetTest, EdgeCases) { } std::sort(prefixes.begin(), prefixes.end()); - safe_browsing::PrefixSet prefix_set(prefixes); + safe_browsing::PrefixSetBuilder builder(prefixes); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = builder.GetPrefixSet(); // Check that |GetPrefixes()| returns the same set of prefixes as // was passed in. std::vector<SBPrefix> prefixes_copy; - prefix_set.GetPrefixes(&prefixes_copy); + prefix_set->GetPrefixes(&prefixes_copy); prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); EXPECT_EQ(prefixes_copy.size(), prefixes.size()); EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(), prefixes_copy.begin())); // Items before and after the set are not present, and don't crash. - EXPECT_FALSE(prefix_set.Exists(kHighBitSet - 100)); - EXPECT_FALSE(prefix_set.Exists(kHighBitClear + 100)); + EXPECT_FALSE(prefix_set->Exists(kHighBitSet - 100)); + EXPECT_FALSE(prefix_set->Exists(kHighBitClear + 100)); // Check that the set correctly flags all of the inputs, and also // check items just above and below the inputs to make sure they // aren't present. for (size_t i = 0; i < prefixes.size(); ++i) { - EXPECT_TRUE(prefix_set.Exists(prefixes[i])); + EXPECT_TRUE(prefix_set->Exists(prefixes[i])); - EXPECT_FALSE(prefix_set.Exists(prefixes[i] - 1)); - EXPECT_FALSE(prefix_set.Exists(prefixes[i] + 1)); + EXPECT_FALSE(prefix_set->Exists(prefixes[i] - 1)); + EXPECT_FALSE(prefix_set->Exists(prefixes[i] + 1)); } } @@ -330,8 +335,8 @@ TEST_F(PrefixSetTest, ReadWrite) { // the prefixes. Leaves the path in |filename|. { ASSERT_TRUE(GetPrefixSetFile(&filename)); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_TRUE(prefix_set.get()); CheckPrefixes(*prefix_set, shared_prefixes_); } @@ -342,10 +347,11 @@ TEST_F(PrefixSetTest, ReadWrite) { prefixes.push_back(kHighBitClear); prefixes.push_back(kHighBitSet); - safe_browsing::PrefixSet prefix_set_to_write(prefixes); - ASSERT_TRUE(prefix_set_to_write.WriteFile(filename)); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + safe_browsing::PrefixSetBuilder builder(prefixes); + ASSERT_TRUE(builder.GetPrefixSet()->WriteFile(filename)); + + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_TRUE(prefix_set.get()); CheckPrefixes(*prefix_set, prefixes); } @@ -353,10 +359,11 @@ TEST_F(PrefixSetTest, ReadWrite) { // Test writing and reading an empty set. { std::vector<SBPrefix> prefixes; - safe_browsing::PrefixSet prefix_set_to_write(prefixes); - ASSERT_TRUE(prefix_set_to_write.WriteFile(filename)); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + safe_browsing::PrefixSetBuilder builder(prefixes); + ASSERT_TRUE(builder.GetPrefixSet()->WriteFile(filename)); + + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_TRUE(prefix_set.get()); CheckPrefixes(*prefix_set, prefixes); } @@ -371,8 +378,8 @@ TEST_F(PrefixSetTest, CorruptionHelpers) { base::ScopedFILE file(base::OpenFile(filename, "r+b")); IncrementIntAt(file.get(), kPayloadOffset, 1); file.reset(); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_FALSE(prefix_set.get()); // Fix up the checksum and it will read successfully (though the @@ -380,7 +387,7 @@ TEST_F(PrefixSetTest, CorruptionHelpers) { file.reset(base::OpenFile(filename, "r+b")); CleanChecksum(file.get()); file.reset(); - prefix_set.reset(safe_browsing::PrefixSet::LoadFile(filename)); + prefix_set = safe_browsing::PrefixSet::LoadFile(filename); ASSERT_TRUE(prefix_set.get()); } @@ -391,8 +398,8 @@ TEST_F(PrefixSetTest, CorruptionMagic) { ASSERT_NO_FATAL_FAILURE( ModifyAndCleanChecksum(filename, kMagicOffset, 1)); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_FALSE(prefix_set.get()); } @@ -403,8 +410,8 @@ TEST_F(PrefixSetTest, CorruptionVersion) { ASSERT_NO_FATAL_FAILURE( ModifyAndCleanChecksum(filename, kVersionOffset, 1)); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_FALSE(prefix_set.get()); } @@ -415,8 +422,8 @@ TEST_F(PrefixSetTest, CorruptionIndexSize) { ASSERT_NO_FATAL_FAILURE( ModifyAndCleanChecksum(filename, kIndexSizeOffset, 1)); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_FALSE(prefix_set.get()); } @@ -427,8 +434,8 @@ TEST_F(PrefixSetTest, CorruptionDeltasSize) { ASSERT_NO_FATAL_FAILURE( ModifyAndCleanChecksum(filename, kDeltasSizeOffset, 1)); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_FALSE(prefix_set.get()); } @@ -441,8 +448,8 @@ TEST_F(PrefixSetTest, CorruptionPayload) { base::ScopedFILE file(base::OpenFile(filename, "r+b")); ASSERT_NO_FATAL_FAILURE(IncrementIntAt(file.get(), 666, 1)); file.reset(); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_FALSE(prefix_set.get()); } @@ -457,8 +464,8 @@ TEST_F(PrefixSetTest, CorruptionDigest) { long digest_offset = static_cast<long>(size_64 - sizeof(base::MD5Digest)); ASSERT_NO_FATAL_FAILURE(IncrementIntAt(file.get(), digest_offset, 1)); file.reset(); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_FALSE(prefix_set.get()); } @@ -472,8 +479,8 @@ TEST_F(PrefixSetTest, CorruptionExcess) { const char buf[] = "im in ur base, killing ur d00dz."; ASSERT_EQ(strlen(buf), fwrite(buf, 1, strlen(buf), file.get())); file.reset(); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_FALSE(prefix_set.get()); } @@ -515,8 +522,8 @@ TEST_F(PrefixSetTest, SizeTRecovery) { CleanChecksum(file.get()); file.reset(); // Flush updates. - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_FALSE(prefix_set.get()); } @@ -561,8 +568,8 @@ TEST_F(PrefixSetTest, ReadWriteSigned) { CleanChecksum(file.get()); file.reset(); // Flush updates. - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(safe_browsing::PrefixSet::LoadFile(filename)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set = + safe_browsing::PrefixSet::LoadFile(filename); ASSERT_TRUE(prefix_set.get()); // |Exists()| uses |std::upper_bound()| to find a starting point, which diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index 100d8e2..2b5b2e2 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -606,9 +606,9 @@ void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) { if (base::GetFileInfo(side_effect_free_whitelist_filename_, &db_info) && db_info.size != 0) { const base::TimeTicks before = base::TimeTicks::Now(); - side_effect_free_whitelist_prefix_set_.reset( + side_effect_free_whitelist_prefix_set_ = safe_browsing::PrefixSet::LoadFile( - side_effect_free_whitelist_prefix_set_filename_)); + side_effect_free_whitelist_prefix_set_filename_); DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist " << "prefix set in " << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; @@ -1279,11 +1279,11 @@ void SafeBrowsingDatabaseNew::UpdateWhitelistStore( // hashes are already full. std::vector<SBAddFullHash> empty_add_hashes; - // Note: prefixes will not be empty. The current data store implementation + // Note: |builder| will not be empty. The current data store implementation // stores all full-length hashes as both full and prefix hashes. - SBAddPrefixes prefixes; + safe_browsing::PrefixSetBuilder builder; std::vector<SBAddFullHash> full_hashes; - if (!store->FinishUpdate(empty_add_hashes, &prefixes, &full_hashes)) { + if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) { RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); WhitelistEverything(whitelist); return; @@ -1305,11 +1305,11 @@ int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( // These results are not used after this call. Simply ignore the // returned value after FinishUpdate(...). - SBAddPrefixes add_prefixes_result; + safe_browsing::PrefixSetBuilder builder; std::vector<SBAddFullHash> add_full_hashes_result; if (!store->FinishUpdate(empty_add_hashes, - &add_prefixes_result, + &builder, &add_full_hashes_result)) { RecordFailure(failure_type); } @@ -1351,28 +1351,14 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { const base::TimeTicks before = base::TimeTicks::Now(); - SBAddPrefixes add_prefixes; + safe_browsing::PrefixSetBuilder builder; std::vector<SBAddFullHash> add_full_hashes; if (!browse_store_->FinishUpdate(pending_add_hashes, - &add_prefixes, &add_full_hashes)) { + &builder, &add_full_hashes)) { RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); return; } - - // TODO(shess): If |add_prefixes| were sorted by the prefix, it - // could be passed directly to |PrefixSet()|, removing the need for - // |prefixes|. For now, |prefixes| is useful while debugging - // things. - std::vector<SBPrefix> prefixes; - prefixes.reserve(add_prefixes.size()); - for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); - iter != add_prefixes.end(); ++iter) { - prefixes.push_back(iter->prefix); - } - - std::sort(prefixes.begin(), prefixes.end()); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(new safe_browsing::PrefixSet(prefixes)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); // This needs to be in sorted order by prefix for efficient access. std::sort(add_full_hashes.begin(), add_full_hashes.end(), @@ -1395,7 +1381,7 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in " << (base::TimeTicks::Now() - before).InMilliseconds() - << " ms total. prefix count: " << add_prefixes.size(); + << " ms total."; UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); // Persist the prefix set to disk. Since only this thread changes @@ -1432,31 +1418,17 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { std::vector<SBAddFullHash> empty_add_hashes; - SBAddPrefixes add_prefixes; + safe_browsing::PrefixSetBuilder builder; std::vector<SBAddFullHash> add_full_hashes_result; if (!side_effect_free_whitelist_store_->FinishUpdate( empty_add_hashes, - &add_prefixes, + &builder, &add_full_hashes_result)) { RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); return; } - - // TODO(shess): If |add_prefixes| were sorted by the prefix, it - // could be passed directly to |PrefixSet()|, removing the need for - // |prefixes|. For now, |prefixes| is useful while debugging - // things. - std::vector<SBPrefix> prefixes; - prefixes.reserve(add_prefixes.size()); - for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); - iter != add_prefixes.end(); ++iter) { - prefixes.push_back(iter->prefix); - } - - std::sort(prefixes.begin(), prefixes.end()); - scoped_ptr<safe_browsing::PrefixSet> - prefix_set(new safe_browsing::PrefixSet(prefixes)); + scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); // Swap in the newly built prefix set. { @@ -1499,10 +1471,10 @@ void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { // Note: prefixes will not be empty. The current data store implementation // stores all full-length hashes as both full and prefix hashes. - SBAddPrefixes prefixes; + safe_browsing::PrefixSetBuilder builder; std::vector<SBAddFullHash> full_hashes; if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes, - &prefixes, &full_hashes)) { + &builder, &full_hashes)) { RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. return; @@ -1551,8 +1523,8 @@ void SafeBrowsingDatabaseNew::LoadPrefixSet() { base::DeleteFile(bloom_filter_filename, false); const base::TimeTicks before = base::TimeTicks::Now(); - browse_prefix_set_.reset(safe_browsing::PrefixSet::LoadFile( - browse_prefix_set_filename_)); + browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile( + browse_prefix_set_filename_); DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in " << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before); diff --git a/chrome/browser/safe_browsing/safe_browsing_store.h b/chrome/browser/safe_browsing/safe_browsing_store.h index f804f1d..76cf28a 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store.h +++ b/chrome/browser/safe_browsing/safe_browsing_store.h @@ -12,6 +12,7 @@ #include "base/callback_forward.h" #include "base/containers/hash_tables.h" #include "base/time/time.h" +#include "chrome/browser/safe_browsing/prefix_set.h" #include "chrome/browser/safe_browsing/safe_browsing_util.h" namespace base { @@ -104,16 +105,16 @@ struct SBSubFullHash { SBPrefix GetAddPrefix() const { return full_hash.prefix; } }; -// Determine less-than based on add chunk and prefix. +// Determine less-than based on prefix and add chunk. template <class T, class U> bool SBAddPrefixLess(const T& a, const U& b) { - if (a.GetAddChunkId() != b.GetAddChunkId()) - return a.GetAddChunkId() < b.GetAddChunkId(); + if (a.GetAddPrefix() != b.GetAddPrefix()) + return a.GetAddPrefix() < b.GetAddPrefix(); - return a.GetAddPrefix() < b.GetAddPrefix(); + return a.GetAddChunkId() < b.GetAddChunkId(); } -// Determine less-than based on add chunk, prefix, and full hash. +// Determine less-than based on prefix, add chunk, and full hash. // Prefix can compare differently than hash due to byte ordering, // so it must take precedence. template <class T, class U> @@ -232,7 +233,7 @@ class SafeBrowsingStore { // chunk to disk). virtual bool FinishUpdate( const std::vector<SBAddFullHash>& pending_adds, - SBAddPrefixes* add_prefixes_result, + safe_browsing::PrefixSetBuilder* builder, std::vector<SBAddFullHash>* add_full_hashes_result) = 0; // Cancel the update in process and remove any temporary disk diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.cc b/chrome/browser/safe_browsing/safe_browsing_store_file.cc index 95f7333..a879096 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_file.cc +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.cc @@ -478,11 +478,11 @@ bool SafeBrowsingStoreFile::FinishChunk() { bool SafeBrowsingStoreFile::DoUpdate( const std::vector<SBAddFullHash>& pending_adds, - SBAddPrefixes* add_prefixes_result, + safe_browsing::PrefixSetBuilder* builder, std::vector<SBAddFullHash>* add_full_hashes_result) { DCHECK(file_.get() || empty_); DCHECK(new_file_.get()); - CHECK(add_prefixes_result); + CHECK(builder); CHECK(add_full_hashes_result); SBAddPrefixes add_prefixes; @@ -664,7 +664,10 @@ bool SafeBrowsingStoreFile::DoUpdate( UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", sub_prefixes.size()); // Pass the resulting data off to the caller. - add_prefixes_result->swap(add_prefixes); + for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); + iter != add_prefixes.end(); ++iter) { + builder->AddPrefix(iter->prefix); + } add_full_hashes_result->swap(add_full_hashes); return true; @@ -672,12 +675,12 @@ bool SafeBrowsingStoreFile::DoUpdate( bool SafeBrowsingStoreFile::FinishUpdate( const std::vector<SBAddFullHash>& pending_adds, - SBAddPrefixes* add_prefixes_result, + safe_browsing::PrefixSetBuilder* builder, std::vector<SBAddFullHash>* add_full_hashes_result) { - DCHECK(add_prefixes_result); + DCHECK(builder); DCHECK(add_full_hashes_result); - if (!DoUpdate(pending_adds, add_prefixes_result, add_full_hashes_result)) { + if (!DoUpdate(pending_adds, builder, add_full_hashes_result)) { CancelUpdate(); return false; } diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file.h b/chrome/browser/safe_browsing/safe_browsing_store_file.h index c704710..6b821b0 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_file.h +++ b/chrome/browser/safe_browsing/safe_browsing_store_file.h @@ -138,7 +138,7 @@ class SafeBrowsingStoreFile : public SafeBrowsingStore { // return |add_prefixes_result| and |add_full_hashes_result|. virtual bool FinishUpdate( const std::vector<SBAddFullHash>& pending_adds, - SBAddPrefixes* add_prefixes_result, + safe_browsing::PrefixSetBuilder* builder, std::vector<SBAddFullHash>* add_full_hashes_result) OVERRIDE; virtual bool CancelUpdate() OVERRIDE; @@ -169,7 +169,7 @@ class SafeBrowsingStoreFile : public SafeBrowsingStore { private: // Update store file with pending full hashes. virtual bool DoUpdate(const std::vector<SBAddFullHash>& pending_adds, - SBAddPrefixes* add_prefixes_result, + safe_browsing::PrefixSetBuilder* builder, std::vector<SBAddFullHash>* add_full_hashes_result); // Enumerate different format-change events for histogramming diff --git a/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc index fba3a3b..4f52241 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc @@ -79,11 +79,11 @@ class SafeBrowsingStoreFileTest : public PlatformTest { EXPECT_FALSE(store_->CheckSubChunk(kAddChunk1)); std::vector<SBAddFullHash> pending_adds; - SBAddPrefixes add_prefixes_result; + safe_browsing::PrefixSetBuilder builder; std::vector<SBAddFullHash> add_full_hashes_result; EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, + &builder, &add_full_hashes_result)); } @@ -113,14 +113,17 @@ TEST_F(SafeBrowsingStoreFileTest, Empty) { EXPECT_FALSE(store_->CheckSubChunk(-1)); std::vector<SBAddFullHash> pending_adds; - SBAddPrefixes add_prefixes_result; + safe_browsing::PrefixSetBuilder builder; std::vector<SBAddFullHash> add_full_hashes_result; EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, + &builder, &add_full_hashes_result)); - EXPECT_TRUE(add_prefixes_result.empty()); EXPECT_TRUE(add_full_hashes_result.empty()); + + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + EXPECT_TRUE(prefixes_result.empty()); } // Write some prefix data to the store and verify that it looks like @@ -140,28 +143,26 @@ TEST_F(SafeBrowsingStoreFileTest, StorePrefix) { ASSERT_EQ(1U, chunks.size()); EXPECT_EQ(kSubChunk1, chunks[0]); - std::vector<SBAddFullHash> pending_adds; - SBAddPrefixes add_prefixes_result; - std::vector<SBAddFullHash> add_full_hashes_result; - - EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, - &add_full_hashes_result)); - - ASSERT_EQ(2U, add_prefixes_result.size()); - EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); - EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); - EXPECT_EQ(kAddChunk1, add_prefixes_result[1].chunk_id); - EXPECT_EQ(kHash2.prefix, add_prefixes_result[1].prefix); - - ASSERT_EQ(1U, add_full_hashes_result.size()); - EXPECT_EQ(kAddChunk1, add_full_hashes_result[0].chunk_id); - // EXPECT_TRUE(add_full_hashes_result[0].received == now)? - EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received); - EXPECT_TRUE(SBFullHashEqual(kHash2, add_full_hashes_result[0].full_hash)); + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); - add_prefixes_result.clear(); - add_full_hashes_result.clear(); + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + ASSERT_EQ(2U, prefixes_result.size()); + EXPECT_EQ(kHash1.prefix, prefixes_result[0]); + EXPECT_EQ(kHash2.prefix, prefixes_result[1]); + + ASSERT_EQ(1U, add_full_hashes_result.size()); + EXPECT_EQ(kAddChunk1, add_full_hashes_result[0].chunk_id); + // EXPECT_TRUE(add_full_hashes_result[0].received == now)? + EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received); + EXPECT_TRUE(SBFullHashEqual(kHash2, add_full_hashes_result[0].full_hash)); + } EXPECT_TRUE(store_->BeginUpdate()); @@ -177,21 +178,26 @@ TEST_F(SafeBrowsingStoreFileTest, StorePrefix) { EXPECT_TRUE(store_->CheckAddChunk(kAddChunk1)); EXPECT_TRUE(store_->CheckSubChunk(kSubChunk1)); - EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, - &add_full_hashes_result)); + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); - // Still has the expected contents. - ASSERT_EQ(2U, add_prefixes_result.size()); - EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); - EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); - EXPECT_EQ(kAddChunk1, add_prefixes_result[1].chunk_id); - EXPECT_EQ(kHash2.prefix, add_prefixes_result[1].prefix); - - ASSERT_EQ(1U, add_full_hashes_result.size()); - EXPECT_EQ(kAddChunk1, add_full_hashes_result[0].chunk_id); - EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received); - EXPECT_TRUE(SBFullHashEqual(kHash2, add_full_hashes_result[0].full_hash)); + // Still has the expected contents. + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + ASSERT_EQ(2U, prefixes_result.size()); + EXPECT_EQ(kHash1.prefix, prefixes_result[0]); + EXPECT_EQ(kHash2.prefix, prefixes_result[1]); + + ASSERT_EQ(1U, add_full_hashes_result.size()); + EXPECT_EQ(kAddChunk1, add_full_hashes_result[0].chunk_id); + EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received); + EXPECT_TRUE(SBFullHashEqual(kHash2, add_full_hashes_result[0].full_hash)); + } } // Test that subs knockout adds. @@ -211,21 +217,21 @@ TEST_F(SafeBrowsingStoreFileTest, SubKnockout) { EXPECT_TRUE(store_->WriteSubPrefix(kSubChunk1, kAddChunk1, kHash2.prefix)); EXPECT_TRUE(store_->FinishChunk()); - std::vector<SBAddFullHash> pending_adds; - SBAddPrefixes add_prefixes_result; - std::vector<SBAddFullHash> add_full_hashes_result; - - EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, - &add_full_hashes_result)); - - // Knocked out the chunk expected. - ASSERT_EQ(1U, add_prefixes_result.size()); - EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); - EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); - EXPECT_TRUE(add_full_hashes_result.empty()); + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); - add_prefixes_result.clear(); + // Knocked out the chunk expected. + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + ASSERT_EQ(1U, prefixes_result.size()); + EXPECT_EQ(kHash1.prefix, prefixes_result[0]); + EXPECT_TRUE(add_full_hashes_result.empty()); + } EXPECT_TRUE(store_->BeginUpdate()); @@ -235,15 +241,20 @@ TEST_F(SafeBrowsingStoreFileTest, SubKnockout) { EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk3, kHash3.prefix)); EXPECT_TRUE(store_->FinishChunk()); - EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, - &add_full_hashes_result)); - EXPECT_EQ(1U, add_prefixes_result.size()); - EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); - EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); - EXPECT_TRUE(add_full_hashes_result.empty()); + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); - add_prefixes_result.clear(); + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + EXPECT_EQ(1U, prefixes_result.size()); + EXPECT_EQ(kHash1.prefix, prefixes_result[0]); + EXPECT_TRUE(add_full_hashes_result.empty()); + } EXPECT_TRUE(store_->BeginUpdate()); @@ -253,15 +264,21 @@ TEST_F(SafeBrowsingStoreFileTest, SubKnockout) { EXPECT_TRUE(store_->WriteAddPrefix(kAddChunk3, kHash3.prefix)); EXPECT_TRUE(store_->FinishChunk()); - EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, - &add_full_hashes_result)); - ASSERT_EQ(2U, add_prefixes_result.size()); - EXPECT_EQ(kAddChunk1, add_prefixes_result[0].chunk_id); - EXPECT_EQ(kHash1.prefix, add_prefixes_result[0].prefix); - EXPECT_EQ(kAddChunk3, add_prefixes_result[1].chunk_id); - EXPECT_EQ(kHash3.prefix, add_prefixes_result[1].prefix); - EXPECT_TRUE(add_full_hashes_result.empty()); + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); + + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + ASSERT_EQ(2U, prefixes_result.size()); + EXPECT_EQ(kHash1.prefix, prefixes_result[0]); + EXPECT_EQ(kHash3.prefix, prefixes_result[1]); + EXPECT_TRUE(add_full_hashes_result.empty()); + } } // Test that deletes delete the chunk's data. @@ -312,21 +329,24 @@ TEST_F(SafeBrowsingStoreFileTest, DeleteChunks) { EXPECT_TRUE(store_->CheckSubChunk(kSubChunk1)); EXPECT_TRUE(store_->CheckSubChunk(kSubChunk2)); - std::vector<SBAddFullHash> pending_adds; - SBAddPrefixes add_prefixes_result; - std::vector<SBAddFullHash> add_full_hashes_result; + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); - EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, - &add_full_hashes_result)); + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + EXPECT_EQ(1U, prefixes_result.size()); + EXPECT_EQ(kHash3.prefix, prefixes_result[0]); - EXPECT_EQ(1U, add_prefixes_result.size()); - EXPECT_EQ(kAddChunk2, add_prefixes_result[0].chunk_id); - EXPECT_EQ(kHash3.prefix, add_prefixes_result[0].prefix); - EXPECT_EQ(1U, add_full_hashes_result.size()); - EXPECT_EQ(kAddChunk2, add_full_hashes_result[0].chunk_id); - EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received); - EXPECT_TRUE(SBFullHashEqual(kHash3, add_full_hashes_result[0].full_hash)); + EXPECT_EQ(1U, add_full_hashes_result.size()); + EXPECT_EQ(kAddChunk2, add_full_hashes_result[0].chunk_id); + EXPECT_EQ(now.ToTimeT(), add_full_hashes_result[0].received); + EXPECT_TRUE(SBFullHashEqual(kHash3, add_full_hashes_result[0].full_hash)); + } // Expected chunks are there in another update. EXPECT_TRUE(store_->BeginUpdate()); @@ -339,11 +359,14 @@ TEST_F(SafeBrowsingStoreFileTest, DeleteChunks) { store_->DeleteAddChunk(kAddChunk2); store_->DeleteSubChunk(kSubChunk2); - add_prefixes_result.clear(); - add_full_hashes_result.clear(); - EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, - &add_full_hashes_result)); + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); + } // Expect no more chunks. EXPECT_TRUE(store_->BeginUpdate()); @@ -351,13 +374,20 @@ TEST_F(SafeBrowsingStoreFileTest, DeleteChunks) { EXPECT_FALSE(store_->CheckAddChunk(kAddChunk2)); EXPECT_FALSE(store_->CheckSubChunk(kSubChunk1)); EXPECT_FALSE(store_->CheckSubChunk(kSubChunk2)); - add_prefixes_result.clear(); - add_full_hashes_result.clear(); - EXPECT_TRUE(store_->FinishUpdate(pending_adds, - &add_prefixes_result, - &add_full_hashes_result)); - EXPECT_TRUE(add_prefixes_result.empty()); - EXPECT_TRUE(add_full_hashes_result.empty()); + + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + std::vector<SBAddFullHash> add_full_hashes_result; + EXPECT_TRUE(store_->FinishUpdate(pending_adds, + &builder, + &add_full_hashes_result)); + + std::vector<SBPrefix> prefixes_result; + builder.GetPrefixSet()->GetPrefixes(&prefixes_result); + EXPECT_TRUE(prefixes_result.empty()); + EXPECT_TRUE(add_full_hashes_result.empty()); + } } // Test that deleting the store deletes the store. @@ -405,14 +435,18 @@ TEST_F(SafeBrowsingStoreFileTest, DetectsCorruption) { PopulateStore(base::Time::Now()); // Can successfully open and read the store. - std::vector<SBAddFullHash> pending_adds; - SBAddPrefixes orig_prefixes; - std::vector<SBAddFullHash> orig_hashes; - EXPECT_TRUE(store_->BeginUpdate()); - EXPECT_TRUE(store_->FinishUpdate(pending_adds, &orig_prefixes, &orig_hashes)); - EXPECT_GT(orig_prefixes.size(), 0U); - EXPECT_GT(orig_hashes.size(), 0U); - EXPECT_FALSE(corruption_detected_); + { + std::vector<SBAddFullHash> pending_adds; + std::vector<SBPrefix> orig_prefixes; + std::vector<SBAddFullHash> orig_hashes; + safe_browsing::PrefixSetBuilder builder; + EXPECT_TRUE(store_->BeginUpdate()); + EXPECT_TRUE(store_->FinishUpdate(pending_adds, &builder, &orig_hashes)); + builder.GetPrefixSet()->GetPrefixes(&orig_prefixes); + EXPECT_GT(orig_prefixes.size(), 0U); + EXPECT_GT(orig_hashes.size(), 0U); + EXPECT_FALSE(corruption_detected_); + } // Corrupt the store. base::ScopedFILE file(base::OpenFile(filename_, "rb+")); @@ -427,14 +461,15 @@ TEST_F(SafeBrowsingStoreFileTest, DetectsCorruption) { file.reset(); // Update fails and corruption callback is called. - SBAddPrefixes add_prefixes; std::vector<SBAddFullHash> add_hashes; corruption_detected_ = false; - EXPECT_TRUE(store_->BeginUpdate()); - EXPECT_FALSE(store_->FinishUpdate(pending_adds, &add_prefixes, &add_hashes)); - EXPECT_TRUE(corruption_detected_); - EXPECT_EQ(add_prefixes.size(), 0U); - EXPECT_EQ(add_hashes.size(), 0U); + { + std::vector<SBAddFullHash> pending_adds; + safe_browsing::PrefixSetBuilder builder; + EXPECT_TRUE(store_->BeginUpdate()); + EXPECT_FALSE(store_->FinishUpdate(pending_adds, &builder, &add_hashes)); + EXPECT_TRUE(corruption_detected_); + } // Make it look like there is a lot of add-chunks-seen data. const long kAddChunkCountOffset = 2 * sizeof(int32); diff --git a/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc index 9ccb0e1..199138e 100644 --- a/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc +++ b/chrome/browser/safe_browsing/safe_browsing_store_unittest.cc @@ -9,13 +9,15 @@ namespace { TEST(SafeBrowsingStoreTest, SBAddPrefixLess) { - // chunk_id then prefix. + // prefix dominates. + EXPECT_TRUE(SBAddPrefixLess(SBAddPrefix(11, 1), SBAddPrefix(10, 2))); + EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(10, 2), SBAddPrefix(11, 1))); + + // After prefix, chunk_id. EXPECT_TRUE(SBAddPrefixLess(SBAddPrefix(10, 1), SBAddPrefix(11, 1))); EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(11, 1), SBAddPrefix(10, 1))); - EXPECT_TRUE(SBAddPrefixLess(SBAddPrefix(10, 1), SBAddPrefix(10, 2))); - EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(10, 2), SBAddPrefix(10, 1))); - // Equal is not less. + // Equal is not less-than. EXPECT_FALSE(SBAddPrefixLess(SBAddPrefix(10, 1), SBAddPrefix(10, 1))); } @@ -36,19 +38,19 @@ TEST(SafeBrowsingStoreTest, SBAddPrefixHashLess) { const base::Time now = base::Time::Now(); - // add_id dominates. - EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(10, now, two), - SBAddFullHash(11, now, one))); + // prefix dominates. + EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(11, now, one), + SBAddFullHash(10, now, two))); EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(11, now, two), SBAddFullHash(10, now, one))); - // After add_id, prefix. + // After prefix, add_id. EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(10, now, one), - SBAddFullHash(10, now, two))); - EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(10, now, two), - SBAddFullHash(10, now, one))); + SBAddFullHash(11, now, onetwo))); + EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(11, now, one), + SBAddFullHash(10, now, onetwo))); - // After prefix, full hash. + // After add_id, full hash. EXPECT_TRUE(SBAddPrefixHashLess(SBAddFullHash(10, now, one), SBAddFullHash(10, now, onetwo))); EXPECT_FALSE(SBAddPrefixHashLess(SBAddFullHash(10, now, onetwo), @@ -60,13 +62,13 @@ TEST(SafeBrowsingStoreTest, SBAddPrefixHashLess) { } TEST(SafeBrowsingStoreTest, SBSubPrefixLess) { - // add_id dominates. - EXPECT_TRUE(SBAddPrefixLess(SBSubPrefix(12, 10, 2), SBSubPrefix(9, 11, 1))); + // prefix dominates. + EXPECT_TRUE(SBAddPrefixLess(SBSubPrefix(12, 11, 1), SBSubPrefix(9, 10, 2))); EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 11, 2), SBSubPrefix(9, 10, 1))); - // After add_id, prefix. - EXPECT_TRUE(SBAddPrefixLess(SBSubPrefix(12, 10, 1), SBSubPrefix(9, 10, 2))); - EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 10, 2), SBSubPrefix(9, 10, 1))); + // After prefix, add_id. + EXPECT_TRUE(SBAddPrefixLess(SBSubPrefix(12, 9, 1), SBSubPrefix(9, 10, 1))); + EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 10, 1), SBSubPrefix(9, 9, 1))); // Equal is not less-than. EXPECT_FALSE(SBAddPrefixLess(SBSubPrefix(12, 10, 1), SBSubPrefix(12, 10, 1))); @@ -85,19 +87,19 @@ TEST(SafeBrowsingStoreTest, SBSubFullHashLess) { onetwo.full_hash[sizeof(SBPrefix)] = 2; two.prefix = 2; - // add_id dominates. - EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 10, two), - SBSubFullHash(9, 11, one))); + // prefix dominates. + EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 11, one), + SBSubFullHash(9, 10, two))); EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 11, two), SBSubFullHash(9, 10, one))); - // After add_id, prefix. + // After prefix, add_id. EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 10, one), - SBSubFullHash(9, 10, two))); - EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 10, two), - SBSubFullHash(9, 10, one))); + SBSubFullHash(9, 11, onetwo))); + EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 11, one), + SBSubFullHash(9, 10, onetwo))); - // After prefix, full_hash. + // After add_id, full_hash. EXPECT_TRUE(SBAddPrefixHashLess(SBSubFullHash(12, 10, one), SBSubFullHash(9, 10, onetwo))); EXPECT_FALSE(SBAddPrefixHashLess(SBSubFullHash(12, 10, onetwo), |