diff options
author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-04 01:16:58 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-04 01:16:58 +0000 |
commit | cf50836f5fff555ab9b8d2e961029a31514ea239 (patch) | |
tree | 39e6f10ca0f7876888a637103a7747e607c6451f | |
parent | cff8c308df4b4f2388e7bd3cbe1b462228c12d4c (diff) | |
download | chromium_src-cf50836f5fff555ab9b8d2e961029a31514ea239.zip chromium_src-cf50836f5fff555ab9b8d2e961029a31514ea239.tar.gz chromium_src-cf50836f5fff555ab9b8d2e961029a31514ea239.tar.bz2 |
Additional validation code for PrefixSet.
The PrefixSet-vs-BloomFilter histograms showed a minor discrepency,
with a very small number of PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT
reports. This CL adds code to regenerate the prefix list and
manually double-check.
Additionally, reduce memory use by requiring the input prefix vector
to be pre-sorted.
BUG=71832
TEST=none
Review URL: http://codereview.chromium.org/6591087
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@76853 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/safe_browsing/prefix_set.cc | 40 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/prefix_set.h | 6 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/prefix_set_unittest.cc | 25 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database.cc | 76 |
4 files changed, 112 insertions, 35 deletions
diff --git a/chrome/browser/safe_browsing/prefix_set.cc b/chrome/browser/safe_browsing/prefix_set.cc index dae9578..9ddd123 100644 --- a/chrome/browser/safe_browsing/prefix_set.cc +++ b/chrome/browser/safe_browsing/prefix_set.cc @@ -22,30 +22,28 @@ bool PrefixLess(const std::pair<SBPrefix,size_t>& a, namespace safe_browsing { -PrefixSet::PrefixSet(const std::vector<SBPrefix>& prefixes) { - if (prefixes.size()) { - std::vector<SBPrefix> sorted(prefixes.begin(), prefixes.end()); - std::sort(sorted.begin(), sorted.end()); - +PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) { + if (sorted_prefixes.size()) { // Lead with the first prefix. - SBPrefix prev_prefix = sorted[0]; + SBPrefix prev_prefix = sorted_prefixes[0]; size_t run_length = 0; index_.push_back(std::make_pair(prev_prefix, deltas_.size())); - for (size_t i = 1; i < sorted.size(); ++i) { - // Don't encode zero deltas. - if (sorted[i] == prev_prefix) + for (size_t i = 1; i < sorted_prefixes.size(); ++i) { + // Skip duplicates. + if (sorted_prefixes[i] == prev_prefix) continue; // Calculate the delta. |unsigned| is mandatory, because the - // prefixes could be more than INT_MAX apart. - unsigned delta = sorted[i] - prev_prefix; + // sorted_prefixes could be more than INT_MAX apart. + DCHECK_GT(sorted_prefixes[i], prev_prefix); + unsigned delta = sorted_prefixes[i] - prev_prefix; // New index ref if the delta is too wide, or if too many // consecutive deltas have been encoded. Note that // |kMaxDelta| cannot itself be encoded. if (delta >= kMaxDelta || run_length >= kMaxRun) { - index_.push_back(std::make_pair(sorted[i], deltas_.size())); + index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size())); run_length = 0; } else { // Continue the run of deltas. @@ -53,7 +51,7 @@ PrefixSet::PrefixSet(const std::vector<SBPrefix>& prefixes) { ++run_length; } - prev_prefix = sorted[i]; + prev_prefix = sorted_prefixes[i]; } // Send up some memory-usage stats. Bits because fractional bytes @@ -103,4 +101,20 @@ bool PrefixSet::Exists(SBPrefix prefix) const { return current == prefix; } +void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) { + for (size_t ii = 0; ii < index_.size(); ++ii) { + // The deltas for this |index_| entry run to the next index entry, + // or the end of the deltas. + const size_t deltas_end = + (ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size(); + + SBPrefix current = index_[ii].first; + prefixes->push_back(current); + for (size_t di = index_[ii].second; di < deltas_end; ++di) { + current += deltas_[di]; + prefixes->push_back(current); + } + } +} + } // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/prefix_set.h b/chrome/browser/safe_browsing/prefix_set.h index 229b772..cb2cd9f 100644 --- a/chrome/browser/safe_browsing/prefix_set.h +++ b/chrome/browser/safe_browsing/prefix_set.h @@ -59,12 +59,16 @@ namespace safe_browsing { class PrefixSet { public: - explicit PrefixSet(const std::vector<SBPrefix>& prefixes); + explicit PrefixSet(const std::vector<SBPrefix>& sorted_prefixes); ~PrefixSet(); // |true| if |prefix| was in |prefixes| passed to the constructor. bool Exists(SBPrefix prefix) const; + // Regenerate the vector of prefixes passed to the constructor into + // |prefixes|. Prefixes will be added in sorted order. + void GetPrefixes(std::vector<SBPrefix>* prefixes); + private: // Maximum delta that can be encoded in a 16-bit unsigned. static const unsigned kMaxDelta = 256 * 256; diff --git a/chrome/browser/safe_browsing/prefix_set_unittest.cc b/chrome/browser/safe_browsing/prefix_set_unittest.cc index 9328b77..f7be5da 100644 --- a/chrome/browser/safe_browsing/prefix_set_unittest.cc +++ b/chrome/browser/safe_browsing/prefix_set_unittest.cc @@ -26,11 +26,19 @@ TEST(PrefixSetTest, Baseline) { prefixes.push_back(GenPrefix()); } + std::sort(prefixes.begin(), prefixes.end()); safe_browsing::PrefixSet prefix_set(prefixes); + // Check that |GetPrefixes()| returns exactly the same set of + // prefixes as was passed in. + std::set<SBPrefix> check(prefixes.begin(), prefixes.end()); + std::vector<SBPrefix> prefixes_copy; + prefix_set.GetPrefixes(&prefixes_copy); + EXPECT_EQ(prefixes_copy.size(), check.size()); + EXPECT_TRUE(std::equal(check.begin(), check.end(), prefixes_copy.begin())); + // Check that the set flags all of the inputs, and also check items // just above and below the inputs to make sure they aren't there. - std::set<SBPrefix> check(prefixes.begin(), prefixes.end()); for (size_t i = 0; i < prefixes.size(); ++i) { EXPECT_TRUE(prefix_set.Exists(prefixes[i])); @@ -45,7 +53,7 @@ TEST(PrefixSetTest, Baseline) { } // Test that the empty set doesn't appear to have anything in it. -TEST(PrefixSetTest, xEmpty) { +TEST(PrefixSetTest, Empty) { std::vector<SBPrefix> prefixes; safe_browsing::PrefixSet prefix_set(prefixes); for (size_t i = 0; i < 500; ++i) { @@ -97,11 +105,18 @@ TEST(PrefixSetTest, EdgeCases) { delta--; } - // Shuffle things up for giggles. - std::random_shuffle(prefixes.begin(), prefixes.end()); - + std::sort(prefixes.begin(), prefixes.end()); safe_browsing::PrefixSet prefix_set(prefixes); + // Check that |GetPrefixes()| returns the same set of prefixes as + // was passed in. + std::vector<SBPrefix> prefixes_copy; + prefix_set.GetPrefixes(&prefixes_copy); + prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); + EXPECT_EQ(prefixes_copy.size(), prefixes.size()); + EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(), + prefixes_copy.begin())); + // Items before and after the set are not present, and don't crash. EXPECT_FALSE(prefix_set.Exists(kVeryNegative - 100)); EXPECT_FALSE(prefix_set.Exists(kVeryPositive + 100)); diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index c142907..00b16f7 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -178,16 +178,6 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { return a.full_hash.prefix < b.full_hash.prefix; } -// Create a |PrefixSet| from a vector of add-prefixes. -safe_browsing::PrefixSet* CreatePrefixSet( - const std::vector<SBAddPrefix>& add_prefixes) { - std::vector<SBPrefix> prefixes; - for (size_t i = 0; i < add_prefixes.size(); ++i) { - prefixes.push_back(add_prefixes[i].prefix); - } - return new safe_browsing::PrefixSet(prefixes); -} - // As compared to the bloom filter, PrefixSet should have these // properties: // - Any bloom filter miss should be a prefix set miss. @@ -198,6 +188,8 @@ enum PrefixSetEvent { PREFIX_SET_EVENT_HIT, PREFIX_SET_EVENT_BLOOM_HIT, PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT, + PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID, + PREFIX_SET_GETPREFIXES_BROKEN, // Memory space for histograms is determined by the max. ALWAYS ADD // NEW VALUES BEFORE THIS ONE. @@ -361,7 +353,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() { BloomFilter::kBloomFilterSizeRatio); // TODO(shess): It is simpler for the code to assume that presence // of a bloom filter always implies presence of a prefix set. - prefix_set_.reset(CreatePrefixSet(std::vector<SBAddPrefix>())); + prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>())); } return true; @@ -392,6 +384,9 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( return false; DCHECK(prefix_set_.get()); + // Used to double-check in case of a hit mis-match. + std::vector<SBPrefix> restored; + size_t miss_count = 0; for (size_t i = 0; i < prefixes.size(); ++i) { bool found = prefix_set_->Exists(prefixes[i]); @@ -404,10 +399,26 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( if (prefix_miss_cache_.count(prefixes[i]) > 0) ++miss_count; } else { - // Bloom filter misses should never be in prefix set. + // Bloom filter misses should never be in prefix set. Re-create + // the original prefixes and manually search for it, to check if + // there's a bug with how |Exists()| is implemented. + // |UpdateBrowseStore()| previously verified that + // |GetPrefixes()| returns the same prefixes as were passed to + // the constructor. DCHECK(!found); - if (found) - RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); + if (found) { + if (restored.empty()) + prefix_set_->GetPrefixes(&restored); + + // If the item is not in the re-created list, then there is an + // error in |PrefixSet::Exists()|. If the item is in the + // re-created list, then the bloom filter was wrong. + if (std::binary_search(restored.begin(), restored.end(), prefixes[i])) { + RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); + } else { + RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID); + } + } } } @@ -843,8 +854,24 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { filter->Insert(add_prefixes[i].prefix); } + std::vector<SBPrefix> prefixes; + for (size_t i = 0; i < add_prefixes.size(); ++i) { + prefixes.push_back(add_prefixes[i].prefix); + } + std::sort(prefixes.begin(), prefixes.end()); scoped_ptr<safe_browsing::PrefixSet> - prefix_set(CreatePrefixSet(add_prefixes)); + prefix_set(new safe_browsing::PrefixSet(prefixes)); + + // Verify that |GetPrefixes()| returns the same set of prefixes as + // was passed to the constructor. + std::vector<SBPrefix> restored; + prefix_set->GetPrefixes(&restored); + prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); + if (restored.size() != prefixes.size() || + !std::equal(prefixes.begin(), prefixes.end(), restored.begin())) { + NOTREACHED(); + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); + } // This needs to be in sorted order by prefix for efficient access. std::sort(add_full_hashes.begin(), add_full_hashes.end(), @@ -952,7 +979,24 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() { // TODO(shess): Write/read for prefix set. std::vector<SBAddPrefix> add_prefixes; browse_store_->GetAddPrefixes(&add_prefixes); - prefix_set_.reset(CreatePrefixSet(add_prefixes)); + std::vector<SBPrefix> prefixes; + for (size_t i = 0; i < add_prefixes.size(); ++i) { + prefixes.push_back(add_prefixes[i].prefix); + } + std::sort(prefixes.begin(), prefixes.end()); + prefix_set_.reset(new safe_browsing::PrefixSet(prefixes)); + + // Double-check the prefixes so that the + // PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in + // ContainsBrowseUrl() can be trustworthy. + std::vector<SBPrefix> restored; + prefix_set_->GetPrefixes(&restored); + std::set<SBPrefix> unique(prefixes.begin(), prefixes.end()); + if (restored.size() != unique.size() || + !std::equal(unique.begin(), unique.end(), restored.begin())) { + NOTREACHED(); + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); + } } bool SafeBrowsingDatabaseNew::Delete() { |