diff options
author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-04 01:16:58 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-04 01:16:58 +0000 |
commit | cf50836f5fff555ab9b8d2e961029a31514ea239 (patch) | |
tree | 39e6f10ca0f7876888a637103a7747e607c6451f /chrome/browser/safe_browsing/safe_browsing_database.cc | |
parent | cff8c308df4b4f2388e7bd3cbe1b462228c12d4c (diff) | |
download | chromium_src-cf50836f5fff555ab9b8d2e961029a31514ea239.zip chromium_src-cf50836f5fff555ab9b8d2e961029a31514ea239.tar.gz chromium_src-cf50836f5fff555ab9b8d2e961029a31514ea239.tar.bz2 |
Additional validation code for PrefixSet.
The PrefixSet-vs-BloomFilter histograms showed a minor discrepency,
with a very small number of PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT
reports. This CL adds code to regenerate the prefix list and
manually double-check.
Additionally, reduce memory use by requiring the input prefix vector
to be pre-sorted.
BUG=71832
TEST=none
Review URL: http://codereview.chromium.org/6591087
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@76853 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/safe_browsing/safe_browsing_database.cc')
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database.cc | 76 |
1 files changed, 60 insertions, 16 deletions
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index c142907..00b16f7 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -178,16 +178,6 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { return a.full_hash.prefix < b.full_hash.prefix; } -// Create a |PrefixSet| from a vector of add-prefixes. -safe_browsing::PrefixSet* CreatePrefixSet( - const std::vector<SBAddPrefix>& add_prefixes) { - std::vector<SBPrefix> prefixes; - for (size_t i = 0; i < add_prefixes.size(); ++i) { - prefixes.push_back(add_prefixes[i].prefix); - } - return new safe_browsing::PrefixSet(prefixes); -} - // As compared to the bloom filter, PrefixSet should have these // properties: // - Any bloom filter miss should be a prefix set miss. @@ -198,6 +188,8 @@ enum PrefixSetEvent { PREFIX_SET_EVENT_HIT, PREFIX_SET_EVENT_BLOOM_HIT, PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT, + PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID, + PREFIX_SET_GETPREFIXES_BROKEN, // Memory space for histograms is determined by the max. ALWAYS ADD // NEW VALUES BEFORE THIS ONE. @@ -361,7 +353,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() { BloomFilter::kBloomFilterSizeRatio); // TODO(shess): It is simpler for the code to assume that presence // of a bloom filter always implies presence of a prefix set. - prefix_set_.reset(CreatePrefixSet(std::vector<SBAddPrefix>())); + prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>())); } return true; @@ -392,6 +384,9 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( return false; DCHECK(prefix_set_.get()); + // Used to double-check in case of a hit mis-match. + std::vector<SBPrefix> restored; + size_t miss_count = 0; for (size_t i = 0; i < prefixes.size(); ++i) { bool found = prefix_set_->Exists(prefixes[i]); @@ -404,10 +399,26 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( if (prefix_miss_cache_.count(prefixes[i]) > 0) ++miss_count; } else { - // Bloom filter misses should never be in prefix set. + // Bloom filter misses should never be in prefix set. Re-create + // the original prefixes and manually search for it, to check if + // there's a bug with how |Exists()| is implemented. + // |UpdateBrowseStore()| previously verified that + // |GetPrefixes()| returns the same prefixes as were passed to + // the constructor. DCHECK(!found); - if (found) - RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); + if (found) { + if (restored.empty()) + prefix_set_->GetPrefixes(&restored); + + // If the item is not in the re-created list, then there is an + // error in |PrefixSet::Exists()|. If the item is in the + // re-created list, then the bloom filter was wrong. + if (std::binary_search(restored.begin(), restored.end(), prefixes[i])) { + RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); + } else { + RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID); + } + } } } @@ -843,8 +854,24 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { filter->Insert(add_prefixes[i].prefix); } + std::vector<SBPrefix> prefixes; + for (size_t i = 0; i < add_prefixes.size(); ++i) { + prefixes.push_back(add_prefixes[i].prefix); + } + std::sort(prefixes.begin(), prefixes.end()); scoped_ptr<safe_browsing::PrefixSet> - prefix_set(CreatePrefixSet(add_prefixes)); + prefix_set(new safe_browsing::PrefixSet(prefixes)); + + // Verify that |GetPrefixes()| returns the same set of prefixes as + // was passed to the constructor. + std::vector<SBPrefix> restored; + prefix_set->GetPrefixes(&restored); + prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end()); + if (restored.size() != prefixes.size() || + !std::equal(prefixes.begin(), prefixes.end(), restored.begin())) { + NOTREACHED(); + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); + } // This needs to be in sorted order by prefix for efficient access. std::sort(add_full_hashes.begin(), add_full_hashes.end(), @@ -952,7 +979,24 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() { // TODO(shess): Write/read for prefix set. std::vector<SBAddPrefix> add_prefixes; browse_store_->GetAddPrefixes(&add_prefixes); - prefix_set_.reset(CreatePrefixSet(add_prefixes)); + std::vector<SBPrefix> prefixes; + for (size_t i = 0; i < add_prefixes.size(); ++i) { + prefixes.push_back(add_prefixes[i].prefix); + } + std::sort(prefixes.begin(), prefixes.end()); + prefix_set_.reset(new safe_browsing::PrefixSet(prefixes)); + + // Double-check the prefixes so that the + // PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in + // ContainsBrowseUrl() can be trustworthy. + std::vector<SBPrefix> restored; + prefix_set_->GetPrefixes(&restored); + std::set<SBPrefix> unique(prefixes.begin(), prefixes.end()); + if (restored.size() != unique.size() || + !std::equal(unique.begin(), unique.end(), restored.begin())) { + NOTREACHED(); + RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); + } } bool SafeBrowsingDatabaseNew::Delete() { |