summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorshess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-03-04 01:16:58 +0000
committershess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-03-04 01:16:58 +0000
commitcf50836f5fff555ab9b8d2e961029a31514ea239 (patch)
tree39e6f10ca0f7876888a637103a7747e607c6451f
parentcff8c308df4b4f2388e7bd3cbe1b462228c12d4c (diff)
downloadchromium_src-cf50836f5fff555ab9b8d2e961029a31514ea239.zip
chromium_src-cf50836f5fff555ab9b8d2e961029a31514ea239.tar.gz
chromium_src-cf50836f5fff555ab9b8d2e961029a31514ea239.tar.bz2
Additional validation code for PrefixSet.
The PrefixSet-vs-BloomFilter histograms showed a minor discrepency, with a very small number of PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT reports. This CL adds code to regenerate the prefix list and manually double-check. Additionally, reduce memory use by requiring the input prefix vector to be pre-sorted. BUG=71832 TEST=none Review URL: http://codereview.chromium.org/6591087 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@76853 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--chrome/browser/safe_browsing/prefix_set.cc40
-rw-r--r--chrome/browser/safe_browsing/prefix_set.h6
-rw-r--r--chrome/browser/safe_browsing/prefix_set_unittest.cc25
-rw-r--r--chrome/browser/safe_browsing/safe_browsing_database.cc76
4 files changed, 112 insertions, 35 deletions
diff --git a/chrome/browser/safe_browsing/prefix_set.cc b/chrome/browser/safe_browsing/prefix_set.cc
index dae9578..9ddd123 100644
--- a/chrome/browser/safe_browsing/prefix_set.cc
+++ b/chrome/browser/safe_browsing/prefix_set.cc
@@ -22,30 +22,28 @@ bool PrefixLess(const std::pair<SBPrefix,size_t>& a,
namespace safe_browsing {
-PrefixSet::PrefixSet(const std::vector<SBPrefix>& prefixes) {
- if (prefixes.size()) {
- std::vector<SBPrefix> sorted(prefixes.begin(), prefixes.end());
- std::sort(sorted.begin(), sorted.end());
-
+PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) {
+ if (sorted_prefixes.size()) {
// Lead with the first prefix.
- SBPrefix prev_prefix = sorted[0];
+ SBPrefix prev_prefix = sorted_prefixes[0];
size_t run_length = 0;
index_.push_back(std::make_pair(prev_prefix, deltas_.size()));
- for (size_t i = 1; i < sorted.size(); ++i) {
- // Don't encode zero deltas.
- if (sorted[i] == prev_prefix)
+ for (size_t i = 1; i < sorted_prefixes.size(); ++i) {
+ // Skip duplicates.
+ if (sorted_prefixes[i] == prev_prefix)
continue;
// Calculate the delta. |unsigned| is mandatory, because the
- // prefixes could be more than INT_MAX apart.
- unsigned delta = sorted[i] - prev_prefix;
+ // sorted_prefixes could be more than INT_MAX apart.
+ DCHECK_GT(sorted_prefixes[i], prev_prefix);
+ unsigned delta = sorted_prefixes[i] - prev_prefix;
// New index ref if the delta is too wide, or if too many
// consecutive deltas have been encoded. Note that
// |kMaxDelta| cannot itself be encoded.
if (delta >= kMaxDelta || run_length >= kMaxRun) {
- index_.push_back(std::make_pair(sorted[i], deltas_.size()));
+ index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size()));
run_length = 0;
} else {
// Continue the run of deltas.
@@ -53,7 +51,7 @@ PrefixSet::PrefixSet(const std::vector<SBPrefix>& prefixes) {
++run_length;
}
- prev_prefix = sorted[i];
+ prev_prefix = sorted_prefixes[i];
}
// Send up some memory-usage stats. Bits because fractional bytes
@@ -103,4 +101,20 @@ bool PrefixSet::Exists(SBPrefix prefix) const {
return current == prefix;
}
+void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) {
+ for (size_t ii = 0; ii < index_.size(); ++ii) {
+ // The deltas for this |index_| entry run to the next index entry,
+ // or the end of the deltas.
+ const size_t deltas_end =
+ (ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size();
+
+ SBPrefix current = index_[ii].first;
+ prefixes->push_back(current);
+ for (size_t di = index_[ii].second; di < deltas_end; ++di) {
+ current += deltas_[di];
+ prefixes->push_back(current);
+ }
+ }
+}
+
} // namespace safe_browsing
diff --git a/chrome/browser/safe_browsing/prefix_set.h b/chrome/browser/safe_browsing/prefix_set.h
index 229b772..cb2cd9f 100644
--- a/chrome/browser/safe_browsing/prefix_set.h
+++ b/chrome/browser/safe_browsing/prefix_set.h
@@ -59,12 +59,16 @@ namespace safe_browsing {
class PrefixSet {
public:
- explicit PrefixSet(const std::vector<SBPrefix>& prefixes);
+ explicit PrefixSet(const std::vector<SBPrefix>& sorted_prefixes);
~PrefixSet();
// |true| if |prefix| was in |prefixes| passed to the constructor.
bool Exists(SBPrefix prefix) const;
+ // Regenerate the vector of prefixes passed to the constructor into
+ // |prefixes|. Prefixes will be added in sorted order.
+ void GetPrefixes(std::vector<SBPrefix>* prefixes);
+
private:
// Maximum delta that can be encoded in a 16-bit unsigned.
static const unsigned kMaxDelta = 256 * 256;
diff --git a/chrome/browser/safe_browsing/prefix_set_unittest.cc b/chrome/browser/safe_browsing/prefix_set_unittest.cc
index 9328b77..f7be5da 100644
--- a/chrome/browser/safe_browsing/prefix_set_unittest.cc
+++ b/chrome/browser/safe_browsing/prefix_set_unittest.cc
@@ -26,11 +26,19 @@ TEST(PrefixSetTest, Baseline) {
prefixes.push_back(GenPrefix());
}
+ std::sort(prefixes.begin(), prefixes.end());
safe_browsing::PrefixSet prefix_set(prefixes);
+ // Check that |GetPrefixes()| returns exactly the same set of
+ // prefixes as was passed in.
+ std::set<SBPrefix> check(prefixes.begin(), prefixes.end());
+ std::vector<SBPrefix> prefixes_copy;
+ prefix_set.GetPrefixes(&prefixes_copy);
+ EXPECT_EQ(prefixes_copy.size(), check.size());
+ EXPECT_TRUE(std::equal(check.begin(), check.end(), prefixes_copy.begin()));
+
// Check that the set flags all of the inputs, and also check items
// just above and below the inputs to make sure they aren't there.
- std::set<SBPrefix> check(prefixes.begin(), prefixes.end());
for (size_t i = 0; i < prefixes.size(); ++i) {
EXPECT_TRUE(prefix_set.Exists(prefixes[i]));
@@ -45,7 +53,7 @@ TEST(PrefixSetTest, Baseline) {
}
// Test that the empty set doesn't appear to have anything in it.
-TEST(PrefixSetTest, xEmpty) {
+TEST(PrefixSetTest, Empty) {
std::vector<SBPrefix> prefixes;
safe_browsing::PrefixSet prefix_set(prefixes);
for (size_t i = 0; i < 500; ++i) {
@@ -97,11 +105,18 @@ TEST(PrefixSetTest, EdgeCases) {
delta--;
}
- // Shuffle things up for giggles.
- std::random_shuffle(prefixes.begin(), prefixes.end());
-
+ std::sort(prefixes.begin(), prefixes.end());
safe_browsing::PrefixSet prefix_set(prefixes);
+ // Check that |GetPrefixes()| returns the same set of prefixes as
+ // was passed in.
+ std::vector<SBPrefix> prefixes_copy;
+ prefix_set.GetPrefixes(&prefixes_copy);
+ prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end());
+ EXPECT_EQ(prefixes_copy.size(), prefixes.size());
+ EXPECT_TRUE(std::equal(prefixes.begin(), prefixes.end(),
+ prefixes_copy.begin()));
+
// Items before and after the set are not present, and don't crash.
EXPECT_FALSE(prefix_set.Exists(kVeryNegative - 100));
EXPECT_FALSE(prefix_set.Exists(kVeryPositive + 100));
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
index c142907..00b16f7 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -178,16 +178,6 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
return a.full_hash.prefix < b.full_hash.prefix;
}
-// Create a |PrefixSet| from a vector of add-prefixes.
-safe_browsing::PrefixSet* CreatePrefixSet(
- const std::vector<SBAddPrefix>& add_prefixes) {
- std::vector<SBPrefix> prefixes;
- for (size_t i = 0; i < add_prefixes.size(); ++i) {
- prefixes.push_back(add_prefixes[i].prefix);
- }
- return new safe_browsing::PrefixSet(prefixes);
-}
-
// As compared to the bloom filter, PrefixSet should have these
// properties:
// - Any bloom filter miss should be a prefix set miss.
@@ -198,6 +188,8 @@ enum PrefixSetEvent {
PREFIX_SET_EVENT_HIT,
PREFIX_SET_EVENT_BLOOM_HIT,
PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
+ PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
+ PREFIX_SET_GETPREFIXES_BROKEN,
// Memory space for histograms is determined by the max. ALWAYS ADD
// NEW VALUES BEFORE THIS ONE.
@@ -361,7 +353,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
BloomFilter::kBloomFilterSizeRatio);
// TODO(shess): It is simpler for the code to assume that presence
// of a bloom filter always implies presence of a prefix set.
- prefix_set_.reset(CreatePrefixSet(std::vector<SBAddPrefix>()));
+ prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
}
return true;
@@ -392,6 +384,9 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
return false;
DCHECK(prefix_set_.get());
+ // Used to double-check in case of a hit mis-match.
+ std::vector<SBPrefix> restored;
+
size_t miss_count = 0;
for (size_t i = 0; i < prefixes.size(); ++i) {
bool found = prefix_set_->Exists(prefixes[i]);
@@ -404,10 +399,26 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
if (prefix_miss_cache_.count(prefixes[i]) > 0)
++miss_count;
} else {
- // Bloom filter misses should never be in prefix set.
+ // Bloom filter misses should never be in prefix set. Re-create
+ // the original prefixes and manually search for it, to check if
+ // there's a bug with how |Exists()| is implemented.
+ // |UpdateBrowseStore()| previously verified that
+ // |GetPrefixes()| returns the same prefixes as were passed to
+ // the constructor.
DCHECK(!found);
- if (found)
- RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
+ if (found) {
+ if (restored.empty())
+ prefix_set_->GetPrefixes(&restored);
+
+ // If the item is not in the re-created list, then there is an
+ // error in |PrefixSet::Exists()|. If the item is in the
+ // re-created list, then the bloom filter was wrong.
+ if (std::binary_search(restored.begin(), restored.end(), prefixes[i])) {
+ RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
+ } else {
+ RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
+ }
+ }
}
}
@@ -843,8 +854,24 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
filter->Insert(add_prefixes[i].prefix);
}
+ std::vector<SBPrefix> prefixes;
+ for (size_t i = 0; i < add_prefixes.size(); ++i) {
+ prefixes.push_back(add_prefixes[i].prefix);
+ }
+ std::sort(prefixes.begin(), prefixes.end());
scoped_ptr<safe_browsing::PrefixSet>
- prefix_set(CreatePrefixSet(add_prefixes));
+ prefix_set(new safe_browsing::PrefixSet(prefixes));
+
+ // Verify that |GetPrefixes()| returns the same set of prefixes as
+ // was passed to the constructor.
+ std::vector<SBPrefix> restored;
+ prefix_set->GetPrefixes(&restored);
+ prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end());
+ if (restored.size() != prefixes.size() ||
+ !std::equal(prefixes.begin(), prefixes.end(), restored.begin())) {
+ NOTREACHED();
+ RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
+ }
// This needs to be in sorted order by prefix for efficient access.
std::sort(add_full_hashes.begin(), add_full_hashes.end(),
@@ -952,7 +979,24 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() {
// TODO(shess): Write/read for prefix set.
std::vector<SBAddPrefix> add_prefixes;
browse_store_->GetAddPrefixes(&add_prefixes);
- prefix_set_.reset(CreatePrefixSet(add_prefixes));
+ std::vector<SBPrefix> prefixes;
+ for (size_t i = 0; i < add_prefixes.size(); ++i) {
+ prefixes.push_back(add_prefixes[i].prefix);
+ }
+ std::sort(prefixes.begin(), prefixes.end());
+ prefix_set_.reset(new safe_browsing::PrefixSet(prefixes));
+
+ // Double-check the prefixes so that the
+ // PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
+ // ContainsBrowseUrl() can be trustworthy.
+ std::vector<SBPrefix> restored;
+ prefix_set_->GetPrefixes(&restored);
+ std::set<SBPrefix> unique(prefixes.begin(), prefixes.end());
+ if (restored.size() != unique.size() ||
+ !std::equal(unique.begin(), unique.end(), restored.begin())) {
+ NOTREACHED();
+ RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
+ }
}
bool SafeBrowsingDatabaseNew::Delete() {