diff options
author | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-30 23:19:08 +0000 |
---|---|---|
committer | shess@chromium.org <shess@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-30 23:19:08 +0000 |
commit | e32d4972fd0bdb2b0ee702bf7ebfd40eb946e8b4 (patch) | |
tree | ad64f56c60baf873fbbd4a2323f1b64632a5b308 /chrome/browser | |
parent | 769291df7f1134b22ce36f3cbc54f9b2e7094788 (diff) | |
download | chromium_src-e32d4972fd0bdb2b0ee702bf7ebfd40eb946e8b4.zip chromium_src-e32d4972fd0bdb2b0ee702bf7ebfd40eb946e8b4.tar.gz chromium_src-e32d4972fd0bdb2b0ee702bf7ebfd40eb946e8b4.tar.bz2 |
More PrefixSet diagnostics.
Info about where the unsortedness happens.
BUG=71832
TEST=I will monitor resulting histograms.
Review URL: http://codereview.chromium.org/6765035
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@79913 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser')
-rw-r--r-- | chrome/browser/safe_browsing/prefix_set.cc | 51 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/prefix_set.h | 16 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/prefix_set_unittest.cc | 21 | ||||
-rw-r--r-- | chrome/browser/safe_browsing/safe_browsing_database.cc | 47 |
4 files changed, 125 insertions, 10 deletions
diff --git a/chrome/browser/safe_browsing/prefix_set.cc b/chrome/browser/safe_browsing/prefix_set.cc index e72eb57..f2968a8 100644 --- a/chrome/browser/safe_browsing/prefix_set.cc +++ b/chrome/browser/safe_browsing/prefix_set.cc @@ -265,4 +265,55 @@ bool PrefixSet::WriteFile(const FilePath& filter_name) const { return true; } +size_t PrefixSet::IndexBinFor(size_t target_index) const { + // The items in |index_| have the logical index of each previous + // item in |index_| plus the count of deltas between the items. + // Since the indices into |deltas_| are absolute, the logical index + // is then the sum of the two indices. + size_t lo = 0; + size_t hi = index_.size(); + + // Binary search because linear search was too slow (really, the + // unit test sucked). Inline because the elements can't be compared + // in isolation (their position matters). + while (hi - lo > 1) { + const size_t i = (lo + hi) / 2; + + if (target_index < i + index_[i].second) { + DCHECK_LT(i, hi); // Always making progress. + hi = i; + } else { + DCHECK_GT(i, lo); // Always making progress. + lo = i; + } + } + return lo; +} + +size_t PrefixSet::GetSize() const { + return index_.size() + deltas_.size(); +} + +bool PrefixSet::IsDeltaAt(size_t target_index) const { + CHECK_LT(target_index, GetSize()); + + const size_t i = IndexBinFor(target_index); + return target_index > i + index_[i].second; +} + +uint16 PrefixSet::DeltaAt(size_t target_index) const { + CHECK_LT(target_index, GetSize()); + + // Find the |index_| entry which contains |target_index|. + const size_t i = IndexBinFor(target_index); + + // Exactly on the |index_| entry means no delta. + CHECK_GT(target_index, i + index_[i].second); + + // -i backs out the |index_| entries, -1 gets the delta that lead to + // the value at |target_index|. + CHECK_LT(target_index - i - 1, deltas_.size()); + return deltas_[target_index - i - 1]; +} + } // namespace safe_browsing diff --git a/chrome/browser/safe_browsing/prefix_set.h b/chrome/browser/safe_browsing/prefix_set.h index 025b163..f6d513b 100644 --- a/chrome/browser/safe_browsing/prefix_set.h +++ b/chrome/browser/safe_browsing/prefix_set.h @@ -74,6 +74,22 @@ class PrefixSet { // |prefixes|. Prefixes will be added in sorted order. void GetPrefixes(std::vector<SBPrefix>* prefixes) const; + // TODO(shess): The following are debugging accessors. Delete once + // the encoding problem is figured out. + + size_t IndexBinFor(size_t target_index) const; + + // The number of prefixes represented. + size_t GetSize() const; + + // Returns |true| if the element at |target_index| is between items in the + // |index_| array. + bool IsDeltaAt(size_t target_index) const; + + // Returns the delta used to calculate the element at + // |target_index|. Only call if |IsDeltaAt()| returned |true|. + uint16 DeltaAt(size_t target_index) const; + private: // Maximum number of consecutive deltas to encode before generating // a new index entry. This helps keep the worst-case performance diff --git a/chrome/browser/safe_browsing/prefix_set_unittest.cc b/chrome/browser/safe_browsing/prefix_set_unittest.cc index 7224e85..f0002b7f 100644 --- a/chrome/browser/safe_browsing/prefix_set_unittest.cc +++ b/chrome/browser/safe_browsing/prefix_set_unittest.cc @@ -432,4 +432,25 @@ TEST_F(PrefixSetTest, CorruptionExcess) { ASSERT_FALSE(prefix_set.get()); } +// TODO(shess): Remove once the problem is debugged. But, until then, +// make sure the accessors work! +TEST_F(PrefixSetTest, DebuggingAccessors) { + std::vector<SBPrefix> prefixes; + std::unique_copy(shared_prefixes_.begin(), shared_prefixes_.end(), + std::back_inserter(prefixes)); + safe_browsing::PrefixSet prefix_set(prefixes); + + EXPECT_EQ(prefixes.size(), prefix_set.GetSize()); + EXPECT_FALSE(prefix_set.IsDeltaAt(0)); + for (size_t i = 1; i < prefixes.size(); ++i) { + const int delta = prefixes[i] - prefixes[i - 1]; + if (delta > 0xFFFF) { + EXPECT_FALSE(prefix_set.IsDeltaAt(i)); + } else { + ASSERT_TRUE(prefix_set.IsDeltaAt(i)); + EXPECT_EQ(delta, prefix_set.DeltaAt(i)); + } + } +} + } // namespace diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc index 09854da..7c97b15 100644 --- a/chrome/browser/safe_browsing/safe_browsing_database.cc +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc @@ -221,6 +221,8 @@ enum PrefixSetEvent { PREFIX_SET_SBPREFIX_WAS_BROKEN, PREFIX_SET_GETPREFIXES_BROKEN_SORTING, PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION, + PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA, + PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX, // Memory space for histograms is determined by the max. ALWAYS ADD // NEW VALUES BEFORE THIS ONE. @@ -285,28 +287,53 @@ safe_browsing::PrefixSet* PrefixSetFromAddPrefixes( // Check whether |restored| is unsorted, or has duplication. if (restored.size()) { - bool unsorted = false; + size_t unsorted_count = 0; bool duplicates = false; - std::vector<SBPrefix>::const_iterator prev = restored.begin(); - for (std::vector<SBPrefix>::const_iterator iter = prev + 1; - iter != restored.end(); prev = iter, ++iter) { - if (*prev > *iter) - unsorted = true; - if (*prev == *iter) + SBPrefix prev = restored[0]; + for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) { + if (prev > restored[i]) { + unsorted_count++; + UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference", + prev - restored[i]); + + // When unsorted, how big is the set, and how far are we into + // it. If the set is very small or large, that might inform + // pursuit of a degenerate case. If the percentage is close + // to 0%, 100%, or 50%, then there might be an interesting + // degenerate case to explore. + UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size()); + UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent", + i * 100 / restored.size()); + + if (prefix_set->IsDeltaAt(i)) { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA); + + // Histograms require memory on the order of the number of + // buckets, making high-precision logging expensive. For + // now aim for a sense of the range of the problem. + UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta", + prefix_set->DeltaAt(i), 1, 0xFFFF, 50); + } else { + RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX); + } + } + if (prev == restored[i]) duplicates = true; } // Record findings. - if (unsorted) + if (unsorted_count) { RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING); + UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count); + } if (duplicates) RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION); // Fix the problems noted. If |restored| was unsorted, then // |duplicates| may give a false negative. - if (unsorted) + if (unsorted_count) std::sort(restored.begin(), restored.end()); - if (unsorted || duplicates) + if (unsorted_count || duplicates) restored.erase(std::unique(restored.begin(), restored.end()), restored.end()); } |