diff options
author | jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-11-01 23:40:51 +0000 |
---|---|---|
committer | jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-11-01 23:40:51 +0000 |
commit | 022001851f2f6be890f9b44c49fa45ea38fa80b2 (patch) | |
tree | 9ea0cc9ac76b7002421110a5dee0ae60082740aa /base/metrics | |
parent | ada794814ca34a4761b21b1c2562cc75de22ff52 (diff) | |
download | chromium_src-022001851f2f6be890f9b44c49fa45ea38fa80b2.zip chromium_src-022001851f2f6be890f9b44c49fa45ea38fa80b2.tar.gz chromium_src-022001851f2f6be890f9b44c49fa45ea38fa80b2.tar.bz2 |
Try to detect internal corruption of histogram instances.
Corruptions can include changes in bucket boundaries,
large changes in sample counts (in a bucket), etc.
We now detect problems, and don't forward the corrupt
data any further. This means it won't exit the renderer
and go to the browser if corrupt, and it won't exit
the browser and be sent up via UMA if corrupt.
IF the would-be corruption is caused by a race to
snapshot the data, then a later snapshot should get
the clean copy, and all data (across the precluded
period) will be sent onward.
BUG=61281
r=mbelshe
Review URL: http://codereview.chromium.org/4174002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@64687 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/metrics')
-rw-r--r-- | base/metrics/histogram.cc | 106 | ||||
-rw-r--r-- | base/metrics/histogram.h | 61 | ||||
-rw-r--r-- | base/metrics/histogram_unittest.cc | 53 |
3 files changed, 206 insertions, 14 deletions
diff --git a/base/metrics/histogram.cc b/base/metrics/histogram.cc index e07b3a8..2003f25 100644 --- a/base/metrics/histogram.cc +++ b/base/metrics/histogram.cc @@ -61,6 +61,7 @@ Histogram::Histogram(const std::string& name, Sample minimum, bucket_count_(bucket_count), flags_(kNoFlags), ranges_(bucket_count + 1, 0), + range_checksum_(0), sample_() { Initialize(); } @@ -73,6 +74,7 @@ Histogram::Histogram(const std::string& name, TimeDelta minimum, bucket_count_(bucket_count), flags_(kNoFlags), ranges_(bucket_count + 1, 0), + range_checksum_(0), sample_() { Initialize(); } @@ -86,6 +88,7 @@ Histogram::~Histogram() { // Just to make sure most derived class did this properly... DCHECK(ValidateBucketRanges()); + DCHECK(HasValidRangeChecksum()); } bool Histogram::PrintEmptyBucket(size_t index) const { @@ -218,7 +221,7 @@ void Histogram::Initialize() { // We have to be careful that we don't pick a ratio between starting points in // consecutive buckets that is sooo small, that the integer bounds are the same // (effectively making one bucket get no values). We need to avoid: -// (ranges_[i] == ranges_[i + 1] +// ranges_[i] == ranges_[i + 1] // To avoid that, we just do a fine-grained bucket width as far as we need to // until we get a ratio that moves us along at least 2 units at a time. From // that bucket onward we do use the exponential growth of buckets. @@ -244,6 +247,7 @@ void Histogram::InitializeBucketRange() { ++current; // Just do a narrow bucket, and keep trying. SetBucketRange(bucket_index, current); } + ResetRangeChecksum(); DCHECK_EQ(bucket_count(), bucket_index); } @@ -287,6 +291,23 @@ double Histogram::GetBucketSize(Count current, size_t i) const { return current/denominator; } +void Histogram::ResetRangeChecksum() { + range_checksum_ = CalculateRangeChecksum(); +} + +bool Histogram::HasValidRangeChecksum() const { + return CalculateRangeChecksum() == range_checksum_; +} + +Histogram::Sample Histogram::CalculateRangeChecksum() const { + DCHECK_EQ(ranges_.size(), bucket_count() + 1); + Sample checksum = 0; + for (size_t index = 0; index < bucket_count(); ++index) { + checksum += ranges(index); + } + return checksum; +} + //------------------------------------------------------------------------------ // The following two methods can be overridden to provide a thread safe // version of this class. The cost of locking is low... but an error in each @@ -417,6 +438,7 @@ std::string Histogram::SerializeHistogramInfo(const Histogram& histogram, pickle.WriteInt(histogram.declared_min()); pickle.WriteInt(histogram.declared_max()); pickle.WriteSize(histogram.bucket_count()); + pickle.WriteInt(histogram.range_checksum()); pickle.WriteInt(histogram.histogram_type()); pickle.WriteInt(histogram.flags()); @@ -432,19 +454,21 @@ bool Histogram::DeserializeHistogramInfo(const std::string& histogram_info) { Pickle pickle(histogram_info.data(), static_cast<int>(histogram_info.size())); - void* iter = NULL; - size_t bucket_count; + std::string histogram_name; int declared_min; int declared_max; + size_t bucket_count; + int range_checksum; int histogram_type; int pickle_flags; - std::string histogram_name; SampleSet sample; + void* iter = NULL; if (!pickle.ReadString(&iter, &histogram_name) || !pickle.ReadInt(&iter, &declared_min) || !pickle.ReadInt(&iter, &declared_max) || !pickle.ReadSize(&iter, &bucket_count) || + !pickle.ReadInt(&iter, &range_checksum) || !pickle.ReadInt(&iter, &histogram_type) || !pickle.ReadInt(&iter, &pickle_flags) || !sample.Histogram::SampleSet::Deserialize(&iter, pickle)) { @@ -483,6 +507,7 @@ bool Histogram::DeserializeHistogramInfo(const std::string& histogram_info) { DCHECK_EQ(render_histogram->declared_min(), declared_min); DCHECK_EQ(render_histogram->declared_max(), declared_max); DCHECK_EQ(render_histogram->bucket_count(), bucket_count); + DCHECK_EQ(render_histogram->range_checksum(), range_checksum); DCHECK_EQ(render_histogram->histogram_type(), histogram_type); if (render_histogram->flags() & kIPCSerializationSourceFlag) { @@ -497,13 +522,64 @@ bool Histogram::DeserializeHistogramInfo(const std::string& histogram_info) { } //------------------------------------------------------------------------------ +// Methods for the validating a sample and a related histogram. +//------------------------------------------------------------------------------ + +Histogram::Inconsistencies Histogram::FindCorruption( + const SampleSet& snapshot) const { + int inconsistencies = NO_INCONSISTENCIES; + Sample previous_range = -1; // Bottom range is always 0. + Sample checksum = 0; + int64 count = 0; + for (size_t index = 0; index < bucket_count(); ++index) { + count += snapshot.counts(index); + int new_range = ranges(index); + checksum += new_range; + if (previous_range >= new_range) + inconsistencies |= BUCKET_ORDER_ERROR; + previous_range = new_range; + } + + if (checksum != range_checksum_) + inconsistencies |= RANGE_CHECKSUM_ERROR; + + int64 delta64 = snapshot.redundant_count() - count; + if (delta64 != 0) { + int delta = static_cast<int>(delta64); + if (delta != delta64) + delta = INT_MAX; // Flag all giant errors as INT_MAX. + // Since snapshots of histograms are taken asynchronously relative to + // sampling (and snapped from different threads), it is pretty likely that + // we'll catch a redundant count that doesn't match the sample count. We + // allow for a certain amount of slop before flagging this as an + // inconsistency. Even with an inconsistency, we'll snapshot it again (for + // UMA in about a half hour, so we'll eventually get the data, if it was + // not the result of a corruption. If histograms show that 1 is "too tight" + // then we may try to use 2 or 3 for this slop value. + const int kCommonRaceBasedCountMismatch = 1; + if (delta > 0) { + UMA_HISTOGRAM_COUNTS("Histogram.InconsistentCountHigh", delta); + if (delta > kCommonRaceBasedCountMismatch) + inconsistencies |= COUNT_HIGH_ERROR; + } else { + DCHECK_GT(0, delta); + UMA_HISTOGRAM_COUNTS("Histogram.InconsistentCountLow", -delta); + if (-delta > kCommonRaceBasedCountMismatch) + inconsistencies |= COUNT_LOW_ERROR; + } + } + return static_cast<Inconsistencies>(inconsistencies); +} + +//------------------------------------------------------------------------------ // Methods for the Histogram::SampleSet class //------------------------------------------------------------------------------ Histogram::SampleSet::SampleSet() : counts_(), sum_(0), - square_sum_(0) { + square_sum_(0), + redundant_count_(0) { } Histogram::SampleSet::~SampleSet() { @@ -524,9 +600,11 @@ void Histogram::SampleSet::Accumulate(Sample value, Count count, counts_[index] += count; sum_ += count * value; square_sum_ += (count * value) * static_cast<int64>(value); + redundant_count_ += count; DCHECK_GE(counts_[index], 0); DCHECK_GE(sum_, 0); DCHECK_GE(square_sum_, 0); + DCHECK_GE(redundant_count_, 0); } Count Histogram::SampleSet::TotalCount() const { @@ -536,6 +614,7 @@ Count Histogram::SampleSet::TotalCount() const { ++it) { total += *it; } + DCHECK_EQ(total, redundant_count_); return total; } @@ -543,6 +622,7 @@ void Histogram::SampleSet::Add(const SampleSet& other) { DCHECK_EQ(counts_.size(), other.counts_.size()); sum_ += other.sum_; square_sum_ += other.square_sum_; + redundant_count_ += other.redundant_count_; for (size_t index = 0; index < counts_.size(); ++index) counts_[index] += other.counts_[index]; } @@ -554,6 +634,7 @@ void Histogram::SampleSet::Subtract(const SampleSet& other) { // calculated). As a result, we don't currently CHCEK() for positive values. sum_ -= other.sum_; square_sum_ -= other.square_sum_; + redundant_count_ -= other.redundant_count_; for (size_t index = 0; index < counts_.size(); ++index) { counts_[index] -= other.counts_[index]; DCHECK_GE(counts_[index], 0); @@ -563,6 +644,7 @@ void Histogram::SampleSet::Subtract(const SampleSet& other) { bool Histogram::SampleSet::Serialize(Pickle* pickle) const { pickle->WriteInt64(sum_); pickle->WriteInt64(square_sum_); + pickle->WriteInt64(redundant_count_); pickle->WriteSize(counts_.size()); for (size_t index = 0; index < counts_.size(); ++index) { @@ -576,11 +658,13 @@ bool Histogram::SampleSet::Deserialize(void** iter, const Pickle& pickle) { DCHECK_EQ(counts_.size(), 0u); DCHECK_EQ(sum_, 0); DCHECK_EQ(square_sum_, 0); + DCHECK_EQ(redundant_count_, 0); size_t counts_size; if (!pickle.ReadInt64(iter, &sum_) || !pickle.ReadInt64(iter, &square_sum_) || + !pickle.ReadInt64(iter, &redundant_count_) || !pickle.ReadSize(iter, &counts_size)) { return false; } @@ -588,14 +672,16 @@ bool Histogram::SampleSet::Deserialize(void** iter, const Pickle& pickle) { if (counts_size == 0) return false; + int count = 0; for (size_t index = 0; index < counts_size; ++index) { int i; if (!pickle.ReadInt(iter, &i)) return false; counts_.push_back(i); + count += i; } - - return true; + DCHECK_EQ(count, redundant_count_); + return count == redundant_count_; } //------------------------------------------------------------------------------ @@ -694,6 +780,7 @@ void LinearHistogram::InitializeBucketRange() { (bucket_count() - 2); SetBucketRange(i, static_cast<int> (linear_range + 0.5)); } + ResetRangeChecksum(); } double LinearHistogram::GetBucketSize(Count current, size_t i) const { @@ -740,7 +827,7 @@ BooleanHistogram::BooleanHistogram(const std::string& name) scoped_refptr<Histogram> CustomHistogram::FactoryGet( const std::string& name, - const std::vector<int>& custom_ranges, + const std::vector<Sample>& custom_ranges, Flags flags) { scoped_refptr<Histogram> histogram(NULL); @@ -774,7 +861,7 @@ Histogram::ClassType CustomHistogram::histogram_type() const { } CustomHistogram::CustomHistogram(const std::string& name, - const std::vector<int>& custom_ranges) + const std::vector<Sample>& custom_ranges) : Histogram(name, custom_ranges[1], custom_ranges.back(), custom_ranges.size()) { DCHECK_GT(custom_ranges.size(), 1u); @@ -789,6 +876,7 @@ void CustomHistogram::InitializeBucketRange() { DCHECK_LE(ranges_vector_->size(), bucket_count()); for (size_t index = 0; index < ranges_vector_->size(); ++index) SetBucketRange(index, (*ranges_vector_)[index]); + ResetRangeChecksum(); } double CustomHistogram::GetBucketSize(Count current, size_t i) const { diff --git a/base/metrics/histogram.h b/base/metrics/histogram.h index 20f67c2..000807a 100644 --- a/base/metrics/histogram.h +++ b/base/metrics/histogram.h @@ -36,6 +36,7 @@ #include <string> #include <vector> +#include "base/gtest_prod_util.h" #include "base/ref_counted.h" #include "base/logging.h" #include "base/time.h" @@ -243,8 +244,8 @@ class Histogram : public base::RefCountedThreadSafe<Histogram> { typedef std::vector<Count> Counts; typedef std::vector<Sample> Ranges; - /* These enums are meant to facilitate deserialization of renderer histograms - into the browser. */ + // These enums are used to facilitate deserialization of renderer histograms + // into the browser. enum ClassType { HISTOGRAM, LINEAR_HISTOGRAM, @@ -273,6 +274,16 @@ class Histogram : public base::RefCountedThreadSafe<Histogram> { kHexRangePrintingFlag = 0x8000, // Fancy bucket-naming supported. }; + enum Inconsistencies { + NO_INCONSISTENCIES = 0x0, + RANGE_CHECKSUM_ERROR = 0x1, + BUCKET_ORDER_ERROR = 0x2, + COUNT_HIGH_ERROR = 0x4, + COUNT_LOW_ERROR = 0x8, + + NEVER_EXCEEDED_VALUE = 0x10 + }; + struct DescriptionPair { Sample sample; const char* description; // Null means end of a list of pairs. @@ -298,6 +309,7 @@ class Histogram : public base::RefCountedThreadSafe<Histogram> { Count TotalCount() const; int64 sum() const { return sum_; } int64 square_sum() const { return square_sum_; } + int64 redundant_count() const { return redundant_count_; } // Arithmetic manipulation of corresponding elements of the set. void Add(const SampleSet& other); @@ -315,7 +327,21 @@ class Histogram : public base::RefCountedThreadSafe<Histogram> { // without shared memory at some point. int64 sum_; // sum of samples. int64 square_sum_; // sum of squares of samples. + + private: + // Allow tests to corrupt our innards for testing purposes. + FRIEND_TEST(HistogramTest, CorruptSampleCounts); + + // To help identify memory corruption, we reduntantly save the number of + // samples we've accumulated into all of our buckets. We can compare this + // count to the sum of the counts in all buckets, and detect problems. Note + // that due to races in histogram accumulation (if a histogram is indeed + // updated on several threads simultaneously), the tallies might mismatch, + // and also the snapshotting code may asynchronously get a mismatch (though + // generally either race based mismatch cause is VERY rare). + int64 redundant_count_; }; + //---------------------------------------------------------------------------- // minimum should start from 1. 0 is invalid as a minimum. 0 is an implicit // default underflow bucket. @@ -367,6 +393,13 @@ class Histogram : public base::RefCountedThreadSafe<Histogram> { // browser process. static bool DeserializeHistogramInfo(const std::string& histogram_info); + // Check to see if bucket ranges, counts and tallies in the snapshot are + // consistent with the bucket ranges and checksums in our histogram. This can + // produce a false-alarm if a race occurred in the reading of the data during + // a SnapShot process, but should otherwise be false at all times (unless we + // have memory over-writes, or DRAM failures). + Inconsistencies FindCorruption(const SampleSet& snapshot) const; + //---------------------------------------------------------------------------- // Accessors for factory constuction, serialization and testing. //---------------------------------------------------------------------------- @@ -375,6 +408,7 @@ class Histogram : public base::RefCountedThreadSafe<Histogram> { Sample declared_min() const { return declared_min_; } Sample declared_max() const { return declared_max_; } virtual Sample ranges(size_t i) const { return ranges_[i];} + Sample range_checksum() const { return range_checksum_; } virtual size_t bucket_count() const { return bucket_count_; } // Snapshot the current complete set of sample data. // Override with atomic/locked snapshot if needed. @@ -409,6 +443,9 @@ class Histogram : public base::RefCountedThreadSafe<Histogram> { // Get normalized size, relative to the ranges_[i]. virtual double GetBucketSize(Count current, size_t i) const; + // Recalculate range_checksum_. + void ResetRangeChecksum(); + // Return a string description of what goes in a given bucket. // Most commonly this is the numeric value, but in derived classes it may // be a name (or string description) given to the bucket. @@ -430,9 +467,18 @@ class Histogram : public base::RefCountedThreadSafe<Histogram> { bool ValidateBucketRanges() const; private: + // Allow tests to corrupt our innards for testing purposes. + FRIEND_TEST(HistogramTest, CorruptBucketBounds); + FRIEND_TEST(HistogramTest, CorruptSampleCounts); + // Post constructor initialization. void Initialize(); + // Return true iff the range_checksum_ matches current ranges_ vector. + bool HasValidRangeChecksum() const; + + Sample CalculateRangeChecksum() const; + //---------------------------------------------------------------------------- // Helpers for emitting Ascii graphic. Each method appends data to output. @@ -477,6 +523,11 @@ class Histogram : public base::RefCountedThreadSafe<Histogram> { // The dimension of ranges_ is bucket_count + 1. Ranges ranges_; + // For redundancy, we store the sum of all the sample ranges when ranges are + // generated. If ever there is ever a difference, then the histogram must + // have been corrupted. + Sample range_checksum_; + // Finally, provide the state that changes with the addition of each new // sample. SampleSet sample_; @@ -561,11 +612,11 @@ class CustomHistogram : public Histogram { virtual ClassType histogram_type() const; static scoped_refptr<Histogram> FactoryGet(const std::string& name, - const std::vector<int>& custom_ranges, Flags flags); + const std::vector<Sample>& custom_ranges, Flags flags); protected: CustomHistogram(const std::string& name, - const std::vector<int>& custom_ranges); + const std::vector<Sample>& custom_ranges); // Initialize ranges_ mapping. virtual void InitializeBucketRange(); @@ -573,7 +624,7 @@ class CustomHistogram : public Histogram { private: // Temporary pointer used during construction/initialization, and then NULLed. - const std::vector<int>* ranges_vector_; + const std::vector<Sample>* ranges_vector_; DISALLOW_COPY_AND_ASSIGN(CustomHistogram); }; diff --git a/base/metrics/histogram_unittest.cc b/base/metrics/histogram_unittest.cc index e7e3983..b9c51ad 100644 --- a/base/metrics/histogram_unittest.cc +++ b/base/metrics/histogram_unittest.cc @@ -308,4 +308,57 @@ TEST(HistogramTest, BucketPlacementTest) { } } // namespace + +//------------------------------------------------------------------------------ +// We can't be an an anonymous namespace while being friends, so we pop back +// out to the base namespace here. We need to be friends to corrupt the +// internals of the histogram and/or sampleset. +TEST(HistogramTest, CorruptSampleCounts) { + scoped_refptr<Histogram> histogram = Histogram::FactoryGet( + "Histogram", 1, 64, 8, Histogram::kNoFlags); // As per header file. + + EXPECT_EQ(0, histogram->sample_.redundant_count()); + histogram->Add(20); // Add some samples. + histogram->Add(40); + EXPECT_EQ(2, histogram->sample_.redundant_count()); + + Histogram::SampleSet snapshot; + histogram->SnapshotSample(&snapshot); + EXPECT_EQ(Histogram::NO_INCONSISTENCIES, 0); + EXPECT_EQ(0, histogram->FindCorruption(snapshot)); // No default corruption. + EXPECT_EQ(2, snapshot.redundant_count()); + + snapshot.counts_[3] += 100; // Sample count won't match redundant count. + EXPECT_EQ(Histogram::COUNT_LOW_ERROR, histogram->FindCorruption(snapshot)); + snapshot.counts_[2] -= 200; + EXPECT_EQ(Histogram::COUNT_HIGH_ERROR, histogram->FindCorruption(snapshot)); + + // But we can't spot a corruption if it is compensated for. + snapshot.counts_[1] += 100; + EXPECT_EQ(0, histogram->FindCorruption(snapshot)); +} + +TEST(HistogramTest, CorruptBucketBounds) { + scoped_refptr<Histogram> histogram = Histogram::FactoryGet( + "Histogram", 1, 64, 8, Histogram::kNoFlags); // As per header file. + + Histogram::SampleSet snapshot; + histogram->SnapshotSample(&snapshot); + EXPECT_EQ(Histogram::NO_INCONSISTENCIES, 0); + EXPECT_EQ(0, histogram->FindCorruption(snapshot)); // No default corruption. + + std::swap(histogram->ranges_[1], histogram->ranges_[2]); + EXPECT_EQ(Histogram::BUCKET_ORDER_ERROR, histogram->FindCorruption(snapshot)); + + std::swap(histogram->ranges_[1], histogram->ranges_[2]); + EXPECT_EQ(0, histogram->FindCorruption(snapshot)); + + ++histogram->ranges_[3]; + EXPECT_EQ(Histogram::RANGE_CHECKSUM_ERROR, + histogram->FindCorruption(snapshot)); + + // Repair histogram so that destructor won't DCHECK(). + --histogram->ranges_[3]; +} + } // namespace base |