diff options
author | holte <holte@chromium.org> | 2015-04-27 18:05:01 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-04-28 01:05:30 +0000 |
commit | feb4e55d126e711b0a5e9dd321371a4923768346 (patch) | |
tree | afac78319235b9c32bd1b26771f932449eee1a64 | |
parent | 3bd4ef1379b970784d0b9cc304cdc1b361dd2646 (diff) | |
download | chromium_src-feb4e55d126e711b0a5e9dd321371a4923768346.zip chromium_src-feb4e55d126e711b0a5e9dd321371a4923768346.tar.gz chromium_src-feb4e55d126e711b0a5e9dd321371a4923768346.tar.bz2 |
Multi-dimensional Rappor Implementation
This implements support for Rappor metrics which contain
multiple fields that support correlation analysis.
Example:
scoped_ptr<Sample> sample = rappor_service->CreateSample(COARSE_RAPPOR_TYPE);
sample->SetStringField("Domain", "google.com");
sample->SetFlagsField("Flags", 0x10, 8 /* # of bits */);
rappor_service->RecordSampleObj("MyMetric", sample.Pass());
This change also removes metric_name from the personalization string for the PRR. This means that two different metrics which report the same value will use the same PRR, which will improve privacy when multiple metrics report the same value.
BUG=451647
Review URL: https://codereview.chromium.org/1090683003
Cr-Commit-Position: refs/heads/master@{#327207}
-rw-r--r-- | components/components_tests.gyp | 1 | ||||
-rw-r--r-- | components/rappor.gypi | 6 | ||||
-rw-r--r-- | components/rappor/BUILD.gn | 7 | ||||
-rw-r--r-- | components/rappor/bloom_filter.cc | 34 | ||||
-rw-r--r-- | components/rappor/bloom_filter.h | 11 | ||||
-rw-r--r-- | components/rappor/bloom_filter_unittest.cc | 11 | ||||
-rw-r--r-- | components/rappor/rappor_metric.cc | 31 | ||||
-rw-r--r-- | components/rappor/rappor_service.cc | 46 | ||||
-rw-r--r-- | components/rappor/rappor_service.h | 24 | ||||
-rw-r--r-- | components/rappor/rappor_service_unittest.cc | 21 | ||||
-rw-r--r-- | components/rappor/reports.cc | 52 | ||||
-rw-r--r-- | components/rappor/reports.h | 27 | ||||
-rw-r--r-- | components/rappor/sample.cc | 77 | ||||
-rw-r--r-- | components/rappor/sample.h | 70 | ||||
-rw-r--r-- | components/rappor/sampler.cc | 39 | ||||
-rw-r--r-- | components/rappor/sampler.h | 53 | ||||
-rw-r--r-- | components/rappor/sampler_unittest.cc | 60 |
17 files changed, 524 insertions, 46 deletions
diff --git a/components/components_tests.gyp b/components/components_tests.gyp index c9191c7..a0f1aa0 100644 --- a/components/components_tests.gyp +++ b/components/components_tests.gyp @@ -432,6 +432,7 @@ 'rappor/rappor_prefs_unittest.cc', 'rappor/rappor_service_unittest.cc', 'rappor/rappor_utils_unittest.cc', + 'rappor/sampler_unittest.cc', ], 'scheduler_unittest_sources': [ 'scheduler/child/nestable_task_runner_for_test.cc', diff --git a/components/rappor.gypi b/components/rappor.gypi index 3fed810..88c77f0 100644 --- a/components/rappor.gypi +++ b/components/rappor.gypi @@ -41,6 +41,12 @@ 'rappor/rappor_service.h', 'rappor/rappor_utils.cc', 'rappor/rappor_utils.h', + 'rappor/reports.cc', + 'rappor/reports.h', + 'rappor/sample.cc', + 'rappor/sample.h', + 'rappor/sampler.cc', + 'rappor/sampler.h', ], 'variables': { 'proto_in_dir': 'rappor/proto', diff --git a/components/rappor/BUILD.gn b/components/rappor/BUILD.gn index 8bad347..416d69c 100644 --- a/components/rappor/BUILD.gn +++ b/components/rappor/BUILD.gn @@ -24,6 +24,12 @@ static_library("rappor") { "rappor_service.h", "rappor_utils.cc", "rappor_utils.h", + "reports.cc", + "reports.h", + "sample.cc", + "sample.h", + "sampler.cc", + "sampler.h", ] deps = [ @@ -63,6 +69,7 @@ source_set("unit_tests") { "rappor_prefs_unittest.cc", "rappor_service_unittest.cc", "rappor_utils_unittest.cc", + "sampler_unittest.cc", ] deps = [ diff --git a/components/rappor/bloom_filter.cc b/components/rappor/bloom_filter.cc index 9ad8f3c..6c6f654 100644 --- a/components/rappor/bloom_filter.cc +++ b/components/rappor/bloom_filter.cc @@ -9,6 +9,16 @@ namespace rappor { +namespace { + +uint32_t ComputeHash(const std::string& str, uint32_t seed) { + // Using CityHash here because we have support for it in Dremel. Many hash + // functions, such as MD5, SHA1, or Murmur, would probably also work. 
+ return CityHash64WithSeed(str.data(), str.size(), seed); +} + +} // namespace + BloomFilter::BloomFilter(uint32_t bytes_size, uint32_t hash_function_count, uint32_t hash_seed_offset) @@ -25,10 +35,7 @@ void BloomFilter::SetString(const std::string& str) { bytes_[i] = 0; } for (size_t i = 0; i < hash_function_count_; ++i) { - // Using CityHash here because we have support for it in Dremel. Many hash - // functions, such as MD5, SHA1, or Murmur, would probably also work. - uint32_t index = - CityHash64WithSeed(str.data(), str.size(), hash_seed_offset_ + i); + uint32_t index = ComputeHash(str, hash_seed_offset_ + i); // Note that the "bytes" are uint8_t, so they are always 8-bits. uint32_t byte_index = (index / 8) % bytes_.size(); uint32_t bit_index = index % 8; @@ -43,4 +50,23 @@ void BloomFilter::SetBytesForTesting(const ByteVector& bytes) { } } +namespace internal { + +uint64_t GetBloomBits(uint32_t bytes_size, + uint32_t hash_function_count, + uint32_t hash_seed_offset, + const std::string& str) { + // Make sure result fits in uint64. + DCHECK_LE(bytes_size, 8u); + uint64_t output = 0; + const uint32_t bits_size = bytes_size * 8; + for (size_t i = 0; i < hash_function_count; ++i) { + uint32_t index = ComputeHash(str, hash_seed_offset + i); + output |= 1ULL << uint64_t(index % bits_size); + } + return output; +} + +} // namespace internal + } // namespace rappor diff --git a/components/rappor/bloom_filter.h b/components/rappor/bloom_filter.h index 482f1c3..20adfab 100644 --- a/components/rappor/bloom_filter.h +++ b/components/rappor/bloom_filter.h @@ -47,6 +47,17 @@ class BloomFilter { DISALLOW_COPY_AND_ASSIGN(BloomFilter); }; + +namespace internal { + +// A function for getting bloom filters less than or equal to 64 bits. 
+uint64_t GetBloomBits(uint32_t bytes_size, + uint32_t hash_function_count, + uint32_t hash_seed_offset, + const std::string& str); + +} // namespace internal + } // namespace rappor #endif // COMPONENTS_RAPPOR_BLOOM_FILTER_H_ diff --git a/components/rappor/bloom_filter_unittest.cc b/components/rappor/bloom_filter_unittest.cc index 25f965f..a7ca069 100644 --- a/components/rappor/bloom_filter_unittest.cc +++ b/components/rappor/bloom_filter_unittest.cc @@ -51,4 +51,15 @@ TEST(BloomFilterTest, HugeFilter) { EXPECT_EQ(1, CountBits(filter.bytes())); } +TEST(BloomFilterTest, GetBloomBitsSmall) { + uint64_t bytes_from_get = internal::GetBloomBits(1u, 4u, 0u, "Bar"); + EXPECT_EQ(0xa8u, bytes_from_get); +} + +TEST(BloomFilterTest, GetBloomBitsLarge) { + // Make sure that a 64-bit bloom filter can set the full range of bits. + uint64_t bytes_from_get = internal::GetBloomBits(8u, 1024u, 0u, "Bar"); + EXPECT_EQ(0xffffffffffffffffu, bytes_from_get); +} + } // namespace rappor diff --git a/components/rappor/rappor_metric.cc b/components/rappor/rappor_metric.cc index 862ead8..3c368a3 100644 --- a/components/rappor/rappor_metric.cc +++ b/components/rappor/rappor_metric.cc @@ -6,6 +6,7 @@ #include "base/logging.h" #include "base/rand_util.h" +#include "components/rappor/reports.h" namespace rappor { @@ -38,35 +39,7 @@ void RapporMetric::AddSample(const std::string& str) { } ByteVector RapporMetric::GetReport(const std::string& secret) const { - // Generate a deterministically random mask of fake data using the - // client's secret key + real data as a seed. The inclusion of the secret - // in the seed avoids correlations between real and fake data. - // The seed isn't a human-readable string. 
- const std::string personalization_string = metric_name_ + - std::string(bytes().begin(), bytes().end()); - HmacByteVectorGenerator hmac_generator(bytes().size(), secret, - personalization_string); - const ByteVector fake_mask = - hmac_generator.GetWeightedRandomByteVector(parameters().fake_prob); - ByteVector fake_bits = - hmac_generator.GetWeightedRandomByteVector(parameters().fake_one_prob); - - // Redact most of the real data by replacing it with the fake data, hiding - // and limiting the amount of information an individual client reports on. - const ByteVector* fake_and_redacted_bits = - ByteVectorMerge(fake_mask, bytes(), &fake_bits); - - // Generate biased coin flips for each bit. - ByteVectorGenerator coin_generator(bytes().size()); - const ByteVector zero_coins = - coin_generator.GetWeightedRandomByteVector(parameters().zero_coin_prob); - ByteVector one_coins = - coin_generator.GetWeightedRandomByteVector(parameters().one_coin_prob); - - // Create a randomized response report on the fake and redacted data, sending - // the outcome of flipping a zero coin for the zero bits in that data, and of - // flipping a one coin for the one bits in that data, as the final report. 
- return *ByteVectorMerge(*fake_and_redacted_bits, zero_coins, &one_coins); + return internal::GenerateReport(secret, parameters(), bytes()); } void RapporMetric::SetBytesForTesting(const ByteVector& bytes) { diff --git a/components/rappor/rappor_service.cc b/components/rappor/rappor_service.cc index 7fc4fce..9c950b5 100644 --- a/components/rappor/rappor_service.cc +++ b/components/rappor/rappor_service.cc @@ -184,26 +184,23 @@ void RapporService::OnLogInterval() { } bool RapporService::ExportMetrics(RapporReports* reports) { - if (metrics_map_.empty()) { - DVLOG(2) << "metrics_map_ is empty."; - return false; - } - DCHECK_GE(cohort_, 0); reports->set_cohort(cohort_); - for (std::map<std::string, RapporMetric*>::const_iterator it = - metrics_map_.begin(); - it != metrics_map_.end(); - ++it) { - const RapporMetric* metric = it->second; + for (const auto& kv : metrics_map_) { + const RapporMetric* metric = kv.second; RapporReports::Report* report = reports->add_report(); - report->set_name_hash(metrics::HashMetricName(it->first)); + report->set_name_hash(metrics::HashMetricName(kv.first)); ByteVector bytes = metric->GetReport(secret_); report->set_bits(std::string(bytes.begin(), bytes.end())); } STLDeleteValues(&metrics_map_); - return true; + + sampler_.ExportMetrics(secret_, reports); + + DVLOG(2) << "Generated a report with " << reports->report_size() + << "metrics."; + return reports->report_size() > 0; } bool RapporService::IsInitialized() const { @@ -232,7 +229,7 @@ void RapporService::RecordSampleInternal(const std::string& metric_name, DVLOG(2) << "Metric not logged due to incognito mode."; return; } - // Skip this metric if it's reporting level is less than the enabled + // Skip this metric if its reporting level is less than the enabled // reporting level. 
if (recording_level_ < parameters.recording_level) { DVLOG(2) << "Metric not logged due to recording_level " @@ -259,4 +256,27 @@ RapporMetric* RapporService::LookUpMetric(const std::string& metric_name, return new_metric; } +scoped_ptr<Sample> RapporService::CreateSample(RapporType type) { + DCHECK(IsInitialized()); + return scoped_ptr<Sample>( + new Sample(cohort_, kRapporParametersForType[type])); +} + +void RapporService::RecordSampleObj(const std::string& metric_name, + scoped_ptr<Sample> sample) { + if (is_incognito_callback_.Run()) { + DVLOG(2) << "Metric not logged due to incognito mode."; + return; + } + // Skip this metric if its reporting level is less than the enabled + // reporting level. + if (recording_level_ < sample->parameters().recording_level) { + DVLOG(2) << "Metric not logged due to recording_level " + << recording_level_ << " < " + << sample->parameters().recording_level; + return; + } + sampler_.AddSample(metric_name, sample.Pass()); +} + } // namespace rappor diff --git a/components/rappor/rappor_service.h b/components/rappor/rappor_service.h index d50b91e..564c7f5 100644 --- a/components/rappor/rappor_service.h +++ b/components/rappor/rappor_service.h @@ -15,6 +15,8 @@ #include "base/timer/timer.h" #include "components/metrics/daily_event.h" #include "components/rappor/rappor_parameters.h" +#include "components/rappor/sample.h" +#include "components/rappor/sampler.h" class PrefRegistrySimple; class PrefService; @@ -63,6 +65,26 @@ class RapporService { // If |may_upload| is true, reports will be uploaded from the queue. void Update(RecordingLevel recording_level, bool may_upload); + // Constructs a Sample object for the caller to record fields in. + scoped_ptr<Sample> CreateSample(RapporType); + + // Records a Sample of rappor metric specified by |metric_name|. + // + // TODO(holte): Rename RecordSample to RecordString and then rename this + // to RecordSample. 
+ // + // example: + // scoped_ptr<Sample> sample = rappor_service->CreateSample(MY_METRIC_TYPE); + // sample->SetStringField("Field1", "some string"); + // sample->SetFlagsField("Field2", SOME|FLAGS, NUM_FLAGS); + // rappor_service->RecordSampleObj("MyMetric", sample.Pass()); + // + // This will result in a report setting two metrics "MyMetric.Field1" and + // "MyMetric.Field2", and they will both be generated from the same sample, + // to allow for correlations to be computed. + void RecordSampleObj(const std::string& metric_name, + scoped_ptr<Sample> sample); + // Records a sample of the rappor metric specified by |metric_name|. // Creates and initializes the metric, if it doesn't yet exist. virtual void RecordSample(const std::string& metric_name, @@ -141,6 +163,8 @@ class RapporService { // The map owns the metrics it contains. std::map<std::string, RapporMetric*> metrics_map_; + internal::Sampler sampler_; + DISALLOW_COPY_AND_ASSIGN(RapporService); }; diff --git a/components/rappor/rappor_service_unittest.cc b/components/rappor/rappor_service_unittest.cc index eefdb52e..de612cc 100644 --- a/components/rappor/rappor_service_unittest.cc +++ b/components/rappor/rappor_service_unittest.cc @@ -6,6 +6,7 @@ #include "base/base64.h" #include "base/prefs/testing_pref_service.h" +#include "components/metrics/metrics_hashes.h" #include "components/rappor/byte_vector_utils.h" #include "components/rappor/proto/rappor_metric.pb.h" #include "components/rappor/rappor_parameters.h" @@ -95,4 +96,24 @@ TEST(RapporServiceTest, Incognito) { EXPECT_EQ(0, reports.report_size()); } +// Check that Sample objects record correctly. 
+TEST(RapporServiceTest, RecordSample) { + TestRapporService rappor_service; + scoped_ptr<Sample> sample = rappor_service.CreateSample(COARSE_RAPPOR_TYPE); + sample->SetStringField("Url", "example.com"); + sample->SetFlagsField("Flags1", 0xbcd, 12); + rappor_service.RecordSampleObj("ObjMetric", sample.Pass()); + uint64_t url_hash = metrics::HashMetricName("ObjMetric.Url"); + uint64_t flags_hash = metrics::HashMetricName("ObjMetric.Flags1"); + RapporReports reports; + rappor_service.GetReports(&reports); + EXPECT_EQ(2, reports.report_size()); + size_t url_index = reports.report(0).name_hash() == url_hash ? 0 : 1; + size_t flags_index = url_index == 0 ? 1 : 0; + EXPECT_EQ(url_hash, reports.report(url_index).name_hash()); + EXPECT_EQ(1u, reports.report(url_index).bits().size()); + EXPECT_EQ(flags_hash, reports.report(flags_index).name_hash()); + EXPECT_EQ(2u, reports.report(flags_index).bits().size()); +} + } // namespace rappor diff --git a/components/rappor/reports.cc b/components/rappor/reports.cc new file mode 100644 index 0000000..91f01e9 --- /dev/null +++ b/components/rappor/reports.cc @@ -0,0 +1,52 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/reports.h" + +#include "base/logging.h" +#include "base/rand_util.h" +#include "components/rappor/byte_vector_utils.h" +#include "components/rappor/rappor_parameters.h" + +namespace rappor { + +namespace internal { + +ByteVector GenerateReport(const std::string& secret, + const RapporParameters& parameters, + const ByteVector& value) { + // Generate a deterministically random mask of fake data using the + // client's secret key + real data as a seed. The inclusion of the secret + // in the seed avoids correlations between real and fake data. + // The seed isn't a human-readable string. 
+ const std::string personalization_string = + std::string(value.begin(), value.end()); + HmacByteVectorGenerator hmac_generator(value.size(), secret, + personalization_string); + const ByteVector fake_mask = + hmac_generator.GetWeightedRandomByteVector(parameters.fake_prob); + ByteVector fake_bits = + hmac_generator.GetWeightedRandomByteVector(parameters.fake_one_prob); + + // Redact most of the real data by replacing it with the fake data, hiding + // and limiting the amount of information an individual client reports on. + const ByteVector* fake_and_redacted_bits = + ByteVectorMerge(fake_mask, value, &fake_bits); + + // Generate biased coin flips for each bit. + ByteVectorGenerator coin_generator(value.size()); + const ByteVector zero_coins = + coin_generator.GetWeightedRandomByteVector(parameters.zero_coin_prob); + ByteVector one_coins = + coin_generator.GetWeightedRandomByteVector(parameters.one_coin_prob); + + // Create a randomized response report on the fake and redacted data, sending + // the outcome of flipping a zero coin for the zero bits in that data, and of + // flipping a one coin for the one bits in that data, as the final report. + return *ByteVectorMerge(*fake_and_redacted_bits, zero_coins, &one_coins); +} + +} // namespace internal + +} // namespace rappor diff --git a/components/rappor/reports.h b/components/rappor/reports.h new file mode 100644 index 0000000..de2006b --- /dev/null +++ b/components/rappor/reports.h @@ -0,0 +1,27 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_RAPPOR_REPORTS_H_ +#define COMPONENTS_RAPPOR_REPORTS_H_ + +#include "components/rappor/reports.h" + +#include "components/rappor/byte_vector_utils.h" + +namespace rappor { + +struct RapporParameters; + +namespace internal { + +// Generate a randomized report for a single metric/field. 
+ByteVector GenerateReport(const std::string& secret, + const RapporParameters& parameters, + const ByteVector& value); + +} // namespace internal + +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_REPORTS_H_ diff --git a/components/rappor/sample.cc b/components/rappor/sample.cc new file mode 100644 index 0000000..6fbe615 --- /dev/null +++ b/components/rappor/sample.cc @@ -0,0 +1,77 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/sample.h" + +#include <map> +#include <string> + +#include "base/logging.h" +#include "components/metrics/metrics_hashes.h" +#include "components/rappor/bloom_filter.h" +#include "components/rappor/byte_vector_utils.h" +#include "components/rappor/proto/rappor_metric.pb.h" +#include "components/rappor/reports.h" + +namespace rappor { + +Sample::Sample(int32_t cohort_seed, const RapporParameters& parameters) + : parameters_(parameters), + bloom_offset_((cohort_seed % parameters_.num_cohorts) * + parameters_.bloom_filter_hash_function_count) { + // Must use bloom filter size that fits in uint64. 
+ DCHECK_LE(parameters_.bloom_filter_size_bytes, 8); +} + +Sample::~Sample() { +} + +void Sample::SetStringField(const std::string& field_name, + const std::string& value) { + DCHECK_EQ(0u, sizes_[field_name]); + fields_[field_name] = internal::GetBloomBits( + parameters_.bloom_filter_size_bytes, + parameters_.bloom_filter_hash_function_count, + bloom_offset_, + value); + sizes_[field_name] = parameters_.bloom_filter_size_bytes; +} + +void Sample::SetFlagsField(const std::string& field_name, + uint64_t flags, + size_t num_flags) { + DCHECK_EQ(0u, sizes_[field_name]); + DCHECK_GT(num_flags, 0u); + DCHECK_LE(num_flags, 64u); + DCHECK(num_flags == 64u || flags >> num_flags == 0); + fields_[field_name] = flags; + sizes_[field_name] = (num_flags + 7) / 8; +} + +void Sample::ExportMetrics(const std::string& secret, + const std::string& metric_name, + RapporReports* reports) const { + for (const auto& kv : fields_) { + uint64_t value = kv.second; + const auto it = sizes_.find(kv.first); + DCHECK(it != sizes_.end()); + uint64_t size = it->second; + ByteVector value_bytes(size); + for (size_t i = 0; i < size; i++) { + // Get the value of the i-th smallest byte and copy it to the byte vector. + uint64_t shift = i * 8; + uint64_t byte_mask = 0xff << shift; + value_bytes[i] = (value & byte_mask) >> shift; + } + ByteVector report_bytes = internal::GenerateReport( + secret, parameters_, value_bytes); + + RapporReports::Report* report = reports->add_report(); + report->set_name_hash(metrics::HashMetricName( + metric_name + "." + kv.first)); + report->set_bits(std::string(report_bytes.begin(), report_bytes.end())); + } +} + +} // namespace rappor diff --git a/components/rappor/sample.h b/components/rappor/sample.h new file mode 100644 index 0000000..5eaf1de --- /dev/null +++ b/components/rappor/sample.h @@ -0,0 +1,70 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_RAPPOR_SAMPLE_H_ +#define COMPONENTS_RAPPOR_SAMPLE_H_ + +#include <map> +#include <string> + +#include "base/basictypes.h" +#include "base/macros.h" +#include "components/rappor/rappor_parameters.h" + +namespace rappor { + +class RapporReports; +class RapporService; +class TestSamplerFactory; + +// Sample is a container for information about a single instance of some event +// we are sending Rappor data about. It may contain multiple different fields, +// which describe different details of the event, and they will be sent in the +// same Rappor report, enabling analysis of correlations between those fields. +class Sample { + public: + ~Sample(); + + // Sets a string value field in this sample. + void SetStringField(const std::string& field_name, const std::string& value); + + // Sets a group of boolean flags as a field in this sample. + // |flags| should be a set of boolean flags stored in the lowest |num_flags| + // bits of |flags|. + void SetFlagsField(const std::string& field_name, + uint64_t flags, + size_t num_flags); + + // Generate randomized reports and store them in |reports|. + void ExportMetrics(const std::string& secret, + const std::string& metric_name, + RapporReports* reports) const; + + const RapporParameters& parameters() { return parameters_; } + + private: + friend class TestSamplerFactory; + friend class RapporService; + + // Constructs a sample. Instead of calling this directly, call + // RapporService::CreateSample to create a sample. + Sample(int32_t cohort_seed, const RapporParameters& parameters); + + const RapporParameters parameters_; + + // Offset used for bloom filter hash functions. + uint32_t bloom_offset_; + + // Size of each of the different fields, in bytes. + std::map<std::string, size_t> sizes_; + + // The non-randomized report values for each field. 
+ std::map<std::string, uint64_t> fields_; + + DISALLOW_COPY_AND_ASSIGN(Sample); +}; + +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_SAMPLE_H_ diff --git a/components/rappor/sampler.cc b/components/rappor/sampler.cc new file mode 100644 index 0000000..5d3e8e3 --- /dev/null +++ b/components/rappor/sampler.cc @@ -0,0 +1,39 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/sampler.h" + +#include <map> +#include <string> + +#include "base/rand_util.h" + +namespace rappor { + +namespace internal { + +Sampler::Sampler() {} + +Sampler::~Sampler() {} + +void Sampler::AddSample(const std::string& metric_name, + scoped_ptr<Sample> sample) { + ++sample_counts_[metric_name]; + // Replace the previous sample with a 1 in sample_count_ chance so that each + // sample has equal probability of being reported. + if (base::RandGenerator(sample_counts_[metric_name]) == 0) + samples_.set(metric_name, sample.Pass()); +} + +void Sampler::ExportMetrics(const std::string& secret, RapporReports* reports) { + for (const auto& kv : samples_) { + kv.second->ExportMetrics(secret, kv.first, reports); + } + samples_.clear(); + sample_counts_.clear(); +} + +} // namespace internal + +} // namespace rappor diff --git a/components/rappor/sampler.h b/components/rappor/sampler.h new file mode 100644 index 0000000..c2b13e1 --- /dev/null +++ b/components/rappor/sampler.h @@ -0,0 +1,53 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_RAPPOR_SAMPLER_H_ +#define COMPONENTS_RAPPOR_SAMPLER_H_ + +#include <map> +#include <string> + +#include "base/containers/scoped_ptr_hash_map.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "components/rappor/rappor_parameters.h" +#include "components/rappor/sample.h" + +namespace rappor { + +class RapporReports; + +namespace internal { + +// Sampler manages the collection and storage of Sample objects. +// For each metric name, it will randomly select one Sample to store and +// use when generating RapporReports. +class Sampler { + public: + Sampler(); + ~Sampler(); + + // Store this sample for metric name, randomly selecting a sample if + // others have already been recorded. + void AddSample(const std::string& metric_name, scoped_ptr<Sample> sample); + + // Generate randomized reports for all stored samples and store them + // in |reports|, then discard the samples. + void ExportMetrics(const std::string& secret, RapporReports* reports); + + private: + // The number of samples recorded for each metric since the last export. + std::map<std::string, int> sample_counts_; + + // Stores a Sample for each metric, by metric name. + base::ScopedPtrHashMap<std::string, Sample> samples_; + + DISALLOW_COPY_AND_ASSIGN(Sampler); +}; + +} // namespace internal + +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_SAMPLER_H_ diff --git a/components/rappor/sampler_unittest.cc b/components/rappor/sampler_unittest.cc new file mode 100644 index 0000000..2790c6f --- /dev/null +++ b/components/rappor/sampler_unittest.cc @@ -0,0 +1,60 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "components/rappor/sampler.h" + +#include "components/rappor/byte_vector_utils.h" +#include "components/rappor/proto/rappor_metric.pb.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace rappor { + +const RapporParameters kTestRapporParameters = { + 1 /* Num cohorts */, + 1 /* Bloom filter size bytes */, + 4 /* Bloom filter hash count */, + PROBABILITY_75 /* Fake data probability */, + PROBABILITY_50 /* Fake one probability */, + PROBABILITY_75 /* One coin probability */, + PROBABILITY_50 /* Zero coin probability */, + FINE_LEVEL /* Reporting level (not used) */}; + +class TestSamplerFactory { + public: + static scoped_ptr<Sample> CreateSample() { + return scoped_ptr<Sample>(new Sample(0, kTestRapporParameters)); + } +}; + +namespace internal { + +// Test that exporting deletes samples. +TEST(RapporSamplerTest, TestExport) { + Sampler sampler; + + scoped_ptr<Sample> sample1 = TestSamplerFactory::CreateSample(); + sample1->SetStringField("Foo", "Junk"); + sampler.AddSample("Metric1", sample1.Pass()); + + scoped_ptr<Sample> sample2 = TestSamplerFactory::CreateSample(); + sample2->SetStringField("Foo", "Junk2"); + sampler.AddSample("Metric1", sample2.Pass()); + + // Since the two samples were for one metric, we should randomly get one + // of the two. + RapporReports reports; + std::string secret = HmacByteVectorGenerator::GenerateEntropyInput(); + sampler.ExportMetrics(secret, &reports); + EXPECT_EQ(1, reports.report_size()); + EXPECT_EQ(1u, reports.report(0).bits().size()); + + // First export should clear the metric. + RapporReports reports2; + sampler.ExportMetrics(secret, &reports2); + EXPECT_EQ(0, reports2.report_size()); +} + +} // namespace internal + +} // namespace rappor |