diff options
-rw-r--r-- | components/rappor/byte_vector_utils.cc | 14 | ||||
-rw-r--r-- | components/rappor/rappor_metric.cc | 2 | ||||
-rw-r--r-- | components/rappor/rappor_metric_unittest.cc | 19 | ||||
-rw-r--r-- | components/rappor/rappor_parameters.cc | 15 | ||||
-rw-r--r-- | components/rappor/rappor_parameters.h | 31 | ||||
-rw-r--r-- | components/rappor/rappor_service.cc | 32 | ||||
-rw-r--r-- | components/rappor/reports.cc | 2 | ||||
-rw-r--r-- | components/rappor/reports.h | 4 | ||||
-rw-r--r-- | components/rappor/sample.cc | 53 | ||||
-rw-r--r-- | components/rappor/sample.h | 11 | ||||
-rw-r--r-- | components/rappor/sampler_unittest.cc | 13 | ||||
-rwxr-xr-x | tools/metrics/rappor/pretty_print.py | 21 | ||||
-rwxr-xr-x | tools/metrics/rappor/pretty_print_test.py | 7 | ||||
-rw-r--r-- | tools/metrics/rappor/rappor.xml | 21 |
14 files changed, 173 insertions, 72 deletions
diff --git a/components/rappor/byte_vector_utils.cc b/components/rappor/byte_vector_utils.cc index 433f97a..b020f88 100644 --- a/components/rappor/byte_vector_utils.cc +++ b/components/rappor/byte_vector_utils.cc @@ -142,12 +142,26 @@ ByteVector ByteVectorGenerator::GetWeightedRandomByteVector( Probability probability) { ByteVector bytes = GetRandomByteVector(); switch (probability) { + case PROBABILITY_9375: + ByteVectorOr(GetRandomByteVector(), &bytes); + ByteVectorOr(GetRandomByteVector(), &bytes); + return *ByteVectorOr(GetRandomByteVector(), &bytes); + case PROBABILITY_875: + ByteVectorOr(GetRandomByteVector(), &bytes); + return *ByteVectorOr(GetRandomByteVector(), &bytes); case PROBABILITY_75: return *ByteVectorOr(GetRandomByteVector(), &bytes); case PROBABILITY_50: return bytes; case PROBABILITY_25: return *ByteVectorAnd(GetRandomByteVector(), &bytes); + case PROBABILITY_125: + ByteVectorAnd(GetRandomByteVector(), &bytes); + return *ByteVectorAnd(GetRandomByteVector(), &bytes); + case PROBABILITY_0625: + ByteVectorAnd(GetRandomByteVector(), &bytes); + ByteVectorAnd(GetRandomByteVector(), &bytes); + return *ByteVectorAnd(GetRandomByteVector(), &bytes); } NOTREACHED(); return bytes; diff --git a/components/rappor/rappor_metric.cc b/components/rappor/rappor_metric.cc index 3c368a3..84a5a6b 100644 --- a/components/rappor/rappor_metric.cc +++ b/components/rappor/rappor_metric.cc @@ -39,7 +39,7 @@ void RapporMetric::AddSample(const std::string& str) { } ByteVector RapporMetric::GetReport(const std::string& secret) const { - return internal::GenerateReport(secret, parameters(), bytes()); + return internal::GenerateReport(secret, parameters().string_noise, bytes()); } void RapporMetric::SetBytesForTesting(const ByteVector& bytes) { diff --git a/components/rappor/rappor_metric_unittest.cc b/components/rappor/rappor_metric_unittest.cc index b8e28dd..5b6f117 100644 --- a/components/rappor/rappor_metric_unittest.cc +++ b/components/rappor/rappor_metric_unittest.cc @@ -12,24 +12,27 @@ namespace rappor { -const RapporParameters kTestRapporParameters = { - 1 /* Num cohorts */, - 16 /* Bloom filter size bytes */, - 4 /* Bloom filter hash count */, +const NoiseParameters kTestNoiseParameters = { PROBABILITY_75 /* Fake data probability */, PROBABILITY_50 /* Fake one probability */, PROBABILITY_75 /* One coin probability */, PROBABILITY_50 /* Zero coin probability */, +}; + +const RapporParameters kTestRapporParameters = { + 1 /* Num cohorts */, + 16 /* Bloom filter size bytes */, + 4 /* Bloom filter hash count */, + kTestNoiseParameters /* String noise */, + kTestNoiseParameters /* Flags noise */, FINE_LEVEL /* Reporting level (not used) */}; const RapporParameters kTestStatsRapporParameters = { 1 /* Num cohorts */, 50 /* Bloom filter size bytes */, 4 /* Bloom filter hash count */, - PROBABILITY_75 /* Fake data probability */, - PROBABILITY_50 /* Fake one probability */, - PROBABILITY_75 /* One coin probability */, - PROBABILITY_50 /* Zero coin probability */, + kTestNoiseParameters /* String noise */, + kTestNoiseParameters /* Flags noise */, FINE_LEVEL /* Reporting level (not used) */}; // Check for basic syntax and use. diff --git a/components/rappor/rappor_parameters.cc b/components/rappor/rappor_parameters.cc index ef2eeb8..a08c68d 100644 --- a/components/rappor/rappor_parameters.cc +++ b/components/rappor/rappor_parameters.cc @@ -10,14 +10,19 @@ namespace rappor { std::string RapporParameters::ToString() const { - return base::StringPrintf("{ %d, %d, %d, %d, %d, %d, %d, %d }", + return base::StringPrintf( + "{ %d, %d, %d, { %d, %d, %d, %d }, { %d, %d, %d, %d }, %d }", num_cohorts, bloom_filter_size_bytes, bloom_filter_hash_function_count, - fake_prob, - fake_one_prob, - one_coin_prob, - zero_coin_prob, + string_noise.fake_prob, + string_noise.fake_one_prob, + string_noise.one_coin_prob, + string_noise.zero_coin_prob, + flag_noise.fake_prob, + flag_noise.fake_one_prob, + flag_noise.one_coin_prob, + flag_noise.zero_coin_prob, recording_level); } diff --git a/components/rappor/rappor_parameters.h b/components/rappor/rappor_parameters.h index 814ed2f..9a61cbd 100644 --- a/components/rappor/rappor_parameters.h +++ b/components/rappor/rappor_parameters.h @@ -10,9 +10,13 @@ namespace rappor { enum Probability { + PROBABILITY_9375, // 93.75% + PROBABILITY_875, // 87.5% PROBABILITY_75, // 75% PROBABILITY_50, // 50% PROBABILITY_25, // 25% + PROBABILITY_125, // 12.5% + PROBABILITY_0625, // 06.25% }; @@ -27,6 +31,21 @@ enum RecordingLevel { FINE_LEVEL, }; + +// Parameters controlling how much noise to add to a field. +struct NoiseParameters { + // The probability that a bit will be redacted with fake data. + Probability fake_prob; + // The probability that a fake bit will be a one. + Probability fake_one_prob; + + // The probability that a one bit in the redacted data reports as one. + Probability one_coin_prob; + // The probability that a zero bit in the redacted data reports as one. + Probability zero_coin_prob; +}; + + // An object describing a rappor metric and the parameters used to generate it. // // For a full description of the rappor metrics, see @@ -48,15 +67,11 @@ struct RapporParameters { // The number of hash functions used in the Bloom filter. int bloom_filter_hash_function_count; - // The probability that a bit will be redacted with fake data. - Probability fake_prob; - // The probability that a fake bit will be a one. - Probability fake_one_prob; + // Noise parameters for string fields. + NoiseParameters string_noise; - // The probability that a one bit in the redacted data reports as one. - Probability one_coin_prob; - // The probability that a zero bit in the redacted data reports as one. - Probability zero_coin_prob; + // Noise parameters for flags fields. + NoiseParameters flag_noise; // The reporting level this metric is reported at. RecordingLevel recording_level; diff --git a/components/rappor/rappor_service.cc b/components/rappor/rappor_service.cc index b6f8835..fc79c3f 100644 --- a/components/rappor/rappor_service.cc +++ b/components/rappor/rappor_service.cc @@ -47,33 +47,41 @@ GURL GetServerUrl() { return GURL(kDefaultServerUrl); } +const NoiseParameters kBasicNoiseParameters = { + rappor::PROBABILITY_50 /* Fake data probability */, + rappor::PROBABILITY_50 /* Fake one probability */, + rappor::PROBABILITY_75 /* One coin probability */, + rappor::PROBABILITY_25 /* Zero coin probability */, +}; + +const NoiseParameters kWeakNoiseParameters = { + rappor::PROBABILITY_25 /* Fake data probability */, + rappor::PROBABILITY_50 /* Fake one probability */, + rappor::PROBABILITY_875 /* One coin probability */, + rappor::PROBABILITY_125 /* Zero coin probability */, +}; + const RapporParameters kRapporParametersForType[NUM_RAPPOR_TYPES] = { // UMA_RAPPOR_TYPE {128 /* Num cohorts */, 4 /* Bloom filter size bytes */, 2 /* Bloom filter hash count */, - rappor::PROBABILITY_50 /* Fake data probability */, - rappor::PROBABILITY_50 /* Fake one probability */, - rappor::PROBABILITY_75 /* One coin probability */, - rappor::PROBABILITY_25 /* Zero coin probability */, + kBasicNoiseParameters /* String noise parameters */, + kWeakNoiseParameters /* Flags noise parameters */, FINE_LEVEL /* Recording level */}, // COARSE_RAPPOR_TYPE {128 /* Num cohorts */, 1 /* Bloom filter size bytes */, 2 /* Bloom filter hash count */, - rappor::PROBABILITY_50 /* Fake data probability */, - rappor::PROBABILITY_50 /* Fake one probability */, - rappor::PROBABILITY_75 /* One coin probability */, - rappor::PROBABILITY_25 /* Zero coin probability */, + kBasicNoiseParameters /* String noise parameters */, + kWeakNoiseParameters /* Flags noise parameters */, COARSE_LEVEL /* Recording level */}, // ETLD_PLUS_ONE_RAPPOR_TYPE {128 /* Num cohorts */, 16 /* Bloom filter size bytes */, 2 /* Bloom filter hash count */, - rappor::PROBABILITY_50 /* Fake data probability */, - rappor::PROBABILITY_50 /* Fake one probability */, - rappor::PROBABILITY_75 /* One coin probability */, - rappor::PROBABILITY_25 /* Zero coin probability */, + kBasicNoiseParameters /* String noise parameters */, + kWeakNoiseParameters /* Flags noise parameters */, FINE_LEVEL /* Recording level */}, }; diff --git a/components/rappor/reports.cc b/components/rappor/reports.cc index 91f01e9..45d73b6 100644 --- a/components/rappor/reports.cc +++ b/components/rappor/reports.cc @@ -14,7 +14,7 @@ namespace rappor { namespace internal { ByteVector GenerateReport(const std::string& secret, - const RapporParameters& parameters, + const NoiseParameters& parameters, const ByteVector& value) { // Generate a deterministically random mask of fake data using the // client's secret key + real data as a seed. The inclusion of the secret diff --git a/components/rappor/reports.h b/components/rappor/reports.h index de2006b..e95cc3a 100644 --- a/components/rappor/reports.h +++ b/components/rappor/reports.h @@ -11,13 +11,13 @@ namespace rappor { -struct RapporParameters; +struct NoiseParameters; namespace internal { // Generate a randomized report for a single metric/field. ByteVector GenerateReport(const std::string& secret, - const RapporParameters& parameters, + const NoiseParameters& parameters, const ByteVector& value); } // namespace internal diff --git a/components/rappor/sample.cc b/components/rappor/sample.cc index 1aec616..33a99e2 100644 --- a/components/rappor/sample.cc +++ b/components/rappor/sample.cc @@ -30,7 +30,7 @@ Sample::~Sample() { void Sample::SetStringField(const std::string& field_name, const std::string& value) { DCHECK_EQ(0u, sizes_[field_name]); - fields_[field_name] = internal::GetBloomBits( + string_fields_[field_name] = internal::GetBloomBits( parameters_.bloom_filter_size_bytes, parameters_.bloom_filter_hash_function_count, bloom_offset_, @@ -45,27 +45,48 @@ void Sample::SetFlagsField(const std::string& field_name, DCHECK_GT(num_flags, 0u); DCHECK_LE(num_flags, 64u); DCHECK(num_flags == 64u || flags >> num_flags == 0); - fields_[field_name] = flags; + flags_fields_[field_name] = flags; sizes_[field_name] = (num_flags + 7) / 8; } +void Sample::ExportField(const std::string& secret, + const std::string& metric_name, + const std::string& field_name, + uint64_t field_value, + const NoiseParameters& parameters, + RapporReports* reports) const { + const auto it = sizes_.find(field_name); + DCHECK(it != sizes_.end()); + size_t size = it->second; + ByteVector value_bytes(size); + Uint64ToByteVector(field_value, size, &value_bytes); + ByteVector report_bytes = internal::GenerateReport( + secret, parameters, value_bytes); + + RapporReports::Report* report = reports->add_report(); + report->set_name_hash(metrics::HashMetricName( + metric_name + "." + field_name)); + report->set_bits(std::string(report_bytes.begin(), report_bytes.end())); +} + void Sample::ExportMetrics(const std::string& secret, const std::string& metric_name, RapporReports* reports) const { - for (const auto& kv : fields_) { - uint64_t value = kv.second; - const auto it = sizes_.find(kv.first); - DCHECK(it != sizes_.end()); - size_t size = it->second; - ByteVector value_bytes(size); - Uint64ToByteVector(value, size, &value_bytes); - ByteVector report_bytes = internal::GenerateReport( - secret, parameters_, value_bytes); - - RapporReports::Report* report = reports->add_report(); - report->set_name_hash(metrics::HashMetricName( - metric_name + "." + kv.first)); - report->set_bits(std::string(report_bytes.begin(), report_bytes.end())); + for (const auto& kv : string_fields_) { + ExportField(secret, + metric_name, + kv.first, + kv.second, + parameters_.string_noise, + reports); + } + for (const auto& kv : flags_fields_) { + ExportField(secret, + metric_name, + kv.first, + kv.second, + parameters_.flag_noise, + reports); } } diff --git a/components/rappor/sample.h b/components/rappor/sample.h index 5eaf1de..1d1708d 100644 --- a/components/rappor/sample.h +++ b/components/rappor/sample.h @@ -51,6 +51,14 @@ class Sample { // RapporService::MakeSampleObj to create a sample. Sample(int32_t cohort_seed, const RapporParameters& parameters); + // Generate randomized report for one field and store it in |reports|. + void ExportField(const std::string& secret, + const std::string& metric_name, + const std::string& field_name, + uint64_t field_value, + const NoiseParameters& parameters, + RapporReports* reports) const; + const RapporParameters parameters_; // Offset used for bloom filter hash functions. @@ -60,7 +68,8 @@ class Sample { std::map<std::string, size_t> sizes_; // The non-randomized report values for each field. - std::map<std::string, uint64_t> fields_; + std::map<std::string, uint64_t> flags_fields_; + std::map<std::string, uint64_t> string_fields_; DISALLOW_COPY_AND_ASSIGN(Sample); }; diff --git a/components/rappor/sampler_unittest.cc b/components/rappor/sampler_unittest.cc index 2790c6f..55c532e 100644 --- a/components/rappor/sampler_unittest.cc +++ b/components/rappor/sampler_unittest.cc @@ -10,14 +10,19 @@ namespace rappor { -const RapporParameters kTestRapporParameters = { - 1 /* Num cohorts */, - 1 /* Bloom filter size bytes */, - 4 /* Bloom filter hash count */, +const NoiseParameters kTestNoiseParameters = { PROBABILITY_75 /* Fake data probability */, PROBABILITY_50 /* Fake one probability */, PROBABILITY_75 /* One coin probability */, PROBABILITY_50 /* Zero coin probability */, +}; + +const RapporParameters kTestRapporParameters = { + 1 /* Num cohorts */, + 1 /* Bloom filter size bytes */, + 4 /* Bloom filter hash count */, + kTestNoiseParameters /* String noise */, + kTestNoiseParameters /* Flags noise */, FINE_LEVEL /* Reporting level (not used) */}; class TestSamplerFactory { diff --git a/tools/metrics/rappor/pretty_print.py b/tools/metrics/rappor/pretty_print.py index f88e1c3..b5b1ba3 100755 --- a/tools/metrics/rappor/pretty_print.py +++ b/tools/metrics/rappor/pretty_print.py @@ -15,18 +15,25 @@ import presubmit_util # Model definitions for rappor.xml content _SUMMARY_TYPE = models.TextNodeType('summary') +_NOISE_ATTRIBUTES = [ + 'fake-prob', + 'fake-one-prob', + 'one-coin-prob', + 'zero-coin-prob', +] + +_STRING_NOISE_TYPE = models.ObjectNodeType('string-noise', + float_attributes=_NOISE_ATTRIBUTES) + +_FLAG_NOISE_TYPE = models.ObjectNodeType('flag-noise', + float_attributes=_NOISE_ATTRIBUTES) + _PARAMETERS_TYPE = models.ObjectNodeType('parameters', int_attributes=[ 'num-cohorts', 'bytes', 'hash-functions', ], - float_attributes=[ - 'fake-prob', - 'fake-one-prob', - 'one-coin-prob', - 'zero-coin-prob', - ], string_attributes=[ 'reporting-level' ]) @@ -37,6 +44,8 @@ _RAPPOR_PARAMETERS_TYPE = models.ObjectNodeType('rappor-parameters', children=[ models.ChildType('summary', _SUMMARY_TYPE, False), models.ChildType('parameters', _PARAMETERS_TYPE, False), + models.ChildType('stringNoise', _STRING_NOISE_TYPE, False), + models.ChildType('flagNoise', _FLAG_NOISE_TYPE, False), ]) _RAPPOR_PARAMETERS_TYPES_TYPE = models.ObjectNodeType('rappor-parameter-types', diff --git a/tools/metrics/rappor/pretty_print_test.py b/tools/metrics/rappor/pretty_print_test.py index f69b7df..58478dd 100755 --- a/tools/metrics/rappor/pretty_print_test.py +++ b/tools/metrics/rappor/pretty_print_test.py @@ -21,9 +21,12 @@ PRETTY_XML = """ <summary> Fake type for tests. </summary> - <parameters num-cohorts="128" bytes="1" hash-functions="2" fake-prob="0.5" - fake-one-prob="0.5" one-coin-prob="0.75" zero-coin-prob="0.25" + <parameters num-cohorts="128" bytes="1" hash-functions="2" reporting-level="COARSE"/> + <string-noise fake-prob="0.5" fake-one-prob="0.5" one-coin-prob="0.75" + zero-coin-prob="0.25"/> + <flag-noise fake-prob="0.5" fake-one-prob="0.75" one-coin-prob="0.875" + zero-coin-prob="0.125"/> </rappor-parameters> </rappor-parameter-types> diff --git a/tools/metrics/rappor/rappor.xml b/tools/metrics/rappor/rappor.xml index f457557..a577c1e 100644 --- a/tools/metrics/rappor/rappor.xml +++ b/tools/metrics/rappor/rappor.xml @@ -23,9 +23,12 @@ components/rappor/rappor_service.cc. <summary> Stricter parameters for metrics collected from a broader population. </summary> - <parameters num-cohorts="128" bytes="1" hash-functions="2" fake-prob="0.5" - fake-one-prob="0.5" one-coin-prob="0.75" zero-coin-prob="0.25" + <parameters num-cohorts="128" bytes="1" hash-functions="2" reporting-level="COARSE"/> + <string-noise fake-prob="0.5" fake-one-prob="0.5" one-coin-prob="0.75" + zero-coin-prob="0.25"/> + <flag-noise fake-prob="0.25" fake-one-prob="0.5" one-coin-prob="0.875" + zero-coin-prob="0.125"/> </rappor-parameters> <rappor-parameters name="ETLD_PLUS_ONE"> @@ -33,18 +36,24 @@ components/rappor/rappor_service.cc. Deprecated parameters for collecting the domain and registry of a URL from UMA opt-in users. </summary> - <parameters num-cohorts="128" bytes="16" hash-functions="2" fake-prob="0.5" - fake-one-prob="0.5" one-coin-prob="0.75" zero-coin-prob="0.25" + <parameters num-cohorts="128" bytes="16" hash-functions="2" reporting-level="FINE"/> + <string-noise fake-prob="0.5" fake-one-prob="0.5" one-coin-prob="0.75" + zero-coin-prob="0.25"/> + <flag-noise fake-prob="0.25" fake-one-prob="0.5" one-coin-prob="0.875" + zero-coin-prob="0.125"/> </rappor-parameters> <rappor-parameters name="UMA_RAPPOR_TYPE"> <summary> Parameters suitable for metrics from UMA opt-in users. </summary> - <parameters num-cohorts="128" bytes="4" hash-functions="2" fake-prob="0.5" - fake-one-prob="0.5" one-coin-prob="0.75" zero-coin-prob="0.25" + <parameters num-cohorts="128" bytes="4" hash-functions="2" reporting-level="FINE"/> + <string-noise fake-prob="0.5" fake-one-prob="0.5" one-coin-prob="0.75" + zero-coin-prob="0.25"/> + <flag-noise fake-prob="0.25" fake-one-prob="0.5" one-coin-prob="0.875" + zero-coin-prob="0.125"/> </rappor-parameters> </rappor-parameter-types> |