diff options
author | holte@chromium.org <holte@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-02-21 04:06:10 +0000 |
---|---|---|
committer | holte@chromium.org <holte@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-02-21 04:06:10 +0000 |
commit | 2a172e455f7060e0786abd37de53a39e21aa5b5e (patch) | |
tree | dd883647e4c6931f92123fe44e738a93f49b924e /components/rappor | |
parent | 0433b638d68d751a87134b026034e2e46bef4f7b (diff) | |
download | chromium_src-2a172e455f7060e0786abd37de53a39e21aa5b5e.zip chromium_src-2a172e455f7060e0786abd37de53a39e21aa5b5e.tar.gz chromium_src-2a172e455f7060e0786abd37de53a39e21aa5b5e.tar.bz2 |
Implementation of Randomized Aggregatable Privacy-Preserving Ordinal Responses (RAPPORs).
See the design doc at http://www.chromium.org/developers/design-documents/rappor
BUG=328168
TBR=darin@chromium.org
Review URL: https://codereview.chromium.org/49753002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@252492 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'components/rappor')
22 files changed, 1699 insertions, 0 deletions
diff --git a/components/rappor/DEPS b/components/rappor/DEPS new file mode 100644 index 0000000..14115d9 --- /dev/null +++ b/components/rappor/DEPS @@ -0,0 +1,7 @@ +include_rules = [ + "+components/metrics", + "+components/variations", + "+crypto", + "+net", + "+third_party/smhasher", +] diff --git a/components/rappor/OWNERS b/components/rappor/OWNERS new file mode 100644 index 0000000..8d96ade --- /dev/null +++ b/components/rappor/OWNERS @@ -0,0 +1,2 @@ +asvitkine@chromium.org +# Please include holte@ on reviews diff --git a/components/rappor/bloom_filter.cc b/components/rappor/bloom_filter.cc new file mode 100644 index 0000000..2880ae0 --- /dev/null +++ b/components/rappor/bloom_filter.cc @@ -0,0 +1,38 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/bloom_filter.h" + +#include "base/logging.h" + +// TODO(holte): we can't include "City.h" due to typedef conflicts. +extern uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed); + +namespace rappor { + +BloomFilter::BloomFilter(uint32_t bytes_size, + uint32_t hash_function_count, + uint32_t hash_seed_offset) + : bytes_(bytes_size), + hash_function_count_(hash_function_count), + hash_seed_offset_(hash_seed_offset) { + DCHECK_GT(bytes_size, 0u); +} + +BloomFilter::~BloomFilter() {} + +void BloomFilter::AddString(const std::string& str) { + for (size_t i = 0; i < hash_function_count_; ++i) { + // Using CityHash here because we have support for it in Dremel. Many hash + // functions, such as MD5, SHA1, or Murmur, would probably also work. + uint32_t index = + CityHash64WithSeed(str.data(), str.size(), hash_seed_offset_ + i); + // Note that the "bytes" are uint8_t, so they are always 8-bits. + uint32_t byte_index = (index / 8) % bytes_.size(); + uint32_t bit_index = index % 8; + bytes_[byte_index] |= 1 << bit_index; + } +} + +} // namespace rappor diff --git a/components/rappor/bloom_filter.h b/components/rappor/bloom_filter.h new file mode 100644 index 0000000..0bb9a57 --- /dev/null +++ b/components/rappor/bloom_filter.h @@ -0,0 +1,48 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_RAPPOR_BLOOM_FILTER_H_ +#define COMPONENTS_RAPPOR_BLOOM_FILTER_H_ + +#include <string> + +#include "base/basictypes.h" +#include "components/rappor/byte_vector_utils.h" + +namespace rappor { + +// BloomFilter is a simple Bloom filter for keeping track of a set of strings. +class BloomFilter { + public: + // Constructs a BloomFilter using |bytes_size| bytes of Bloom filter bits, + // and |hash_function_count| hash functions to set bits in the filter. The + // hash functions will be generated by using seeds in the range + // |hash_seed_offset| to (|hash_seed_offset| + |hash_function_count|). + BloomFilter(uint32_t bytes_size, + uint32_t hash_function_count, + uint32_t hash_seed_offset); + ~BloomFilter(); + + // Add a single string to the Bloom filter. + void AddString(const std::string& str); + + // Returns the current value of the Bloom filter's bit array. + const ByteVector& bytes() const { return bytes_; }; + + private: + // Stores the byte array of the Bloom filter. + ByteVector bytes_; + + // The number of bits to set for each string added. + uint32_t hash_function_count_; + + // A number add to a hash function index to get a seed for that hash function. + uint32_t hash_seed_offset_; + + DISALLOW_COPY_AND_ASSIGN(BloomFilter); +}; + +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_BLOOM_FILTER_H_ diff --git a/components/rappor/bloom_filter_unittest.cc b/components/rappor/bloom_filter_unittest.cc new file mode 100644 index 0000000..9a64e25 --- /dev/null +++ b/components/rappor/bloom_filter_unittest.cc @@ -0,0 +1,54 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/bloom_filter.h" + +#include "components/rappor/byte_vector_utils.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace rappor { + +TEST(BloomFilterTest, TinyFilter) { + BloomFilter filter(1u, 4u, 0u); + + // Size is 1 and it's initially empty + EXPECT_EQ(1u, filter.bytes().size()); + EXPECT_EQ(0x00, filter.bytes()[0]); + + // "Test" has a self-collision, and only sets 3 bits. + filter.AddString("Test"); + EXPECT_EQ(0x2a, filter.bytes()[0]); + + // Adding the same value shouldn't change anything. + filter.AddString("Test"); + EXPECT_EQ(0x2a, filter.bytes()[0]); + + BloomFilter filter2(1u, 4u, 0u); + EXPECT_EQ(0x00, filter2.bytes()[0]); + filter2.AddString("Bar"); + EXPECT_EQ(0xa8, filter2.bytes()[0]); + + // Adding a colliding string should just set new bits. + filter.AddString("Bar"); + EXPECT_EQ(0xaa, filter.bytes()[0]); +} + +TEST(BloomFilterTest, HugeFilter) { + // Create a 500 bit filter, and use a large seed offset to see if anything + // breaks. + BloomFilter filter(500u, 1u, 0xabdef123); + + // Size is 500 and it's initially empty + EXPECT_EQ(500u, filter.bytes().size()); + EXPECT_EQ(0, CountBits(filter.bytes())); + + filter.AddString("Bar"); + EXPECT_EQ(1, CountBits(filter.bytes())); + + // Adding the same value shouldn't change anything. + filter.AddString("Bar"); + EXPECT_EQ(1, CountBits(filter.bytes())); +} + +} // namespace rappor diff --git a/components/rappor/byte_vector_utils.cc b/components/rappor/byte_vector_utils.cc new file mode 100644 index 0000000..57f2574 --- /dev/null +++ b/components/rappor/byte_vector_utils.cc @@ -0,0 +1,215 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/byte_vector_utils.h" + +#include <string> + +#include "base/logging.h" +#include "base/rand_util.h" +#include "base/strings/string_number_conversions.h" +#include "crypto/random.h" + +namespace rappor { + +namespace { + +// Reinterpets a ByteVector as a StringPiece. +base::StringPiece ByteVectorAsStringPiece(const ByteVector& lhs) { + return base::StringPiece(reinterpret_cast<const char *>(&lhs[0]), lhs.size()); +} + +// Concatenates parameters together as a string. +std::string Concat(const ByteVector& value, char c, const std::string& data) { + return std::string(value.begin(), value.end()) + c + data; +} + +// Performs the operation: K = HMAC(K, data) +// The input "K" is passed by initializing |hmac| with it. +// The output "K" is returned by initializing |result| with it. +// Returns false on an error. +bool HMAC_Rotate(const crypto::HMAC& hmac, + const std::string& data, + crypto::HMAC* result) { + ByteVector key(hmac.DigestLength()); + if (!hmac.Sign(data, &key[0], key.size())) + return false; + return result->Init(ByteVectorAsStringPiece(key)); +} + +// Performs the operation: V = HMAC(K, V) +// The input "K" is passed by initializing |hmac| with it. +// "V" is read from and written to |value|. +// Returns false on an error. +bool HMAC_Rehash(const crypto::HMAC& hmac, ByteVector* value) { + return hmac.Sign(ByteVectorAsStringPiece(*value), + &(*value)[0], value->size()); +} + +// Implements (Key, V) = HMAC_DRBG_Update(provided_data, Key, V) +// See: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf +// "V" is read from and written to |value|. +// The input "Key" is passed by initializing |hmac1| with it. +// The output "Key" is returned by initializing |out_hmac| with it. +// Returns false on an error. +bool HMAC_DRBG_Update(const std::string& provided_data, + const crypto::HMAC& hmac1, + ByteVector* value, + crypto::HMAC* out_hmac) { + // HMAC_DRBG Update Process + crypto::HMAC temp_hmac(crypto::HMAC::SHA256); + crypto::HMAC* hmac2 = provided_data.size() > 0 ? &temp_hmac : out_hmac; + // 1. K = HMAC(K, V || 0x00 || provided_data) + if (!HMAC_Rotate(hmac1, Concat(*value, 0x00, provided_data), hmac2)) + return false; + // 2. V = HMAC(K, V) + if (!HMAC_Rehash(*hmac2, value)) + return false; + // 3. If (provided_data = Null), then return K and V. + if (hmac2 == out_hmac) + return true; + // 4. K = HMAC(K, V || 0x01 || provided_data) + if (!HMAC_Rotate(*hmac2, Concat(*value, 0x01, provided_data), out_hmac)) + return false; + // 5. V = HMAC(K, V) + return HMAC_Rehash(*out_hmac, value); +} + +} // namespace + +ByteVector* ByteVectorOr(const ByteVector& lhs, ByteVector* rhs) { + DCHECK_EQ(lhs.size(), rhs->size()); + for (size_t i = 0, len = lhs.size(); i < len; ++i) { + (*rhs)[i] = lhs[i] | (*rhs)[i]; + } + return rhs; +} + +ByteVector* ByteVectorMerge(const ByteVector& mask, + const ByteVector& lhs, + ByteVector* rhs) { + DCHECK_EQ(lhs.size(), rhs->size()); + for (size_t i = 0, len = lhs.size(); i < len; ++i) { + (*rhs)[i] = (lhs[i] & ~mask[i]) | ((*rhs)[i] & mask[i]); + } + return rhs; +} + +int CountBits(const ByteVector& vector) { + int bit_count = 0; + for (size_t i = 0; i < vector.size(); ++i) { + uint8_t byte = vector[i]; + for (int j = 0; j < 8 ; ++j) { + if (byte & (1 << j)) + bit_count++; + } + } + return bit_count; +} + +ByteVectorGenerator::ByteVectorGenerator(size_t byte_count) + : byte_count_(byte_count) {} + +ByteVectorGenerator::~ByteVectorGenerator() {} + +ByteVector ByteVectorGenerator::GetRandomByteVector() { + ByteVector bytes(byte_count_); + crypto::RandBytes(&bytes[0], bytes.size()); + return bytes; +} + +ByteVector ByteVectorGenerator::GetWeightedRandomByteVector( + Probability probability) { + ByteVector bytes = GetRandomByteVector(); + switch (probability) { + case PROBABILITY_75: + return *ByteVectorOr(GetRandomByteVector(), &bytes); + case PROBABILITY_50: + return bytes; + } + NOTREACHED(); + return bytes; +} + +HmacByteVectorGenerator::HmacByteVectorGenerator( + size_t byte_count, + const std::string& entropy_input, + const std::string& personalization_string) + : ByteVectorGenerator(byte_count), + hmac_(crypto::HMAC::SHA256), + value_(hmac_.DigestLength(), 0x01), + generated_bytes_(0) { + // HMAC_DRBG Instantiate Process + // See: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf + // 1. seed_material = entropy_input + nonce + personalization_string + // Note: We are using the 8.6.7 interpretation, where the entropy_input and + // nonce are acquired at the same time from the same source. + DCHECK_EQ(kEntropyInputSize, entropy_input.size()); + std::string seed_material(entropy_input + personalization_string); + // 2. Key = 0x00 00...00 + crypto::HMAC hmac1(crypto::HMAC::SHA256); + if (!hmac1.Init(std::string(hmac_.DigestLength(), 0x00))) + NOTREACHED(); + // 3. V = 0x01 01...01 + // (value_ in initializer list) + + // 4. (Key, V) = HMAC_DRBG_Update(seed_material, Key, V) + if (!HMAC_DRBG_Update(seed_material, hmac1, &value_, &hmac_)) + NOTREACHED(); +} + +HmacByteVectorGenerator::~HmacByteVectorGenerator() {} + +HmacByteVectorGenerator::HmacByteVectorGenerator( + const HmacByteVectorGenerator& prev_request) + : ByteVectorGenerator(prev_request.byte_count()), + hmac_(crypto::HMAC::SHA256), + value_(prev_request.value_), + generated_bytes_(0) { + if (!HMAC_DRBG_Update("", prev_request.hmac_, &value_, &hmac_)) + NOTREACHED(); +} + +// HMAC_DRBG requires entropy input to be security_strength bits long, +// and nonce to be at least 1/2 security_strength bits long. We +// generate them both as a single "extra strong" entropy input. +// max_security_strength for SHA256 is 256 bits. +// See: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf +const size_t HmacByteVectorGenerator::kEntropyInputSize = (256 / 8) * 3 / 2; + +// static +std::string HmacByteVectorGenerator::GenerateEntropyInput() { + return base::RandBytesAsString(kEntropyInputSize); +} + +ByteVector HmacByteVectorGenerator::GetRandomByteVector() { + // Streams bytes from HMAC_DRBG_Generate + // See: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf + const size_t digest_length = hmac_.DigestLength(); + DCHECK_EQ(value_.size(), digest_length); + ByteVector bytes(byte_count()); + uint8_t* data = &bytes[0]; + size_t bytes_to_go = byte_count(); + while (bytes_to_go > 0) { + size_t requested_byte_in_digest = generated_bytes_ % digest_length; + if (requested_byte_in_digest == 0) { + // Do step 4.1 of the HMAC_DRBG Generate Process for more bits. + // V = HMAC(Key, V) + if (!HMAC_Rehash(hmac_, &value_)) + NOTREACHED(); + } + size_t n = std::min(bytes_to_go, + digest_length - requested_byte_in_digest); + memcpy(data, &value_[requested_byte_in_digest], n); + data += n; + bytes_to_go -= n; + generated_bytes_ += n; + // Check max_number_of_bits_per_request from 10.1 Table 2 + // max_number_of_bits_per_request == 2^19 bits == 2^16 bytes + DCHECK_LT(generated_bytes_, 1U << 16); + } + return bytes; +} + +} // namespace rappor diff --git a/components/rappor/byte_vector_utils.h b/components/rappor/byte_vector_utils.h new file mode 100644 index 0000000..9154cab --- /dev/null +++ b/components/rappor/byte_vector_utils.h @@ -0,0 +1,110 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_RAPPOR_BYTE_VECTOR_UTILS_H_ +#define COMPONENTS_RAPPOR_BYTE_VECTOR_UTILS_H_ + +#include <vector> + +#include "base/basictypes.h" +#include "components/rappor/rappor_parameters.h" +#include "crypto/hmac.h" + +namespace rappor { + +// A vector of 8-bit integers used to store a set of binary bits. +typedef std::vector<uint8_t> ByteVector; + +// Computes a bitwise OR of byte vectors and stores the result in rhs. +// Returns rhs for chaining. +ByteVector* ByteVectorOr(const ByteVector& lhs, ByteVector* rhs); + +// Merges the contents of lhs and rhs vectors according to a mask vector. +// The i-th bit of the result vector will be the i-th bit of either the lhs +// or rhs vector, based on the i-th bit of the mask vector. +// Equivalent to (lhs & ~mask) | (rhs & mask). Stores the result in rhs. +// Returns rhs for chaining. +ByteVector* ByteVectorMerge(const ByteVector& mask, + const ByteVector& lhs, + ByteVector* rhs); + +// Counts the number of bits set in the byte vector. +int CountBits(const ByteVector& vector); + +// A utility object for generating random binary data with different +// likelihood of bits being true, using entropy from crypto::RandBytes(). +class ByteVectorGenerator { + public: + explicit ByteVectorGenerator(size_t byte_count); + + ~ByteVectorGenerator(); + + // Generates a random byte vector where the bits are independent random + // variables which are true with the given |probability|. + ByteVector GetWeightedRandomByteVector(Probability probability); + + protected: + // Size of vectors to be generated. + size_t byte_count() const { return byte_count_; } + + // Generates a random vector of bytes from a uniform distribution. + virtual ByteVector GetRandomByteVector(); + + private: + size_t byte_count_; + + DISALLOW_COPY_AND_ASSIGN(ByteVectorGenerator); +}; + +// A ByteVectorGenerator that uses a psuedo-random function to generate a +// deterministically random bits. This class only implements a single request +// from HMAC_DRBG and streams up to 2^19 bits from that request. +// Ref: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf +// We're using our own PRNG instead of crypto::RandBytes because we need to +// generate a repeatable sequence of bits from the same seed. Conservatively, +// we're choosing to use HMAC_DRBG here, as it is one of the best studied +// and standardized ways of generating deterministic, unpredictable sequences +// based on a secret seed. +class HmacByteVectorGenerator : public ByteVectorGenerator { + public: + // Constructor takes the size of the vector to generate, along with a + // |entropy_input| and |personalization_string| to seed the pseudo-random + // number generator. The string parameters are treated as byte arrays. + HmacByteVectorGenerator(size_t byte_count, + const std::string& entropy_input, + const std::string& personalization_string); + + ~HmacByteVectorGenerator(); + + // Generates a random string suitable for passing to the constructor as + // |entropy_input|. + static std::string GenerateEntropyInput(); + + // Key size required for 128-bit security strength (including nonce). + static const size_t kEntropyInputSize; + + protected: + // Generate byte vector generator that streams from the next request instead + // of the current one. For testing against NIST test vectors only. + explicit HmacByteVectorGenerator(const HmacByteVectorGenerator& prev_request); + + // ByteVector implementation: + virtual ByteVector GetRandomByteVector() OVERRIDE; + + private: + // HMAC initalized with the value of "Key" HMAC_DRBG_Initialize. + crypto::HMAC hmac_; + + // The "V" value from HMAC_DRBG. + ByteVector value_; + + // Total number of bytes streamed from the HMAC_DRBG Generate Process. + size_t generated_bytes_; + + DISALLOW_ASSIGN(HmacByteVectorGenerator); +}; + +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_BYTE_VECTOR_UTILS_H_ diff --git a/components/rappor/byte_vector_utils_unittest.cc b/components/rappor/byte_vector_utils_unittest.cc new file mode 100644 index 0000000..54df9ea --- /dev/null +++ b/components/rappor/byte_vector_utils_unittest.cc @@ -0,0 +1,128 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/byte_vector_utils.h" + +#include "base/rand_util.h" +#include "base/strings/string_number_conversions.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace rappor { + +namespace { + +class SecondRequest : public HmacByteVectorGenerator { + public: + SecondRequest(const HmacByteVectorGenerator& first_request) + : HmacByteVectorGenerator(first_request) {} +}; + +std::string HexToString(const char* hex) { + ByteVector bv; + base::HexStringToBytes(hex, &bv); + return std::string(bv.begin(), bv.end()); +} + +} // namespace + +TEST(ByteVectorTest, ByteVectorOr) { + ByteVector lhs(2); + lhs[1] = 0x12; + ByteVector rhs(2); + rhs[1] = 0x03; + + EXPECT_EQ(0x13, (*ByteVectorOr(lhs, &rhs))[1]); +} + +TEST(ByteVectorTest, ByteVectorMerge) { + ByteVector lhs(2); + lhs[1] = 0x33; + ByteVector rhs(2); + rhs[1] = 0x55; + ByteVector mask(2); + mask[1] = 0x0f; + + EXPECT_EQ(0x35, (*ByteVectorMerge(mask, lhs, &rhs))[1]); +} + +TEST(ByteVectorTest, ByteVectorGenerator) { + ByteVectorGenerator generator(2u); + ByteVector random_50 = generator.GetWeightedRandomByteVector(PROBABILITY_50); + EXPECT_EQ(random_50.size(), 2u); + ByteVector random_75 = generator.GetWeightedRandomByteVector(PROBABILITY_75); + EXPECT_EQ(random_75.size(), 2u); +} + +TEST(ByteVectorTest, HmacByteVectorGenerator) { + HmacByteVectorGenerator generator(1u, + std::string(HmacByteVectorGenerator::kEntropyInputSize, 0x00), ""); + ByteVector random_50 = generator.GetWeightedRandomByteVector(PROBABILITY_50); + EXPECT_EQ(random_50.size(), 1u); + EXPECT_EQ(random_50[0], 0x0B); + ByteVector random_75 = generator.GetWeightedRandomByteVector(PROBABILITY_75); + EXPECT_EQ(random_75.size(), 1u); + EXPECT_EQ(random_75[0], 0xdf); +} + +TEST(ByteVectorTest, HmacNist) { + // Test case 0 for SHA-256 HMAC_DRBG no reseed tests from + // http://csrc.nist.gov/groups/STM/cavp/ + const char entropy[] = + "ca851911349384bffe89de1cbdc46e6831e44d34a4fb935ee285dd14b71a7488"; + const char nonce[] = "659ba96c601dc69fc902940805ec0ca8"; + const char expected[] = + "e528e9abf2dece54d47c7e75e5fe302149f817ea9fb4bee6f4199697d04d5b89" + "d54fbb978a15b5c443c9ec21036d2460b6f73ebad0dc2aba6e624abf07745bc1" + "07694bb7547bb0995f70de25d6b29e2d3011bb19d27676c07162c8b5ccde0668" + "961df86803482cb37ed6d5c0bb8d50cf1f50d476aa0458bdaba806f48be9dcb8"; + + std::string entropy_input = HexToString(entropy) + HexToString(nonce); + HmacByteVectorGenerator generator(1024u / 8, entropy_input, ""); + generator.GetWeightedRandomByteVector(PROBABILITY_50); + SecondRequest generator2(generator); + ByteVector random_50 = generator2.GetWeightedRandomByteVector(PROBABILITY_50); + + EXPECT_EQ(HexToString(expected), + std::string(random_50.begin(), random_50.end())); +} + +TEST(ByteVectorTest, WeightedRandomStatistics50) { + ByteVectorGenerator generator(50u); + ByteVector random = generator.GetWeightedRandomByteVector(PROBABILITY_50); + int bit_count = CountBits(random); + // Check bounds on bit counts that are true with 99.999% probability. + EXPECT_GT(bit_count, 155); // Binomial(400, .5) CDF(155) ~= 0.000004 + EXPECT_LE(bit_count, 244); // Binomial(400, .5) CDF(244) ~= 0.999996 +} + +TEST(ByteVectorTest, WeightedRandomStatistics75) { + ByteVectorGenerator generator(50u); + ByteVector random = generator.GetWeightedRandomByteVector(PROBABILITY_75); + int bit_count = CountBits(random); + // Check bounds on bit counts that are true with 99.999% probability. + EXPECT_GT(bit_count, 259); // Binomial(400, .75) CDF(259) ~= 0.000003 + EXPECT_LE(bit_count, 337); // Binomial(400, .75) CDF(337) ~= 0.999997 +} + +TEST(ByteVectorTest, HmacWeightedRandomStatistics50) { + HmacByteVectorGenerator generator(50u, + HmacByteVectorGenerator::GenerateEntropyInput(), ""); + ByteVector random = generator.GetWeightedRandomByteVector(PROBABILITY_50); + int bit_count = CountBits(random); + // Check bounds on bit counts that are true with 99.999% probability. + EXPECT_GT(bit_count, 155); // Binomial(400, .5) CDF(155) ~= 0.000004 + EXPECT_LE(bit_count, 244); // Binomial(400, .5) CDF(244) ~= 0.999996 +} + +TEST(ByteVectorTest, HmacWeightedRandomStatistics75) { + HmacByteVectorGenerator generator(50u, + HmacByteVectorGenerator::GenerateEntropyInput(), ""); + ByteVector random = generator.GetWeightedRandomByteVector(PROBABILITY_75); + int bit_count = CountBits(random); + // Check bounds on bit counts that are true with 99.999% probability. + EXPECT_GT(bit_count, 259); // Binomial(400, .75) CDF(259) ~= 0.000003 + EXPECT_LE(bit_count, 337); // Binomial(400, .75) CDF(337) ~= 0.999997 +} + +} // namespace rappor diff --git a/components/rappor/log_uploader.cc b/components/rappor/log_uploader.cc new file mode 100644 index 0000000..4837ba1 --- /dev/null +++ b/components/rappor/log_uploader.cc @@ -0,0 +1,149 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/log_uploader.h" + +#include "base/metrics/histogram.h" +#include "base/metrics/sparse_histogram.h" +#include "net/base/load_flags.h" +#include "net/url_request/url_fetcher.h" + +namespace { + +// The delay, in seconds, between uploading when there are queued logs to send. +const int kUnsentLogsIntervalSeconds = 3; + +// When uploading metrics to the server fails, we progressively wait longer and +// longer before sending the next log. This backoff process helps reduce load +// on a server that is having issues. +// The following is the multiplier we use to expand that inter-log duration. +const double kBackoffMultiplier = 1.1; + +// The maximum backoff multiplier. +const int kMaxBackoffIntervalSeconds = 60 * 60; + +// The maximum number of unsent logs we will keep. +// TODO(holte): Limit based on log size instead. +const size_t kMaxQueuedLogs = 10; + +enum DiscardReason { + UPLOAD_SUCCESS, + UPLOAD_REJECTED, + QUEUE_OVERFLOW, + NUM_DISCARD_REASONS +}; + +} // namespace + +namespace rappor { + +LogUploader::LogUploader(const GURL& server_url, + const std::string& mime_type, + net::URLRequestContextGetter* request_context) + : server_url_(server_url), + mime_type_(mime_type), + request_context_(request_context), + has_callback_pending_(false), + upload_interval_(base::TimeDelta::FromSeconds( + kUnsentLogsIntervalSeconds)) { +} + +LogUploader::~LogUploader() {} + +void LogUploader::QueueLog(const std::string& log) { + queued_logs_.push(log); + if (!IsUploadScheduled() && !has_callback_pending_) + StartScheduledUpload(); +} + +bool LogUploader::IsUploadScheduled() const { + return upload_timer_.IsRunning(); +} + +void LogUploader::ScheduleNextUpload(base::TimeDelta interval) { + if (IsUploadScheduled() || has_callback_pending_) + return; + + upload_timer_.Start( + FROM_HERE, interval, this, &LogUploader::StartScheduledUpload); +} + +void LogUploader::StartScheduledUpload() { + DCHECK(!has_callback_pending_); + has_callback_pending_ = true; + current_fetch_.reset( + net::URLFetcher::Create(server_url_, net::URLFetcher::POST, this)); + current_fetch_->SetRequestContext(request_context_.get()); + current_fetch_->SetUploadData(mime_type_, queued_logs_.front()); + + // We already drop cookies server-side, but we might as well strip them out + // client-side as well. + current_fetch_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES | + net::LOAD_DO_NOT_SEND_COOKIES); + current_fetch_->Start(); +} + +// static +base::TimeDelta LogUploader::BackOffUploadInterval(base::TimeDelta interval) { + DCHECK_GT(kBackoffMultiplier, 1.0); + interval = base::TimeDelta::FromMicroseconds(static_cast<int64>( + kBackoffMultiplier * interval.InMicroseconds())); + + base::TimeDelta max_interval = + base::TimeDelta::FromSeconds(kMaxBackoffIntervalSeconds); + return interval > max_interval ? max_interval : interval; +} + +void LogUploader::OnURLFetchComplete(const net::URLFetcher* source) { + // We're not allowed to re-use the existing |URLFetcher|s, so free them here. + // Note however that |source| is aliased to the fetcher, so we should be + // careful not to delete it too early. + DCHECK_EQ(current_fetch_.get(), source); + scoped_ptr<net::URLFetcher> fetch(current_fetch_.Pass()); + + int response_code = source->GetResponseCode(); + + // Log a histogram to track response success vs. failure rates. + UMA_HISTOGRAM_SPARSE_SLOWLY("Rappor.UploadResponseCode", response_code); + + bool upload_succeeded = response_code == 200; + + // Determine whether this log should be retransmitted. + DiscardReason reason = NUM_DISCARD_REASONS; + if (upload_succeeded) { + reason = UPLOAD_SUCCESS; + } else if (response_code == 400) { + reason = UPLOAD_REJECTED; + } else if (queued_logs_.size() > kMaxQueuedLogs) { + reason = QUEUE_OVERFLOW; + } + + if (reason != NUM_DISCARD_REASONS) { + UMA_HISTOGRAM_ENUMERATION("Rappor.DiscardReason", + reason, + NUM_DISCARD_REASONS); + queued_logs_.pop(); + } + + // Error 400 indicates a problem with the log, not with the server, so + // don't consider that a sign that the server is in trouble. + bool server_is_healthy = upload_succeeded || response_code == 400; + OnUploadFinished(server_is_healthy, !queued_logs_.empty()); +} + +void LogUploader::OnUploadFinished(bool server_is_healthy, + bool more_logs_remaining) { + DCHECK(has_callback_pending_); + has_callback_pending_ = false; + // If the server is having issues, back off. Otherwise, reset to default. + if (!server_is_healthy) + upload_interval_ = BackOffUploadInterval(upload_interval_); + else + upload_interval_ = base::TimeDelta::FromSeconds(kUnsentLogsIntervalSeconds); + + if (more_logs_remaining) + ScheduleNextUpload(upload_interval_); +} + +} // namespace rappor diff --git a/components/rappor/log_uploader.h b/components/rappor/log_uploader.h new file mode 100644 index 0000000..30a129a --- /dev/null +++ b/components/rappor/log_uploader.h @@ -0,0 +1,94 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_RAPPOR_LOG_UPLOADER_H_ +#define COMPONENTS_RAPPOR_LOG_UPLOADER_H_ + +#include <queue> +#include <string> + +#include "base/memory/scoped_ptr.h" +#include "base/time/time.h" +#include "base/timer/timer.h" +#include "net/url_request/url_fetcher_delegate.h" +#include "net/url_request/url_request_context_getter.h" +#include "url/gurl.h" + +namespace net { +class URLFetcher; +} + +namespace rappor { + +// Handles uploading logs to an external server. +class LogUploader : public net::URLFetcherDelegate { + public: + // Constructor takes the server_url that logs should be uploaded to, the + // mime_type of the uploaded data, and request_context to create uploads + // with. + LogUploader(const GURL& server_url, + const std::string& mime_type, + net::URLRequestContextGetter* request_context); + + virtual ~LogUploader(); + + // Adds an entry to the queue of logs to be uploaded to the server. The + // uploader makes no assumptions about the format of |log| and simply sends + // it verbatim to the server. + void QueueLog(const std::string& log); + + protected: + // Check if an upload has been scheduled. + virtual bool IsUploadScheduled() const; + + // Schedules a future call to StartScheduledUpload if one isn't already + // pending. Can be overridden for testing. + virtual void ScheduleNextUpload(base::TimeDelta interval); + + // Starts transmission of the next log. Exposed for tests. + void StartScheduledUpload(); + + // Increases the upload interval each time it's called, to handle the case + // where the server is having issues. Exposed for tests. + static base::TimeDelta BackOffUploadInterval(base::TimeDelta); + + private: + // Implementation of net::URLFetcherDelegate. Called after transmission + // completes (either successfully or with failure). + virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE; + + // Called when the upload is completed. + void OnUploadFinished(bool server_is_healthy, bool more_logs_remaining); + + // The server URL to upload logs to. + const GURL server_url_; + + // The mime type to specify on uploaded logs. + const std::string mime_type_; + + // The request context used to send uploads. + scoped_refptr<net::URLRequestContextGetter> request_context_; + + // The outstanding transmission that appears as a URL Fetch operation. + scoped_ptr<net::URLFetcher> current_fetch_; + + // The logs that still need to be uploaded. + std::queue<std::string> queued_logs_; + + // A timer used to delay before attempting another upload. + base::OneShotTimer<LogUploader> upload_timer_; + + // Indicates that the last triggered upload hasn't resolved yet. + bool has_callback_pending_; + + // The interval to wait after an upload's URLFetcher completion before + // starting the next upload attempt. + base::TimeDelta upload_interval_; + + DISALLOW_COPY_AND_ASSIGN(LogUploader); +}; + +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_LOG_UPLOADER_H_ diff --git a/components/rappor/log_uploader_unittest.cc b/components/rappor/log_uploader_unittest.cc new file mode 100644 index 0000000..46db7db --- /dev/null +++ b/components/rappor/log_uploader_unittest.cc @@ -0,0 +1,153 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/log_uploader.h" + +#include "base/message_loop/message_loop_proxy.h" +#include "net/url_request/test_url_fetcher_factory.h" +#include "net/url_request/url_request_test_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace rappor { + +namespace { + +const char kTestServerURL[] = "http://a.com/"; +const char kTestMimeType[] = "text/plain"; + +class TestLogUploader : public LogUploader { + public: + TestLogUploader(net::URLRequestContextGetter* request_context) : + LogUploader(GURL(kTestServerURL), kTestMimeType, request_context) { + } + + base::TimeDelta last_interval_set() const { return last_interval_set_; }; + + void StartUpload() { + last_interval_set_ = base::TimeDelta(); + StartScheduledUpload(); + } + + static base::TimeDelta BackOff(base::TimeDelta t) { + return LogUploader::BackOffUploadInterval(t); + } + + protected: + virtual bool IsUploadScheduled() const OVERRIDE { + return last_interval_set() != base::TimeDelta(); + } + + // Schedules a future call to StartScheduledUpload if one isn't already + // pending. + virtual void ScheduleNextUpload(base::TimeDelta interval) OVERRIDE { + EXPECT_EQ(last_interval_set(), base::TimeDelta()); + last_interval_set_ = interval; + } + + base::TimeDelta last_interval_set_; + + DISALLOW_COPY_AND_ASSIGN(TestLogUploader); +}; + +} // namespace + +class LogUploaderTest : public testing::Test { + public: + LogUploaderTest() + : request_context_(new net::TestURLRequestContextGetter( + base::MessageLoopProxy::current())), + factory_(NULL) {} + + protected: + // Required for base::MessageLoopProxy::current(). + base::MessageLoopForUI loop_; + scoped_refptr<net::TestURLRequestContextGetter> request_context_; + net::FakeURLFetcherFactory factory_; + + DISALLOW_COPY_AND_ASSIGN(LogUploaderTest); +}; + +TEST_F(LogUploaderTest, Success) { + TestLogUploader uploader(request_context_); + + factory_.SetFakeResponse(GURL(kTestServerURL), + std::string(), + net::HTTP_OK, + net::URLRequestStatus::SUCCESS); + + uploader.QueueLog("log1"); + base::MessageLoop::current()->RunUntilIdle(); + // Log should be discarded instead of retransmitted. + EXPECT_EQ(uploader.last_interval_set(), base::TimeDelta()); +} + +TEST_F(LogUploaderTest, Rejection) { + TestLogUploader uploader(request_context_); + + factory_.SetFakeResponse(GURL(kTestServerURL), + std::string(), + net::HTTP_BAD_REQUEST, + net::URLRequestStatus::SUCCESS); + + uploader.QueueLog("log1"); + base::MessageLoop::current()->RunUntilIdle(); + // Log should be discarded instead of retransmitted. + EXPECT_EQ(uploader.last_interval_set(), base::TimeDelta()); +} + +TEST_F(LogUploaderTest, Failure) { + TestLogUploader uploader(request_context_); + + factory_.SetFakeResponse(GURL(kTestServerURL), + std::string(), + net::HTTP_INTERNAL_SERVER_ERROR, + net::URLRequestStatus::SUCCESS); + + uploader.QueueLog("log1"); + base::MessageLoop::current()->RunUntilIdle(); + // Log should be scheduled for retransmission. + base::TimeDelta error_interval = uploader.last_interval_set(); + EXPECT_GT(error_interval, base::TimeDelta()); + + for (int i = 0; i < 10; i++) { + uploader.QueueLog("logX"); + } + + // A second failure should lead to a longer interval, and the log should + // be discarded due to full queue. + uploader.StartUpload(); + base::MessageLoop::current()->RunUntilIdle(); + EXPECT_GT(uploader.last_interval_set(), error_interval); + + factory_.SetFakeResponse(GURL(kTestServerURL), + std::string(), + net::HTTP_OK, + net::URLRequestStatus::SUCCESS); + + // A success should revert to base interval while queue is not empty. + for (int i = 0; i < 9; i++) { + uploader.StartUpload(); + base::MessageLoop::current()->RunUntilIdle(); + EXPECT_LT(uploader.last_interval_set(), error_interval); + } + + // Queue should be empty. + uploader.StartUpload(); + base::MessageLoop::current()->RunUntilIdle(); + EXPECT_EQ(uploader.last_interval_set(), base::TimeDelta()); +} + +TEST_F(LogUploaderTest, Backoff) { + base::TimeDelta current = base::TimeDelta(); + base::TimeDelta next = base::TimeDelta::FromSeconds(1); + // Backoff until the maximum is reached. + while (next > current) { + current = next; + next = TestLogUploader::BackOff(current); + } + // Maximum backoff should have been reached. + EXPECT_EQ(next, current); +} + +} // namespace rappor diff --git a/components/rappor/proto/rappor_metric.proto b/components/rappor/proto/rappor_metric.proto new file mode 100644 index 0000000..e5efe21 --- /dev/null +++ b/components/rappor/proto/rappor_metric.proto @@ -0,0 +1,39 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Contains information collected by the RAPPOR (Randomized Aggregatable +// Privacy-Preserving Ordinal Responses) system. +// +// For a full description of the rappor metrics, see +// http://www.chromium.org/developers/design-documents/rappor + +syntax = "proto2"; + +option optimize_for = LITE_RUNTIME; + +package rappor; + +// Next tag: 3 +message RapporReports { + // Which cohort these reports belong to. The RAPPOR participants are + // partioned into cohorts in different ways, to allow better statistics and + // increased coverage. In particular, the cohort will serve to choose the + // hash functions used for Bloom-filter-based reports. The cohort is + // generated randomly by the client and is currently in the range [0,32). + optional int32 cohort = 1; + + // Each Report contains the values generated by the RAPPOR process for one + // metric. + message Report { + // The name of the metric, hashed (first 8 bytes of MD5 hash). + optional fixed64 name_hash = 1; + + // The sequence of bits produced by random coin flips in + // RapporMetric::GetReport(). For a complete description of RAPPOR + // metrics, refer to the design document at: + // http://www.chromium.org/developers/design-documents/rappor + optional bytes bits = 2; + } + repeated Report report = 2; +} diff --git a/components/rappor/rappor_metric.cc b/components/rappor/rappor_metric.cc new file mode 100644 index 0000000..2da3b31 --- /dev/null +++ b/components/rappor/rappor_metric.cc @@ -0,0 +1,60 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/rappor_metric.h" + +#include "base/logging.h" + +namespace rappor { + +RapporMetric::RapporMetric(const std::string& metric_name, + const RapporParameters& parameters, + int32_t cohort) + : metric_name_(metric_name), + parameters_(parameters), + bloom_filter_(parameters.bloom_filter_size_bytes, + parameters.bloom_filter_hash_function_count, + cohort * parameters.bloom_filter_hash_function_count) { + DCHECK_GE(cohort, 0); +} + +RapporMetric::~RapporMetric() {} + +void RapporMetric::AddSample(const std::string& str) { + bloom_filter_.AddString(str); +} + +ByteVector RapporMetric::GetReport(const std::string& secret) const { + // Generate a deterministically random mask of fake data using the + // client's secret key + real data as a seed. The inclusion of the secret + // in the seed avoids correlations between real and fake data. + // The seed isn't a human-readable string. + std::string personalization_string = metric_name_ + + std::string(bytes().begin(), bytes().end()); + HmacByteVectorGenerator hmac_generator(bytes().size(), secret, + personalization_string); + const ByteVector fake_mask = + hmac_generator.GetWeightedRandomByteVector(parameters().fake_prob); + ByteVector fake_bits = + hmac_generator.GetWeightedRandomByteVector(parameters().fake_one_prob); + + // Redact most of the real data by replacing it with the fake data, hiding + // and limiting the amount of information an individual client reports on. + const ByteVector* fake_and_redacted_bits = + ByteVectorMerge(fake_mask, bytes(), &fake_bits); + + // Generate biased coin flips for each bit. + ByteVectorGenerator coin_generator(bytes().size()); + const ByteVector zero_coins = + coin_generator.GetWeightedRandomByteVector(parameters().zero_coin_prob); + ByteVector one_coins = + coin_generator.GetWeightedRandomByteVector(parameters().one_coin_prob); + + // Create a randomized response report on the fake and redacted data, sending + // the outcome of flipping a zero coin for the zero bits in that data, and of + // flipping a one coin for the one bits in that data, as the final report. + return *ByteVectorMerge(*fake_and_redacted_bits, zero_coins, &one_coins); +} + +} // namespace rappor diff --git a/components/rappor/rappor_metric.h b/components/rappor/rappor_metric.h new file mode 100644 index 0000000..30147c4 --- /dev/null +++ b/components/rappor/rappor_metric.h @@ -0,0 +1,56 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_RAPPOR_RAPPOR_H_ +#define COMPONENTS_RAPPOR_RAPPOR_H_ + +#include <string> + +#include "components/rappor/bloom_filter.h" +#include "components/rappor/byte_vector_utils.h" +#include "components/rappor/rappor_parameters.h" + +namespace rappor { + +// A RapporMetric is an object that collects string samples into a Bloom filter, +// and generates randomized reports about the collected data. +// +// For a full description of the rappor metrics, see +// http://www.chromium.org/developers/design-documents/rappor +class RapporMetric { + public: + // Takes the |metric_name| that this will be reported to the server with, + // a |parameters| describing size and probability weights to be used in + // recording this metric, and cohort value, which modifies the hash + // functions and used in the bloom filter. + explicit RapporMetric(const std::string& metric_name, + const RapporParameters& parameters, + int32_t cohort); + ~RapporMetric(); + + // Records an additional sample in the Bloom filter. + void AddSample(const std::string& str); + + // Retrieves the current Bloom filter bits. + const ByteVector& bytes() const { return bloom_filter_.bytes(); } + + // Gets the parameter values this metric was constructed with. + const RapporParameters& parameters() const { return parameters_; } + + // Generates the bits to report for this metric. Using the secret as a seed, + // randomly selects bits for redaction. Then flips coins to generate the + // final report bits. + ByteVector GetReport(const std::string& secret) const; + + private: + const std::string metric_name_; + const RapporParameters parameters_; + BloomFilter bloom_filter_; + + DISALLOW_COPY_AND_ASSIGN(RapporMetric); +}; + +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_RAPPOR_H_ diff --git a/components/rappor/rappor_metric_unittest.cc b/components/rappor/rappor_metric_unittest.cc new file mode 100644 index 0000000..bbcb1ab --- /dev/null +++ b/components/rappor/rappor_metric_unittest.cc @@ -0,0 +1,97 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/rappor_metric.h" + +#include <stdlib.h> + +#include "base/rand_util.h" +#include "base/strings/stringprintf.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace rappor { + +const RapporParameters kTestRapporParameters = { + 16 /* Bloom filter size bytes */, + 4 /* Bloom filter hash count */, + PROBABILITY_75 /* Fake data probability */, + PROBABILITY_50 /* Fake one probability */, + PROBABILITY_75 /* One coin probability */, + PROBABILITY_50 /* Zero coin probability */ +}; + +const RapporParameters kTestStatsRapporParameters = { + 50 /* Bloom filter size bytes */, + 4 /* Bloom filter hash count */, + PROBABILITY_75 /* Fake data probability */, + PROBABILITY_50 /* Fake one probability */, + PROBABILITY_75 /* One coin probability */, + PROBABILITY_50 /* Zero coin probability */ +}; + +// Check for basic syntax and use. +TEST(RapporMetricTest, BasicMetric) { + RapporMetric testMetric("MyRappor", kTestRapporParameters, 0); + testMetric.AddSample("Foo"); + testMetric.AddSample("Bar"); + EXPECT_EQ(0x80, testMetric.bytes()[1]); +} + +TEST(RapporMetricTest, GetReport) { + RapporMetric metric("MyRappor", kTestRapporParameters, 0); + + ByteVector report = metric.GetReport( + HmacByteVectorGenerator::GenerateEntropyInput()); + EXPECT_EQ(16u, report.size()); +} + +TEST(RapporMetricTest, GetReportStatistics) { + RapporMetric metric("MyStatsRappor", kTestStatsRapporParameters, 0); + + for (char i = 0; i < 50; i++) { + metric.AddSample(base::StringPrintf("%d", i)); + } + ByteVector real_bits = metric.bytes(); + int real_bit_count = CountBits(real_bits); + EXPECT_EQ(real_bit_count, 152); + + std::string secret = HmacByteVectorGenerator::GenerateEntropyInput(); + ByteVector report = metric.GetReport(secret); + + // For the bits we actually set in the bloom filter, get a count of how + // many of them reported true. + ByteVector from_true_reports = report; + // Real bits AND report bits. + ByteVectorMerge(real_bits, real_bits, &from_true_reports); + int true_from_true_count = CountBits(from_true_reports); + + // For the bits we didn't set in the bloom filter, get a count of how + // many of them reported true. + ByteVector from_false_reports = report; + ByteVectorOr(real_bits, &from_false_reports); + int true_from_false_count = CountBits(from_false_reports) - real_bit_count; + + // The probability of a true bit being true after redaction = + // [fake_prob]*[fake_true_prob] + (1-[fake_prob]) = + // .75 * .5 + (1-.75) = .625 + // The probablity of a false bit being true after redaction = + // [fake_prob]*[fake_true_prob] = .375 + // The probability of a bit reporting true = + // [redacted_prob] * [one_coin_prob:.75] + + // (1-[redacted_prob]) * [zero_coin_prob:.5] = + // 0.65625 for true bits + // 0.59375 for false bits + + // stats.binom(152, 0.65625).ppf(0.000005) = 73 + EXPECT_GT(true_from_true_count, 73); + // stats.binom(152, 0.65625).ppf(0.999995) = 124 + EXPECT_LE(true_from_true_count, 124); + + // stats.binom(248, 0.59375).ppf(.000005) = 113 + EXPECT_GT(true_from_false_count, 113); + // stats.binom(248, 0.59375).ppf(.999995) = 181 + EXPECT_LE(true_from_false_count, 181); +} + +} // namespace rappor diff --git a/components/rappor/rappor_parameters.cc b/components/rappor/rappor_parameters.cc new file mode 100644 index 0000000..ede7636 --- /dev/null +++ b/components/rappor/rappor_parameters.cc @@ -0,0 +1,21 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/rappor_parameters.h" + +#include "base/strings/stringprintf.h" + +namespace rappor { + +std::string RapporParameters::ToString() const { + return base::StringPrintf("{ %d, %d, %d, %d, %d, %d }", + bloom_filter_size_bytes, + bloom_filter_hash_function_count, + fake_prob, + fake_one_prob, + one_coin_prob, + zero_coin_prob); +} + +} // namespace rappor diff --git a/components/rappor/rappor_parameters.h b/components/rappor/rappor_parameters.h new file mode 100644 index 0000000..77b1614 --- /dev/null +++ b/components/rappor/rappor_parameters.h @@ -0,0 +1,43 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_RAPPOR_RAPPOR_PARAMETERS_H_ +#define COMPONENTS_RAPPOR_RAPPOR_PARAMETERS_H_ + +#include <string> + +namespace rappor { + +enum Probability { + PROBABILITY_75, // 75% + PROBABILITY_50, // 50% +}; + +// An object describing a rappor metric and the parameters used to generate it. +// +// For a full description of the rappor metrics, see +// http://www.chromium.org/developers/design-documents/rappor +struct RapporParameters { + // Get a string representing the parameters, for DCHECK_EQ. + std::string ToString() const; + + // The number of bytes stored in the Bloom filter. + int bloom_filter_size_bytes; + // The number of hash functions used in the Bloom filter. + int bloom_filter_hash_function_count; + + // The probability that a bit will be redacted with fake data. + Probability fake_prob; + // The probability that a fake bit will be a one. + Probability fake_one_prob; + + // The probability that a one bit in the redacted data reports as one. + Probability one_coin_prob; + // The probability that a zero bit in the redacted data reports as one. + Probability zero_coin_prob; +}; + +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_RAPPOR_PARAMETERS_H_ diff --git a/components/rappor/rappor_pref_names.cc b/components/rappor/rappor_pref_names.cc new file mode 100644 index 0000000..e62c764 --- /dev/null +++ b/components/rappor/rappor_pref_names.cc @@ -0,0 +1,20 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/rappor_pref_names.h" + +namespace rappor { +namespace prefs { + +// A randomly generated number, which determines cohort data is reported for. +const char kRapporCohort[] = "rappor.cohort"; + +// A base-64 encoded, randomly generated byte string, which is used as a seed +// for redacting collected data. +// Important: This value should remain secret at the client, and never be +// reported on the network, or to the server. +const char kRapporSecret[] = "rappor.secret"; + +} // namespace prefs +} // namespace rappor diff --git a/components/rappor/rappor_pref_names.h b/components/rappor/rappor_pref_names.h new file mode 100644 index 0000000..ec42b76 --- /dev/null +++ b/components/rappor/rappor_pref_names.h @@ -0,0 +1,19 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_RAPPOR_RAPPOR_PREF_NAMES_H_ +#define COMPONENTS_RAPPOR_RAPPOR_PREF_NAMES_H_ + +namespace rappor { +namespace prefs { + +// Alphabetical list of preference names specific to the Rappor +// component. Keep alphabetized, and document each in the .cc file. +extern const char kRapporCohort[]; +extern const char kRapporSecret[]; + +} // namespace prefs +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_RAPPOR_PREF_NAMES_H_ diff --git a/components/rappor/rappor_service.cc b/components/rappor/rappor_service.cc new file mode 100644 index 0000000..493e7b9 --- /dev/null +++ b/components/rappor/rappor_service.cc @@ -0,0 +1,187 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/rappor_service.h" + +#include "base/base64.h" +#include "base/metrics/field_trial.h" +#include "base/prefs/pref_registry_simple.h" +#include "base/prefs/pref_service.h" +#include "base/rand_util.h" +#include "base/stl_util.h" +#include "components/metrics/metrics_hashes.h" +#include "components/rappor/proto/rappor_metric.pb.h" +#include "components/rappor/rappor_pref_names.h" +#include "components/variations/variations_associated_data.h" + +namespace rappor { + +namespace { + +// The number of cohorts we divide clients into. +const int kNumCohorts = 32; + +// Seconds before the initial log is generated. +const int kInitialLogIntervalSeconds = 15; +// Interval between ongoing logs. +const int kLogIntervalSeconds = 30 * 60; + +const char kMimeType[] = "application/vnd.chrome.rappor"; + +// Constants for the RAPPOR rollout field trial. +const char kRapporRolloutFieldTrialName[] = "RapporRollout"; + +// Constant for the finch parameter name for the server URL +const char kRapporRolloutServerUrlParam[] = "ServerUrl"; + +GURL GetServerUrl() { + return GURL(chrome_variations::GetVariationParamValue( + kRapporRolloutFieldTrialName, + kRapporRolloutServerUrlParam)); +} + +const RapporParameters kRapporParametersForType[NUM_RAPPOR_TYPES] = { + { // ETLD_PLUS_ONE_RAPPOR_TYPE + 16 /* Bloom filter size bytes */, + 2 /* Bloom filter hash count */, + rappor::PROBABILITY_75 /* Fake data probability */, + rappor::PROBABILITY_50 /* Fake one probability */, + rappor::PROBABILITY_75 /* One coin probability */, + rappor::PROBABILITY_50 /* Zero coin probability */ + }, +}; + +} // namespace + +RapporService::RapporService() : cohort_(-1) {} + +RapporService::~RapporService() { + STLDeleteValues(&metrics_map_); +} + +void RapporService::Start(PrefService* pref_service, + net::URLRequestContextGetter* request_context) { + GURL server_url = GetServerUrl(); + if (!server_url.is_valid()) + return; + DCHECK(!uploader_); + LoadSecret(pref_service); + LoadCohort(pref_service); + uploader_.reset(new LogUploader(server_url, kMimeType, request_context)); + log_rotation_timer_.Start( + FROM_HERE, + base::TimeDelta::FromSeconds(kInitialLogIntervalSeconds), + this, + &RapporService::OnLogInterval); +} + +void RapporService::OnLogInterval() { + DCHECK(uploader_); + RapporReports reports; + if (ExportMetrics(&reports)) { + std::string log_text; + bool success = reports.SerializeToString(&log_text); + DCHECK(success); + uploader_->QueueLog(log_text); + } + log_rotation_timer_.Start(FROM_HERE, + base::TimeDelta::FromSeconds(kLogIntervalSeconds), + this, + &RapporService::OnLogInterval); +} + +// static +void RapporService::RegisterPrefs(PrefRegistrySimple* registry) { + registry->RegisterStringPref(prefs::kRapporSecret, std::string()); + registry->RegisterIntegerPref(prefs::kRapporCohort, -1); +} + +void RapporService::LoadCohort(PrefService* pref_service) { + DCHECK_EQ(cohort_, -1); + cohort_ = pref_service->GetInteger(prefs::kRapporCohort); + if (cohort_ >= 0 && cohort_ < kNumCohorts) + return; + + cohort_ = base::RandGenerator(kNumCohorts); + pref_service->SetInteger(prefs::kRapporCohort, cohort_); +} + +void RapporService::LoadSecret(PrefService* pref_service) { + DCHECK(secret_.empty()); + std::string secret_base64 = + pref_service->GetString(prefs::kRapporSecret); + if (!secret_base64.empty()) { + bool decoded = base::Base64Decode(secret_base64, &secret_); + if (decoded && secret_.size() == HmacByteVectorGenerator::kEntropyInputSize) + return; + // If the preference fails to decode, or is the wrong size, it must be + // corrupt, so continue as though it didn't exist yet and generate a new + // one. + } + + secret_ = HmacByteVectorGenerator::GenerateEntropyInput(); + base::Base64Encode(secret_, &secret_base64); + pref_service->SetString(prefs::kRapporSecret, secret_base64); +} + +bool RapporService::ExportMetrics(RapporReports* reports) { + if (metrics_map_.empty()) + return false; + + DCHECK_GE(cohort_, 0); + reports->set_cohort(cohort_); + + for (std::map<std::string, RapporMetric*>::iterator it = metrics_map_.begin(); + metrics_map_.end() != it; + ++it) { + const RapporMetric* metric = it->second; + RapporReports::Report* report = reports->add_report(); + report->set_name_hash(metrics::HashMetricName(it->first)); + ByteVector bytes = metric->GetReport(secret_); + report->set_bits(std::string(bytes.begin(), bytes.end())); + } + STLDeleteValues(&metrics_map_); + return true; +} + +bool RapporService::IsInitialized() const { + return cohort_ >= 0; +} + +void RapporService::RecordSample(const std::string& metric_name, + RapporType type, + const std::string& sample) { + // Ignore the sample if the service hasn't started yet. + if (!IsInitialized()) + return; + DCHECK_LT(type, NUM_RAPPOR_TYPES); + RecordSampleInternal(metric_name, kRapporParametersForType[type], sample); +} + +void RapporService::RecordSampleInternal(const std::string& metric_name, + const RapporParameters& parameters, + const std::string& sample) { + DCHECK(IsInitialized()); + + RapporMetric* metric = LookUpMetric(metric_name, parameters); + metric->AddSample(sample); +} + +RapporMetric* RapporService::LookUpMetric(const std::string& metric_name, + const RapporParameters& parameters) { + DCHECK(IsInitialized()); + std::map<std::string, RapporMetric*>::iterator it = + metrics_map_.find(metric_name); + if (metrics_map_.end() != it) { + RapporMetric* metric = it->second; + DCHECK_EQ(parameters.ToString(), metric->parameters().ToString()); + return metric; + } + + RapporMetric* new_metric = new RapporMetric(metric_name, parameters, cohort_); + metrics_map_[metric_name] = new_metric; + return new_metric; +} + +} // namespace rappor diff --git a/components/rappor/rappor_service.h b/components/rappor/rappor_service.h new file mode 100644 index 0000000..9e2cd9b --- /dev/null +++ b/components/rappor/rappor_service.h @@ -0,0 +1,109 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_RAPPOR_RAPPOR_SERVICE_H_ +#define COMPONENTS_RAPPOR_RAPPOR_SERVICE_H_ + +#include <string> + +#include "base/basictypes.h" +#include "base/prefs/pref_service.h" +#include "base/time/time.h" +#include "base/timer/timer.h" +#include "components/rappor/log_uploader.h" +#include "components/rappor/proto/rappor_metric.pb.h" +#include "components/rappor/rappor_metric.h" + +class PrefRegistrySimple; + +namespace rappor { + +// The type of data stored in a metric. +enum RapporType { + ETLD_PLUS_ONE_RAPPOR_TYPE = 0, + NUM_RAPPOR_TYPES +}; + +// This class provides an interface for recording samples for rappor metrics, +// and periodically generates and uploads reports based on the collected data. +class RapporService { + public: + RapporService(); + virtual ~RapporService(); + + // Starts the periodic generation of reports and upload attempts. + void Start(PrefService* pref_service, net::URLRequestContextGetter* context); + + // Records a sample of the rappor metric specified by |metric_name|. + // Creates and initializes the metric, if it doesn't yet exist. + void RecordSample(const std::string& metric_name, + RapporType type, + const std::string& sample); + + // Sets the cohort value. For use by tests only. + void SetCohortForTesting(uint32_t cohort) { cohort_ = cohort; } + + // Sets the secret value. For use by tests only. + void SetSecretForTesting(const std::string& secret) { secret_ = secret; } + + // Registers the names of all of the preferences used by RapporService in the + // provided PrefRegistry. This should be called before calling Start(). + static void RegisterPrefs(PrefRegistrySimple* registry); + + protected: + // Logs all of the collected metrics to the reports proto message. Exposed + // for tests. Returns true if any metrics were recorded. + bool ExportMetrics(RapporReports* reports); + + // Records a sample of the rappor metric specified by |parameters|. + // Creates and initializes the metric, if it doesn't yet exist. + // Exposed for tests. + void RecordSampleInternal(const std::string& metric_name, + const RapporParameters& parameters, + const std::string& sample); + + private: + // Check if the service has been started successfully. + bool IsInitialized() const; + + // Retrieves the cohort number this client was assigned to, generating it if + // doesn't already exist. The cohort should be persistent. + void LoadCohort(PrefService* pref_service); + + // Retrieves the value for secret_ from preferences, generating it if doesn't + // already exist. The secret should be persistent, so that additional bits + // from the client do not get exposed over time. + void LoadSecret(PrefService* pref_service); + + // Called whenever the logging interval elapses to generate a new log of + // reports and pass it to the uploader. + void OnLogInterval(); + + // Finds a metric in the metrics_map_, creating it if it doesn't already + // exist. + RapporMetric* LookUpMetric(const std::string& metric_name, + const RapporParameters& parameters); + + // Client-side secret used to generate fake bits. + std::string secret_; + + // The cohort this client is assigned to. -1 is uninitialized. + int32_t cohort_; + + // Timer which schedules calls to OnLogInterval() + base::OneShotTimer<RapporService> log_rotation_timer_; + + // A private LogUploader instance for sending reports to the server. + scoped_ptr<LogUploader> uploader_; + + // We keep all registered metrics in a map, from name to metric. + // The map owns the metrics it contains. + std::map<std::string, RapporMetric*> metrics_map_; + + DISALLOW_COPY_AND_ASSIGN(RapporService); +}; + +} // namespace rappor + +#endif // COMPONENTS_RAPPOR_RAPPOR_SERVICE_H_ diff --git a/components/rappor/rappor_service_unittest.cc b/components/rappor/rappor_service_unittest.cc new file mode 100644 index 0000000..790eae7 --- /dev/null +++ b/components/rappor/rappor_service_unittest.cc @@ -0,0 +1,50 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/rappor/rappor_service.h" + +#include "testing/gtest/include/gtest/gtest.h" + +namespace rappor { + +class TestRapporService : public RapporService { + public: + void GetReports(RapporReports* reports) { + ExportMetrics(reports); + } + void TestRecordSample(const std::string& metric_name, + const RapporParameters& parameters, + const std::string& sample) { + RecordSampleInternal(metric_name, parameters, sample); + } +}; + +TEST(RapporServiceTest, RecordAndExportMetrics) { + const RapporParameters kTestRapporParameters = { + 16 /* Bloom filter size bytes */, + 4 /* Bloom filter hash count */, + PROBABILITY_75 /* Fake data probability */, + PROBABILITY_50 /* Fake one probability */, + PROBABILITY_75 /* One coin probability */, + PROBABILITY_50 /* Zero coin probability */ + }; + + TestRapporService rappor_service; + rappor_service.SetCohortForTesting(0); + rappor_service.SetSecretForTesting( + HmacByteVectorGenerator::GenerateEntropyInput()); + + rappor_service.TestRecordSample("MyMetric", kTestRapporParameters, "foo"); + rappor_service.TestRecordSample("MyMetric", kTestRapporParameters, "bar"); + + RapporReports reports; + rappor_service.GetReports(&reports); + EXPECT_EQ(1, reports.report_size()); + + const RapporReports::Report& report = reports.report(0); + EXPECT_TRUE(report.name_hash()); + EXPECT_EQ(16u, report.bits().size()); +} + +} // namespace rappor |