summaryrefslogtreecommitdiffstats
path: root/components/rappor
diff options
context:
space:
mode:
authorholte@chromium.org <holte@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-02-21 04:06:10 +0000
committerholte@chromium.org <holte@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-02-21 04:06:10 +0000
commit2a172e455f7060e0786abd37de53a39e21aa5b5e (patch)
treedd883647e4c6931f92123fe44e738a93f49b924e /components/rappor
parent0433b638d68d751a87134b026034e2e46bef4f7b (diff)
downloadchromium_src-2a172e455f7060e0786abd37de53a39e21aa5b5e.zip
chromium_src-2a172e455f7060e0786abd37de53a39e21aa5b5e.tar.gz
chromium_src-2a172e455f7060e0786abd37de53a39e21aa5b5e.tar.bz2
Implementation of Randomized Aggregatable Privacy-Preserving Ordinal Responses (RAPPORs).
See the design doc at http://www.chromium.org/developers/design-documents/rappor BUG=328168 TBR=darin@chromium.org Review URL: https://codereview.chromium.org/49753002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@252492 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'components/rappor')
-rw-r--r--components/rappor/DEPS7
-rw-r--r--components/rappor/OWNERS2
-rw-r--r--components/rappor/bloom_filter.cc38
-rw-r--r--components/rappor/bloom_filter.h48
-rw-r--r--components/rappor/bloom_filter_unittest.cc54
-rw-r--r--components/rappor/byte_vector_utils.cc215
-rw-r--r--components/rappor/byte_vector_utils.h110
-rw-r--r--components/rappor/byte_vector_utils_unittest.cc128
-rw-r--r--components/rappor/log_uploader.cc149
-rw-r--r--components/rappor/log_uploader.h94
-rw-r--r--components/rappor/log_uploader_unittest.cc153
-rw-r--r--components/rappor/proto/rappor_metric.proto39
-rw-r--r--components/rappor/rappor_metric.cc60
-rw-r--r--components/rappor/rappor_metric.h56
-rw-r--r--components/rappor/rappor_metric_unittest.cc97
-rw-r--r--components/rappor/rappor_parameters.cc21
-rw-r--r--components/rappor/rappor_parameters.h43
-rw-r--r--components/rappor/rappor_pref_names.cc20
-rw-r--r--components/rappor/rappor_pref_names.h19
-rw-r--r--components/rappor/rappor_service.cc187
-rw-r--r--components/rappor/rappor_service.h109
-rw-r--r--components/rappor/rappor_service_unittest.cc50
22 files changed, 1699 insertions, 0 deletions
diff --git a/components/rappor/DEPS b/components/rappor/DEPS
new file mode 100644
index 0000000..14115d9
--- /dev/null
+++ b/components/rappor/DEPS
@@ -0,0 +1,7 @@
+include_rules = [
+ "+components/metrics",
+ "+components/variations",
+ "+crypto",
+ "+net",
+ "+third_party/smhasher",
+]
diff --git a/components/rappor/OWNERS b/components/rappor/OWNERS
new file mode 100644
index 0000000..8d96ade
--- /dev/null
+++ b/components/rappor/OWNERS
@@ -0,0 +1,2 @@
+asvitkine@chromium.org
+# Please include holte@ on reviews
diff --git a/components/rappor/bloom_filter.cc b/components/rappor/bloom_filter.cc
new file mode 100644
index 0000000..2880ae0
--- /dev/null
+++ b/components/rappor/bloom_filter.cc
@@ -0,0 +1,38 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/bloom_filter.h"
+
+#include "base/logging.h"
+
+// TODO(holte): we can't include "City.h" due to typedef conflicts.
+extern uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
+
+namespace rappor {
+
+BloomFilter::BloomFilter(uint32_t bytes_size,
+ uint32_t hash_function_count,
+ uint32_t hash_seed_offset)
+ : bytes_(bytes_size),
+ hash_function_count_(hash_function_count),
+ hash_seed_offset_(hash_seed_offset) {
+ DCHECK_GT(bytes_size, 0u);
+}
+
+BloomFilter::~BloomFilter() {}
+
+void BloomFilter::AddString(const std::string& str) {
+ for (size_t i = 0; i < hash_function_count_; ++i) {
+ // Using CityHash here because we have support for it in Dremel. Many hash
+ // functions, such as MD5, SHA1, or Murmur, would probably also work.
+ uint32_t index =
+ CityHash64WithSeed(str.data(), str.size(), hash_seed_offset_ + i);
+ // Note that the "bytes" are uint8_t, so they are always 8-bits.
+ uint32_t byte_index = (index / 8) % bytes_.size();
+ uint32_t bit_index = index % 8;
+ bytes_[byte_index] |= 1 << bit_index;
+ }
+}
+
+} // namespace rappor
diff --git a/components/rappor/bloom_filter.h b/components/rappor/bloom_filter.h
new file mode 100644
index 0000000..0bb9a57
--- /dev/null
+++ b/components/rappor/bloom_filter.h
@@ -0,0 +1,48 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_RAPPOR_BLOOM_FILTER_H_
+#define COMPONENTS_RAPPOR_BLOOM_FILTER_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "components/rappor/byte_vector_utils.h"
+
+namespace rappor {
+
+// BloomFilter is a simple Bloom filter for keeping track of a set of strings.
+class BloomFilter {
+ public:
+ // Constructs a BloomFilter using |bytes_size| bytes of Bloom filter bits,
+ // and |hash_function_count| hash functions to set bits in the filter. The
+ // hash functions will be generated by using seeds in the range
+ // |hash_seed_offset| to (|hash_seed_offset| + |hash_function_count|).
+ BloomFilter(uint32_t bytes_size,
+ uint32_t hash_function_count,
+ uint32_t hash_seed_offset);
+ ~BloomFilter();
+
+ // Add a single string to the Bloom filter.
+ void AddString(const std::string& str);
+
+ // Returns the current value of the Bloom filter's bit array.
+ const ByteVector& bytes() const { return bytes_; };
+
+ private:
+ // Stores the byte array of the Bloom filter.
+ ByteVector bytes_;
+
+ // The number of bits to set for each string added.
+ uint32_t hash_function_count_;
+
+ // A number add to a hash function index to get a seed for that hash function.
+ uint32_t hash_seed_offset_;
+
+ DISALLOW_COPY_AND_ASSIGN(BloomFilter);
+};
+
+} // namespace rappor
+
+#endif // COMPONENTS_RAPPOR_BLOOM_FILTER_H_
diff --git a/components/rappor/bloom_filter_unittest.cc b/components/rappor/bloom_filter_unittest.cc
new file mode 100644
index 0000000..9a64e25
--- /dev/null
+++ b/components/rappor/bloom_filter_unittest.cc
@@ -0,0 +1,54 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/bloom_filter.h"
+
+#include "components/rappor/byte_vector_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace rappor {
+
+TEST(BloomFilterTest, TinyFilter) {
+ BloomFilter filter(1u, 4u, 0u);
+
+ // Size is 1 and it's initially empty
+ EXPECT_EQ(1u, filter.bytes().size());
+ EXPECT_EQ(0x00, filter.bytes()[0]);
+
+ // "Test" has a self-collision, and only sets 3 bits.
+ filter.AddString("Test");
+ EXPECT_EQ(0x2a, filter.bytes()[0]);
+
+ // Adding the same value shouldn't change anything.
+ filter.AddString("Test");
+ EXPECT_EQ(0x2a, filter.bytes()[0]);
+
+ BloomFilter filter2(1u, 4u, 0u);
+ EXPECT_EQ(0x00, filter2.bytes()[0]);
+ filter2.AddString("Bar");
+ EXPECT_EQ(0xa8, filter2.bytes()[0]);
+
+ // Adding a colliding string should just set new bits.
+ filter.AddString("Bar");
+ EXPECT_EQ(0xaa, filter.bytes()[0]);
+}
+
+TEST(BloomFilterTest, HugeFilter) {
+ // Create a 500 bit filter, and use a large seed offset to see if anything
+ // breaks.
+ BloomFilter filter(500u, 1u, 0xabdef123);
+
+ // Size is 500 and it's initially empty
+ EXPECT_EQ(500u, filter.bytes().size());
+ EXPECT_EQ(0, CountBits(filter.bytes()));
+
+ filter.AddString("Bar");
+ EXPECT_EQ(1, CountBits(filter.bytes()));
+
+ // Adding the same value shouldn't change anything.
+ filter.AddString("Bar");
+ EXPECT_EQ(1, CountBits(filter.bytes()));
+}
+
+} // namespace rappor
diff --git a/components/rappor/byte_vector_utils.cc b/components/rappor/byte_vector_utils.cc
new file mode 100644
index 0000000..57f2574
--- /dev/null
+++ b/components/rappor/byte_vector_utils.cc
@@ -0,0 +1,215 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/byte_vector_utils.h"
+
+#include <string>
+
+#include "base/logging.h"
+#include "base/rand_util.h"
+#include "base/strings/string_number_conversions.h"
+#include "crypto/random.h"
+
+namespace rappor {
+
+namespace {
+
+// Reinterpets a ByteVector as a StringPiece.
+base::StringPiece ByteVectorAsStringPiece(const ByteVector& lhs) {
+ return base::StringPiece(reinterpret_cast<const char *>(&lhs[0]), lhs.size());
+}
+
+// Concatenates parameters together as a string.
+std::string Concat(const ByteVector& value, char c, const std::string& data) {
+ return std::string(value.begin(), value.end()) + c + data;
+}
+
+// Performs the operation: K = HMAC(K, data)
+// The input "K" is passed by initializing |hmac| with it.
+// The output "K" is returned by initializing |result| with it.
+// Returns false on an error.
+bool HMAC_Rotate(const crypto::HMAC& hmac,
+ const std::string& data,
+ crypto::HMAC* result) {
+ ByteVector key(hmac.DigestLength());
+ if (!hmac.Sign(data, &key[0], key.size()))
+ return false;
+ return result->Init(ByteVectorAsStringPiece(key));
+}
+
+// Performs the operation: V = HMAC(K, V)
+// The input "K" is passed by initializing |hmac| with it.
+// "V" is read from and written to |value|.
+// Returns false on an error.
+bool HMAC_Rehash(const crypto::HMAC& hmac, ByteVector* value) {
+ return hmac.Sign(ByteVectorAsStringPiece(*value),
+ &(*value)[0], value->size());
+}
+
+// Implements (Key, V) = HMAC_DRBG_Update(provided_data, Key, V)
+// See: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf
+// "V" is read from and written to |value|.
+// The input "Key" is passed by initializing |hmac1| with it.
+// The output "Key" is returned by initializing |out_hmac| with it.
+// Returns false on an error.
+bool HMAC_DRBG_Update(const std::string& provided_data,
+ const crypto::HMAC& hmac1,
+ ByteVector* value,
+ crypto::HMAC* out_hmac) {
+ // HMAC_DRBG Update Process
+ crypto::HMAC temp_hmac(crypto::HMAC::SHA256);
+ crypto::HMAC* hmac2 = provided_data.size() > 0 ? &temp_hmac : out_hmac;
+ // 1. K = HMAC(K, V || 0x00 || provided_data)
+ if (!HMAC_Rotate(hmac1, Concat(*value, 0x00, provided_data), hmac2))
+ return false;
+ // 2. V = HMAC(K, V)
+ if (!HMAC_Rehash(*hmac2, value))
+ return false;
+ // 3. If (provided_data = Null), then return K and V.
+ if (hmac2 == out_hmac)
+ return true;
+ // 4. K = HMAC(K, V || 0x01 || provided_data)
+ if (!HMAC_Rotate(*hmac2, Concat(*value, 0x01, provided_data), out_hmac))
+ return false;
+ // 5. V = HMAC(K, V)
+ return HMAC_Rehash(*out_hmac, value);
+}
+
+} // namespace
+
+ByteVector* ByteVectorOr(const ByteVector& lhs, ByteVector* rhs) {
+ DCHECK_EQ(lhs.size(), rhs->size());
+ for (size_t i = 0, len = lhs.size(); i < len; ++i) {
+ (*rhs)[i] = lhs[i] | (*rhs)[i];
+ }
+ return rhs;
+}
+
+ByteVector* ByteVectorMerge(const ByteVector& mask,
+ const ByteVector& lhs,
+ ByteVector* rhs) {
+ DCHECK_EQ(lhs.size(), rhs->size());
+ for (size_t i = 0, len = lhs.size(); i < len; ++i) {
+ (*rhs)[i] = (lhs[i] & ~mask[i]) | ((*rhs)[i] & mask[i]);
+ }
+ return rhs;
+}
+
+int CountBits(const ByteVector& vector) {
+ int bit_count = 0;
+ for (size_t i = 0; i < vector.size(); ++i) {
+ uint8_t byte = vector[i];
+ for (int j = 0; j < 8 ; ++j) {
+ if (byte & (1 << j))
+ bit_count++;
+ }
+ }
+ return bit_count;
+}
+
+ByteVectorGenerator::ByteVectorGenerator(size_t byte_count)
+ : byte_count_(byte_count) {}
+
+ByteVectorGenerator::~ByteVectorGenerator() {}
+
+ByteVector ByteVectorGenerator::GetRandomByteVector() {
+ ByteVector bytes(byte_count_);
+ crypto::RandBytes(&bytes[0], bytes.size());
+ return bytes;
+}
+
+ByteVector ByteVectorGenerator::GetWeightedRandomByteVector(
+ Probability probability) {
+ ByteVector bytes = GetRandomByteVector();
+ switch (probability) {
+ case PROBABILITY_75:
+ return *ByteVectorOr(GetRandomByteVector(), &bytes);
+ case PROBABILITY_50:
+ return bytes;
+ }
+ NOTREACHED();
+ return bytes;
+}
+
+HmacByteVectorGenerator::HmacByteVectorGenerator(
+ size_t byte_count,
+ const std::string& entropy_input,
+ const std::string& personalization_string)
+ : ByteVectorGenerator(byte_count),
+ hmac_(crypto::HMAC::SHA256),
+ value_(hmac_.DigestLength(), 0x01),
+ generated_bytes_(0) {
+ // HMAC_DRBG Instantiate Process
+ // See: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf
+ // 1. seed_material = entropy_input + nonce + personalization_string
+ // Note: We are using the 8.6.7 interpretation, where the entropy_input and
+ // nonce are acquired at the same time from the same source.
+ DCHECK_EQ(kEntropyInputSize, entropy_input.size());
+ std::string seed_material(entropy_input + personalization_string);
+ // 2. Key = 0x00 00...00
+ crypto::HMAC hmac1(crypto::HMAC::SHA256);
+ if (!hmac1.Init(std::string(hmac_.DigestLength(), 0x00)))
+ NOTREACHED();
+ // 3. V = 0x01 01...01
+ // (value_ in initializer list)
+
+ // 4. (Key, V) = HMAC_DRBG_Update(seed_material, Key, V)
+ if (!HMAC_DRBG_Update(seed_material, hmac1, &value_, &hmac_))
+ NOTREACHED();
+}
+
+HmacByteVectorGenerator::~HmacByteVectorGenerator() {}
+
+HmacByteVectorGenerator::HmacByteVectorGenerator(
+ const HmacByteVectorGenerator& prev_request)
+ : ByteVectorGenerator(prev_request.byte_count()),
+ hmac_(crypto::HMAC::SHA256),
+ value_(prev_request.value_),
+ generated_bytes_(0) {
+ if (!HMAC_DRBG_Update("", prev_request.hmac_, &value_, &hmac_))
+ NOTREACHED();
+}
+
+// HMAC_DRBG requires entropy input to be security_strength bits long,
+// and nonce to be at least 1/2 security_strength bits long. We
+// generate them both as a single "extra strong" entropy input.
+// max_security_strength for SHA256 is 256 bits.
+// See: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf
+const size_t HmacByteVectorGenerator::kEntropyInputSize = (256 / 8) * 3 / 2;
+
+// static
+std::string HmacByteVectorGenerator::GenerateEntropyInput() {
+ return base::RandBytesAsString(kEntropyInputSize);
+}
+
+ByteVector HmacByteVectorGenerator::GetRandomByteVector() {
+ // Streams bytes from HMAC_DRBG_Generate
+ // See: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf
+ const size_t digest_length = hmac_.DigestLength();
+ DCHECK_EQ(value_.size(), digest_length);
+ ByteVector bytes(byte_count());
+ uint8_t* data = &bytes[0];
+ size_t bytes_to_go = byte_count();
+ while (bytes_to_go > 0) {
+ size_t requested_byte_in_digest = generated_bytes_ % digest_length;
+ if (requested_byte_in_digest == 0) {
+ // Do step 4.1 of the HMAC_DRBG Generate Process for more bits.
+ // V = HMAC(Key, V)
+ if (!HMAC_Rehash(hmac_, &value_))
+ NOTREACHED();
+ }
+ size_t n = std::min(bytes_to_go,
+ digest_length - requested_byte_in_digest);
+ memcpy(data, &value_[requested_byte_in_digest], n);
+ data += n;
+ bytes_to_go -= n;
+ generated_bytes_ += n;
+ // Check max_number_of_bits_per_request from 10.1 Table 2
+ // max_number_of_bits_per_request == 2^19 bits == 2^16 bytes
+ DCHECK_LT(generated_bytes_, 1U << 16);
+ }
+ return bytes;
+}
+
+} // namespace rappor
diff --git a/components/rappor/byte_vector_utils.h b/components/rappor/byte_vector_utils.h
new file mode 100644
index 0000000..9154cab
--- /dev/null
+++ b/components/rappor/byte_vector_utils.h
@@ -0,0 +1,110 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_RAPPOR_BYTE_VECTOR_UTILS_H_
+#define COMPONENTS_RAPPOR_BYTE_VECTOR_UTILS_H_
+
+#include <vector>
+
+#include "base/basictypes.h"
+#include "components/rappor/rappor_parameters.h"
+#include "crypto/hmac.h"
+
+namespace rappor {
+
+// A vector of 8-bit integers used to store a set of binary bits.
+typedef std::vector<uint8_t> ByteVector;
+
+// Computes a bitwise OR of byte vectors and stores the result in rhs.
+// Returns rhs for chaining.
+ByteVector* ByteVectorOr(const ByteVector& lhs, ByteVector* rhs);
+
+// Merges the contents of lhs and rhs vectors according to a mask vector.
+// The i-th bit of the result vector will be the i-th bit of either the lhs
+// or rhs vector, based on the i-th bit of the mask vector.
+// Equivalent to (lhs & ~mask) | (rhs & mask). Stores the result in rhs.
+// Returns rhs for chaining.
+ByteVector* ByteVectorMerge(const ByteVector& mask,
+ const ByteVector& lhs,
+ ByteVector* rhs);
+
+// Counts the number of bits set in the byte vector.
+int CountBits(const ByteVector& vector);
+
+// A utility object for generating random binary data with different
+// likelihood of bits being true, using entropy from crypto::RandBytes().
+class ByteVectorGenerator {
+ public:
+ explicit ByteVectorGenerator(size_t byte_count);
+
+ ~ByteVectorGenerator();
+
+ // Generates a random byte vector where the bits are independent random
+ // variables which are true with the given |probability|.
+ ByteVector GetWeightedRandomByteVector(Probability probability);
+
+ protected:
+ // Size of vectors to be generated.
+ size_t byte_count() const { return byte_count_; }
+
+ // Generates a random vector of bytes from a uniform distribution.
+ virtual ByteVector GetRandomByteVector();
+
+ private:
+ size_t byte_count_;
+
+ DISALLOW_COPY_AND_ASSIGN(ByteVectorGenerator);
+};
+
+// A ByteVectorGenerator that uses a psuedo-random function to generate a
+// deterministically random bits. This class only implements a single request
+// from HMAC_DRBG and streams up to 2^19 bits from that request.
+// Ref: http://csrc.nist.gov/publications/nistpubs/800-90A/SP800-90A.pdf
+// We're using our own PRNG instead of crypto::RandBytes because we need to
+// generate a repeatable sequence of bits from the same seed. Conservatively,
+// we're choosing to use HMAC_DRBG here, as it is one of the best studied
+// and standardized ways of generating deterministic, unpredictable sequences
+// based on a secret seed.
+class HmacByteVectorGenerator : public ByteVectorGenerator {
+ public:
+ // Constructor takes the size of the vector to generate, along with a
+ // |entropy_input| and |personalization_string| to seed the pseudo-random
+ // number generator. The string parameters are treated as byte arrays.
+ HmacByteVectorGenerator(size_t byte_count,
+ const std::string& entropy_input,
+ const std::string& personalization_string);
+
+ ~HmacByteVectorGenerator();
+
+ // Generates a random string suitable for passing to the constructor as
+ // |entropy_input|.
+ static std::string GenerateEntropyInput();
+
+ // Key size required for 128-bit security strength (including nonce).
+ static const size_t kEntropyInputSize;
+
+ protected:
+ // Generate byte vector generator that streams from the next request instead
+ // of the current one. For testing against NIST test vectors only.
+ explicit HmacByteVectorGenerator(const HmacByteVectorGenerator& prev_request);
+
+ // ByteVector implementation:
+ virtual ByteVector GetRandomByteVector() OVERRIDE;
+
+ private:
+ // HMAC initalized with the value of "Key" HMAC_DRBG_Initialize.
+ crypto::HMAC hmac_;
+
+ // The "V" value from HMAC_DRBG.
+ ByteVector value_;
+
+ // Total number of bytes streamed from the HMAC_DRBG Generate Process.
+ size_t generated_bytes_;
+
+ DISALLOW_ASSIGN(HmacByteVectorGenerator);
+};
+
+} // namespace rappor
+
+#endif // COMPONENTS_RAPPOR_BYTE_VECTOR_UTILS_H_
diff --git a/components/rappor/byte_vector_utils_unittest.cc b/components/rappor/byte_vector_utils_unittest.cc
new file mode 100644
index 0000000..54df9ea
--- /dev/null
+++ b/components/rappor/byte_vector_utils_unittest.cc
@@ -0,0 +1,128 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/byte_vector_utils.h"
+
+#include "base/rand_util.h"
+#include "base/strings/string_number_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace rappor {
+
+namespace {
+
+class SecondRequest : public HmacByteVectorGenerator {
+ public:
+ SecondRequest(const HmacByteVectorGenerator& first_request)
+ : HmacByteVectorGenerator(first_request) {}
+};
+
+std::string HexToString(const char* hex) {
+ ByteVector bv;
+ base::HexStringToBytes(hex, &bv);
+ return std::string(bv.begin(), bv.end());
+}
+
+} // namespace
+
+TEST(ByteVectorTest, ByteVectorOr) {
+ ByteVector lhs(2);
+ lhs[1] = 0x12;
+ ByteVector rhs(2);
+ rhs[1] = 0x03;
+
+ EXPECT_EQ(0x13, (*ByteVectorOr(lhs, &rhs))[1]);
+}
+
+TEST(ByteVectorTest, ByteVectorMerge) {
+ ByteVector lhs(2);
+ lhs[1] = 0x33;
+ ByteVector rhs(2);
+ rhs[1] = 0x55;
+ ByteVector mask(2);
+ mask[1] = 0x0f;
+
+ EXPECT_EQ(0x35, (*ByteVectorMerge(mask, lhs, &rhs))[1]);
+}
+
+TEST(ByteVectorTest, ByteVectorGenerator) {
+ ByteVectorGenerator generator(2u);
+ ByteVector random_50 = generator.GetWeightedRandomByteVector(PROBABILITY_50);
+ EXPECT_EQ(random_50.size(), 2u);
+ ByteVector random_75 = generator.GetWeightedRandomByteVector(PROBABILITY_75);
+ EXPECT_EQ(random_75.size(), 2u);
+}
+
+TEST(ByteVectorTest, HmacByteVectorGenerator) {
+ HmacByteVectorGenerator generator(1u,
+ std::string(HmacByteVectorGenerator::kEntropyInputSize, 0x00), "");
+ ByteVector random_50 = generator.GetWeightedRandomByteVector(PROBABILITY_50);
+ EXPECT_EQ(random_50.size(), 1u);
+ EXPECT_EQ(random_50[0], 0x0B);
+ ByteVector random_75 = generator.GetWeightedRandomByteVector(PROBABILITY_75);
+ EXPECT_EQ(random_75.size(), 1u);
+ EXPECT_EQ(random_75[0], 0xdf);
+}
+
+TEST(ByteVectorTest, HmacNist) {
+ // Test case 0 for SHA-256 HMAC_DRBG no reseed tests from
+ // http://csrc.nist.gov/groups/STM/cavp/
+ const char entropy[] =
+ "ca851911349384bffe89de1cbdc46e6831e44d34a4fb935ee285dd14b71a7488";
+ const char nonce[] = "659ba96c601dc69fc902940805ec0ca8";
+ const char expected[] =
+ "e528e9abf2dece54d47c7e75e5fe302149f817ea9fb4bee6f4199697d04d5b89"
+ "d54fbb978a15b5c443c9ec21036d2460b6f73ebad0dc2aba6e624abf07745bc1"
+ "07694bb7547bb0995f70de25d6b29e2d3011bb19d27676c07162c8b5ccde0668"
+ "961df86803482cb37ed6d5c0bb8d50cf1f50d476aa0458bdaba806f48be9dcb8";
+
+ std::string entropy_input = HexToString(entropy) + HexToString(nonce);
+ HmacByteVectorGenerator generator(1024u / 8, entropy_input, "");
+ generator.GetWeightedRandomByteVector(PROBABILITY_50);
+ SecondRequest generator2(generator);
+ ByteVector random_50 = generator2.GetWeightedRandomByteVector(PROBABILITY_50);
+
+ EXPECT_EQ(HexToString(expected),
+ std::string(random_50.begin(), random_50.end()));
+}
+
+TEST(ByteVectorTest, WeightedRandomStatistics50) {
+ ByteVectorGenerator generator(50u);
+ ByteVector random = generator.GetWeightedRandomByteVector(PROBABILITY_50);
+ int bit_count = CountBits(random);
+ // Check bounds on bit counts that are true with 99.999% probability.
+ EXPECT_GT(bit_count, 155); // Binomial(400, .5) CDF(155) ~= 0.000004
+ EXPECT_LE(bit_count, 244); // Binomial(400, .5) CDF(244) ~= 0.999996
+}
+
+TEST(ByteVectorTest, WeightedRandomStatistics75) {
+ ByteVectorGenerator generator(50u);
+ ByteVector random = generator.GetWeightedRandomByteVector(PROBABILITY_75);
+ int bit_count = CountBits(random);
+ // Check bounds on bit counts that are true with 99.999% probability.
+ EXPECT_GT(bit_count, 259); // Binomial(400, .75) CDF(259) ~= 0.000003
+ EXPECT_LE(bit_count, 337); // Binomial(400, .75) CDF(337) ~= 0.999997
+}
+
+TEST(ByteVectorTest, HmacWeightedRandomStatistics50) {
+ HmacByteVectorGenerator generator(50u,
+ HmacByteVectorGenerator::GenerateEntropyInput(), "");
+ ByteVector random = generator.GetWeightedRandomByteVector(PROBABILITY_50);
+ int bit_count = CountBits(random);
+ // Check bounds on bit counts that are true with 99.999% probability.
+ EXPECT_GT(bit_count, 155); // Binomial(400, .5) CDF(155) ~= 0.000004
+ EXPECT_LE(bit_count, 244); // Binomial(400, .5) CDF(244) ~= 0.999996
+}
+
+TEST(ByteVectorTest, HmacWeightedRandomStatistics75) {
+ HmacByteVectorGenerator generator(50u,
+ HmacByteVectorGenerator::GenerateEntropyInput(), "");
+ ByteVector random = generator.GetWeightedRandomByteVector(PROBABILITY_75);
+ int bit_count = CountBits(random);
+ // Check bounds on bit counts that are true with 99.999% probability.
+ EXPECT_GT(bit_count, 259); // Binomial(400, .75) CDF(259) ~= 0.000003
+ EXPECT_LE(bit_count, 337); // Binomial(400, .75) CDF(337) ~= 0.999997
+}
+
+} // namespace rappor
diff --git a/components/rappor/log_uploader.cc b/components/rappor/log_uploader.cc
new file mode 100644
index 0000000..4837ba1
--- /dev/null
+++ b/components/rappor/log_uploader.cc
@@ -0,0 +1,149 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/log_uploader.h"
+
+#include "base/metrics/histogram.h"
+#include "base/metrics/sparse_histogram.h"
+#include "net/base/load_flags.h"
+#include "net/url_request/url_fetcher.h"
+
+namespace {
+
+// The delay, in seconds, between uploading when there are queued logs to send.
+const int kUnsentLogsIntervalSeconds = 3;
+
+// When uploading metrics to the server fails, we progressively wait longer and
+// longer before sending the next log. This backoff process helps reduce load
+// on a server that is having issues.
+// The following is the multiplier we use to expand that inter-log duration.
+const double kBackoffMultiplier = 1.1;
+
+// The maximum backoff multiplier.
+const int kMaxBackoffIntervalSeconds = 60 * 60;
+
+// The maximum number of unsent logs we will keep.
+// TODO(holte): Limit based on log size instead.
+const size_t kMaxQueuedLogs = 10;
+
+enum DiscardReason {
+ UPLOAD_SUCCESS,
+ UPLOAD_REJECTED,
+ QUEUE_OVERFLOW,
+ NUM_DISCARD_REASONS
+};
+
+} // namespace
+
+namespace rappor {
+
+LogUploader::LogUploader(const GURL& server_url,
+ const std::string& mime_type,
+ net::URLRequestContextGetter* request_context)
+ : server_url_(server_url),
+ mime_type_(mime_type),
+ request_context_(request_context),
+ has_callback_pending_(false),
+ upload_interval_(base::TimeDelta::FromSeconds(
+ kUnsentLogsIntervalSeconds)) {
+}
+
+LogUploader::~LogUploader() {}
+
+void LogUploader::QueueLog(const std::string& log) {
+ queued_logs_.push(log);
+ if (!IsUploadScheduled() && !has_callback_pending_)
+ StartScheduledUpload();
+}
+
+bool LogUploader::IsUploadScheduled() const {
+ return upload_timer_.IsRunning();
+}
+
+void LogUploader::ScheduleNextUpload(base::TimeDelta interval) {
+ if (IsUploadScheduled() || has_callback_pending_)
+ return;
+
+ upload_timer_.Start(
+ FROM_HERE, interval, this, &LogUploader::StartScheduledUpload);
+}
+
+void LogUploader::StartScheduledUpload() {
+ DCHECK(!has_callback_pending_);
+ has_callback_pending_ = true;
+ current_fetch_.reset(
+ net::URLFetcher::Create(server_url_, net::URLFetcher::POST, this));
+ current_fetch_->SetRequestContext(request_context_.get());
+ current_fetch_->SetUploadData(mime_type_, queued_logs_.front());
+
+ // We already drop cookies server-side, but we might as well strip them out
+ // client-side as well.
+ current_fetch_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
+ net::LOAD_DO_NOT_SEND_COOKIES);
+ current_fetch_->Start();
+}
+
+// static
+base::TimeDelta LogUploader::BackOffUploadInterval(base::TimeDelta interval) {
+ DCHECK_GT(kBackoffMultiplier, 1.0);
+ interval = base::TimeDelta::FromMicroseconds(static_cast<int64>(
+ kBackoffMultiplier * interval.InMicroseconds()));
+
+ base::TimeDelta max_interval =
+ base::TimeDelta::FromSeconds(kMaxBackoffIntervalSeconds);
+ return interval > max_interval ? max_interval : interval;
+}
+
+void LogUploader::OnURLFetchComplete(const net::URLFetcher* source) {
+ // We're not allowed to re-use the existing |URLFetcher|s, so free them here.
+ // Note however that |source| is aliased to the fetcher, so we should be
+ // careful not to delete it too early.
+ DCHECK_EQ(current_fetch_.get(), source);
+ scoped_ptr<net::URLFetcher> fetch(current_fetch_.Pass());
+
+ int response_code = source->GetResponseCode();
+
+ // Log a histogram to track response success vs. failure rates.
+ UMA_HISTOGRAM_SPARSE_SLOWLY("Rappor.UploadResponseCode", response_code);
+
+ bool upload_succeeded = response_code == 200;
+
+ // Determine whether this log should be retransmitted.
+ DiscardReason reason = NUM_DISCARD_REASONS;
+ if (upload_succeeded) {
+ reason = UPLOAD_SUCCESS;
+ } else if (response_code == 400) {
+ reason = UPLOAD_REJECTED;
+ } else if (queued_logs_.size() > kMaxQueuedLogs) {
+ reason = QUEUE_OVERFLOW;
+ }
+
+ if (reason != NUM_DISCARD_REASONS) {
+ UMA_HISTOGRAM_ENUMERATION("Rappor.DiscardReason",
+ reason,
+ NUM_DISCARD_REASONS);
+ queued_logs_.pop();
+ }
+
+ // Error 400 indicates a problem with the log, not with the server, so
+ // don't consider that a sign that the server is in trouble.
+ bool server_is_healthy = upload_succeeded || response_code == 400;
+ OnUploadFinished(server_is_healthy, !queued_logs_.empty());
+}
+
+void LogUploader::OnUploadFinished(bool server_is_healthy,
+ bool more_logs_remaining) {
+ DCHECK(has_callback_pending_);
+ has_callback_pending_ = false;
+ // If the server is having issues, back off. Otherwise, reset to default.
+ if (!server_is_healthy)
+ upload_interval_ = BackOffUploadInterval(upload_interval_);
+ else
+ upload_interval_ = base::TimeDelta::FromSeconds(kUnsentLogsIntervalSeconds);
+
+ if (more_logs_remaining)
+ ScheduleNextUpload(upload_interval_);
+}
+
+} // namespace rappor
diff --git a/components/rappor/log_uploader.h b/components/rappor/log_uploader.h
new file mode 100644
index 0000000..30a129a
--- /dev/null
+++ b/components/rappor/log_uploader.h
@@ -0,0 +1,94 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_RAPPOR_LOG_UPLOADER_H_
+#define COMPONENTS_RAPPOR_LOG_UPLOADER_H_
+
+#include <queue>
+#include <string>
+
+#include "base/memory/scoped_ptr.h"
+#include "base/time/time.h"
+#include "base/timer/timer.h"
+#include "net/url_request/url_fetcher_delegate.h"
+#include "net/url_request/url_request_context_getter.h"
+#include "url/gurl.h"
+
+namespace net {
+class URLFetcher;
+}
+
+namespace rappor {
+
+// Handles uploading logs to an external server.
+class LogUploader : public net::URLFetcherDelegate {
+ public:
+ // Constructor takes the server_url that logs should be uploaded to, the
+ // mime_type of the uploaded data, and request_context to create uploads
+ // with.
+ LogUploader(const GURL& server_url,
+ const std::string& mime_type,
+ net::URLRequestContextGetter* request_context);
+
+ virtual ~LogUploader();
+
+ // Adds an entry to the queue of logs to be uploaded to the server. The
+ // uploader makes no assumptions about the format of |log| and simply sends
+ // it verbatim to the server.
+ void QueueLog(const std::string& log);
+
+ protected:
+ // Check if an upload has been scheduled.
+ virtual bool IsUploadScheduled() const;
+
+ // Schedules a future call to StartScheduledUpload if one isn't already
+ // pending. Can be overridden for testing.
+ virtual void ScheduleNextUpload(base::TimeDelta interval);
+
+ // Starts transmission of the next log. Exposed for tests.
+ void StartScheduledUpload();
+
+ // Increases the upload interval each time it's called, to handle the case
+ // where the server is having issues. Exposed for tests.
+ static base::TimeDelta BackOffUploadInterval(base::TimeDelta);
+
+ private:
+ // Implementation of net::URLFetcherDelegate. Called after transmission
+ // completes (either successfully or with failure).
+ virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
+
+ // Called when the upload is completed.
+ void OnUploadFinished(bool server_is_healthy, bool more_logs_remaining);
+
+ // The server URL to upload logs to.
+ const GURL server_url_;
+
+ // The mime type to specify on uploaded logs.
+ const std::string mime_type_;
+
+ // The request context used to send uploads.
+ scoped_refptr<net::URLRequestContextGetter> request_context_;
+
+ // The outstanding transmission that appears as a URL Fetch operation.
+ scoped_ptr<net::URLFetcher> current_fetch_;
+
+ // The logs that still need to be uploaded.
+ std::queue<std::string> queued_logs_;
+
+ // A timer used to delay before attempting another upload.
+ base::OneShotTimer<LogUploader> upload_timer_;
+
+ // Indicates that the last triggered upload hasn't resolved yet.
+ bool has_callback_pending_;
+
+ // The interval to wait after an upload's URLFetcher completion before
+ // starting the next upload attempt.
+ base::TimeDelta upload_interval_;
+
+ DISALLOW_COPY_AND_ASSIGN(LogUploader);
+};
+
+} // namespace rappor
+
+#endif // COMPONENTS_RAPPOR_LOG_UPLOADER_H_
diff --git a/components/rappor/log_uploader_unittest.cc b/components/rappor/log_uploader_unittest.cc
new file mode 100644
index 0000000..46db7db
--- /dev/null
+++ b/components/rappor/log_uploader_unittest.cc
@@ -0,0 +1,153 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/log_uploader.h"
+
+#include "base/message_loop/message_loop_proxy.h"
+#include "net/url_request/test_url_fetcher_factory.h"
+#include "net/url_request/url_request_test_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace rappor {
+
+namespace {
+
+const char kTestServerURL[] = "http://a.com/";
+const char kTestMimeType[] = "text/plain";
+
+class TestLogUploader : public LogUploader {
+ public:
+ TestLogUploader(net::URLRequestContextGetter* request_context) :
+ LogUploader(GURL(kTestServerURL), kTestMimeType, request_context) {
+ }
+
+ base::TimeDelta last_interval_set() const { return last_interval_set_; };
+
+ void StartUpload() {
+ last_interval_set_ = base::TimeDelta();
+ StartScheduledUpload();
+ }
+
+ static base::TimeDelta BackOff(base::TimeDelta t) {
+ return LogUploader::BackOffUploadInterval(t);
+ }
+
+ protected:
+ virtual bool IsUploadScheduled() const OVERRIDE {
+ return last_interval_set() != base::TimeDelta();
+ }
+
+ // Schedules a future call to StartScheduledUpload if one isn't already
+ // pending.
+ virtual void ScheduleNextUpload(base::TimeDelta interval) OVERRIDE {
+ EXPECT_EQ(last_interval_set(), base::TimeDelta());
+ last_interval_set_ = interval;
+ }
+
+ base::TimeDelta last_interval_set_;
+
+ DISALLOW_COPY_AND_ASSIGN(TestLogUploader);
+};
+
+} // namespace
+
+class LogUploaderTest : public testing::Test {
+ public:
+ LogUploaderTest()
+ : request_context_(new net::TestURLRequestContextGetter(
+ base::MessageLoopProxy::current())),
+ factory_(NULL) {}
+
+ protected:
+ // Required for base::MessageLoopProxy::current().
+ base::MessageLoopForUI loop_;
+ scoped_refptr<net::TestURLRequestContextGetter> request_context_;
+ net::FakeURLFetcherFactory factory_;
+
+ DISALLOW_COPY_AND_ASSIGN(LogUploaderTest);
+};
+
+TEST_F(LogUploaderTest, Success) {
+ TestLogUploader uploader(request_context_);
+
+ factory_.SetFakeResponse(GURL(kTestServerURL),
+ std::string(),
+ net::HTTP_OK,
+ net::URLRequestStatus::SUCCESS);
+
+ uploader.QueueLog("log1");
+ base::MessageLoop::current()->RunUntilIdle();
+ // Log should be discarded instead of retransmitted.
+ EXPECT_EQ(uploader.last_interval_set(), base::TimeDelta());
+}
+
+TEST_F(LogUploaderTest, Rejection) {
+ TestLogUploader uploader(request_context_);
+
+ factory_.SetFakeResponse(GURL(kTestServerURL),
+ std::string(),
+ net::HTTP_BAD_REQUEST,
+ net::URLRequestStatus::SUCCESS);
+
+ uploader.QueueLog("log1");
+ base::MessageLoop::current()->RunUntilIdle();
+ // Log should be discarded instead of retransmitted.
+ EXPECT_EQ(uploader.last_interval_set(), base::TimeDelta());
+}
+
+TEST_F(LogUploaderTest, Failure) {
+ TestLogUploader uploader(request_context_);
+
+ factory_.SetFakeResponse(GURL(kTestServerURL),
+ std::string(),
+ net::HTTP_INTERNAL_SERVER_ERROR,
+ net::URLRequestStatus::SUCCESS);
+
+ uploader.QueueLog("log1");
+ base::MessageLoop::current()->RunUntilIdle();
+ // Log should be scheduled for retransmission.
+ base::TimeDelta error_interval = uploader.last_interval_set();
+ EXPECT_GT(error_interval, base::TimeDelta());
+
+ for (int i = 0; i < 10; i++) {
+ uploader.QueueLog("logX");
+ }
+
+ // A second failure should lead to a longer interval, and the log should
+ // be discarded due to full queue.
+ uploader.StartUpload();
+ base::MessageLoop::current()->RunUntilIdle();
+ EXPECT_GT(uploader.last_interval_set(), error_interval);
+
+ factory_.SetFakeResponse(GURL(kTestServerURL),
+ std::string(),
+ net::HTTP_OK,
+ net::URLRequestStatus::SUCCESS);
+
+ // A success should revert to base interval while queue is not empty.
+ for (int i = 0; i < 9; i++) {
+ uploader.StartUpload();
+ base::MessageLoop::current()->RunUntilIdle();
+ EXPECT_LT(uploader.last_interval_set(), error_interval);
+ }
+
+ // Queue should be empty.
+ uploader.StartUpload();
+ base::MessageLoop::current()->RunUntilIdle();
+ EXPECT_EQ(uploader.last_interval_set(), base::TimeDelta());
+}
+
+TEST_F(LogUploaderTest, Backoff) {
+ base::TimeDelta current = base::TimeDelta();
+ base::TimeDelta next = base::TimeDelta::FromSeconds(1);
+ // Backoff until the maximum is reached.
+ while (next > current) {
+ current = next;
+ next = TestLogUploader::BackOff(current);
+ }
+ // Maximum backoff should have been reached.
+ EXPECT_EQ(next, current);
+}
+
+} // namespace rappor
diff --git a/components/rappor/proto/rappor_metric.proto b/components/rappor/proto/rappor_metric.proto
new file mode 100644
index 0000000..e5efe21
--- /dev/null
+++ b/components/rappor/proto/rappor_metric.proto
@@ -0,0 +1,39 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// Contains information collected by the RAPPOR (Randomized Aggregatable
+// Privacy-Preserving Ordinal Responses) system.
+//
+// For a full description of the rappor metrics, see
+// http://www.chromium.org/developers/design-documents/rappor
+
+syntax = "proto2";
+
+option optimize_for = LITE_RUNTIME;
+
+package rappor;
+
+// Next tag: 3
+message RapporReports {
+ // Which cohort these reports belong to. The RAPPOR participants are
+ // partioned into cohorts in different ways, to allow better statistics and
+ // increased coverage. In particular, the cohort will serve to choose the
+ // hash functions used for Bloom-filter-based reports. The cohort is
+ // generated randomly by the client and is currently in the range [0,32).
+ optional int32 cohort = 1;
+
+ // Each Report contains the values generated by the RAPPOR process for one
+ // metric.
+ message Report {
+ // The name of the metric, hashed (first 8 bytes of MD5 hash).
+ optional fixed64 name_hash = 1;
+
+ // The sequence of bits produced by random coin flips in
+ // RapporMetric::GetReport(). For a complete description of RAPPOR
+ // metrics, refer to the design document at:
+ // http://www.chromium.org/developers/design-documents/rappor
+ optional bytes bits = 2;
+ }
+ repeated Report report = 2;
+}
diff --git a/components/rappor/rappor_metric.cc b/components/rappor/rappor_metric.cc
new file mode 100644
index 0000000..2da3b31
--- /dev/null
+++ b/components/rappor/rappor_metric.cc
@@ -0,0 +1,60 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/rappor_metric.h"
+
+#include "base/logging.h"
+
+namespace rappor {
+
+RapporMetric::RapporMetric(const std::string& metric_name,
+ const RapporParameters& parameters,
+ int32_t cohort)
+ : metric_name_(metric_name),
+ parameters_(parameters),
+ bloom_filter_(parameters.bloom_filter_size_bytes,
+ parameters.bloom_filter_hash_function_count,
+ cohort * parameters.bloom_filter_hash_function_count) {
+ DCHECK_GE(cohort, 0);
+}
+
+RapporMetric::~RapporMetric() {}
+
+void RapporMetric::AddSample(const std::string& str) {
+ bloom_filter_.AddString(str);
+}
+
+ByteVector RapporMetric::GetReport(const std::string& secret) const {
+ // Generate a deterministically random mask of fake data using the
+ // client's secret key + real data as a seed. The inclusion of the secret
+ // in the seed avoids correlations between real and fake data.
+ // The seed isn't a human-readable string.
+ std::string personalization_string = metric_name_ +
+ std::string(bytes().begin(), bytes().end());
+ HmacByteVectorGenerator hmac_generator(bytes().size(), secret,
+ personalization_string);
+ const ByteVector fake_mask =
+ hmac_generator.GetWeightedRandomByteVector(parameters().fake_prob);
+ ByteVector fake_bits =
+ hmac_generator.GetWeightedRandomByteVector(parameters().fake_one_prob);
+
+ // Redact most of the real data by replacing it with the fake data, hiding
+ // and limiting the amount of information an individual client reports on.
+ const ByteVector* fake_and_redacted_bits =
+ ByteVectorMerge(fake_mask, bytes(), &fake_bits);
+
+ // Generate biased coin flips for each bit.
+ ByteVectorGenerator coin_generator(bytes().size());
+ const ByteVector zero_coins =
+ coin_generator.GetWeightedRandomByteVector(parameters().zero_coin_prob);
+ ByteVector one_coins =
+ coin_generator.GetWeightedRandomByteVector(parameters().one_coin_prob);
+
+ // Create a randomized response report on the fake and redacted data, sending
+ // the outcome of flipping a zero coin for the zero bits in that data, and of
+ // flipping a one coin for the one bits in that data, as the final report.
+ return *ByteVectorMerge(*fake_and_redacted_bits, zero_coins, &one_coins);
+}
+
+} // namespace rappor
diff --git a/components/rappor/rappor_metric.h b/components/rappor/rappor_metric.h
new file mode 100644
index 0000000..30147c4
--- /dev/null
+++ b/components/rappor/rappor_metric.h
@@ -0,0 +1,56 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_RAPPOR_RAPPOR_H_
+#define COMPONENTS_RAPPOR_RAPPOR_H_
+
+#include <string>
+
+#include "components/rappor/bloom_filter.h"
+#include "components/rappor/byte_vector_utils.h"
+#include "components/rappor/rappor_parameters.h"
+
+namespace rappor {
+
+// A RapporMetric is an object that collects string samples into a Bloom filter,
+// and generates randomized reports about the collected data.
+//
+// For a full description of the rappor metrics, see
+// http://www.chromium.org/developers/design-documents/rappor
+class RapporMetric {
+ public:
+ // Takes the |metric_name| that this will be reported to the server with,
+ // a |parameters| describing size and probability weights to be used in
+ // recording this metric, and cohort value, which modifies the hash
+ // functions and used in the bloom filter.
+ explicit RapporMetric(const std::string& metric_name,
+ const RapporParameters& parameters,
+ int32_t cohort);
+ ~RapporMetric();
+
+ // Records an additional sample in the Bloom filter.
+ void AddSample(const std::string& str);
+
+ // Retrieves the current Bloom filter bits.
+ const ByteVector& bytes() const { return bloom_filter_.bytes(); }
+
+ // Gets the parameter values this metric was constructed with.
+ const RapporParameters& parameters() const { return parameters_; }
+
+ // Generates the bits to report for this metric. Using the secret as a seed,
+ // randomly selects bits for redaction. Then flips coins to generate the
+ // final report bits.
+ ByteVector GetReport(const std::string& secret) const;
+
+ private:
+ const std::string metric_name_;
+ const RapporParameters parameters_;
+ BloomFilter bloom_filter_;
+
+ DISALLOW_COPY_AND_ASSIGN(RapporMetric);
+};
+
+} // namespace rappor
+
+#endif // COMPONENTS_RAPPOR_RAPPOR_H_
diff --git a/components/rappor/rappor_metric_unittest.cc b/components/rappor/rappor_metric_unittest.cc
new file mode 100644
index 0000000..bbcb1ab
--- /dev/null
+++ b/components/rappor/rappor_metric_unittest.cc
@@ -0,0 +1,97 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/rappor_metric.h"
+
+#include <stdlib.h>
+
+#include "base/rand_util.h"
+#include "base/strings/stringprintf.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace rappor {
+
+const RapporParameters kTestRapporParameters = {
+ 16 /* Bloom filter size bytes */,
+ 4 /* Bloom filter hash count */,
+ PROBABILITY_75 /* Fake data probability */,
+ PROBABILITY_50 /* Fake one probability */,
+ PROBABILITY_75 /* One coin probability */,
+ PROBABILITY_50 /* Zero coin probability */
+};
+
+const RapporParameters kTestStatsRapporParameters = {
+ 50 /* Bloom filter size bytes */,
+ 4 /* Bloom filter hash count */,
+ PROBABILITY_75 /* Fake data probability */,
+ PROBABILITY_50 /* Fake one probability */,
+ PROBABILITY_75 /* One coin probability */,
+ PROBABILITY_50 /* Zero coin probability */
+};
+
+// Check for basic syntax and use.
+TEST(RapporMetricTest, BasicMetric) {
+ RapporMetric testMetric("MyRappor", kTestRapporParameters, 0);
+ testMetric.AddSample("Foo");
+ testMetric.AddSample("Bar");
+ EXPECT_EQ(0x80, testMetric.bytes()[1]);
+}
+
+TEST(RapporMetricTest, GetReport) {
+ RapporMetric metric("MyRappor", kTestRapporParameters, 0);
+
+ ByteVector report = metric.GetReport(
+ HmacByteVectorGenerator::GenerateEntropyInput());
+ EXPECT_EQ(16u, report.size());
+}
+
+TEST(RapporMetricTest, GetReportStatistics) {
+ RapporMetric metric("MyStatsRappor", kTestStatsRapporParameters, 0);
+
+ for (char i = 0; i < 50; i++) {
+ metric.AddSample(base::StringPrintf("%d", i));
+ }
+ ByteVector real_bits = metric.bytes();
+ int real_bit_count = CountBits(real_bits);
+ EXPECT_EQ(real_bit_count, 152);
+
+ std::string secret = HmacByteVectorGenerator::GenerateEntropyInput();
+ ByteVector report = metric.GetReport(secret);
+
+ // For the bits we actually set in the bloom filter, get a count of how
+ // many of them reported true.
+ ByteVector from_true_reports = report;
+ // Real bits AND report bits.
+ ByteVectorMerge(real_bits, real_bits, &from_true_reports);
+ int true_from_true_count = CountBits(from_true_reports);
+
+ // For the bits we didn't set in the bloom filter, get a count of how
+ // many of them reported true.
+ ByteVector from_false_reports = report;
+ ByteVectorOr(real_bits, &from_false_reports);
+ int true_from_false_count = CountBits(from_false_reports) - real_bit_count;
+
+ // The probability of a true bit being true after redaction =
+ // [fake_prob]*[fake_true_prob] + (1-[fake_prob]) =
+ // .75 * .5 + (1-.75) = .625
+ // The probablity of a false bit being true after redaction =
+ // [fake_prob]*[fake_true_prob] = .375
+ // The probability of a bit reporting true =
+ // [redacted_prob] * [one_coin_prob:.75] +
+ // (1-[redacted_prob]) * [zero_coin_prob:.5] =
+ // 0.65625 for true bits
+ // 0.59375 for false bits
+
+ // stats.binom(152, 0.65625).ppf(0.000005) = 73
+ EXPECT_GT(true_from_true_count, 73);
+ // stats.binom(152, 0.65625).ppf(0.999995) = 124
+ EXPECT_LE(true_from_true_count, 124);
+
+ // stats.binom(248, 0.59375).ppf(.000005) = 113
+ EXPECT_GT(true_from_false_count, 113);
+ // stats.binom(248, 0.59375).ppf(.999995) = 181
+ EXPECT_LE(true_from_false_count, 181);
+}
+
+} // namespace rappor
diff --git a/components/rappor/rappor_parameters.cc b/components/rappor/rappor_parameters.cc
new file mode 100644
index 0000000..ede7636
--- /dev/null
+++ b/components/rappor/rappor_parameters.cc
@@ -0,0 +1,21 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/rappor_parameters.h"
+
+#include "base/strings/stringprintf.h"
+
+namespace rappor {
+
+std::string RapporParameters::ToString() const {
+ return base::StringPrintf("{ %d, %d, %d, %d, %d, %d }",
+ bloom_filter_size_bytes,
+ bloom_filter_hash_function_count,
+ fake_prob,
+ fake_one_prob,
+ one_coin_prob,
+ zero_coin_prob);
+}
+
+} // namespace rappor
diff --git a/components/rappor/rappor_parameters.h b/components/rappor/rappor_parameters.h
new file mode 100644
index 0000000..77b1614
--- /dev/null
+++ b/components/rappor/rappor_parameters.h
@@ -0,0 +1,43 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_RAPPOR_RAPPOR_PARAMETERS_H_
+#define COMPONENTS_RAPPOR_RAPPOR_PARAMETERS_H_
+
+#include <string>
+
+namespace rappor {
+
+enum Probability {
+ PROBABILITY_75, // 75%
+ PROBABILITY_50, // 50%
+};
+
+// An object describing a rappor metric and the parameters used to generate it.
+//
+// For a full description of the rappor metrics, see
+// http://www.chromium.org/developers/design-documents/rappor
+struct RapporParameters {
+ // Get a string representing the parameters, for DCHECK_EQ.
+ std::string ToString() const;
+
+ // The number of bytes stored in the Bloom filter.
+ int bloom_filter_size_bytes;
+ // The number of hash functions used in the Bloom filter.
+ int bloom_filter_hash_function_count;
+
+ // The probability that a bit will be redacted with fake data.
+ Probability fake_prob;
+ // The probability that a fake bit will be a one.
+ Probability fake_one_prob;
+
+ // The probability that a one bit in the redacted data reports as one.
+ Probability one_coin_prob;
+ // The probability that a zero bit in the redacted data reports as one.
+ Probability zero_coin_prob;
+};
+
+} // namespace rappor
+
+#endif // COMPONENTS_RAPPOR_RAPPOR_PARAMETERS_H_
diff --git a/components/rappor/rappor_pref_names.cc b/components/rappor/rappor_pref_names.cc
new file mode 100644
index 0000000..e62c764
--- /dev/null
+++ b/components/rappor/rappor_pref_names.cc
@@ -0,0 +1,20 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/rappor_pref_names.h"
+
+namespace rappor {
+namespace prefs {
+
+// A randomly generated number, which determines cohort data is reported for.
+const char kRapporCohort[] = "rappor.cohort";
+
+// A base-64 encoded, randomly generated byte string, which is used as a seed
+// for redacting collected data.
+// Important: This value should remain secret at the client, and never be
+// reported on the network, or to the server.
+const char kRapporSecret[] = "rappor.secret";
+
+} // namespace prefs
+} // namespace rappor
diff --git a/components/rappor/rappor_pref_names.h b/components/rappor/rappor_pref_names.h
new file mode 100644
index 0000000..ec42b76
--- /dev/null
+++ b/components/rappor/rappor_pref_names.h
@@ -0,0 +1,19 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_RAPPOR_RAPPOR_PREF_NAMES_H_
+#define COMPONENTS_RAPPOR_RAPPOR_PREF_NAMES_H_
+
+namespace rappor {
+namespace prefs {
+
+// Alphabetical list of preference names specific to the Rappor
+// component. Keep alphabetized, and document each in the .cc file.
+extern const char kRapporCohort[];
+extern const char kRapporSecret[];
+
+} // namespace prefs
+} // namespace rappor
+
+#endif // COMPONENTS_RAPPOR_RAPPOR_PREF_NAMES_H_
diff --git a/components/rappor/rappor_service.cc b/components/rappor/rappor_service.cc
new file mode 100644
index 0000000..493e7b9
--- /dev/null
+++ b/components/rappor/rappor_service.cc
@@ -0,0 +1,187 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/rappor_service.h"
+
+#include "base/base64.h"
+#include "base/metrics/field_trial.h"
+#include "base/prefs/pref_registry_simple.h"
+#include "base/prefs/pref_service.h"
+#include "base/rand_util.h"
+#include "base/stl_util.h"
+#include "components/metrics/metrics_hashes.h"
+#include "components/rappor/proto/rappor_metric.pb.h"
+#include "components/rappor/rappor_pref_names.h"
+#include "components/variations/variations_associated_data.h"
+
+namespace rappor {
+
+namespace {
+
+// The number of cohorts we divide clients into.
+const int kNumCohorts = 32;
+
+// Seconds before the initial log is generated.
+const int kInitialLogIntervalSeconds = 15;
+// Interval between ongoing logs.
+const int kLogIntervalSeconds = 30 * 60;
+
+const char kMimeType[] = "application/vnd.chrome.rappor";
+
+// Constants for the RAPPOR rollout field trial.
+const char kRapporRolloutFieldTrialName[] = "RapporRollout";
+
+// Constant for the finch parameter name for the server URL
+const char kRapporRolloutServerUrlParam[] = "ServerUrl";
+
+GURL GetServerUrl() {
+ return GURL(chrome_variations::GetVariationParamValue(
+ kRapporRolloutFieldTrialName,
+ kRapporRolloutServerUrlParam));
+}
+
+const RapporParameters kRapporParametersForType[NUM_RAPPOR_TYPES] = {
+ { // ETLD_PLUS_ONE_RAPPOR_TYPE
+ 16 /* Bloom filter size bytes */,
+ 2 /* Bloom filter hash count */,
+ rappor::PROBABILITY_75 /* Fake data probability */,
+ rappor::PROBABILITY_50 /* Fake one probability */,
+ rappor::PROBABILITY_75 /* One coin probability */,
+ rappor::PROBABILITY_50 /* Zero coin probability */
+ },
+};
+
+} // namespace
+
+RapporService::RapporService() : cohort_(-1) {}
+
+RapporService::~RapporService() {
+ STLDeleteValues(&metrics_map_);
+}
+
+void RapporService::Start(PrefService* pref_service,
+ net::URLRequestContextGetter* request_context) {
+ GURL server_url = GetServerUrl();
+ if (!server_url.is_valid())
+ return;
+ DCHECK(!uploader_);
+ LoadSecret(pref_service);
+ LoadCohort(pref_service);
+ uploader_.reset(new LogUploader(server_url, kMimeType, request_context));
+ log_rotation_timer_.Start(
+ FROM_HERE,
+ base::TimeDelta::FromSeconds(kInitialLogIntervalSeconds),
+ this,
+ &RapporService::OnLogInterval);
+}
+
+void RapporService::OnLogInterval() {
+ DCHECK(uploader_);
+ RapporReports reports;
+ if (ExportMetrics(&reports)) {
+ std::string log_text;
+ bool success = reports.SerializeToString(&log_text);
+ DCHECK(success);
+ uploader_->QueueLog(log_text);
+ }
+ log_rotation_timer_.Start(FROM_HERE,
+ base::TimeDelta::FromSeconds(kLogIntervalSeconds),
+ this,
+ &RapporService::OnLogInterval);
+}
+
+// static
+void RapporService::RegisterPrefs(PrefRegistrySimple* registry) {
+ registry->RegisterStringPref(prefs::kRapporSecret, std::string());
+ registry->RegisterIntegerPref(prefs::kRapporCohort, -1);
+}
+
+void RapporService::LoadCohort(PrefService* pref_service) {
+ DCHECK_EQ(cohort_, -1);
+ cohort_ = pref_service->GetInteger(prefs::kRapporCohort);
+ if (cohort_ >= 0 && cohort_ < kNumCohorts)
+ return;
+
+ cohort_ = base::RandGenerator(kNumCohorts);
+ pref_service->SetInteger(prefs::kRapporCohort, cohort_);
+}
+
+void RapporService::LoadSecret(PrefService* pref_service) {
+ DCHECK(secret_.empty());
+ std::string secret_base64 =
+ pref_service->GetString(prefs::kRapporSecret);
+ if (!secret_base64.empty()) {
+ bool decoded = base::Base64Decode(secret_base64, &secret_);
+ if (decoded && secret_.size() == HmacByteVectorGenerator::kEntropyInputSize)
+ return;
+ // If the preference fails to decode, or is the wrong size, it must be
+ // corrupt, so continue as though it didn't exist yet and generate a new
+ // one.
+ }
+
+ secret_ = HmacByteVectorGenerator::GenerateEntropyInput();
+ base::Base64Encode(secret_, &secret_base64);
+ pref_service->SetString(prefs::kRapporSecret, secret_base64);
+}
+
+bool RapporService::ExportMetrics(RapporReports* reports) {
+ if (metrics_map_.empty())
+ return false;
+
+ DCHECK_GE(cohort_, 0);
+ reports->set_cohort(cohort_);
+
+ for (std::map<std::string, RapporMetric*>::iterator it = metrics_map_.begin();
+ metrics_map_.end() != it;
+ ++it) {
+ const RapporMetric* metric = it->second;
+ RapporReports::Report* report = reports->add_report();
+ report->set_name_hash(metrics::HashMetricName(it->first));
+ ByteVector bytes = metric->GetReport(secret_);
+ report->set_bits(std::string(bytes.begin(), bytes.end()));
+ }
+ STLDeleteValues(&metrics_map_);
+ return true;
+}
+
+bool RapporService::IsInitialized() const {
+ return cohort_ >= 0;
+}
+
+void RapporService::RecordSample(const std::string& metric_name,
+ RapporType type,
+ const std::string& sample) {
+ // Ignore the sample if the service hasn't started yet.
+ if (!IsInitialized())
+ return;
+ DCHECK_LT(type, NUM_RAPPOR_TYPES);
+ RecordSampleInternal(metric_name, kRapporParametersForType[type], sample);
+}
+
+void RapporService::RecordSampleInternal(const std::string& metric_name,
+ const RapporParameters& parameters,
+ const std::string& sample) {
+ DCHECK(IsInitialized());
+
+ RapporMetric* metric = LookUpMetric(metric_name, parameters);
+ metric->AddSample(sample);
+}
+
+RapporMetric* RapporService::LookUpMetric(const std::string& metric_name,
+ const RapporParameters& parameters) {
+ DCHECK(IsInitialized());
+ std::map<std::string, RapporMetric*>::iterator it =
+ metrics_map_.find(metric_name);
+ if (metrics_map_.end() != it) {
+ RapporMetric* metric = it->second;
+ DCHECK_EQ(parameters.ToString(), metric->parameters().ToString());
+ return metric;
+ }
+
+ RapporMetric* new_metric = new RapporMetric(metric_name, parameters, cohort_);
+ metrics_map_[metric_name] = new_metric;
+ return new_metric;
+}
+
+} // namespace rappor
diff --git a/components/rappor/rappor_service.h b/components/rappor/rappor_service.h
new file mode 100644
index 0000000..9e2cd9b
--- /dev/null
+++ b/components/rappor/rappor_service.h
@@ -0,0 +1,109 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_RAPPOR_RAPPOR_SERVICE_H_
+#define COMPONENTS_RAPPOR_RAPPOR_SERVICE_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/prefs/pref_service.h"
+#include "base/time/time.h"
+#include "base/timer/timer.h"
+#include "components/rappor/log_uploader.h"
+#include "components/rappor/proto/rappor_metric.pb.h"
+#include "components/rappor/rappor_metric.h"
+
+class PrefRegistrySimple;
+
+namespace rappor {
+
+// The type of data stored in a metric.
+enum RapporType {
+ ETLD_PLUS_ONE_RAPPOR_TYPE = 0,
+ NUM_RAPPOR_TYPES
+};
+
+// This class provides an interface for recording samples for rappor metrics,
+// and periodically generates and uploads reports based on the collected data.
+class RapporService {
+ public:
+ RapporService();
+ virtual ~RapporService();
+
+ // Starts the periodic generation of reports and upload attempts.
+ void Start(PrefService* pref_service, net::URLRequestContextGetter* context);
+
+ // Records a sample of the rappor metric specified by |metric_name|.
+ // Creates and initializes the metric, if it doesn't yet exist.
+ void RecordSample(const std::string& metric_name,
+ RapporType type,
+ const std::string& sample);
+
+ // Sets the cohort value. For use by tests only.
+ void SetCohortForTesting(uint32_t cohort) { cohort_ = cohort; }
+
+ // Sets the secret value. For use by tests only.
+ void SetSecretForTesting(const std::string& secret) { secret_ = secret; }
+
+ // Registers the names of all of the preferences used by RapporService in the
+ // provided PrefRegistry. This should be called before calling Start().
+ static void RegisterPrefs(PrefRegistrySimple* registry);
+
+ protected:
+ // Logs all of the collected metrics to the reports proto message. Exposed
+ // for tests. Returns true if any metrics were recorded.
+ bool ExportMetrics(RapporReports* reports);
+
+ // Records a sample of the rappor metric specified by |parameters|.
+ // Creates and initializes the metric, if it doesn't yet exist.
+ // Exposed for tests.
+ void RecordSampleInternal(const std::string& metric_name,
+ const RapporParameters& parameters,
+ const std::string& sample);
+
+ private:
+ // Check if the service has been started successfully.
+ bool IsInitialized() const;
+
+ // Retrieves the cohort number this client was assigned to, generating it if
+ // doesn't already exist. The cohort should be persistent.
+ void LoadCohort(PrefService* pref_service);
+
+ // Retrieves the value for secret_ from preferences, generating it if doesn't
+ // already exist. The secret should be persistent, so that additional bits
+ // from the client do not get exposed over time.
+ void LoadSecret(PrefService* pref_service);
+
+ // Called whenever the logging interval elapses to generate a new log of
+ // reports and pass it to the uploader.
+ void OnLogInterval();
+
+ // Finds a metric in the metrics_map_, creating it if it doesn't already
+ // exist.
+ RapporMetric* LookUpMetric(const std::string& metric_name,
+ const RapporParameters& parameters);
+
+ // Client-side secret used to generate fake bits.
+ std::string secret_;
+
+ // The cohort this client is assigned to. -1 is uninitialized.
+ int32_t cohort_;
+
+ // Timer which schedules calls to OnLogInterval()
+ base::OneShotTimer<RapporService> log_rotation_timer_;
+
+ // A private LogUploader instance for sending reports to the server.
+ scoped_ptr<LogUploader> uploader_;
+
+ // We keep all registered metrics in a map, from name to metric.
+ // The map owns the metrics it contains.
+ std::map<std::string, RapporMetric*> metrics_map_;
+
+ DISALLOW_COPY_AND_ASSIGN(RapporService);
+};
+
+} // namespace rappor
+
+#endif // COMPONENTS_RAPPOR_RAPPOR_SERVICE_H_
diff --git a/components/rappor/rappor_service_unittest.cc b/components/rappor/rappor_service_unittest.cc
new file mode 100644
index 0000000..790eae7
--- /dev/null
+++ b/components/rappor/rappor_service_unittest.cc
@@ -0,0 +1,50 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/rappor/rappor_service.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace rappor {
+
+class TestRapporService : public RapporService {
+ public:
+ void GetReports(RapporReports* reports) {
+ ExportMetrics(reports);
+ }
+ void TestRecordSample(const std::string& metric_name,
+ const RapporParameters& parameters,
+ const std::string& sample) {
+ RecordSampleInternal(metric_name, parameters, sample);
+ }
+};
+
+TEST(RapporServiceTest, RecordAndExportMetrics) {
+ const RapporParameters kTestRapporParameters = {
+ 16 /* Bloom filter size bytes */,
+ 4 /* Bloom filter hash count */,
+ PROBABILITY_75 /* Fake data probability */,
+ PROBABILITY_50 /* Fake one probability */,
+ PROBABILITY_75 /* One coin probability */,
+ PROBABILITY_50 /* Zero coin probability */
+ };
+
+ TestRapporService rappor_service;
+ rappor_service.SetCohortForTesting(0);
+ rappor_service.SetSecretForTesting(
+ HmacByteVectorGenerator::GenerateEntropyInput());
+
+ rappor_service.TestRecordSample("MyMetric", kTestRapporParameters, "foo");
+ rappor_service.TestRecordSample("MyMetric", kTestRapporParameters, "bar");
+
+ RapporReports reports;
+ rappor_service.GetReports(&reports);
+ EXPECT_EQ(1, reports.report_size());
+
+ const RapporReports::Report& report = reports.report(0);
+ EXPECT_TRUE(report.name_hash());
+ EXPECT_EQ(16u, report.bits().size());
+}
+
+} // namespace rappor