summaryrefslogtreecommitdiffstats
path: root/chrome/common/metrics/entropy_provider_unittest.cc
blob: c3547ab7cade8676b270a9b816e40e2f93531111 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <cmath>
#include <limits>
#include <numeric>

#include "base/basictypes.h"
#include "base/guid.h"
#include "base/memory/scoped_ptr.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "chrome/common/metrics/entropy_provider.h"
#include "chrome/common/metrics/metrics_util.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace metrics {

namespace {

// Size of the low entropy source to use for the permuted entropy provider
// in tests.
const size_t kMaxLowEntropySize = (1 << 13);

// Field trial names used in unit tests.
const std::string kTestTrialNames[] = { "TestTrial", "AnotherTestTrial",
                                        "NewTabButton" };

// Computes the Chi-Square statistic for |values| assuming they follow a uniform
// distribution, where each entry has expected value |expected_value|.
//
// The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
// value and E is the expected value.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double sum = 0;
  for (size_t i = 0; i < values.size(); ++i) {
    const double delta = values[i] - expected_value;
    sum += (delta * delta) / expected_value;
  }
  return sum;
}

// Computes SHA1-based entropy for the given |trial_name| based on
// |entropy_source|
double GenerateSHA1Entropy(const std::string& entropy_source,
                           const std::string& trial_name) {
  SHA1EntropyProvider sha1_provider(entropy_source);
  return sha1_provider.GetEntropyForTrial(trial_name);
}

// Generates permutation-based entropy for the given |trial_name| based on
// |entropy_source| which must be in the range [0, entropy_max).
double GeneratePermutedEntropy(uint16 entropy_source,
                               size_t entropy_max,
                               const std::string& trial_name) {
  PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
  return permuted_provider.GetEntropyForTrial(trial_name);
}

// Helper interface for testing used to generate entropy values for a given
// field trial. Unlike EntropyProvider, which keeps the low/high entropy source
// value constant and generates entropy for different trial names, instances
// of TrialEntropyGenerator keep the trial name constant and generate low/high
// entropy source values internally to produce each output entropy value.
class TrialEntropyGenerator {
 public:
  virtual ~TrialEntropyGenerator() {}
  virtual double GenerateEntropyValue() const = 0;
};

// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
// entropy corresponding to a low entropy source).
class SHA1EntropyGenerator : public TrialEntropyGenerator {
 public:
  explicit SHA1EntropyGenerator(const std::string& trial_name)
      : trial_name_(trial_name) {
  }

  virtual ~SHA1EntropyGenerator() {
  }

  virtual double GenerateEntropyValue() const OVERRIDE {
    // Use a random GUID + 13 additional bits of entropy to match how the
    // SHA1EntropyProvider is used in metrics_service.cc.
    const int low_entropy_source =
        static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
    const std::string high_entropy_source =
        base::GenerateGUID() + base::IntToString(low_entropy_source);
    return GenerateSHA1Entropy(high_entropy_source, trial_name_);
  }

 private:
  const std::string& trial_name_;

  DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
};

// An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
// using 13-bit low entropy source values.
class PermutedEntropyGenerator : public TrialEntropyGenerator {
 public:
  explicit PermutedEntropyGenerator(const std::string& trial_name)
      : mapping_(kMaxLowEntropySize) {
    // Note: Given a trial name, the computed mapping will be the same.
    // As a performance optimization, pre-compute the mapping once per trial
    // name and index into it for each entropy value.
    internal::PermuteMappingUsingTrialName(trial_name, &mapping_);
  }

  virtual ~PermutedEntropyGenerator() {
  }

  virtual double GenerateEntropyValue() const OVERRIDE {
    const int low_entropy_source =
        static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
    return mapping_[low_entropy_source] /
           static_cast<double>(kMaxLowEntropySize);
  }

 private:
  std::vector<uint16> mapping_;

  DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
};

// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
// of Fit Test.
void PerformEntropyUniformityTest(
    const std::string& trial_name,
    const TrialEntropyGenerator& entropy_generator) {
  // Number of buckets in the simulated field trials.
  const size_t kBucketCount = 20;
  // Max number of iterations to perform before giving up and failing.
  const size_t kMaxIterationCount = 100000;
  // The number of iterations to perform before each time the statistical
  // significance of the results is checked.
  const size_t kCheckIterationCount = 10000;
  // This is the Chi-Square threshold from the Chi-Square statistic table for
  // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
  // level. See: http://www.medcalc.org/manual/chi-square-table.php
  const double kChiSquareThreshold = 43.82;

  std::vector<int> distribution(kBucketCount);

  for (size_t i = 1; i <= kMaxIterationCount; ++i) {
    const double entropy_value = entropy_generator.GenerateEntropyValue();
    const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
    ASSERT_LT(bucket, kBucketCount);
    distribution[bucket] += 1;

    // After |kCheckIterationCount| iterations, compute the Chi-Square
    // statistic of the distribution. If the resulting statistic is greater
    // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
    // that the observed samples do not follow a uniform distribution.
    //
    // However, since 99.9% would still result in a false negative every
    // 1000 runs of the test, do not treat it as a failure (else the test
    // will be flaky). Instead, perform additional iterations to determine
    // if the distribution will converge, up to |kMaxIterationCount|.
    if ((i % kCheckIterationCount) == 0) {
      const double expected_value_per_bucket =
          static_cast<double>(i) / kBucketCount;
      const double chi_square =
          ComputeChiSquare(distribution, expected_value_per_bucket);
      if (chi_square < kChiSquareThreshold)
        break;

      // If |i == kMaxIterationCount|, the Chi-Square statistic did not
      // converge after |kMaxIterationCount|.
      EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
          trial_name << " with chi_square = " << chi_square <<
          " after " << kMaxIterationCount << " iterations.";
    }
  }
}

}  // namespace

class EntropyProviderTest : public testing::Test {
};

TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
  // Simply asserts that two trials using one-time randomization
  // that have different names, normally generate different results.
  //
  // Note that depending on the one-time random initialization, they
  // _might_ actually give the same result, but we know that given
  // the particular client_id we use for unit tests they won't.
  base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
  scoped_refptr<base::FieldTrial> trials[] = {
      base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
          base::FieldTrialList::kNoExpirationYear, 1, 1, NULL),
      base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
          base::FieldTrialList::kNoExpirationYear, 1, 1, NULL) };

  for (size_t i = 0; i < arraysize(trials); ++i) {
    trials[i]->UseOneTimeRandomization();

    for (int j = 0; j < 100; ++j)
      trials[i]->AppendGroup(std::string(), 1);
  }

  // The trials are most likely to give different results since they have
  // different names.
  EXPECT_NE(trials[0]->group(), trials[1]->group());
  EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
}

TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
  // Simply asserts that two trials using one-time randomization
  // that have different names, normally generate different results.
  //
  // Note that depending on the one-time random initialization, they
  // _might_ actually give the same result, but we know that given
  // the particular client_id we use for unit tests they won't.
  base::FieldTrialList field_trial_list(
      new PermutedEntropyProvider(1234, kMaxLowEntropySize));
  scoped_refptr<base::FieldTrial> trials[] = {
      base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
          base::FieldTrialList::kNoExpirationYear, 1, 1, NULL),
      base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
          base::FieldTrialList::kNoExpirationYear, 1, 1, NULL) };

  for (size_t i = 0; i < arraysize(trials); ++i) {
    trials[i]->UseOneTimeRandomization();

    for (int j = 0; j < 100; ++j)
      trials[i]->AppendGroup(std::string(), 1);
  }

  // The trials are most likely to give different results since they have
  // different names.
  EXPECT_NE(trials[0]->group(), trials[1]->group());
  EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
}

TEST_F(EntropyProviderTest, SHA1Entropy) {
  const double results[] = { GenerateSHA1Entropy("hi", "1"),
                             GenerateSHA1Entropy("there", "1") };

  EXPECT_NE(results[0], results[1]);
  for (size_t i = 0; i < arraysize(results); ++i) {
    EXPECT_LE(0.0, results[i]);
    EXPECT_GT(1.0, results[i]);
  }

  EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
            GenerateSHA1Entropy("yo", "1"));
  EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
            GenerateSHA1Entropy("yo", "else"));
}

TEST_F(EntropyProviderTest, PermutedEntropy) {
  const double results[] = {
      GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
      GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };

  EXPECT_NE(results[0], results[1]);
  for (size_t i = 0; i < arraysize(results); ++i) {
    EXPECT_LE(0.0, results[i]);
    EXPECT_GT(1.0, results[i]);
  }

  EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
            GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
  EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
            GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
}

TEST_F(EntropyProviderTest, PermutedEntropyProviderResults) {
  // Verifies that PermutedEntropyProvider produces expected results. This
  // ensures that the results are the same between platforms and ensures that
  // changes to the implementation do not regress this accidentally.

  EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
                   GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
  EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
                   GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
  EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
                   GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
}

TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {
  for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
    SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
    PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
  }
}

TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {
  for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
    PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
    PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
  }
}

TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {
  // Verifies that SeededRandGenerator has a uniform distribution.
  //
  // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.

  const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
  const uint32 kExpectedAverage = kTopOfRange / 2ULL;
  const uint32 kAllowedVariance = kExpectedAverage / 50ULL;  // +/- 2%
  const int kMinAttempts = 1000;
  const int kMaxAttempts = 1000000;

  for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
    const uint32 seed = HashName(kTestTrialNames[i]);
    internal::SeededRandGenerator rand_generator(seed);

    double cumulative_average = 0.0;
    int count = 0;
    while (count < kMaxAttempts) {
      uint32 value = rand_generator(kTopOfRange);
      cumulative_average = (count * cumulative_average + value) / (count + 1);

      // Don't quit too quickly for things to start converging, or we may have
      // a false positive.
      if (count > kMinAttempts &&
          kExpectedAverage - kAllowedVariance < cumulative_average &&
          cumulative_average < kExpectedAverage + kAllowedVariance) {
        break;
      }

      ++count;
    }

    ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
        kExpectedAverage << ", average ended at " << cumulative_average <<
        ", for trial " << kTestTrialNames[i];
  }
}

}  // namespace metrics