// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include <cmath>
6#include <limits>
7#include <numeric>
8
9#include "base/basictypes.h"
10#include "base/guid.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/rand_util.h"
13#include "base/string_number_conversions.h"
14#include "chrome/common/metrics/entropy_provider.h"
[email protected]bca34942012-09-05 18:23:2515#include "chrome/common/metrics/metrics_util.h"
[email protected]20f999b52012-08-24 22:32:5916#include "testing/gtest/include/gtest/gtest.h"
17
18namespace metrics {
19
20namespace {
21
// Number of distinct low entropy source values (2^13) handed to the permuted
// entropy provider throughout these tests.
const size_t kMaxLowEntropySize = 1 << 13;

// Names of the field trials exercised by the uniformity tests below.
const std::string kTestTrialNames[] = {
  "TestTrial",
  "AnotherTestTrial",
  "NewTabButton"
};
29
// Returns the Chi-Square statistic of |values| under the hypothesis that the
// observations are uniformly distributed, i.e. that every entry of |values|
// has the same expected count |expected_value|.
//
// The statistic is Sum((O - E)^2 / E), with O the observed count of an entry
// and E its expected count. An empty |values| yields 0.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double chi_square = 0;
  for (std::vector<int>::const_iterator observed = values.begin();
       observed != values.end(); ++observed) {
    const double deviation = *observed - expected_value;
    chi_square += (deviation * deviation) / expected_value;
  }
  return chi_square;
}
44
45// Computes SHA1-based entropy for the given |trial_name| based on
46// |entropy_source|
47double GenerateSHA1Entropy(const std::string& entropy_source,
48 const std::string& trial_name) {
49 SHA1EntropyProvider sha1_provider(entropy_source);
50 return sha1_provider.GetEntropyForTrial(trial_name);
51}
52
53// Generates permutation-based entropy for the given |trial_name| based on
54// |entropy_source| which must be in the range [0, entropy_max).
55double GeneratePermutedEntropy(uint16 entropy_source,
56 size_t entropy_max,
57 const std::string& trial_name) {
58 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
59 return permuted_provider.GetEntropyForTrial(trial_name);
60}
61
// Test-only interface that produces a stream of entropy values for a single
// field trial. An EntropyProvider holds the low/high entropy source fixed and
// is queried with different trial names; a TrialEntropyGenerator does the
// opposite: the trial name is fixed and each call draws a new low/high entropy
// source value internally before computing the resulting entropy value.
class TrialEntropyGenerator {
 public:
  // Virtual so that implementations can be destroyed through this interface.
  virtual ~TrialEntropyGenerator() {
  }

  // Produces the next entropy value. The uniformity test in this file buckets
  // the result and asserts it falls in [0, 1).
  virtual double GenerateEntropyValue() const = 0;
};
72
73// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
74// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
75// entropy corresponding to a low entropy source).
76class SHA1EntropyGenerator : public TrialEntropyGenerator {
77 public:
78 explicit SHA1EntropyGenerator(const std::string& trial_name)
79 : trial_name_(trial_name) {
80 }
81
82 ~SHA1EntropyGenerator() {
83 }
84
85 virtual double GenerateEntropyValue() const OVERRIDE {
86 // Use a random GUID + 13 additional bits of entropy to match how the
87 // SHA1EntropyProvider is used in metrics_service.cc.
88 const int low_entropy_source =
89 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
90 const std::string high_entropy_source =
91 base::GenerateGUID() + base::IntToString(low_entropy_source);
92 return GenerateSHA1Entropy(high_entropy_source, trial_name_);
93 }
94
95 private:
96 const std::string& trial_name_;
97
98 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
99};
100
101// An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
102// using 13-bit low entropy source values.
103class PermutedEntropyGenerator : public TrialEntropyGenerator {
104 public:
105 explicit PermutedEntropyGenerator(const std::string& trial_name)
106 : mapping_(kMaxLowEntropySize) {
107 // Note: Given a trial name, the computed mapping will be the same.
108 // As a performance optimization, pre-compute the mapping once per trial
109 // name and index into it for each entropy value.
110 internal::PermuteMappingUsingTrialName(trial_name, &mapping_);
111 }
112
113 ~PermutedEntropyGenerator() {
114 }
115
116 virtual double GenerateEntropyValue() const OVERRIDE {
117 const int low_entropy_source =
118 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
119 return mapping_[low_entropy_source] /
120 static_cast<double>(kMaxLowEntropySize);
121 }
122
123 private:
124 std::vector<uint16> mapping_;
125
126 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
127};
128
129// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
130// of Fit Test.
131void PerformEntropyUniformityTest(
132 const std::string& trial_name,
133 const TrialEntropyGenerator& entropy_generator) {
134 // Number of buckets in the simulated field trials.
135 const size_t kBucketCount = 20;
136 // Max number of iterations to perform before giving up and failing.
137 const size_t kMaxIterationCount = 100000;
138 // The number of iterations to perform before each time the statistical
139 // significance of the results is checked.
140 const size_t kCheckIterationCount = 10000;
141 // This is the Chi-Square threshold from the Chi-Square statistic table for
142 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
143 // level. See: http://www.medcalc.org/manual/chi-square-table.php
144 const double kChiSquareThreshold = 43.82;
145
146 std::vector<int> distribution(kBucketCount);
147
148 for (size_t i = 1; i <= kMaxIterationCount; ++i) {
149 const double entropy_value = entropy_generator.GenerateEntropyValue();
150 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
151 ASSERT_LT(bucket, kBucketCount);
152 distribution[bucket] += 1;
153
154 // After |kCheckIterationCount| iterations, compute the Chi-Square
155 // statistic of the distribution. If the resulting statistic is greater
156 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
157 // that the observed samples do not follow a uniform distribution.
158 //
159 // However, since 99.9% would still result in a false negative every
160 // 1000 runs of the test, do not treat it as a failure (else the test
161 // will be flaky). Instead, perform additional iterations to determine
162 // if the distribution will converge, up to |kMaxIterationCount|.
163 if ((i % kCheckIterationCount) == 0) {
164 const double expected_value_per_bucket =
165 static_cast<double>(i) / kBucketCount;
166 const double chi_square =
167 ComputeChiSquare(distribution, expected_value_per_bucket);
168 if (chi_square < kChiSquareThreshold)
169 break;
170
171 // If |i == kMaxIterationCount|, the Chi-Square statistic did not
172 // converge after |kMaxIterationCount|.
173 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
174 trial_name << " with chi_square = " << chi_square <<
175 " after " << kMaxIterationCount << " iterations.";
176 }
177 }
178}
179
180} // namespace
181
182class EntropyProviderTest : public testing::Test {
183};
184
185TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
186 // Simply asserts that two trials using one-time randomization
187 // that have different names, normally generate different results.
188 //
189 // Note that depending on the one-time random initialization, they
190 // _might_ actually give the same result, but we know that given
191 // the particular client_id we use for unit tests they won't.
192 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
193 scoped_refptr<base::FieldTrial> trials[] = {
194 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
195 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
196 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
197 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
198
199 for (size_t i = 0; i < arraysize(trials); ++i) {
200 trials[i]->UseOneTimeRandomization();
201
202 for (int j = 0; j < 100; ++j)
203 trials[i]->AppendGroup("", 1);
204 }
205
206 // The trials are most likely to give different results since they have
207 // different names.
208 EXPECT_NE(trials[0]->group(), trials[1]->group());
209 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
210}
211
212TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
213 // Simply asserts that two trials using one-time randomization
214 // that have different names, normally generate different results.
215 //
216 // Note that depending on the one-time random initialization, they
217 // _might_ actually give the same result, but we know that given
218 // the particular client_id we use for unit tests they won't.
219 base::FieldTrialList field_trial_list(
220 new PermutedEntropyProvider(1234, kMaxLowEntropySize));
221 scoped_refptr<base::FieldTrial> trials[] = {
222 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
223 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
224 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
225 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
226
227 for (size_t i = 0; i < arraysize(trials); ++i) {
228 trials[i]->UseOneTimeRandomization();
229
230 for (int j = 0; j < 100; ++j)
231 trials[i]->AppendGroup("", 1);
232 }
233
234 // The trials are most likely to give different results since they have
235 // different names.
236 EXPECT_NE(trials[0]->group(), trials[1]->group());
237 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
238}
239
240TEST_F(EntropyProviderTest, SHA1Entropy) {
241 const double results[] = { GenerateSHA1Entropy("hi", "1"),
242 GenerateSHA1Entropy("there", "1") };
243
244 EXPECT_NE(results[0], results[1]);
245 for (size_t i = 0; i < arraysize(results); ++i) {
246 EXPECT_LE(0.0, results[i]);
247 EXPECT_GT(1.0, results[i]);
248 }
249
250 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
251 GenerateSHA1Entropy("yo", "1"));
252 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
253 GenerateSHA1Entropy("yo", "else"));
254}
255
256TEST_F(EntropyProviderTest, PermutedEntropy) {
257 const double results[] = {
258 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
259 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
260
261 EXPECT_NE(results[0], results[1]);
262 for (size_t i = 0; i < arraysize(results); ++i) {
263 EXPECT_LE(0.0, results[i]);
264 EXPECT_GT(1.0, results[i]);
265 }
266
267 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
268 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
269 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
270 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
271}
272
273TEST_F(EntropyProviderTest, PermutedEntropyProviderResults) {
274 // Verifies that PermutedEntropyProvider produces expected results. This
275 // ensures that the results are the same between platforms and ensures that
276 // changes to the implementation do not regress this accidentally.
277
278 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
279 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
280 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
281 GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
282 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
283 GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
284}
285
286TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {
287 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
288 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
289 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
290 }
291}
292
293TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {
294 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
295 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
296 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
297 }
298}
299
300TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {
301 // Verifies that SeededRandGenerator has a uniform distribution.
302 //
303 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
304
305 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
306 const uint32 kExpectedAverage = kTopOfRange / 2ULL;
307 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
308 const int kMinAttempts = 1000;
309 const int kMaxAttempts = 1000000;
310
311 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
[email protected]bca34942012-09-05 18:23:25312 const uint32 seed = HashName(kTestTrialNames[i]);
[email protected]20f999b52012-08-24 22:32:59313 internal::SeededRandGenerator rand_generator(seed);
314
315 double cumulative_average = 0.0;
316 int count = 0;
317 while (count < kMaxAttempts) {
318 uint32 value = rand_generator(kTopOfRange);
319 cumulative_average = (count * cumulative_average + value) / (count + 1);
320
321 // Don't quit too quickly for things to start converging, or we may have
322 // a false positive.
323 if (count > kMinAttempts &&
324 kExpectedAverage - kAllowedVariance < cumulative_average &&
325 cumulative_average < kExpectedAverage + kAllowedVariance) {
326 break;
327 }
328
329 ++count;
330 }
331
332 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
333 kExpectedAverage << ", average ended at " << cumulative_average <<
334 ", for trial " << kTestTrialNames[i];
335 }
336}
337
338} // namespace metrics