blob: b54a5d50f858b7022172f1a295d97c24ba10409c [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include <cmath>
6#include <limits>
7#include <numeric>
8
9#include "base/basictypes.h"
10#include "base/guid.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/rand_util.h"
13#include "base/string_number_conversions.h"
14#include "chrome/common/metrics/entropy_provider.h"
15#include "testing/gtest/include/gtest/gtest.h"
16
17namespace metrics {
18
19namespace {
20
// Number of distinct low entropy source values (13 bits' worth) handed to the
// permuted entropy provider in these tests.
const size_t kMaxLowEntropySize = static_cast<size_t>(1) << 13;

// Names of the field trials exercised by the uniformity tests below.
const std::string kTestTrialNames[] = {
  "TestTrial",
  "AnotherTestTrial",
  "NewTabButton"
};
28
// Returns the Chi-Square statistic of |values| against a uniform distribution
// in which every entry is expected to equal |expected_value|.
//
// The statistic is Sum((O - E)^2 / E), with O the observed entry and E the
// expected value. An empty |values| yields 0.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double chi_square = 0;
  for (std::vector<int>::const_iterator observed = values.begin();
       observed != values.end(); ++observed) {
    const double deviation = *observed - expected_value;
    chi_square += deviation * deviation / expected_value;
  }
  return chi_square;
}
43
44// Computes SHA1-based entropy for the given |trial_name| based on
45// |entropy_source|
46double GenerateSHA1Entropy(const std::string& entropy_source,
47 const std::string& trial_name) {
48 SHA1EntropyProvider sha1_provider(entropy_source);
49 return sha1_provider.GetEntropyForTrial(trial_name);
50}
51
52// Generates permutation-based entropy for the given |trial_name| based on
53// |entropy_source| which must be in the range [0, entropy_max).
54double GeneratePermutedEntropy(uint16 entropy_source,
55 size_t entropy_max,
56 const std::string& trial_name) {
57 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
58 return permuted_provider.GetEntropyForTrial(trial_name);
59}
60
// Test-only interface that yields entropy values for a single field trial.
//
// An EntropyProvider holds the low/high entropy source fixed and is queried
// with different trial names. A TrialEntropyGenerator does the opposite: the
// trial name is fixed and each call draws a new low/high entropy source value
// internally to produce the next output entropy value.
class TrialEntropyGenerator {
 public:
  virtual ~TrialEntropyGenerator() {}

  // Produces one entropy value for the trial this generator represents.
  virtual double GenerateEntropyValue() const = 0;
};
71
72// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
73// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
74// entropy corresponding to a low entropy source).
75class SHA1EntropyGenerator : public TrialEntropyGenerator {
76 public:
77 explicit SHA1EntropyGenerator(const std::string& trial_name)
78 : trial_name_(trial_name) {
79 }
80
81 ~SHA1EntropyGenerator() {
82 }
83
84 virtual double GenerateEntropyValue() const OVERRIDE {
85 // Use a random GUID + 13 additional bits of entropy to match how the
86 // SHA1EntropyProvider is used in metrics_service.cc.
87 const int low_entropy_source =
88 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
89 const std::string high_entropy_source =
90 base::GenerateGUID() + base::IntToString(low_entropy_source);
91 return GenerateSHA1Entropy(high_entropy_source, trial_name_);
92 }
93
94 private:
95 const std::string& trial_name_;
96
97 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
98};
99
100// An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
101// using 13-bit low entropy source values.
102class PermutedEntropyGenerator : public TrialEntropyGenerator {
103 public:
104 explicit PermutedEntropyGenerator(const std::string& trial_name)
105 : mapping_(kMaxLowEntropySize) {
106 // Note: Given a trial name, the computed mapping will be the same.
107 // As a performance optimization, pre-compute the mapping once per trial
108 // name and index into it for each entropy value.
109 internal::PermuteMappingUsingTrialName(trial_name, &mapping_);
110 }
111
112 ~PermutedEntropyGenerator() {
113 }
114
115 virtual double GenerateEntropyValue() const OVERRIDE {
116 const int low_entropy_source =
117 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
118 return mapping_[low_entropy_source] /
119 static_cast<double>(kMaxLowEntropySize);
120 }
121
122 private:
123 std::vector<uint16> mapping_;
124
125 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
126};
127
128// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
129// of Fit Test.
130void PerformEntropyUniformityTest(
131 const std::string& trial_name,
132 const TrialEntropyGenerator& entropy_generator) {
133 // Number of buckets in the simulated field trials.
134 const size_t kBucketCount = 20;
135 // Max number of iterations to perform before giving up and failing.
136 const size_t kMaxIterationCount = 100000;
137 // The number of iterations to perform before each time the statistical
138 // significance of the results is checked.
139 const size_t kCheckIterationCount = 10000;
140 // This is the Chi-Square threshold from the Chi-Square statistic table for
141 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
142 // level. See: http://www.medcalc.org/manual/chi-square-table.php
143 const double kChiSquareThreshold = 43.82;
144
145 std::vector<int> distribution(kBucketCount);
146
147 for (size_t i = 1; i <= kMaxIterationCount; ++i) {
148 const double entropy_value = entropy_generator.GenerateEntropyValue();
149 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
150 ASSERT_LT(bucket, kBucketCount);
151 distribution[bucket] += 1;
152
153 // After |kCheckIterationCount| iterations, compute the Chi-Square
154 // statistic of the distribution. If the resulting statistic is greater
155 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
156 // that the observed samples do not follow a uniform distribution.
157 //
158 // However, since 99.9% would still result in a false negative every
159 // 1000 runs of the test, do not treat it as a failure (else the test
160 // will be flaky). Instead, perform additional iterations to determine
161 // if the distribution will converge, up to |kMaxIterationCount|.
162 if ((i % kCheckIterationCount) == 0) {
163 const double expected_value_per_bucket =
164 static_cast<double>(i) / kBucketCount;
165 const double chi_square =
166 ComputeChiSquare(distribution, expected_value_per_bucket);
167 if (chi_square < kChiSquareThreshold)
168 break;
169
170 // If |i == kMaxIterationCount|, the Chi-Square statistic did not
171 // converge after |kMaxIterationCount|.
172 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
173 trial_name << " with chi_square = " << chi_square <<
174 " after " << kMaxIterationCount << " iterations.";
175 }
176 }
177}
178
179} // namespace
180
181class EntropyProviderTest : public testing::Test {
182};
183
184TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
185 // Simply asserts that two trials using one-time randomization
186 // that have different names, normally generate different results.
187 //
188 // Note that depending on the one-time random initialization, they
189 // _might_ actually give the same result, but we know that given
190 // the particular client_id we use for unit tests they won't.
191 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
192 scoped_refptr<base::FieldTrial> trials[] = {
193 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
194 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
195 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
196 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
197
198 for (size_t i = 0; i < arraysize(trials); ++i) {
199 trials[i]->UseOneTimeRandomization();
200
201 for (int j = 0; j < 100; ++j)
202 trials[i]->AppendGroup("", 1);
203 }
204
205 // The trials are most likely to give different results since they have
206 // different names.
207 EXPECT_NE(trials[0]->group(), trials[1]->group());
208 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
209}
210
211TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
212 // Simply asserts that two trials using one-time randomization
213 // that have different names, normally generate different results.
214 //
215 // Note that depending on the one-time random initialization, they
216 // _might_ actually give the same result, but we know that given
217 // the particular client_id we use for unit tests they won't.
218 base::FieldTrialList field_trial_list(
219 new PermutedEntropyProvider(1234, kMaxLowEntropySize));
220 scoped_refptr<base::FieldTrial> trials[] = {
221 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
222 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
223 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
224 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
225
226 for (size_t i = 0; i < arraysize(trials); ++i) {
227 trials[i]->UseOneTimeRandomization();
228
229 for (int j = 0; j < 100; ++j)
230 trials[i]->AppendGroup("", 1);
231 }
232
233 // The trials are most likely to give different results since they have
234 // different names.
235 EXPECT_NE(trials[0]->group(), trials[1]->group());
236 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
237}
238
239TEST_F(EntropyProviderTest, SHA1Entropy) {
240 const double results[] = { GenerateSHA1Entropy("hi", "1"),
241 GenerateSHA1Entropy("there", "1") };
242
243 EXPECT_NE(results[0], results[1]);
244 for (size_t i = 0; i < arraysize(results); ++i) {
245 EXPECT_LE(0.0, results[i]);
246 EXPECT_GT(1.0, results[i]);
247 }
248
249 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
250 GenerateSHA1Entropy("yo", "1"));
251 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
252 GenerateSHA1Entropy("yo", "else"));
253}
254
255TEST_F(EntropyProviderTest, PermutedEntropy) {
256 const double results[] = {
257 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
258 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
259
260 EXPECT_NE(results[0], results[1]);
261 for (size_t i = 0; i < arraysize(results); ++i) {
262 EXPECT_LE(0.0, results[i]);
263 EXPECT_GT(1.0, results[i]);
264 }
265
266 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
267 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
268 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
269 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
270}
271
272TEST_F(EntropyProviderTest, PermutedEntropyProviderResults) {
273 // Verifies that PermutedEntropyProvider produces expected results. This
274 // ensures that the results are the same between platforms and ensures that
275 // changes to the implementation do not regress this accidentally.
276
277 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
278 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
279 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
280 GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
281 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
282 GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
283}
284
285TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {
286 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
287 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
288 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
289 }
290}
291
292TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {
293 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
294 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
295 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
296 }
297}
298
299TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {
300 // Verifies that SeededRandGenerator has a uniform distribution.
301 //
302 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
303
304 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
305 const uint32 kExpectedAverage = kTopOfRange / 2ULL;
306 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
307 const int kMinAttempts = 1000;
308 const int kMaxAttempts = 1000000;
309
310 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
311 const uint32 seed = internal::HashName(kTestTrialNames[i]);
312 internal::SeededRandGenerator rand_generator(seed);
313
314 double cumulative_average = 0.0;
315 int count = 0;
316 while (count < kMaxAttempts) {
317 uint32 value = rand_generator(kTopOfRange);
318 cumulative_average = (count * cumulative_average + value) / (count + 1);
319
320 // Don't quit too quickly for things to start converging, or we may have
321 // a false positive.
322 if (count > kMinAttempts &&
323 kExpectedAverage - kAllowedVariance < cumulative_average &&
324 cumulative_average < kExpectedAverage + kAllowedVariance) {
325 break;
326 }
327
328 ++count;
329 }
330
331 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
332 kExpectedAverage << ", average ended at " << cumulative_average <<
333 ", for trial " << kTestTrialNames[i];
334 }
335}
336
337} // namespace metrics