// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include <cmath>
6#include <limits>
7#include <numeric>
8
9#include "base/basictypes.h"
10#include "base/guid.h"
11#include "base/memory/scoped_ptr.h"
[email protected]9d7c4a82013-05-07 12:10:4912#include "base/prefs/testing_pref_service.h"
[email protected]20f999b52012-08-24 22:32:5913#include "base/rand_util.h"
[email protected]3ea1b182013-02-08 22:38:4114#include "base/strings/string_number_conversions.h"
[email protected]20f999b52012-08-24 22:32:5915#include "chrome/common/metrics/entropy_provider.h"
[email protected]bca34942012-09-05 18:23:2516#include "chrome/common/metrics/metrics_util.h"
[email protected]20f999b52012-08-24 22:32:5917#include "testing/gtest/include/gtest/gtest.h"
18
19namespace metrics {
20
21namespace {
22
// Size of the low entropy source to use for the permuted entropy provider
// in tests. Low entropy source values used below lie in
// [0, kMaxLowEntropySize).
const size_t kMaxLowEntropySize = 8000;
[email protected]20f999b52012-08-24 22:32:5926
// Field trial names used in unit tests.
const std::string kTestTrialNames[] = {
  "TestTrial",
  "AnotherTestTrial",
  "NewTabButton"
};
30
// Computes the Chi-Square statistic for |values| assuming they follow a uniform
// distribution, where each entry has expected value |expected_value|.
//
// The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
// value and E is the expected value.
//
// |expected_value| is used as a divisor, so callers must pass a non-zero
// value. Returns 0 when |values| is empty.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double sum = 0;
  for (size_t i = 0; i < values.size(); ++i) {
    const double delta = values[i] - expected_value;
    sum += (delta * delta) / expected_value;
  }
  return sum;
}
45
46// Computes SHA1-based entropy for the given |trial_name| based on
47// |entropy_source|
48double GenerateSHA1Entropy(const std::string& entropy_source,
49 const std::string& trial_name) {
50 SHA1EntropyProvider sha1_provider(entropy_source);
[email protected]6fded222013-04-11 20:59:5051 return sha1_provider.GetEntropyForTrial(trial_name, 0);
[email protected]20f999b52012-08-24 22:32:5952}
53
54// Generates permutation-based entropy for the given |trial_name| based on
55// |entropy_source| which must be in the range [0, entropy_max).
56double GeneratePermutedEntropy(uint16 entropy_source,
57 size_t entropy_max,
58 const std::string& trial_name) {
59 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
[email protected]6fded222013-04-11 20:59:5060 return permuted_provider.GetEntropyForTrial(trial_name, 0);
[email protected]20f999b52012-08-24 22:32:5961}
62
// Helper interface for testing used to generate entropy values for a given
// field trial. Unlike EntropyProvider, which keeps the low/high entropy source
// value constant and generates entropy for different trial names, instances
// of TrialEntropyGenerator keep the trial name constant and generate low/high
// entropy source values internally to produce each output entropy value.
class TrialEntropyGenerator {
 public:
  virtual ~TrialEntropyGenerator() {}

  // Returns one entropy value for the trial this generator was created for.
  virtual double GenerateEntropyValue() const = 0;
};
73
74// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
75// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
76// entropy corresponding to a low entropy source).
77class SHA1EntropyGenerator : public TrialEntropyGenerator {
78 public:
79 explicit SHA1EntropyGenerator(const std::string& trial_name)
80 : trial_name_(trial_name) {
81 }
82
[email protected]be9826e62013-02-07 02:00:5883 virtual ~SHA1EntropyGenerator() {
[email protected]20f999b52012-08-24 22:32:5984 }
85
86 virtual double GenerateEntropyValue() const OVERRIDE {
87 // Use a random GUID + 13 additional bits of entropy to match how the
88 // SHA1EntropyProvider is used in metrics_service.cc.
89 const int low_entropy_source =
90 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
91 const std::string high_entropy_source =
92 base::GenerateGUID() + base::IntToString(low_entropy_source);
93 return GenerateSHA1Entropy(high_entropy_source, trial_name_);
94 }
95
96 private:
97 const std::string& trial_name_;
98
99 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
100};
101
102// An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
103// using 13-bit low entropy source values.
104class PermutedEntropyGenerator : public TrialEntropyGenerator {
105 public:
106 explicit PermutedEntropyGenerator(const std::string& trial_name)
107 : mapping_(kMaxLowEntropySize) {
108 // Note: Given a trial name, the computed mapping will be the same.
109 // As a performance optimization, pre-compute the mapping once per trial
110 // name and index into it for each entropy value.
[email protected]6fded222013-04-11 20:59:50111 const uint32 randomization_seed = HashName(trial_name);
112 internal::PermuteMappingUsingRandomizationSeed(randomization_seed,
113 &mapping_);
[email protected]20f999b52012-08-24 22:32:59114 }
115
[email protected]be9826e62013-02-07 02:00:58116 virtual ~PermutedEntropyGenerator() {
[email protected]20f999b52012-08-24 22:32:59117 }
118
119 virtual double GenerateEntropyValue() const OVERRIDE {
120 const int low_entropy_source =
121 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
122 return mapping_[low_entropy_source] /
123 static_cast<double>(kMaxLowEntropySize);
124 }
125
126 private:
127 std::vector<uint16> mapping_;
128
129 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
130};
131
132// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
133// of Fit Test.
134void PerformEntropyUniformityTest(
135 const std::string& trial_name,
136 const TrialEntropyGenerator& entropy_generator) {
137 // Number of buckets in the simulated field trials.
138 const size_t kBucketCount = 20;
139 // Max number of iterations to perform before giving up and failing.
140 const size_t kMaxIterationCount = 100000;
141 // The number of iterations to perform before each time the statistical
142 // significance of the results is checked.
143 const size_t kCheckIterationCount = 10000;
144 // This is the Chi-Square threshold from the Chi-Square statistic table for
145 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
146 // level. See: http://www.medcalc.org/manual/chi-square-table.php
147 const double kChiSquareThreshold = 43.82;
148
149 std::vector<int> distribution(kBucketCount);
150
151 for (size_t i = 1; i <= kMaxIterationCount; ++i) {
152 const double entropy_value = entropy_generator.GenerateEntropyValue();
153 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
154 ASSERT_LT(bucket, kBucketCount);
155 distribution[bucket] += 1;
156
157 // After |kCheckIterationCount| iterations, compute the Chi-Square
158 // statistic of the distribution. If the resulting statistic is greater
159 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
160 // that the observed samples do not follow a uniform distribution.
161 //
162 // However, since 99.9% would still result in a false negative every
163 // 1000 runs of the test, do not treat it as a failure (else the test
164 // will be flaky). Instead, perform additional iterations to determine
165 // if the distribution will converge, up to |kMaxIterationCount|.
166 if ((i % kCheckIterationCount) == 0) {
167 const double expected_value_per_bucket =
168 static_cast<double>(i) / kBucketCount;
169 const double chi_square =
170 ComputeChiSquare(distribution, expected_value_per_bucket);
171 if (chi_square < kChiSquareThreshold)
172 break;
173
174 // If |i == kMaxIterationCount|, the Chi-Square statistic did not
175 // converge after |kMaxIterationCount|.
176 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
177 trial_name << " with chi_square = " << chi_square <<
178 " after " << kMaxIterationCount << " iterations.";
179 }
180 }
181}
182
183} // namespace
184
185class EntropyProviderTest : public testing::Test {
186};
187
188TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
189 // Simply asserts that two trials using one-time randomization
190 // that have different names, normally generate different results.
191 //
192 // Note that depending on the one-time random initialization, they
193 // _might_ actually give the same result, but we know that given
194 // the particular client_id we use for unit tests they won't.
195 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
[email protected]ebcf69f02013-07-30 15:11:29196 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
[email protected]20f999b52012-08-24 22:32:59197 scoped_refptr<base::FieldTrial> trials[] = {
[email protected]ebcf69f02013-07-30 15:11:29198 base::FieldTrialList::FactoryGetFieldTrial(
199 "one", 100, "default", kNoExpirationYear, 1, 1,
200 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
201 base::FieldTrialList::FactoryGetFieldTrial(
202 "two", 100, "default", kNoExpirationYear, 1, 1,
203 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
204 };
[email protected]20f999b52012-08-24 22:32:59205
206 for (size_t i = 0; i < arraysize(trials); ++i) {
[email protected]20f999b52012-08-24 22:32:59207 for (int j = 0; j < 100; ++j)
[email protected]007b3f82013-04-09 08:46:45208 trials[i]->AppendGroup(std::string(), 1);
[email protected]20f999b52012-08-24 22:32:59209 }
210
211 // The trials are most likely to give different results since they have
212 // different names.
213 EXPECT_NE(trials[0]->group(), trials[1]->group());
214 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
215}
216
217TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
218 // Simply asserts that two trials using one-time randomization
219 // that have different names, normally generate different results.
220 //
221 // Note that depending on the one-time random initialization, they
222 // _might_ actually give the same result, but we know that given
223 // the particular client_id we use for unit tests they won't.
224 base::FieldTrialList field_trial_list(
225 new PermutedEntropyProvider(1234, kMaxLowEntropySize));
[email protected]ebcf69f02013-07-30 15:11:29226 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
[email protected]20f999b52012-08-24 22:32:59227 scoped_refptr<base::FieldTrial> trials[] = {
[email protected]ebcf69f02013-07-30 15:11:29228 base::FieldTrialList::FactoryGetFieldTrial(
229 "one", 100, "default", kNoExpirationYear, 1, 1,
230 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
231 base::FieldTrialList::FactoryGetFieldTrial(
232 "two", 100, "default", kNoExpirationYear, 1, 1,
233 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
234 };
[email protected]20f999b52012-08-24 22:32:59235
236 for (size_t i = 0; i < arraysize(trials); ++i) {
[email protected]20f999b52012-08-24 22:32:59237 for (int j = 0; j < 100; ++j)
[email protected]007b3f82013-04-09 08:46:45238 trials[i]->AppendGroup(std::string(), 1);
[email protected]20f999b52012-08-24 22:32:59239 }
240
241 // The trials are most likely to give different results since they have
242 // different names.
243 EXPECT_NE(trials[0]->group(), trials[1]->group());
244 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
245}
246
[email protected]6fded222013-04-11 20:59:50247TEST_F(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
248 // Ensures that two trials with different names but the same custom seed used
249 // for one time randomization produce the same group assignments.
250 base::FieldTrialList field_trial_list(
251 new PermutedEntropyProvider(1234, kMaxLowEntropySize));
[email protected]ebcf69f02013-07-30 15:11:29252 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
[email protected]6fded222013-04-11 20:59:50253 const uint32 kCustomSeed = 9001;
[email protected]ebcf69f02013-07-30 15:11:29254 scoped_refptr<base::FieldTrial> trials[] = {
255 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
256 "one", 100, "default", kNoExpirationYear, 1, 1,
257 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
258 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
259 "two", 100, "default", kNoExpirationYear, 1, 1,
260 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
261 };
[email protected]6fded222013-04-11 20:59:50262
263 for (size_t i = 0; i < arraysize(trials); ++i) {
[email protected]6fded222013-04-11 20:59:50264 for (int j = 0; j < 100; ++j)
265 trials[i]->AppendGroup(std::string(), 1);
266 }
267
268 // Normally, these trials should produce different groups, but if the same
269 // custom seed is used, they should produce the same group assignment.
270 EXPECT_EQ(trials[0]->group(), trials[1]->group());
271 EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
272}
273
[email protected]20f999b52012-08-24 22:32:59274TEST_F(EntropyProviderTest, SHA1Entropy) {
275 const double results[] = { GenerateSHA1Entropy("hi", "1"),
276 GenerateSHA1Entropy("there", "1") };
277
278 EXPECT_NE(results[0], results[1]);
279 for (size_t i = 0; i < arraysize(results); ++i) {
280 EXPECT_LE(0.0, results[i]);
281 EXPECT_GT(1.0, results[i]);
282 }
283
284 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
285 GenerateSHA1Entropy("yo", "1"));
286 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
287 GenerateSHA1Entropy("yo", "else"));
288}
289
290TEST_F(EntropyProviderTest, PermutedEntropy) {
291 const double results[] = {
292 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
293 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
294
295 EXPECT_NE(results[0], results[1]);
296 for (size_t i = 0; i < arraysize(results); ++i) {
297 EXPECT_LE(0.0, results[i]);
298 EXPECT_GT(1.0, results[i]);
299 }
300
301 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
302 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
303 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
304 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
305}
306
307TEST_F(EntropyProviderTest, PermutedEntropyProviderResults) {
308 // Verifies that PermutedEntropyProvider produces expected results. This
309 // ensures that the results are the same between platforms and ensures that
310 // changes to the implementation do not regress this accidentally.
311
312 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
313 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
314 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
315 GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
316 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
317 GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
318}
319
320TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {
321 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
322 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
323 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
324 }
325}
326
327TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {
328 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
329 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
330 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
331 }
332}
333
334TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {
335 // Verifies that SeededRandGenerator has a uniform distribution.
336 //
337 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
338
339 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
340 const uint32 kExpectedAverage = kTopOfRange / 2ULL;
341 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
342 const int kMinAttempts = 1000;
343 const int kMaxAttempts = 1000000;
344
345 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
[email protected]bca34942012-09-05 18:23:25346 const uint32 seed = HashName(kTestTrialNames[i]);
[email protected]20f999b52012-08-24 22:32:59347 internal::SeededRandGenerator rand_generator(seed);
348
349 double cumulative_average = 0.0;
350 int count = 0;
351 while (count < kMaxAttempts) {
352 uint32 value = rand_generator(kTopOfRange);
353 cumulative_average = (count * cumulative_average + value) / (count + 1);
354
355 // Don't quit too quickly for things to start converging, or we may have
356 // a false positive.
357 if (count > kMinAttempts &&
358 kExpectedAverage - kAllowedVariance < cumulative_average &&
359 cumulative_average < kExpectedAverage + kAllowedVariance) {
360 break;
361 }
362
363 ++count;
364 }
365
366 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
367 kExpectedAverage << ", average ended at " << cumulative_average <<
368 ", for trial " << kTestTrialNames[i];
369 }
370}
371
[email protected]9d7c4a82013-05-07 12:10:49372TEST_F(EntropyProviderTest, CachingPermutedEntropyProvider) {
373 TestingPrefServiceSimple prefs;
374 CachingPermutedEntropyProvider::RegisterPrefs(prefs.registry());
375 const int kEntropyValue = 1234;
376
377 // Check that the caching provider returns the same results as the non caching
378 // one. Loop over the trial names twice, to test that caching returns the
379 // expected results.
380 PermutedEntropyProvider provider(kEntropyValue, kMaxLowEntropySize);
381 for (size_t i = 0; i < 2 * arraysize(kTestTrialNames); ++i) {
382 CachingPermutedEntropyProvider cached_provider(&prefs, kEntropyValue,
383 kMaxLowEntropySize);
384 const std::string trial_name =
385 kTestTrialNames[i % arraysize(kTestTrialNames)];
386 EXPECT_EQ(provider.GetEntropyForTrial(trial_name, 0),
387 cached_provider.GetEntropyForTrial(trial_name, 0));
388 }
389
390 // Now, do the same test re-using the same caching provider.
391 CachingPermutedEntropyProvider cached_provider(&prefs, kEntropyValue,
392 kMaxLowEntropySize);
393 for (size_t i = 0; i < 2 * arraysize(kTestTrialNames); ++i) {
394 const std::string trial_name =
395 kTestTrialNames[i % arraysize(kTestTrialNames)];
396 EXPECT_EQ(provider.GetEntropyForTrial(trial_name, 0),
397 cached_provider.GetEntropyForTrial(trial_name, 0));
398 }
399}
400
[email protected]20f999b52012-08-24 22:32:59401} // namespace metrics