blob: 57aae6a1bb7ccea6c8abb797dd96ffa208ee2d4f [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
[email protected]50ae9f12013-08-29 18:03:225#include "components/variations/entropy_provider.h"
[email protected]c277e2b2013-08-02 15:41:086
avi5dd91f82015-12-25 22:30:467#include <stddef.h>
8#include <stdint.h>
9
[email protected]20f999b52012-08-24 22:32:5910#include <cmath>
11#include <limits>
12#include <numeric>
13
[email protected]20f999b52012-08-24 22:32:5914#include "base/guid.h"
avi5dd91f82015-12-25 22:30:4615#include "base/macros.h"
robliao79393ffb2016-09-21 18:45:2916#include "base/memory/ptr_util.h"
[email protected]20f999b52012-08-24 22:32:5917#include "base/rand_util.h"
[email protected]3ea1b182013-02-08 22:38:4118#include "base/strings/string_number_conversions.h"
[email protected]50ae9f12013-08-29 18:03:2219#include "components/variations/metrics_util.h"
[email protected]20f999b52012-08-24 22:32:5920#include "testing/gtest/include/gtest/gtest.h"
21
22namespace metrics {
23
24namespace {
25
// Size of the low entropy source to use for the permuted entropy provider
// in tests.
constexpr size_t kMaxLowEntropySize = 8000;

// Field trial names used in unit tests.
const char* const kTestTrialNames[] = {"TestTrial", "AnotherTestTrial",
                                       "NewTabButton"};
33
// Computes the Chi-Square statistic for |values| assuming they follow a uniform
// distribution, where each entry has expected value |expected_value|.
//
// The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
// value and E is the expected value. |expected_value| is used as the divisor,
// so it must be non-zero. Returns 0 when |values| is empty.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double sum = 0;
  for (const int observed : values) {
    const double delta = observed - expected_value;
    sum += (delta * delta) / expected_value;
  }
  return sum;
}
48
49// Computes SHA1-based entropy for the given |trial_name| based on
50// |entropy_source|
51double GenerateSHA1Entropy(const std::string& entropy_source,
52 const std::string& trial_name) {
53 SHA1EntropyProvider sha1_provider(entropy_source);
[email protected]6fded222013-04-11 20:59:5054 return sha1_provider.GetEntropyForTrial(trial_name, 0);
[email protected]20f999b52012-08-24 22:32:5955}
56
57// Generates permutation-based entropy for the given |trial_name| based on
58// |entropy_source| which must be in the range [0, entropy_max).
avi5dd91f82015-12-25 22:30:4659double GeneratePermutedEntropy(uint16_t entropy_source,
[email protected]20f999b52012-08-24 22:32:5960 size_t entropy_max,
61 const std::string& trial_name) {
62 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
[email protected]6fded222013-04-11 20:59:5063 return permuted_provider.GetEntropyForTrial(trial_name, 0);
[email protected]20f999b52012-08-24 22:32:5964}
65
// Helper interface for testing used to generate entropy values for a given
// field trial. Unlike EntropyProvider, which keeps the low/high entropy source
// value constant and generates entropy for different trial names, instances
// of TrialEntropyGenerator keep the trial name constant and generate low/high
// entropy source values internally to produce each output entropy value.
class TrialEntropyGenerator {
 public:
  virtual ~TrialEntropyGenerator() = default;

  // Returns one entropy value for the generator's trial. The uniformity check
  // in this file requires each returned value to lie in [0, 1).
  virtual double GenerateEntropyValue() const = 0;
};
76
77// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
78// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
79// entropy corresponding to a low entropy source).
80class SHA1EntropyGenerator : public TrialEntropyGenerator {
81 public:
82 explicit SHA1EntropyGenerator(const std::string& trial_name)
83 : trial_name_(trial_name) {
84 }
85
dcheng00ea022b2014-10-21 11:24:5686 ~SHA1EntropyGenerator() override {}
[email protected]20f999b52012-08-24 22:32:5987
dcheng00ea022b2014-10-21 11:24:5688 double GenerateEntropyValue() const override {
[email protected]20f999b52012-08-24 22:32:5989 // Use a random GUID + 13 additional bits of entropy to match how the
90 // SHA1EntropyProvider is used in metrics_service.cc.
91 const int low_entropy_source =
avi5dd91f82015-12-25 22:30:4692 static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
[email protected]20f999b52012-08-24 22:32:5993 const std::string high_entropy_source =
94 base::GenerateGUID() + base::IntToString(low_entropy_source);
95 return GenerateSHA1Entropy(high_entropy_source, trial_name_);
96 }
97
98 private:
[email protected]c277e2b2013-08-02 15:41:0899 std::string trial_name_;
[email protected]20f999b52012-08-24 22:32:59100
101 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
102};
103
104// An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
105// using 13-bit low entropy source values.
106class PermutedEntropyGenerator : public TrialEntropyGenerator {
107 public:
108 explicit PermutedEntropyGenerator(const std::string& trial_name)
109 : mapping_(kMaxLowEntropySize) {
110 // Note: Given a trial name, the computed mapping will be the same.
111 // As a performance optimization, pre-compute the mapping once per trial
112 // name and index into it for each entropy value.
avi5dd91f82015-12-25 22:30:46113 const uint32_t randomization_seed = HashName(trial_name);
[email protected]6fded222013-04-11 20:59:50114 internal::PermuteMappingUsingRandomizationSeed(randomization_seed,
115 &mapping_);
[email protected]20f999b52012-08-24 22:32:59116 }
117
dcheng00ea022b2014-10-21 11:24:56118 ~PermutedEntropyGenerator() override {}
[email protected]20f999b52012-08-24 22:32:59119
dcheng00ea022b2014-10-21 11:24:56120 double GenerateEntropyValue() const override {
[email protected]20f999b52012-08-24 22:32:59121 const int low_entropy_source =
avi5dd91f82015-12-25 22:30:46122 static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
[email protected]20f999b52012-08-24 22:32:59123 return mapping_[low_entropy_source] /
124 static_cast<double>(kMaxLowEntropySize);
125 }
126
127 private:
avi5dd91f82015-12-25 22:30:46128 std::vector<uint16_t> mapping_;
[email protected]20f999b52012-08-24 22:32:59129
130 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
131};
132
133// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
134// of Fit Test.
135void PerformEntropyUniformityTest(
136 const std::string& trial_name,
137 const TrialEntropyGenerator& entropy_generator) {
138 // Number of buckets in the simulated field trials.
139 const size_t kBucketCount = 20;
140 // Max number of iterations to perform before giving up and failing.
141 const size_t kMaxIterationCount = 100000;
142 // The number of iterations to perform before each time the statistical
143 // significance of the results is checked.
144 const size_t kCheckIterationCount = 10000;
145 // This is the Chi-Square threshold from the Chi-Square statistic table for
146 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
147 // level. See: http://www.medcalc.org/manual/chi-square-table.php
148 const double kChiSquareThreshold = 43.82;
149
150 std::vector<int> distribution(kBucketCount);
151
152 for (size_t i = 1; i <= kMaxIterationCount; ++i) {
153 const double entropy_value = entropy_generator.GenerateEntropyValue();
154 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
155 ASSERT_LT(bucket, kBucketCount);
156 distribution[bucket] += 1;
157
158 // After |kCheckIterationCount| iterations, compute the Chi-Square
159 // statistic of the distribution. If the resulting statistic is greater
160 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
161 // that the observed samples do not follow a uniform distribution.
162 //
163 // However, since 99.9% would still result in a false negative every
164 // 1000 runs of the test, do not treat it as a failure (else the test
165 // will be flaky). Instead, perform additional iterations to determine
166 // if the distribution will converge, up to |kMaxIterationCount|.
167 if ((i % kCheckIterationCount) == 0) {
168 const double expected_value_per_bucket =
169 static_cast<double>(i) / kBucketCount;
170 const double chi_square =
171 ComputeChiSquare(distribution, expected_value_per_bucket);
172 if (chi_square < kChiSquareThreshold)
173 break;
174
175 // If |i == kMaxIterationCount|, the Chi-Square statistic did not
176 // converge after |kMaxIterationCount|.
177 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
178 trial_name << " with chi_square = " << chi_square <<
179 " after " << kMaxIterationCount << " iterations.";
180 }
181 }
182}
183
184} // namespace
185
[email protected]c277e2b2013-08-02 15:41:08186TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
[email protected]20f999b52012-08-24 22:32:59187 // Simply asserts that two trials using one-time randomization
188 // that have different names, normally generate different results.
189 //
190 // Note that depending on the one-time random initialization, they
191 // _might_ actually give the same result, but we know that given
192 // the particular client_id we use for unit tests they won't.
robliao79393ffb2016-09-21 18:45:29193 base::FieldTrialList field_trial_list(
194 base::MakeUnique<SHA1EntropyProvider>("client_id"));
[email protected]ebcf69f02013-07-30 15:11:29195 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
[email protected]20f999b52012-08-24 22:32:59196 scoped_refptr<base::FieldTrial> trials[] = {
[email protected]ebcf69f02013-07-30 15:11:29197 base::FieldTrialList::FactoryGetFieldTrial(
198 "one", 100, "default", kNoExpirationYear, 1, 1,
199 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
200 base::FieldTrialList::FactoryGetFieldTrial(
201 "two", 100, "default", kNoExpirationYear, 1, 1,
202 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
203 };
[email protected]20f999b52012-08-24 22:32:59204
205 for (size_t i = 0; i < arraysize(trials); ++i) {
[email protected]20f999b52012-08-24 22:32:59206 for (int j = 0; j < 100; ++j)
[email protected]007b3f82013-04-09 08:46:45207 trials[i]->AppendGroup(std::string(), 1);
[email protected]20f999b52012-08-24 22:32:59208 }
209
210 // The trials are most likely to give different results since they have
211 // different names.
212 EXPECT_NE(trials[0]->group(), trials[1]->group());
213 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
214}
215
[email protected]c277e2b2013-08-02 15:41:08216TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
[email protected]20f999b52012-08-24 22:32:59217 // Simply asserts that two trials using one-time randomization
218 // that have different names, normally generate different results.
219 //
220 // Note that depending on the one-time random initialization, they
221 // _might_ actually give the same result, but we know that given
222 // the particular client_id we use for unit tests they won't.
223 base::FieldTrialList field_trial_list(
robliao79393ffb2016-09-21 18:45:29224 base::MakeUnique<PermutedEntropyProvider>(1234, kMaxLowEntropySize));
[email protected]ebcf69f02013-07-30 15:11:29225 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
[email protected]20f999b52012-08-24 22:32:59226 scoped_refptr<base::FieldTrial> trials[] = {
[email protected]ebcf69f02013-07-30 15:11:29227 base::FieldTrialList::FactoryGetFieldTrial(
228 "one", 100, "default", kNoExpirationYear, 1, 1,
229 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
230 base::FieldTrialList::FactoryGetFieldTrial(
231 "two", 100, "default", kNoExpirationYear, 1, 1,
232 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
233 };
[email protected]20f999b52012-08-24 22:32:59234
235 for (size_t i = 0; i < arraysize(trials); ++i) {
[email protected]20f999b52012-08-24 22:32:59236 for (int j = 0; j < 100; ++j)
[email protected]007b3f82013-04-09 08:46:45237 trials[i]->AppendGroup(std::string(), 1);
[email protected]20f999b52012-08-24 22:32:59238 }
239
240 // The trials are most likely to give different results since they have
241 // different names.
242 EXPECT_NE(trials[0]->group(), trials[1]->group());
243 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
244}
245
[email protected]c277e2b2013-08-02 15:41:08246TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
[email protected]6fded222013-04-11 20:59:50247 // Ensures that two trials with different names but the same custom seed used
248 // for one time randomization produce the same group assignments.
249 base::FieldTrialList field_trial_list(
robliao79393ffb2016-09-21 18:45:29250 base::MakeUnique<PermutedEntropyProvider>(1234, kMaxLowEntropySize));
[email protected]ebcf69f02013-07-30 15:11:29251 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
avi5dd91f82015-12-25 22:30:46252 const uint32_t kCustomSeed = 9001;
[email protected]ebcf69f02013-07-30 15:11:29253 scoped_refptr<base::FieldTrial> trials[] = {
254 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
255 "one", 100, "default", kNoExpirationYear, 1, 1,
jwd67c08f752016-05-18 21:04:59256 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL, NULL),
[email protected]ebcf69f02013-07-30 15:11:29257 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
258 "two", 100, "default", kNoExpirationYear, 1, 1,
jwd67c08f752016-05-18 21:04:59259 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL, NULL),
[email protected]ebcf69f02013-07-30 15:11:29260 };
[email protected]6fded222013-04-11 20:59:50261
262 for (size_t i = 0; i < arraysize(trials); ++i) {
[email protected]6fded222013-04-11 20:59:50263 for (int j = 0; j < 100; ++j)
264 trials[i]->AppendGroup(std::string(), 1);
265 }
266
267 // Normally, these trials should produce different groups, but if the same
268 // custom seed is used, they should produce the same group assignment.
269 EXPECT_EQ(trials[0]->group(), trials[1]->group());
270 EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
271}
272
jwdc6e07e22016-11-21 16:36:54273TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedSHA1) {
274 // Ensures that two trials with different names but the same custom seed used
275 // for one time randomization produce the same group assignments.
276 base::FieldTrialList field_trial_list(
277 base::MakeUnique<SHA1EntropyProvider>("client_id"));
278 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
279 const uint32_t kCustomSeed = 9001;
280 scoped_refptr<base::FieldTrial> trials[] = {
281 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
282 "one", 100, "default", kNoExpirationYear, 1, 1,
283 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL, NULL),
284 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
285 "two", 100, "default", kNoExpirationYear, 1, 1,
286 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL, NULL),
287 };
288
289 for (size_t i = 0; i < arraysize(trials); ++i) {
290 for (int j = 0; j < 100; ++j)
291 trials[i]->AppendGroup(std::string(), 1);
292 }
293
294 // Normally, these trials should produce different groups, but if the same
295 // custom seed is used, they should produce the same group assignment.
296 EXPECT_EQ(trials[0]->group(), trials[1]->group());
297 EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
298}
299
[email protected]c277e2b2013-08-02 15:41:08300TEST(EntropyProviderTest, SHA1Entropy) {
[email protected]20f999b52012-08-24 22:32:59301 const double results[] = { GenerateSHA1Entropy("hi", "1"),
302 GenerateSHA1Entropy("there", "1") };
303
304 EXPECT_NE(results[0], results[1]);
305 for (size_t i = 0; i < arraysize(results); ++i) {
306 EXPECT_LE(0.0, results[i]);
307 EXPECT_GT(1.0, results[i]);
308 }
309
310 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
311 GenerateSHA1Entropy("yo", "1"));
312 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
313 GenerateSHA1Entropy("yo", "else"));
314}
315
[email protected]c277e2b2013-08-02 15:41:08316TEST(EntropyProviderTest, PermutedEntropy) {
[email protected]20f999b52012-08-24 22:32:59317 const double results[] = {
318 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
319 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
320
321 EXPECT_NE(results[0], results[1]);
322 for (size_t i = 0; i < arraysize(results); ++i) {
323 EXPECT_LE(0.0, results[i]);
324 EXPECT_GT(1.0, results[i]);
325 }
326
327 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
328 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
329 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
330 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
331}
332
[email protected]c277e2b2013-08-02 15:41:08333TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
[email protected]20f999b52012-08-24 22:32:59334 // Verifies that PermutedEntropyProvider produces expected results. This
335 // ensures that the results are the same between platforms and ensures that
336 // changes to the implementation do not regress this accidentally.
337
338 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
339 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
340 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
341 GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
342 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
343 GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
344}
345
[email protected]c277e2b2013-08-02 15:41:08346TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
[email protected]20f999b52012-08-24 22:32:59347 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
348 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
349 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
350 }
351}
352
[email protected]c277e2b2013-08-02 15:41:08353TEST(EntropyProviderTest, PermutedEntropyIsUniform) {
[email protected]20f999b52012-08-24 22:32:59354 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
355 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
356 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
357 }
358}
359
[email protected]c277e2b2013-08-02 15:41:08360TEST(EntropyProviderTest, SeededRandGeneratorIsUniform) {
[email protected]20f999b52012-08-24 22:32:59361 // Verifies that SeededRandGenerator has a uniform distribution.
362 //
363 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
364
avi5dd91f82015-12-25 22:30:46365 const uint32_t kTopOfRange =
366 (std::numeric_limits<uint32_t>::max() / 4ULL) * 3ULL;
367 const uint32_t kExpectedAverage = kTopOfRange / 2ULL;
368 const uint32_t kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
[email protected]20f999b52012-08-24 22:32:59369 const int kMinAttempts = 1000;
370 const int kMaxAttempts = 1000000;
371
372 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
avi5dd91f82015-12-25 22:30:46373 const uint32_t seed = HashName(kTestTrialNames[i]);
[email protected]20f999b52012-08-24 22:32:59374 internal::SeededRandGenerator rand_generator(seed);
375
376 double cumulative_average = 0.0;
377 int count = 0;
378 while (count < kMaxAttempts) {
avi5dd91f82015-12-25 22:30:46379 uint32_t value = rand_generator(kTopOfRange);
[email protected]20f999b52012-08-24 22:32:59380 cumulative_average = (count * cumulative_average + value) / (count + 1);
381
382 // Don't quit too quickly for things to start converging, or we may have
383 // a false positive.
384 if (count > kMinAttempts &&
385 kExpectedAverage - kAllowedVariance < cumulative_average &&
386 cumulative_average < kExpectedAverage + kAllowedVariance) {
387 break;
388 }
389
390 ++count;
391 }
392
393 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
394 kExpectedAverage << ", average ended at " << cumulative_average <<
395 ", for trial " << kTestTrialNames[i];
396 }
397}
398
[email protected]20f999b52012-08-24 22:32:59399} // namespace metrics