blob: cde08e0aed9c1505455a3b3a32e6bba5db131d8c [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
[email protected]50ae9f12013-08-29 18:03:225#include "components/variations/entropy_provider.h"
[email protected]c277e2b2013-08-02 15:41:086
avi5dd91f82015-12-25 22:30:467#include <stddef.h>
8#include <stdint.h>
9
[email protected]20f999b52012-08-24 22:32:5910#include <cmath>
11#include <limits>
12#include <numeric>
13
[email protected]20f999b52012-08-24 22:32:5914#include "base/guid.h"
avi5dd91f82015-12-25 22:30:4615#include "base/macros.h"
robliao79393ffb2016-09-21 18:45:2916#include "base/memory/ptr_util.h"
[email protected]20f999b52012-08-24 22:32:5917#include "base/rand_util.h"
[email protected]3ea1b182013-02-08 22:38:4118#include "base/strings/string_number_conversions.h"
[email protected]50ae9f12013-08-29 18:03:2219#include "components/variations/metrics_util.h"
[email protected]20f999b52012-08-24 22:32:5920#include "testing/gtest/include/gtest/gtest.h"
21
22namespace metrics {
23
24namespace {
25
// Size of the low entropy source handed to the permuted entropy provider by
// the tests in this file; low entropy source values are drawn from
// [0, kMaxLowEntropySize).
constexpr size_t kMaxLowEntropySize = 8000;
[email protected]20f999b52012-08-24 22:32:5929
// Names of the field trials exercised by the unit tests in this file.
const char* const kTestTrialNames[] = {
    "TestTrial",
    "AnotherTestTrial",
    "NewTabButton",
};
33
// Returns the Chi-Square statistic of |values| measured against a uniform
// distribution in which every entry is expected to equal |expected_value|.
//
// The statistic is Sum((O - E)^2 / E), where O is the observed value of an
// entry and E is its expected value. An empty |values| yields 0.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double chi_square = 0;
  for (const int observed : values) {
    const double deviation = observed - expected_value;
    chi_square += (deviation * deviation) / expected_value;
  }
  return chi_square;
}
48
49// Computes SHA1-based entropy for the given |trial_name| based on
50// |entropy_source|
51double GenerateSHA1Entropy(const std::string& entropy_source,
52 const std::string& trial_name) {
53 SHA1EntropyProvider sha1_provider(entropy_source);
[email protected]6fded222013-04-11 20:59:5054 return sha1_provider.GetEntropyForTrial(trial_name, 0);
[email protected]20f999b52012-08-24 22:32:5955}
56
57// Generates permutation-based entropy for the given |trial_name| based on
58// |entropy_source| which must be in the range [0, entropy_max).
avi5dd91f82015-12-25 22:30:4659double GeneratePermutedEntropy(uint16_t entropy_source,
[email protected]20f999b52012-08-24 22:32:5960 size_t entropy_max,
61 const std::string& trial_name) {
62 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
[email protected]6fded222013-04-11 20:59:5063 return permuted_provider.GetEntropyForTrial(trial_name, 0);
[email protected]20f999b52012-08-24 22:32:5964}
65
// Test-only interface that produces entropy values for one fixed field trial.
// An EntropyProvider keeps the low/high entropy source value constant and
// generates entropy for different trial names; a TrialEntropyGenerator does
// the opposite: it keeps the trial name constant and generates a low/high
// entropy source value internally for each entropy value it outputs.
class TrialEntropyGenerator {
 public:
  virtual ~TrialEntropyGenerator() = default;

  // Returns one entropy value for the generator's trial.
  virtual double GenerateEntropyValue() const = 0;
};
76
77// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
78// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
79// entropy corresponding to a low entropy source).
80class SHA1EntropyGenerator : public TrialEntropyGenerator {
81 public:
82 explicit SHA1EntropyGenerator(const std::string& trial_name)
83 : trial_name_(trial_name) {
84 }
85
dcheng00ea022b2014-10-21 11:24:5686 ~SHA1EntropyGenerator() override {}
[email protected]20f999b52012-08-24 22:32:5987
dcheng00ea022b2014-10-21 11:24:5688 double GenerateEntropyValue() const override {
[email protected]20f999b52012-08-24 22:32:5989 // Use a random GUID + 13 additional bits of entropy to match how the
90 // SHA1EntropyProvider is used in metrics_service.cc.
91 const int low_entropy_source =
avi5dd91f82015-12-25 22:30:4692 static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
[email protected]20f999b52012-08-24 22:32:5993 const std::string high_entropy_source =
94 base::GenerateGUID() + base::IntToString(low_entropy_source);
95 return GenerateSHA1Entropy(high_entropy_source, trial_name_);
96 }
97
98 private:
[email protected]c277e2b2013-08-02 15:41:0899 std::string trial_name_;
[email protected]20f999b52012-08-24 22:32:59100
101 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
102};
103
104// An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
105// using 13-bit low entropy source values.
106class PermutedEntropyGenerator : public TrialEntropyGenerator {
107 public:
108 explicit PermutedEntropyGenerator(const std::string& trial_name)
109 : mapping_(kMaxLowEntropySize) {
110 // Note: Given a trial name, the computed mapping will be the same.
111 // As a performance optimization, pre-compute the mapping once per trial
112 // name and index into it for each entropy value.
avi5dd91f82015-12-25 22:30:46113 const uint32_t randomization_seed = HashName(trial_name);
[email protected]6fded222013-04-11 20:59:50114 internal::PermuteMappingUsingRandomizationSeed(randomization_seed,
115 &mapping_);
[email protected]20f999b52012-08-24 22:32:59116 }
117
dcheng00ea022b2014-10-21 11:24:56118 ~PermutedEntropyGenerator() override {}
[email protected]20f999b52012-08-24 22:32:59119
dcheng00ea022b2014-10-21 11:24:56120 double GenerateEntropyValue() const override {
[email protected]20f999b52012-08-24 22:32:59121 const int low_entropy_source =
avi5dd91f82015-12-25 22:30:46122 static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
[email protected]20f999b52012-08-24 22:32:59123 return mapping_[low_entropy_source] /
124 static_cast<double>(kMaxLowEntropySize);
125 }
126
127 private:
avi5dd91f82015-12-25 22:30:46128 std::vector<uint16_t> mapping_;
[email protected]20f999b52012-08-24 22:32:59129
130 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
131};
132
133// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
134// of Fit Test.
135void PerformEntropyUniformityTest(
136 const std::string& trial_name,
137 const TrialEntropyGenerator& entropy_generator) {
138 // Number of buckets in the simulated field trials.
139 const size_t kBucketCount = 20;
140 // Max number of iterations to perform before giving up and failing.
141 const size_t kMaxIterationCount = 100000;
142 // The number of iterations to perform before each time the statistical
143 // significance of the results is checked.
144 const size_t kCheckIterationCount = 10000;
145 // This is the Chi-Square threshold from the Chi-Square statistic table for
146 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
147 // level. See: http://www.medcalc.org/manual/chi-square-table.php
148 const double kChiSquareThreshold = 43.82;
149
150 std::vector<int> distribution(kBucketCount);
151
152 for (size_t i = 1; i <= kMaxIterationCount; ++i) {
153 const double entropy_value = entropy_generator.GenerateEntropyValue();
154 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
155 ASSERT_LT(bucket, kBucketCount);
156 distribution[bucket] += 1;
157
158 // After |kCheckIterationCount| iterations, compute the Chi-Square
159 // statistic of the distribution. If the resulting statistic is greater
160 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
161 // that the observed samples do not follow a uniform distribution.
162 //
163 // However, since 99.9% would still result in a false negative every
164 // 1000 runs of the test, do not treat it as a failure (else the test
165 // will be flaky). Instead, perform additional iterations to determine
166 // if the distribution will converge, up to |kMaxIterationCount|.
167 if ((i % kCheckIterationCount) == 0) {
168 const double expected_value_per_bucket =
169 static_cast<double>(i) / kBucketCount;
170 const double chi_square =
171 ComputeChiSquare(distribution, expected_value_per_bucket);
172 if (chi_square < kChiSquareThreshold)
173 break;
174
175 // If |i == kMaxIterationCount|, the Chi-Square statistic did not
176 // converge after |kMaxIterationCount|.
177 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
178 trial_name << " with chi_square = " << chi_square <<
179 " after " << kMaxIterationCount << " iterations.";
180 }
181 }
182}
183
184} // namespace
185
[email protected]c277e2b2013-08-02 15:41:08186TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
[email protected]20f999b52012-08-24 22:32:59187 // Simply asserts that two trials using one-time randomization
188 // that have different names, normally generate different results.
189 //
190 // Note that depending on the one-time random initialization, they
191 // _might_ actually give the same result, but we know that given
192 // the particular client_id we use for unit tests they won't.
robliao79393ffb2016-09-21 18:45:29193 base::FieldTrialList field_trial_list(
194 base::MakeUnique<SHA1EntropyProvider>("client_id"));
[email protected]ebcf69f02013-07-30 15:11:29195 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
[email protected]20f999b52012-08-24 22:32:59196 scoped_refptr<base::FieldTrial> trials[] = {
[email protected]ebcf69f02013-07-30 15:11:29197 base::FieldTrialList::FactoryGetFieldTrial(
198 "one", 100, "default", kNoExpirationYear, 1, 1,
199 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
200 base::FieldTrialList::FactoryGetFieldTrial(
201 "two", 100, "default", kNoExpirationYear, 1, 1,
202 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
203 };
[email protected]20f999b52012-08-24 22:32:59204
205 for (size_t i = 0; i < arraysize(trials); ++i) {
[email protected]20f999b52012-08-24 22:32:59206 for (int j = 0; j < 100; ++j)
[email protected]007b3f82013-04-09 08:46:45207 trials[i]->AppendGroup(std::string(), 1);
[email protected]20f999b52012-08-24 22:32:59208 }
209
210 // The trials are most likely to give different results since they have
211 // different names.
212 EXPECT_NE(trials[0]->group(), trials[1]->group());
213 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
214}
215
[email protected]c277e2b2013-08-02 15:41:08216TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
[email protected]20f999b52012-08-24 22:32:59217 // Simply asserts that two trials using one-time randomization
218 // that have different names, normally generate different results.
219 //
220 // Note that depending on the one-time random initialization, they
221 // _might_ actually give the same result, but we know that given
222 // the particular client_id we use for unit tests they won't.
223 base::FieldTrialList field_trial_list(
robliao79393ffb2016-09-21 18:45:29224 base::MakeUnique<PermutedEntropyProvider>(1234, kMaxLowEntropySize));
[email protected]ebcf69f02013-07-30 15:11:29225 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
[email protected]20f999b52012-08-24 22:32:59226 scoped_refptr<base::FieldTrial> trials[] = {
[email protected]ebcf69f02013-07-30 15:11:29227 base::FieldTrialList::FactoryGetFieldTrial(
228 "one", 100, "default", kNoExpirationYear, 1, 1,
229 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
230 base::FieldTrialList::FactoryGetFieldTrial(
231 "two", 100, "default", kNoExpirationYear, 1, 1,
232 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
233 };
[email protected]20f999b52012-08-24 22:32:59234
235 for (size_t i = 0; i < arraysize(trials); ++i) {
[email protected]20f999b52012-08-24 22:32:59236 for (int j = 0; j < 100; ++j)
[email protected]007b3f82013-04-09 08:46:45237 trials[i]->AppendGroup(std::string(), 1);
[email protected]20f999b52012-08-24 22:32:59238 }
239
240 // The trials are most likely to give different results since they have
241 // different names.
242 EXPECT_NE(trials[0]->group(), trials[1]->group());
243 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
244}
245
[email protected]c277e2b2013-08-02 15:41:08246TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
[email protected]6fded222013-04-11 20:59:50247 // Ensures that two trials with different names but the same custom seed used
248 // for one time randomization produce the same group assignments.
249 base::FieldTrialList field_trial_list(
robliao79393ffb2016-09-21 18:45:29250 base::MakeUnique<PermutedEntropyProvider>(1234, kMaxLowEntropySize));
[email protected]ebcf69f02013-07-30 15:11:29251 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
avi5dd91f82015-12-25 22:30:46252 const uint32_t kCustomSeed = 9001;
[email protected]ebcf69f02013-07-30 15:11:29253 scoped_refptr<base::FieldTrial> trials[] = {
254 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
255 "one", 100, "default", kNoExpirationYear, 1, 1,
jwd67c08f752016-05-18 21:04:59256 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL, NULL),
[email protected]ebcf69f02013-07-30 15:11:29257 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
258 "two", 100, "default", kNoExpirationYear, 1, 1,
jwd67c08f752016-05-18 21:04:59259 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL, NULL),
[email protected]ebcf69f02013-07-30 15:11:29260 };
[email protected]6fded222013-04-11 20:59:50261
262 for (size_t i = 0; i < arraysize(trials); ++i) {
[email protected]6fded222013-04-11 20:59:50263 for (int j = 0; j < 100; ++j)
264 trials[i]->AppendGroup(std::string(), 1);
265 }
266
267 // Normally, these trials should produce different groups, but if the same
268 // custom seed is used, they should produce the same group assignment.
269 EXPECT_EQ(trials[0]->group(), trials[1]->group());
270 EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
271}
272
[email protected]c277e2b2013-08-02 15:41:08273TEST(EntropyProviderTest, SHA1Entropy) {
[email protected]20f999b52012-08-24 22:32:59274 const double results[] = { GenerateSHA1Entropy("hi", "1"),
275 GenerateSHA1Entropy("there", "1") };
276
277 EXPECT_NE(results[0], results[1]);
278 for (size_t i = 0; i < arraysize(results); ++i) {
279 EXPECT_LE(0.0, results[i]);
280 EXPECT_GT(1.0, results[i]);
281 }
282
283 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
284 GenerateSHA1Entropy("yo", "1"));
285 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
286 GenerateSHA1Entropy("yo", "else"));
287}
288
[email protected]c277e2b2013-08-02 15:41:08289TEST(EntropyProviderTest, PermutedEntropy) {
[email protected]20f999b52012-08-24 22:32:59290 const double results[] = {
291 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
292 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
293
294 EXPECT_NE(results[0], results[1]);
295 for (size_t i = 0; i < arraysize(results); ++i) {
296 EXPECT_LE(0.0, results[i]);
297 EXPECT_GT(1.0, results[i]);
298 }
299
300 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
301 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
302 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
303 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
304}
305
[email protected]c277e2b2013-08-02 15:41:08306TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
[email protected]20f999b52012-08-24 22:32:59307 // Verifies that PermutedEntropyProvider produces expected results. This
308 // ensures that the results are the same between platforms and ensures that
309 // changes to the implementation do not regress this accidentally.
310
311 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
312 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
313 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
314 GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
315 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
316 GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
317}
318
[email protected]c277e2b2013-08-02 15:41:08319TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
[email protected]20f999b52012-08-24 22:32:59320 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
321 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
322 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
323 }
324}
325
[email protected]c277e2b2013-08-02 15:41:08326TEST(EntropyProviderTest, PermutedEntropyIsUniform) {
[email protected]20f999b52012-08-24 22:32:59327 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
328 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
329 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
330 }
331}
332
[email protected]c277e2b2013-08-02 15:41:08333TEST(EntropyProviderTest, SeededRandGeneratorIsUniform) {
[email protected]20f999b52012-08-24 22:32:59334 // Verifies that SeededRandGenerator has a uniform distribution.
335 //
336 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
337
avi5dd91f82015-12-25 22:30:46338 const uint32_t kTopOfRange =
339 (std::numeric_limits<uint32_t>::max() / 4ULL) * 3ULL;
340 const uint32_t kExpectedAverage = kTopOfRange / 2ULL;
341 const uint32_t kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
[email protected]20f999b52012-08-24 22:32:59342 const int kMinAttempts = 1000;
343 const int kMaxAttempts = 1000000;
344
345 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
avi5dd91f82015-12-25 22:30:46346 const uint32_t seed = HashName(kTestTrialNames[i]);
[email protected]20f999b52012-08-24 22:32:59347 internal::SeededRandGenerator rand_generator(seed);
348
349 double cumulative_average = 0.0;
350 int count = 0;
351 while (count < kMaxAttempts) {
avi5dd91f82015-12-25 22:30:46352 uint32_t value = rand_generator(kTopOfRange);
[email protected]20f999b52012-08-24 22:32:59353 cumulative_average = (count * cumulative_average + value) / (count + 1);
354
355 // Don't quit too quickly for things to start converging, or we may have
356 // a false positive.
357 if (count > kMinAttempts &&
358 kExpectedAverage - kAllowedVariance < cumulative_average &&
359 cumulative_average < kExpectedAverage + kAllowedVariance) {
360 break;
361 }
362
363 ++count;
364 }
365
366 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
367 kExpectedAverage << ", average ended at " << cumulative_average <<
368 ", for trial " << kTestTrialNames[i];
369 }
370}
371
[email protected]20f999b52012-08-24 22:32:59372} // namespace metrics