blob: 121bc0903d89929b5bb26c99c2db888d4befcbcf [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
[email protected]50ae9f12013-08-29 18:03:225#include "components/variations/entropy_provider.h"
[email protected]c277e2b2013-08-02 15:41:086
avi5dd91f82015-12-25 22:30:467#include <stddef.h>
8#include <stdint.h>
9
[email protected]20f999b52012-08-24 22:32:5910#include <cmath>
11#include <limits>
Jinho Bangc3bcb5c2018-01-15 16:13:0012#include <memory>
[email protected]20f999b52012-08-24 22:32:5913#include <numeric>
14
[email protected]20f999b52012-08-24 22:32:5915#include "base/guid.h"
avi5dd91f82015-12-25 22:30:4616#include "base/macros.h"
[email protected]20f999b52012-08-24 22:32:5917#include "base/rand_util.h"
Byoungkown1bb50222018-09-11 01:14:4118#include "base/stl_util.h"
[email protected]3ea1b182013-02-08 22:38:4119#include "base/strings/string_number_conversions.h"
Alexei Svitkine9de32cb2018-02-06 20:21:2120#include "components/variations/hashing.h"
[email protected]20f999b52012-08-24 22:32:5921#include "testing/gtest/include/gtest/gtest.h"
22
Alexei Svitkine9de32cb2018-02-06 20:21:2123namespace variations {
[email protected]20f999b52012-08-24 22:32:5924
25namespace {
26
// Size of the low entropy source for testing. The tests in this file draw low
// entropy source values from [0, kMaxLowEntropySize) and pass this constant as
// the provider's |entropy_max|.
constexpr size_t kMaxLowEntropySize = 8000;
[email protected]20f999b52012-08-24 22:32:5929
// Field trial names used in unit tests. The uniformity tests run once per
// entry in this array.
constexpr const char* kTestTrialNames[] = {"TestTrial", "AnotherTestTrial",
                                           "NewTabButton"};
33
// Computes the Chi-Square statistic for |values| assuming they follow a uniform
// distribution, where each entry has expected value |expected_value|.
//
// The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
// value and E is the expected value.
//
// Returns 0 for an empty |values|. |expected_value| is used as a divisor, so
// callers are expected to pass a non-zero value.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double sum = 0;
  for (const int observed : values) {
    const double delta = observed - expected_value;
    sum += (delta * delta) / expected_value;
  }
  return sum;
}
48
49// Computes SHA1-based entropy for the given |trial_name| based on
50// |entropy_source|
51double GenerateSHA1Entropy(const std::string& entropy_source,
52 const std::string& trial_name) {
53 SHA1EntropyProvider sha1_provider(entropy_source);
[email protected]6fded222013-04-11 20:59:5054 return sha1_provider.GetEntropyForTrial(trial_name, 0);
[email protected]20f999b52012-08-24 22:32:5955}
56
Paul Miller7c0efea2018-11-13 23:49:0057// Generates normalized MurmurHash-based entropy for the given |trial_name|
58// based on |entropy_source| which must be in the range [0, entropy_max).
59double GenerateNormalizedMurmurHashEntropy(uint16_t entropy_source,
60 size_t entropy_max,
61 const std::string& trial_name) {
62 NormalizedMurmurHashEntropyProvider provider(entropy_source, entropy_max);
63 return provider.GetEntropyForTrial(trial_name, 0);
64}
65
// Helper interface for testing used to generate entropy values for a given
// field trial. Unlike EntropyProvider, which keeps the low/high entropy source
// value constant and generates entropy for different trial names, instances
// of TrialEntropyGenerator keep the trial name constant and generate low/high
// entropy source values internally to produce each output entropy value.
class TrialEntropyGenerator {
 public:
  virtual ~TrialEntropyGenerator() = default;

  // Returns one entropy value for the trial this generator was built for.
  // Implementations in this file pick a new entropy source on every call.
  virtual double GenerateEntropyValue() const = 0;
};
76
77// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
78// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
79// entropy corresponding to a low entropy source).
80class SHA1EntropyGenerator : public TrialEntropyGenerator {
81 public:
82 explicit SHA1EntropyGenerator(const std::string& trial_name)
83 : trial_name_(trial_name) {
84 }
85
dcheng00ea022b2014-10-21 11:24:5686 ~SHA1EntropyGenerator() override {}
[email protected]20f999b52012-08-24 22:32:5987
dcheng00ea022b2014-10-21 11:24:5688 double GenerateEntropyValue() const override {
[email protected]20f999b52012-08-24 22:32:5989 // Use a random GUID + 13 additional bits of entropy to match how the
90 // SHA1EntropyProvider is used in metrics_service.cc.
91 const int low_entropy_source =
avi5dd91f82015-12-25 22:30:4692 static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
[email protected]20f999b52012-08-24 22:32:5993 const std::string high_entropy_source =
Raul Tambref88e5102019-02-06 10:54:0394 base::GenerateGUID() + base::NumberToString(low_entropy_source);
[email protected]20f999b52012-08-24 22:32:5995 return GenerateSHA1Entropy(high_entropy_source, trial_name_);
96 }
97
98 private:
Paul Miller7c0efea2018-11-13 23:49:0099 const std::string trial_name_;
[email protected]20f999b52012-08-24 22:32:59100
101 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
102};
103
Paul Miller7c0efea2018-11-13 23:49:00104// An TrialEntropyGenerator that uses the normalized MurmurHash entropy provider
105// algorithm, using 13-bit low entropy source values.
106class NormalizedMurmurHashEntropyGenerator : public TrialEntropyGenerator {
107 public:
108 explicit NormalizedMurmurHashEntropyGenerator(const std::string& trial_name)
109 : trial_name_(trial_name) {}
110
111 ~NormalizedMurmurHashEntropyGenerator() override {}
112
113 double GenerateEntropyValue() const override {
114 const int low_entropy_source =
115 static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
116 return GenerateNormalizedMurmurHashEntropy(low_entropy_source,
117 kMaxLowEntropySize, trial_name_);
118 }
119
120 private:
121 const std::string trial_name_;
122
123 DISALLOW_COPY_AND_ASSIGN(NormalizedMurmurHashEntropyGenerator);
124};
125
[email protected]20f999b52012-08-24 22:32:59126// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
127// of Fit Test.
128void PerformEntropyUniformityTest(
129 const std::string& trial_name,
130 const TrialEntropyGenerator& entropy_generator) {
131 // Number of buckets in the simulated field trials.
132 const size_t kBucketCount = 20;
133 // Max number of iterations to perform before giving up and failing.
134 const size_t kMaxIterationCount = 100000;
135 // The number of iterations to perform before each time the statistical
136 // significance of the results is checked.
137 const size_t kCheckIterationCount = 10000;
138 // This is the Chi-Square threshold from the Chi-Square statistic table for
139 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
140 // level. See: http://www.medcalc.org/manual/chi-square-table.php
141 const double kChiSquareThreshold = 43.82;
142
143 std::vector<int> distribution(kBucketCount);
144
145 for (size_t i = 1; i <= kMaxIterationCount; ++i) {
146 const double entropy_value = entropy_generator.GenerateEntropyValue();
147 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
148 ASSERT_LT(bucket, kBucketCount);
149 distribution[bucket] += 1;
150
151 // After |kCheckIterationCount| iterations, compute the Chi-Square
152 // statistic of the distribution. If the resulting statistic is greater
153 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
154 // that the observed samples do not follow a uniform distribution.
155 //
156 // However, since 99.9% would still result in a false negative every
157 // 1000 runs of the test, do not treat it as a failure (else the test
158 // will be flaky). Instead, perform additional iterations to determine
159 // if the distribution will converge, up to |kMaxIterationCount|.
160 if ((i % kCheckIterationCount) == 0) {
161 const double expected_value_per_bucket =
162 static_cast<double>(i) / kBucketCount;
163 const double chi_square =
164 ComputeChiSquare(distribution, expected_value_per_bucket);
165 if (chi_square < kChiSquareThreshold)
166 break;
167
168 // If |i == kMaxIterationCount|, the Chi-Square statistic did not
169 // converge after |kMaxIterationCount|.
170 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
171 trial_name << " with chi_square = " << chi_square <<
172 " after " << kMaxIterationCount << " iterations.";
173 }
174 }
175}
176
177} // namespace
178
[email protected]c277e2b2013-08-02 15:41:08179TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
[email protected]20f999b52012-08-24 22:32:59180 // Simply asserts that two trials using one-time randomization
181 // that have different names, normally generate different results.
182 //
183 // Note that depending on the one-time random initialization, they
Paul Miller7c0efea2018-11-13 23:49:00184 // _might_ actually give the same result, but we know that given the
185 // particular client_id we use for unit tests they won't.
robliao79393ffb2016-09-21 18:45:29186 base::FieldTrialList field_trial_list(
Jinho Bangc3bcb5c2018-01-15 16:13:00187 std::make_unique<SHA1EntropyProvider>("client_id"));
[email protected]20f999b52012-08-24 22:32:59188 scoped_refptr<base::FieldTrial> trials[] = {
[email protected]ebcf69f02013-07-30 15:11:29189 base::FieldTrialList::FactoryGetFieldTrial(
Alexei Svitkinecde0b632019-05-29 14:22:35190 "one", 100, "default", base::FieldTrial::ONE_TIME_RANDOMIZED,
191 nullptr),
[email protected]ebcf69f02013-07-30 15:11:29192 base::FieldTrialList::FactoryGetFieldTrial(
Alexei Svitkinecde0b632019-05-29 14:22:35193 "two", 100, "default", base::FieldTrial::ONE_TIME_RANDOMIZED,
194 nullptr),
[email protected]ebcf69f02013-07-30 15:11:29195 };
[email protected]20f999b52012-08-24 22:32:59196
Byoungkown1bb50222018-09-11 01:14:41197 for (size_t i = 0; i < base::size(trials); ++i) {
[email protected]20f999b52012-08-24 22:32:59198 for (int j = 0; j < 100; ++j)
[email protected]007b3f82013-04-09 08:46:45199 trials[i]->AppendGroup(std::string(), 1);
[email protected]20f999b52012-08-24 22:32:59200 }
201
202 // The trials are most likely to give different results since they have
203 // different names.
204 EXPECT_NE(trials[0]->group(), trials[1]->group());
205 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
206}
207
Paul Miller7c0efea2018-11-13 23:49:00208TEST(EntropyProviderTest, UseOneTimeRandomizationNormalizedMurmurHash) {
209 // Simply asserts that two trials using one-time randomization
210 // that have different names, normally generate different results.
211 //
212 // Note that depending on the one-time random initialization, they
213 // _might_ actually give the same result, but we know that given
214 // the particular low_entropy_source we use for unit tests they won't.
215 base::FieldTrialList field_trial_list(
216 std::make_unique<NormalizedMurmurHashEntropyProvider>(
217 1234, kMaxLowEntropySize));
Paul Miller7c0efea2018-11-13 23:49:00218 scoped_refptr<base::FieldTrial> trials[] = {
219 base::FieldTrialList::FactoryGetFieldTrial(
Alexei Svitkinecde0b632019-05-29 14:22:35220 "one", 100, "default", base::FieldTrial::ONE_TIME_RANDOMIZED,
221 nullptr),
Paul Miller7c0efea2018-11-13 23:49:00222 base::FieldTrialList::FactoryGetFieldTrial(
Alexei Svitkinecde0b632019-05-29 14:22:35223 "two", 100, "default", base::FieldTrial::ONE_TIME_RANDOMIZED,
224 nullptr),
Paul Miller7c0efea2018-11-13 23:49:00225 };
226
227 for (size_t i = 0; i < base::size(trials); ++i) {
228 for (int j = 0; j < 100; ++j)
229 trials[i]->AppendGroup(std::string(), 1);
230 }
231
232 // The trials are most likely to give different results since they have
233 // different names.
234 EXPECT_NE(trials[0]->group(), trials[1]->group());
235 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
236}
237
jwdc6e07e22016-11-21 16:36:54238TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedSHA1) {
239 // Ensures that two trials with different names but the same custom seed used
240 // for one time randomization produce the same group assignments.
241 base::FieldTrialList field_trial_list(
Jinho Bangc3bcb5c2018-01-15 16:13:00242 std::make_unique<SHA1EntropyProvider>("client_id"));
jwdc6e07e22016-11-21 16:36:54243 const uint32_t kCustomSeed = 9001;
244 scoped_refptr<base::FieldTrial> trials[] = {
245 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
Alexei Svitkinecde0b632019-05-29 14:22:35246 "one", 100, "default", base::FieldTrial::ONE_TIME_RANDOMIZED,
247 kCustomSeed, nullptr, nullptr),
jwdc6e07e22016-11-21 16:36:54248 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
Alexei Svitkinecde0b632019-05-29 14:22:35249 "two", 100, "default", base::FieldTrial::ONE_TIME_RANDOMIZED,
250 kCustomSeed, nullptr, nullptr),
jwdc6e07e22016-11-21 16:36:54251 };
252
Byoungkown1bb50222018-09-11 01:14:41253 for (size_t i = 0; i < base::size(trials); ++i) {
jwdc6e07e22016-11-21 16:36:54254 for (int j = 0; j < 100; ++j)
255 trials[i]->AppendGroup(std::string(), 1);
256 }
257
258 // Normally, these trials should produce different groups, but if the same
259 // custom seed is used, they should produce the same group assignment.
260 EXPECT_EQ(trials[0]->group(), trials[1]->group());
261 EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
262}
263
Paul Miller7c0efea2018-11-13 23:49:00264TEST(EntropyProviderTest,
265 UseOneTimeRandomizationWithCustomSeedNormalizedMurmurHash) {
266 // Ensures that two trials with different names but the same custom seed used
267 // for one time randomization produce the same group assignments.
268 base::FieldTrialList field_trial_list(
269 std::make_unique<NormalizedMurmurHashEntropyProvider>(
270 1234, kMaxLowEntropySize));
Paul Miller7c0efea2018-11-13 23:49:00271 const uint32_t kCustomSeed = 9001;
272 scoped_refptr<base::FieldTrial> trials[] = {
273 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
Alexei Svitkinecde0b632019-05-29 14:22:35274 "one", 100, "default", base::FieldTrial::ONE_TIME_RANDOMIZED,
275 kCustomSeed, nullptr, nullptr),
Paul Miller7c0efea2018-11-13 23:49:00276 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
Alexei Svitkinecde0b632019-05-29 14:22:35277 "two", 100, "default", base::FieldTrial::ONE_TIME_RANDOMIZED,
278 kCustomSeed, nullptr, nullptr),
Paul Miller7c0efea2018-11-13 23:49:00279 };
280
281 for (size_t i = 0; i < base::size(trials); ++i) {
282 for (int j = 0; j < 100; ++j)
283 trials[i]->AppendGroup(std::string(), 1);
284 }
285
286 // Normally, these trials should produce different groups, but if the same
287 // custom seed is used, they should produce the same group assignment.
288 EXPECT_EQ(trials[0]->group(), trials[1]->group());
289 EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
290}
291
[email protected]c277e2b2013-08-02 15:41:08292TEST(EntropyProviderTest, SHA1Entropy) {
[email protected]20f999b52012-08-24 22:32:59293 const double results[] = { GenerateSHA1Entropy("hi", "1"),
294 GenerateSHA1Entropy("there", "1") };
295
296 EXPECT_NE(results[0], results[1]);
Byoungkown1bb50222018-09-11 01:14:41297 for (size_t i = 0; i < base::size(results); ++i) {
[email protected]20f999b52012-08-24 22:32:59298 EXPECT_LE(0.0, results[i]);
299 EXPECT_GT(1.0, results[i]);
300 }
301
302 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
303 GenerateSHA1Entropy("yo", "1"));
304 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
305 GenerateSHA1Entropy("yo", "else"));
306}
307
Paul Miller7c0efea2018-11-13 23:49:00308TEST(EntropyProviderTest, NormalizedMurmurHashEntropy) {
309 const double results[] = {
310 GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"),
311 GenerateNormalizedMurmurHashEntropy(4321, kMaxLowEntropySize, "1")};
312
313 EXPECT_NE(results[0], results[1]);
314 for (size_t i = 0; i < base::size(results); ++i) {
315 EXPECT_LE(0.0, results[i]);
316 EXPECT_GT(1.0, results[i]);
317 }
318
319 EXPECT_EQ(GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"),
320 GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"));
321 EXPECT_NE(GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize,
322 "something"),
323 GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize,
324 "else"));
325}
326
Paul Miller7c0efea2018-11-13 23:49:00327TEST(EntropyProviderTest, NormalizedMurmurHashEntropyProviderResults) {
328 // Verifies that NormalizedMurmurHashEntropyProvider produces expected
329 // results. This ensures that the results are the same between platforms and
330 // ensures that changes to the implementation do not regress this
331 // accidentally.
332
333 EXPECT_DOUBLE_EQ(
334 1612 / static_cast<double>(kMaxLowEntropySize),
335 GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "XYZ"));
336 EXPECT_DOUBLE_EQ(
337 7066 / static_cast<double>(kMaxLowEntropySize),
338 GenerateNormalizedMurmurHashEntropy(1, kMaxLowEntropySize, "Test"));
339 EXPECT_DOUBLE_EQ(
340 5668 / static_cast<double>(kMaxLowEntropySize),
341 GenerateNormalizedMurmurHashEntropy(5000, kMaxLowEntropySize, "Foo"));
342}
343
[email protected]c277e2b2013-08-02 15:41:08344TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
Byoungkown1bb50222018-09-11 01:14:41345 for (size_t i = 0; i < base::size(kTestTrialNames); ++i) {
[email protected]20f999b52012-08-24 22:32:59346 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
347 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
348 }
349}
350
Paul Miller7c0efea2018-11-13 23:49:00351TEST(EntropyProviderTest, NormalizedMurmurHashEntropyIsUniform) {
352 for (size_t i = 0; i < base::size(kTestTrialNames); ++i) {
353 NormalizedMurmurHashEntropyGenerator entropy_generator(kTestTrialNames[i]);
354 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
355 }
356}
357
Alexei Svitkine9de32cb2018-02-06 20:21:21358} // namespace variations