[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include <cmath> |
| 6 | #include <limits> |
| 7 | #include <numeric> |
| 8 | |
| 9 | #include "base/basictypes.h" |
| 10 | #include "base/guid.h" |
| 11 | #include "base/memory/scoped_ptr.h" |
| 12 | #include "base/rand_util.h" |
| 13 | #include "base/string_number_conversions.h" |
| 14 | #include "chrome/common/metrics/entropy_provider.h" |
[email protected] | bca3494 | 2012-09-05 18:23:25 | [diff] [blame^] | 15 | #include "chrome/common/metrics/metrics_util.h" |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 16 | #include "testing/gtest/include/gtest/gtest.h" |
| 17 | |
| 18 | namespace metrics { |
| 19 | |
| 20 | namespace { |
| 21 | |
// Number of distinct low entropy source values (2^13) handed to the permuted
// entropy provider throughout these tests.
const size_t kMaxLowEntropySize = static_cast<size_t>(1) << 13;

// Names of the field trials that the uniformity tests iterate over.
const std::string kTestTrialNames[] = {
  "TestTrial",
  "AnotherTestTrial",
  "NewTabButton",
};
| 29 | |
// Returns the Chi-Square statistic of |values| against a uniform distribution
// in which every entry is expected to equal |expected_value|.
//
// The statistic is Sum((O - E)^2 / E), with O an observed entry of |values|
// and E the |expected_value|. An empty |values| yields 0.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double chi_square = 0;
  for (std::vector<int>::const_iterator observed = values.begin();
       observed != values.end(); ++observed) {
    const double deviation = *observed - expected_value;
    chi_square += (deviation * deviation) / expected_value;
  }
  return chi_square;
}
| 44 | |
| 45 | // Computes SHA1-based entropy for the given |trial_name| based on |
| 46 | // |entropy_source| |
| 47 | double GenerateSHA1Entropy(const std::string& entropy_source, |
| 48 | const std::string& trial_name) { |
| 49 | SHA1EntropyProvider sha1_provider(entropy_source); |
| 50 | return sha1_provider.GetEntropyForTrial(trial_name); |
| 51 | } |
| 52 | |
| 53 | // Generates permutation-based entropy for the given |trial_name| based on |
| 54 | // |entropy_source| which must be in the range [0, entropy_max). |
| 55 | double GeneratePermutedEntropy(uint16 entropy_source, |
| 56 | size_t entropy_max, |
| 57 | const std::string& trial_name) { |
| 58 | PermutedEntropyProvider permuted_provider(entropy_source, entropy_max); |
| 59 | return permuted_provider.GetEntropyForTrial(trial_name); |
| 60 | } |
| 61 | |
// Test-only interface that produces entropy values for a single field trial.
// An EntropyProvider holds its low/high entropy source fixed and is queried
// with varying trial names; a TrialEntropyGenerator does the opposite: the
// trial name is fixed and each call draws a fresh low/high entropy source
// value internally to produce the next output entropy value.
class TrialEntropyGenerator {
 public:
  // Returns the next entropy value for the trial this generator is bound to.
  virtual double GenerateEntropyValue() const = 0;

  virtual ~TrialEntropyGenerator() {}
};
| 72 | |
| 73 | // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high |
| 74 | // entropy source (random GUID with 128 bits of entropy + 13 additional bits of |
| 75 | // entropy corresponding to a low entropy source). |
| 76 | class SHA1EntropyGenerator : public TrialEntropyGenerator { |
| 77 | public: |
| 78 | explicit SHA1EntropyGenerator(const std::string& trial_name) |
| 79 | : trial_name_(trial_name) { |
| 80 | } |
| 81 | |
| 82 | ~SHA1EntropyGenerator() { |
| 83 | } |
| 84 | |
| 85 | virtual double GenerateEntropyValue() const OVERRIDE { |
| 86 | // Use a random GUID + 13 additional bits of entropy to match how the |
| 87 | // SHA1EntropyProvider is used in metrics_service.cc. |
| 88 | const int low_entropy_source = |
| 89 | static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); |
| 90 | const std::string high_entropy_source = |
| 91 | base::GenerateGUID() + base::IntToString(low_entropy_source); |
| 92 | return GenerateSHA1Entropy(high_entropy_source, trial_name_); |
| 93 | } |
| 94 | |
| 95 | private: |
| 96 | const std::string& trial_name_; |
| 97 | |
| 98 | DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator); |
| 99 | }; |
| 100 | |
| 101 | // An TrialEntropyGenerator that uses the permuted entropy provider algorithm, |
| 102 | // using 13-bit low entropy source values. |
| 103 | class PermutedEntropyGenerator : public TrialEntropyGenerator { |
| 104 | public: |
| 105 | explicit PermutedEntropyGenerator(const std::string& trial_name) |
| 106 | : mapping_(kMaxLowEntropySize) { |
| 107 | // Note: Given a trial name, the computed mapping will be the same. |
| 108 | // As a performance optimization, pre-compute the mapping once per trial |
| 109 | // name and index into it for each entropy value. |
| 110 | internal::PermuteMappingUsingTrialName(trial_name, &mapping_); |
| 111 | } |
| 112 | |
| 113 | ~PermutedEntropyGenerator() { |
| 114 | } |
| 115 | |
| 116 | virtual double GenerateEntropyValue() const OVERRIDE { |
| 117 | const int low_entropy_source = |
| 118 | static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); |
| 119 | return mapping_[low_entropy_source] / |
| 120 | static_cast<double>(kMaxLowEntropySize); |
| 121 | } |
| 122 | |
| 123 | private: |
| 124 | std::vector<uint16> mapping_; |
| 125 | |
| 126 | DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator); |
| 127 | }; |
| 128 | |
| 129 | // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness |
| 130 | // of Fit Test. |
| 131 | void PerformEntropyUniformityTest( |
| 132 | const std::string& trial_name, |
| 133 | const TrialEntropyGenerator& entropy_generator) { |
| 134 | // Number of buckets in the simulated field trials. |
| 135 | const size_t kBucketCount = 20; |
| 136 | // Max number of iterations to perform before giving up and failing. |
| 137 | const size_t kMaxIterationCount = 100000; |
| 138 | // The number of iterations to perform before each time the statistical |
| 139 | // significance of the results is checked. |
| 140 | const size_t kCheckIterationCount = 10000; |
| 141 | // This is the Chi-Square threshold from the Chi-Square statistic table for |
| 142 | // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence |
| 143 | // level. See: http://www.medcalc.org/manual/chi-square-table.php |
| 144 | const double kChiSquareThreshold = 43.82; |
| 145 | |
| 146 | std::vector<int> distribution(kBucketCount); |
| 147 | |
| 148 | for (size_t i = 1; i <= kMaxIterationCount; ++i) { |
| 149 | const double entropy_value = entropy_generator.GenerateEntropyValue(); |
| 150 | const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value); |
| 151 | ASSERT_LT(bucket, kBucketCount); |
| 152 | distribution[bucket] += 1; |
| 153 | |
| 154 | // After |kCheckIterationCount| iterations, compute the Chi-Square |
| 155 | // statistic of the distribution. If the resulting statistic is greater |
| 156 | // than |kChiSquareThreshold|, we can conclude with 99.9% confidence |
| 157 | // that the observed samples do not follow a uniform distribution. |
| 158 | // |
| 159 | // However, since 99.9% would still result in a false negative every |
| 160 | // 1000 runs of the test, do not treat it as a failure (else the test |
| 161 | // will be flaky). Instead, perform additional iterations to determine |
| 162 | // if the distribution will converge, up to |kMaxIterationCount|. |
| 163 | if ((i % kCheckIterationCount) == 0) { |
| 164 | const double expected_value_per_bucket = |
| 165 | static_cast<double>(i) / kBucketCount; |
| 166 | const double chi_square = |
| 167 | ComputeChiSquare(distribution, expected_value_per_bucket); |
| 168 | if (chi_square < kChiSquareThreshold) |
| 169 | break; |
| 170 | |
| 171 | // If |i == kMaxIterationCount|, the Chi-Square statistic did not |
| 172 | // converge after |kMaxIterationCount|. |
| 173 | EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " << |
| 174 | trial_name << " with chi_square = " << chi_square << |
| 175 | " after " << kMaxIterationCount << " iterations."; |
| 176 | } |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | } // namespace |
| 181 | |
| 182 | class EntropyProviderTest : public testing::Test { |
| 183 | }; |
| 184 | |
| 185 | TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) { |
| 186 | // Simply asserts that two trials using one-time randomization |
| 187 | // that have different names, normally generate different results. |
| 188 | // |
| 189 | // Note that depending on the one-time random initialization, they |
| 190 | // _might_ actually give the same result, but we know that given |
| 191 | // the particular client_id we use for unit tests they won't. |
| 192 | base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id")); |
| 193 | scoped_refptr<base::FieldTrial> trials[] = { |
| 194 | base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default", |
| 195 | base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), |
| 196 | base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default", |
| 197 | base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) }; |
| 198 | |
| 199 | for (size_t i = 0; i < arraysize(trials); ++i) { |
| 200 | trials[i]->UseOneTimeRandomization(); |
| 201 | |
| 202 | for (int j = 0; j < 100; ++j) |
| 203 | trials[i]->AppendGroup("", 1); |
| 204 | } |
| 205 | |
| 206 | // The trials are most likely to give different results since they have |
| 207 | // different names. |
| 208 | EXPECT_NE(trials[0]->group(), trials[1]->group()); |
| 209 | EXPECT_NE(trials[0]->group_name(), trials[1]->group_name()); |
| 210 | } |
| 211 | |
| 212 | TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) { |
| 213 | // Simply asserts that two trials using one-time randomization |
| 214 | // that have different names, normally generate different results. |
| 215 | // |
| 216 | // Note that depending on the one-time random initialization, they |
| 217 | // _might_ actually give the same result, but we know that given |
| 218 | // the particular client_id we use for unit tests they won't. |
| 219 | base::FieldTrialList field_trial_list( |
| 220 | new PermutedEntropyProvider(1234, kMaxLowEntropySize)); |
| 221 | scoped_refptr<base::FieldTrial> trials[] = { |
| 222 | base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default", |
| 223 | base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), |
| 224 | base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default", |
| 225 | base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) }; |
| 226 | |
| 227 | for (size_t i = 0; i < arraysize(trials); ++i) { |
| 228 | trials[i]->UseOneTimeRandomization(); |
| 229 | |
| 230 | for (int j = 0; j < 100; ++j) |
| 231 | trials[i]->AppendGroup("", 1); |
| 232 | } |
| 233 | |
| 234 | // The trials are most likely to give different results since they have |
| 235 | // different names. |
| 236 | EXPECT_NE(trials[0]->group(), trials[1]->group()); |
| 237 | EXPECT_NE(trials[0]->group_name(), trials[1]->group_name()); |
| 238 | } |
| 239 | |
| 240 | TEST_F(EntropyProviderTest, SHA1Entropy) { |
| 241 | const double results[] = { GenerateSHA1Entropy("hi", "1"), |
| 242 | GenerateSHA1Entropy("there", "1") }; |
| 243 | |
| 244 | EXPECT_NE(results[0], results[1]); |
| 245 | for (size_t i = 0; i < arraysize(results); ++i) { |
| 246 | EXPECT_LE(0.0, results[i]); |
| 247 | EXPECT_GT(1.0, results[i]); |
| 248 | } |
| 249 | |
| 250 | EXPECT_EQ(GenerateSHA1Entropy("yo", "1"), |
| 251 | GenerateSHA1Entropy("yo", "1")); |
| 252 | EXPECT_NE(GenerateSHA1Entropy("yo", "something"), |
| 253 | GenerateSHA1Entropy("yo", "else")); |
| 254 | } |
| 255 | |
| 256 | TEST_F(EntropyProviderTest, PermutedEntropy) { |
| 257 | const double results[] = { |
| 258 | GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"), |
| 259 | GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") }; |
| 260 | |
| 261 | EXPECT_NE(results[0], results[1]); |
| 262 | for (size_t i = 0; i < arraysize(results); ++i) { |
| 263 | EXPECT_LE(0.0, results[i]); |
| 264 | EXPECT_GT(1.0, results[i]); |
| 265 | } |
| 266 | |
| 267 | EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"), |
| 268 | GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1")); |
| 269 | EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"), |
| 270 | GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else")); |
| 271 | } |
| 272 | |
| 273 | TEST_F(EntropyProviderTest, PermutedEntropyProviderResults) { |
| 274 | // Verifies that PermutedEntropyProvider produces expected results. This |
| 275 | // ensures that the results are the same between platforms and ensures that |
| 276 | // changes to the implementation do not regress this accidentally. |
| 277 | |
| 278 | EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize), |
| 279 | GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ")); |
| 280 | EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize), |
| 281 | GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test")); |
| 282 | EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize), |
| 283 | GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo")); |
| 284 | } |
| 285 | |
| 286 | TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) { |
| 287 | for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) { |
| 288 | SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]); |
| 289 | PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator); |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) { |
| 294 | for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) { |
| 295 | PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]); |
| 296 | PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator); |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) { |
| 301 | // Verifies that SeededRandGenerator has a uniform distribution. |
| 302 | // |
| 303 | // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc. |
| 304 | |
| 305 | const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL; |
| 306 | const uint32 kExpectedAverage = kTopOfRange / 2ULL; |
| 307 | const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2% |
| 308 | const int kMinAttempts = 1000; |
| 309 | const int kMaxAttempts = 1000000; |
| 310 | |
| 311 | for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) { |
[email protected] | bca3494 | 2012-09-05 18:23:25 | [diff] [blame^] | 312 | const uint32 seed = HashName(kTestTrialNames[i]); |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 313 | internal::SeededRandGenerator rand_generator(seed); |
| 314 | |
| 315 | double cumulative_average = 0.0; |
| 316 | int count = 0; |
| 317 | while (count < kMaxAttempts) { |
| 318 | uint32 value = rand_generator(kTopOfRange); |
| 319 | cumulative_average = (count * cumulative_average + value) / (count + 1); |
| 320 | |
| 321 | // Don't quit too quickly for things to start converging, or we may have |
| 322 | // a false positive. |
| 323 | if (count > kMinAttempts && |
| 324 | kExpectedAverage - kAllowedVariance < cumulative_average && |
| 325 | cumulative_average < kExpectedAverage + kAllowedVariance) { |
| 326 | break; |
| 327 | } |
| 328 | |
| 329 | ++count; |
| 330 | } |
| 331 | |
| 332 | ASSERT_LT(count, kMaxAttempts) << "Expected average was " << |
| 333 | kExpectedAverage << ", average ended at " << cumulative_average << |
| 334 | ", for trial " << kTestTrialNames[i]; |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | } // namespace metrics |