Blame - chrome/common/metrics/entropy_provider_unittest.cc - chromium/src

blob: 5b4d56b0324f5be2d5b39dc1ce9da230d71e6cd7 [file] [log] [blame]

[email protected]	20f999b5	2012-08-24 22:32:59	[diff] [blame]	1	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include <cmath>
				6	#include <limits>
				7	#include <numeric>
				8
				9	#include "base/basictypes.h"
				10	#include "base/guid.h"
				11	#include "base/memory/scoped_ptr.h"
				12	#include "base/rand_util.h"
				13	#include "base/string_number_conversions.h"
				14	#include "chrome/common/metrics/entropy_provider.h"
[email protected]	bca3494	2012-09-05 18:23:25	[diff] [blame^]	15	#include "chrome/common/metrics/metrics_util.h"
[email protected]	20f999b5	2012-08-24 22:32:59	[diff] [blame]	16	#include "testing/gtest/include/gtest/gtest.h"
				17
				18	namespace metrics {
				19
				20	namespace {
				21
				22	// Size of the low entropy source to use for the permuted entropy provider
				23	// in tests.
				24	const size_t kMaxLowEntropySize = (1 << 13);
				25
				26	// Field trial names used in unit tests.
				27	const std::string kTestTrialNames[] = { "TestTrial", "AnotherTestTrial",
				28	"NewTabButton" };
				29
				30	// Computes the Chi-Square statistic for \|values\| assuming they follow a uniform
				31	// distribution, where each entry has expected value \|expected_value\|.
				32	//
				33	// The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
				34	// value and E is the expected value.
				35	double ComputeChiSquare(const std::vector<int>& values,
				36	double expected_value) {
				37	double sum = 0;
				38	for (size_t i = 0; i < values.size(); ++i) {
				39	const double delta = values[i] - expected_value;
				40	sum += (delta * delta) / expected_value;
				41	}
				42	return sum;
				43	}
				44
				45	// Computes SHA1-based entropy for the given \|trial_name\| based on
				46	// \|entropy_source\|
				47	double GenerateSHA1Entropy(const std::string& entropy_source,
				48	const std::string& trial_name) {
				49	SHA1EntropyProvider sha1_provider(entropy_source);
				50	return sha1_provider.GetEntropyForTrial(trial_name);
				51	}
				52
				53	// Generates permutation-based entropy for the given \|trial_name\| based on
				54	// \|entropy_source\| which must be in the range [0, entropy_max).
				55	double GeneratePermutedEntropy(uint16 entropy_source,
				56	size_t entropy_max,
				57	const std::string& trial_name) {
				58	PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
				59	return permuted_provider.GetEntropyForTrial(trial_name);
				60	}
				61
				62	// Helper interface for testing used to generate entropy values for a given
				63	// field trial. Unlike EntropyProvider, which keeps the low/high entropy source
				64	// value constant and generates entropy for different trial names, instances
				65	// of TrialEntropyGenerator keep the trial name constant and generate low/high
				66	// entropy source values internally to produce each output entropy value.
				67	class TrialEntropyGenerator {
				68	public:
				69	virtual ~TrialEntropyGenerator() {}
				70	virtual double GenerateEntropyValue() const = 0;
				71	};
				72
				73	// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
				74	// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
				75	// entropy corresponding to a low entropy source).
				76	class SHA1EntropyGenerator : public TrialEntropyGenerator {
				77	public:
				78	explicit SHA1EntropyGenerator(const std::string& trial_name)
				79	: trial_name_(trial_name) {
				80	}
				81
				82	~SHA1EntropyGenerator() {
				83	}
				84
				85	virtual double GenerateEntropyValue() const OVERRIDE {
				86	// Use a random GUID + 13 additional bits of entropy to match how the
				87	// SHA1EntropyProvider is used in metrics_service.cc.
				88	const int low_entropy_source =
				89	static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
				90	const std::string high_entropy_source =
				91	base::GenerateGUID() + base::IntToString(low_entropy_source);
				92	return GenerateSHA1Entropy(high_entropy_source, trial_name_);
				93	}
				94
				95	private:
				96	const std::string& trial_name_;
				97
				98	DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
				99	};
				100
				101	// An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
				102	// using 13-bit low entropy source values.
				103	class PermutedEntropyGenerator : public TrialEntropyGenerator {
				104	public:
				105	explicit PermutedEntropyGenerator(const std::string& trial_name)
				106	: mapping_(kMaxLowEntropySize) {
				107	// Note: Given a trial name, the computed mapping will be the same.
				108	// As a performance optimization, pre-compute the mapping once per trial
				109	// name and index into it for each entropy value.
				110	internal::PermuteMappingUsingTrialName(trial_name, &mapping_);
				111	}
				112
				113	~PermutedEntropyGenerator() {
				114	}
				115
				116	virtual double GenerateEntropyValue() const OVERRIDE {
				117	const int low_entropy_source =
				118	static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
				119	return mapping_[low_entropy_source] /
				120	static_cast<double>(kMaxLowEntropySize);
				121	}
				122
				123	private:
				124	std::vector<uint16> mapping_;
				125
				126	DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
				127	};
				128
				129	// Tests uniformity of a given \|entropy_generator\| using the Chi-Square Goodness
				130	// of Fit Test.
				131	void PerformEntropyUniformityTest(
				132	const std::string& trial_name,
				133	const TrialEntropyGenerator& entropy_generator) {
				134	// Number of buckets in the simulated field trials.
				135	const size_t kBucketCount = 20;
				136	// Max number of iterations to perform before giving up and failing.
				137	const size_t kMaxIterationCount = 100000;
				138	// The number of iterations to perform before each time the statistical
				139	// significance of the results is checked.
				140	const size_t kCheckIterationCount = 10000;
				141	// This is the Chi-Square threshold from the Chi-Square statistic table for
				142	// 19 degrees of freedom (based on \|kBucketCount\|) with a 99.9% confidence
				143	// level. See: http://www.medcalc.org/manual/chi-square-table.php
				144	const double kChiSquareThreshold = 43.82;
				145
				146	std::vector<int> distribution(kBucketCount);
				147
				148	for (size_t i = 1; i <= kMaxIterationCount; ++i) {
				149	const double entropy_value = entropy_generator.GenerateEntropyValue();
				150	const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
				151	ASSERT_LT(bucket, kBucketCount);
				152	distribution[bucket] += 1;
				153
				154	// After \|kCheckIterationCount\| iterations, compute the Chi-Square
				155	// statistic of the distribution. If the resulting statistic is greater
				156	// than \|kChiSquareThreshold\|, we can conclude with 99.9% confidence
				157	// that the observed samples do not follow a uniform distribution.
				158	//
				159	// However, since 99.9% would still result in a false negative every
				160	// 1000 runs of the test, do not treat it as a failure (else the test
				161	// will be flaky). Instead, perform additional iterations to determine
				162	// if the distribution will converge, up to \|kMaxIterationCount\|.
				163	if ((i % kCheckIterationCount) == 0) {
				164	const double expected_value_per_bucket =
				165	static_cast<double>(i) / kBucketCount;
				166	const double chi_square =
				167	ComputeChiSquare(distribution, expected_value_per_bucket);
				168	if (chi_square < kChiSquareThreshold)
				169	break;
				170
				171	// If \|i == kMaxIterationCount\|, the Chi-Square statistic did not
				172	// converge after \|kMaxIterationCount\|.
				173	EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
				174	trial_name << " with chi_square = " << chi_square <<
				175	" after " << kMaxIterationCount << " iterations.";
				176	}
				177	}
				178	}
				179
				180	} // namespace
				181
				182	class EntropyProviderTest : public testing::Test {
				183	};
				184
				185	TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
				186	// Simply asserts that two trials using one-time randomization
				187	// that have different names, normally generate different results.
				188	//
				189	// Note that depending on the one-time random initialization, they
				190	// _might_ actually give the same result, but we know that given
				191	// the particular client_id we use for unit tests they won't.
				192	base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
				193	scoped_refptr<base::FieldTrial> trials[] = {
				194	base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
				195	base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
				196	base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
				197	base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
				198
				199	for (size_t i = 0; i < arraysize(trials); ++i) {
				200	trials[i]->UseOneTimeRandomization();
				201
				202	for (int j = 0; j < 100; ++j)
				203	trials[i]->AppendGroup("", 1);
				204	}
				205
				206	// The trials are most likely to give different results since they have
				207	// different names.
				208	EXPECT_NE(trials[0]->group(), trials[1]->group());
				209	EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
				210	}
				211
				212	TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
				213	// Simply asserts that two trials using one-time randomization
				214	// that have different names, normally generate different results.
				215	//
				216	// Note that depending on the one-time random initialization, they
				217	// _might_ actually give the same result, but we know that given
				218	// the particular client_id we use for unit tests they won't.
				219	base::FieldTrialList field_trial_list(
				220	new PermutedEntropyProvider(1234, kMaxLowEntropySize));
				221	scoped_refptr<base::FieldTrial> trials[] = {
				222	base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
				223	base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
				224	base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
				225	base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
				226
				227	for (size_t i = 0; i < arraysize(trials); ++i) {
				228	trials[i]->UseOneTimeRandomization();
				229
				230	for (int j = 0; j < 100; ++j)
				231	trials[i]->AppendGroup("", 1);
				232	}
				233
				234	// The trials are most likely to give different results since they have
				235	// different names.
				236	EXPECT_NE(trials[0]->group(), trials[1]->group());
				237	EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
				238	}
				239
				240	TEST_F(EntropyProviderTest, SHA1Entropy) {
				241	const double results[] = { GenerateSHA1Entropy("hi", "1"),
				242	GenerateSHA1Entropy("there", "1") };
				243
				244	EXPECT_NE(results[0], results[1]);
				245	for (size_t i = 0; i < arraysize(results); ++i) {
				246	EXPECT_LE(0.0, results[i]);
				247	EXPECT_GT(1.0, results[i]);
				248	}
				249
				250	EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
				251	GenerateSHA1Entropy("yo", "1"));
				252	EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
				253	GenerateSHA1Entropy("yo", "else"));
				254	}
				255
				256	TEST_F(EntropyProviderTest, PermutedEntropy) {
				257	const double results[] = {
				258	GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
				259	GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
				260
				261	EXPECT_NE(results[0], results[1]);
				262	for (size_t i = 0; i < arraysize(results); ++i) {
				263	EXPECT_LE(0.0, results[i]);
				264	EXPECT_GT(1.0, results[i]);
				265	}
				266
				267	EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
				268	GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
				269	EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
				270	GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
				271	}
				272
				273	TEST_F(EntropyProviderTest, PermutedEntropyProviderResults) {
				274	// Verifies that PermutedEntropyProvider produces expected results. This
				275	// ensures that the results are the same between platforms and ensures that
				276	// changes to the implementation do not regress this accidentally.
				277
				278	EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
				279	GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
				280	EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
				281	GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
				282	EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
				283	GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
				284	}
				285
				286	TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {
				287	for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
				288	SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
				289	PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
				290	}
				291	}
				292
				293	TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {
				294	for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
				295	PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
				296	PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
				297	}
				298	}
				299
				300	TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {
				301	// Verifies that SeededRandGenerator has a uniform distribution.
				302	//
				303	// Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
				304
				305	const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
				306	const uint32 kExpectedAverage = kTopOfRange / 2ULL;
				307	const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
				308	const int kMinAttempts = 1000;
				309	const int kMaxAttempts = 1000000;
				310
				311	for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
[email protected]	bca3494	2012-09-05 18:23:25	[diff] [blame^]	312	const uint32 seed = HashName(kTestTrialNames[i]);
[email protected]	20f999b5	2012-08-24 22:32:59	[diff] [blame]	313	internal::SeededRandGenerator rand_generator(seed);
				314
				315	double cumulative_average = 0.0;
				316	int count = 0;
				317	while (count < kMaxAttempts) {
				318	uint32 value = rand_generator(kTopOfRange);
				319	cumulative_average = (count * cumulative_average + value) / (count + 1);
				320
				321	// Don't quit too quickly for things to start converging, or we may have
				322	// a false positive.
				323	if (count > kMinAttempts &&
				324	kExpectedAverage - kAllowedVariance < cumulative_average &&
				325	cumulative_average < kExpectedAverage + kAllowedVariance) {
				326	break;
				327	}
				328
				329	++count;
				330	}
				331
				332	ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
				333	kExpectedAverage << ", average ended at " << cumulative_average <<
				334	", for trial " << kTestTrialNames[i];
				335	}
				336	}
				337
				338	} // namespace metrics