Blame - chrome/common/metrics/entropy_provider_unittest.cc - chromium/src

blob: b54a5d50f858b7022172f1a295d97c24ba10409c [file] [log] [blame]

[email protected]	20f999b5	2012-08-24 22:32:59	[diff] [blame^]	1	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include <cmath>
				6	#include <limits>
				7	#include <numeric>
				8
				9	#include "base/basictypes.h"
				10	#include "base/guid.h"
				11	#include "base/memory/scoped_ptr.h"
				12	#include "base/rand_util.h"
				13	#include "base/string_number_conversions.h"
				14	#include "chrome/common/metrics/entropy_provider.h"
				15	#include "testing/gtest/include/gtest/gtest.h"
				16
				17	namespace metrics {
				18
				19	namespace {
				20
				// Number of distinct low entropy source values (2^13) that the tests hand to
				// the permuted entropy provider as the size of its range.
				const size_t kMaxLowEntropySize = static_cast<size_t>(1) << 13;
				24
				// Names of the simulated field trials that the tests below iterate over.
				const std::string kTestTrialNames[] = {
				  "TestTrial",
				  "AnotherTestTrial",
				  "NewTabButton"
				};
				28
				// Returns the Chi-Square statistic of |values| against a uniform distribution
				// in which every entry is expected to equal |expected_value|.
				//
				// The statistic is Sum((O - E)^2 / E), with O the observed entry and E the
				// expected value. An empty |values| yields 0.
				double ComputeChiSquare(const std::vector<int>& values,
				                        double expected_value) {
				  double chi_square = 0;
				  for (std::vector<int>::const_iterator observed = values.begin();
				       observed != values.end(); ++observed) {
				    const double deviation = *observed - expected_value;
				    chi_square += (deviation * deviation) / expected_value;
				  }
				  return chi_square;
				}
				43
				44	// Computes SHA1-based entropy for the given \|trial_name\| based on
				45	// \|entropy_source\|
				46	double GenerateSHA1Entropy(const std::string& entropy_source,
				47	const std::string& trial_name) {
				48	SHA1EntropyProvider sha1_provider(entropy_source);
				49	return sha1_provider.GetEntropyForTrial(trial_name);
				50	}
				51
				52	// Generates permutation-based entropy for the given \|trial_name\| based on
				53	// \|entropy_source\| which must be in the range [0, entropy_max).
				54	double GeneratePermutedEntropy(uint16 entropy_source,
				55	size_t entropy_max,
				56	const std::string& trial_name) {
				57	PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
				58	return permuted_provider.GetEntropyForTrial(trial_name);
				59	}
				60
				// Test-only interface that produces entropy values for one field trial.
				// EntropyProvider holds the low/high entropy source fixed and varies the trial
				// name; implementations of this interface do the opposite: the trial name is
				// fixed and a fresh low/high entropy source value is generated internally for
				// every entropy value returned.
				class TrialEntropyGenerator {
				 public:
				  // Returns the next entropy value for the trial this generator is bound to.
				  virtual double GenerateEntropyValue() const = 0;

				  virtual ~TrialEntropyGenerator() {
				  }
				};
				71
				72	// An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
				73	// entropy source (random GUID with 128 bits of entropy + 13 additional bits of
				74	// entropy corresponding to a low entropy source).
				75	class SHA1EntropyGenerator : public TrialEntropyGenerator {
				76	public:
				77	explicit SHA1EntropyGenerator(const std::string& trial_name)
				78	: trial_name_(trial_name) {
				79	}
				80
				81	~SHA1EntropyGenerator() {
				82	}
				83
				84	virtual double GenerateEntropyValue() const OVERRIDE {
				85	// Use a random GUID + 13 additional bits of entropy to match how the
				86	// SHA1EntropyProvider is used in metrics_service.cc.
				87	const int low_entropy_source =
				88	static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
				89	const std::string high_entropy_source =
				90	base::GenerateGUID() + base::IntToString(low_entropy_source);
				91	return GenerateSHA1Entropy(high_entropy_source, trial_name_);
				92	}
				93
				94	private:
				95	const std::string& trial_name_;
				96
				97	DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
				98	};
				99
				100	// An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
				101	// using 13-bit low entropy source values.
				102	class PermutedEntropyGenerator : public TrialEntropyGenerator {
				103	public:
				104	explicit PermutedEntropyGenerator(const std::string& trial_name)
				105	: mapping_(kMaxLowEntropySize) {
				106	// Note: Given a trial name, the computed mapping will be the same.
				107	// As a performance optimization, pre-compute the mapping once per trial
				108	// name and index into it for each entropy value.
				109	internal::PermuteMappingUsingTrialName(trial_name, &mapping_);
				110	}
				111
				112	~PermutedEntropyGenerator() {
				113	}
				114
				115	virtual double GenerateEntropyValue() const OVERRIDE {
				116	const int low_entropy_source =
				117	static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
				118	return mapping_[low_entropy_source] /
				119	static_cast<double>(kMaxLowEntropySize);
				120	}
				121
				122	private:
				123	std::vector<uint16> mapping_;
				124
				125	DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
				126	};
				127
				128	// Tests uniformity of a given \|entropy_generator\| using the Chi-Square Goodness
				129	// of Fit Test.
				130	void PerformEntropyUniformityTest(
				131	const std::string& trial_name,
				132	const TrialEntropyGenerator& entropy_generator) {
				133	// Number of buckets in the simulated field trials.
				134	const size_t kBucketCount = 20;
				135	// Max number of iterations to perform before giving up and failing.
				136	const size_t kMaxIterationCount = 100000;
				137	// The number of iterations to perform before each time the statistical
				138	// significance of the results is checked.
				139	const size_t kCheckIterationCount = 10000;
				140	// This is the Chi-Square threshold from the Chi-Square statistic table for
				141	// 19 degrees of freedom (based on \|kBucketCount\|) with a 99.9% confidence
				142	// level. See: http://www.medcalc.org/manual/chi-square-table.php
				143	const double kChiSquareThreshold = 43.82;
				144
				145	std::vector<int> distribution(kBucketCount);
				146
				147	for (size_t i = 1; i <= kMaxIterationCount; ++i) {
				148	const double entropy_value = entropy_generator.GenerateEntropyValue();
				149	const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
				150	ASSERT_LT(bucket, kBucketCount);
				151	distribution[bucket] += 1;
				152
				153	// After \|kCheckIterationCount\| iterations, compute the Chi-Square
				154	// statistic of the distribution. If the resulting statistic is greater
				155	// than \|kChiSquareThreshold\|, we can conclude with 99.9% confidence
				156	// that the observed samples do not follow a uniform distribution.
				157	//
				158	// However, since 99.9% would still result in a false negative every
				159	// 1000 runs of the test, do not treat it as a failure (else the test
				160	// will be flaky). Instead, perform additional iterations to determine
				161	// if the distribution will converge, up to \|kMaxIterationCount\|.
				162	if ((i % kCheckIterationCount) == 0) {
				163	const double expected_value_per_bucket =
				164	static_cast<double>(i) / kBucketCount;
				165	const double chi_square =
				166	ComputeChiSquare(distribution, expected_value_per_bucket);
				167	if (chi_square < kChiSquareThreshold)
				168	break;
				169
				170	// If \|i == kMaxIterationCount\|, the Chi-Square statistic did not
				171	// converge after \|kMaxIterationCount\|.
				172	EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
				173	trial_name << " with chi_square = " << chi_square <<
				174	" after " << kMaxIterationCount << " iterations.";
				175	}
				176	}
				177	}
				178
				179	} // namespace
				180
				181	class EntropyProviderTest : public testing::Test {
				182	};
				183
				184	TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
				185	// Simply asserts that two trials using one-time randomization
				186	// that have different names, normally generate different results.
				187	//
				188	// Note that depending on the one-time random initialization, they
				189	// _might_ actually give the same result, but we know that given
				190	// the particular client_id we use for unit tests they won't.
				191	base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
				192	scoped_refptr<base::FieldTrial> trials[] = {
				193	base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
				194	base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
				195	base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
				196	base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
				197
				198	for (size_t i = 0; i < arraysize(trials); ++i) {
				199	trials[i]->UseOneTimeRandomization();
				200
				201	for (int j = 0; j < 100; ++j)
				202	trials[i]->AppendGroup("", 1);
				203	}
				204
				205	// The trials are most likely to give different results since they have
				206	// different names.
				207	EXPECT_NE(trials[0]->group(), trials[1]->group());
				208	EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
				209	}
				210
				211	TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
				212	// Simply asserts that two trials using one-time randomization
				213	// that have different names, normally generate different results.
				214	//
				215	// Note that depending on the one-time random initialization, they
				216	// _might_ actually give the same result, but we know that given
				217	// the particular client_id we use for unit tests they won't.
				218	base::FieldTrialList field_trial_list(
				219	new PermutedEntropyProvider(1234, kMaxLowEntropySize));
				220	scoped_refptr<base::FieldTrial> trials[] = {
				221	base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
				222	base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
				223	base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
				224	base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
				225
				226	for (size_t i = 0; i < arraysize(trials); ++i) {
				227	trials[i]->UseOneTimeRandomization();
				228
				229	for (int j = 0; j < 100; ++j)
				230	trials[i]->AppendGroup("", 1);
				231	}
				232
				233	// The trials are most likely to give different results since they have
				234	// different names.
				235	EXPECT_NE(trials[0]->group(), trials[1]->group());
				236	EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
				237	}
				238
				239	TEST_F(EntropyProviderTest, SHA1Entropy) {
				240	const double results[] = { GenerateSHA1Entropy("hi", "1"),
				241	GenerateSHA1Entropy("there", "1") };
				242
				243	EXPECT_NE(results[0], results[1]);
				244	for (size_t i = 0; i < arraysize(results); ++i) {
				245	EXPECT_LE(0.0, results[i]);
				246	EXPECT_GT(1.0, results[i]);
				247	}
				248
				249	EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
				250	GenerateSHA1Entropy("yo", "1"));
				251	EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
				252	GenerateSHA1Entropy("yo", "else"));
				253	}
				254
				255	TEST_F(EntropyProviderTest, PermutedEntropy) {
				256	const double results[] = {
				257	GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
				258	GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
				259
				260	EXPECT_NE(results[0], results[1]);
				261	for (size_t i = 0; i < arraysize(results); ++i) {
				262	EXPECT_LE(0.0, results[i]);
				263	EXPECT_GT(1.0, results[i]);
				264	}
				265
				266	EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
				267	GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
				268	EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
				269	GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
				270	}
				271
				272	TEST_F(EntropyProviderTest, PermutedEntropyProviderResults) {
				273	// Verifies that PermutedEntropyProvider produces expected results. This
				274	// ensures that the results are the same between platforms and ensures that
				275	// changes to the implementation do not regress this accidentally.
				276
				277	EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
				278	GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
				279	EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
				280	GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
				281	EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
				282	GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
				283	}
				284
				285	TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {
				286	for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
				287	SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
				288	PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
				289	}
				290	}
				291
				292	TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {
				293	for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
				294	PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
				295	PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
				296	}
				297	}
				298
				299	TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {
				300	// Verifies that SeededRandGenerator has a uniform distribution.
				301	//
				302	// Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
				303
				304	const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
				305	const uint32 kExpectedAverage = kTopOfRange / 2ULL;
				306	const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
				307	const int kMinAttempts = 1000;
				308	const int kMaxAttempts = 1000000;
				309
				310	for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
				311	const uint32 seed = internal::HashName(kTestTrialNames[i]);
				312	internal::SeededRandGenerator rand_generator(seed);
				313
				314	double cumulative_average = 0.0;
				315	int count = 0;
				316	while (count < kMaxAttempts) {
				317	uint32 value = rand_generator(kTopOfRange);
				318	cumulative_average = (count * cumulative_average + value) / (count + 1);
				319
				320	// Don't quit too quickly for things to start converging, or we may have
				321	// a false positive.
				322	if (count > kMinAttempts &&
				323	kExpectedAverage - kAllowedVariance < cumulative_average &&
				324	cumulative_average < kExpectedAverage + kAllowedVariance) {
				325	break;
				326	}
				327
				328	++count;
				329	}
				330
				331	ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
				332	kExpectedAverage << ", average ended at " << cumulative_average <<
				333	", for trial " << kTestTrialNames[i];
				334	}
				335	}
				336
				337	} // namespace metrics