[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
[email protected] | 50ae9f1 | 2013-08-29 18:03:22 | [diff] [blame] | 5 | #include "components/variations/entropy_provider.h" |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 6 | |
| 7 | #include <algorithm> |
| 8 | #include <limits> |
| 9 | #include <vector> |
| 10 | |
| 11 | #include "base/logging.h" |
| 12 | #include "base/rand_util.h" |
| 13 | #include "base/sha1.h" |
jwd | c6e07e2 | 2016-11-21 16:36:54 | [diff] [blame] | 14 | #include "base/strings/string_number_conversions.h" |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 15 | #include "base/sys_byteorder.h" |
Alexei Svitkine | 9de32cb | 2018-02-06 20:21:21 | [diff] [blame] | 16 | #include "components/variations/hashing.h" |
Paul Miller | 7c0efea | 2018-11-13 23:49:00 | [diff] [blame] | 17 | #include "components/variations/variations_murmur_hash.h" |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 18 | |
Alexei Svitkine | 9de32cb | 2018-02-06 20:21:21 | [diff] [blame] | 19 | namespace variations { |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 20 | |
| 21 | namespace internal { |
| 22 | |
Alexei Svitkine | 7251664 | 2018-02-28 03:08:52 | [diff] [blame] | 23 | SeededRandGenerator::SeededRandGenerator(uint32_t seed) |
| 24 | : mersenne_twister_(seed) {} |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 25 | |
| 26 | SeededRandGenerator::~SeededRandGenerator() { |
| 27 | } |
| 28 | |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 29 | uint32_t SeededRandGenerator::operator()(uint32_t range) { |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 30 | // Based on base::RandGenerator(). |
| 31 | DCHECK_GT(range, 0u); |
| 32 | |
| 33 | // We must discard random results above this number, as they would |
| 34 | // make the random generator non-uniform (consider e.g. if |
| 35 | // MAX_UINT64 was 7 and |range| was 5, then a result of 1 would be twice |
| 36 | // as likely as a result of 3 or 4). |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 37 | uint32_t max_acceptable_value = |
| 38 | (std::numeric_limits<uint32_t>::max() / range) * range - 1; |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 39 | |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 40 | uint32_t value; |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 41 | do { |
Alexei Svitkine | 7251664 | 2018-02-28 03:08:52 | [diff] [blame] | 42 | value = mersenne_twister_(); |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 43 | } while (value > max_acceptable_value); |
| 44 | |
| 45 | return value % range; |
| 46 | } |
| 47 | |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 48 | void PermuteMappingUsingRandomizationSeed(uint32_t randomization_seed, |
| 49 | std::vector<uint16_t>* mapping) { |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 50 | for (size_t i = 0; i < mapping->size(); ++i) |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 51 | (*mapping)[i] = static_cast<uint16_t>(i); |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 52 | |
[email protected] | 6fded22 | 2013-04-11 20:59:50 | [diff] [blame] | 53 | SeededRandGenerator generator(randomization_seed); |
[email protected] | 9e49614d | 2014-04-03 14:41:36 | [diff] [blame] | 54 | |
| 55 | // Do a deterministic random shuffle of the mapping using |generator|. |
| 56 | // |
| 57 | // Note: This logic is identical to the following call with libstdc++ and VS: |
| 58 | // |
| 59 | // std::random_shuffle(mapping->begin(), mapping->end(), generator); |
| 60 | // |
| 61 | // However, this is not guaranteed by the spec and some implementations (e.g. |
| 62 | // libc++) use a different algorithm. To ensure results are consistent |
| 63 | // regardless of the compiler toolchain used, use our own version. |
| 64 | for (size_t i = 1; i < mapping->size(); ++i) { |
| 65 | // Pick an element in mapping[:i+1] with which to exchange mapping[i]. |
| 66 | size_t j = generator(i + 1); |
| 67 | std::swap((*mapping)[i], (*mapping)[j]); |
| 68 | } |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 69 | } |
| 70 | |
| 71 | } // namespace internal |
| 72 | |
| 73 | SHA1EntropyProvider::SHA1EntropyProvider(const std::string& entropy_source) |
| 74 | : entropy_source_(entropy_source) { |
| 75 | } |
| 76 | |
| 77 | SHA1EntropyProvider::~SHA1EntropyProvider() { |
| 78 | } |
| 79 | |
| 80 | double SHA1EntropyProvider::GetEntropyForTrial( |
[email protected] | 6fded22 | 2013-04-11 20:59:50 | [diff] [blame] | 81 | const std::string& trial_name, |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 82 | uint32_t randomization_seed) const { |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 83 | // Given enough input entropy, SHA-1 will produce a uniformly random spread |
| 84 | // in its output space. In this case, the input entropy that is used is the |
| 85 | // combination of the original |entropy_source_| and the |trial_name|. |
| 86 | // |
| 87 | // Note: If |entropy_source_| has very low entropy, such as 13 bits or less, |
| 88 | // it has been observed that this method does not result in a uniform |
| 89 | // distribution given the same |trial_name|. When using such a low entropy |
| 90 | // source, PermutedEntropyProvider should be used instead. |
jwd | c6e07e2 | 2016-11-21 16:36:54 | [diff] [blame] | 91 | std::string input(entropy_source_); |
| 92 | input.append(randomization_seed == 0 ? trial_name : base::UintToString( |
| 93 | randomization_seed)); |
| 94 | |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 95 | unsigned char sha1_hash[base::kSHA1Length]; |
| 96 | base::SHA1HashBytes(reinterpret_cast<const unsigned char*>(input.c_str()), |
| 97 | input.size(), |
| 98 | sha1_hash); |
| 99 | |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 100 | uint64_t bits; |
mostynb | 470748ce | 2014-12-22 21:14:46 | [diff] [blame] | 101 | static_assert(sizeof(bits) < sizeof(sha1_hash), "more data required"); |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 102 | memcpy(&bits, sha1_hash, sizeof(bits)); |
| 103 | bits = base::ByteSwapToLE64(bits); |
| 104 | |
| 105 | return base::BitsToOpenEndedUnitInterval(bits); |
| 106 | } |
| 107 | |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 108 | PermutedEntropyProvider::PermutedEntropyProvider(uint16_t low_entropy_source, |
| 109 | size_t low_entropy_source_max) |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 110 | : low_entropy_source_(low_entropy_source), |
| 111 | low_entropy_source_max_(low_entropy_source_max) { |
| 112 | DCHECK_LT(low_entropy_source, low_entropy_source_max); |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 113 | DCHECK_LE(low_entropy_source_max, std::numeric_limits<uint16_t>::max()); |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 114 | } |
| 115 | |
| 116 | PermutedEntropyProvider::~PermutedEntropyProvider() { |
| 117 | } |
| 118 | |
| 119 | double PermutedEntropyProvider::GetEntropyForTrial( |
[email protected] | 6fded22 | 2013-04-11 20:59:50 | [diff] [blame] | 120 | const std::string& trial_name, |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 121 | uint32_t randomization_seed) const { |
[email protected] | 6fded22 | 2013-04-11 20:59:50 | [diff] [blame] | 122 | if (randomization_seed == 0) |
| 123 | randomization_seed = HashName(trial_name); |
[email protected] | 9d7c4a8 | 2013-05-07 12:10:49 | [diff] [blame] | 124 | |
[email protected] | fb6670a | 2013-07-31 13:31:35 | [diff] [blame] | 125 | return GetPermutedValue(randomization_seed) / |
| 126 | static_cast<double>(low_entropy_source_max_); |
| 127 | } |
| 128 | |
avi | 5dd91f8 | 2015-12-25 22:30:46 | [diff] [blame] | 129 | uint16_t PermutedEntropyProvider::GetPermutedValue( |
| 130 | uint32_t randomization_seed) const { |
| 131 | std::vector<uint16_t> mapping(low_entropy_source_max_); |
[email protected] | 6fded22 | 2013-04-11 20:59:50 | [diff] [blame] | 132 | internal::PermuteMappingUsingRandomizationSeed(randomization_seed, &mapping); |
[email protected] | fb6670a | 2013-07-31 13:31:35 | [diff] [blame] | 133 | return mapping[low_entropy_source_]; |
[email protected] | 20f999b5 | 2012-08-24 22:32:59 | [diff] [blame] | 134 | } |
| 135 | |
Paul Miller | 7c0efea | 2018-11-13 23:49:00 | [diff] [blame] | 136 | NormalizedMurmurHashEntropyProvider::NormalizedMurmurHashEntropyProvider( |
| 137 | uint16_t low_entropy_source, |
| 138 | size_t low_entropy_source_max) |
| 139 | : low_entropy_source_(low_entropy_source), |
| 140 | low_entropy_source_max_(low_entropy_source_max) { |
| 141 | DCHECK_LT(low_entropy_source, low_entropy_source_max); |
| 142 | DCHECK_LE(low_entropy_source_max, std::numeric_limits<uint16_t>::max()); |
| 143 | } |
| 144 | |
| 145 | NormalizedMurmurHashEntropyProvider::~NormalizedMurmurHashEntropyProvider() {} |
| 146 | |
| 147 | double NormalizedMurmurHashEntropyProvider::GetEntropyForTrial( |
| 148 | const std::string& trial_name, |
| 149 | uint32_t randomization_seed) const { |
| 150 | if (randomization_seed == 0) { |
| 151 | randomization_seed = internal::VariationsMurmurHash::Hash( |
| 152 | internal::VariationsMurmurHash::StringToLE32(trial_name), |
| 153 | trial_name.length()); |
| 154 | } |
| 155 | |
| 156 | uint32_t x = internal::VariationsMurmurHash::Hash16(randomization_seed, |
| 157 | low_entropy_source_); |
| 158 | int x_ordinal = 0; |
| 159 | for (uint32_t i = 0; i < low_entropy_source_max_; i++) { |
| 160 | uint32_t y = internal::VariationsMurmurHash::Hash16(randomization_seed, i); |
| 161 | x_ordinal += (y < x); |
| 162 | } |
| 163 | |
| 164 | DCHECK_GE(x_ordinal, 0); |
| 165 | // There must have been at least one iteration where |x| == |y|, because |
| 166 | // |i| == |low_entropy_source_|, and |x_ordinal| was not incremented in that |
| 167 | // iteration, so |x_ordinal| < |low_entropy_source_max_|. |
| 168 | DCHECK_LT(static_cast<size_t>(x_ordinal), low_entropy_source_max_); |
| 169 | |
| 170 | return static_cast<double>(x_ordinal) / low_entropy_source_max_; |
| 171 | } |
| 172 | |
Alexei Svitkine | 9de32cb | 2018-02-06 20:21:21 | [diff] [blame] | 173 | } // namespace variations |