Implement NormalizedMurmurHashEntropyProvider

Design document:
https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo

BUG=890413

Change-Id: Ib372a573b1a0f68467f785ce74ef7821c9d48614
Reviewed-on: https://chromium-review.googlesource.com/c/1322350
Reviewed-by: Grace Kloba <[email protected]>
Reviewed-by: Alexei Svitkine <[email protected]>
Commit-Queue: Paul Miller <[email protected]>
Cr-Commit-Position: refs/heads/master@{#607816}
diff --git a/components/variations/entropy_provider_unittest.cc b/components/variations/entropy_provider_unittest.cc
index 70db5c58..0f77af3 100644
--- a/components/variations/entropy_provider_unittest.cc
+++ b/components/variations/entropy_provider_unittest.cc
@@ -64,6 +64,15 @@
   return permuted_provider.GetEntropyForTrial(trial_name, 0);
 }
 
+// Returns the entropy value that a NormalizedMurmurHashEntropyProvider built
+// from |entropy_source| (expected in [0, entropy_max)) yields for |trial_name|.
+double GenerateNormalizedMurmurHashEntropy(uint16_t entropy_source,
+                                           size_t entropy_max,
+                                           const std::string& trial_name) {
+  return NormalizedMurmurHashEntropyProvider(entropy_source, entropy_max)
+      .GetEntropyForTrial(trial_name, 0);
+}
+
 // Make a vector of consecutive integers for shuffling.
 std::vector<uint16_t> MakeRange(size_t vector_size) {
   std::vector<uint16_t> range(vector_size);
@@ -105,7 +114,7 @@
   }
 
  private:
-  std::string trial_name_;
+  const std::string trial_name_;
 
   DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
 };
@@ -139,6 +148,28 @@
   DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
 };
 
+// A TrialEntropyGenerator that uses the normalized MurmurHash entropy provider
+// algorithm, using 13-bit low entropy source values.
+class NormalizedMurmurHashEntropyGenerator : public TrialEntropyGenerator {
+ public:
+  explicit NormalizedMurmurHashEntropyGenerator(const std::string& trial_name)
+      : trial_name_(trial_name) {}
+  ~NormalizedMurmurHashEntropyGenerator() override {}
+
+  // Returns the entropy for |trial_name_| from a random low entropy source.
+  double GenerateEntropyValue() const override {
+    const uint16_t low_entropy_source =
+        static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
+    return GenerateNormalizedMurmurHashEntropy(low_entropy_source,
+                                               kMaxLowEntropySize, trial_name_);
+  }
+
+ private:
+  const std::string trial_name_;
+
+  DISALLOW_COPY_AND_ASSIGN(NormalizedMurmurHashEntropyGenerator);
+};
+
 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
 // of Fit Test.
 void PerformEntropyUniformityTest(
@@ -197,8 +228,8 @@
   // that have different names, normally generate different results.
   //
   // Note that depending on the one-time random initialization, they
-  // _might_ actually give the same result, but we know that given
-  // the particular client_id we use for unit tests they won't.
+  // _might_ actually give the same result, but we know that given the
+  // particular client_id we use for unit tests they won't.
   base::FieldTrialList field_trial_list(
       std::make_unique<SHA1EntropyProvider>("client_id"));
   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
@@ -227,8 +258,8 @@
   // that have different names, normally generate different results.
   //
   // Note that depending on the one-time random initialization, they
-  // _might_ actually give the same result, but we know that given
-  // the particular client_id we use for unit tests they won't.
+  // _might_ actually give the same result, but we know that given the
+  // particular low_entropy_source we use for unit tests they won't.
   base::FieldTrialList field_trial_list(
       std::make_unique<PermutedEntropyProvider>(1234, kMaxLowEntropySize));
   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
@@ -252,6 +283,37 @@
   EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
 }
 
+TEST(EntropyProviderTest, UseOneTimeRandomizationNormalizedMurmurHash) {
+  // Checks that two one-time randomized trials with different names end up
+  // in different groups.
+  //
+  // In general the one-time random initialization _could_ put both trials in
+  // the same group, but for the low_entropy_source used in this test it is
+  // known not to.
+  base::FieldTrialList field_trial_list(
+      std::make_unique<NormalizedMurmurHashEntropyProvider>(
+          1234, kMaxLowEntropySize));
+  const int kNeverExpires = base::FieldTrialList::kNoExpirationYear;
+  scoped_refptr<base::FieldTrial> trials[] = {
+      base::FieldTrialList::FactoryGetFieldTrial(
+          "one", 100, "default", kNeverExpires, 1, 1,
+          base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr),
+      base::FieldTrialList::FactoryGetFieldTrial(
+          "two", 100, "default", kNeverExpires, 1, 1,
+          base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr),
+  };
+
+  for (const auto& trial : trials) {
+    for (int group = 0; group < 100; ++group)
+      trial->AppendGroup(std::string(), 1);
+  }
+
+  // Different trial names are expected to select different groups with this
+  // entropy source.
+  EXPECT_NE(trials[0]->group(), trials[1]->group());
+  EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
+}
+
 TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
   // Ensures that two trials with different names but the same custom seed used
   // for one time randomization produce the same group assignments.
@@ -306,6 +368,35 @@
   EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
 }
 
+TEST(EntropyProviderTest,
+     UseOneTimeRandomizationWithCustomSeedNormalizedMurmurHash) {
+  // Checks that a shared custom randomization seed makes two differently
+  // named one-time randomized trials pick the same group.
+  base::FieldTrialList field_trial_list(
+      std::make_unique<NormalizedMurmurHashEntropyProvider>(
+          1234, kMaxLowEntropySize));
+  const int kNeverExpires = base::FieldTrialList::kNoExpirationYear;
+  const uint32_t kCustomSeed = 9001;
+  scoped_refptr<base::FieldTrial> trials[] = {
+      base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
+          "one", 100, "default", kNeverExpires, 1, 1,
+          base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, nullptr, nullptr),
+      base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
+          "two", 100, "default", kNeverExpires, 1, 1,
+          base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, nullptr, nullptr),
+  };
+
+  for (const auto& trial : trials) {
+    for (int group = 0; group < 100; ++group)
+      trial->AppendGroup(std::string(), 1);
+  }
+
+  // Without the shared seed the names would normally lead to different
+  // groups; with it, both trials must agree on group number and name.
+  EXPECT_EQ(trials[0]->group(), trials[1]->group());
+  EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
+}
+
 TEST(EntropyProviderTest, SHA1Entropy) {
   const double results[] = { GenerateSHA1Entropy("hi", "1"),
                              GenerateSHA1Entropy("there", "1") };
@@ -339,6 +430,25 @@
             GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
 }
 
+TEST(EntropyProviderTest, NormalizedMurmurHashEntropy) {
+  // Distinct entropy sources must yield distinct values for one trial name.
+  const double results[] = {
+      GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"),
+      GenerateNormalizedMurmurHashEntropy(4321, kMaxLowEntropySize, "1")};
+  EXPECT_NE(results[0], results[1]);
+  for (const double entropy : results) {
+    EXPECT_LE(0.0, entropy);
+    EXPECT_GT(1.0, entropy);
+  }
+  // Identical inputs repeat exactly; a different trial name changes the value.
+  EXPECT_EQ(GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"),
+            GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"));
+  EXPECT_NE(GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize,
+                                                "something"),
+            GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize,
+                                                "else"));
+}
+
 TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
   // Verifies that PermutedEntropyProvider produces expected results. This
   // ensures that the results are the same between platforms and ensures that
@@ -352,6 +462,23 @@
                    GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
 }
 
+TEST(EntropyProviderTest, NormalizedMurmurHashEntropyProviderResults) {
+  // Pins NormalizedMurmurHashEntropyProvider output to known values so that
+  // platform differences or implementation changes that alter the results
+  // are caught.
+  const double entropy_range = static_cast<double>(kMaxLowEntropySize);
+
+  EXPECT_DOUBLE_EQ(
+      1612 / entropy_range,
+      GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "XYZ"));
+  EXPECT_DOUBLE_EQ(
+      7066 / entropy_range,
+      GenerateNormalizedMurmurHashEntropy(1, kMaxLowEntropySize, "Test"));
+  EXPECT_DOUBLE_EQ(
+      5668 / entropy_range,
+      GenerateNormalizedMurmurHashEntropy(5000, kMaxLowEntropySize, "Foo"));
+}
+
 TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
   for (size_t i = 0; i < base::size(kTestTrialNames); ++i) {
     SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
@@ -366,6 +493,13 @@
   }
 }
 
+TEST(EntropyProviderTest, NormalizedMurmurHashEntropyIsUniform) {
+  for (const auto& trial_name : kTestTrialNames) {
+    NormalizedMurmurHashEntropyGenerator entropy_generator(trial_name);
+    PerformEntropyUniformityTest(trial_name, entropy_generator);
+  }
+}
+
 TEST(EntropyProviderTest, PermutedEntropyConsistency) {
   std::vector<uint16_t> to_shuffle = MakeRange(10);
   std::vector<uint16_t> expected = {7, 6, 8, 3, 2, 0, 1, 4, 9, 5};