Implement NormalizedMurmurHashEntropyProvider

Design document:
https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo

BUG=890413

Change-Id: Ib372a573b1a0f68467f785ce74ef7821c9d48614
Reviewed-on: https://chromium-review.googlesource.com/c/1322350
Reviewed-by: Grace Kloba <[email protected]>
Reviewed-by: Alexei Svitkine <[email protected]>
Commit-Queue: Paul Miller <[email protected]>
Cr-Commit-Position: refs/heads/master@{#607816}
diff --git a/components/variations/BUILD.gn b/components/variations/BUILD.gn
index 5b535dd..6edab521 100644
--- a/components/variations/BUILD.gn
+++ b/components/variations/BUILD.gn
@@ -58,6 +58,8 @@
     "variations_http_header_provider.h",
     "variations_id_collection.cc",
     "variations_id_collection.h",
+    "variations_murmur_hash.cc",
+    "variations_murmur_hash.h",
     "variations_request_scheduler.cc",
     "variations_request_scheduler.h",
     "variations_seed_processor.cc",
@@ -147,6 +149,7 @@
     "variations_crash_keys_unittest.cc",
     "variations_http_header_provider_unittest.cc",
     "variations_id_collection_unittest.cc",
+    "variations_murmur_hash_unittest.cc",
     "variations_request_scheduler_unittest.cc",
     "variations_seed_processor_unittest.cc",
     "variations_seed_simulator_unittest.cc",
@@ -167,6 +170,7 @@
     "//components/variations/field_trial_config:field_trial_config",
     "//testing/gmock",
     "//testing/gtest",
+    "//third_party/smhasher:murmurhash3",
     "//third_party/zlib/google:compression_utils",
   ]
 }
diff --git a/components/variations/DEPS b/components/variations/DEPS
index 9f3a043..2e7e5d4 100644
--- a/components/variations/DEPS
+++ b/components/variations/DEPS
@@ -9,5 +9,6 @@
   "+crypto",
   "-net",
   "+third_party/protobuf",
+  "+third_party/smhasher",
   "+third_party/zlib/google",
 ]
diff --git a/components/variations/entropy_provider.cc b/components/variations/entropy_provider.cc
index 897b4fa6..cea9aa6e 100644
--- a/components/variations/entropy_provider.cc
+++ b/components/variations/entropy_provider.cc
@@ -14,6 +14,7 @@
 #include "base/strings/string_number_conversions.h"
 #include "base/sys_byteorder.h"
 #include "components/variations/hashing.h"
+#include "components/variations/variations_murmur_hash.h"
 
 namespace variations {
 
@@ -132,4 +133,41 @@
   return mapping[low_entropy_source_];
 }
 
+NormalizedMurmurHashEntropyProvider::NormalizedMurmurHashEntropyProvider(
+    uint16_t low_entropy_source,
+    size_t low_entropy_source_max)
+    : low_entropy_source_(low_entropy_source),
+      low_entropy_source_max_(low_entropy_source_max) {
+  DCHECK_LT(low_entropy_source, low_entropy_source_max);
+  DCHECK_LE(low_entropy_source_max, std::numeric_limits<uint16_t>::max());
+}
+
+NormalizedMurmurHashEntropyProvider::~NormalizedMurmurHashEntropyProvider() {}
+
+double NormalizedMurmurHashEntropyProvider::GetEntropyForTrial(
+    const std::string& trial_name,
+    uint32_t randomization_seed) const {
+  if (randomization_seed == 0) {  // Zero seed: hash |trial_name| instead.
+    randomization_seed = internal::VariationsMurmurHash::Hash(
+        internal::VariationsMurmurHash::StringToLE32(trial_name),
+        trial_name.length());
+  }
+
+  uint32_t x = internal::VariationsMurmurHash::Hash16(randomization_seed,
+                                                      low_entropy_source_);
+  int x_ordinal = 0;  // Number of candidate hashes strictly less than |x|.
+  for (uint32_t i = 0; i < low_entropy_source_max_; i++) {
+    uint32_t y = internal::VariationsMurmurHash::Hash16(randomization_seed, i);
+    x_ordinal += (y < x);
+  }
+
+  DCHECK_GE(x_ordinal, 0);
+  // At least one iteration had |y| == |x| (the one where |i| ==
+  // |low_entropy_source_|), and |x_ordinal| was not incremented in that
+  // iteration, so |x_ordinal| < |low_entropy_source_max_|.
+  DCHECK_LT(static_cast<size_t>(x_ordinal), low_entropy_source_max_);
+
+  return static_cast<double>(x_ordinal) / low_entropy_source_max_;
+}
+
 }  // namespace variations
diff --git a/components/variations/entropy_provider.h b/components/variations/entropy_provider.h
index 29001f1..4c32bdb 100644
--- a/components/variations/entropy_provider.h
+++ b/components/variations/entropy_provider.h
@@ -58,7 +58,7 @@
                             uint32_t randomization_seed) const override;
 
  private:
-  std::string entropy_source_;
+  const std::string entropy_source_;
 
   DISALLOW_COPY_AND_ASSIGN(SHA1EntropyProvider);
 };
@@ -86,12 +86,36 @@
   virtual uint16_t GetPermutedValue(uint32_t randomization_seed) const;
 
  private:
-  uint16_t low_entropy_source_;
-  size_t low_entropy_source_max_;
+  const uint16_t low_entropy_source_;
+  const size_t low_entropy_source_max_;
 
   DISALLOW_COPY_AND_ASSIGN(PermutedEntropyProvider);
 };
 
+// NormalizedMurmurHashEntropyProvider is an entropy provider suitable for low
+// entropy sources (below 16 bits). It uses MurmurHash3_32 to hash the study
+// name along with all possible low entropy sources. It finds the index where
+// the actual low entropy source's hash would fall in the sorted list of all
+// those hashes, and returns that index divided by the number of sources. See:
+// https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
+class NormalizedMurmurHashEntropyProvider
+    : public base::FieldTrial::EntropyProvider {
+ public:
+  NormalizedMurmurHashEntropyProvider(uint16_t low_entropy_source,
+                                      size_t low_entropy_source_max);
+  ~NormalizedMurmurHashEntropyProvider() override;
+
+  // base::FieldTrial::EntropyProvider:
+  double GetEntropyForTrial(const std::string& trial_name,
+                            uint32_t randomization_seed) const override;
+
+ private:
+  const uint16_t low_entropy_source_;
+  const size_t low_entropy_source_max_;
+
+  DISALLOW_COPY_AND_ASSIGN(NormalizedMurmurHashEntropyProvider);
+};
+
 }  // namespace variations
 
 #endif  // COMPONENTS_VARIATIONS_ENTROPY_PROVIDER_H_
diff --git a/components/variations/entropy_provider_unittest.cc b/components/variations/entropy_provider_unittest.cc
index 70db5c58..0f77af3 100644
--- a/components/variations/entropy_provider_unittest.cc
+++ b/components/variations/entropy_provider_unittest.cc
@@ -64,6 +64,15 @@
   return permuted_provider.GetEntropyForTrial(trial_name, 0);
 }
 
+// Generates normalized MurmurHash-based entropy for the given |trial_name|
+// based on |entropy_source| which must be in the range [0, entropy_max).
+double GenerateNormalizedMurmurHashEntropy(uint16_t entropy_source,
+                                           size_t entropy_max,
+                                           const std::string& trial_name) {
+  NormalizedMurmurHashEntropyProvider provider(entropy_source, entropy_max);
+  return provider.GetEntropyForTrial(trial_name, 0);
+}
+
 // Make a vector of consecutive integers for shuffling.
 std::vector<uint16_t> MakeRange(size_t vector_size) {
   std::vector<uint16_t> range(vector_size);
@@ -105,7 +114,7 @@
   }
 
  private:
-  std::string trial_name_;
+  const std::string trial_name_;
 
   DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
 };
@@ -139,6 +148,28 @@
   DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
 };
 
+// A TrialEntropyGenerator that uses the normalized MurmurHash entropy provider
+// algorithm, using 13-bit low entropy source values.
+class NormalizedMurmurHashEntropyGenerator : public TrialEntropyGenerator {
+ public:
+  explicit NormalizedMurmurHashEntropyGenerator(const std::string& trial_name)
+      : trial_name_(trial_name) {}
+
+  ~NormalizedMurmurHashEntropyGenerator() override {}
+
+  double GenerateEntropyValue() const override {  // Picks a random source.
+    const uint16_t low_entropy_source =
+        static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
+    return GenerateNormalizedMurmurHashEntropy(low_entropy_source,
+                                               kMaxLowEntropySize, trial_name_);
+  }
+
+ private:
+  const std::string trial_name_;
+
+  DISALLOW_COPY_AND_ASSIGN(NormalizedMurmurHashEntropyGenerator);
+};
+
 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
 // of Fit Test.
 void PerformEntropyUniformityTest(
@@ -197,8 +228,8 @@
   // that have different names, normally generate different results.
   //
   // Note that depending on the one-time random initialization, they
-  // _might_ actually give the same result, but we know that given
-  // the particular client_id we use for unit tests they won't.
+  // _might_ actually give the same result, but we know that given the
+  // particular client_id we use for unit tests they won't.
   base::FieldTrialList field_trial_list(
       std::make_unique<SHA1EntropyProvider>("client_id"));
   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
@@ -227,8 +258,8 @@
   // that have different names, normally generate different results.
   //
   // Note that depending on the one-time random initialization, they
-  // _might_ actually give the same result, but we know that given
-  // the particular client_id we use for unit tests they won't.
+  // _might_ actually give the same result, but we know that given the
+  // particular low_entropy_source we use for unit tests they won't.
   base::FieldTrialList field_trial_list(
       std::make_unique<PermutedEntropyProvider>(1234, kMaxLowEntropySize));
   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
@@ -252,6 +283,37 @@
   EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
 }
 
+TEST(EntropyProviderTest, UseOneTimeRandomizationNormalizedMurmurHash) {
+  // Simply asserts that two trials using one-time randomization
+  // that have different names, normally generate different results.
+  //
+  // Note that depending on the one-time random initialization, they
+  // _might_ actually give the same result, but we know that given
+  // the particular low_entropy_source we use for unit tests they won't.
+  base::FieldTrialList field_trial_list(
+      std::make_unique<NormalizedMurmurHashEntropyProvider>(
+          1234, kMaxLowEntropySize));
+  const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
+  scoped_refptr<base::FieldTrial> trials[] = {
+      base::FieldTrialList::FactoryGetFieldTrial(
+          "one", 100, "default", kNoExpirationYear, 1, 1,
+          base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr),
+      base::FieldTrialList::FactoryGetFieldTrial(
+          "two", 100, "default", kNoExpirationYear, 1, 1,
+          base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr),
+  };
+
+  for (size_t i = 0; i < base::size(trials); ++i) {
+    for (int j = 0; j < 100; ++j)
+      trials[i]->AppendGroup(std::string(), 1);
+  }
+
+  // The trials are most likely to give different results since they have
+  // different names.
+  EXPECT_NE(trials[0]->group(), trials[1]->group());
+  EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
+}
+
 TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
   // Ensures that two trials with different names but the same custom seed used
   // for one time randomization produce the same group assignments.
@@ -306,6 +368,35 @@
   EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
 }
 
+TEST(EntropyProviderTest,
+     UseOneTimeRandomizationWithCustomSeedNormalizedMurmurHash) {
+  // Ensures that two trials with different names but the same custom seed used
+  // for one time randomization produce the same group assignments.
+  base::FieldTrialList field_trial_list(
+      std::make_unique<NormalizedMurmurHashEntropyProvider>(
+          1234, kMaxLowEntropySize));
+  const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
+  const uint32_t kCustomSeed = 9001;
+  scoped_refptr<base::FieldTrial> trials[] = {
+      base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
+          "one", 100, "default", kNoExpirationYear, 1, 1,
+          base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, nullptr, nullptr),
+      base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
+          "two", 100, "default", kNoExpirationYear, 1, 1,
+          base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, nullptr, nullptr),
+  };
+
+  for (size_t i = 0; i < base::size(trials); ++i) {
+    for (int j = 0; j < 100; ++j)
+      trials[i]->AppendGroup(std::string(), 1);
+  }
+
+  // Normally, these trials should produce different groups, but if the same
+  // custom seed is used, they should produce the same group assignment.
+  EXPECT_EQ(trials[0]->group(), trials[1]->group());
+  EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
+}
+
 TEST(EntropyProviderTest, SHA1Entropy) {
   const double results[] = { GenerateSHA1Entropy("hi", "1"),
                              GenerateSHA1Entropy("there", "1") };
@@ -339,6 +430,25 @@
             GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
 }
 
+TEST(EntropyProviderTest, NormalizedMurmurHashEntropy) {
+  const double results[] = {
+      GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"),
+      GenerateNormalizedMurmurHashEntropy(4321, kMaxLowEntropySize, "1")};
+
+  EXPECT_NE(results[0], results[1]);
+  for (size_t i = 0; i < base::size(results); ++i) {
+    EXPECT_LE(0.0, results[i]);
+    EXPECT_GT(1.0, results[i]);
+  }
+
+  EXPECT_EQ(GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"),
+            GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"));
+  EXPECT_NE(GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize,
+                                                "something"),
+            GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize,
+                                                "else"));
+}
+
 TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
   // Verifies that PermutedEntropyProvider produces expected results. This
   // ensures that the results are the same between platforms and ensures that
@@ -352,6 +462,23 @@
                    GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
 }
 
+TEST(EntropyProviderTest, NormalizedMurmurHashEntropyProviderResults) {
+  // Verifies that NormalizedMurmurHashEntropyProvider produces expected
+  // results. This ensures that the results are the same between platforms and
+  // ensures that changes to the implementation do not regress this
+  // accidentally.
+
+  EXPECT_DOUBLE_EQ(
+      1612 / static_cast<double>(kMaxLowEntropySize),
+      GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "XYZ"));
+  EXPECT_DOUBLE_EQ(
+      7066 / static_cast<double>(kMaxLowEntropySize),
+      GenerateNormalizedMurmurHashEntropy(1, kMaxLowEntropySize, "Test"));
+  EXPECT_DOUBLE_EQ(
+      5668 / static_cast<double>(kMaxLowEntropySize),
+      GenerateNormalizedMurmurHashEntropy(5000, kMaxLowEntropySize, "Foo"));
+}
+
 TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
   for (size_t i = 0; i < base::size(kTestTrialNames); ++i) {
     SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
@@ -366,6 +493,13 @@
   }
 }
 
+TEST(EntropyProviderTest, NormalizedMurmurHashEntropyIsUniform) {
+  for (size_t i = 0; i < base::size(kTestTrialNames); ++i) {
+    NormalizedMurmurHashEntropyGenerator entropy_generator(kTestTrialNames[i]);
+    PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
+  }
+}
+
 TEST(EntropyProviderTest, PermutedEntropyConsistency) {
   std::vector<uint16_t> to_shuffle = MakeRange(10);
   std::vector<uint16_t> expected = {7, 6, 8, 3, 2, 0, 1, 4, 9, 5};
diff --git a/components/variations/variations_murmur_hash.cc b/components/variations/variations_murmur_hash.cc
new file mode 100644
index 0000000..661dbf7
--- /dev/null
+++ b/components/variations/variations_murmur_hash.cc
@@ -0,0 +1,81 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/variations/variations_murmur_hash.h"
+
+#include "base/compiler_specific.h"
+#include "base/sys_byteorder.h"
+#include "build/build_config.h"
+
+#if !(defined(ARCH_CPU_LITTLE_ENDIAN) || defined(ARCH_CPU_BIG_ENDIAN))
+#error "unknown endianness"
+#endif
+
+namespace variations {
+namespace internal {
+
+// static
+std::vector<uint32_t> VariationsMurmurHash::StringToLE32(
+    const std::string& data) {
+  const size_t data_size = data.size();
+  std::vector<uint32_t> words((data_size + 3) / 4, 0);  // Size rounds up.
+  DCHECK_GE(words.size() * sizeof(uint32_t), data_size * sizeof(char));
+  // Skip memcpy when empty: words.data() may be null, which memcpy forbids.
+  if (data_size != 0) memcpy(words.data(), data.data(), data_size);
+
+#if defined(ARCH_CPU_BIG_ENDIAN)
+  // When packing chars into uint32_t, "abcd" may become 0x61626364 (big endian)
+  // or 0x64636261 (little endian). If big endian, swap everything, so we get
+  // the same values across platforms.
+  for (auto it = words.begin(); it != words.end(); ++it)
+    *it = base::ByteSwapToLE32(*it);
+#endif  // defined(ARCH_CPU_BIG_ENDIAN)
+
+  return words;
+}
+
+// static
+uint32_t VariationsMurmurHash::Hash(const std::vector<uint32_t>& data,
+                                    size_t length) {
+  DCHECK_LE(length, data.size() * sizeof(uint32_t));
+  uint32_t h1 = 0;  // The seed is hard-coded to 0.
+
+  // body: mix each complete 32-bit word into |h1|.
+  size_t num_full_blocks = length / sizeof(uint32_t);
+  for (size_t i = 0; i < num_full_blocks; i++) {
+    uint32_t k1 = data[i];
+    k1 *= c1;
+    k1 = RotateLeft(k1, 15);
+    k1 *= c2;
+    h1 ^= k1;
+    h1 = RotateLeft(h1, 13);
+    h1 = h1 * 5 + 0xe6546b64;
+  }
+
+  // tail: mix in the |length| % 4 low-order bytes of the next word, if any.
+  uint32_t k1 = 0;
+  switch (length & 3) {
+    case 3:
+      k1 |= data[num_full_blocks] & 0xFF0000;
+      FALLTHROUGH;
+    case 2:
+      k1 |= data[num_full_blocks] & 0xFF00;
+      FALLTHROUGH;
+    case 1:
+      k1 |= data[num_full_blocks] & 0xFF;
+  }
+  k1 *= c1;
+  k1 = RotateLeft(k1, 15);
+  k1 *= c2;
+  h1 ^= k1;
+
+  // finalization: fold in the byte count, then apply the final mix.
+  h1 ^= length;
+  h1 = FinalMix(h1);
+
+  return h1;
+}
+
+}  // namespace internal
+}  // namespace variations
diff --git a/components/variations/variations_murmur_hash.h b/components/variations/variations_murmur_hash.h
new file mode 100644
index 0000000..9ec77ad4
--- /dev/null
+++ b/components/variations/variations_murmur_hash.h
@@ -0,0 +1,84 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
+#define COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "base/compiler_specific.h"
+
+namespace variations {
+namespace internal {
+
+// Hash utilities for NormalizedMurmurHashEntropyProvider. For more info, see:
+// https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
+class VariationsMurmurHash {
+ public:
+  // Prepares data to be hashed by VariationsMurmurHash: align and zero-pad to a
+  // multiple of 4 bytes, and produce the same uint32_t values regardless of
+  // platform endianness. ("abcd" will always become 0x64636261). Any padding
+  // will appear in the more-significant bytes of the last uint32_t.
+  static std::vector<uint32_t> StringToLE32(const std::string& data);
+
+  // Hash is a reimplementation of MurmurHash3_x86_32 from third_party/smhasher/
+  // which works on all architectures. MurmurHash3_x86_32 does unaligned reads
+  // (not generally safe on ARM) if the input bytes start on an unaligned
+  // address, and it assumes little-endianness. Hash produces the same result
+  // for the same input uint32_t values, regardless of platform endianness, and
+  // it produces the same results that MurmurHash3_x86_32 would produce on a
+  // little-endian platform.
+  //
+  // |length| is the number of bytes to hash. It mustn't exceed data.size() * 4.
+  // If length % 4 != 0, Hash will consume the less-significant bytes of the
+  // last uint32_t first.
+  //
+  // MurmurHash3_x86_32 takes a seed, for which 0 is the typical value. Hash
+  // hard-codes the seed to 0, since NormalizedMurmurHashEntropyProvider doesn't
+  // use it.
+  static uint32_t Hash(const std::vector<uint32_t>& data, size_t length);
+
+  // A version of Hash which is specialized for exactly 2 bytes of data and
+  // allows a nonzero seed. NormalizedMurmurHashEntropyProvider calls this in a
+  // loop, |kMaxLowEntropySize| times per study, so it must be fast.
+  ALWAYS_INLINE static uint32_t Hash16(uint32_t seed, uint16_t data) {
+    uint32_t h1 = seed, k1 = data;
+
+    // tail: the 2 data bytes form the only (partial) block.
+    k1 *= c1;
+    k1 = RotateLeft(k1, 15);
+    k1 *= c2;
+    h1 ^= k1;
+
+    // finalization
+    h1 ^= 2;  // 2 == number of bytes hashed.
+    h1 = FinalMix(h1);
+
+    return h1;
+  }
+
+ private:
+  static const uint32_t c1 = 0xcc9e2d51;
+  static const uint32_t c2 = 0x1b873593;
+
+  ALWAYS_INLINE static uint32_t RotateLeft(uint32_t x, int n) {
+    return (x << n) | (x >> (32 - n));  // Requires 0 < n < 32.
+  }
+
+  ALWAYS_INLINE static uint32_t FinalMix(uint32_t h) {
+    h ^= h >> 16;
+    h *= 0x85ebca6b;
+    h ^= h >> 13;
+    h *= 0xc2b2ae35;
+    h ^= h >> 16;
+    return h;
+  }
+};
+
+}  // namespace internal
+}  // namespace variations
+
+#endif  // COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
diff --git a/components/variations/variations_murmur_hash_unittest.cc b/components/variations/variations_murmur_hash_unittest.cc
new file mode 100644
index 0000000..3059cd3
--- /dev/null
+++ b/components/variations/variations_murmur_hash_unittest.cc
@@ -0,0 +1,82 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/variations/variations_murmur_hash.h"
+
+#include <limits>
+#include <vector>
+
+#include "build/build_config.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/smhasher/src/MurmurHash3.h"
+
+namespace variations {
+namespace internal {
+
+TEST(VariationsMurmurHashTest, StringToLE32) {
+  EXPECT_EQ(std::vector<uint32_t>(),
+            VariationsMurmurHash::StringToLE32(""));
+  EXPECT_EQ(std::vector<uint32_t>({0x00000061}),
+            VariationsMurmurHash::StringToLE32("a"));
+  EXPECT_EQ(std::vector<uint32_t>({0x00006261}),
+            VariationsMurmurHash::StringToLE32("ab"));
+  EXPECT_EQ(std::vector<uint32_t>({0x00636261}),
+            VariationsMurmurHash::StringToLE32("abc"));
+  EXPECT_EQ(std::vector<uint32_t>({0x64636261}),
+            VariationsMurmurHash::StringToLE32("abcd"));
+  EXPECT_EQ(std::vector<uint32_t>({0x64636261, 0x00000065}),
+            VariationsMurmurHash::StringToLE32("abcde"));
+  EXPECT_EQ(std::vector<uint32_t>({0x64636261, 0x00006665}),
+            VariationsMurmurHash::StringToLE32("abcdef"));
+}
+
+// The tests inside this #if compare VariationsMurmurHash to the reference
+// implementation, MurmurHash3_x86_32, which only works on little-endian.
+#if defined(ARCH_CPU_LITTLE_ENDIAN)
+
+// Compare VariationsMurmurHash::Hash to MurmurHash3_x86_32 for every prefix of
+// |data|, from the empty string to all of |data|.
+TEST(VariationsMurmurHashTest, Hash) {
+  // Random bytes generated manually and hard-coded for reproducibility
+  const std::vector<uint32_t> data({
+      2704264845, 2929902289, 1679431515, 1427187834, 1300338468,
+       576307953, 1209988079, 1918627109, 3926412991,   74087765});
+
+  size_t max_size = data.size() * sizeof(uint32_t);
+  for (size_t size = 0; size <= max_size; size++) {
+    uint32_t expected;
+    MurmurHash3_x86_32(data.data(), size, /*seed=*/0, &expected);
+    EXPECT_EQ(expected, VariationsMurmurHash::Hash(data, size))
+        << "size=" << size;
+  }
+}
+
+TEST(VariationsMurmurHashTest, Hash16) {
+  // Pick some likely edge case values.
+  constexpr uint32_t max32 = std::numeric_limits<uint32_t>::max();
+  uint32_t seeds[] = {
+    0, max32 / 2 - 1, max32 - 2,
+    1, max32 / 2,     max32 - 1,
+    2, max32 / 2 + 1, max32};
+
+  constexpr uint16_t max16 = std::numeric_limits<uint16_t>::max();
+  uint16_t data[] = {
+    0, max16 / 2 - 1, max16 - 2,
+    1, max16 / 2,     max16 - 1,
+    2, max16 / 2 + 1, max16};
+
+  for (uint32_t seed : seeds) {
+    for (uint16_t datum : data) {
+      uint32_t expected;
+      MurmurHash3_x86_32(&datum, sizeof(datum), seed, &expected);
+      EXPECT_EQ(expected, VariationsMurmurHash::Hash16(seed, datum))
+          << "seed=" << seed << ", datum=" << datum;
+    }
+  }
+}
+
+#endif  // defined(ARCH_CPU_LITTLE_ENDIAN)
+
+}  // namespace internal
+}  // namespace variations