Implement NormalizedMurmurHashEntropyProvider
Design document:
https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
BUG=890413
Change-Id: Ib372a573b1a0f68467f785ce74ef7821c9d48614
Reviewed-on: https://chromium-review.googlesource.com/c/1322350
Reviewed-by: Grace Kloba <[email protected]>
Reviewed-by: Alexei Svitkine <[email protected]>
Commit-Queue: Paul Miller <[email protected]>
Cr-Commit-Position: refs/heads/master@{#607816}
diff --git a/components/variations/BUILD.gn b/components/variations/BUILD.gn
index 5b535dd..6edab521 100644
--- a/components/variations/BUILD.gn
+++ b/components/variations/BUILD.gn
@@ -58,6 +58,8 @@
"variations_http_header_provider.h",
"variations_id_collection.cc",
"variations_id_collection.h",
+ "variations_murmur_hash.cc",
+ "variations_murmur_hash.h",
"variations_request_scheduler.cc",
"variations_request_scheduler.h",
"variations_seed_processor.cc",
@@ -147,6 +149,7 @@
"variations_crash_keys_unittest.cc",
"variations_http_header_provider_unittest.cc",
"variations_id_collection_unittest.cc",
+ "variations_murmur_hash_unittest.cc",
"variations_request_scheduler_unittest.cc",
"variations_seed_processor_unittest.cc",
"variations_seed_simulator_unittest.cc",
@@ -167,6 +170,7 @@
"//components/variations/field_trial_config:field_trial_config",
"//testing/gmock",
"//testing/gtest",
+ "//third_party/smhasher:murmurhash3",
"//third_party/zlib/google:compression_utils",
]
}
diff --git a/components/variations/DEPS b/components/variations/DEPS
index 9f3a043..2e7e5d4 100644
--- a/components/variations/DEPS
+++ b/components/variations/DEPS
@@ -9,5 +9,6 @@
"+crypto",
"-net",
"+third_party/protobuf",
+ "+third_party/smhasher",
"+third_party/zlib/google",
]
diff --git a/components/variations/entropy_provider.cc b/components/variations/entropy_provider.cc
index 897b4fa6..cea9aa6e 100644
--- a/components/variations/entropy_provider.cc
+++ b/components/variations/entropy_provider.cc
@@ -14,6 +14,7 @@
#include "base/strings/string_number_conversions.h"
#include "base/sys_byteorder.h"
#include "components/variations/hashing.h"
+#include "components/variations/variations_murmur_hash.h"
namespace variations {
@@ -132,4 +133,41 @@
return mapping[low_entropy_source_];
}
+NormalizedMurmurHashEntropyProvider::NormalizedMurmurHashEntropyProvider(
+ uint16_t low_entropy_source,
+ size_t low_entropy_source_max)
+ : low_entropy_source_(low_entropy_source),
+ low_entropy_source_max_(low_entropy_source_max) {
+ DCHECK_LT(low_entropy_source, low_entropy_source_max);
+ DCHECK_LE(low_entropy_source_max, std::numeric_limits<uint16_t>::max());
+}
+
+NormalizedMurmurHashEntropyProvider::~NormalizedMurmurHashEntropyProvider() {}
+
+double NormalizedMurmurHashEntropyProvider::GetEntropyForTrial(
+ const std::string& trial_name,
+ uint32_t randomization_seed) const {
+ if (randomization_seed == 0) {
+ randomization_seed = internal::VariationsMurmurHash::Hash(
+ internal::VariationsMurmurHash::StringToLE32(trial_name),
+ trial_name.length());
+ }
+
+ uint32_t x = internal::VariationsMurmurHash::Hash16(randomization_seed,
+ low_entropy_source_);
+ int x_ordinal = 0;
+ for (uint32_t i = 0; i < low_entropy_source_max_; i++) {
+ uint32_t y = internal::VariationsMurmurHash::Hash16(randomization_seed, i);
+ x_ordinal += (y < x);
+ }
+
+ DCHECK_GE(x_ordinal, 0);
+ // At least one iteration had |x| == |y| (the one where
+ // |i| == |low_entropy_source_|), and |x_ordinal| was not incremented in that
+ // iteration, so |x_ordinal| < |low_entropy_source_max_|.
+ DCHECK_LT(static_cast<size_t>(x_ordinal), low_entropy_source_max_);
+
+ return static_cast<double>(x_ordinal) / low_entropy_source_max_;
+}
+
} // namespace variations
diff --git a/components/variations/entropy_provider.h b/components/variations/entropy_provider.h
index 29001f1..4c32bdb 100644
--- a/components/variations/entropy_provider.h
+++ b/components/variations/entropy_provider.h
@@ -58,7 +58,7 @@
uint32_t randomization_seed) const override;
private:
- std::string entropy_source_;
+ const std::string entropy_source_;
DISALLOW_COPY_AND_ASSIGN(SHA1EntropyProvider);
};
@@ -86,12 +86,36 @@
virtual uint16_t GetPermutedValue(uint32_t randomization_seed) const;
private:
- uint16_t low_entropy_source_;
- size_t low_entropy_source_max_;
+ const uint16_t low_entropy_source_;
+ const size_t low_entropy_source_max_;
DISALLOW_COPY_AND_ASSIGN(PermutedEntropyProvider);
};
+// NormalizedMurmurHashEntropyProvider is an entropy provider suitable for low
+// entropy sources (below 16 bits). It uses MurmurHash3_32 to hash the study
+// name along with all possible low entropy sources. It finds the index where
+// the actual low entropy source's hash would fall in the sorted list of all
+// those hashes, and returns that index divided by the number of possible
+// https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
+class NormalizedMurmurHashEntropyProvider
+ : public base::FieldTrial::EntropyProvider {
+ public:
+ NormalizedMurmurHashEntropyProvider(uint16_t low_entropy_source,
+ size_t low_entropy_source_max);
+ ~NormalizedMurmurHashEntropyProvider() override;
+
+ // base::FieldTrial::EntropyProvider:
+ double GetEntropyForTrial(const std::string& trial_name,
+ uint32_t randomization_seed) const override;
+
+ private:
+ const uint16_t low_entropy_source_;
+ const size_t low_entropy_source_max_;
+
+ DISALLOW_COPY_AND_ASSIGN(NormalizedMurmurHashEntropyProvider);
+};
+
} // namespace variations
#endif // COMPONENTS_VARIATIONS_ENTROPY_PROVIDER_H_
diff --git a/components/variations/entropy_provider_unittest.cc b/components/variations/entropy_provider_unittest.cc
index 70db5c58..0f77af3 100644
--- a/components/variations/entropy_provider_unittest.cc
+++ b/components/variations/entropy_provider_unittest.cc
@@ -64,6 +64,15 @@
return permuted_provider.GetEntropyForTrial(trial_name, 0);
}
+// Generates normalized MurmurHash-based entropy for the given |trial_name|
+// based on |entropy_source| which must be in the range [0, entropy_max).
+double GenerateNormalizedMurmurHashEntropy(uint16_t entropy_source,
+ size_t entropy_max,
+ const std::string& trial_name) {
+ NormalizedMurmurHashEntropyProvider provider(entropy_source, entropy_max);
+ return provider.GetEntropyForTrial(trial_name, 0);
+}
+
// Make a vector of consecutive integers for shuffling.
std::vector<uint16_t> MakeRange(size_t vector_size) {
std::vector<uint16_t> range(vector_size);
@@ -105,7 +114,7 @@
}
private:
- std::string trial_name_;
+ const std::string trial_name_;
DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
};
@@ -139,6 +148,28 @@
DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
};
+// A TrialEntropyGenerator that uses the normalized MurmurHash entropy provider
+// algorithm, using 13-bit low entropy source values.
+class NormalizedMurmurHashEntropyGenerator : public TrialEntropyGenerator {
+ public:
+ explicit NormalizedMurmurHashEntropyGenerator(const std::string& trial_name)
+ : trial_name_(trial_name) {}
+
+ ~NormalizedMurmurHashEntropyGenerator() override {}
+
+ double GenerateEntropyValue() const override {
+ const uint16_t low_entropy_source =
+ static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
+ return GenerateNormalizedMurmurHashEntropy(low_entropy_source,
+ kMaxLowEntropySize, trial_name_);
+ }
+
+ private:
+ const std::string trial_name_;
+
+ DISALLOW_COPY_AND_ASSIGN(NormalizedMurmurHashEntropyGenerator);
+};
+
// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
// of Fit Test.
void PerformEntropyUniformityTest(
@@ -197,8 +228,8 @@
// that have different names, normally generate different results.
//
// Note that depending on the one-time random initialization, they
- // _might_ actually give the same result, but we know that given
- // the particular client_id we use for unit tests they won't.
+ // _might_ actually give the same result, but we know that given the
+ // particular client_id we use for unit tests they won't.
base::FieldTrialList field_trial_list(
std::make_unique<SHA1EntropyProvider>("client_id"));
const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
@@ -227,8 +258,8 @@
// that have different names, normally generate different results.
//
// Note that depending on the one-time random initialization, they
- // _might_ actually give the same result, but we know that given
- // the particular client_id we use for unit tests they won't.
+ // _might_ actually give the same result, but we know that given the
+ // particular low_entropy_source we use for unit tests they won't.
base::FieldTrialList field_trial_list(
std::make_unique<PermutedEntropyProvider>(1234, kMaxLowEntropySize));
const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
@@ -252,6 +283,37 @@
EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
}
+TEST(EntropyProviderTest, UseOneTimeRandomizationNormalizedMurmurHash) {
+ // Simply asserts that two trials using one-time randomization
+ // that have different names, normally generate different results.
+ //
+ // Note that depending on the one-time random initialization, they
+ // _might_ actually give the same result, but we know that given
+ // the particular low_entropy_source we use for unit tests they won't.
+ base::FieldTrialList field_trial_list(
+ std::make_unique<NormalizedMurmurHashEntropyProvider>(
+ 1234, kMaxLowEntropySize));
+ const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
+ scoped_refptr<base::FieldTrial> trials[] = {
+ base::FieldTrialList::FactoryGetFieldTrial(
+ "one", 100, "default", kNoExpirationYear, 1, 1,
+ base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr),
+ base::FieldTrialList::FactoryGetFieldTrial(
+ "two", 100, "default", kNoExpirationYear, 1, 1,
+ base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr),
+ };
+
+ for (size_t i = 0; i < base::size(trials); ++i) {
+ for (int j = 0; j < 100; ++j)
+ trials[i]->AppendGroup(std::string(), 1);
+ }
+
+ // The trials are most likely to give different results since they have
+ // different names.
+ EXPECT_NE(trials[0]->group(), trials[1]->group());
+ EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
+}
+
TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
// Ensures that two trials with different names but the same custom seed used
// for one time randomization produce the same group assignments.
@@ -306,6 +368,35 @@
EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
}
+TEST(EntropyProviderTest,
+ UseOneTimeRandomizationWithCustomSeedNormalizedMurmurHash) {
+ // Ensures that two trials with different names but the same custom seed used
+ // for one time randomization produce the same group assignments.
+ base::FieldTrialList field_trial_list(
+ std::make_unique<NormalizedMurmurHashEntropyProvider>(
+ 1234, kMaxLowEntropySize));
+ const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
+ const uint32_t kCustomSeed = 9001;
+ scoped_refptr<base::FieldTrial> trials[] = {
+ base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
+ "one", 100, "default", kNoExpirationYear, 1, 1,
+ base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, nullptr, nullptr),
+ base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
+ "two", 100, "default", kNoExpirationYear, 1, 1,
+ base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, nullptr, nullptr),
+ };
+
+ for (size_t i = 0; i < base::size(trials); ++i) {
+ for (int j = 0; j < 100; ++j)
+ trials[i]->AppendGroup(std::string(), 1);
+ }
+
+ // Normally, these trials should produce different groups, but if the same
+ // custom seed is used, they should produce the same group assignment.
+ EXPECT_EQ(trials[0]->group(), trials[1]->group());
+ EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
+}
+
TEST(EntropyProviderTest, SHA1Entropy) {
const double results[] = { GenerateSHA1Entropy("hi", "1"),
GenerateSHA1Entropy("there", "1") };
@@ -339,6 +430,25 @@
GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
}
+TEST(EntropyProviderTest, NormalizedMurmurHashEntropy) {
+ const double results[] = {
+ GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"),
+ GenerateNormalizedMurmurHashEntropy(4321, kMaxLowEntropySize, "1")};
+
+ EXPECT_NE(results[0], results[1]);
+ for (size_t i = 0; i < base::size(results); ++i) {
+ EXPECT_LE(0.0, results[i]);
+ EXPECT_GT(1.0, results[i]);
+ }
+
+ EXPECT_EQ(GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"),
+ GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"));
+ EXPECT_NE(GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize,
+ "something"),
+ GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize,
+ "else"));
+}
+
TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
// Verifies that PermutedEntropyProvider produces expected results. This
// ensures that the results are the same between platforms and ensures that
@@ -352,6 +462,23 @@
GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
}
+TEST(EntropyProviderTest, NormalizedMurmurHashEntropyProviderResults) {
+ // Verifies that NormalizedMurmurHashEntropyProvider produces expected
+ // results. This ensures that the results are the same between platforms and
+ // ensures that changes to the implementation do not regress this
+ // accidentally.
+
+ EXPECT_DOUBLE_EQ(
+ 1612 / static_cast<double>(kMaxLowEntropySize),
+ GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "XYZ"));
+ EXPECT_DOUBLE_EQ(
+ 7066 / static_cast<double>(kMaxLowEntropySize),
+ GenerateNormalizedMurmurHashEntropy(1, kMaxLowEntropySize, "Test"));
+ EXPECT_DOUBLE_EQ(
+ 5668 / static_cast<double>(kMaxLowEntropySize),
+ GenerateNormalizedMurmurHashEntropy(5000, kMaxLowEntropySize, "Foo"));
+}
+
TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
for (size_t i = 0; i < base::size(kTestTrialNames); ++i) {
SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
@@ -366,6 +493,13 @@
}
}
+TEST(EntropyProviderTest, NormalizedMurmurHashEntropyIsUniform) {
+ for (size_t i = 0; i < base::size(kTestTrialNames); ++i) {
+ NormalizedMurmurHashEntropyGenerator entropy_generator(kTestTrialNames[i]);
+ PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
+ }
+}
+
TEST(EntropyProviderTest, PermutedEntropyConsistency) {
std::vector<uint16_t> to_shuffle = MakeRange(10);
std::vector<uint16_t> expected = {7, 6, 8, 3, 2, 0, 1, 4, 9, 5};
diff --git a/components/variations/variations_murmur_hash.cc b/components/variations/variations_murmur_hash.cc
new file mode 100644
index 0000000..661dbf7
--- /dev/null
+++ b/components/variations/variations_murmur_hash.cc
@@ -0,0 +1,81 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/variations/variations_murmur_hash.h"
+
+#include "base/compiler_specific.h"
+#include "base/sys_byteorder.h"
+#include "build/build_config.h"
+
+#if !(defined(ARCH_CPU_LITTLE_ENDIAN) || defined(ARCH_CPU_BIG_ENDIAN))
+#error "unknown endianness"
+#endif
+
+namespace variations {
+namespace internal {
+
+// static
+std::vector<uint32_t> VariationsMurmurHash::StringToLE32(
+ const std::string& data) {
+ const size_t data_size = data.size();
+ const size_t word_num = (data_size + 3) / 4; // data_size / 4, rounding up
+ std::vector<uint32_t> words(word_num, 0);
+ DCHECK_GE(words.size() * sizeof(uint32_t), data_size * sizeof(char));
+ memcpy(words.data(), data.data(), data_size);
+
+#if defined(ARCH_CPU_BIG_ENDIAN)
+ // When packing chars into uint32_t, "abcd" may become 0x61626364 (big endian)
+ // or 0x64636261 (little endian). If big endian, swap everything, so we get
+ // the same values across platforms.
+ for (auto it = words.begin(); it != words.end(); ++it)
+ *it = base::ByteSwapToLE32(*it);
+#endif // defined(ARCH_CPU_BIG_ENDIAN)
+
+ return words;
+}
+
+// static
+uint32_t VariationsMurmurHash::Hash(const std::vector<uint32_t>& data,
+ size_t length) {
+ DCHECK_LE(length, data.size() * sizeof(uint32_t));
+ uint32_t h1 = 0;
+
+ // body
+ size_t num_full_blocks = length / sizeof(uint32_t);
+ for (size_t i = 0; i < num_full_blocks; i++) {
+ uint32_t k1 = data[i];
+ k1 *= c1;
+ k1 = RotateLeft(k1, 15);
+ k1 *= c2;
+ h1 ^= k1;
+ h1 = RotateLeft(h1, 13);
+ h1 = h1 * 5 + 0xe6546b64;
+ }
+
+ // tail
+ uint32_t k1 = 0;
+ switch (length & 3) {
+ case 3:
+ k1 |= data[num_full_blocks] & 0xFF0000;
+ FALLTHROUGH;
+ case 2:
+ k1 |= data[num_full_blocks] & 0xFF00;
+ FALLTHROUGH;
+ case 1:
+ k1 |= data[num_full_blocks] & 0xFF;
+ }
+ k1 *= c1;
+ k1 = RotateLeft(k1, 15);
+ k1 *= c2;
+ h1 ^= k1;
+
+ // finalization
+ h1 ^= length;
+ h1 = FinalMix(h1);
+
+ return h1;
+}
+
+} // namespace internal
+} // namespace variations
diff --git a/components/variations/variations_murmur_hash.h b/components/variations/variations_murmur_hash.h
new file mode 100644
index 0000000..9ec77ad4
--- /dev/null
+++ b/components/variations/variations_murmur_hash.h
@@ -0,0 +1,84 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
+#define COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "base/compiler_specific.h"
+
+namespace variations {
+namespace internal {
+
+// Hash utilities for NormalizedMurmurHashEntropyProvider. For more info, see:
+// https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
+class VariationsMurmurHash {
+ public:
+ // Prepares data to be hashed by VariationsMurmurHash: align and zero-pad to a
+ // multiple of 4 bytes, and produce the same uint32_t values regardless of
+ // platform endianness. ("abcd" will always become 0x64636261). Any padding
+ // will appear in the more-significant bytes of the last uint32_t.
+ static std::vector<uint32_t> StringToLE32(const std::string& data);
+
+ // Hash is a reimplementation of MurmurHash3_x86_32 from third_party/smhasher/
+ // which works on all architectures. MurmurHash3_x86_32 does unaligned reads
+ // (not generally safe on ARM) if the input bytes start on an unaligned
+ // address, and it assumes little-endianness. Hash produces the same result
+ // for the same input uint32_t values, regardless of platform endianness, and
+ // it produces the same results that MurmurHash3_x86_32 would produce on a
+ // little-endian platform.
+ //
+ // |length| is the number of bytes to hash. It mustn't exceed
+ // |data|.size() * 4. If |length| % 4 != 0, Hash will consume the
+ // less-significant bytes of the last uint32_t first.
+ //
+ // MurmurHash3_x86_32 takes a seed, for which 0 is the typical value. Hash
+ // hard-codes the seed to 0, since NormalizedMurmurHashEntropyProvider doesn't
+ // use it.
+ static uint32_t Hash(const std::vector<uint32_t>& data, size_t length);
+
+ // A version of Hash which is specialized for exactly 2 bytes of data and
+ // allows a nonzero seed. NormalizedMurmurHashEntropyProvider calls this in a
+ // loop, |kMaxLowEntropySize| times per study, so it must be fast.
+ ALWAYS_INLINE static uint32_t Hash16(uint32_t seed, uint16_t data) {
+ uint32_t h1 = seed, k1 = data;
+
+ // tail
+ k1 *= c1;
+ k1 = RotateLeft(k1, 15);
+ k1 *= c2;
+ h1 ^= k1;
+
+ // finalization
+ h1 ^= 2;
+ h1 = FinalMix(h1);
+
+ return h1;
+ }
+
+ private:
+ static const uint32_t c1 = 0xcc9e2d51;
+ static const uint32_t c2 = 0x1b873593;
+
+ ALWAYS_INLINE static uint32_t RotateLeft(uint32_t x, int n) {
+ return (x << n) | (x >> (32 - n));
+ }
+
+ ALWAYS_INLINE static uint32_t FinalMix(uint32_t h) {
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+ return h;
+ }
+};
+
+} // namespace internal
+} // namespace variations
+
+#endif // COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
diff --git a/components/variations/variations_murmur_hash_unittest.cc b/components/variations/variations_murmur_hash_unittest.cc
new file mode 100644
index 0000000..3059cd3
--- /dev/null
+++ b/components/variations/variations_murmur_hash_unittest.cc
@@ -0,0 +1,82 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/variations/variations_murmur_hash.h"
+
+#include <limits>
+#include <vector>
+
+#include "build/build_config.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/smhasher/src/MurmurHash3.h"
+
+namespace variations {
+namespace internal {
+
+TEST(VariationsMurmurHashTest, StringToLE32) {
+ EXPECT_EQ(std::vector<uint32_t>(),
+ VariationsMurmurHash::StringToLE32(""));
+ EXPECT_EQ(std::vector<uint32_t>({0x00000061}),
+ VariationsMurmurHash::StringToLE32("a"));
+ EXPECT_EQ(std::vector<uint32_t>({0x00006261}),
+ VariationsMurmurHash::StringToLE32("ab"));
+ EXPECT_EQ(std::vector<uint32_t>({0x00636261}),
+ VariationsMurmurHash::StringToLE32("abc"));
+ EXPECT_EQ(std::vector<uint32_t>({0x64636261}),
+ VariationsMurmurHash::StringToLE32("abcd"));
+ EXPECT_EQ(std::vector<uint32_t>({0x64636261, 0x00000065}),
+ VariationsMurmurHash::StringToLE32("abcde"));
+ EXPECT_EQ(std::vector<uint32_t>({0x64636261, 0x00006665}),
+ VariationsMurmurHash::StringToLE32("abcdef"));
+}
+
+// The tests inside this #if compare VariationsMurmurHash to the reference
+// implementation, MurmurHash3_x86_32, which only works on little-endian.
+#if defined(ARCH_CPU_LITTLE_ENDIAN)
+
+// Compare VariationsMurmurHash::Hash to MurmurHash3_x86_32 for every prefix of
+// |data|, from the empty string to all of |data|.
+TEST(VariationsMurmurHashTest, Hash) {
+ // Random bytes generated manually and hard-coded for reproducibility.
+ const std::vector<uint32_t> data({
+ 2704264845, 2929902289, 1679431515, 1427187834, 1300338468,
+ 576307953, 1209988079, 1918627109, 3926412991, 74087765});
+
+ size_t max_size = data.size() * sizeof(uint32_t);
+ for (size_t size = 0; size <= max_size; size++) {
+ uint32_t expected;
+ MurmurHash3_x86_32(data.data(), size, /*seed=*/0, &expected);
+ EXPECT_EQ(expected, VariationsMurmurHash::Hash(data, size))
+ << "size=" << size;
+ }
+}
+
+TEST(VariationsMurmurHashTest, Hash16) {
+ // Pick some likely edge case values.
+ constexpr uint32_t max32 = std::numeric_limits<uint32_t>::max();
+ uint32_t seeds[] = {
+ 0, max32 / 2 - 1, max32 - 2,
+ 1, max32 / 2, max32 - 1,
+ 2, max32 / 2 + 1, max32};
+
+ constexpr uint16_t max16 = std::numeric_limits<uint16_t>::max();
+ uint16_t data[] = {
+ 0, max16 / 2 - 1, max16 - 2,
+ 1, max16 / 2, max16 - 1,
+ 2, max16 / 2 + 1, max16};
+
+ for (uint32_t seed : seeds) {
+ for (uint16_t datum : data) {
+ uint32_t expected;
+ MurmurHash3_x86_32(&datum, sizeof(datum), seed, &expected);
+ EXPECT_EQ(expected, VariationsMurmurHash::Hash16(seed, datum))
+ << "seed=" << seed << ", datum=" << datum;
+ }
+ }
+}
+
+#endif // defined(ARCH_CPU_LITTLE_ENDIAN)
+
+} // namespace internal
+} // namespace variations