blob: 9ec77ad416e78b52ab2cf3833b8a9586a66e6e6e [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
6#define COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
7
8#include <cstdint>
9#include <string>
10#include <vector>
11
12#include "base/compiler_specific.h"
13
namespace variations {
namespace internal {

// Hash utilities for NormalizedMurmurHashEntropyProvider. For more info, see:
// https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
class VariationsMurmurHash {
 public:
  // Prepares data to be hashed by VariationsMurmurHash: align and zero-pad to a
  // multiple of 4 bytes, and produce the same uint32_t values regardless of
  // platform endianness. ("abcd" will always become 0x64636261). Any padding
  // will appear in the more-significant bytes of the last uint32_t.
  static std::vector<uint32_t> StringToLE32(const std::string& data);

  // Hash is a reimplementation of MurmurHash3_x86_32 from third_party/smhasher/
  // which works on all architectures. MurmurHash3_x86_32 does unaligned reads
  // (not generally safe on ARM) if the input bytes start on an unaligned
  // address, and it assumes little-endianness. Hash produces the same result
  // for the same input uint32_t values, regardless of platform endianness, and
  // it produces the same results that MurmurHash3_x86_32 would produce on a
  // little-endian platform.
  //
  // |length| is the number of bytes to hash. It mustn't exceed
  // data.size() * 4. If length % 4 != 0, Hash will consume the
  // less-significant bytes of the last uint32_t first.
  //
  // MurmurHash3_x86_32 takes a seed, for which 0 is the typical value. Hash
  // hard-codes the seed to 0, since NormalizedMurmurHashEntropyProvider doesn't
  // use it.
  static uint32_t Hash(const std::vector<uint32_t>& data, size_t length);

  // A version of Hash which is specialized for exactly 2 bytes of data and
  // allows a nonzero seed. NormalizedMurmurHashEntropyProvider calls this in a
  // loop, |kMaxLowEntropySize| times per study, so it must be fast.
  //
  // |seed| is the initial hash state; |data| holds the two input bytes, with
  // the first byte in the less-significant position. Returns the 32-bit hash.
  ALWAYS_INLINE static uint32_t Hash16(uint32_t seed, uint16_t data) {
    uint32_t h1 = seed;
    uint32_t k1 = data;

    // tail: a 2-byte input is shorter than one 4-byte block, so the only
    // mixing step is the one applied to the trailing bytes.
    k1 *= c1;
    k1 = RotateLeft(k1, 15);
    k1 *= c2;
    h1 ^= k1;

    // finalization: fold in the input length in bytes (always 2 here), then
    // run the final avalanche.
    h1 ^= static_cast<uint32_t>(sizeof(data));
    return FinalMix(h1);
  }

 private:
  // Multiplicative mixing constants applied to each input word.
  static constexpr uint32_t c1 = 0xcc9e2d51;
  static constexpr uint32_t c2 = 0x1b873593;

  // Rotates |x| left by |n| bits. Both shift counts are reduced modulo 32 so
  // that n == 0 never produces a shift by the full width of the type, which
  // would be undefined behavior. Results for n in [1, 31] are unchanged.
  ALWAYS_INLINE static uint32_t RotateLeft(uint32_t x, int n) {
    const uint32_t left = static_cast<uint32_t>(n) & 31u;
    const uint32_t right = (32u - left) & 31u;
    return (x << left) | (x >> right);
  }

  // Final avalanche step: alternating xor-shifts and multiplications that
  // spread every bit of |h| across the whole 32-bit result.
  ALWAYS_INLINE static uint32_t FinalMix(uint32_t h) {
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
  }
};

}  // namespace internal
}  // namespace variations
83
84#endif // COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_