// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
| 4 | |
| 5 | #ifndef COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_ |
| 6 | #define COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_ |
| 7 | |
| 8 | #include <cstdint> |
| 9 | #include <string> |
| 10 | #include <vector> |
| 11 | |
| 12 | #include "base/compiler_specific.h" |
| 13 | |
| 14 | namespace variations { |
| 15 | namespace internal { |
| 16 | |
// Hash utilities for NormalizedMurmurHashEntropyProvider. For more info, see:
// https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
class VariationsMurmurHash {
 public:
  // Prepares data to be hashed by VariationsMurmurHash: align and zero-pad to a
  // multiple of 4 bytes, and produce the same uint32_t values regardless of
  // platform endianness. ("abcd" will always become 0x64636261). Any padding
  // will appear in the more-significant bytes of the last uint32_t.
  static std::vector<uint32_t> StringToLE32(const std::string& data);

  // Hash is a reimplementation of MurmurHash3_x86_32 from third_party/smhasher/
  // which works on all architectures. MurmurHash3_x86_32 does unaligned reads
  // (not generally safe on ARM) if the input bytes start on an unaligned
  // address, and it assumes little-endianness. Hash produces the same result
  // for the same input uint32_t values, regardless of platform endianness, and
  // it produces the same results that MurmurHash3_x86_32 would produce on a
  // little-endian platform.
  //
  // |data| holds the input as little-endian 32-bit words, in the layout
  // described for StringToLE32() above.
  //
  // |length| is the number of bytes to hash. It mustn't exceed
  // data.size() * 4. If length % 4 != 0, Hash will consume the
  // less-significant bytes of the last uint32_t first.
  //
  // MurmurHash3_x86_32 takes a seed, for which 0 is the typical value. Hash
  // hard-codes the seed to 0, since NormalizedMurmurHashEntropyProvider doesn't
  // use it.
  static uint32_t Hash(const std::vector<uint32_t>& data, size_t length);

  // A version of Hash which is specialized for exactly 2 bytes of data and
  // allows a nonzero seed. NormalizedMurmurHashEntropyProvider calls this in a
  // loop, |kMaxLowEntropySize| times per study, so it must be fast.
  //
  // |data| is treated as the two input bytes packed into one value, with the
  // first byte in the less-significant position (e.g. bytes 0x21, 0x43 are
  // passed as 0x4321).
  ALWAYS_INLINE static uint32_t Hash16(uint32_t seed, uint16_t data) {
    uint32_t h1 = seed, k1 = data;

    // tail: two bytes never fill a whole 4-byte body block, so only the tail
    // mixing step runs.
    k1 *= c1;
    k1 = RotateLeft(k1, 15);
    k1 *= c2;
    h1 ^= k1;

    // finalization: fold in the input length (always 2 bytes here) and mix.
    h1 ^= 2;
    h1 = FinalMix(h1);

    return h1;
  }

 private:
  // Multiplicative mixing constants applied to each input block/tail.
  // constexpr (rather than plain const) so that, under C++17, the members are
  // implicitly inline and need no out-of-line definition even if odr-used.
  static constexpr uint32_t c1 = 0xcc9e2d51;
  static constexpr uint32_t c2 = 0x1b873593;

  // Rotates |x| left by |n| bits. |n| is reduced modulo 32 so that n == 0 (or
  // any out-of-range count) never produces an undefined shift by 32 or more
  // bits; for 0 < n < 32 the result is the usual (x << n) | (x >> (32 - n)).
  ALWAYS_INLINE static uint32_t RotateLeft(uint32_t x, int n) {
    const unsigned shift = static_cast<unsigned>(n) & 31u;
    return (x << shift) | (x >> ((32u - shift) & 31u));
  }

  // Final avalanche step: alternating xor-shifts and multiplications applied
  // to the accumulated hash state before it is returned.
  ALWAYS_INLINE static uint32_t FinalMix(uint32_t h) {
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
  }
};
| 80 | |
| 81 | } // namespace internal |
| 82 | } // namespace variations |
| 83 | |
| 84 | #endif // COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_ |