Philippe Hamel | cb63f2a | 2017-10-18 19:51:28 | [diff] [blame] | 1 | // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
#include "components/assist_ranker/ranker_example_util.h"

#include <math.h>

#include <cmath>
#include <limits>

#include "base/bit_cast.h"
#include "base/format_macros.h"
#include "base/logging.h"
#include "base/metrics/metrics_hashes.h"
#include "base/strings/stringprintf.h"
Philippe Hamel | cb63f2a | 2017-10-18 19:51:28 | [diff] [blame] | 13 | |
Philippe Hamel | 6956483 | 2017-11-22 20:39:48 | [diff] [blame] | 14 | namespace assist_ranker { |
Charles Zhao | 100d8c7 | 2018-01-18 00:23:25 | [diff] [blame] | 15 | namespace { |
// Mask that keeps only the low 32 bits of a 64-bit value.
constexpr uint64_t MASK32Bits = (uint64_t{1} << 32) - 1;
// Width, in bits, of the mantissa field used when a float is encoded by hand
// in FloatToIntBits; the exponent field starts right above it.
constexpr int kFloatMainDigits = 23;
| 18 | // Returns lower 32 bits of the hash of the input. |
| 19 | int32_t StringToIntBits(const std::string& str) { |
| 20 | return base::HashMetricName(str) & MASK32Bits; |
| 21 | } |
| 22 | |
| 23 | // Converts float to int32 |
| 24 | int32_t FloatToIntBits(float f) { |
| 25 | if (std::numeric_limits<float>::is_iec559) { |
| 26 | // Directly bit_cast if float follows ieee754 standard. |
| 27 | return bit_cast<int32_t>(f); |
| 28 | } else { |
| 29 | // Otherwise, manually calculate sign, exp and mantissa. |
| 30 | // For sign. |
| 31 | const uint32_t sign = f < 0; |
| 32 | |
| 33 | // For exponent. |
| 34 | int exp; |
| 35 | f = std::abs(std::frexp(f, &exp)); |
| 36 | // Add 126 to get non-negative format of exp. |
| 37 | // This should not be 127 because the return of frexp is different from |
| 38 | // ieee754 with a multiple of 2. |
| 39 | const uint32_t exp_u = exp + 126; |
| 40 | |
| 41 | // Get mantissa. |
| 42 | const uint32_t mantissa = std::ldexp(f * 2.0f - 1.0f, kFloatMainDigits); |
| 43 | // Set each bits and return. |
| 44 | return (sign << 31) | (exp_u << kFloatMainDigits) | mantissa; |
| 45 | } |
| 46 | } |
| 47 | |
// Packs |type|, |index| and |value| into one int64 laid out as:
//   bits 63..56  low 8 bits of |type|
//   bits 55..32  low 24 bits of |index|
//   bits 31..0   |value|
// |index| is masked to its 24-bit field so that an out-of-range index (for
// example a negative int that was sign-extended to uint64_t by the caller)
// cannot spill into the type byte.
int64_t PairInt(const uint64_t type,
                const uint32_t value,
                const uint64_t index) {
  constexpr uint64_t kIndexMask = (uint64_t{1} << 24) - 1;
  const uint64_t type_bits = type << 56;
  const uint64_t index_bits = (index & kIndexMask) << 32;
  return type_bits | index_bits | static_cast<uint64_t>(value);
}
| 54 | |
| 55 | } // namespace |
Philippe Hamel | cb63f2a | 2017-10-18 19:51:28 | [diff] [blame] | 56 | |
Philippe Hamel | cb63f2a | 2017-10-18 19:51:28 | [diff] [blame] | 57 | bool SafeGetFeature(const std::string& key, |
| 58 | const RankerExample& example, |
| 59 | Feature* feature) { |
| 60 | auto p_feature = example.features().find(key); |
| 61 | if (p_feature != example.features().end()) { |
Philippe Hamel | d324d5db | 2017-10-19 20:09:51 | [diff] [blame] | 62 | if (feature) |
| 63 | *feature = p_feature->second; |
Philippe Hamel | cb63f2a | 2017-10-18 19:51:28 | [diff] [blame] | 64 | return true; |
| 65 | } |
| 66 | return false; |
| 67 | } |
| 68 | |
| 69 | bool GetFeatureValueAsFloat(const std::string& key, |
| 70 | const RankerExample& example, |
| 71 | float* value) { |
| 72 | Feature feature; |
| 73 | if (!SafeGetFeature(key, example, &feature)) { |
| 74 | return false; |
| 75 | } |
| 76 | switch (feature.feature_type_case()) { |
| 77 | case Feature::kBoolValue: |
| 78 | *value = static_cast<float>(feature.bool_value()); |
| 79 | break; |
Philippe Hamel | d324d5db | 2017-10-19 20:09:51 | [diff] [blame] | 80 | case Feature::kInt32Value: |
| 81 | *value = static_cast<float>(feature.int32_value()); |
Philippe Hamel | cb63f2a | 2017-10-18 19:51:28 | [diff] [blame] | 82 | break; |
| 83 | case Feature::kFloatValue: |
| 84 | *value = feature.float_value(); |
| 85 | break; |
| 86 | default: |
| 87 | return false; |
| 88 | } |
| 89 | return true; |
| 90 | } |
| 91 | |
Charles Zhao | 100d8c7 | 2018-01-18 00:23:25 | [diff] [blame] | 92 | bool FeatureToInt64(const Feature& feature, |
| 93 | int64_t* const res, |
| 94 | const int index) { |
| 95 | int32_t value = -1; |
| 96 | int32_t type = feature.feature_type_case(); |
| 97 | switch (type) { |
Philippe Hamel | c97535b | 2017-12-15 19:15:40 | [diff] [blame] | 98 | case Feature::kBoolValue: |
Charles Zhao | 100d8c7 | 2018-01-18 00:23:25 | [diff] [blame] | 99 | value = static_cast<int32_t>(feature.bool_value()); |
| 100 | break; |
Philippe Hamel | c97535b | 2017-12-15 19:15:40 | [diff] [blame] | 101 | case Feature::kFloatValue: |
Charles Zhao | 100d8c7 | 2018-01-18 00:23:25 | [diff] [blame] | 102 | value = FloatToIntBits(feature.float_value()); |
| 103 | break; |
| 104 | case Feature::kInt32Value: |
| 105 | value = feature.int32_value(); |
| 106 | break; |
Philippe Hamel | c97535b | 2017-12-15 19:15:40 | [diff] [blame] | 107 | case Feature::kStringValue: |
Charles Zhao | 100d8c7 | 2018-01-18 00:23:25 | [diff] [blame] | 108 | value = StringToIntBits(feature.string_value()); |
| 109 | break; |
| 110 | case Feature::kStringList: |
| 111 | if (index >= 0 && index < feature.string_list().string_value_size()) { |
| 112 | value = StringToIntBits(feature.string_list().string_value(index)); |
| 113 | } else { |
| 114 | DVLOG(3) << "Invalid index for string list: " << index; |
| 115 | NOTREACHED(); |
| 116 | return false; |
| 117 | } |
| 118 | break; |
Philippe Hamel | c97535b | 2017-12-15 19:15:40 | [diff] [blame] | 119 | default: |
Charles Zhao | 100d8c7 | 2018-01-18 00:23:25 | [diff] [blame] | 120 | DVLOG(3) << "Feature type is supported for logging: " << type; |
Philippe Hamel | c97535b | 2017-12-15 19:15:40 | [diff] [blame] | 121 | NOTREACHED(); |
| 122 | return false; |
| 123 | } |
Charles Zhao | 100d8c7 | 2018-01-18 00:23:25 | [diff] [blame] | 124 | *res = PairInt(type, value, index); |
| 125 | return true; |
| 126 | } |
Philippe Hamel | c97535b | 2017-12-15 19:15:40 | [diff] [blame] | 127 | |
Philippe Hamel | cb63f2a | 2017-10-18 19:51:28 | [diff] [blame] | 128 | bool GetOneHotValue(const std::string& key, |
| 129 | const RankerExample& example, |
| 130 | std::string* value) { |
| 131 | Feature feature; |
| 132 | if (!SafeGetFeature(key, example, &feature)) { |
| 133 | return false; |
| 134 | } |
| 135 | if (feature.feature_type_case() != Feature::kStringValue) { |
Philippe Hamel | d324d5db | 2017-10-19 20:09:51 | [diff] [blame] | 136 | DVLOG(1) << "Feature " << key |
| 137 | << " exists, but is not the right type (Expected: " |
| 138 | << Feature::kStringValue |
| 139 | << " vs. Actual: " << feature.feature_type_case() << ")"; |
Philippe Hamel | cb63f2a | 2017-10-18 19:51:28 | [diff] [blame] | 140 | return false; |
| 141 | } |
| 142 | *value = feature.string_value(); |
| 143 | return true; |
| 144 | } |
| 145 | |
Philippe Hamel | c97535b | 2017-12-15 19:15:40 | [diff] [blame] | 146 | // Converts string to a hex hash string. |
| 147 | std::string HashFeatureName(const std::string& feature_name) { |
| 148 | uint64_t feature_key = base::HashMetricName(feature_name); |
| 149 | return base::StringPrintf("%016" PRIx64, feature_key); |
| 150 | } |
| 151 | |
| 152 | RankerExample HashExampleFeatureNames(const RankerExample& example) { |
| 153 | RankerExample hashed_example; |
| 154 | auto& output_features = *hashed_example.mutable_features(); |
| 155 | for (const auto& feature : example.features()) { |
| 156 | output_features[HashFeatureName(feature.first)] = feature.second; |
| 157 | } |
| 158 | *hashed_example.mutable_target() = example.target(); |
| 159 | return hashed_example; |
| 160 | } |
| 161 | |
Philippe Hamel | 6956483 | 2017-11-22 20:39:48 | [diff] [blame] | 162 | } // namespace assist_ranker |