Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 1 | // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
Samuel Huang | 577ef6c | 2018-03-13 18:19:34 | [diff] [blame] | 5 | #ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ |
| 6 | #define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 7 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 8 | #include <stdint.h> |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 9 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 10 | #include <tuple> |
| 11 | #include <vector> |
| 12 | |
| 13 | #include "base/macros.h" |
Samuel Huang | 577ef6c | 2018-03-13 18:19:34 | [diff] [blame] | 14 | #include "components/zucchini/algorithm.h" |
| 15 | #include "components/zucchini/image_utils.h" |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 16 | |
| 17 | namespace zucchini { |
| 18 | |
// There are several ways to reason about addresses in an image:
// - Offset: Position relative to start of image.
// - VA (Virtual Address): Virtual memory address of a loaded image. This is
//   subject to relocation by the OS.
// - RVA (Relative Virtual Address): VA relative to some base address. This is
//   the preferred way to specify pointers in an image.
//
// Zucchini is primarily concerned with offsets and RVAs. Executable images like
// PE and ELF are organized into sections. Each section specifies offset and RVA
// ranges as:
//   {Offset start, offset size, RVA start, RVA size}.
// This constitutes a basic unit to translate between offsets and RVAs. Note:
// |offset size| < |RVA size| is possible. For example, the .bss section can
// have zero-filled statically-allocated data that have no corresponding bytes
// on image (to save space). This poses a problem for Zucchini, which stores
// addresses as offsets: now we'd have "dangling RVAs" that don't map to
// offsets! Some ways of handling this are:
// 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means
//    some reference targets would escape detection and processing.
// 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data
//    must not be read on these fake offsets, which are only valid as target
//    addresses for reference matching.
// As for |RVA size| < |offset size|, the extra portion just gets ignored.
| 42 | // |
| 43 | // Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to |
| 44 | // fake offsets by adding a large value. This value can be chosen as an |
| 45 | // exclusive upper bound of all offsets (i.e., image size). This allows them to |
| 46 | // be easily detected and processed as a special-case. |
| 47 | // TODO(huangs): Investigate option (1), now that the refactored code makes |
| 48 | // experimentation easier. |
| 49 | // TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t| |
| 50 | // ranges and create "fake" units to accommodate dangling RVAs. Then |
| 51 | // AddressTranslator can be simplified. |
| 52 | |
// Relative Virtual Address (RVA): a virtual address expressed relative to some
// base address. Two notions are distinguished:
// - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|.
// - Existent RVA: An RVA that has semantic meaning in an image, and may
//   translate to an offset in an image or (if a dangling RVA) a fake offset.
// Every existent RVA is also a valid RVA.
using rva_t = uint32_t;
// Half of the largest representable |rva_t|; halved to match |kOffsetBound|.
constexpr rva_t kRvaBound = static_cast<rva_t>(~rva_t{0}) / 2;
// Sentinel meaning "no RVA". It is the second-largest |rva_t| value, so it
// lies above |kRvaBound| and can never be mistaken for a valid RVA.
constexpr rva_t kInvalidRva = static_cast<rva_t>(~rva_t{0} - 1);
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 63 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 64 | // A utility to translate between offsets and RVAs in an image. |
| 65 | class AddressTranslator { |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 66 | public: |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 67 | // A basic unit for address translation, roughly maps to a section, but may |
| 68 | // be processed (e.g., merged) as an optimization. |
| 69 | struct Unit { |
| 70 | offset_t offset_end() const { return offset_begin + offset_size; } |
| 71 | rva_t rva_end() const { return rva_begin + rva_size; } |
| 72 | bool IsEmpty() const { |
| 73 | // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed |
| 74 | // yet, and once it is then it's empty. |
| 75 | // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but |
| 76 | // is not empty. |
| 77 | return rva_size == 0; |
| 78 | } |
| 79 | bool CoversOffset(offset_t offset) const { |
| 80 | return RangeCovers(offset_begin, offset_size, offset); |
| 81 | } |
| 82 | bool CoversRva(rva_t rva) const { |
| 83 | return RangeCovers(rva_begin, rva_size, rva); |
| 84 | } |
| 85 | bool CoversDanglingRva(rva_t rva) const { |
| 86 | return CoversRva(rva) && rva - rva_begin >= offset_size; |
| 87 | } |
| 88 | // Assumes valid |offset| (*cannot* be fake offset). |
| 89 | rva_t OffsetToRvaUnsafe(offset_t offset) const { |
| 90 | return offset - offset_begin + rva_begin; |
| 91 | } |
| 92 | // Assumes valid |rva| (*can* be danging RVA). |
| 93 | offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const { |
| 94 | rva_t delta = rva - rva_begin; |
| 95 | return delta < offset_size ? delta + offset_begin |
| 96 | : fake_offset_begin + rva; |
| 97 | } |
| 98 | bool HasDanglingRva() const { return rva_size > offset_size; } |
| 99 | friend bool operator==(const Unit& a, const Unit& b) { |
| 100 | return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) == |
| 101 | std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size); |
| 102 | } |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 103 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 104 | offset_t offset_begin; |
| 105 | offset_t offset_size; |
| 106 | rva_t rva_begin; |
| 107 | rva_t rva_size; |
| 108 | }; |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 109 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 110 | // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit |
| 111 | // found, to reduce the number of OffsetToUnit() calls for clustered queries. |
| 112 | class OffsetToRvaCache { |
| 113 | public: |
| 114 | // Embeds |translator| for use. Now object lifetime is tied to |translator| |
| 115 | // lifetime. |
| 116 | explicit OffsetToRvaCache(const AddressTranslator& translator); |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 117 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 118 | rva_t Convert(offset_t offset) const; |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 119 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 120 | private: |
| 121 | const AddressTranslator& translator_; |
| 122 | mutable const AddressTranslator::Unit* cached_unit_ = nullptr; |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 123 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 124 | DISALLOW_COPY_AND_ASSIGN(OffsetToRvaCache); |
| 125 | }; |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 126 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 127 | // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit |
| 128 | // found, to reduce the number of RvaToUnit() calls for clustered queries. |
| 129 | class RvaToOffsetCache { |
| 130 | public: |
| 131 | // Embeds |translator| for use. Now object lifetime is tied to |translator| |
| 132 | // lifetime. |
| 133 | explicit RvaToOffsetCache(const AddressTranslator& translator); |
| 134 | |
| 135 | bool IsValid(rva_t rva) const; |
Samuel Huang | b6d108f | 2018-10-10 15:48:10 | [diff] [blame] | 136 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 137 | offset_t Convert(rva_t rva) const; |
| 138 | |
| 139 | private: |
| 140 | const AddressTranslator& translator_; |
| 141 | mutable const AddressTranslator::Unit* cached_unit_ = nullptr; |
| 142 | |
| 143 | DISALLOW_COPY_AND_ASSIGN(RvaToOffsetCache); |
| 144 | }; |
| 145 | |
| 146 | enum Status { |
| 147 | kSuccess = 0, |
| 148 | kErrorOverflow, |
| 149 | kErrorBadOverlap, |
| 150 | kErrorBadOverlapDanglingRva, |
| 151 | kErrorFakeOffsetBeginTooLarge, |
| 152 | }; |
| 153 | |
| 154 | AddressTranslator(); |
| 155 | ~AddressTranslator(); |
| 156 | |
| 157 | // Consumes |units| to populate data in this class. Performs consistency |
| 158 | // checks and overlapping Units. Returns Status to indicate success. |
| 159 | Status Initialize(std::vector<Unit>&& units); |
| 160 | |
| 161 | // Returns the (possibly dangling) RVA corresponding to |offset|, or |
| 162 | // kInvalidRva if not found. |
| 163 | rva_t OffsetToRva(offset_t offset) const; |
| 164 | |
| 165 | // Returns the (possibly fake) offset corresponding to |rva|, or |
Samuel Huang | 3102b95 | 2017-10-12 21:25:47 | [diff] [blame] | 166 | // kInvalidOffset if not found (i.e., |rva| is non-existent). |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 167 | offset_t RvaToOffset(rva_t rva) const; |
| 168 | |
Samuel Huang | 6bea2b8 | 2017-08-25 21:09:57 | [diff] [blame] | 169 | // For testing. |
| 170 | offset_t fake_offset_begin() const { return fake_offset_begin_; } |
| 171 | |
| 172 | const std::vector<Unit>& units_sorted_by_offset() const { |
| 173 | return units_sorted_by_offset_; |
| 174 | } |
| 175 | |
| 176 | const std::vector<Unit>& units_sorted_by_rva() const { |
| 177 | return units_sorted_by_rva_; |
| 178 | } |
| 179 | |
| 180 | private: |
| 181 | // Helper to find the Unit that contains given |offset| or |rva|. Returns null |
| 182 | // if not found. |
| 183 | const Unit* OffsetToUnit(offset_t offset) const; |
| 184 | const Unit* RvaToUnit(rva_t rva) const; |
| 185 | |
| 186 | // Storage of Units. All offset ranges are non-empty and disjoint. Likewise |
| 187 | // for all RVA ranges. |
| 188 | std::vector<Unit> units_sorted_by_offset_; |
| 189 | std::vector<Unit> units_sorted_by_rva_; |
| 190 | |
| 191 | // Conversion factor to translate between dangling RVAs and fake offsets. |
| 192 | offset_t fake_offset_begin_; |
| 193 | |
| 194 | DISALLOW_COPY_AND_ASSIGN(AddressTranslator); |
Etienne Pierre-Doray | 6cc37d60 | 2017-07-28 01:52:46 | [diff] [blame] | 195 | }; |
| 196 | |
| 197 | } // namespace zucchini |
| 198 | |
Samuel Huang | 577ef6c | 2018-03-13 18:19:34 | [diff] [blame] | 199 | #endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ |