huangs | 6d2a303 | 2015-09-18 18:52:56 | [diff] [blame] | 1 | // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #ifndef COURGETTE_IMAGE_UTILS_H_ |
| 6 | #define COURGETTE_IMAGE_UTILS_H_ |
| 7 | |
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <iterator>
#include <vector>
| 13 | |
// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
// different target addresses are referenced. Purely for debugging.
// It is defined as 0 in this header; presumably a nonzero value turns the
// histogram output on (confirm at the use sites).
#define COURGETTE_HISTOGRAM_TARGETS 0
| 17 | |
| 18 | namespace courgette { |
| 19 | |
huangs | dda11d06 | 2016-03-14 16:35:39 | [diff] [blame] | 20 | // There are several ways to reason about addresses in an image: |
| 21 | // - File Offset: Position relative to start of image. |
| 22 | // - VA (Virtual Address): Virtual memory address of a loaded image. This is |
| 23 | // subject to relocation by the OS. |
| 24 | // - RVA (Relative Virtual Address): VA relative to some base address. This is |
huangs | f940a8c9 | 2016-03-23 20:40:35 | [diff] [blame] | 25 | // the preferred way to specify pointers in an image. |
| 26 | // |
| 27 | // In Courgette we consider two types of addresses: |
| 28 | // - abs32: In an image these are directly stored as VA whose locations are |
| 29 | // stored in the relocation table. |
| 30 | // - rel32: In an image these appear in branch/call opcodes, and are represented |
| 31 | // as offsets from an instruction address. |
huangs | dda11d06 | 2016-03-14 16:35:39 | [diff] [blame] | 32 | |
// Relative virtual address, stored as an unsigned 32-bit value.
using RVA = uint32_t;
// Value a Label's |rva_| holds until an address is assigned to it.
constexpr RVA kUnassignedRVA = 0xFFFFFFFFU;
// Sentinel returned by AddressTranslator::FileOffsetToRVA() when there is no
// matching RVA.
constexpr RVA kNoRVA = 0xFFFFFFFFU;

// Position relative to the start of an image.
using FileOffset = size_t;
// Sentinel returned by AddressTranslator::RVAToFileOffset() when there is no
// matching file offset. FileOffset is a size_t, so the sentinel is the largest
// size_t (SIZE_MAX) rather than the largest uintptr_t, which need not have the
// same width.
constexpr FileOffset kNoFileOffset = SIZE_MAX;
| 39 | |
huangs | f940a8c9 | 2016-03-23 20:40:35 | [diff] [blame] | 40 | // An interface translate and read addresses. The main conversion path is: |
| 41 | // (1) Location RVA. |
| 42 | // (2) Location FileOffset. |
| 43 | // (3) Pointer in image. |
| 44 | // (4) Target VA (32-bit or 64-bit). |
| 45 | // (5) Target RVA (32-bit). |
| 46 | // For abs32, we get (1) from relocation table, and convert to (5). |
| 47 | // For rel32, we get (2) from scanning opcode, and convert to (1). |
huangs | dda11d06 | 2016-03-14 16:35:39 | [diff] [blame] | 48 | class AddressTranslator { |
| 49 | public: |
huangs | f940a8c9 | 2016-03-23 20:40:35 | [diff] [blame] | 50 | // (2) -> (1): Returns the RVA corresponding to |file_offset|, or kNoRVA if |
| 51 | // nonexistent. |
huangs | dda11d06 | 2016-03-14 16:35:39 | [diff] [blame] | 52 | virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0; |
| 53 | |
huangs | f940a8c9 | 2016-03-23 20:40:35 | [diff] [blame] | 54 | // (1) -> (2): Returns the file offset corresponding to |rva|, or |
| 55 | // kNoFileOffset if nonexistent. |
huangs | dda11d06 | 2016-03-14 16:35:39 | [diff] [blame] | 56 | virtual FileOffset RVAToFileOffset(RVA rva) const = 0; |
| 57 | |
huangs | f940a8c9 | 2016-03-23 20:40:35 | [diff] [blame] | 58 | // (2) -> (3): Returns image data pointer correspnoding to |file_offset|. |
| 59 | // Assumes 0 <= |file_offset| <= image size. |
| 60 | // If |file_offset| == image size, then the resulting pointer is an end bound |
| 61 | // for iteration, and should not be dereferenced. |
huangs | dda11d06 | 2016-03-14 16:35:39 | [diff] [blame] | 62 | virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0; |
| 63 | |
huangs | f940a8c9 | 2016-03-23 20:40:35 | [diff] [blame] | 64 | // (1) -> (3): Returns the pointer to the image data for |rva|, or null if |
| 65 | // |rva| is invalid. |
huangs | dda11d06 | 2016-03-14 16:35:39 | [diff] [blame] | 66 | virtual const uint8_t* RVAToPointer(RVA rva) const = 0; |
huangs | f940a8c9 | 2016-03-23 20:40:35 | [diff] [blame] | 67 | |
| 68 | // (3) -> (5): Returns the target RVA located at |p|, where |p| is a pointer |
| 69 | // to image data. |
| 70 | virtual RVA PointerToTargetRVA(const uint8_t* p) const = 0; |
huangs | dda11d06 | 2016-03-14 16:35:39 | [diff] [blame] | 71 | }; |
huangs | 6d2a303 | 2015-09-18 18:52:56 | [diff] [blame] | 72 | |
huangs | 7a2fea25 | 2015-12-08 01:27:46 | [diff] [blame] | 73 | // A Label is a symbolic reference to an address. Unlike a conventional |
| 74 | // assembly language, we always know the address. The address will later be |
| 75 | // stored in a table and the Label will be replaced with the index into the |
| 76 | // table. |
| 77 | // TODO(huangs): Make this a struct, and remove "_" from member names. |
| 78 | class Label { |
| 79 | public: |
| 80 | enum : int { kNoIndex = -1 }; |
| 81 | explicit Label(RVA rva) : rva_(rva) {} |
huangs | bb4b8a9 | 2016-01-19 22:09:03 | [diff] [blame] | 82 | Label(RVA rva, int index) : rva_(rva), index_(index) {} |
| 83 | Label(RVA rva, int index, int32_t count) |
| 84 | : rva_(rva), index_(index), count_(count) {} |
huangs | 7a2fea25 | 2015-12-08 01:27:46 | [diff] [blame] | 85 | |
| 86 | bool operator==(const Label& other) const { |
| 87 | return rva_ == other.rva_ && index_ == other.index_ && |
| 88 | count_ == other.count_; |
| 89 | } |
| 90 | |
huangs | bb4b8a9 | 2016-01-19 22:09:03 | [diff] [blame] | 91 | RVA rva_ = kUnassignedRVA; // Address referred to by the label. |
huangs | 7a2fea25 | 2015-12-08 01:27:46 | [diff] [blame] | 92 | int index_ = kNoIndex; // Index of address in address table. |
avi | ab98dcc9 | 2015-12-21 19:35:33 | [diff] [blame] | 93 | int32_t count_ = 0; |
huangs | 7a2fea25 | 2015-12-08 01:27:46 | [diff] [blame] | 94 | }; |
| 95 | |
huangs | 9faa303 | 2016-04-07 03:37:11 | [diff] [blame] | 96 | // An interface for sequential visit of RVAs. |
| 97 | // Use case: Translating from RVA locations to RVA targets is platform-specific, |
| 98 | // and works differently for abs32 vs. rel32. A function that sequentually |
| 99 | // visits RVA targets only requires an RvaVisitor. The caller can provide an |
| 100 | // implementation that stores a fixed list of RVA locations, and translates each |
| 101 | // to the matching RVA target on demand without extra storage. |
| 102 | class RvaVisitor { |
| 103 | public: |
huangs | c803763 | 2016-05-19 18:16:40 | [diff] [blame] | 104 | virtual ~RvaVisitor() { } |
| 105 | |
huangs | 9faa303 | 2016-04-07 03:37:11 | [diff] [blame] | 106 | // Returns the number of remaining RVAs to visit. |
| 107 | virtual size_t Remaining() const = 0; |
| 108 | |
| 109 | // Returns the current RVA. |
| 110 | virtual RVA Get() const = 0; |
| 111 | |
| 112 | // Advances to the next RVA. |
| 113 | virtual void Next() = 0; |
| 114 | }; |
| 115 | |
| 116 | // RvaVisitor whose data are backed by std::vector<T>. Translating from T to RVA |
| 117 | // is should be implemented in Get(). |
| 118 | template <typename T> |
| 119 | class VectorRvaVisitor : public RvaVisitor { |
| 120 | public: |
| 121 | // Assumes |v| does not change for the lifetime of this instance. |
| 122 | explicit VectorRvaVisitor(const std::vector<T>& v) |
huangs | c803763 | 2016-05-19 18:16:40 | [diff] [blame] | 123 | : it_(v.begin()), end_(v.end()) { } |
| 124 | ~VectorRvaVisitor() override { } |
huangs | 9faa303 | 2016-04-07 03:37:11 | [diff] [blame] | 125 | |
| 126 | // RvaVisitor interfaces. |
| 127 | size_t Remaining() const override { return std::distance(it_, end_); } |
| 128 | virtual RVA Get() const override = 0; |
| 129 | void Next() override { ++it_; } |
| 130 | |
| 131 | protected: |
| 132 | typename std::vector<T>::const_iterator it_; |
| 133 | typename std::vector<T>::const_iterator end_; |
| 134 | }; |
| 135 | |
| 136 | // RvaVisitor that simply stores a list of RVAs for traversal. For testing. |
| 137 | class TrivialRvaVisitor : public VectorRvaVisitor<RVA> { |
| 138 | public: |
| 139 | explicit TrivialRvaVisitor(const std::vector<RVA>& rvas) |
huangs | c803763 | 2016-05-19 18:16:40 | [diff] [blame] | 140 | : VectorRvaVisitor<RVA>(rvas) { } |
| 141 | ~TrivialRvaVisitor() override { } |
huangs | 9faa303 | 2016-04-07 03:37:11 | [diff] [blame] | 142 | |
| 143 | // VectorRvaVisitor<RVA> interfaces. |
| 144 | RVA Get() const override { return *it_; } |
| 145 | }; |
| 146 | |
huangs | 6d2a303 | 2015-09-18 18:52:56 | [diff] [blame] | 147 | // These helper functions avoid the need for casts in the main code. |
// Returns the 16-bit value stored at |address| + |offset|, in host byte order.
// The bytes are copied with memcpy() instead of dereferencing a cast pointer,
// so the read is well-defined even when the location is not 2-byte aligned.
inline uint16_t ReadU16(const uint8_t* address, size_t offset) {
  uint16_t value = 0;
  memcpy(&value, address + offset, sizeof(value));
  return value;
}
| 151 | |
// Returns the 32-bit value stored at |address| + |offset|, in host byte order.
// The bytes are copied with memcpy() instead of dereferencing a cast pointer,
// so the read is well-defined even when the location is not 4-byte aligned.
inline uint32_t ReadU32(const uint8_t* address, size_t offset) {
  uint32_t value = 0;
  memcpy(&value, address + offset, sizeof(value));
  return value;
}
| 155 | |
// Returns the 64-bit value stored at |address| + |offset|, in host byte order.
// The bytes are copied with memcpy() instead of dereferencing a cast pointer,
// so the read is well-defined even when the location is not 8-byte aligned.
inline uint64_t ReadU64(const uint8_t* address, size_t offset) {
  uint64_t value = 0;
  memcpy(&value, address + offset, sizeof(value));
  return value;
}
| 159 | |
// Returns the 16-bit little-endian value stored at |address|.
// The value is assembled byte by byte, so the result does not depend on the
// host's byte order and |address| does not need to be 2-byte aligned.
inline uint16_t Read16LittleEndian(const void* address) {
  const uint8_t* bytes = static_cast<const uint8_t*>(address);
  return static_cast<uint16_t>(bytes[0] | (bytes[1] << 8));
}
| 163 | |
// Returns the 32-bit little-endian value stored at |address|.
// The value is assembled byte by byte, so the result does not depend on the
// host's byte order and |address| does not need to be 4-byte aligned.
inline uint32_t Read32LittleEndian(const void* address) {
  const uint8_t* bytes = static_cast<const uint8_t*>(address);
  return static_cast<uint32_t>(bytes[0]) |
         (static_cast<uint32_t>(bytes[1]) << 8) |
         (static_cast<uint32_t>(bytes[2]) << 16) |
         (static_cast<uint32_t>(bytes[3]) << 24);
}
| 167 | |
// Returns the 64-bit little-endian value stored at |address|.
// The value is assembled byte by byte, so the result does not depend on the
// host's byte order and |address| does not need to be 8-byte aligned.
inline uint64_t Read64LittleEndian(const void* address) {
  const uint8_t* bytes = static_cast<const uint8_t*>(address);
  uint64_t value = 0;
  // Start from the most significant byte, which is stored last.
  for (size_t i = sizeof(value); i > 0; --i)
    value = (value << 8) | bytes[i - 1];
  return value;
}
| 171 | |
| 172 | } // namespace courgette |
| 173 | |
| 174 | #endif // COURGETTE_IMAGE_UTILS_H_ |