blob: 19a04d52e1f7167a239cdcc3cac168b0a6f7177b [file] [log] [blame]
huangs6d2a3032015-09-18 18:52:561// Copyright 2015 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COURGETTE_IMAGE_UTILS_H_
6#define COURGETTE_IMAGE_UTILS_H_
7
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <iterator>
#include <vector>
13
// Debugging aid only: COURGETTE_HISTOGRAM_TARGETS is the switch for printing a
// histogram of how frequently different target addresses are referenced. It is
// set to 0 here.
#define COURGETTE_HISTOGRAM_TARGETS 0
17
18namespace courgette {
19
// There are several ways to reason about addresses in an image:
// - File Offset: Position relative to start of image.
// - VA (Virtual Address): Virtual memory address of a loaded image. This is
//   subject to relocation by the OS.
// - RVA (Relative Virtual Address): VA relative to some base address. This is
//   the preferred way to specify pointers in an image.
//
// In Courgette we consider two types of addresses:
// - abs32: In an image these are directly stored as VA whose locations are
//   stored in the relocation table.
// - rel32: In an image these appear in branch/call opcodes, and are represented
//   as offsets from an instruction address.
huangsdda11d062016-03-14 16:35:3932
// 32-bit relative virtual address type. Both sentinels below share the same
// all-ones value.
using RVA = uint32_t;
constexpr RVA kUnassignedRVA = UINT32_MAX;
constexpr RVA kNoRVA = UINT32_MAX;

// Offset into the image file. kNoFileOffset is the sentinel for a file offset
// that does not exist.
using FileOffset = size_t;
constexpr FileOffset kNoFileOffset = UINTPTR_MAX;
39
huangsf940a8c92016-03-23 20:40:3540// An interface translate and read addresses. The main conversion path is:
41// (1) Location RVA.
42// (2) Location FileOffset.
43// (3) Pointer in image.
44// (4) Target VA (32-bit or 64-bit).
45// (5) Target RVA (32-bit).
46// For abs32, we get (1) from relocation table, and convert to (5).
47// For rel32, we get (2) from scanning opcode, and convert to (1).
huangsdda11d062016-03-14 16:35:3948class AddressTranslator {
49 public:
huangsf940a8c92016-03-23 20:40:3550 // (2) -> (1): Returns the RVA corresponding to |file_offset|, or kNoRVA if
51 // nonexistent.
huangsdda11d062016-03-14 16:35:3952 virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0;
53
huangsf940a8c92016-03-23 20:40:3554 // (1) -> (2): Returns the file offset corresponding to |rva|, or
55 // kNoFileOffset if nonexistent.
huangsdda11d062016-03-14 16:35:3956 virtual FileOffset RVAToFileOffset(RVA rva) const = 0;
57
huangsf940a8c92016-03-23 20:40:3558 // (2) -> (3): Returns image data pointer correspnoding to |file_offset|.
59 // Assumes 0 <= |file_offset| <= image size.
60 // If |file_offset| == image size, then the resulting pointer is an end bound
61 // for iteration, and should not be dereferenced.
huangsdda11d062016-03-14 16:35:3962 virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0;
63
huangsf940a8c92016-03-23 20:40:3564 // (1) -> (3): Returns the pointer to the image data for |rva|, or null if
65 // |rva| is invalid.
huangsdda11d062016-03-14 16:35:3966 virtual const uint8_t* RVAToPointer(RVA rva) const = 0;
huangsf940a8c92016-03-23 20:40:3567
68 // (3) -> (5): Returns the target RVA located at |p|, where |p| is a pointer
69 // to image data.
70 virtual RVA PointerToTargetRVA(const uint8_t* p) const = 0;
huangsdda11d062016-03-14 16:35:3971};
huangs6d2a3032015-09-18 18:52:5672
huangs7a2fea252015-12-08 01:27:4673// A Label is a symbolic reference to an address. Unlike a conventional
74// assembly language, we always know the address. The address will later be
75// stored in a table and the Label will be replaced with the index into the
76// table.
77// TODO(huangs): Make this a struct, and remove "_" from member names.
78class Label {
79 public:
80 enum : int { kNoIndex = -1 };
81 explicit Label(RVA rva) : rva_(rva) {}
huangsbb4b8a92016-01-19 22:09:0382 Label(RVA rva, int index) : rva_(rva), index_(index) {}
83 Label(RVA rva, int index, int32_t count)
84 : rva_(rva), index_(index), count_(count) {}
huangs7a2fea252015-12-08 01:27:4685
86 bool operator==(const Label& other) const {
87 return rva_ == other.rva_ && index_ == other.index_ &&
88 count_ == other.count_;
89 }
90
huangsbb4b8a92016-01-19 22:09:0391 RVA rva_ = kUnassignedRVA; // Address referred to by the label.
huangs7a2fea252015-12-08 01:27:4692 int index_ = kNoIndex; // Index of address in address table.
aviab98dcc92015-12-21 19:35:3393 int32_t count_ = 0;
huangs7a2fea252015-12-08 01:27:4694};
95
huangs9faa3032016-04-07 03:37:1196// An interface for sequential visit of RVAs.
97// Use case: Translating from RVA locations to RVA targets is platform-specific,
98// and works differently for abs32 vs. rel32. A function that sequentually
99// visits RVA targets only requires an RvaVisitor. The caller can provide an
100// implementation that stores a fixed list of RVA locations, and translates each
101// to the matching RVA target on demand without extra storage.
102class RvaVisitor {
103 public:
huangsc8037632016-05-19 18:16:40104 virtual ~RvaVisitor() { }
105
huangs9faa3032016-04-07 03:37:11106 // Returns the number of remaining RVAs to visit.
107 virtual size_t Remaining() const = 0;
108
109 // Returns the current RVA.
110 virtual RVA Get() const = 0;
111
112 // Advances to the next RVA.
113 virtual void Next() = 0;
114};
115
116// RvaVisitor whose data are backed by std::vector<T>. Translating from T to RVA
117// is should be implemented in Get().
118template <typename T>
119class VectorRvaVisitor : public RvaVisitor {
120 public:
121 // Assumes |v| does not change for the lifetime of this instance.
122 explicit VectorRvaVisitor(const std::vector<T>& v)
huangsc8037632016-05-19 18:16:40123 : it_(v.begin()), end_(v.end()) { }
124 ~VectorRvaVisitor() override { }
huangs9faa3032016-04-07 03:37:11125
126 // RvaVisitor interfaces.
127 size_t Remaining() const override { return std::distance(it_, end_); }
128 virtual RVA Get() const override = 0;
129 void Next() override { ++it_; }
130
131 protected:
132 typename std::vector<T>::const_iterator it_;
133 typename std::vector<T>::const_iterator end_;
134};
135
136// RvaVisitor that simply stores a list of RVAs for traversal. For testing.
137class TrivialRvaVisitor : public VectorRvaVisitor<RVA> {
138 public:
139 explicit TrivialRvaVisitor(const std::vector<RVA>& rvas)
huangsc8037632016-05-19 18:16:40140 : VectorRvaVisitor<RVA>(rvas) { }
141 ~TrivialRvaVisitor() override { }
huangs9faa3032016-04-07 03:37:11142
143 // VectorRvaVisitor<RVA> interfaces.
144 RVA Get() const override { return *it_; }
145};
146
// These helper functions avoid the need for casts in the main code.

// Returns the uint16_t stored, in host byte order, |offset| bytes past
// |address|. The bytes are copied out with memcpy() rather than read through a
// casted pointer, so the read is well-defined even when |address + offset| is
// not suitably aligned for uint16_t.
inline uint16_t ReadU16(const uint8_t* address, size_t offset) {
  uint16_t value = 0;
  memcpy(&value, address + offset, sizeof(value));
  return value;
}
151
// Returns the uint32_t stored, in host byte order, |offset| bytes past
// |address|. The bytes are copied out with memcpy() rather than read through a
// casted pointer, so the read is well-defined even when |address + offset| is
// not suitably aligned for uint32_t.
inline uint32_t ReadU32(const uint8_t* address, size_t offset) {
  uint32_t value = 0;
  memcpy(&value, address + offset, sizeof(value));
  return value;
}
155
// Returns the uint64_t stored, in host byte order, |offset| bytes past
// |address|. The bytes are copied out with memcpy() rather than read through a
// casted pointer, so the read is well-defined even when |address + offset| is
// not suitably aligned for uint64_t.
inline uint64_t ReadU64(const uint8_t* address, size_t offset) {
  uint64_t value = 0;
  memcpy(&value, address + offset, sizeof(value));
  return value;
}
159
// Returns the 16-bit little-endian value stored at |address|. The value is
// assembled byte by byte, so the result does not depend on the host's byte
// order and |address| needs no particular alignment.
inline uint16_t Read16LittleEndian(const void* address) {
  const uint8_t* bytes = static_cast<const uint8_t*>(address);
  return static_cast<uint16_t>(bytes[0] | (bytes[1] << 8));
}
163
// Returns the 32-bit little-endian value stored at |address|. The value is
// assembled byte by byte, so the result does not depend on the host's byte
// order and |address| needs no particular alignment.
inline uint32_t Read32LittleEndian(const void* address) {
  const uint8_t* bytes = static_cast<const uint8_t*>(address);
  // Widen each byte before shifting so the top byte never lands in the sign
  // bit of a promoted int.
  return static_cast<uint32_t>(bytes[0]) |
         (static_cast<uint32_t>(bytes[1]) << 8) |
         (static_cast<uint32_t>(bytes[2]) << 16) |
         (static_cast<uint32_t>(bytes[3]) << 24);
}
167
// Returns the 64-bit little-endian value stored at |address|. The value is
// assembled byte by byte, so the result does not depend on the host's byte
// order and |address| needs no particular alignment.
inline uint64_t Read64LittleEndian(const void* address) {
  const uint8_t* bytes = static_cast<const uint8_t*>(address);
  uint64_t value = 0;
  // Fold in bytes from most significant (last in memory) to least significant.
  for (int i = 7; i >= 0; --i)
    value = (value << 8) | bytes[i];
  return value;
}
171
172} // namespace courgette
173
174#endif // COURGETTE_IMAGE_UTILS_H_