// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
#define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_

#include <stdint.h>

#include <tuple>
#include <vector>

#include "base/macros.h"
#include "components/zucchini/algorithm.h"
#include "components/zucchini/image_utils.h"

namespace zucchini {

// There are several ways to reason about addresses in an image:
// - Offset: Position relative to start of image.
// - VA (Virtual Address): Virtual memory address of a loaded image. This is
//   subject to relocation by the OS.
// - RVA (Relative Virtual Address): VA relative to some base address. This is
//   the preferred way to specify pointers in an image.
//
// Zucchini is primarily concerned with offsets and RVAs. Executable images like
// PE and ELF are organized into sections. Each section specifies offset and RVA
// ranges as:
//   {Offset start, offset size, RVA start, RVA size}.
// This constitutes a basic unit to translate between offsets and RVAs. Note:
// |offset size| < |RVA size| is possible. For example, the .bss section can
// have zero-filled statically-allocated data that have no corresponding bytes
// on image (to save space). This poses a problem for Zucchini, which stores
// addresses as offsets: now we'd have "dangling RVAs" that don't map to
// offsets! Some ways to handle this are:
// 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means
//    some reference targets would escape detection and processing.
// 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data
//    must not be read on these fake offsets, which are only valid as target
//    addresses for reference matching.
// As for |RVA size| < |offset size|, the extra portion just gets ignored.
//
// Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to
// fake offsets by adding a large value. This value can be chosen as an
// exclusive upper bound of all offsets (i.e., image size). This allows them to
// be easily detected and processed as a special case.
// TODO(huangs): Investigate option (1), now that the refactored code makes
// experimentation easier.
// TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t|
// ranges and create "fake" units to accommodate dangling RVAs. Then
// AddressTranslator can be simplified.

// Virtual Address relative to some base address (RVA). There's a distinction
// between "valid RVA" and "existent RVA":
// - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|.
// - Existent RVA: An RVA that has semantic meaning in an image, and may
//   translate to an offset in an image or (if a dangling RVA) a fake offset.
// All existent RVAs are valid RVAs.
using rva_t = uint32_t;
// Exclusive upper bound of valid RVAs: half of the largest |rva_t| value
// (0x7FFFFFFF). Divide by 2 to match |kOffsetBound|.
constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2;
// Sentinel meaning "no RVA" (0xFFFFFFFE). It is not below |kRvaBound|, so it
// can never be mistaken for a valid RVA.
constexpr rva_t kInvalidRva = static_cast<rva_t>(-2);

Samuel Huang6bea2b82017-08-25 21:09:5764// A utility to translate between offsets and RVAs in an image.
65class AddressTranslator {
Etienne Pierre-Doray6cc37d602017-07-28 01:52:4666 public:
Samuel Huang6bea2b82017-08-25 21:09:5767 // A basic unit for address translation, roughly maps to a section, but may
68 // be processed (e.g., merged) as an optimization.
69 struct Unit {
70 offset_t offset_end() const { return offset_begin + offset_size; }
71 rva_t rva_end() const { return rva_begin + rva_size; }
72 bool IsEmpty() const {
73 // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed
74 // yet, and once it is then it's empty.
75 // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but
76 // is not empty.
77 return rva_size == 0;
78 }
79 bool CoversOffset(offset_t offset) const {
80 return RangeCovers(offset_begin, offset_size, offset);
81 }
82 bool CoversRva(rva_t rva) const {
83 return RangeCovers(rva_begin, rva_size, rva);
84 }
85 bool CoversDanglingRva(rva_t rva) const {
86 return CoversRva(rva) && rva - rva_begin >= offset_size;
87 }
88 // Assumes valid |offset| (*cannot* be fake offset).
89 rva_t OffsetToRvaUnsafe(offset_t offset) const {
90 return offset - offset_begin + rva_begin;
91 }
92 // Assumes valid |rva| (*can* be danging RVA).
93 offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const {
94 rva_t delta = rva - rva_begin;
95 return delta < offset_size ? delta + offset_begin
96 : fake_offset_begin + rva;
97 }
98 bool HasDanglingRva() const { return rva_size > offset_size; }
99 friend bool operator==(const Unit& a, const Unit& b) {
100 return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) ==
101 std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size);
102 }
Etienne Pierre-Doray6cc37d602017-07-28 01:52:46103
Samuel Huang6bea2b82017-08-25 21:09:57104 offset_t offset_begin;
105 offset_t offset_size;
106 rva_t rva_begin;
107 rva_t rva_size;
108 };
Etienne Pierre-Doray6cc37d602017-07-28 01:52:46109
Samuel Huang6bea2b82017-08-25 21:09:57110 // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit
111 // found, to reduce the number of OffsetToUnit() calls for clustered queries.
112 class OffsetToRvaCache {
113 public:
114 // Embeds |translator| for use. Now object lifetime is tied to |translator|
115 // lifetime.
116 explicit OffsetToRvaCache(const AddressTranslator& translator);
Etienne Pierre-Doray6cc37d602017-07-28 01:52:46117
Samuel Huang6bea2b82017-08-25 21:09:57118 rva_t Convert(offset_t offset) const;
Etienne Pierre-Doray6cc37d602017-07-28 01:52:46119
Samuel Huang6bea2b82017-08-25 21:09:57120 private:
121 const AddressTranslator& translator_;
122 mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
Etienne Pierre-Doray6cc37d602017-07-28 01:52:46123
Samuel Huang6bea2b82017-08-25 21:09:57124 DISALLOW_COPY_AND_ASSIGN(OffsetToRvaCache);
125 };
Etienne Pierre-Doray6cc37d602017-07-28 01:52:46126
Samuel Huang6bea2b82017-08-25 21:09:57127 // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit
128 // found, to reduce the number of RvaToUnit() calls for clustered queries.
129 class RvaToOffsetCache {
130 public:
131 // Embeds |translator| for use. Now object lifetime is tied to |translator|
132 // lifetime.
133 explicit RvaToOffsetCache(const AddressTranslator& translator);
134
135 bool IsValid(rva_t rva) const;
Samuel Huangb6d108f2018-10-10 15:48:10136
Samuel Huang6bea2b82017-08-25 21:09:57137 offset_t Convert(rva_t rva) const;
138
139 private:
140 const AddressTranslator& translator_;
141 mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
142
143 DISALLOW_COPY_AND_ASSIGN(RvaToOffsetCache);
144 };
145
146 enum Status {
147 kSuccess = 0,
148 kErrorOverflow,
149 kErrorBadOverlap,
150 kErrorBadOverlapDanglingRva,
151 kErrorFakeOffsetBeginTooLarge,
152 };
153
154 AddressTranslator();
155 ~AddressTranslator();
156
157 // Consumes |units| to populate data in this class. Performs consistency
158 // checks and overlapping Units. Returns Status to indicate success.
159 Status Initialize(std::vector<Unit>&& units);
160
161 // Returns the (possibly dangling) RVA corresponding to |offset|, or
162 // kInvalidRva if not found.
163 rva_t OffsetToRva(offset_t offset) const;
164
165 // Returns the (possibly fake) offset corresponding to |rva|, or
Samuel Huang3102b952017-10-12 21:25:47166 // kInvalidOffset if not found (i.e., |rva| is non-existent).
Samuel Huang6bea2b82017-08-25 21:09:57167 offset_t RvaToOffset(rva_t rva) const;
168
Samuel Huang6bea2b82017-08-25 21:09:57169 // For testing.
170 offset_t fake_offset_begin() const { return fake_offset_begin_; }
171
172 const std::vector<Unit>& units_sorted_by_offset() const {
173 return units_sorted_by_offset_;
174 }
175
176 const std::vector<Unit>& units_sorted_by_rva() const {
177 return units_sorted_by_rva_;
178 }
179
180 private:
181 // Helper to find the Unit that contains given |offset| or |rva|. Returns null
182 // if not found.
183 const Unit* OffsetToUnit(offset_t offset) const;
184 const Unit* RvaToUnit(rva_t rva) const;
185
186 // Storage of Units. All offset ranges are non-empty and disjoint. Likewise
187 // for all RVA ranges.
188 std::vector<Unit> units_sorted_by_offset_;
189 std::vector<Unit> units_sorted_by_rva_;
190
191 // Conversion factor to translate between dangling RVAs and fake offsets.
192 offset_t fake_offset_begin_;
193
194 DISALLOW_COPY_AND_ASSIGN(AddressTranslator);
Etienne Pierre-Doray6cc37d602017-07-28 01:52:46195};
196
}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_