// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "courgette/assembly_program.h"
6
7#include <memory.h>
8#include <algorithm>
9#include <map>
10#include <set>
11#include <sstream>
12#include <vector>
13
14#include "base/logging.h"
[email protected]3b63f8f42011-03-28 01:54:1515#include "base/memory/scoped_ptr.h"
[email protected]04ca1bc2009-05-08 23:00:2916
17#include "courgette/courgette.h"
18#include "courgette/encoded_program.h"
19
20namespace courgette {
21
// Opcodes of the simple assembly language.  The numeric value of an opcode is
// stored in a 4-bit field of Instruction, so LAST_OP must not exceed 16.
enum OP {
  ORIGIN,         // ORIGIN <rva> - set current address for assembly.
  MAKEPERELOCS,   // Generates a PE base relocation table.
  MAKEELFRELOCS,  // Generates an ELF relocation table.
  DEFBYTE,        // DEFBYTE <value> - emit a byte literal.
  REL32,          // REL32 <label> - emit a rel32 encoded reference to 'label'.
  ABS32,          // ABS32 <label> - emit an abs32 encoded reference to 'label'.
  LAST_OP         // Number of opcodes; not an instruction itself.
};
32
33// Base class for instructions. Because we have so many instructions we want to
34// keep them as small as possible. For this reason we avoid virtual functions.
35//
36class Instruction {
37 public:
38 OP op() const { return static_cast<OP>(op_); }
39
40 protected:
41 explicit Instruction(OP op) : op_(op), info_(0) {}
42 Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
43
44 uint32 op_ : 4; // A few bits to store the OP code.
45 uint32 info_ : 28; // Remaining bits in first word available to subclass.
46
47 private:
48 DISALLOW_COPY_AND_ASSIGN(Instruction);
49};
50
51namespace {
52
53// Sets the current address for the emitting instructions.
54class OriginInstruction : public Instruction {
55 public:
56 explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
57 RVA origin_rva() const { return rva_; }
58 private:
59 RVA rva_;
60};
61
[email protected]4b3d192b2011-11-08 20:32:2662// Emits an entire PE base relocation table.
63class PeRelocsInstruction : public Instruction {
[email protected]04ca1bc2009-05-08 23:00:2964 public:
[email protected]4b3d192b2011-11-08 20:32:2665 PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
66};
67
68// Emits an ELF relocation table.
69class ElfRelocsInstruction : public Instruction {
70 public:
71 ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
[email protected]04ca1bc2009-05-08 23:00:2972};
73
74// Emits a single byte.
75class ByteInstruction : public Instruction {
76 public:
77 explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
78 uint8 byte_value() const { return info_; }
79};
80
81// A ABS32 to REL32 instruction emits a reference to a label's address.
82class InstructionWithLabel : public Instruction {
83 public:
84 InstructionWithLabel(OP op, Label* label)
85 : Instruction(op, 0), label_(label) {
86 if (label == NULL) NOTREACHED();
87 }
88 Label* label() const { return label_; }
89 private:
90 Label* label_;
91};
92
93} // namespace
94
95AssemblyProgram::AssemblyProgram()
[email protected]43a9e242011-04-06 17:42:4596 : image_base_(0) {
[email protected]04ca1bc2009-05-08 23:00:2997}
98
99static void DeleteContainedLabels(const RVAToLabel& labels) {
100 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p)
101 delete p->second;
102}
103
104AssemblyProgram::~AssemblyProgram() {
105 for (size_t i = 0; i < instructions_.size(); ++i) {
106 Instruction* instruction = instructions_[i];
107 if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_.
108 delete instruction;
109 }
[email protected]43a9e242011-04-06 17:42:45110 if (byte_instruction_cache_.get()) {
[email protected]04ca1bc2009-05-08 23:00:29111 for (size_t i = 0; i < 256; ++i)
112 delete byte_instruction_cache_[i];
[email protected]04ca1bc2009-05-08 23:00:29113 }
114 DeleteContainedLabels(rel32_labels_);
115 DeleteContainedLabels(abs32_labels_);
116}
117
[email protected]4b3d192b2011-11-08 20:32:26118CheckBool AssemblyProgram::EmitPeRelocsInstruction() {
119 return Emit(new(std::nothrow) PeRelocsInstruction());
120}
121
122CheckBool AssemblyProgram::EmitElfRelocationInstruction() {
123 return Emit(new(std::nothrow) ElfRelocsInstruction());
[email protected]04ca1bc2009-05-08 23:00:29124}
125
[email protected]43a9e242011-04-06 17:42:45126CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) {
127 return Emit(new(std::nothrow) OriginInstruction(rva));
[email protected]04ca1bc2009-05-08 23:00:29128}
129
[email protected]43a9e242011-04-06 17:42:45130CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) {
131 return Emit(GetByteInstruction(byte));
[email protected]04ca1bc2009-05-08 23:00:29132}
133
[email protected]43a9e242011-04-06 17:42:45134CheckBool AssemblyProgram::EmitRel32(Label* label) {
135 return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
[email protected]04ca1bc2009-05-08 23:00:29136}
137
[email protected]43a9e242011-04-06 17:42:45138CheckBool AssemblyProgram::EmitAbs32(Label* label) {
139 return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
[email protected]04ca1bc2009-05-08 23:00:29140}
141
142Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
143 return FindLabel(rva, &abs32_labels_);
144}
145
146Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
147 return FindLabel(rva, &rel32_labels_);
148}
149
150void AssemblyProgram::DefaultAssignIndexes() {
151 DefaultAssignIndexes(&abs32_labels_);
152 DefaultAssignIndexes(&rel32_labels_);
153}
154
155void AssemblyProgram::UnassignIndexes() {
156 UnassignIndexes(&abs32_labels_);
157 UnassignIndexes(&rel32_labels_);
158}
159
160void AssemblyProgram::AssignRemainingIndexes() {
161 AssignRemainingIndexes(&abs32_labels_);
162 AssignRemainingIndexes(&rel32_labels_);
163}
164
165Label* AssemblyProgram::InstructionAbs32Label(
166 const Instruction* instruction) const {
167 if (instruction->op() == ABS32)
168 return static_cast<const InstructionWithLabel*>(instruction)->label();
169 return NULL;
170}
171
172Label* AssemblyProgram::InstructionRel32Label(
173 const Instruction* instruction) const {
174 if (instruction->op() == REL32)
175 return static_cast<const InstructionWithLabel*>(instruction)->label();
176 return NULL;
177}
178
[email protected]43a9e242011-04-06 17:42:45179CheckBool AssemblyProgram::Emit(Instruction* instruction) {
180 if (!instruction)
181 return false;
182 bool ok = instructions_.push_back(instruction);
183 if (!ok)
184 delete instruction;
185 return ok;
186}
187
[email protected]04ca1bc2009-05-08 23:00:29188Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
189 Label*& slot = (*labels)[rva];
[email protected]43a9e242011-04-06 17:42:45190 if (slot == NULL) {
191 slot = new(std::nothrow) Label(rva);
[email protected]04ca1bc2009-05-08 23:00:29192 }
193 return slot;
194}
195
196void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
197 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
198 Label* current = p->second;
199 current->index_ = Label::kNoIndex;
200 }
201}
202
203// DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
204// address order.
205//
206void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
207 int index = 0;
208 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
209 Label* current = p->second;
210 if (current->index_ != Label::kNoIndex)
211 NOTREACHED();
212 current->index_ = index;
213 ++index;
214 }
215}
216
217// AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
218// yet assigned an index.
219//
220void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
221 // An address table compresses best when each index is associated with an
222 // address that is slight larger than the previous index.
223
224 // First see which indexes have not been used. The 'available' vector could
225 // grow even bigger, but the number of addresses is a better starting size
226 // than empty.
227 std::vector<bool> available(labels->size(), true);
228 int used = 0;
229
230 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
[email protected]54f1b822009-07-18 03:28:40231 int index = p->second->index_;
[email protected]04ca1bc2009-05-08 23:00:29232 if (index != Label::kNoIndex) {
[email protected]54f1b822009-07-18 03:28:40233 while (static_cast<size_t>(index) >= available.size())
[email protected]04ca1bc2009-05-08 23:00:29234 available.push_back(true);
235 available.at(index) = false;
236 ++used;
237 }
238 }
239
[email protected]18cca202010-10-21 20:40:58240 VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";
[email protected]04ca1bc2009-05-08 23:00:29241
242 // Are there any unused labels that happen to be adjacent following a used
243 // label?
244 //
245 int fill_forward_count = 0;
246 Label* prev = 0;
247 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
248 Label* current = p->second;
249 if (current->index_ == Label::kNoIndex) {
[email protected]104a6082010-12-21 01:03:43250 int index = 0;
[email protected]04ca1bc2009-05-08 23:00:29251 if (prev && prev->index_ != Label::kNoIndex)
252 index = prev->index_ + 1;
[email protected]104a6082010-12-21 01:03:43253 if (index < static_cast<int>(available.size()) && available.at(index)) {
[email protected]04ca1bc2009-05-08 23:00:29254 current->index_ = index;
255 available.at(index) = false;
256 ++fill_forward_count;
257 }
258 }
259 prev = current;
260 }
261
262 // Are there any unused labels that happen to be adjacent preceeding a used
263 // label?
264 //
265 int fill_backward_count = 0;
[email protected]04ca1bc2009-05-08 23:00:29266 prev = 0;
267 for (RVAToLabel::reverse_iterator p = labels->rbegin();
268 p != labels->rend();
269 ++p) {
270 Label* current = p->second;
271 if (current->index_ == Label::kNoIndex) {
272 int prev_index;
273 if (prev)
274 prev_index = prev->index_;
275 else
[email protected]104a6082010-12-21 01:03:43276 prev_index = static_cast<uint32>(available.size());
[email protected]04ca1bc2009-05-08 23:00:29277 if (prev_index != 0 &&
278 prev_index != Label::kNoIndex &&
279 available.at(prev_index - 1)) {
280 current->index_ = prev_index - 1;
281 available.at(current->index_) = false;
282 ++fill_backward_count;
283 }
284 }
285 prev = current;
286 }
287
288 // Fill in any remaining indexes
289 int fill_infill_count = 0;
290 int index = 0;
291 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
292 Label* current = p->second;
293 if (current->index_ == Label::kNoIndex) {
294 while (!available.at(index)) {
295 ++index;
296 }
297 current->index_ = index;
298 available.at(index) = false;
299 ++index;
300 ++fill_infill_count;
301 }
302 }
303
[email protected]18cca202010-10-21 20:40:58304 VLOG(1) << " fill forward " << fill_forward_count
305 << " backward " << fill_backward_count
306 << " infill " << fill_infill_count;
[email protected]04ca1bc2009-05-08 23:00:29307}
308
[email protected]c8240b12011-03-22 20:19:49309typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
[email protected]04ca1bc2009-05-08 23:00:29310
[email protected]6c131952011-03-03 23:39:32311#if defined(OS_WIN)
312__declspec(noinline)
313#endif
[email protected]c8240b12011-03-22 20:19:49314static CheckBool DefineLabels(const RVAToLabel& labels,
315 EncodedProgram* encoded_format,
316 DefineLabelMethod define_label) {
317 bool ok = true;
318 for (RVAToLabel::const_iterator p = labels.begin();
319 ok && p != labels.end();
320 ++p) {
[email protected]04ca1bc2009-05-08 23:00:29321 Label* label = p->second;
[email protected]c8240b12011-03-22 20:19:49322 ok = (encoded_format->*define_label)(label->index_, label->rva_);
[email protected]04ca1bc2009-05-08 23:00:29323 }
[email protected]c8240b12011-03-22 20:19:49324 return ok;
[email protected]04ca1bc2009-05-08 23:00:29325}
326
327EncodedProgram* AssemblyProgram::Encode() const {
[email protected]43a9e242011-04-06 17:42:45328 scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram());
329 if (!encoded.get())
330 return NULL;
331
[email protected]04ca1bc2009-05-08 23:00:29332 encoded->set_image_base(image_base_);
[email protected]c8240b12011-03-22 20:19:49333
334 if (!DefineLabels(abs32_labels_, encoded.get(),
335 &EncodedProgram::DefineAbs32Label) ||
336 !DefineLabels(rel32_labels_, encoded.get(),
337 &EncodedProgram::DefineRel32Label)) {
338 return NULL;
339 }
340
[email protected]04ca1bc2009-05-08 23:00:29341 encoded->EndLabels();
342
343 for (size_t i = 0; i < instructions_.size(); ++i) {
344 Instruction* instruction = instructions_[i];
345
346 switch (instruction->op()) {
347 case ORIGIN: {
348 OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
[email protected]c8240b12011-03-22 20:19:49349 if (!encoded->AddOrigin(org->origin_rva()))
350 return NULL;
[email protected]04ca1bc2009-05-08 23:00:29351 break;
352 }
353 case DEFBYTE: {
354 uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
[email protected]c8240b12011-03-22 20:19:49355 if (!encoded->AddCopy(1, &b))
356 return NULL;
[email protected]04ca1bc2009-05-08 23:00:29357 break;
358 }
359 case REL32: {
360 Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
[email protected]c8240b12011-03-22 20:19:49361 if (!encoded->AddRel32(label->index_))
362 return NULL;
[email protected]04ca1bc2009-05-08 23:00:29363 break;
364 }
365 case ABS32: {
366 Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
[email protected]c8240b12011-03-22 20:19:49367 if (!encoded->AddAbs32(label->index_))
368 return NULL;
[email protected]04ca1bc2009-05-08 23:00:29369 break;
370 }
[email protected]4b3d192b2011-11-08 20:32:26371 case MAKEPERELOCS: {
372 if (!encoded->AddPeMakeRelocs())
373 return NULL;
374 break;
375 }
376 case MAKEELFRELOCS: {
377 if (!encoded->AddElfMakeRelocs())
[email protected]c8240b12011-03-22 20:19:49378 return NULL;
[email protected]04ca1bc2009-05-08 23:00:29379 break;
380 }
381 default: {
382 NOTREACHED() << "Unknown Insn OP kind";
383 }
384 }
385 }
386
[email protected]c8240b12011-03-22 20:19:49387 return encoded.release();
[email protected]04ca1bc2009-05-08 23:00:29388}
389
390Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
[email protected]43a9e242011-04-06 17:42:45391 if (!byte_instruction_cache_.get()) {
392 byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]);
393 if (!byte_instruction_cache_.get())
394 return NULL;
395
[email protected]04ca1bc2009-05-08 23:00:29396 for (int i = 0; i < 256; ++i) {
[email protected]43a9e242011-04-06 17:42:45397 byte_instruction_cache_[i] =
398 new(std::nothrow) ByteInstruction(static_cast<uint8>(i));
399 if (!byte_instruction_cache_[i]) {
400 for (int j = 0; j < i; ++j)
401 delete byte_instruction_cache_[j];
402 byte_instruction_cache_.reset();
403 return NULL;
404 }
[email protected]04ca1bc2009-05-08 23:00:29405 }
406 }
407
408 return byte_instruction_cache_[byte];
409}
410
411////////////////////////////////////////////////////////////////////////////////
412
413Status Encode(AssemblyProgram* program, EncodedProgram** output) {
414 *output = NULL;
415 EncodedProgram *encoded = program->Encode();
416 if (encoded) {
417 *output = encoded;
418 return C_OK;
419 } else {
420 return C_GENERAL_ERROR;
421 }
422}
423
424} // namespace courgette