blob: f6490d9423c00c6cc6063e0f827c82f891763099 [file] [log] [blame]
[email protected]39ed9732013-06-20 10:17:531// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "courgette/disassembler_elf_32_arm.h"
6
aviab98dcc92015-12-21 19:35:337#include <stddef.h>
8#include <stdint.h>
9
[email protected]39ed9732013-06-20 10:17:5310#include <algorithm>
11#include <string>
12#include <vector>
13
[email protected]39ed9732013-06-20 10:17:5314#include "base/logging.h"
15
16#include "courgette/assembly_program.h"
17#include "courgette/courgette.h"
18#include "courgette/encoded_program.h"
19
20namespace courgette {
21
aviab98dcc92015-12-21 19:35:3322CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
23 uint32_t arm_op,
24 RVA rva,
25 uint16_t* c_op,
26 uint32_t* addr) {
[email protected]2b637b62013-08-01 00:11:2427 // This method takes an ARM or thumb opcode, extracts the relative
28 // target address from it (addr), and creates a corresponding
29 // Courgette opcode (c_op).
30 //
31 // Details on ARM the opcodes, and how the relative targets are
32 // computed were taken from the "ARM Architecture Reference Manual",
33 // section A4.1.5 and the "Thumb-2 supplement", section 4.6.12.
34 // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes.
35 switch (type) {
36 case ARM_OFF8: {
[email protected]144c8e92013-07-23 21:18:1937 // The offset is given by lower 8 bits of the op. It is a 9-bit
38 // offset, shifted right one bit and signed extended.
aviab98dcc92015-12-21 19:35:3339 uint32_t temp = (arm_op & 0x00FF) << 1;
[email protected]144c8e92013-07-23 21:18:1940 if (temp & 0x0100)
41 temp |= 0xFFFFFE00;
42 temp += 4; // Offset from _next_ PC.
[email protected]2b637b62013-08-01 00:11:2443 fflush(stdout);
44
45 (*addr) = temp;
aviab98dcc92015-12-21 19:35:3346 (*c_op) = static_cast<uint16_t>(arm_op >> 8) | 0x1000;
[email protected]144c8e92013-07-23 21:18:1947 break;
[email protected]2b637b62013-08-01 00:11:2448 }
49 case ARM_OFF11: {
[email protected]144c8e92013-07-23 21:18:1950 // The offset is given by lower 11 bits of the op, and is a
51 // 12-bit offset, shifted right one bit and sign extended.
aviab98dcc92015-12-21 19:35:3352 uint32_t temp = (arm_op & 0x07FF) << 1;
[email protected]144c8e92013-07-23 21:18:1953 if (temp & 0x00000800)
54 temp |= 0xFFFFF000;
55 temp += 4; // Offset from _next_ PC.
[email protected]2b637b62013-08-01 00:11:2456
57 (*addr) = temp;
aviab98dcc92015-12-21 19:35:3358 (*c_op) = static_cast<uint16_t>(arm_op >> 11) | 0x2000;
[email protected]144c8e92013-07-23 21:18:1959 break;
[email protected]2b637b62013-08-01 00:11:2460 }
61 case ARM_OFF24: {
62 // The offset is given by the lower 24-bits of the op, shifted
63 // left 2 bits, and sign extended.
aviab98dcc92015-12-21 19:35:3364 uint32_t temp = (arm_op & 0x00FFFFFF) << 2;
[email protected]2b637b62013-08-01 00:11:2465 if (temp & 0x02000000)
66 temp |= 0xFC000000;
67 temp += 8;
68
69 (*addr) = temp;
70 (*c_op) = (arm_op >> 24) | 0x3000;
71 break;
72 }
73 case ARM_OFF25: {
aviab98dcc92015-12-21 19:35:3374 uint32_t temp = 0;
[email protected]2b637b62013-08-01 00:11:2475 temp |= (arm_op & 0x000007FF) << 1; // imm11
76 temp |= (arm_op & 0x03FF0000) >> 4; // imm10
77
aviab98dcc92015-12-21 19:35:3378 uint32_t S = (arm_op & (1 << 26)) >> 26;
79 uint32_t j2 = (arm_op & (1 << 11)) >> 11;
80 uint32_t j1 = (arm_op & (1 << 13)) >> 13;
[email protected]2b637b62013-08-01 00:11:2481 bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0;
82 bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0;
83
aviab98dcc92015-12-21 19:35:3384 uint32_t i2 = ~(j2 ^ S) & 1;
85 uint32_t i1 = ~(j1 ^ S) & 1;
[email protected]2b637b62013-08-01 00:11:2486 bool toARM = bit14 && !bit12;
87
88 temp |= (S << 24) | (i1 << 23) | (i2 << 22);
89
90 if (temp & 0x01000000) // sign extension
91 temp |= 0xFE000000;
aviab98dcc92015-12-21 19:35:3392 uint32_t prefetch;
[email protected]2b637b62013-08-01 00:11:2493 if (toARM) {
94 // Align PC on 4-byte boundary
aviab98dcc92015-12-21 19:35:3395 uint32_t align4byte = (rva % 4) ? 2 : 4;
[email protected]2b637b62013-08-01 00:11:2496 prefetch = align4byte;
97 } else {
98 prefetch = 4;
99 }
100 temp += prefetch;
101 (*addr) = temp;
102
aviab98dcc92015-12-21 19:35:33103 uint32_t temp2 = 0x4000;
[email protected]2b637b62013-08-01 00:11:24104 temp2 |= (arm_op & (1 << 12)) >> 12;
105 temp2 |= (arm_op & (1 << 14)) >> 13;
106 temp2 |= (arm_op & (1 << 15)) >> 13;
107 temp2 |= (arm_op & 0xF8000000) >> 24;
108 temp2 |= (prefetch & 0x0000000F) << 8;
aviab98dcc92015-12-21 19:35:33109 (*c_op) = static_cast<uint16_t>(temp2);
[email protected]2b637b62013-08-01 00:11:24110 break;
111 }
112 case ARM_OFF21: {
aviab98dcc92015-12-21 19:35:33113 uint32_t temp = 0;
[email protected]11336c02013-09-25 19:05:51114 temp |= (arm_op & 0x000007FF) << 1; // imm11
115 temp |= (arm_op & 0x003F0000) >> 4; // imm6
[email protected]2b637b62013-08-01 00:11:24116
aviab98dcc92015-12-21 19:35:33117 uint32_t S = (arm_op & (1 << 26)) >> 26;
118 uint32_t j2 = (arm_op & (1 << 11)) >> 11;
119 uint32_t j1 = (arm_op & (1 << 13)) >> 13;
[email protected]2b637b62013-08-01 00:11:24120
121 temp |= (S << 20) | (j1 << 19) | (j2 << 18);
122
[email protected]11336c02013-09-25 19:05:51123 if (temp & 0x00100000) // sign extension
[email protected]2b637b62013-08-01 00:11:24124 temp |= 0xFFE00000;
125 temp += 4;
126 (*addr) = temp;
127
aviab98dcc92015-12-21 19:35:33128 uint32_t temp2 = 0x5000;
[email protected]2b637b62013-08-01 00:11:24129 temp2 |= (arm_op & 0x03C00000) >> 22; // just save the cond
aviab98dcc92015-12-21 19:35:33130 (*c_op) = static_cast<uint16_t>(temp2);
[email protected]2b637b62013-08-01 00:11:24131 break;
132 }
[email protected]144c8e92013-07-23 21:18:19133 default:
134 return false;
135 }
[email protected]144c8e92013-07-23 21:18:19136 return true;
137}
138
aviab98dcc92015-12-21 19:35:33139CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type,
140 uint16_t c_op,
141 uint32_t addr,
142 uint32_t* arm_op) {
[email protected]2b637b62013-08-01 00:11:24143 // Reverses the process in the compress() method. Takes the
144 // Courgette op and relative address and reconstructs the original
145 // ARM or thumb op.
146 switch (type) {
147 case ARM_OFF8:
148 (*arm_op) = ((c_op & 0x0FFF) << 8) | (((addr - 4) >> 1) & 0x000000FF);
149 break;
150 case ARM_OFF11:
151 (*arm_op) = ((c_op & 0x0FFF) << 11) | (((addr - 4) >> 1) & 0x000007FF);
152 break;
153 case ARM_OFF24:
154 (*arm_op) = ((c_op & 0x0FFF) << 24) | (((addr - 8) >> 2) & 0x00FFFFFF);
155 break;
156 case ARM_OFF25: {
aviab98dcc92015-12-21 19:35:33157 uint32_t temp = 0;
[email protected]2b637b62013-08-01 00:11:24158 temp |= (c_op & (1 << 0)) << 12;
159 temp |= (c_op & (1 << 1)) << 13;
160 temp |= (c_op & (1 << 2)) << 13;
161 temp |= (c_op & (0xF8000000 >> 24)) << 24;
162
aviab98dcc92015-12-21 19:35:33163 uint32_t prefetch = (c_op & 0x0F00) >> 8;
[email protected]2b637b62013-08-01 00:11:24164 addr -= prefetch;
165
166 addr &= 0x01FFFFFF;
167
aviab98dcc92015-12-21 19:35:33168 uint32_t S = (addr & (1 << 24)) >> 24;
169 uint32_t i1 = (addr & (1 << 23)) >> 23;
170 uint32_t i2 = (addr & (1 << 22)) >> 22;
[email protected]2b637b62013-08-01 00:11:24171
aviab98dcc92015-12-21 19:35:33172 uint32_t j1 = ((~i1) ^ S) & 1;
173 uint32_t j2 = ((~i2) ^ S) & 1;
[email protected]2b637b62013-08-01 00:11:24174
175 temp |= S << 26;
176 temp |= j2 << 11;
177 temp |= j1 << 13;
178
179 temp |= (addr & (0x000007FF << 1)) >> 1;
180 temp |= (addr & (0x03FF0000 >> 4)) << 4;
181
182 (*arm_op) = temp;
183 break;
184 }
185 case ARM_OFF21: {
aviab98dcc92015-12-21 19:35:33186 uint32_t temp = 0xF0008000;
[email protected]2b637b62013-08-01 00:11:24187 temp |= (c_op & (0x03C00000 >> 22)) << 22;
188
189 addr -= 4;
190 addr &= 0x001FFFFF;
191
aviab98dcc92015-12-21 19:35:33192 uint32_t S = (addr & (1 << 20)) >> 20;
193 uint32_t j1 = (addr & (1 << 19)) >> 19;
194 uint32_t j2 = (addr & (1 << 18)) >> 18;
[email protected]2b637b62013-08-01 00:11:24195
196 temp |= S << 26;
197 temp |= j2 << 11;
198 temp |= j1 << 13;
199
200 temp |= (addr & (0x000007FF << 1)) >> 1;
201 temp |= (addr & (0x003F0000 >> 4)) << 4;
202
203 (*arm_op) = temp;
204 break;
205 }
206 default:
207 return false;
208 }
209 return true;
210}
211
aviab98dcc92015-12-21 19:35:33212uint16_t DisassemblerElf32ARM::TypedRVAARM::op_size() const {
[email protected]2b637b62013-08-01 00:11:24213 switch (type_) {
214 case ARM_OFF8:
215 return 2;
216 case ARM_OFF11:
217 return 2;
218 case ARM_OFF24:
219 return 4;
220 case ARM_OFF25:
221 return 4;
222 case ARM_OFF21:
223 return 4;
224 default:
[email protected]0ef486b2014-07-08 08:40:56225 return 0xFFFF;
[email protected]2b637b62013-08-01 00:11:24226 }
227}
228
229CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget(
aviab98dcc92015-12-21 19:35:33230 const uint8_t* op_pointer) {
[email protected]2b637b62013-08-01 00:11:24231 arm_op_ = op_pointer;
232 switch (type_) {
233 case ARM_OFF8:
234 // Fall through
235 case ARM_OFF11: {
236 RVA relative_target;
237 CheckBool ret = Compress(type_, Read16LittleEndian(op_pointer), rva(),
238 &c_op_, &relative_target);
239 set_relative_target(relative_target);
240 return ret;
241 }
242 case ARM_OFF24: {
243 RVA relative_target;
244 CheckBool ret = Compress(type_, Read32LittleEndian(op_pointer), rva(),
245 &c_op_, &relative_target);
246 set_relative_target(relative_target);
247 return ret;
248 }
249 case ARM_OFF25:
250 // Fall through
251 case ARM_OFF21: {
252 // A thumb-2 op is 32 bits stored as two 16-bit words
aviab98dcc92015-12-21 19:35:33253 uint32_t pval = (Read16LittleEndian(op_pointer) << 16) |
254 Read16LittleEndian(op_pointer + 2);
[email protected]2b637b62013-08-01 00:11:24255 RVA relative_target;
256 CheckBool ret = Compress(type_, pval, rva(), &c_op_, &relative_target);
257 set_relative_target(relative_target);
258 return ret;
259 }
260 default:
261 return false;
262 }
263}
264
265CheckBool DisassemblerElf32ARM::TypedRVAARM::EmitInstruction(
266 AssemblyProgram* program,
267 RVA target_rva) {
268 return program->EmitRel32ARM(c_op(),
269 program->FindOrMakeRel32Label(target_rva),
270 arm_op_,
271 op_size());
272}
273
[email protected]39ed9732013-06-20 10:17:53274DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start, size_t length)
275 : DisassemblerElf32(start, length) {
276}
277
278// Convert an ELF relocation struction into an RVA
279CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const {
280
281 // The rightmost byte of r_info is the type...
282 elf32_rel_arm_type_values type =
283 (elf32_rel_arm_type_values)(unsigned char)rel.r_info;
284
285 // The other 3 bytes of r_info are the symbol
aviab98dcc92015-12-21 19:35:33286 uint32_t symbol = rel.r_info >> 8;
[email protected]39ed9732013-06-20 10:17:53287
288 switch(type)
289 {
290 case R_ARM_RELATIVE:
291 if (symbol != 0)
292 return false;
293
294 // This is a basic ABS32 relocation address
295 *result = rel.r_offset;
296 return true;
297
298 default:
299 return false;
300 }
[email protected]39ed9732013-06-20 10:17:53301}
302
303CheckBool DisassemblerElf32ARM::ParseRelocationSection(
304 const Elf32_Shdr *section_header,
305 AssemblyProgram* program) {
[email protected]a8e80412013-07-18 22:07:53306 // This method compresses a contiguous stretch of R_ARM_RELATIVE
307 // entries in the relocation table with a Courgette relocation table
308 // instruction. It skips any entries at the beginning that appear
309 // in a section that Courgette doesn't support, e.g. INIT.
310 // Specifically, the entries should be
311 // (1) In the same relocation table
312 // (2) Are consecutive
313 // (3) Are sorted in memory address order
[email protected]39ed9732013-06-20 10:17:53314 //
315 // Happily, this is normally the case, but it's not required by spec
316 // so we check, and just don't do it if we don't match up.
[email protected]a8e80412013-07-18 22:07:53317 //
[email protected]39ed9732013-06-20 10:17:53318 // The expectation is that one relocation section will contain
[email protected]a8e80412013-07-18 22:07:53319 // all of our R_ARM_RELATIVE entries in the expected order followed
[email protected]39ed9732013-06-20 10:17:53320 // by assorted other entries we can't use special handling for.
321
322 bool match = true;
323
324 // Walk all the bytes in the section, matching relocation table or not
325 size_t file_offset = section_header->sh_offset;
326 size_t section_end = section_header->sh_offset + section_header->sh_size;
327
328 Elf32_Rel *section_relocs_iter =
329 (Elf32_Rel *)OffsetToPointer(section_header->sh_offset);
330
aviab98dcc92015-12-21 19:35:33331 uint32_t section_relocs_count =
332 section_header->sh_size / section_header->sh_entsize;
[email protected]39ed9732013-06-20 10:17:53333
334 if (abs32_locations_.size() > section_relocs_count)
335 match = false;
336
[email protected]a8e80412013-07-18 22:07:53337 if (!abs32_locations_.empty()) {
338 std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin();
[email protected]39ed9732013-06-20 10:17:53339
aviab98dcc92015-12-21 19:35:33340 for (uint32_t i = 0; i < section_relocs_count; i++) {
[email protected]a8e80412013-07-18 22:07:53341 if (section_relocs_iter->r_offset == *reloc_iter)
342 break;
[email protected]39ed9732013-06-20 10:17:53343
[email protected]a8e80412013-07-18 22:07:53344 if (!ParseSimpleRegion(file_offset, file_offset + sizeof(Elf32_Rel),
345 program))
346 return false;
347
348 file_offset += sizeof(Elf32_Rel);
349 ++section_relocs_iter;
350 }
351
352 while (match && (reloc_iter != abs32_locations_.end())) {
353 if (section_relocs_iter->r_info != R_ARM_RELATIVE ||
354 section_relocs_iter->r_offset != *reloc_iter)
355 match = false;
356
357 section_relocs_iter++;
358 reloc_iter++;
359 file_offset += sizeof(Elf32_Rel);
360 }
361
362 if (match) {
363 // Skip over relocation tables
364 if (!program->EmitElfARMRelocationInstruction())
365 return false;
366 }
[email protected]39ed9732013-06-20 10:17:53367 }
368
369 return ParseSimpleRegion(file_offset, section_end, program);
370}
371
372CheckBool DisassemblerElf32ARM::ParseRel32RelocsFromSection(
373 const Elf32_Shdr* section_header) {
aviab98dcc92015-12-21 19:35:33374 uint32_t start_file_offset = section_header->sh_offset;
375 uint32_t end_file_offset = start_file_offset + section_header->sh_size;
[email protected]2b637b62013-08-01 00:11:24376
aviab98dcc92015-12-21 19:35:33377 const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
378 const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
[email protected]2b637b62013-08-01 00:11:24379
380 // Quick way to convert from Pointer to RVA within a single Section is to
381 // subtract 'pointer_to_rva'.
aviab98dcc92015-12-21 19:35:33382 const uint8_t* const adjust_pointer_to_rva =
383 start_pointer - section_header->sh_addr;
[email protected]2b637b62013-08-01 00:11:24384
385 // Find the rel32 relocations.
aviab98dcc92015-12-21 19:35:33386 const uint8_t* p = start_pointer;
[email protected]2b637b62013-08-01 00:11:24387 bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it
388 while (p < end_pointer) {
389 // Heuristic discovery of rel32 locations in instruction stream: are the
390 // next few bytes the start of an instruction containing a rel32
391 // addressing mode?
392
393 TypedRVAARM* rel32_rva = NULL;
[email protected]093688992014-04-03 11:35:46394 RVA target_rva = 0;
[email protected]2b637b62013-08-01 00:11:24395 bool found = false;
396
397 // 16-bit thumb ops
398 if (!found && (p + 3) <= end_pointer) {
aviab98dcc92015-12-21 19:35:33399 uint16_t pval = Read16LittleEndian(p);
[email protected]2b637b62013-08-01 00:11:24400 if ((pval & 0xF000) == 0xD000) {
401 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
402
403 rel32_rva = new TypedRVAARM(ARM_OFF8, rva);
aviab98dcc92015-12-21 19:35:33404 if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
[email protected]2b637b62013-08-01 00:11:24405 return false;
406 }
407 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
408 found = true;
409 } else if ((pval & 0xF800) == 0xE000) {
410 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
411
412 rel32_rva = new TypedRVAARM(ARM_OFF11, rva);
aviab98dcc92015-12-21 19:35:33413 if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
[email protected]2b637b62013-08-01 00:11:24414 return false;
415 }
416 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
417 found = true;
418 }
419 }
420
421 // thumb-2 ops comprised of two 16-bit words
422 if (!found && (p + 5) <= end_pointer) {
423 // This is really two 16-bit words, not one 32-bit word.
aviab98dcc92015-12-21 19:35:33424 uint32_t pval = (Read16LittleEndian(p) << 16) | Read16LittleEndian(p + 2);
[email protected]2b637b62013-08-01 00:11:24425 if ((pval & 0xF8008000) == 0xF0008000) {
426 // Covers thumb-2's 32-bit conditional/unconditional branches
427
428 if ( (pval & (1 << 14)) || (pval & (1 << 12)) ) {
429 // A branch, with link, or with link and exchange.
430 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
431
432 rel32_rva = new TypedRVAARM(ARM_OFF25, rva);
aviab98dcc92015-12-21 19:35:33433 if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
[email protected]2b637b62013-08-01 00:11:24434 return false;
435 }
436 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
437 found = true;
438 } else {
439 // TODO(paulgazz) make sure cond is not 111
440 // A conditional branch instruction
441 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
442
443 rel32_rva = new TypedRVAARM(ARM_OFF21, rva);
aviab98dcc92015-12-21 19:35:33444 if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
[email protected]2b637b62013-08-01 00:11:24445 return false;
446 }
447 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
448 found = true;
449 }
450 }
451 }
452
453 // 32-bit ARM ops
454 if (!found && on_32bit && (p + 5) <= end_pointer) {
aviab98dcc92015-12-21 19:35:33455 uint32_t pval = Read32LittleEndian(p);
[email protected]2b637b62013-08-01 00:11:24456 if ((pval & 0x0E000000) == 0x0A000000) {
457 // Covers both 0x0A 0x0B ARM relative branches
458 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
459
460 rel32_rva = new TypedRVAARM(ARM_OFF24, rva);
aviab98dcc92015-12-21 19:35:33461 if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
[email protected]2b637b62013-08-01 00:11:24462 return false;
463 }
464 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
465 found = true;
466 }
467 }
468
469 if (found && IsValidRVA(target_rva)) {
470 rel32_locations_.push_back(rel32_rva);
471#if COURGETTE_HISTOGRAM_TARGETS
472 ++rel32_target_rvas_[target_rva];
473#endif
474 p += rel32_rva->op_size();
475
476 // A tricky way to update the on_32bit flag. Here is the truth table:
477 // on_32bit | on_32bit size is 4
478 // ---------+---------------------
479 // 1 | 0 0
480 // 0 | 0 1
481 // 0 | 1 0
482 // 1 | 1 1
483 on_32bit = (~(on_32bit ^ (rel32_rva->op_size() == 4))) != 0;
484 } else {
485 // Move 2 bytes at a time, but track 32-bit boundaries
486 p += 2;
487 on_32bit = ((on_32bit + 1) % 2) != 0;
488 }
489 }
490
[email protected]39ed9732013-06-20 10:17:53491 return true;
492}
493
494} // namespace courgette