blob: c6139a88390dc6e43f153f553c8d14a55e94e2cb [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "courgette/disassembler_elf_32.h"
6
aviab98dcc92015-12-21 19:35:337#include <stddef.h>
8#include <stdint.h>
9
[email protected]39ed9732013-06-20 10:17:5310#include <algorithm>
11#include <string>
12#include <vector>
13
[email protected]39ed9732013-06-20 10:17:5314#include "base/logging.h"
[email protected]144c8e92013-07-23 21:18:1915#include "base/memory/scoped_vector.h"
[email protected]39ed9732013-06-20 10:17:5316
17#include "courgette/assembly_program.h"
18#include "courgette/courgette.h"
19#include "courgette/encoded_program.h"
20
21namespace courgette {
22
23DisassemblerElf32::DisassemblerElf32(const void* start, size_t length)
24 : Disassembler(start, length),
25 header_(NULL),
26 section_header_table_(NULL),
27 section_header_table_size_(0),
28 program_header_table_(NULL),
29 program_header_table_size_(0),
30 default_string_section_(NULL) {
31}
32
33bool DisassemblerElf32::ParseHeader() {
34 if (length() < sizeof(Elf32_Ehdr))
35 return Bad("Too small");
36
37 header_ = (Elf32_Ehdr *)start();
38
39 // Have magic for elf header?
40 if (header_->e_ident[0] != 0x7f ||
41 header_->e_ident[1] != 'E' ||
42 header_->e_ident[2] != 'L' ||
43 header_->e_ident[3] != 'F')
44 return Bad("No Magic Number");
45
46 if (header_->e_type != ET_EXEC &&
47 header_->e_type != ET_DYN)
48 return Bad("Not an executable file or shared library");
49
50 if (header_->e_machine != ElfEM())
51 return Bad("Not a supported architecture");
52
53 if (header_->e_version != 1)
54 return Bad("Unknown file version");
55
56 if (header_->e_shentsize != sizeof(Elf32_Shdr))
57 return Bad("Unexpected section header size");
58
halyavinc9de6f72015-03-24 15:40:1259 if (!IsArrayInBounds(header_->e_shoff, header_->e_shnum, sizeof(Elf32_Shdr)))
60 return Bad("Out of bounds section header table");
[email protected]39ed9732013-06-20 10:17:5361
62 section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
63 section_header_table_size_ = header_->e_shnum;
64
halyavinc9de6f72015-03-24 15:40:1265 if (!IsArrayInBounds(header_->e_phoff, header_->e_phnum, sizeof(Elf32_Phdr)))
66 return Bad("Out of bounds program header table");
[email protected]39ed9732013-06-20 10:17:5367
68 program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
69 program_header_table_size_ = header_->e_phnum;
70
halyavinc9de6f72015-03-24 15:40:1271 if (header_->e_shstrndx >= header_->e_shnum)
72 return Bad("Out of bounds string section index");
[email protected]39ed9732013-06-20 10:17:5373
74 default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
75
halyavinc9de6f72015-03-24 15:40:1276 if (!UpdateLength()) {
77 return Bad("Out of bounds section or segment");
78 }
[email protected]39ed9732013-06-20 10:17:5379
80 return Good();
81}
82
83bool DisassemblerElf32::Disassemble(AssemblyProgram* target) {
84 if (!ok())
85 return false;
86
87 // The Image Base is always 0 for ELF Executables
88 target->set_image_base(0);
89
90 if (!ParseAbs32Relocs())
91 return false;
92
93 if (!ParseRel32RelocsFromSections())
94 return false;
95
96 if (!ParseFile(target))
97 return false;
98
99 target->DefaultAssignIndexes();
100
101 return true;
102}
103
halyavinc9de6f72015-03-24 15:40:12104bool DisassemblerElf32::UpdateLength() {
105 Elf32_Off result = 0;
[email protected]39ed9732013-06-20 10:17:53106
107 // Find the end of the last section
108 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
109 const Elf32_Shdr *section_header = SectionHeader(section_id);
110
111 if (section_header->sh_type == SHT_NOBITS)
112 continue;
113
halyavinc9de6f72015-03-24 15:40:12114 if (!IsArrayInBounds(section_header->sh_offset, section_header->sh_size, 1))
115 return false;
[email protected]39ed9732013-06-20 10:17:53116
halyavinc9de6f72015-03-24 15:40:12117 Elf32_Off section_end = section_header->sh_offset + section_header->sh_size;
118 result = std::max(result, section_end);
[email protected]39ed9732013-06-20 10:17:53119 }
120
121 // Find the end of the last segment
122 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
123 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
124
halyavinc9de6f72015-03-24 15:40:12125 if (!IsArrayInBounds(segment_header->p_offset, segment_header->p_filesz, 1))
126 return false;
[email protected]39ed9732013-06-20 10:17:53127
halyavinc9de6f72015-03-24 15:40:12128 Elf32_Off segment_end = segment_header->p_offset + segment_header->p_filesz;
129 result = std::max(result, segment_end);
[email protected]39ed9732013-06-20 10:17:53130 }
131
halyavinc9de6f72015-03-24 15:40:12132 Elf32_Off section_table_end = header_->e_shoff +
133 (header_->e_shnum * sizeof(Elf32_Shdr));
134 result = std::max(result, section_table_end);
[email protected]39ed9732013-06-20 10:17:53135
halyavinc9de6f72015-03-24 15:40:12136 Elf32_Off segment_table_end = header_->e_phoff +
137 (header_->e_phnum * sizeof(Elf32_Phdr));
138 result = std::max(result, segment_table_end);
[email protected]39ed9732013-06-20 10:17:53139
halyavinc9de6f72015-03-24 15:40:12140 ReduceLength(result);
141 return true;
[email protected]39ed9732013-06-20 10:17:53142}
143
144CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const {
145
146 // It's valid if it's contained in any program segment
147 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
148 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
149
150 if (segment_header->p_type != PT_LOAD)
151 continue;
152
153 Elf32_Addr begin = segment_header->p_vaddr;
154 Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz;
155
156 if (rva >= begin && rva < end)
157 return true;
158 }
159
160 return false;
161}
162
163// Returns RVA for an in memory address, or NULL.
164CheckBool DisassemblerElf32::RVAToFileOffset(Elf32_Addr addr,
165 size_t* result) const {
166
167 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
168 Elf32_Addr begin = ProgramSegmentMemoryBegin(i);
169 Elf32_Addr end = begin + ProgramSegmentMemorySize(i);
170
171 if (addr >= begin && addr < end) {
172 Elf32_Addr offset = addr - begin;
173
174 if (offset < ProgramSegmentFileSize(i)) {
175 *result = ProgramSegmentFileOffset(i) + offset;
176 return true;
177 }
178 }
179 }
180
181 return false;
182}
183
184RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const {
185 // File offsets can be 64 bit values, but we are dealing with 32
186 // bit executables and so only need to support 32bit file sizes.
aviab98dcc92015-12-21 19:35:33187 uint32_t offset32 = (uint32_t)offset;
[email protected]39ed9732013-06-20 10:17:53188
189 for (int i = 0; i < SectionHeaderCount(); i++) {
190
191 const Elf32_Shdr *section_header = SectionHeader(i);
192
193 // These can appear to have a size in the file, but don't.
194 if (section_header->sh_type == SHT_NOBITS)
195 continue;
196
197 Elf32_Off section_begin = section_header->sh_offset;
198 Elf32_Off section_end = section_begin + section_header->sh_size;
199
200 if (offset32 >= section_begin && offset32 < section_end) {
201 return section_header->sh_addr + (offset32 - section_begin);
202 }
203 }
204
205 return 0;
206}
207
208CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas,
[email protected]144c8e92013-07-23 21:18:19209 std::vector<size_t>* offsets) {
[email protected]39ed9732013-06-20 10:17:53210 offsets->clear();
211
212 for (std::vector<RVA>::iterator rva = rvas->begin();
213 rva != rvas->end();
214 rva++) {
215
216 size_t offset;
217
218 if (!RVAToFileOffset(*rva, &offset))
219 return false;
220
221 offsets->push_back(offset);
222 }
223
224 return true;
225}
226
[email protected]144c8e92013-07-23 21:18:19227CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) {
228 for (ScopedVector<TypedRVA>::iterator rva = rvas->begin();
229 rva != rvas->end();
230 rva++) {
231
232 size_t offset;
233
234 if (!RVAToFileOffset((*rva)->rva(), &offset))
235 return false;
236
237 (*rva)->set_offset(offset);
238 }
239
240 return true;
241}
242
[email protected]39ed9732013-06-20 10:17:53243CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
244 // Walk all the bytes in the file, whether or not in a section.
aviab98dcc92015-12-21 19:35:33245 uint32_t file_offset = 0;
[email protected]39ed9732013-06-20 10:17:53246
247 std::vector<size_t> abs_offsets;
[email protected]39ed9732013-06-20 10:17:53248
249 if (!RVAsToOffsets(&abs32_locations_, &abs_offsets))
250 return false;
251
[email protected]144c8e92013-07-23 21:18:19252 if (!RVAsToOffsets(&rel32_locations_))
[email protected]39ed9732013-06-20 10:17:53253 return false;
254
255 std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
[email protected]144c8e92013-07-23 21:18:19256 ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin();
[email protected]39ed9732013-06-20 10:17:53257
258 std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
[email protected]144c8e92013-07-23 21:18:19259 ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end();
[email protected]39ed9732013-06-20 10:17:53260
261 for (int section_id = 0;
262 section_id < SectionHeaderCount();
263 section_id++) {
264
265 const Elf32_Shdr *section_header = SectionHeader(section_id);
266
Will Harris3e6fa972015-03-02 21:14:25267 if (section_header->sh_type == SHT_NOBITS)
268 continue;
269
[email protected]39ed9732013-06-20 10:17:53270 if (!ParseSimpleRegion(file_offset,
271 section_header->sh_offset,
272 program))
273 return false;
274 file_offset = section_header->sh_offset;
275
276 switch (section_header->sh_type) {
277 case SHT_REL:
278 if (!ParseRelocationSection(section_header, program))
279 return false;
280 file_offset = section_header->sh_offset + section_header->sh_size;
281 break;
282 case SHT_PROGBITS:
283 if (!ParseProgbitsSection(section_header,
284 &current_abs_offset, end_abs_offset,
[email protected]144c8e92013-07-23 21:18:19285 &current_rel, end_rel,
[email protected]39ed9732013-06-20 10:17:53286 program))
287 return false;
288 file_offset = section_header->sh_offset + section_header->sh_size;
289 break;
[email protected]39ed9732013-06-20 10:17:53290 case SHT_INIT_ARRAY:
291 // Fall through
292 case SHT_FINI_ARRAY:
293 while (current_abs_offset != end_abs_offset &&
294 *current_abs_offset >= section_header->sh_offset &&
295 *current_abs_offset <
296 (section_header->sh_offset + section_header->sh_size)) {
297 // Skip any abs_offsets appear in the unsupported INIT_ARRAY section
298 VLOG(1) << "Skipping relocation entry for unsupported section: " <<
299 section_header->sh_type;
300 current_abs_offset++;
301 }
302 break;
303 default:
304 if (current_abs_offset != end_abs_offset &&
305 *current_abs_offset >= section_header->sh_offset &&
306 *current_abs_offset <
307 (section_header->sh_offset + section_header->sh_size))
308 VLOG(1) << "Relocation address in unrecognized ELF section: " << \
309 section_header->sh_type;
310 break;
311 }
312 }
313
314 // Rest of the file past the last section
315 if (!ParseSimpleRegion(file_offset,
316 length(),
317 program))
318 return false;
319
320 // Make certain we consume all of the relocations as expected
321 return (current_abs_offset == end_abs_offset);
322}
323
324CheckBool DisassemblerElf32::ParseProgbitsSection(
325 const Elf32_Shdr *section_header,
326 std::vector<size_t>::iterator* current_abs_offset,
327 std::vector<size_t>::iterator end_abs_offset,
[email protected]144c8e92013-07-23 21:18:19328 ScopedVector<TypedRVA>::iterator* current_rel,
329 ScopedVector<TypedRVA>::iterator end_rel,
[email protected]39ed9732013-06-20 10:17:53330 AssemblyProgram* program) {
331
332 // Walk all the bytes in the file, whether or not in a section.
333 size_t file_offset = section_header->sh_offset;
334 size_t section_end = section_header->sh_offset + section_header->sh_size;
335
336 Elf32_Addr origin = section_header->sh_addr;
337 size_t origin_offset = section_header->sh_offset;
338 if (!program->EmitOriginInstruction(origin))
339 return false;
340
341 while (file_offset < section_end) {
342
343 if (*current_abs_offset != end_abs_offset &&
344 file_offset > **current_abs_offset)
345 return false;
346
[email protected]144c8e92013-07-23 21:18:19347 while (*current_rel != end_rel &&
348 file_offset > (**current_rel)->get_offset()) {
349 (*current_rel)++;
[email protected]39ed9732013-06-20 10:17:53350 }
351
352 size_t next_relocation = section_end;
353
354 if (*current_abs_offset != end_abs_offset &&
355 next_relocation > **current_abs_offset)
356 next_relocation = **current_abs_offset;
357
358 // Rel offsets are heuristically derived, and might (incorrectly) overlap
359 // an Abs value, or the end of the section, so +3 to make sure there is
360 // room for the full 4 byte value.
[email protected]144c8e92013-07-23 21:18:19361 if (*current_rel != end_rel &&
362 next_relocation > ((**current_rel)->get_offset() + 3))
363 next_relocation = (**current_rel)->get_offset();
[email protected]39ed9732013-06-20 10:17:53364
365 if (next_relocation > file_offset) {
366 if (!ParseSimpleRegion(file_offset, next_relocation, program))
367 return false;
368
369 file_offset = next_relocation;
370 continue;
371 }
372
373 if (*current_abs_offset != end_abs_offset &&
374 file_offset == **current_abs_offset) {
aviab98dcc92015-12-21 19:35:33375 const uint8_t* p = OffsetToPointer(file_offset);
[email protected]39ed9732013-06-20 10:17:53376 RVA target_rva = Read32LittleEndian(p);
377
378 if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
379 return false;
380 file_offset += sizeof(RVA);
381 (*current_abs_offset)++;
382 continue;
383 }
384
[email protected]144c8e92013-07-23 21:18:19385 if (*current_rel != end_rel &&
386 file_offset == (**current_rel)->get_offset()) {
aviab98dcc92015-12-21 19:35:33387 uint32_t relative_target = (**current_rel)->relative_target();
[email protected]39ed9732013-06-20 10:17:53388 // This cast is for 64 bit systems, and is only safe because we
389 // are working on 32 bit executables.
390 RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
[email protected]144c8e92013-07-23 21:18:19391 relative_target);
[email protected]39ed9732013-06-20 10:17:53392
[email protected]2b637b62013-08-01 00:11:24393 if (! (**current_rel)->EmitInstruction(program, target_rva))
[email protected]39ed9732013-06-20 10:17:53394 return false;
[email protected]2b637b62013-08-01 00:11:24395 file_offset += (**current_rel)->op_size();
[email protected]144c8e92013-07-23 21:18:19396 (*current_rel)++;
[email protected]39ed9732013-06-20 10:17:53397 continue;
398 }
399 }
400
401 // Rest of the section (if any)
402 return ParseSimpleRegion(file_offset, section_end, program);
403}
404
405CheckBool DisassemblerElf32::ParseSimpleRegion(
406 size_t start_file_offset,
407 size_t end_file_offset,
408 AssemblyProgram* program) {
[email protected]c092858a2013-08-13 00:46:30409 // Callers don't guarantee start < end
pkasting8e3a26a2014-10-03 18:52:29410 if (start_file_offset >= end_file_offset) return true;
[email protected]39ed9732013-06-20 10:17:53411
pkasting8e3a26a2014-10-03 18:52:29412 const size_t len = end_file_offset - start_file_offset;
[email protected]c092858a2013-08-13 00:46:30413
pkasting8e3a26a2014-10-03 18:52:29414 if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), len))
[email protected]c092858a2013-08-13 00:46:30415 return false;
[email protected]39ed9732013-06-20 10:17:53416
417 return true;
418}
419
420CheckBool DisassemblerElf32::ParseAbs32Relocs() {
421 abs32_locations_.clear();
422
423 // Loop through sections for relocation sections
424 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
425 const Elf32_Shdr *section_header = SectionHeader(section_id);
426
427 if (section_header->sh_type == SHT_REL) {
428
429 Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
430
431 int relocs_table_count = section_header->sh_size /
432 section_header->sh_entsize;
433
434 // Elf32_Word relocation_section_id = section_header->sh_info;
435
436 // Loop through relocation objects in the relocation section
437 for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
438 RVA rva;
439
440 // Quite a few of these conversions fail, and we simply skip
441 // them, that's okay.
[email protected]a8e80412013-07-18 22:07:53442 if (RelToRVA(relocs_table[rel_id], &rva) && CheckSection(rva))
[email protected]39ed9732013-06-20 10:17:53443 abs32_locations_.push_back(rva);
444 }
445 }
446 }
447
448 std::sort(abs32_locations_.begin(), abs32_locations_.end());
449 return true;
450}
451
[email protected]a8e80412013-07-18 22:07:53452CheckBool DisassemblerElf32::CheckSection(RVA rva) {
453 size_t offset;
454
455 if (!RVAToFileOffset(rva, &offset)) {
456 return false;
457 }
458
459 for (int section_id = 0;
460 section_id < SectionHeaderCount();
461 section_id++) {
462
463 const Elf32_Shdr *section_header = SectionHeader(section_id);
464
465 if (offset >= section_header->sh_offset &&
466 offset < (section_header->sh_offset + section_header->sh_size)) {
467 switch (section_header->sh_type) {
468 case SHT_REL:
469 // Fall-through
470 case SHT_PROGBITS:
471 return true;
472 }
473 }
474 }
475
476 return false;
477}
478
[email protected]39ed9732013-06-20 10:17:53479CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() {
480
481 rel32_locations_.clear();
482
483 // Loop through sections for relocation sections
484 for (int section_id = 0;
485 section_id < SectionHeaderCount();
486 section_id++) {
487
488 const Elf32_Shdr *section_header = SectionHeader(section_id);
489
490 if (section_header->sh_type != SHT_PROGBITS)
491 continue;
492
493 if (!ParseRel32RelocsFromSection(section_header))
494 return false;
495 }
496
[email protected]144c8e92013-07-23 21:18:19497 std::sort(rel32_locations_.begin(),
498 rel32_locations_.end(),
499 TypedRVA::IsLessThan);
[email protected]39ed9732013-06-20 10:17:53500 return true;
501}
502
503} // namespace courgette