blob: be420d4179f319be5893e21c03eedef4bcfc82cf [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "courgette/disassembler_elf_32.h"
6
7#include <algorithm>
8#include <string>
9#include <vector>
10
11#include "base/basictypes.h"
12#include "base/logging.h"
[email protected]144c8e92013-07-23 21:18:1913#include "base/memory/scoped_vector.h"
[email protected]39ed9732013-06-20 10:17:5314
15#include "courgette/assembly_program.h"
16#include "courgette/courgette.h"
17#include "courgette/encoded_program.h"
18
19namespace courgette {
20
21DisassemblerElf32::DisassemblerElf32(const void* start, size_t length)
22 : Disassembler(start, length),
23 header_(NULL),
24 section_header_table_(NULL),
25 section_header_table_size_(0),
26 program_header_table_(NULL),
27 program_header_table_size_(0),
28 default_string_section_(NULL) {
29}
30
31bool DisassemblerElf32::ParseHeader() {
32 if (length() < sizeof(Elf32_Ehdr))
33 return Bad("Too small");
34
35 header_ = (Elf32_Ehdr *)start();
36
37 // Have magic for elf header?
38 if (header_->e_ident[0] != 0x7f ||
39 header_->e_ident[1] != 'E' ||
40 header_->e_ident[2] != 'L' ||
41 header_->e_ident[3] != 'F')
42 return Bad("No Magic Number");
43
44 if (header_->e_type != ET_EXEC &&
45 header_->e_type != ET_DYN)
46 return Bad("Not an executable file or shared library");
47
48 if (header_->e_machine != ElfEM())
49 return Bad("Not a supported architecture");
50
51 if (header_->e_version != 1)
52 return Bad("Unknown file version");
53
54 if (header_->e_shentsize != sizeof(Elf32_Shdr))
55 return Bad("Unexpected section header size");
56
halyavinc9de6f72015-03-24 15:40:1257 if (!IsArrayInBounds(header_->e_shoff, header_->e_shnum, sizeof(Elf32_Shdr)))
58 return Bad("Out of bounds section header table");
[email protected]39ed9732013-06-20 10:17:5359
60 section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
61 section_header_table_size_ = header_->e_shnum;
62
halyavinc9de6f72015-03-24 15:40:1263 if (!IsArrayInBounds(header_->e_phoff, header_->e_phnum, sizeof(Elf32_Phdr)))
64 return Bad("Out of bounds program header table");
[email protected]39ed9732013-06-20 10:17:5365
66 program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
67 program_header_table_size_ = header_->e_phnum;
68
halyavinc9de6f72015-03-24 15:40:1269 if (header_->e_shstrndx >= header_->e_shnum)
70 return Bad("Out of bounds string section index");
[email protected]39ed9732013-06-20 10:17:5371
72 default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
73
halyavinc9de6f72015-03-24 15:40:1274 if (!UpdateLength()) {
75 return Bad("Out of bounds section or segment");
76 }
[email protected]39ed9732013-06-20 10:17:5377
78 return Good();
79}
80
81bool DisassemblerElf32::Disassemble(AssemblyProgram* target) {
82 if (!ok())
83 return false;
84
85 // The Image Base is always 0 for ELF Executables
86 target->set_image_base(0);
87
88 if (!ParseAbs32Relocs())
89 return false;
90
91 if (!ParseRel32RelocsFromSections())
92 return false;
93
94 if (!ParseFile(target))
95 return false;
96
97 target->DefaultAssignIndexes();
98
99 return true;
100}
101
halyavinc9de6f72015-03-24 15:40:12102bool DisassemblerElf32::UpdateLength() {
103 Elf32_Off result = 0;
[email protected]39ed9732013-06-20 10:17:53104
105 // Find the end of the last section
106 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
107 const Elf32_Shdr *section_header = SectionHeader(section_id);
108
109 if (section_header->sh_type == SHT_NOBITS)
110 continue;
111
halyavinc9de6f72015-03-24 15:40:12112 if (!IsArrayInBounds(section_header->sh_offset, section_header->sh_size, 1))
113 return false;
[email protected]39ed9732013-06-20 10:17:53114
halyavinc9de6f72015-03-24 15:40:12115 Elf32_Off section_end = section_header->sh_offset + section_header->sh_size;
116 result = std::max(result, section_end);
[email protected]39ed9732013-06-20 10:17:53117 }
118
119 // Find the end of the last segment
120 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
121 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
122
halyavinc9de6f72015-03-24 15:40:12123 if (!IsArrayInBounds(segment_header->p_offset, segment_header->p_filesz, 1))
124 return false;
[email protected]39ed9732013-06-20 10:17:53125
halyavinc9de6f72015-03-24 15:40:12126 Elf32_Off segment_end = segment_header->p_offset + segment_header->p_filesz;
127 result = std::max(result, segment_end);
[email protected]39ed9732013-06-20 10:17:53128 }
129
halyavinc9de6f72015-03-24 15:40:12130 Elf32_Off section_table_end = header_->e_shoff +
131 (header_->e_shnum * sizeof(Elf32_Shdr));
132 result = std::max(result, section_table_end);
[email protected]39ed9732013-06-20 10:17:53133
halyavinc9de6f72015-03-24 15:40:12134 Elf32_Off segment_table_end = header_->e_phoff +
135 (header_->e_phnum * sizeof(Elf32_Phdr));
136 result = std::max(result, segment_table_end);
[email protected]39ed9732013-06-20 10:17:53137
halyavinc9de6f72015-03-24 15:40:12138 ReduceLength(result);
139 return true;
[email protected]39ed9732013-06-20 10:17:53140}
141
142CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const {
143
144 // It's valid if it's contained in any program segment
145 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
146 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
147
148 if (segment_header->p_type != PT_LOAD)
149 continue;
150
151 Elf32_Addr begin = segment_header->p_vaddr;
152 Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz;
153
154 if (rva >= begin && rva < end)
155 return true;
156 }
157
158 return false;
159}
160
161// Returns RVA for an in memory address, or NULL.
162CheckBool DisassemblerElf32::RVAToFileOffset(Elf32_Addr addr,
163 size_t* result) const {
164
165 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
166 Elf32_Addr begin = ProgramSegmentMemoryBegin(i);
167 Elf32_Addr end = begin + ProgramSegmentMemorySize(i);
168
169 if (addr >= begin && addr < end) {
170 Elf32_Addr offset = addr - begin;
171
172 if (offset < ProgramSegmentFileSize(i)) {
173 *result = ProgramSegmentFileOffset(i) + offset;
174 return true;
175 }
176 }
177 }
178
179 return false;
180}
181
182RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const {
183 // File offsets can be 64 bit values, but we are dealing with 32
184 // bit executables and so only need to support 32bit file sizes.
185 uint32 offset32 = (uint32)offset;
186
187 for (int i = 0; i < SectionHeaderCount(); i++) {
188
189 const Elf32_Shdr *section_header = SectionHeader(i);
190
191 // These can appear to have a size in the file, but don't.
192 if (section_header->sh_type == SHT_NOBITS)
193 continue;
194
195 Elf32_Off section_begin = section_header->sh_offset;
196 Elf32_Off section_end = section_begin + section_header->sh_size;
197
198 if (offset32 >= section_begin && offset32 < section_end) {
199 return section_header->sh_addr + (offset32 - section_begin);
200 }
201 }
202
203 return 0;
204}
205
206CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas,
[email protected]144c8e92013-07-23 21:18:19207 std::vector<size_t>* offsets) {
[email protected]39ed9732013-06-20 10:17:53208 offsets->clear();
209
210 for (std::vector<RVA>::iterator rva = rvas->begin();
211 rva != rvas->end();
212 rva++) {
213
214 size_t offset;
215
216 if (!RVAToFileOffset(*rva, &offset))
217 return false;
218
219 offsets->push_back(offset);
220 }
221
222 return true;
223}
224
[email protected]144c8e92013-07-23 21:18:19225CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) {
226 for (ScopedVector<TypedRVA>::iterator rva = rvas->begin();
227 rva != rvas->end();
228 rva++) {
229
230 size_t offset;
231
232 if (!RVAToFileOffset((*rva)->rva(), &offset))
233 return false;
234
235 (*rva)->set_offset(offset);
236 }
237
238 return true;
239}
240
[email protected]39ed9732013-06-20 10:17:53241CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
242 // Walk all the bytes in the file, whether or not in a section.
243 uint32 file_offset = 0;
244
245 std::vector<size_t> abs_offsets;
[email protected]39ed9732013-06-20 10:17:53246
247 if (!RVAsToOffsets(&abs32_locations_, &abs_offsets))
248 return false;
249
[email protected]144c8e92013-07-23 21:18:19250 if (!RVAsToOffsets(&rel32_locations_))
[email protected]39ed9732013-06-20 10:17:53251 return false;
252
253 std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
[email protected]144c8e92013-07-23 21:18:19254 ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin();
[email protected]39ed9732013-06-20 10:17:53255
256 std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
[email protected]144c8e92013-07-23 21:18:19257 ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end();
[email protected]39ed9732013-06-20 10:17:53258
259 for (int section_id = 0;
260 section_id < SectionHeaderCount();
261 section_id++) {
262
263 const Elf32_Shdr *section_header = SectionHeader(section_id);
264
Will Harris3e6fa972015-03-02 21:14:25265 if (section_header->sh_type == SHT_NOBITS)
266 continue;
267
[email protected]39ed9732013-06-20 10:17:53268 if (!ParseSimpleRegion(file_offset,
269 section_header->sh_offset,
270 program))
271 return false;
272 file_offset = section_header->sh_offset;
273
274 switch (section_header->sh_type) {
275 case SHT_REL:
276 if (!ParseRelocationSection(section_header, program))
277 return false;
278 file_offset = section_header->sh_offset + section_header->sh_size;
279 break;
280 case SHT_PROGBITS:
281 if (!ParseProgbitsSection(section_header,
282 &current_abs_offset, end_abs_offset,
[email protected]144c8e92013-07-23 21:18:19283 &current_rel, end_rel,
[email protected]39ed9732013-06-20 10:17:53284 program))
285 return false;
286 file_offset = section_header->sh_offset + section_header->sh_size;
287 break;
[email protected]39ed9732013-06-20 10:17:53288 case SHT_INIT_ARRAY:
289 // Fall through
290 case SHT_FINI_ARRAY:
291 while (current_abs_offset != end_abs_offset &&
292 *current_abs_offset >= section_header->sh_offset &&
293 *current_abs_offset <
294 (section_header->sh_offset + section_header->sh_size)) {
295 // Skip any abs_offsets appear in the unsupported INIT_ARRAY section
296 VLOG(1) << "Skipping relocation entry for unsupported section: " <<
297 section_header->sh_type;
298 current_abs_offset++;
299 }
300 break;
301 default:
302 if (current_abs_offset != end_abs_offset &&
303 *current_abs_offset >= section_header->sh_offset &&
304 *current_abs_offset <
305 (section_header->sh_offset + section_header->sh_size))
306 VLOG(1) << "Relocation address in unrecognized ELF section: " << \
307 section_header->sh_type;
308 break;
309 }
310 }
311
312 // Rest of the file past the last section
313 if (!ParseSimpleRegion(file_offset,
314 length(),
315 program))
316 return false;
317
318 // Make certain we consume all of the relocations as expected
319 return (current_abs_offset == end_abs_offset);
320}
321
322CheckBool DisassemblerElf32::ParseProgbitsSection(
323 const Elf32_Shdr *section_header,
324 std::vector<size_t>::iterator* current_abs_offset,
325 std::vector<size_t>::iterator end_abs_offset,
[email protected]144c8e92013-07-23 21:18:19326 ScopedVector<TypedRVA>::iterator* current_rel,
327 ScopedVector<TypedRVA>::iterator end_rel,
[email protected]39ed9732013-06-20 10:17:53328 AssemblyProgram* program) {
329
330 // Walk all the bytes in the file, whether or not in a section.
331 size_t file_offset = section_header->sh_offset;
332 size_t section_end = section_header->sh_offset + section_header->sh_size;
333
334 Elf32_Addr origin = section_header->sh_addr;
335 size_t origin_offset = section_header->sh_offset;
336 if (!program->EmitOriginInstruction(origin))
337 return false;
338
339 while (file_offset < section_end) {
340
341 if (*current_abs_offset != end_abs_offset &&
342 file_offset > **current_abs_offset)
343 return false;
344
[email protected]144c8e92013-07-23 21:18:19345 while (*current_rel != end_rel &&
346 file_offset > (**current_rel)->get_offset()) {
347 (*current_rel)++;
[email protected]39ed9732013-06-20 10:17:53348 }
349
350 size_t next_relocation = section_end;
351
352 if (*current_abs_offset != end_abs_offset &&
353 next_relocation > **current_abs_offset)
354 next_relocation = **current_abs_offset;
355
356 // Rel offsets are heuristically derived, and might (incorrectly) overlap
357 // an Abs value, or the end of the section, so +3 to make sure there is
358 // room for the full 4 byte value.
[email protected]144c8e92013-07-23 21:18:19359 if (*current_rel != end_rel &&
360 next_relocation > ((**current_rel)->get_offset() + 3))
361 next_relocation = (**current_rel)->get_offset();
[email protected]39ed9732013-06-20 10:17:53362
363 if (next_relocation > file_offset) {
364 if (!ParseSimpleRegion(file_offset, next_relocation, program))
365 return false;
366
367 file_offset = next_relocation;
368 continue;
369 }
370
371 if (*current_abs_offset != end_abs_offset &&
372 file_offset == **current_abs_offset) {
373
374 const uint8* p = OffsetToPointer(file_offset);
375 RVA target_rva = Read32LittleEndian(p);
376
377 if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
378 return false;
379 file_offset += sizeof(RVA);
380 (*current_abs_offset)++;
381 continue;
382 }
383
[email protected]144c8e92013-07-23 21:18:19384 if (*current_rel != end_rel &&
385 file_offset == (**current_rel)->get_offset()) {
[email protected]39ed9732013-06-20 10:17:53386
[email protected]144c8e92013-07-23 21:18:19387 uint32 relative_target = (**current_rel)->relative_target();
[email protected]39ed9732013-06-20 10:17:53388 // This cast is for 64 bit systems, and is only safe because we
389 // are working on 32 bit executables.
390 RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
[email protected]144c8e92013-07-23 21:18:19391 relative_target);
[email protected]39ed9732013-06-20 10:17:53392
[email protected]2b637b62013-08-01 00:11:24393 if (! (**current_rel)->EmitInstruction(program, target_rva))
[email protected]39ed9732013-06-20 10:17:53394 return false;
[email protected]2b637b62013-08-01 00:11:24395 file_offset += (**current_rel)->op_size();
[email protected]144c8e92013-07-23 21:18:19396 (*current_rel)++;
[email protected]39ed9732013-06-20 10:17:53397 continue;
398 }
399 }
400
401 // Rest of the section (if any)
402 return ParseSimpleRegion(file_offset, section_end, program);
403}
404
405CheckBool DisassemblerElf32::ParseSimpleRegion(
406 size_t start_file_offset,
407 size_t end_file_offset,
408 AssemblyProgram* program) {
[email protected]c092858a2013-08-13 00:46:30409 // Callers don't guarantee start < end
pkasting8e3a26a2014-10-03 18:52:29410 if (start_file_offset >= end_file_offset) return true;
[email protected]39ed9732013-06-20 10:17:53411
pkasting8e3a26a2014-10-03 18:52:29412 const size_t len = end_file_offset - start_file_offset;
[email protected]c092858a2013-08-13 00:46:30413
pkasting8e3a26a2014-10-03 18:52:29414 if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), len))
[email protected]c092858a2013-08-13 00:46:30415 return false;
[email protected]39ed9732013-06-20 10:17:53416
417 return true;
418}
419
420CheckBool DisassemblerElf32::ParseAbs32Relocs() {
421 abs32_locations_.clear();
422
423 // Loop through sections for relocation sections
424 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
425 const Elf32_Shdr *section_header = SectionHeader(section_id);
426
427 if (section_header->sh_type == SHT_REL) {
428
429 Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
430
431 int relocs_table_count = section_header->sh_size /
432 section_header->sh_entsize;
433
434 // Elf32_Word relocation_section_id = section_header->sh_info;
435
436 // Loop through relocation objects in the relocation section
437 for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
438 RVA rva;
439
440 // Quite a few of these conversions fail, and we simply skip
441 // them, that's okay.
[email protected]a8e80412013-07-18 22:07:53442 if (RelToRVA(relocs_table[rel_id], &rva) && CheckSection(rva))
[email protected]39ed9732013-06-20 10:17:53443 abs32_locations_.push_back(rva);
444 }
445 }
446 }
447
448 std::sort(abs32_locations_.begin(), abs32_locations_.end());
449 return true;
450}
451
[email protected]a8e80412013-07-18 22:07:53452CheckBool DisassemblerElf32::CheckSection(RVA rva) {
453 size_t offset;
454
455 if (!RVAToFileOffset(rva, &offset)) {
456 return false;
457 }
458
459 for (int section_id = 0;
460 section_id < SectionHeaderCount();
461 section_id++) {
462
463 const Elf32_Shdr *section_header = SectionHeader(section_id);
464
465 if (offset >= section_header->sh_offset &&
466 offset < (section_header->sh_offset + section_header->sh_size)) {
467 switch (section_header->sh_type) {
468 case SHT_REL:
469 // Fall-through
470 case SHT_PROGBITS:
471 return true;
472 }
473 }
474 }
475
476 return false;
477}
478
[email protected]39ed9732013-06-20 10:17:53479CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() {
480
481 rel32_locations_.clear();
482
483 // Loop through sections for relocation sections
484 for (int section_id = 0;
485 section_id < SectionHeaderCount();
486 section_id++) {
487
488 const Elf32_Shdr *section_header = SectionHeader(section_id);
489
490 if (section_header->sh_type != SHT_PROGBITS)
491 continue;
492
493 if (!ParseRel32RelocsFromSection(section_header))
494 return false;
495 }
496
[email protected]144c8e92013-07-23 21:18:19497 std::sort(rel32_locations_.begin(),
498 rel32_locations_.end(),
499 TypedRVA::IsLessThan);
[email protected]39ed9732013-06-20 10:17:53500 return true;
501}
502
503} // namespace courgette