diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..0237da2c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: pyelftools-tests +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [2.7, 3.6, 3.7, 3.8] + os: [ubuntu-latest] + + steps: + + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Test + run: | + python test/all_tests.py + diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index df78985a..00000000 --- a/.travis.yml +++ /dev/null @@ -1,7 +0,0 @@ -language: python -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" -script: python test/all_tests.py diff --git a/CHANGES b/CHANGES index fd48c06b..5319a3a2 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,34 @@ Changelog ========= ++ Version 0.27 (2020.10.27) + + - Print addend for RELA relocations without symbol (#292) + - Implement symbol lookup for {GNU,}HashSection (#290) + - Major rewrite of expression parsing + - Cached random access to CUs and DIEs (#264) + - GNU expressions (#303) + - Support parsing LSDA pointers from FDEs (#308) + - Add support for DW_OP_GNU_push_tls_address in expressions (#315) + - Some initial support for AArch64 little-endian (#318) + - Support for ELF files with a large number of sections (#333) + - Some minimal support for DWARFv1 (#335) + - Many small bug fixes; see git log. 
+ ++ Version 0.26 (2019.12.05) + + - Call relocation for ARM v3 (#194) + - More complete architecture coverage for ENUM_E_MACHINE (#206) + - Support for .debug_pubtypes and .debug_pubnames sections (#208) + - Support for DWARF v4 location lists (#214) + - Decode strings in dynamic string tables (#217) + - Improve symbol table handling in dynamic segments (#219) + - Improved handling of location information (#225) + - Avoid deprecation warnings in Python 3.7+ + - Add DWARF v5 OPs (#240) + - Handle many new translation forms and constants + - Lazy DIE parsing to speed up partial parsing of DWARF info (#249) + + Version 0.25 (2018.09.01) - Make parsing of SH_TYPE and PT_TYPE fields dependent on the machine diff --git a/MANIFEST.in b/MANIFEST.in index ea1b28f6..e2c7667f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,7 @@ recursive-include elftools *.py recursive-include scripts *.py recursive-include examples *.py *.elf *.out recursive-include test *.py *.elf *.arm *.mips *.o -include README +include README.rst include LICENSE include CHANGES include tox.ini diff --git a/README.rst b/README.rst index 567d7482..6b59c0ff 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,10 @@ -Introduction: what is pyelftools? ---------------------------------- +========== +pyelftools +========== + +.. image:: https://github.com/eliben/pyelftools/workflows/pyelftools-tests/badge.svg + :align: center + :target: https://github.com/eliben/pyelftools/actions **pyelftools** is a pure-Python library for parsing and analyzing ELF files and DWARF debugging information. See the @@ -10,7 +15,7 @@ Pre-requisites -------------- As a user of **pyelftools**, one only needs Python to run. It works with -Python versions 2.7 and 3.x (x >= 2). For hacking on **pyelftools** the +Python versions 2.7 and 3.x (x >= 5). For hacking on **pyelftools** the requirements are a bit more strict, please see the `hacking guide `_. 
@@ -48,13 +53,3 @@ License **pyelftools** is open source software. Its code is in the public domain. See the ``LICENSE`` file for more details. - -CI Status ---------- - -**pyelftools** has automatic testing enabled through the convenient -`Travis CI project `_. Here is the latest build status: - -.. image:: https://travis-ci.org/eliben/pyelftools.png?branch=master - :align: center - :target: https://travis-ci.org/eliben/pyelftools diff --git a/TODO b/TODO index 11b0adab..a2226f76 100755 --- a/TODO +++ b/TODO @@ -9,15 +9,10 @@ New version construct --------- -The construct seems to be maintained again - they also backported my Python 3 -fixes. Theoretically, I can remove construct from pyelftools and use it as a -dependency instead. I don't really have time to play with this now, but may -do so in the future. - -Distribution ------------- - -python setup.py build sdist bdist_wheel upload +construct seems to be maintained again - they also backported my Python 3 fixes. +Theoretically, I can remove construct from pyelftools and use it as a dependency +instead. I don't really have time to play with this now, but may do so in the +future. Preparing a new release ----------------------- @@ -29,3 +24,12 @@ Preparing a new release everything looks ok * Now build with upload to send it to PyPi * Test with pip install from some new virtualenv + +Distribution +------------ + +1. First install Twine (https://packaging.python.org/tutorials/packaging-projects/) +2. python3 -m twine upload dist/*, but make sure ``setup.py`` was already run + and the updated whl and tarball are in dist/. 
+ +Credentials for PyPI are stored in ~/.pypirc diff --git a/elftools/__init__.py b/elftools/__init__.py index 9344a0f8..25e13737 100644 --- a/elftools/__init__.py +++ b/elftools/__init__.py @@ -4,4 +4,4 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -__version__ = '0.25' +__version__ = '0.27' diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py index 36332497..4b4a3920 100644 --- a/elftools/common/construct_utils.py +++ b/elftools/common/construct_utils.py @@ -8,7 +8,7 @@ #------------------------------------------------------------------------------- from ..construct import ( Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil, - Rename + Rename, SizeofError ) diff --git a/elftools/common/exceptions.py b/elftools/common/exceptions.py index 5e409cf1..eb759bba 100644 --- a/elftools/common/exceptions.py +++ b/elftools/common/exceptions.py @@ -6,12 +6,12 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -class ELFError(Exception): +class ELFError(Exception): pass class ELFRelocationError(ELFError): pass - + class ELFParseError(ELFError): pass diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py index b901e078..22967719 100644 --- a/elftools/common/py3compat.py +++ b/elftools/common/py3compat.py @@ -64,3 +64,8 @@ def itervalues(d): def iteritems(d): """Return an iterator over the items of a dictionary.""" return getattr(d, 'items' if PY3 else 'iteritems')() + +try: + from collections.abc import Mapping # python >= 3.3 +except ImportError: + from collections import Mapping # python < 3.3 diff --git a/elftools/common/utils.py b/elftools/common/utils.py index 4e80e188..d1fde2ca 100644 --- a/elftools/common/utils.py +++ b/elftools/common/utils.py @@ -9,7 +9,7 @@ from contextlib import 
contextmanager from .exceptions import ELFParseError, ELFError, DWARFError from .py3compat import int2byte -from ..construct import ConstructError +from ..construct import ConstructError, ULInt8 def merge_dicts(*dicts): @@ -102,6 +102,11 @@ def roundup(num, bits): """ return (num - 1 | (1 << bits) - 1) + 1 +def read_blob(stream, length): + """Read length bytes from stream, return a list of ints + """ + return [struct_parse(ULInt8(''), stream) for i in range(length)] + #------------------------- PRIVATE ------------------------- def _assert_with_exception(cond, msg, exception_type): diff --git a/elftools/construct/adapters.py b/elftools/construct/adapters.py index 54fcc08c..545dbac1 100644 --- a/elftools/construct/adapters.py +++ b/elftools/construct/adapters.py @@ -279,7 +279,7 @@ class ExprAdapter(Adapter): * subcon - the subcon to adapt * encoder - a function that takes (obj, context) and returns an encoded version of obj - * decoder - a function that takes (obj, context) and returns an decoded + * decoder - a function that takes (obj, context) and returns a decoded version of obj Example: diff --git a/elftools/construct/core.py b/elftools/construct/core.py index 214c58fc..14a50f88 100644 --- a/elftools/construct/core.py +++ b/elftools/construct/core.py @@ -345,7 +345,7 @@ def __getstate__(self): return attrs def __setstate__(self, attrs): attrs["packer"] = Packer(attrs["packer"]) - return StaticField.__setstate__(attrs) + return StaticField.__setstate__(self, attrs) def _parse(self, stream, context): try: return self.packer.unpack(_read_stream(stream, self.length))[0] @@ -1297,6 +1297,8 @@ def _build(self, obj, stream, context): assert obj is None def _sizeof(self, context): return 0 + def __reduce__(self): + return self.__class__.__name__ Pass = Pass(None) class Terminator(Construct): diff --git a/elftools/construct/debug.py b/elftools/construct/debug.py index 6023df92..846daf89 100644 --- a/elftools/construct/debug.py +++ b/elftools/construct/debug.py @@ 
-15,17 +15,17 @@ class Probe(Construct): A probe: dumps the context, stack frames, and stream content to the screen to aid the debugging process. See also Debugger. - + Parameters: * name - the display name - * show_stream - whether or not to show stream contents. default is True. + * show_stream - whether or not to show stream contents. default is True. the stream must be seekable. * show_context - whether or not to show the context. default is True. - * show_stack - whether or not to show the upper stack frames. default + * show_stack - whether or not to show the upper stack frames. default is True. * stream_lookahead - the number of bytes to dump when show_stack is set. default is 100. - + Example: Struct("foo", UBInt8("a"), @@ -34,13 +34,13 @@ class Probe(Construct): ) """ __slots__ = [ - "printname", "show_stream", "show_context", "show_stack", + "printname", "show_stream", "show_context", "show_stack", "stream_lookahead" ] counter = 0 - - def __init__(self, name = None, show_stream = True, - show_context = True, show_stack = True, + + def __init__(self, name = None, show_stream = True, + show_context = True, show_stack = True, stream_lookahead = 100): Construct.__init__(self, None) if name is None: @@ -59,7 +59,7 @@ def _build(self, obj, stream, context): self.printout(stream, context) def _sizeof(self, context): return 0 - + def printout(self, stream, context): obj = Container() if self.show_stream: @@ -71,10 +71,10 @@ def printout(self, stream, context): stream.seek(-len(follows), 1) obj.following_stream_data = HexString(follows) print - + if self.show_context: obj.context = context - + if self.show_stack: obj.stack = ListContainer() frames = [s[0] for s in inspect.stack()][1:-1] @@ -83,7 +83,7 @@ def printout(self, stream, context): a = Container() a.__update__(f.f_locals) obj.stack.append(a) - + print("=" * 80) print("Probe", self.printname) print(obj) @@ -93,10 +93,10 @@ class Debugger(Subconstruct): """ A pdb-based debugger. 
When an exception occurs in the subcon, a debugger will appear and allow you to debug the error (and even fix on-the-fly). - + Parameters: * subcon - the subcon to debug - + Example: Debugger( Enum(UBInt8("foo"), @@ -131,4 +131,3 @@ def handle_exc(self, msg = None): print(msg) pdb.post_mortem(sys.exc_info()[2]) print("=" * 80) - diff --git a/elftools/construct/lib/binary.py b/elftools/construct/lib/binary.py index c73b887b..3efef0d7 100644 --- a/elftools/construct/lib/binary.py +++ b/elftools/construct/lib/binary.py @@ -28,8 +28,8 @@ def int_to_bin(number, width=32): _bit_values = { - 0: 0, - 1: 1, + 0: 0, + 1: 1, 48: 0, # '0' 49: 1, # '1' @@ -90,7 +90,7 @@ def swap_bytes(bits, bytesize=8): def encode_bin(data): - """ + """ Create a binary representation of the given b'' object. Assume 8-bit ASCII. Example: @@ -101,7 +101,7 @@ def encode_bin(data): def decode_bin(data): - """ + """ Locical opposite of decode_bin. """ if len(data) & 7: @@ -115,4 +115,3 @@ def decode_bin(data): i += 8 j += 1 return b"".join(chars) - diff --git a/elftools/construct/lib/container.py b/elftools/construct/lib/container.py index 2f89b2dc..5a580fac 100644 --- a/elftools/construct/lib/container.py +++ b/elftools/construct/lib/container.py @@ -2,8 +2,8 @@ Various containers. 
""" -from collections import MutableMapping from pprint import pformat +from .py3compat import MutableMapping def recursion_lock(retval, lock_name = "__recursion_lock__"): def decorator(func): diff --git a/elftools/construct/lib/hex.py b/elftools/construct/lib/hex.py index e378e228..b830644a 100644 --- a/elftools/construct/lib/hex.py +++ b/elftools/construct/lib/hex.py @@ -34,11 +34,10 @@ def __init__(self, data, linesize = 16): def __new__(cls, data, *args, **kwargs): return bytes.__new__(cls, data) - + def __str__(self): if not self: return "''" sep = "\n" return sep + sep.join( hexdump(self, self.linesize)) - diff --git a/elftools/construct/lib/py3compat.py b/elftools/construct/lib/py3compat.py index 4a52c293..16e12979 100644 --- a/elftools/construct/lib/py3compat.py +++ b/elftools/construct/lib/py3compat.py @@ -6,6 +6,11 @@ import sys PY3 = sys.version_info[0] == 3 +try: + from collections.abc import MutableMapping # python >= 3.3 +except ImportError: + from collections import MutableMapping # python < 3.3 + if PY3: import io @@ -41,7 +46,7 @@ def decodebytes(b, encoding): return bytes(b, encoding) advance_iterator = next - + else: import cStringIO StringIO = BytesIO = cStringIO.StringIO @@ -67,4 +72,3 @@ def decodebytes(b, encoding): def advance_iterator(it): return it.next() - diff --git a/elftools/dwarf/abbrevtable.py b/elftools/dwarf/abbrevtable.py index 36f6d2a0..6d29d5cf 100644 --- a/elftools/dwarf/abbrevtable.py +++ b/elftools/dwarf/abbrevtable.py @@ -33,7 +33,7 @@ def get_abbrev(self, code): """ Get the AbbrevDecl for a given code. Raise KeyError if no declaration for this code exists. 
""" - return AbbrevDecl(code, self._abbrev_map[code]) + return self._abbrev_map[code] def _parse_abbrev_table(self): """ Parse the abbrev table from the stream @@ -49,7 +49,7 @@ def _parse_abbrev_table(self): declaration = struct_parse( struct=self.structs.Dwarf_abbrev_declaration, stream=self.stream) - map[decl_code] = declaration + map[decl_code] = AbbrevDecl(decl_code, declaration) return map diff --git a/elftools/dwarf/aranges.py b/elftools/dwarf/aranges.py index 32c287df..3f140f44 100644 --- a/elftools/dwarf/aranges.py +++ b/elftools/dwarf/aranges.py @@ -12,21 +12,21 @@ from bisect import bisect_right import math -# An entry in the aranges table; +# An entry in the aranges table; # begin_addr: The beginning address in the CU # length: The length of the address range in this entry # info_offset: The CU's offset into .debug_info # see 6.1.2 in DWARF4 docs for explanation of the remaining fields -ARangeEntry = namedtuple('ARangeEntry', +ARangeEntry = namedtuple('ARangeEntry', 'begin_addr length info_offset unit_length version address_size segment_size') class ARanges(object): """ ARanges table in DWARF - stream, size: + stream, size: A stream holding the .debug_aranges section, and its size - structs: + structs: A DWARFStructs instance for parsing the data """ def __init__(self, stream, size, structs): @@ -49,8 +49,11 @@ def cu_offset_at_addr(self, addr): 'offset' refers to the offset in the .debug_info section. 
""" tup = self.entries[bisect_right(self.keys, addr) - 1] - return tup.info_offset - + if tup.begin_addr <= addr < tup.begin_addr + tup.length: + return tup.info_offset + else: + return None + #------ PRIVATE ------# def _get_entries(self): @@ -62,14 +65,14 @@ def _get_entries(self): # one loop == one "set" == one CU while offset < self.size : - aranges_header = struct_parse(self.structs.Dwarf_aranges_header, + aranges_header = struct_parse(self.structs.Dwarf_aranges_header, self.stream, offset) addr_size = self._get_addr_size_struct(aranges_header["address_size"]) # No segmentation if aranges_header["segment_size"] == 0: # pad to nearest multiple of tuple size - tuple_size = aranges_header["address_size"] * 2 + tuple_size = aranges_header["address_size"] * 2 fp = self.stream.tell() seek_to = int(math.ceil(fp/float(tuple_size)) * tuple_size) self.stream.seek(seek_to) @@ -80,8 +83,8 @@ def _get_entries(self): while addr != 0 or length != 0: # 'begin_addr length info_offset version address_size segment_size' entries.append( - ARangeEntry(begin_addr=addr, - length=length, + ARangeEntry(begin_addr=addr, + length=length, info_offset=aranges_header["debug_info_offset"], unit_length=aranges_header["unit_length"], version=aranges_header["version"], @@ -93,18 +96,18 @@ def _get_entries(self): elif aranges_header["segment_size"] != 0: raise NotImplementedError("Segmentation not implemented") - offset = (offset - + aranges_header.unit_length + offset = (offset + + aranges_header.unit_length + self.structs.initial_length_field_size()) return entries def _get_addr_size_struct(self, addr_header_value): - """ Given this set's header value (int) for the address size, + """ Given this set's header value (int) for the address size, get the Construct representation of that size """ if addr_header_value == 4: return self.structs.Dwarf_uint32 - else: + else: assert addr_header_value == 8 return self.structs.Dwarf_uint64 diff --git a/elftools/dwarf/callframe.py 
b/elftools/dwarf/callframe.py index bcef78d7..8b3ec5c7 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -141,6 +141,14 @@ def _parse_entry_at(self, offset): else: cie = self._parse_cie_for_fde(offset, header, entry_structs) aug_bytes = self._read_augmentation_data(entry_structs) + lsda_encoding = cie.augmentation_dict.get('LSDA_encoding', DW_EH_encoding_flags['DW_EH_PE_omit']) + if lsda_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']: + # parse LSDA pointer + lsda_pointer = self._parse_lsda_pointer(entry_structs, + self.stream.tell() - len(aug_bytes), + lsda_encoding) + else: + lsda_pointer = None # For convenience, compute the end offset for this entry end_offset = ( @@ -163,8 +171,10 @@ def _parse_entry_at(self, offset): cie = self._parse_cie_for_fde(offset, header, entry_structs) self._entry_cache[offset] = FDE( header=header, instructions=instructions, offset=offset, + structs=entry_structs, cie=cie, augmentation_bytes=aug_bytes, - structs=entry_structs, cie=cie) + lsda_pointer=lsda_pointer, + ) return self._entry_cache[offset] def _parse_instructions(self, structs, offset, end_offset): @@ -224,6 +234,8 @@ def _parse_instructions(self, structs, offset, end_offset): args = [ struct_parse(structs.Dwarf_uleb128(''), self.stream), struct_parse(structs.Dwarf_sleb128(''), self.stream)] + elif opcode == DW_CFA_GNU_args_size: + args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)] else: dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode) @@ -321,6 +333,37 @@ def _read_augmentation_data(self, entry_structs): self.stream)['length'] return self.stream.read(augmentation_data_length) + def _parse_lsda_pointer(self, structs, stream_offset, encoding): + """ Parse bytes to get an LSDA pointer. + + The basic encoding (lower four bits of the encoding) describes how the values are encoded in a CIE or an FDE. 
+ The modifier (upper four bits of the encoding) describes how the raw values, after decoded using a basic + encoding, should be modified before using. + + Ref: https://www.airs.com/blog/archives/460 + """ + assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit'] + basic_encoding = encoding & 0x0f + modifier = encoding & 0xf0 + + formats = self._eh_encoding_to_field(structs) + + ptr = struct_parse( + Struct('Augmentation_Data', + formats[basic_encoding]('LSDA_pointer')), + self.stream, stream_pos=stream_offset)['LSDA_pointer'] + + if modifier == DW_EH_encoding_flags['DW_EH_PE_absptr']: + pass + + elif modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']: + ptr += self.address + stream_offset + + else: + assert False, 'Unsupported encoding modifier for LSDA pointer: {:#x}'.format(modifier) + + return ptr + def _parse_fde_header(self, entry_structs, offset): """ Compute a struct to parse the header of the current FDE. """ @@ -367,7 +410,8 @@ def _parse_fde_header(self, entry_structs, offset): return result - def _eh_encoding_to_field(self, entry_structs): + @staticmethod + def _eh_encoding_to_field(entry_structs): """ Return a mapping from basic encodings (DW_EH_encoding_flags) the corresponding field constructors (for instance @@ -375,9 +419,7 @@ def _eh_encoding_to_field(self, entry_structs): """ return { DW_EH_encoding_flags['DW_EH_PE_absptr']: - entry_structs.Dwarf_uint32 - if entry_structs.dwarf_format == 32 else - entry_structs.Dwarf_uint64, + entry_structs.Dwarf_target_addr, DW_EH_encoding_flags['DW_EH_PE_uleb128']: entry_structs.Dwarf_uleb128, DW_EH_encoding_flags['DW_EH_PE_udata2']: @@ -436,14 +478,14 @@ class CFIEntry(object): http://www.airs.com/blog/archives/460. 
""" def __init__(self, header, structs, instructions, offset, - augmentation_dict={}, augmentation_bytes=b'', cie=None): + augmentation_dict=None, augmentation_bytes=b'', cie=None): self.header = header self.structs = structs self.instructions = instructions self.offset = offset self.cie = cie self._decoded_table = None - self.augmentation_dict = augmentation_dict + self.augmentation_dict = augmentation_dict if augmentation_dict else {} self.augmentation_bytes = augmentation_bytes def get_decoded(self): @@ -467,7 +509,7 @@ def _decode_CFI_table(self): if isinstance(self, CIE): # For a CIE, initialize cur_line to an "empty" line cie = self - cur_line = dict(pc=0, cfa=None) + cur_line = dict(pc=0, cfa=CFARule(reg=None, offset=0)) reg_order = [] else: # FDE # For a FDE, we need to decode the attached CIE first, because its @@ -479,7 +521,7 @@ def _decode_CFI_table(self): last_line_in_CIE = copy.copy(cie_decoded_table.table[-1]) cur_line = copy.copy(last_line_in_CIE) else: - cur_line = dict(cfa=None) + cur_line = dict(cfa=CFARule(reg=None, offset=0)) cur_line['pc'] = self['initial_location'] reg_order = copy.copy(cie_decoded_table.reg_order) @@ -490,7 +532,9 @@ def _decode_CFI_table(self): line_stack = [] def _add_to_order(regnum): - if regnum not in cur_line: + # DW_CFA_restore and others remove registers from cur_line, + # but they stay in reg_order. Avoid duplicates. + if regnum not in reg_order: reg_order.append(regnum) for instr in self.instructions: @@ -575,7 +619,7 @@ def _add_to_order(regnum): # The current line is appended to the table after all instructions # have ended, if there were instructions. 
- if cur_line['cfa'] is not None or len(cur_line) > 2: + if cur_line['cfa'].reg is not None or len(cur_line) > 2: table.append(cur_line) return DecodedCallFrameTable(table=table, reg_order=reg_order) @@ -591,7 +635,9 @@ class CIE(CFIEntry): class FDE(CFIEntry): - pass + def __init__(self, header, structs, instructions, offset, augmentation_bytes=None, cie=None, lsda_pointer=None): + super(FDE, self).__init__(header, structs, instructions, offset, augmentation_bytes=augmentation_bytes, cie=cie) + self.lsda_pointer = lsda_pointer class ZERO(object): diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py index 8b4030f4..eb66c571 100644 --- a/elftools/dwarf/compileunit.py +++ b/elftools/dwarf/compileunit.py @@ -6,7 +6,9 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from bisect import bisect_right from .die import DIE +from ..common.utils import dwarf_assert class CompileUnit(object): @@ -53,8 +55,16 @@ def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset): # requested. self._abbrev_table = None - # A list of DIEs belonging to this CU. Lazily parsed. + # A list of DIEs belonging to this CU. + # This list is lazily constructed as DIEs are iterated over. self._dielist = [] + # A list of file offsets, corresponding (by index) to the DIEs + # in `self._dielist`. This list exists separately from + # `self._dielist` to make it binary searchable, enabling the + # DIE population strategy used in `iter_DIE_children`. + # Like `self._dielist`, this list is lazily constructed + # as DIEs are iterated over. 
+ self._diemap = [] def dwarf_format(self): """ Get the DWARF format (32 or 64) for this CU @@ -73,14 +83,94 @@ def get_top_DIE(self): """ Get the top DIE (which is either a DW_TAG_compile_unit or DW_TAG_partial_unit) of this CU """ - return self._get_DIE(0) + + # Note that a top DIE always has minimal offset and is therefore + # at the beginning of our lists, so no bisect is required. + if len(self._diemap) > 0: + return self._dielist[0] + + top = DIE( + cu=self, + stream=self.dwarfinfo.debug_info_sec.stream, + offset=self.cu_die_offset) + + self._dielist.insert(0, top) + self._diemap.insert(0, self.cu_die_offset) + + return top + + @property + def size(self): + return self['unit_length'] + self.structs.initial_length_field_size() + + def get_DIE_from_refaddr(self, refaddr): + """ Obtain a DIE contained in this CU from a reference. + + refaddr: + The offset into the .debug_info section, which must be + contained in this CU or a DWARFError will be raised. + + When using a reference class attribute with a form that is + relative to the compile unit, add unit add the compile unit's + .cu_addr before calling this function. + """ + # All DIEs are after the cu header and within the unit + dwarf_assert( + self.cu_die_offset <= refaddr < self.cu_offset + self.size, + 'refaddr %s not in DIE range of CU %s' % (refaddr, self.cu_offset)) + + return self._get_cached_DIE(refaddr) def iter_DIEs(self): """ Iterate over all the DIEs in the CU, in order of their appearance. Note that null DIEs will also be returned. """ - self._parse_DIEs() - return iter(self._dielist) + return self._iter_DIE_subtree(self.get_top_DIE()) + + def iter_DIE_children(self, die): + """ Given a DIE, yields either its children, without null DIE list + terminator, or nothing, if that DIE has no children. + + The null DIE terminator is saved in that DIE when iteration ended. 
+ """ + if not die.has_children: + return + + # `cur_offset` tracks the stream offset of the next DIE to yield + # as we iterate over our children, + cur_offset = die.offset + die.size + + while True: + child = self._get_cached_DIE(cur_offset) + + child.set_parent(die) + + if child.is_null(): + die._terminator = child + return + + yield child + + if not child.has_children: + cur_offset += child.size + elif "DW_AT_sibling" in child.attributes: + sibling = child.attributes["DW_AT_sibling"] + cur_offset = sibling.value + self.cu_offset + else: + # If no DW_AT_sibling attribute is provided by the producer + # then the whole child subtree must be parsed to find its next + # sibling. There is one zero byte representing null DIE + # terminating children list. It is used to locate child subtree + # bounds. + + # If children are not parsed yet, this instruction will manage + # to recursive call of this function which will result in + # setting of `_terminator` attribute of the `child`. + if child._terminator is None: + for _ in self.iter_DIE_children(child): + pass + + cur_offset = child._terminator.offset + child._terminator.size #------ PRIVATE ------# @@ -89,64 +179,48 @@ def __getitem__(self, name): """ return self.header[name] - def _get_DIE(self, index): - """ Get the DIE at the given index - """ - self._parse_DIEs() - return self._dielist[index] - - def _parse_DIEs(self): - """ Parse all the DIEs pertaining to this CU from the stream and shove - them sequentially into self._dielist. - Also set the child/sibling/parent links in the DIEs according - (unflattening the prefix-order of the DIE tree). + def _iter_DIE_subtree(self, die): + """ Given a DIE, this yields it with its subtree including null DIEs + (child list terminators). 
""" - if len(self._dielist) > 0: - return + yield die + if die.has_children: + for c in die.iter_children(): + for d in self._iter_DIE_subtree(c): + yield d + yield die._terminator + + def _get_cached_DIE(self, offset): + """ Given a DIE offset, look it up in the cache. If not present, + parse the DIE and insert it into the cache. - # Compute the boundary (one byte past the bounds) of this CU in the - # stream - cu_boundary = ( self.cu_offset + - self['unit_length'] + - self.structs.initial_length_field_size()) - - # First pass: parse all DIEs and place them into self._dielist - die_offset = self.cu_die_offset - while die_offset < cu_boundary: - die = DIE( - cu=self, - stream=self.dwarfinfo.debug_info_sec.stream, - offset=die_offset) - self._dielist.append(die) - die_offset += die.size - - # Second pass - unflatten the DIE tree - self._unflatten_tree() - - def _unflatten_tree(self): - """ "Unflatten" the DIE tree from it serial representation, by setting - the child/sibling/parent links of DIEs. - - Assumes self._dielist was already populated by a linear list of DIEs - read from the stream section + offset: + The offset of the DIE in the debug_info section to retrieve. + + The stream reference is copied from the top DIE. The top die will + also be parsed and cached if needed. + + See also get_DIE_from_refaddr(self, refaddr). """ - # the first DIE in the list is the root node - root = self._dielist[0] - parentstack = [root] - - for die in self._dielist[1:]: - if not die.is_null(): - cur_parent = parentstack[-1] - # This DIE is a child of the current parent - cur_parent.add_child(die) - die.set_parent(cur_parent) - if die.has_children: - parentstack.append(die) - else: - # parentstack should not be really empty here. However, some - # compilers generate DWARF that has extra NULLs in the end and - # we don't want pyelftools to fail parsing them just because of - # this. 
- if len(parentstack) > 0: - # end of children for the current parent - parentstack.pop() + # The top die must be in the cache if any DIE is in the cache. + # The stream is the same for all DIEs in this CU, so populate + # the top DIE and obtain a reference to its stream. + top_die_stream = self.get_top_DIE().stream + + # `offset` is the offset in the stream of the DIE we want to return. + # The map is maintined as a parallel array to the list. We call + # bisect each time to ensure new DIEs are inserted in the correct + # order within both `self._dielist` and `self._diemap`. + i = bisect_right(self._diemap, offset) + + # Note that `self._diemap` cannot be empty because a the top DIE + # was inserted by the call to .get_top_DIE(). Also it has the minimal + # offset, so the bisect_right insert point will always be at least 1. + if offset == self._diemap[i - 1]: + die = self._dielist[i - 1] + else: + die = DIE(cu=self, stream=top_die_stream, offset=offset) + self._dielist.insert(i, die) + self._diemap.insert(i, offset) + + return die diff --git a/elftools/dwarf/constants.py b/elftools/dwarf/constants.py index e2072b0f..558e8c6a 100644 --- a/elftools/dwarf/constants.py +++ b/elftools/dwarf/constants.py @@ -37,6 +37,23 @@ DW_LANG_UPC = 0x0012 DW_LANG_D = 0x0013 DW_LANG_Python = 0x0014 +DW_LANG_OpenCL = 0x0015 +DW_LANG_Go = 0x0016 +DW_LANG_Modula3 = 0x0017 +DW_LANG_Haskell = 0x0018 +DW_LANG_C_plus_plus_03 = 0x0019 +DW_LANG_C_plus_plus_11 = 0x001a +DW_LANG_OCaml = 0x001b +DW_LANG_Rust = 0x001c +DW_LANG_C11 = 0x001d +DW_LANG_Swift = 0x001e +DW_LANG_Julia = 0x001f +DW_LANG_Dylan = 0x0020 +DW_LANG_C_plus_plus_14 = 0x0021 +DW_LANG_Fortran03 = 0x0022 +DW_LANG_Fortran08 = 0x0023 +DW_LANG_RenderScript = 0x0024 +DW_LANG_BLISS = 0x0025 DW_LANG_Mips_Assembler = 0x8001 DW_LANG_Upc = 0x8765 DW_LANG_HP_Bliss = 0x8003 @@ -44,6 +61,8 @@ DW_LANG_HP_Pascal91 = 0x8005 DW_LANG_HP_IMacro = 0x8006 DW_LANG_HP_Assembler = 0x8007 +DW_LANG_GOOGLE_RenderScript = 0x8e57 +DW_LANG_BORLAND_Delphi = 
0xb000 # Encoding @@ -65,6 +84,8 @@ DW_ATE_unsigned_fixed = 0xe DW_ATE_decimal_float = 0xf DW_ATE_UTF = 0x10 +DW_ATE_UCS = 0x11 +DW_ATE_ASCII = 0x12 DW_ATE_lo_user = 0x80 DW_ATE_hi_user = 0xff DW_ATE_HP_float80 = 0x80 @@ -135,6 +156,9 @@ DW_LNE_end_sequence = 0x01 DW_LNE_set_address = 0x02 DW_LNE_define_file = 0x03 +DW_LNE_set_discriminator = 0x04 +DW_LNE_lo_user = 0x80 +DW_LNE_hi_user = 0xff # Call frame instructions @@ -173,3 +197,4 @@ DW_CFA_val_offset = 0x14 DW_CFA_val_offset_sf = 0x15 DW_CFA_val_expression = 0x16 +DW_CFA_GNU_args_size = 0x2e diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index eb20333f..e5c8c26b 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -9,7 +9,7 @@ from collections import defaultdict from .constants import * -from .dwarf_expr import GenericExprVisitor +from .dwarf_expr import DWARFExprParser from .die import DIE from ..common.utils import preserve_stream_pos, dwarf_assert from ..common.py3compat import bytes2str @@ -99,18 +99,17 @@ def _full_reg_name(regnum): s += ' %s: %s ofs %s\n' % ( name, _full_reg_name(instr.args[0]), instr.args[1] * cie['data_alignment_factor']) - elif name == 'DW_CFA_def_cfa_offset': + elif name in ('DW_CFA_def_cfa_offset', 'DW_CFA_GNU_args_size'): s += ' %s: %s\n' % (name, instr.args[0]) elif name == 'DW_CFA_def_cfa_expression': expr_dumper = ExprDumper(entry.structs) - expr_dumper.process_expr(instr.args[0]) # readelf output is missing a colon for DW_CFA_def_cfa_expression - s += ' %s (%s)\n' % (name, expr_dumper.get_str()) + s += ' %s (%s)\n' % (name, expr_dumper.dump_expr(instr.args[0])) elif name == 'DW_CFA_expression': expr_dumper = ExprDumper(entry.structs) - expr_dumper.process_expr(instr.args[1]) s += ' %s: %s (%s)\n' % ( - name, _full_reg_name(instr.args[0]), expr_dumper.get_str()) + name, _full_reg_name(instr.args[0]), + expr_dumper.dump_expr(instr.args[1])) else: s += ' %s: \n' % name @@ -133,7 +132,7 @@ def describe_CFI_CFA_rule(rule): 
return '%s%+d' % (describe_reg_name(rule.reg), rule.offset) -def describe_DWARF_expr(expr, structs): +def describe_DWARF_expr(expr, structs, cu_offset=None): """ Textual description of a DWARF expression encoded in 'expr'. structs should come from the entity encompassing the expression - it's needed to be able to parse it correctly. @@ -146,9 +145,7 @@ def describe_DWARF_expr(expr, structs): _DWARF_EXPR_DUMPER_CACHE[cache_key] = \ ExprDumper(structs) dwarf_expr_dumper = _DWARF_EXPR_DUMPER_CACHE[cache_key] - dwarf_expr_dumper.clear() - dwarf_expr_dumper.process_expr(expr) - return '(' + dwarf_expr_dumper.get_str() + ')' + return '(' + dwarf_expr_dumper.dump_expr(expr, cu_offset) + ')' def describe_reg_name(regnum, machine_arch=None, default=True): @@ -162,6 +159,8 @@ def describe_reg_name(regnum, machine_arch=None, default=True): return _REG_NAMES_x86[regnum] elif machine_arch == 'x64': return _REG_NAMES_x64[regnum] + elif machine_arch == 'AArch64': + return _REG_NAMES_AArch64[regnum] elif default: return 'r%s' % regnum else: @@ -314,7 +313,6 @@ def _describe_attr_block(attr, die, section_offset): DW_LANG_D: '(D)', DW_LANG_Python: '(Python)', DW_LANG_Mips_Assembler: '(MIPS assembler)', - DW_LANG_Upc: '(nified Parallel C)', DW_LANG_HP_Bliss: '(HP Bliss)', DW_LANG_HP_Basic91: '(HP Basic 91)', DW_LANG_HP_Pascal91: '(HP Pascal 91)', @@ -339,6 +337,7 @@ def _describe_attr_block(attr, die, section_offset): DW_ATE_edited: '(edited)', DW_ATE_signed_fixed: '(signed_fixed)', DW_ATE_unsigned_fixed: '(unsigned_fixed)', + DW_ATE_UTF: '(unicode string)', DW_ATE_HP_float80: '(HP_float80)', DW_ATE_HP_complex_float80: '(HP_complex_float80)', DW_ATE_HP_float128: '(HP_float128)', @@ -425,7 +424,7 @@ def _location_list_extra(attr, die, section_offset): if attr.form in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'): return '(location list)' else: - return describe_DWARF_expr(attr.value, die.cu.structs) + return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset) 
def _data_member_location_extra(attr, die, section_offset): @@ -438,7 +437,7 @@ def _data_member_location_extra(attr, die, section_offset): elif attr.form == 'DW_FORM_sdata': return str(attr.value) else: - return describe_DWARF_expr(attr.value, die.cu.structs) + return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset) def _import_extra(attr, die, section_offset): @@ -531,46 +530,66 @@ def _import_extra(attr, die, section_offset): 'mxcsr', 'fcw', 'fsw' ] +# https://developer.arm.com/docs/ihi0057/c/dwarf-for-the-arm-64-bit-architecture-aarch64-abi-2018q4#id24 +_REG_NAMES_AArch64 = [ + 'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', + 'x10', 'x11', 'x12', 'x13', 'x14', 'x15', 'x16', 'x17', 'x18', 'x19', + 'x20', 'x21', 'x22', 'x23', 'x24', 'x25', 'x26', 'x27', 'x28', 'x29', + 'x30', 'sp' +] + -class ExprDumper(GenericExprVisitor): - """ A concrete visitor for DWARF expressions that dumps a textual +class ExprDumper(object): + """ A dumper for DWARF expressions that dumps a textual representation of the complete expression. - Usage: after creation, call process_expr, and then get_str for a - semicolon-delimited string representation of the decoded expression. + Usage: after creation, call dump_expr repeatedly - it's stateless. """ def __init__(self, structs): - super(ExprDumper, self).__init__(structs) + self.structs = structs + self.expr_parser = DWARFExprParser(self.structs) self._init_lookups() - self._str_parts = [] - - def clear(self): - self._str_parts = [] - def get_str(self): - return '; '.join(self._str_parts) + def dump_expr(self, expr, cu_offset=None): + """ Parse and dump a DWARF expression. expr should be a list of + (integer) byte values. cu_offset is the cu_offset + value from the CU object where the expression resides. + Only affects a handful of GNU opcodes, if None is provided, + that's not a crash condition, only the expression dump will + not be consistent with that of readelf. 
+ + Returns a string representing the expression. + """ + parsed = self.expr_parser.parse_expr(expr) + s = [] + for deo in parsed: + s.append(self._dump_to_string(deo.op, deo.op_name, deo.args, cu_offset)) + return '; '.join(s) def _init_lookups(self): self._ops_with_decimal_arg = set([ 'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s', - 'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_constu', 'DW_OP_consts', - 'DW_OP_pick', 'DW_OP_plus_uconst', 'DW_OP_bra', 'DW_OP_skip', - 'DW_OP_fbreg', 'DW_OP_piece', 'DW_OP_deref_size', - 'DW_OP_xderef_size', 'DW_OP_regx',]) + 'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_const8u', 'DW_OP_const8s', + 'DW_OP_constu', 'DW_OP_consts', 'DW_OP_pick', 'DW_OP_plus_uconst', + 'DW_OP_bra', 'DW_OP_skip', 'DW_OP_fbreg', 'DW_OP_piece', + 'DW_OP_deref_size', 'DW_OP_xderef_size', 'DW_OP_regx',]) for n in range(0, 32): self._ops_with_decimal_arg.add('DW_OP_breg%s' % n) - self._ops_with_two_decimal_args = set([ - 'DW_OP_const8u', 'DW_OP_const8s', 'DW_OP_bregx', 'DW_OP_bit_piece']) + self._ops_with_two_decimal_args = set(['DW_OP_bregx', 'DW_OP_bit_piece']) self._ops_with_hex_arg = set( ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref']) - def _after_visit(self, opcode, opcode_name, args): - self._str_parts.append(self._dump_to_string(opcode, opcode_name, args)) + def _dump_to_string(self, opcode, opcode_name, args, cu_offset=None): + # Some GNU ops contain an offset from the current CU as an argument, + # but readelf emits those ops with offset from the info section + # so we need the base offset of the parent CU. + # If omitted, arguments on some GNU opcodes will be off. 
+ if cu_offset is None: + cu_offset = 0 - def _dump_to_string(self, opcode, opcode_name, args): if len(args) == 0: if opcode_name.startswith('DW_OP_reg'): regnum = int(opcode_name[9:]) @@ -598,5 +617,21 @@ def _dump_to_string(self, opcode, opcode_name, args): return '%s: %x' % (opcode_name, args[0]) elif opcode_name in self._ops_with_two_decimal_args: return '%s: %s %s' % (opcode_name, args[0], args[1]) + elif opcode_name == 'DW_OP_GNU_entry_value': + return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args) for deo in args[0]])) + elif opcode_name == 'DW_OP_implicit_value': + return "%s %s byte block: %s" % (opcode_name, len(args[0]), ''.join(["%x " % b for b in args[0]])) + elif opcode_name == 'DW_OP_GNU_parameter_ref': + return "%s: <0x%x>" % (opcode_name, args[0] + cu_offset) + elif opcode_name == 'DW_OP_GNU_implicit_pointer': + return "%s: <0x%x> %d" % (opcode_name, args[0], args[1]) + elif opcode_name == 'DW_OP_GNU_convert': + return "%s <0x%x>" % (opcode_name, args[0] + cu_offset) + elif opcode_name == 'DW_OP_GNU_deref_type': + return "%s: %d <0x%x>" % (opcode_name, args[0], args[1] + cu_offset) + elif opcode_name == 'DW_OP_GNU_const_type': + return "%s: <0x%x> %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1])) + elif opcode_name == 'DW_OP_GNU_regval_type': + return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset) else: return '<unknown %s>' % opcode_name diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 184ff8cc..dd9d5925 100755 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -86,7 +86,9 @@ def __init__(self, cu, stream, offset): self.has_children = None self.abbrev_code = None self.size = 0 - self._children = [] + # Null DIE terminator. It can be used to obtain offset range occupied + # by this DIE including its whole subtree. 
+ self._terminator = None self._parent = None self._parse_DIE() @@ -96,10 +98,34 @@ def is_null(self): """ return self.tag is None + def get_DIE_from_attribute(self, name): + """ Return the DIE referenced by the named attribute of this DIE. + The attribute must be in the reference attribute class. + + name: + The name of the attribute in the reference class. + """ + attr = self.attributes[name] + if attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', + 'DW_FORM_ref8', 'DW_FORM_ref'): + refaddr = self.cu.cu_offset + attr.raw_value + return self.cu.get_DIE_from_refaddr(refaddr) + elif attr.form in ('DW_FORM_ref_addr'): + return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value) + elif attr.form in ('DW_FORM_ref_sig8'): + # Implement search type units for matching signature + raise NotImplementedError('%s (type unit by signature)' % attr.form) + elif attr.form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8'): + raise NotImplementedError('%s to dwo' % attr.form) + else: + raise DWARFError('%s is not a reference class form attribute' % attr) + def get_parent(self): - """ The parent DIE of this DIE. None if the DIE has no parent (i.e. a - top-level DIE). + """ Return the parent DIE of this DIE, or None if the DIE has no + parent (i.e. is a top-level DIE). 
""" + if self._parent is None: + self._search_ancestor_offspring() return self._parent def get_full_path(self): @@ -117,15 +143,16 @@ def get_full_path(self): return os.path.join(comp_dir, fname) def iter_children(self): - """ Yield all children of this DIE + """ Iterates all children of this DIE """ - return iter(self._children) + return self.cu.iter_DIE_children(self) def iter_siblings(self): """ Yield all siblings of this DIE """ - if self._parent: - for sibling in self._parent.iter_children(): + parent = self.get_parent() + if parent: + for sibling in parent.iter_children(): if sibling is not self: yield sibling else: @@ -134,14 +161,48 @@ def iter_siblings(self): # The following methods are used while creating the DIE and should not be # interesting to consumers # - def add_child(self, die): - self._children.append(die) def set_parent(self, die): self._parent = die #------ PRIVATE ------# + def _search_ancestor_offspring(self): + """ Search our ancestors identifying their offspring to find our parent. + + DIEs are stored as a flattened tree. The top DIE is the ancestor + of all DIEs in the unit. Each parent is guaranteed to be at + an offset less than their children. In each generation of children + the sibling with the closest offset not greater than our offset is + our ancestor. + """ + # This code is called when get_parent notices that the _parent has + # not been identified. To avoid execution for each sibling record all + # the children of any parent iterated. Assuming get_parent will also be + # called for siblings, it is more efficient if siblings references are + # provided and no worse than a single walk if they are missing, while + # stopping iteration early could result in O(n^2) walks. 
+ search = self.cu.get_top_DIE() + while search.offset < self.offset: + prev = search + for child in search.iter_children(): + child.set_parent(search) + if child.offset <= self.offset: + prev = child + + # We also need to check the offset of the terminator DIE + if search.has_children and search._terminator.offset <= self.offset: + prev = search._terminator + + # If we didn't find a closer parent, give up, don't loop. + # Either we mis-parsed an ancestor or someone created a DIE + # by an offset that was not actually the start of a DIE. + if prev is search: + raise ValueError("offset %s not in CU %s DIE tree" % + (self.offset, self.cu.cu_offset)) + + search = prev + def __repr__(self): s = 'DIE %s, size=%s, has_children=%s\n' % ( self.tag, self.size, self.has_children) @@ -170,9 +231,7 @@ def _parse_DIE(self): self.size = self.stream.tell() - self.offset return - with preserve_stream_pos(self.stream): - abbrev_decl = self.cu.get_abbrev_table().get_abbrev( - self.abbrev_code) + abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code) self.tag = abbrev_decl['tag'] self.has_children = abbrev_decl.has_children() @@ -190,19 +249,6 @@ def _parse_DIE(self): raw_value=raw_value, offset=attr_offset) - # Count and then consume any null termination bytes to avoid wrong die - # size calculation. - num_zero_terminators = 0 - with preserve_stream_pos(self.stream): - while True: - if self.stream.read(1) == 0: - num_zero_terminators += 1 - else: - break - if num_zero_terminators > 0: - # There was at least one zero termination -> consume all of them. 
- self.stream.read(num_zero_terminators) - self.size = self.stream.tell() - self.offset def _translate_attr_value(self, form, raw_value): @@ -214,6 +260,8 @@ def _translate_attr_value(self, form, raw_value): value = self.dwarfinfo.get_string_from_table(raw_value) elif form == 'DW_FORM_flag': value = not raw_value == 0 + elif form == 'DW_FORM_flag_present': + value = True elif form == 'DW_FORM_indirect': try: form = DW_FORM_raw2name[raw_value] diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index 270a7816..bb85daa0 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -6,8 +6,10 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from collections import namedtuple + from ..common.py3compat import BytesIO, iteritems -from ..common.utils import struct_parse, bytelist2string +from ..common.utils import struct_parse, bytelist2string, read_blob # DWARF expression opcodes. 
name -> opcode mapping @@ -68,6 +70,28 @@ DW_OP_form_tls_address=0x9b, DW_OP_call_frame_cfa=0x9c, DW_OP_bit_piece=0x9d, + DW_OP_implicit_value=0x9e, + DW_OP_stack_value=0x9f, + DW_OP_implicit_pointer=0xa0, + DW_OP_addrx=0xa1, + DW_OP_constx=0xa2, + DW_OP_entry_value=0xa3, + DW_OP_const_type=0xa4, + DW_OP_regval_type=0xa5, + DW_OP_deref_type=0xa6, + DW_OP_xderef_type=0xa7, + DW_OP_convert=0xa8, + DW_OP_reinterpret=0xa9, + DW_OP_lo_user=0xe0, + DW_OP_GNU_push_tls_address=0xe0, + DW_OP_GNU_implicit_pointer=0xf2, + DW_OP_GNU_entry_value=0xf3, + DW_OP_GNU_const_type=0xf4, + DW_OP_GNU_regval_type=0xf5, + DW_OP_GNU_deref_type=0xf6, + DW_OP_GNU_convert=0xf7, + DW_OP_GNU_parameter_ref=0xfa, + DW_OP_hi_user=0xff, ) def _generate_dynamic_values(map, prefix, index_start, index_end, value_start): @@ -88,170 +112,145 @@ def _generate_dynamic_values(map, prefix, index_start, index_end, value_start): DW_OP_opcode2name = dict((v, k) for k, v in iteritems(DW_OP_name2opcode)) -class GenericExprVisitor(object): - """ A DWARF expression is a sequence of instructions encoded in a block - of bytes. This class decodes the sequence into discrete instructions - with their arguments and allows generic "visiting" to process them. +# Each parsed DWARF expression is returned as this type with its numeric opcode, +# op name (as a string) and a list of arguments. +DWARFExprOp = namedtuple('DWARFExprOp', 'op op_name args') + - Usage: subclass this class, and override the needed methods. The - easiest way would be to just override _after_visit, which gets passed - each decoded instruction (with its arguments) in order. Clients of - the visitor then just execute process_expr. The subclass can keep - its own internal information updated in _after_visit and provide - methods to extract it. For a good example of this usage, see the - ExprDumper class in the descriptions module. +class DWARFExprParser(object): + """DWARF expression parser. 
- A more complex usage could be to override visiting methods for - specific instructions, by placing them into the dispatch table. + When initialized, requires structs to cache a dispatch table. After that, + parse_expr can be called repeatedly - it's stateless. """ + def __init__(self, structs): - self.structs = structs - self._init_dispatch_table() - self.stream = None - self._cur_opcode = None - self._cur_opcode_name = None - self._cur_args = [] - - def process_expr(self, expr): - """ Process (visit) a DWARF expression. expr should be a list of - (integer) byte values. + self._dispatch_table = _init_dispatch_table(structs) + + def parse_expr(self, expr): + """ Parses expr (a list of integers) into a list of DWARFExprOp. + + The list can potentially be nested. """ - self.stream = BytesIO(bytelist2string(expr)) + stream = BytesIO(bytelist2string(expr)) + parsed = [] while True: # Get the next opcode from the stream. If nothing is left in the # stream, we're done. - byte = self.stream.read(1) + byte = stream.read(1) if len(byte) == 0: break - # Decode the opcode and its name - self._cur_opcode = ord(byte) - self._cur_opcode_name = DW_OP_opcode2name.get( - self._cur_opcode, 'OP:0x%x' % self._cur_opcode) - # Will be filled in by visitors - self._cur_args = [] - - # Dispatch to a visitor function - visitor = self._dispatch_table.get( - self._cur_opcode, - self._default_visitor) - visitor(self._cur_opcode, self._cur_opcode_name) - - # Finally call the post-visit function - self._after_visit( - self._cur_opcode, self._cur_opcode_name, self._cur_args) - - def _after_visit(self, opcode, opcode_name, args): - pass - - def _default_visitor(self, opcode, opcode_name): - pass - - def _visit_OP_with_no_args(self, opcode, opcode_name): - self._cur_args = [] - - def _visit_OP_addr(self, opcode, opcode_name): - self._cur_args = [ - struct_parse(self.structs.Dwarf_target_addr(''), self.stream)] - - def _make_visitor_arg_struct(self, struct_arg): - """ Create a visitor method for 
an opcode that that accepts a single - argument, specified by a struct. - """ - def visitor(opcode, opcode_name): - self._cur_args = [struct_parse(struct_arg, self.stream)] - return visitor + # Decode the opcode and its name. + op = ord(byte) + op_name = DW_OP_opcode2name.get(op, 'OP:0x%x' % op) - def _make_visitor_arg_struct2(self, struct_arg1, struct_arg2): - """ Create a visitor method for an opcode that that accepts two - arguments, specified by structs. - """ - def visitor(opcode, opcode_name): - self._cur_args = [ - struct_parse(struct_arg1, self.stream), - struct_parse(struct_arg2, self.stream)] - return visitor - - def _init_dispatch_table(self): - self._dispatch_table = {} - def add(opcode_name, func): - self._dispatch_table[DW_OP_name2opcode[opcode_name]] = func - - add('DW_OP_addr', self._visit_OP_addr) - add('DW_OP_const1u', - self._make_visitor_arg_struct(self.structs.Dwarf_uint8(''))) - add('DW_OP_const1s', - self._make_visitor_arg_struct(self.structs.Dwarf_int8(''))) - add('DW_OP_const2u', - self._make_visitor_arg_struct(self.structs.Dwarf_uint16(''))) - add('DW_OP_const2s', - self._make_visitor_arg_struct(self.structs.Dwarf_int16(''))) - add('DW_OP_const4u', - self._make_visitor_arg_struct(self.structs.Dwarf_uint32(''))) - add('DW_OP_const4s', - self._make_visitor_arg_struct(self.structs.Dwarf_int32(''))) - add('DW_OP_const8u', - self._make_visitor_arg_struct2( - self.structs.Dwarf_uint32(''), - self.structs.Dwarf_uint32(''))) - add('DW_OP_const8s', - self._make_visitor_arg_struct2( - self.structs.Dwarf_int32(''), - self.structs.Dwarf_int32(''))) - add('DW_OP_constu', - self._make_visitor_arg_struct(self.structs.Dwarf_uleb128(''))) - add('DW_OP_consts', - self._make_visitor_arg_struct(self.structs.Dwarf_sleb128(''))) - add('DW_OP_pick', - self._make_visitor_arg_struct(self.structs.Dwarf_uint8(''))) - add('DW_OP_plus_uconst', - self._make_visitor_arg_struct(self.structs.Dwarf_uleb128(''))) - add('DW_OP_bra', - 
self._make_visitor_arg_struct(self.structs.Dwarf_int16(''))) - add('DW_OP_skip', - self._make_visitor_arg_struct(self.structs.Dwarf_int16(''))) - - for opname in [ 'DW_OP_deref', 'DW_OP_dup', 'DW_OP_drop', 'DW_OP_over', - 'DW_OP_swap', 'DW_OP_swap', 'DW_OP_rot', 'DW_OP_xderef', - 'DW_OP_abs', 'DW_OP_and', 'DW_OP_div', 'DW_OP_minus', - 'DW_OP_mod', 'DW_OP_mul', 'DW_OP_neg', 'DW_OP_not', - 'DW_OP_plus', 'DW_OP_shl', 'DW_OP_shr', 'DW_OP_shra', - 'DW_OP_xor', 'DW_OP_eq', 'DW_OP_ge', 'DW_OP_gt', - 'DW_OP_le', 'DW_OP_lt', 'DW_OP_ne', 'DW_OP_nop', - 'DW_OP_push_object_address', 'DW_OP_form_tls_address', - 'DW_OP_call_frame_cfa']: - add(opname, self._visit_OP_with_no_args) - - for n in range(0, 32): - add('DW_OP_lit%s' % n, self._visit_OP_with_no_args) - add('DW_OP_reg%s' % n, self._visit_OP_with_no_args) - add('DW_OP_breg%s' % n, - self._make_visitor_arg_struct(self.structs.Dwarf_sleb128(''))) - - add('DW_OP_fbreg', - self._make_visitor_arg_struct(self.structs.Dwarf_sleb128(''))) - add('DW_OP_regx', - self._make_visitor_arg_struct(self.structs.Dwarf_uleb128(''))) - add('DW_OP_bregx', - self._make_visitor_arg_struct2( - self.structs.Dwarf_uleb128(''), - self.structs.Dwarf_sleb128(''))) - add('DW_OP_piece', - self._make_visitor_arg_struct(self.structs.Dwarf_uleb128(''))) - add('DW_OP_bit_piece', - self._make_visitor_arg_struct2( - self.structs.Dwarf_uleb128(''), - self.structs.Dwarf_uleb128(''))) - add('DW_OP_deref_size', - self._make_visitor_arg_struct(self.structs.Dwarf_int8(''))) - add('DW_OP_xderef_size', - self._make_visitor_arg_struct(self.structs.Dwarf_int8(''))) - add('DW_OP_call2', - self._make_visitor_arg_struct(self.structs.Dwarf_uint16(''))) - add('DW_OP_call4', - self._make_visitor_arg_struct(self.structs.Dwarf_uint32(''))) - add('DW_OP_call_ref', - self._make_visitor_arg_struct(self.structs.Dwarf_offset(''))) + # Use dispatch table to parse args. 
+ arg_parser = self._dispatch_table[op] + args = arg_parser(stream) + + parsed.append(DWARFExprOp(op=op, op_name=op_name, args=args)) + + return parsed + + +def _init_dispatch_table(structs): + """Creates a dispatch table for parsing args of an op. + + Returns a dict mapping opcode to a function. The function accepts a stream + and return a list of parsed arguments for the opcode from the stream; + the stream is advanced by the function as needed. + """ + table = {} + def add(opcode_name, func): + table[DW_OP_name2opcode[opcode_name]] = func + + def parse_noargs(): + return lambda stream: [] + + def parse_op_addr(): + return lambda stream: [struct_parse(structs.Dwarf_target_addr(''), + stream)] + + def parse_arg_struct(arg_struct): + return lambda stream: [struct_parse(arg_struct, stream)] + + def parse_arg_struct2(arg1_struct, arg2_struct): + return lambda stream: [struct_parse(arg1_struct, stream), + struct_parse(arg2_struct, stream)] + + # ULEB128, then an expression of that length + def parse_nestedexpr(): + def parse(stream): + size = struct_parse(structs.Dwarf_uleb128(''), stream) + nested_expr_blob = read_blob(stream, size) + return [DWARFExprParser(structs).parse_expr(nested_expr_blob)] + return parse + + # ULEB128, then a blob of that size + def parse_blob(): + return lambda stream: [read_blob(stream, struct_parse(structs.Dwarf_uleb128(''), stream))] + + # ULEB128 with datatype DIE offset, then byte, then a blob of that size + def parse_typedblob(): + return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))] + + add('DW_OP_addr', parse_op_addr()) + add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8(''))) + add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8(''))) + add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16(''))) + add('DW_OP_const2s', parse_arg_struct(structs.Dwarf_int16(''))) + add('DW_OP_const4u', parse_arg_struct(structs.Dwarf_uint32(''))) + 
add('DW_OP_const4s', parse_arg_struct(structs.Dwarf_int32(''))) + add('DW_OP_const8u', parse_arg_struct(structs.Dwarf_uint64(''))) + add('DW_OP_const8s', parse_arg_struct(structs.Dwarf_int64(''))) + add('DW_OP_constu', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_consts', parse_arg_struct(structs.Dwarf_sleb128(''))) + add('DW_OP_pick', parse_arg_struct(structs.Dwarf_uint8(''))) + add('DW_OP_plus_uconst', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_bra', parse_arg_struct(structs.Dwarf_int16(''))) + add('DW_OP_skip', parse_arg_struct(structs.Dwarf_int16(''))) + + for opname in [ 'DW_OP_deref', 'DW_OP_dup', 'DW_OP_drop', 'DW_OP_over', + 'DW_OP_swap', 'DW_OP_swap', 'DW_OP_rot', 'DW_OP_xderef', + 'DW_OP_abs', 'DW_OP_and', 'DW_OP_div', 'DW_OP_minus', + 'DW_OP_mod', 'DW_OP_mul', 'DW_OP_neg', 'DW_OP_not', + 'DW_OP_or', 'DW_OP_plus', 'DW_OP_shl', 'DW_OP_shr', + 'DW_OP_shra', 'DW_OP_xor', 'DW_OP_eq', 'DW_OP_ge', + 'DW_OP_gt', 'DW_OP_le', 'DW_OP_lt', 'DW_OP_ne', 'DW_OP_nop', + 'DW_OP_push_object_address', 'DW_OP_form_tls_address', + 'DW_OP_call_frame_cfa', 'DW_OP_stack_value', + 'DW_OP_GNU_push_tls_address']: + add(opname, parse_noargs()) + + for n in range(0, 32): + add('DW_OP_lit%s' % n, parse_noargs()) + add('DW_OP_reg%s' % n, parse_noargs()) + add('DW_OP_breg%s' % n, parse_arg_struct(structs.Dwarf_sleb128(''))) + add('DW_OP_fbreg', parse_arg_struct(structs.Dwarf_sleb128(''))) + add('DW_OP_regx', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_bregx', parse_arg_struct2(structs.Dwarf_uleb128(''), + structs.Dwarf_sleb128(''))) + add('DW_OP_piece', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_bit_piece', parse_arg_struct2(structs.Dwarf_uleb128(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_deref_size', parse_arg_struct(structs.Dwarf_int8(''))) + add('DW_OP_xderef_size', parse_arg_struct(structs.Dwarf_int8(''))) + add('DW_OP_call2', parse_arg_struct(structs.Dwarf_uint16(''))) + add('DW_OP_call4', 
parse_arg_struct(structs.Dwarf_uint32(''))) + add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset(''))) + add('DW_OP_implicit_value', parse_blob()) + add('DW_OP_GNU_entry_value', parse_nestedexpr()) + add('DW_OP_GNU_const_type', parse_typedblob()) + add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_GNU_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_GNU_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''), + structs.Dwarf_sleb128(''))) + add('DW_OP_GNU_parameter_ref', parse_arg_struct(structs.Dwarf_offset(''))) + add('DW_OP_GNU_convert', parse_arg_struct(structs.Dwarf_uleb128(''))) + return table diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index b8faf9d7..103fc491 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -7,6 +7,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- from collections import namedtuple +from bisect import bisect_right from ..common.exceptions import DWARFError from ..common.utils import (struct_parse, dwarf_assert, @@ -19,6 +20,7 @@ from .locationlists import LocationLists from .ranges import RangeLists from .aranges import ARanges +from .namelut import NameLUT # Describes a debug section @@ -67,7 +69,9 @@ def __init__(self, debug_str_sec, debug_loc_sec, debug_ranges_sec, - debug_line_sec): + debug_line_sec, + debug_pubtypes_sec, + debug_pubnames_sec): """ config: A DwarfConfig object @@ -86,6 +90,8 @@ def __init__(self, self.debug_loc_sec = debug_loc_sec self.debug_ranges_sec = debug_ranges_sec self.debug_line_sec = debug_line_sec + self.debug_pubtypes_sec = debug_pubtypes_sec + self.debug_pubnames_sec = debug_pubnames_sec # This is the DWARFStructs the context uses, so it doesn't depend on # DWARF format and address_size (these are determined per CU) - set them @@ -98,6 +104,11 @@ def 
__init__(self, # Cache for abbrev tables: a dict keyed by offset self._abbrevtable_cache = {} + # Cache of compile units and map of their offsets for bisect lookup. + # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at(). + self._cu_cache = [] + self._cu_offsets_map = [] + @property def has_debug_info(self): """ Return whether this contains debug information. @@ -107,6 +118,84 @@ def has_debug_info(self): """ return bool(self.debug_info_sec) + def get_DIE_from_lut_entry(self, lut_entry): + """ Get the DIE from the pubnames or pubtypes lookup table entry. + + lut_entry: + A NameLUTEntry object from a NameLUT instance (see + .get_pubnames and .get_pubtypes methods). + """ + cu = self.get_CU_at(lut_entry.cu_ofs) + return self.get_DIE_from_refaddr(lut_entry.die_ofs, cu) + + def get_DIE_from_refaddr(self, refaddr, cu=None): + """ Given a .debug_info section offset of a DIE, return the DIE. + + refaddr: + The refaddr may come from a DW_FORM_ref_addr attribute. + + cu: + The compile unit object, if known. If None a search + from the closest offset less than refaddr will be performed. + """ + if cu is None: + cu = self.get_CU_containing(refaddr) + return cu.get_DIE_from_refaddr(refaddr) + + def get_CU_containing(self, refaddr): + """ Find the CU that includes the given reference address in the + .debug_info section. + + refaddr: + Either a refaddr of a DIE (possibly from a DW_FORM_ref_addr + attribute) or the section offset of a CU (possibly from an + aranges table). + + This function will parse and cache CUs until the search criteria + is met, starting from the closest known offset less than or equal + to the given address. + """ + dwarf_assert( + self.has_debug_info, + 'CU lookup but no debug info section') + dwarf_assert( + 0 <= refaddr < self.debug_info_sec.size, + "refaddr %s beyond .debug_info size" % refaddr) + + # The CU containing the DIE we desire will be to the right of the + # DIE insert point. 
If we have a CU address, then it will be a + # match but the right insert minus one will still be the item. + # The first CU starts at offset 0, so start there if cache is empty. + i = bisect_right(self._cu_offsets_map, refaddr) + start = self._cu_offsets_map[i - 1] if i > 0 else 0 + + # parse CUs until we find one containing the desired address + for cu in self._parse_CUs_iter(start): + if cu.cu_offset <= refaddr < cu.cu_offset + cu.size: + return cu + + raise ValueError("CU for reference address %s not found" % refaddr) + + def get_CU_at(self, offset): + """ Given a CU header offset, return the parsed CU. + + offset: + The offset may be from an accelerated access table such as + the public names, public types, address range table, or + prior use. + + This function will directly parse the CU doing no validation of + the offset beyond checking the size of the .debug_info section. + """ + dwarf_assert( + self.has_debug_info, + 'CU lookup but no debug info section') + dwarf_assert( + 0 <= offset < self.debug_info_sec.size, + "offset %s beyond .debug_info size" % offset) + + return self._cached_CU_at_offset(offset) + def iter_CUs(self): """ Yield all the compile units (CompileUnit objects) in the debug info """ @@ -185,13 +274,45 @@ def EH_CFI_entries(self): for_eh_frame=True) return cfi.get_entries() + def get_pubtypes(self): + """ + Returns a NameLUT object that contains information read from the + .debug_pubtypes section in the ELF file. + + NameLUT is essentially a dictionary containing the CU/DIE offsets of + each symbol. See the NameLUT doc string for more details. + """ + + if self.debug_pubtypes_sec: + return NameLUT(self.debug_pubtypes_sec.stream, + self.debug_pubtypes_sec.size, + self.structs) + else: + return None + + def get_pubnames(self): + """ + Returns a NameLUT object that contains information read from the + .debug_pubnames section in the ELF file. + + NameLUT is essentially a dictionary containing the CU/DIE offsets of + each symbol. 
See the NameLUT doc string for more details. + """ + + if self.debug_pubnames_sec: + return NameLUT(self.debug_pubnames_sec.stream, + self.debug_pubnames_sec.size, + self.structs) + else: + return None + def get_aranges(self): """ Get an ARanges object representing the .debug_aranges section of the DWARF data, or None if the section doesn't exist """ if self.debug_aranges_sec: - return ARanges(self.debug_aranges_sec.stream, - self.debug_aranges_sec.size, + return ARanges(self.debug_aranges_sec.stream, + self.debug_aranges_sec.size, self.structs) else: return None @@ -216,15 +337,20 @@ def range_lists(self): #------ PRIVATE ------# - def _parse_CUs_iter(self): - """ Parse CU entries from debug_info. Yield CUs in order of appearance. + def _parse_CUs_iter(self, offset=0): + """ Iterate CU objects in order of appearance in the debug_info section. + + offset: + The offset of the first CU to yield. Additional iterations + will return the sequential unit objects. + + See .iter_CUs(), .get_CU_containing(), and .get_CU_at(). """ if self.debug_info_sec is None: return - offset = 0 while offset < self.debug_info_sec.size: - cu = self._parse_CU_at_offset(offset) + cu = self._cached_CU_at_offset(offset) # Compute the offset of the next CU in the section. The unit_length # field of the CU header contains its size not including the length # field itself. @@ -233,6 +359,32 @@ def _parse_CUs_iter(self): cu.structs.initial_length_field_size()) yield cu + def _cached_CU_at_offset(self, offset): + """ Return the CU with unit header at the given offset into the + debug_info section from the cache. If not present, the unit + header is parsed and the object is installed in the cache. + + offset: + The offset of the unit header in the .debug_info section + of the unit to fetch from the cache. + + See get_CU_at(). + """ + # Find the insert point for the requested offset. With bisect_right, + # if this entry is present in the cache it will be the prior entry. 
+ i = bisect_right(self._cu_offsets_map, offset) + if i >= 1 and offset == self._cu_offsets_map[i - 1]: + return self._cu_cache[i - 1] + + # Parse the CU and insert the offset and object into the cache. + # The ._cu_offsets_map[] contains just the numeric offsets for the + # bisect_right search while the parallel indexed ._cu_cache[] holds + # the object references. + cu = self._parse_CU_at_offset(offset) + self._cu_offsets_map.insert(i, offset) + self._cu_cache.insert(i, cu) + return cu + def _parse_CU_at_offset(self, offset): """ Parse and return a CU at the given offset in the debug_info stream. """ @@ -248,24 +400,25 @@ def _parse_CU_at_offset(self, offset): self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset) dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 - # At this point we still haven't read the whole header, so we don't - # know the address_size. Therefore, we're going to create structs - # with a default address_size=4. If, after parsing the header, we - # find out address_size is actually 8, we just create a new structs - # object for this CU. + + # Temporary structs for parsing the header + # The structs for the rest of the CU depend on the header data. 
# cu_structs = DWARFStructs( little_endian=self.config.little_endian, dwarf_format=dwarf_format, - address_size=4) + address_size=4, + dwarf_version=2) cu_header = struct_parse( cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset) - if cu_header['address_size'] == 8: - cu_structs = DWARFStructs( - little_endian=self.config.little_endian, - dwarf_format=dwarf_format, - address_size=8) + + # structs for the rest of the CU, taking into account bitness and DWARF version + cu_structs = DWARFStructs( + little_endian=self.config.little_endian, + dwarf_format=dwarf_format, + address_size=cu_header['address_size'], + dwarf_version=cu_header['version']) cu_die_offset = self.debug_info_sec.stream.tell() dwarf_assert( @@ -303,4 +456,3 @@ def _parse_line_program_at_offset(self, debug_line_offset, structs): structs=structs, program_start_offset=self.debug_line_sec.stream.tell(), program_end_offset=end_offset) - diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py index 903e7d51..c9008672 100644 --- a/elftools/dwarf/enums.py +++ b/elftools/dwarf/enums.py @@ -11,79 +11,96 @@ ENUM_DW_TAG = dict( - DW_TAG_null = 0x00, - DW_TAG_array_type = 0x01, - DW_TAG_class_type = 0x02, - DW_TAG_entry_point = 0x03, - DW_TAG_enumeration_type = 0x04, - DW_TAG_formal_parameter = 0x05, - DW_TAG_imported_declaration = 0x08, - DW_TAG_label = 0x0a, - DW_TAG_lexical_block = 0x0b, - DW_TAG_member = 0x0d, - DW_TAG_pointer_type = 0x0f, - DW_TAG_reference_type = 0x10, - DW_TAG_compile_unit = 0x11, - DW_TAG_string_type = 0x12, - DW_TAG_structure_type = 0x13, - DW_TAG_subroutine_type = 0x15, - DW_TAG_typedef = 0x16, - DW_TAG_union_type = 0x17, - DW_TAG_unspecified_parameters = 0x18, - DW_TAG_variant = 0x19, - DW_TAG_common_block = 0x1a, - DW_TAG_common_inclusion = 0x1b, - DW_TAG_inheritance = 0x1c, - DW_TAG_inlined_subroutine = 0x1d, - DW_TAG_module = 0x1e, - DW_TAG_ptr_to_member_type = 0x1f, - DW_TAG_set_type = 0x20, - DW_TAG_subrange_type = 0x21, - DW_TAG_with_stmt = 0x22, - 
DW_TAG_access_declaration = 0x23, - DW_TAG_base_type = 0x24, - DW_TAG_catch_block = 0x25, - DW_TAG_const_type = 0x26, - DW_TAG_constant = 0x27, - DW_TAG_enumerator = 0x28, - DW_TAG_file_type = 0x29, - DW_TAG_friend = 0x2a, - DW_TAG_namelist = 0x2b, - DW_TAG_namelist_item = 0x2c, - DW_TAG_namelist_items = 0x2c, - DW_TAG_packed_type = 0x2d, - DW_TAG_subprogram = 0x2e, + DW_TAG_null = 0x00, + DW_TAG_array_type = 0x01, + DW_TAG_class_type = 0x02, + DW_TAG_entry_point = 0x03, + DW_TAG_enumeration_type = 0x04, + DW_TAG_formal_parameter = 0x05, + DW_TAG_global_subroutine = 0x06, + DW_TAG_global_variable = 0x07, + DW_TAG_imported_declaration = 0x08, + DW_TAG_label = 0x0a, + DW_TAG_lexical_block = 0x0b, + DW_TAG_local_variable = 0x0c, + DW_TAG_member = 0x0d, + DW_TAG_pointer_type = 0x0f, + DW_TAG_reference_type = 0x10, + DW_TAG_compile_unit = 0x11, + DW_TAG_string_type = 0x12, + DW_TAG_structure_type = 0x13, + DW_TAG_subroutine = 0x14, + DW_TAG_subroutine_type = 0x15, + DW_TAG_typedef = 0x16, + DW_TAG_union_type = 0x17, + DW_TAG_unspecified_parameters = 0x18, + DW_TAG_variant = 0x19, + DW_TAG_common_block = 0x1a, + DW_TAG_common_inclusion = 0x1b, + DW_TAG_inheritance = 0x1c, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_module = 0x1e, + DW_TAG_ptr_to_member_type = 0x1f, + DW_TAG_set_type = 0x20, + DW_TAG_subrange_type = 0x21, + DW_TAG_with_stmt = 0x22, + DW_TAG_access_declaration = 0x23, + DW_TAG_base_type = 0x24, + DW_TAG_catch_block = 0x25, + DW_TAG_const_type = 0x26, + DW_TAG_constant = 0x27, + DW_TAG_enumerator = 0x28, + DW_TAG_file_type = 0x29, + DW_TAG_friend = 0x2a, + DW_TAG_namelist = 0x2b, + DW_TAG_namelist_item = 0x2c, + DW_TAG_namelist_items = 0x2c, + DW_TAG_packed_type = 0x2d, + DW_TAG_subprogram = 0x2e, # The DWARF standard defines these as _parameter, not _param, but we # maintain compatibility with readelf. 
- DW_TAG_template_type_param = 0x2f, - DW_TAG_template_value_param = 0x30, + DW_TAG_template_type_param = 0x2f, + DW_TAG_template_value_param = 0x30, - DW_TAG_thrown_type = 0x31, - DW_TAG_try_block = 0x32, - DW_TAG_variant_part = 0x33, - DW_TAG_variable = 0x34, - DW_TAG_volatile_type = 0x35, - DW_TAG_dwarf_procedure = 0x36, - DW_TAG_restrict_type = 0x37, - DW_TAG_interface_type = 0x38, - DW_TAG_namespace = 0x39, - DW_TAG_imported_module = 0x3a, - DW_TAG_unspecified_type = 0x3b, - DW_TAG_partial_unit = 0x3c, - DW_TAG_imported_unit = 0x3d, - DW_TAG_mutable_type = 0x3e, - DW_TAG_condition = 0x3f, - DW_TAG_shared_type = 0x40, - DW_TAG_type_unit = 0x41, - DW_TAG_rvalue_reference_type = 0x42, + DW_TAG_thrown_type = 0x31, + DW_TAG_try_block = 0x32, + DW_TAG_variant_part = 0x33, + DW_TAG_variable = 0x34, + DW_TAG_volatile_type = 0x35, + DW_TAG_dwarf_procedure = 0x36, + DW_TAG_restrict_type = 0x37, + DW_TAG_interface_type = 0x38, + DW_TAG_namespace = 0x39, + DW_TAG_imported_module = 0x3a, + DW_TAG_unspecified_type = 0x3b, + DW_TAG_partial_unit = 0x3c, + DW_TAG_imported_unit = 0x3d, + DW_TAG_mutable_type = 0x3e, + DW_TAG_condition = 0x3f, + DW_TAG_shared_type = 0x40, + DW_TAG_type_unit = 0x41, + DW_TAG_rvalue_reference_type = 0x42, + DW_TAG_atomic_type = 0x47, + DW_TAG_call_site = 0x48, + DW_TAG_call_site_parameter = 0x49, + DW_TAG_skeleton_unit = 0x4a, + DW_TAG_immutable_type = 0x4b, - DW_TAG_lo_user = 0x4080, - DW_TAG_GNU_call_site = 0x4109, - DW_TAG_GNU_call_site_parameter = 0x410a, - DW_TAG_hi_user = 0xffff, - _default_ = Pass, + + DW_TAG_lo_user = 0x4080, + DW_TAG_GNU_template_template_param = 0x4106, + DW_TAG_GNU_template_parameter_pack = 0x4107, + DW_TAG_GNU_formal_parameter_pack = 0x4108, + DW_TAG_GNU_call_site = 0x4109, + DW_TAG_GNU_call_site_parameter = 0x410a, + + DW_TAG_APPLE_property = 0x4200, + + DW_TAG_hi_user = 0xffff, + + _default_ = Pass, ) @@ -94,104 +111,139 @@ ENUM_DW_AT = dict( - DW_AT_null = 0x00, - DW_AT_sibling = 0x01, - DW_AT_location = 0x02, - 
DW_AT_name = 0x03, - DW_AT_ordering = 0x09, - DW_AT_subscr_data = 0x0a, - DW_AT_byte_size = 0x0b, - DW_AT_bit_offset = 0x0c, - DW_AT_bit_size = 0x0d, - DW_AT_element_list = 0x0f, - DW_AT_stmt_list = 0x10, - DW_AT_low_pc = 0x11, - DW_AT_high_pc = 0x12, - DW_AT_language = 0x13, - DW_AT_member = 0x14, - DW_AT_discr = 0x15, - DW_AT_discr_value = 0x16, - DW_AT_visibility = 0x17, - DW_AT_import = 0x18, - DW_AT_string_length = 0x19, - DW_AT_common_reference = 0x1a, - DW_AT_comp_dir = 0x1b, - DW_AT_const_value = 0x1c, - DW_AT_containing_type = 0x1d, - DW_AT_default_value = 0x1e, - DW_AT_inline = 0x20, - DW_AT_is_optional = 0x21, - DW_AT_lower_bound = 0x22, - DW_AT_producer = 0x25, - DW_AT_prototyped = 0x27, - DW_AT_return_addr = 0x2a, - DW_AT_start_scope = 0x2c, - DW_AT_bit_stride = 0x2e, - DW_AT_stride_size = 0x2e, - DW_AT_upper_bound = 0x2f, - DW_AT_abstract_origin = 0x31, - DW_AT_accessibility = 0x32, - DW_AT_address_class = 0x33, - DW_AT_artificial = 0x34, - DW_AT_base_types = 0x35, - DW_AT_calling_convention = 0x36, - DW_AT_count = 0x37, - DW_AT_data_member_location = 0x38, - DW_AT_decl_column = 0x39, - DW_AT_decl_file = 0x3a, - DW_AT_decl_line = 0x3b, - DW_AT_declaration = 0x3c, - DW_AT_discr_list = 0x3d, - DW_AT_encoding = 0x3e, - DW_AT_external = 0x3f, - DW_AT_frame_base = 0x40, - DW_AT_friend = 0x41, - DW_AT_identifier_case = 0x42, - DW_AT_macro_info = 0x43, - DW_AT_namelist_item = 0x44, - DW_AT_priority = 0x45, - DW_AT_segment = 0x46, - DW_AT_specification = 0x47, - DW_AT_static_link = 0x48, - DW_AT_type = 0x49, - DW_AT_use_location = 0x4a, - DW_AT_variable_parameter = 0x4b, - DW_AT_virtuality = 0x4c, - DW_AT_vtable_elem_location = 0x4d, - DW_AT_allocated = 0x4e, - DW_AT_associated = 0x4f, - DW_AT_data_location = 0x50, - DW_AT_byte_stride = 0x51, - DW_AT_stride = 0x51, - DW_AT_entry_pc = 0x52, - DW_AT_use_UTF8 = 0x53, - DW_AT_extension = 0x54, - DW_AT_ranges = 0x55, - DW_AT_trampoline = 0x56, - DW_AT_call_column = 0x57, - DW_AT_call_file = 0x58, - DW_AT_call_line 
= 0x59, - DW_AT_description = 0x5a, - DW_AT_binary_scale = 0x5b, - DW_AT_decimal_scale = 0x5c, - DW_AT_small = 0x5d, - DW_AT_decimal_sign = 0x5e, - DW_AT_digit_count = 0x5f, - DW_AT_picture_string = 0x60, - DW_AT_mutable = 0x61, - DW_AT_threads_scaled = 0x62, - DW_AT_explicit = 0x63, - DW_AT_object_pointer = 0x64, - DW_AT_endianity = 0x65, - DW_AT_elemental = 0x66, - DW_AT_pure = 0x67, - DW_AT_recursive = 0x68, - DW_AT_signature = 0x69, - DW_AT_main_subprogram = 0x6a, - DW_AT_data_bit_offset = 0x6b, - DW_AT_const_expr = 0x6c, - DW_AT_enum_class = 0x6d, - DW_AT_linkage_name = 0x6e, + DW_AT_null = 0x00, + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_fund_type = 0x05, + DW_AT_mod_fund_type = 0x06, + DW_AT_user_def_type = 0x07, + DW_AT_mod_u_d_type = 0x08, + DW_AT_ordering = 0x09, + DW_AT_subscr_data = 0x0a, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_element_list = 0x0f, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_member = 0x14, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_producer = 0x25, + DW_AT_protected = 0x26, + DW_AT_prototyped = 0x27, + DW_AT_public = 0x28, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_bit_stride = 0x2e, + DW_AT_stride_size = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + 
DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_item = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_byte_stride = 0x51, + DW_AT_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + DW_AT_description = 0x5a, + DW_AT_binary_scale = 0x5b, + DW_AT_decimal_scale = 0x5c, + DW_AT_small = 0x5d, + DW_AT_decimal_sign = 0x5e, + DW_AT_digit_count = 0x5f, + DW_AT_picture_string = 0x60, + DW_AT_mutable = 0x61, + DW_AT_threads_scaled = 0x62, + DW_AT_explicit = 0x63, + DW_AT_object_pointer = 0x64, + DW_AT_endianity = 0x65, + DW_AT_elemental = 0x66, + DW_AT_pure = 0x67, + DW_AT_recursive = 0x68, + DW_AT_signature = 0x69, + DW_AT_main_subprogram = 0x6a, + DW_AT_data_bit_offset = 0x6b, + DW_AT_const_expr = 0x6c, + DW_AT_enum_class = 0x6d, + DW_AT_linkage_name = 0x6e, + DW_AT_string_length_bit_size = 0x6f, + DW_AT_string_length_byte_size = 0x70, + DW_AT_rank = 0x71, + DW_AT_str_offsets_base = 0x72, + DW_AT_addr_base = 0x73, + DW_AT_rnglists_base = 0x74, + DW_AT_dwo_name = 0x76, + DW_AT_reference = 0x77, + DW_AT_rvalue_reference = 0x78, + DW_AT_macros = 0x79, + DW_AT_call_all_calls = 0x7a, + DW_AT_call_all_source_calls = 0x7b, + DW_AT_call_all_tail_calls = 0x7c, + DW_AT_call_return_pc = 0x7d, + DW_AT_call_value = 0x7e, + DW_AT_call_origin = 0x7f, + DW_AT_call_parameter = 0x80, + DW_AT_call_pc = 0x81, + DW_AT_call_tail_call = 0x82, + 
DW_AT_call_target = 0x83, + DW_AT_call_target_clobbered = 0x84, + DW_AT_call_data_location = 0x85, + DW_AT_call_data_value = 0x86, + DW_AT_noreturn = 0x87, + DW_AT_alignment = 0x88, + DW_AT_export_symbols = 0x89, + DW_AT_deleted = 0x8a, + DW_AT_defaulted = 0x8b, + DW_AT_loclists_base = 0x8c, DW_AT_MIPS_fde = 0x2001, DW_AT_MIPS_loop_begin = 0x2002, @@ -219,6 +271,7 @@ DW_AT_body_end = 0x2106, DW_AT_GNU_vector = 0x2107, DW_AT_GNU_template_name = 0x2110, + DW_AT_GNU_odr_signature = 0x210f, DW_AT_GNU_call_site_value = 0x2111, DW_AT_GNU_call_site_data_value = 0x2112, @@ -228,6 +281,17 @@ DW_AT_GNU_all_tail_call_sites = 0x2116, DW_AT_GNU_all_call_sites = 0x2117, DW_AT_GNU_all_source_call_sites = 0x2118, + DW_AT_GNU_macros = 0x2119, + DW_AT_GNU_deleted = 0x211a, + DW_AT_GNU_dwo_id = 0x2131, + DW_AT_GNU_pubnames = 0x2134, + DW_AT_GNU_pubtypes = 0x2135, + DW_AT_GNU_discriminator = 0x2136, + + DW_AT_LLVM_include_path = 0x3e00, + DW_AT_LLVM_config_macros = 0x3e01, + DW_AT_LLVM_isysroot = 0x3e02, + DW_AT_LLVM_tag_offset = 0x3e03, DW_AT_APPLE_optimized = 0x3fe1, DW_AT_APPLE_flags = 0x3fe2, @@ -248,36 +312,57 @@ ENUM_DW_FORM = dict( - DW_FORM_null = 0x00, - DW_FORM_addr = 0x01, - DW_FORM_block2 = 0x03, - DW_FORM_block4 = 0x04, - DW_FORM_data2 = 0x05, - DW_FORM_data4 = 0x06, - DW_FORM_data8 = 0x07, - DW_FORM_string = 0x08, - DW_FORM_block = 0x09, - DW_FORM_block1 = 0x0a, - DW_FORM_data1 = 0x0b, - DW_FORM_flag = 0x0c, - DW_FORM_sdata = 0x0d, - DW_FORM_strp = 0x0e, - DW_FORM_udata = 0x0f, - DW_FORM_ref_addr = 0x10, - DW_FORM_ref1 = 0x11, - DW_FORM_ref2 = 0x12, - DW_FORM_ref4 = 0x13, - DW_FORM_ref8 = 0x14, - DW_FORM_ref_udata = 0x15, - DW_FORM_indirect = 0x16, - DW_FORM_sec_offset = 0x17, - DW_FORM_exprloc = 0x18, - DW_FORM_flag_present = 0x19, - DW_FORM_ref_sig8 = 0x20, + DW_FORM_null = 0x00, + DW_FORM_addr = 0x01, + DW_FORM_ref = 0x02, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, 
+ DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 0x18, + DW_FORM_flag_present = 0x19, + DW_FORM_strx = 0x1a, + DW_FORM_addrx = 0x1b, + DW_FORM_ref_sup4 = 0x1c, + DW_FORM_strp_sup = 0x1d, + DW_FORM_data16 = 0x1e, + DW_FORM_line_strp = 0x1f, + DW_FORM_ref_sig8 = 0x20, + DW_FORM_implicit_const = 0x21, + DW_FORM_loclistx = 0x22, + DW_FORM_rnglistx = 0x23, + DW_FORM_ref_sup8 = 0x24, + DW_FORM_strx1 = 0x25, + DW_FORM_strx2 = 0x26, + DW_FORM_strx3 = 0x27, + DW_FORM_strx4 = 0x28, + DW_FORM_addrx1 = 0x29, + DW_FORM_addrx2 = 0x2a, + DW_FORM_addrx3 = 0x2b, + DW_FORM_addrx4 = 0x2c, - DW_FORM_GNU_strp_alt = 0x1f21, - DW_FORM_GNU_ref_alt = 0x1f20, - _default_ = Pass, + DW_FORM_GNU_addr_index = 0x1f01, + DW_FORM_GNU_str_index = 0x1f02, + DW_FORM_GNU_ref_alt = 0x1f20, + DW_FORM_GNU_strp_alt = 0x1f21, + _default_ = Pass, ) # Inverse mapping for ENUM_DW_FORM diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index 8996b5ca..ce69d685 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -58,12 +58,14 @@ def __init__(self, default_is_stmt): self.prologue_end = False self.epilogue_begin = False self.isa = 0 + self.discriminator = 0 def __repr__(self): a = ['\n' @@ -76,7 +78,7 @@ class LineProgram(object): """ def __init__(self, header, stream, structs, program_start_offset, program_end_offset): - """ + """ header: The header of this line program. 
Note: LineProgram may modify its header by appending file entries if DW_LNE_define_file @@ -115,7 +117,7 @@ def get_entries(self): return self._decoded_entries #------ PRIVATE ------# - + def __getitem__(self, name): """ Implement dict-like access to header entries """ @@ -130,6 +132,7 @@ def add_entry_new_state(cmd, args, is_extended=False): # After adding, clear some state registers. entries.append(LineProgramEntry( cmd, is_extended, args, copy.copy(state))) + state.discriminator = 0 state.basic_block = False state.prologue_end = False state.epilogue_begin = False @@ -141,7 +144,7 @@ def add_entry_old_state(cmd, args, is_extended=False): offset = self.program_start_offset while offset < self.program_end_offset: opcode = struct_parse( - self.structs.Dwarf_uint8(''), + self.structs.Dwarf_uint8(''), self.stream, offset) @@ -156,7 +159,7 @@ def add_entry_old_state(cmd, args, is_extended=False): adjusted_opcode = opcode - self['opcode_base'] operation_advance = adjusted_opcode // self['line_range'] address_addend = ( - self['minimum_instruction_length'] * + self['minimum_instruction_length'] * ((state.op_index + operation_advance) // maximum_operations_per_instruction)) state.address += address_addend @@ -177,7 +180,7 @@ def add_entry_old_state(cmd, args, is_extended=False): state.end_sequence = True add_entry_new_state(ex_opcode, [], is_extended=True) # reset state - state = LineState(self.header['default_is_stmt']) + state = LineState(self.header['default_is_stmt']) elif ex_opcode == DW_LNE_set_address: operand = struct_parse(self.structs.Dwarf_target_addr(''), self.stream) @@ -188,6 +191,10 @@ def add_entry_old_state(cmd, args, is_extended=False): self.structs.Dwarf_lineprog_file_entry, self.stream) self['file_entry'].append(operand) add_entry_old_state(ex_opcode, [operand], is_extended=True) + elif ex_opcode == DW_LNE_set_discriminator: + operand = struct_parse(self.structs.Dwarf_uleb128(''), + self.stream) + state.discriminator = operand else: # Unknown, but need 
to roll forward the stream because the # length is specified. Seek forward inst_len - 1 because @@ -252,4 +259,3 @@ def add_entry_old_state(cmd, args, is_extended=False): opcode,)) offset = self.stream.tell() return entries - diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index 3d97af3c..e6c735f5 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -11,10 +11,9 @@ from ..common.utils import struct_parse - -LocationEntry = namedtuple('LocationEntry', 'begin_offset end_offset loc_expr') -BaseAddressEntry = namedtuple('BaseAddressEntry', 'base_address') - +LocationExpr = namedtuple('LocationExpr', 'loc_expr') +LocationEntry = namedtuple('LocationEntry', 'entry_offset begin_offset end_offset loc_expr') +BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address') class LocationLists(object): """ A single location list is a Python list consisting of LocationEntry or @@ -47,6 +46,7 @@ def iter_location_lists(self): def _parse_location_list_from_stream(self): lst = [] while True: + entry_offset = self.stream.tell() begin_offset = struct_parse( self.structs.Dwarf_target_addr(''), self.stream) end_offset = struct_parse( @@ -56,7 +56,7 @@ def _parse_location_list_from_stream(self): break elif begin_offset == self._max_addr: # Base address selection entry - lst.append(BaseAddressEntry(base_address=end_offset)) + lst.append(BaseAddressEntry(entry_offset=entry_offset, base_address=end_offset)) else: # Location list entry expr_len = struct_parse( @@ -65,7 +65,66 @@ def _parse_location_list_from_stream(self): self.stream) for i in range(expr_len)] lst.append(LocationEntry( + entry_offset=entry_offset, begin_offset=begin_offset, end_offset=end_offset, loc_expr=loc_expr)) return lst + +class LocationParser(object): + """ A parser for location information in DIEs. 
+ Handles both location information contained within the attribute + itself (represented as a LocationExpr object) and references to + location lists in the .debug_loc section (represented as a + list). + """ + def __init__(self, location_lists): + self.location_lists = location_lists + + @staticmethod + def attribute_has_location(attr, dwarf_version): + """ Checks if a DIE attribute contains location information. + """ + return (LocationParser._attribute_is_loclistptr_class(attr) and + (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or + LocationParser._attribute_has_loc_list(attr, dwarf_version))) + + def parse_from_attribute(self, attr, dwarf_version): + """ Parses a DIE attribute and returns either a LocationExpr or + a list. + """ + if self.attribute_has_location(attr, dwarf_version): + if self._attribute_has_loc_expr(attr, dwarf_version): + return LocationExpr(attr.value) + elif self._attribute_has_loc_list(attr, dwarf_version): + return self.location_lists.get_location_list_at_offset( + attr.value) + else: + raise ValueError("Attribute does not have location information") + + #------ PRIVATE ------# + + @staticmethod + def _attribute_has_loc_expr(attr, dwarf_version): + return ((dwarf_version < 4 and attr.form.startswith('DW_FORM_block') and + not attr.name == 'DW_AT_const_value') or + attr.form == 'DW_FORM_exprloc') + + @staticmethod + def _attribute_has_loc_list(attr, dwarf_version): + return ((dwarf_version < 4 and + attr.form in ('DW_FORM_data4', 'DW_FORM_data8') and + not attr.name == 'DW_AT_const_value') or + attr.form == 'DW_FORM_sec_offset') + + @staticmethod + def _attribute_is_loclistptr_class(attr): + return (attr.name in ( 'DW_AT_location', 'DW_AT_string_length', + 'DW_AT_const_value', 'DW_AT_return_addr', + 'DW_AT_data_member_location', + 'DW_AT_frame_base', 'DW_AT_segment', + 'DW_AT_static_link', 'DW_AT_use_location', + 'DW_AT_vtable_elem_location', + 'DW_AT_GNU_call_site_value', + 'DW_AT_GNU_call_site_target', + 
'DW_AT_GNU_call_site_data_value')) diff --git a/elftools/dwarf/namelut.py b/elftools/dwarf/namelut.py new file mode 100755 index 00000000..fd12aad3 --- /dev/null +++ b/elftools/dwarf/namelut.py @@ -0,0 +1,198 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/namelut.py +# +# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames) +# +# Vijay Ramasami (rvijayc@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import collections +from collections import OrderedDict +from ..common.utils import struct_parse +from ..common.py3compat import Mapping +from bisect import bisect_right +import math +from ..construct import CString, Struct, If + +NameLUTEntry = collections.namedtuple('NameLUTEntry', 'cu_ofs die_ofs') + +class NameLUT(Mapping): + """ + A "Name LUT" holds any of the tables specified by .debug_pubtypes or + .debug_pubnames sections. This is basically a dictionary where the key is + the symbol name (either a public variable, function or a type), and the + value is the tuple (cu_offset, die_offset) corresponding to the variable. + The die_offset is an absolute offset (meaning, it can be used to search the + CU by iterating until a match is obtained). + + An ordered dictionary is used to preserve the CU order (i.e, items are + stored on a per-CU basis (as it was originally in the .debug_* section). + + Usage: + + The NameLUT walks and talks like a dictionary and hence it can be used as + such. Some examples below: + + # get the pubnames (a NameLUT from DWARF info). + pubnames = dwarf_info.get_pubnames() + + # lookup a variable. + entry1 = pubnames["var_name1"] + entry2 = pubnames.get("var_name2", default=) + print(entry2.cu_ofs) + ... + + # iterate over items. + for (name, entry) in pubnames.items(): + # do stuff with name, entry.cu_ofs, entry.die_ofs + + # iterate over items on a per-CU basis. 
+ import itertools + for cu_ofs, item_list in itertools.groupby(pubnames.items(), + key = lambda x: x[1].cu_ofs): + # items are now grouped by cu_ofs. + # item_list is an iterator yeilding NameLUTEntry'ies belonging + # to cu_ofs. + # We can parse the CU at cu_offset and use the parsed CU results + # to parse the pubname DIEs in the CU listed by item_list. + for item in item_list: + # work with item which is part of the CU with cu_ofs. + + """ + + def __init__(self, stream, size, structs): + + self._stream = stream + self._size = size + self._structs = structs + # entries are lazily loaded on demand. + self._entries = None + # CU headers (for readelf). + self._cu_headers = None + + def get_entries(self): + """ + Returns the parsed NameLUT entries. The returned object is a dictionary + with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as + the value. + + This is useful when dealing with very large ELF files with millions of + entries. The returned entries can be pickled to a file and restored by + calling set_entries on subsequent loads. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries + + def set_entries(self, entries, cu_headers): + """ + Set the NameLUT entries from an external source. The input is a + dictionary with the symbol name as the key and NameLUTEntry(cu_ofs, + die_ofs) as the value. + + This option is useful when dealing with very large ELF files with + millions of entries. The entries can be parsed once and pickled to a + file and can be restored via this function on subsequent loads. + """ + self._entries = entries + self._cu_headers = cu_headers + + def __len__(self): + """ + Returns the number of entries in the NameLUT. 
+ """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return len(self._entries) + + def __getitem__(self, name): + """ + Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds + to the given symbol name. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries.get(name) + + def __iter__(self): + """ + Returns an iterator to the NameLUT dictionary. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return iter(self._entries) + + def items(self): + """ + Returns the NameLUT dictionary items. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries.items() + + def get(self, name, default=None): + """ + Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or + None if the symbol does not exist in the corresponding section. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries.get(name, default) + + def get_cu_headers(self): + """ + Returns all CU headers. Mainly required for readelf. + """ + if self._cu_headers is None: + self._entries, self._cu_headers = self._get_entries() + + return self._cu_headers + + def _get_entries(self): + """ + Parse the (name, cu_ofs, die_ofs) information from this section and + store as a dictionary. + """ + + self._stream.seek(0) + entries = OrderedDict() + cu_headers = [] + offset = 0 + # According to 6.1.1. of DWARFv4, each set of names is terminated by + # an offset field containing zero (and no following string). Because + # of sequential parsing, every next entry may be that terminator. + # So, field "name" is conditional. + entry_struct = Struct("Dwarf_offset_name_pair", + self._structs.Dwarf_offset('die_ofs'), + If(lambda ctx: ctx['die_ofs'], CString('name'))) + + # each run of this loop will fetch one CU worth of entries. 
+ while offset < self._size: + + # read the header for this CU. + namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header, + self._stream, offset) + cu_headers.append(namelut_hdr) + # compute the next offset. + offset = (offset + namelut_hdr.unit_length + + self._structs.initial_length_field_size()) + + # before inner loop, latch data that will be used in the inner + # loop to avoid attribute access and other computation. + hdr_cu_ofs = namelut_hdr.debug_info_offset + + # while die_ofs of the entry is non-zero (which indicates the end) ... + while True: + entry = struct_parse(entry_struct, self._stream) + + # if it is zero, this is the terminating record. + if entry.die_ofs == 0: + break + # add this entry to the look-up dictionary. + entries[entry.name.decode('utf-8')] = NameLUTEntry( + cu_ofs = hdr_cu_ofs, + die_ofs = hdr_cu_ofs + entry.die_ofs) + + # return the entries parsed so far. + return (entries, cu_headers) diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index 9a216ee9..5f99473e 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -57,12 +57,9 @@ def _parse_range_list_from_stream(self): elif begin_offset == self._max_addr: # Base address selection entry lst.append(BaseAddressEntry(base_address=end_offset)) - else: + else: # Range entry lst.append(RangeEntry( begin_offset=begin_offset, end_offset=end_offset)) return lst - - - diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index f3b6ef3b..95130531 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -34,6 +34,9 @@ class DWARFStructs(object): Dwarf_offset: 32-bit or 64-bit word, depending on dwarf_format + Dwarf_length: + 32-bit or 64-bit word, depending on dwarf_format + Dwarf_target_addr: 32-bit or 64-bit word, depending on address size @@ -105,6 +108,7 @@ def _create_structs(self): self.Dwarf_uint32 = ULInt32 self.Dwarf_uint64 = ULInt64 self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64 + self.Dwarf_length 
= ULInt32 if self.dwarf_format == 32 else ULInt64 self.Dwarf_target_addr = ( ULInt32 if self.address_size == 4 else ULInt64) self.Dwarf_int8 = SLInt8 @@ -117,6 +121,7 @@ def _create_structs(self): self.Dwarf_uint32 = UBInt32 self.Dwarf_uint64 = UBInt64 self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64 + self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64 self.Dwarf_target_addr = ( UBInt32 if self.address_size == 4 else UBInt64) self.Dwarf_int8 = SBInt8 @@ -132,6 +137,7 @@ def _create_structs(self): self._create_lineprog_header() self._create_callframe_entry_headers() self._create_aranges_header() + self._create_nameLUT_header() def _create_initial_length(self): def _InitialLength(name): @@ -189,12 +195,13 @@ def _create_dw_form(self): DW_FORM_strp=self.Dwarf_offset(''), DW_FORM_flag=self.Dwarf_uint8(''), + DW_FORM_ref=self.Dwarf_uint32(''), DW_FORM_ref1=self.Dwarf_uint8(''), DW_FORM_ref2=self.Dwarf_uint16(''), DW_FORM_ref4=self.Dwarf_uint32(''), DW_FORM_ref8=self.Dwarf_uint64(''), DW_FORM_ref_udata=self.Dwarf_uleb128(''), - DW_FORM_ref_addr=self.Dwarf_offset(''), + DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''), DW_FORM_indirect=self.Dwarf_uleb128(''), @@ -218,6 +225,14 @@ def _create_aranges_header(self): self.Dwarf_uint8('segment_size') ) + def _create_nameLUT_header(self): + self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_offset('debug_info_offset'), + self.Dwarf_length('debug_info_length') + ) + def _create_lineprog_header(self): # A file entry is terminated by a NULL byte, so we don't want to parse # past it. Therefore an If is used. 
# -------------------------------------------------------------------------------
# elftools: ehabi/decoder.py
#
# Decode ARM exception handler bytecode.
#
# LeadroyaL (leadroyal@qq.com)
# This code is in the public domain
# -------------------------------------------------------------------------------
from collections import namedtuple


class EHABIBytecodeDecoder(object):
    """ Decoder of a sequence of ARM exception handler abi bytecode.

        Reference:
        https://github.com/llvm/llvm-project/blob/master/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
        https://developer.arm.com/documentation/ihi0038/b/

        Accessible attributes:

            mnemonic_array:
                MnemonicItem array.

        Parameters:

            bytecode_array:
                Integer array, raw data of bytecode.

        A multi-byte opcode that is cut off by the end of bytecode_array
        raises IndexError while decoding.
    """

    def __init__(self, bytecode_array):
        self._bytecode_array = bytecode_array
        self._index = None
        self.mnemonic_array = None
        self._decode()

    def _decode(self):
        """ Decode bytecode array, put result into mnemonic_array.
        """
        self._index = 0
        self.mnemonic_array = []
        while self._index < len(self._bytecode_array):
            # The first recipe in `ring` whose masked pattern matches wins;
            # the table is ordered from most to least specific and its last
            # entries catch every remaining byte value.
            for mask, value, handler in self.ring:
                if (self._bytecode_array[self._index] & mask) == value:
                    start_idx = self._index
                    mnemonic = handler(self)  # advances self._index
                    end_idx = self._index
                    self.mnemonic_array.append(
                        MnemonicItem(self._bytecode_array[start_idx: end_idx], mnemonic))
                    break

    def _fetch(self):
        """ Return the byte under the cursor and advance the cursor by one.
        """
        byte = self._bytecode_array[self._index]
        self._index += 1
        return byte

    def _decode_00xxxxxx(self):
        # vsp = vsp + (xxxxxx << 2) + 4
        opcode = self._fetch()
        return 'vsp = vsp + %u' % (((opcode & 0x3f) << 2) + 4)

    def _decode_01xxxxxx(self):
        # vsp = vsp - (xxxxxx << 2) - 4
        opcode = self._fetch()
        return 'vsp = vsp - %u' % (((opcode & 0x3f) << 2) + 4)

    gpr_register_names = ("r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
                          "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc")

    def _calculate_range(self, start, count):
        """ Bit mask with (count + 1) consecutive bits set, lowest at `start`.
        """
        return ((1 << (count + 1)) - 1) << start

    def _printGPR(self, gpr_mask):
        """ Format the general purpose registers selected by gpr_mask.
        """
        hits = [name for i, name in enumerate(self.gpr_register_names)
                if gpr_mask & (1 << i)]
        return '{%s}' % ', '.join(hits)

    def _print_registers(self, vfp_mask, prefix):
        """ Format registers prefix0..prefix31 selected by vfp_mask.
        """
        hits = [prefix + str(i) for i in range(32) if vfp_mask & (1 << i) != 0]
        return '{%s}' % ', '.join(hits)

    def _decode_1000iiii_iiiiiiii(self):
        # 12-bit mask selecting r4..r15; an all-zero mask means
        # "refuse to unwind".
        op0 = self._fetch()
        op1 = self._fetch()
        gpr_mask = (op1 << 4) | ((op0 & 0x0f) << 12)
        if gpr_mask == 0:
            return 'refuse to unwind'
        else:
            return 'pop %s' % self._printGPR(gpr_mask)

    def _decode_10011101(self):
        self._fetch()
        return 'reserved (ARM MOVrr)'

    def _decode_10011111(self):
        self._fetch()
        return 'reserved (WiMMX MOVrr)'

    def _decode_1001nnnn(self):
        opcode = self._fetch()
        return 'vsp = r%u' % (opcode & 0x0f)

    def _decode_10100nnn(self):
        # pop r4..r[4+nnn]
        opcode = self._fetch()
        return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07))

    def _decode_10101nnn(self):
        # pop r4..r[4+nnn] and lr (bit 14)
        opcode = self._fetch()
        return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07) | (1 << 14))

    def _decode_10110000(self):
        self._fetch()
        return 'finish'

    def _decode_10110001_0000iiii(self):
        self._fetch()  # skip constant byte
        op1 = self._fetch()
        if (op1 & 0xf0) != 0 or op1 == 0x00:
            return 'spare'
        else:
            return 'pop %s' % self._printGPR((op1 & 0x0f))

    def _decode_10110010_uleb128(self):
        # vsp = vsp + 0x204 + (uleb128 << 2)
        #
        # LLVM reference:
        #   do { ULEB.push_back(Opcodes[OI ^ 3]); } while (Opcodes[OI++ ^ 3] & 0x80);
        # i.e. keep consuming bytes while the byte just read has its
        # continuation bit (0x80) set.
        self._fetch()  # skip constant byte
        uleb_buffer = []
        while True:
            byte = self._fetch()
            uleb_buffer.append(byte)
            if byte & 0x80 == 0:
                break
        # ULEB128 is little-endian: the first byte holds the low 7 bits.
        value = 0
        for b in reversed(uleb_buffer):
            value = (value << 7) + (b & 0x7F)
        return 'vsp = vsp + %u' % (0x204 + (value << 2))

    def _decode_10110011_sssscccc(self):
        # these two decoders are equal
        return self._decode_11001001_sssscccc()

    def _decode_101101nn(self):
        return self._spare()

    def _decode_10111nnn(self):
        # pop d8..d[8+nnn]
        opcode = self._fetch()
        return 'pop %s' % self._print_registers(self._calculate_range(8, opcode & 0x07), "d")

    def _decode_11000110_sssscccc(self):
        # pop wR[ssss]..wR[ssss+cccc]
        self._fetch()  # skip constant byte
        op1 = self._fetch()
        start = ((op1 & 0xf0) >> 4)
        count = ((op1 & 0x0f) >> 0)
        return 'pop %s' % self._print_registers(self._calculate_range(start, count), "wR")

    def _decode_11000111_0000iiii(self):
        self._fetch()  # skip constant byte
        op1 = self._fetch()
        if (op1 & 0xf0) != 0 or op1 == 0x00:
            return 'spare'
        else:
            return 'pop %s' % self._print_registers(op1 & 0x0f, "wCGR")

    def _decode_11001000_sssscccc(self):
        # pop d[16+ssss]..d[16+ssss+cccc]
        self._fetch()  # skip constant byte
        op1 = self._fetch()
        start = 16 + ((op1 & 0xf0) >> 4)
        count = ((op1 & 0x0f) >> 0)
        return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")

    def _decode_11001001_sssscccc(self):
        # pop d[ssss]..d[ssss+cccc]
        self._fetch()  # skip constant byte
        op1 = self._fetch()
        start = ((op1 & 0xf0) >> 4)
        count = ((op1 & 0x0f) >> 0)
        return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")

    def _decode_11001yyy(self):
        return self._spare()

    def _decode_11000nnn(self):
        # pop wR10..wR[10+nnn]
        opcode = self._fetch()
        return 'pop %s' % self._print_registers(self._calculate_range(10, opcode & 0x07), "wR")

    def _decode_11010nnn(self):
        # these two decoders are equal
        return self._decode_10111nnn()

    def _decode_11xxxyyy(self):
        return self._spare()

    def _spare(self):
        self._fetch()
        return 'spare'

    _DECODE_RECIPE_TYPE = namedtuple('_DECODE_RECIPE_TYPE', 'mask value handler')

    # Dispatch table, scanned in order by _decode(). Handlers are stored as
    # plain functions and invoked as handler(self).
    ring = (
        _DECODE_RECIPE_TYPE(mask=0xc0, value=0x00, handler=_decode_00xxxxxx),
        _DECODE_RECIPE_TYPE(mask=0xc0, value=0x40, handler=_decode_01xxxxxx),
        _DECODE_RECIPE_TYPE(mask=0xf0, value=0x80, handler=_decode_1000iiii_iiiiiiii),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0x9d, handler=_decode_10011101),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0x9f, handler=_decode_10011111),
        _DECODE_RECIPE_TYPE(mask=0xf0, value=0x90, handler=_decode_1001nnnn),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xa0, handler=_decode_10100nnn),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xa8, handler=_decode_10101nnn),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb0, handler=_decode_10110000),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb1, handler=_decode_10110001_0000iiii),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb2, handler=_decode_10110010_uleb128),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xb3, handler=_decode_10110011_sssscccc),
        _DECODE_RECIPE_TYPE(mask=0xfc, value=0xb4, handler=_decode_101101nn),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xb8, handler=_decode_10111nnn),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc6, handler=_decode_11000110_sssscccc),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc7, handler=_decode_11000111_0000iiii),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc8, handler=_decode_11001000_sssscccc),
        _DECODE_RECIPE_TYPE(mask=0xff, value=0xc9, handler=_decode_11001001_sssscccc),
        _DECODE_RECIPE_TYPE(mask=0xc8, value=0xc8, handler=_decode_11001yyy),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xc0, handler=_decode_11000nnn),
        _DECODE_RECIPE_TYPE(mask=0xf8, value=0xd0, handler=_decode_11010nnn),
        _DECODE_RECIPE_TYPE(mask=0xc0, value=0xc0, handler=_decode_11xxxyyy),
    )


class MnemonicItem(object):
    """ Single mnemonic item: the raw bytes of one instruction (bytecode)
        and its textual description (mnemonic).
    """

    def __init__(self, bytecode, mnemonic):
        self.bytecode = bytecode
        self.mnemonic = mnemonic

    def __repr__(self):
        return '%s ; %s' % (' '.join(['0x%02x' % x for x in self.bytecode]), self.mnemonic)
(EHABIEntry object or a subclass) + """ + if n >= self.num_entry(): + raise IndexError('Invalid entry %d/%d' % (n, self._num_entry)) + eh_index_entry_offset = self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE + eh_index_data = struct_parse(self._struct.EH_index_struct, self._arm_idx_section.stream, eh_index_entry_offset) + word0, word1 = eh_index_data['word0'], eh_index_data['word1'] + + if word0 & 0x80000000 != 0: + return CorruptEHABIEntry('Corrupt ARM exception handler table entry: %x' % n) + + function_offset = arm_expand_prel31(word0, self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE) + + if word1 == 1: + # 0x1 means cannot unwind + return CannotUnwindEHABIEntry(function_offset) + elif word1 & 0x80000000 == 0: + # highest bit is zero, point to .ARM.extab data + eh_table_offset = arm_expand_prel31(word1, self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE + 4) + eh_index_data = struct_parse(self._struct.EH_table_struct, self._arm_idx_section.stream, eh_table_offset) + word0 = eh_index_data['word0'] + if word0 & 0x80000000 == 0: + # highest bit is one, generic model + return GenericEHABIEntry(function_offset, arm_expand_prel31(word0, eh_table_offset)) + else: + # highest bit is one, arm compact model + # highest half must be 0b1000 for compact model + if word0 & 0x70000000 != 0: + return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n) + per_index = (word0 >> 24) & 0x7f + if per_index == 0: + # arm compact model 0 + opcode = [(word0 & 0xFF0000) >> 16, (word0 & 0xFF00) >> 8, word0 & 0xFF] + return EHABIEntry(function_offset, per_index, opcode) + elif per_index == 1 or per_index == 2: + # arm compact model 1/2 + more_word = (word0 >> 16) & 0xff + opcode = [(word0 >> 8) & 0xff, (word0 >> 0) & 0xff] + self._arm_idx_section.stream.seek(eh_table_offset + 4) + for i in range(more_word): + r = struct_parse(self._struct.EH_table_struct, self._arm_idx_section.stream)['word0'] + opcode.append((r >> 24) & 0xFF) + opcode.append((r >> 16) & 0xFF) + 
class EHABIEntry(object):
    """ Exception handler abi entry.

        Accessible attributes:

            function_offset:
                Integer.
                None if corrupt. (Reference: CorruptEHABIEntry)

            personality:
                Integer.
                None if corrupt or not unwindable. (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry)
                0/1/2 for ARM personality compact format.
                Others for generic personality.

            bytecode_array:
                Integer array.
                None if corrupt or not unwindable or generic personality.
                (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry)

            eh_table_offset:
                Integer.
                Only entries who point to .ARM.extab contains this field, otherwise return None.

            unwindable:
                bool. Whether this function is unwindable.

            corrupt:
                bool. Whether this entry is corrupt.

    """

    def __init__(self,
                 function_offset,
                 personality,
                 bytecode_array,
                 eh_table_offset=None,
                 unwindable=True,
                 corrupt=False):
        self.function_offset = function_offset
        self.personality = personality
        self.bytecode_array = bytecode_array
        self.eh_table_offset = eh_table_offset
        self.unwindable = unwindable
        self.corrupt = corrupt

    # NOTE: the method name is misspelled ("mnmemonic"); it is kept as is
    # because it is part of the public interface.
    def mnmemonic_array(self):
        """ Decode bytecode_array into a list of mnemonic items.
            Returns None when there is no bytecode (None or empty).
        """
        if self.bytecode_array:
            return EHABIBytecodeDecoder(self.bytecode_array).mnemonic_array
        else:
            return None

    def __repr__(self):
        # The eh_table_offset part is only present for entries that carry one.
        return "<EHABIEntry function_offset=0x%x, personality=%d, %sbytecode=%s>" % (
            self.function_offset,
            self.personality,
            "eh_table_offset=0x%x, " % self.eh_table_offset if self.eh_table_offset else "",
            self.bytecode_array)


class CorruptEHABIEntry(EHABIEntry):
    """ This entry is corrupt. Attribute #corrupt will be True.
        The human readable cause is stored in attribute #reason.
    """

    def __init__(self, reason):
        super(CorruptEHABIEntry, self).__init__(function_offset=None, personality=None, bytecode_array=None,
                                                corrupt=True)
        self.reason = reason

    def __repr__(self):
        return "<CorruptEHABIEntry reason=%s>" % self.reason


class CannotUnwindEHABIEntry(EHABIEntry):
    """ This function cannot be unwound. Attribute #unwindable will be False.
    """

    def __init__(self, function_offset):
        super(CannotUnwindEHABIEntry, self).__init__(function_offset, personality=None, bytecode_array=None,
                                                     unwindable=False)

    def __repr__(self):
        return "<CannotUnwindEHABIEntry function_offset=0x%x>" % self.function_offset
def arm_expand_prel31(address, place):
    """ Expand a 31-bit place-relative offset into an absolute value.

        address: uint32, word whose low 31 bits hold a signed
                 (two's complement) offset; bit 31 is ignored
        place: uint32, position the offset is relative to
        return: uint64, (sign-extended offset + place) modulo 2**64
    """
    location = address & 0x7fffffff
    # Bit 30 is the sign bit of the 31-bit offset; when it is set the
    # value is negative and has to be sign-extended to 64 bits.
    if location & 0x40000000:
        location |= 0xffffffff80000000
    return (location + place) & 0xffffffffffffffff
+ """ + + def __init__(self, little_endian): + self._little_endian = little_endian + self._create_structs() + + def _create_structs(self): + if self._little_endian: + self.EHABI_uint32 = ULInt32 + else: + self.EHABI_uint32 = UBInt32 + self._create_exception_handler_index() + self._create_exception_handler_table() + + def _create_exception_handler_index(self): + self.EH_index_struct = Struct( + 'EH_index', + self.EHABI_uint32('word0'), + self.EHABI_uint32('word1') + ) + + def _create_exception_handler_table(self): + self.EH_table_struct = Struct( + 'EH_table', + self.EHABI_uint32('word0'), + ) diff --git a/elftools/elf/constants.py b/elftools/elf/constants.py index f9023c9b..2dc5071f 100644 --- a/elftools/elf/constants.py +++ b/elftools/elf/constants.py @@ -71,6 +71,7 @@ class SHN_INDICES(object): SHN_ABS=0xfff1 SHN_COMMON=0xfff2 SHN_HIRESERVE=0xffff + SHN_XINDEX=0xffff class SH_FLAGS(object): @@ -92,6 +93,27 @@ class SH_FLAGS(object): SHF_MASKPROC=0xf0000000 +class RH_FLAGS(object): + """ Flag values for the DT_MIPS_FLAGS dynamic table entries + """ + RHF_NONE=0x00000000 + RHF_QUICKSTART=0x00000001 + RHF_NOTPOT=0x00000002 + RHF_NO_LIBRARY_REPLACEMENT=0x00000004 + RHF_NO_MOVE=0x00000008 + RHF_SGI_ONLY=0x00000010 + RHF_GUARANTEE_INIT=0x00000020 + RHF_DELTA_C_PLUS_PLUS=0x00000040 + RHF_GUARANTEE_START_INIT=0x00000080 + RHF_PIXIE=0x00000100 + RHF_DEFAULT_DELAY_LOAD=0x00000200 + RHF_REQUICKSTART=0x00000400 + RHF_REQUICKSTARTED=0x00000800 + RHF_CORD=0x00001000 + RHF_NO_UNRES_UNDEF=0x00002000 + RHF_RLD_ORDER_SAFE=0x00004000 + + class P_FLAGS(object): """ Flag values for the p_flags field of program headers """ @@ -122,4 +144,4 @@ class SUNW_SYMINFO_FLAGS(object): class VER_FLAGS(object): VER_FLG_BASE=0x1 VER_FLG_WEAK=0x2 - VER_FLG_INFO=0x4 + VER_FLG_INFO=0x4 diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index 022b0745..94da8234 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -10,8 +10,9 @@ ENUM_D_TAG, 
def describe_rh_flags(x):
    """ Space-separated descriptions of the RH_FLAGS bits set in x, in the
        fixed order listed below.
    """
    # RHF_NONE is 0, so it can never satisfy `x & flag`; it is kept in the
    # list only to preserve the original ordering table.
    ordered_flags = (
        RH_FLAGS.RHF_NONE, RH_FLAGS.RHF_QUICKSTART,
        RH_FLAGS.RHF_NOTPOT, RH_FLAGS.RHF_NO_LIBRARY_REPLACEMENT,
        RH_FLAGS.RHF_NO_MOVE, RH_FLAGS.RHF_SGI_ONLY,
        RH_FLAGS.RHF_GUARANTEE_INIT,
        RH_FLAGS.RHF_DELTA_C_PLUS_PLUS,
        RH_FLAGS.RHF_GUARANTEE_START_INIT, RH_FLAGS.RHF_PIXIE,
        RH_FLAGS.RHF_DEFAULT_DELAY_LOAD,
        RH_FLAGS.RHF_REQUICKSTART, RH_FLAGS.RHF_REQUICKSTARTED,
        RH_FLAGS.RHF_CORD, RH_FLAGS.RHF_NO_UNRES_UNDEF,
        RH_FLAGS.RHF_RLD_ORDER_SAFE,
    )
    names = []
    for flag in ordered_flags:
        if x & flag:
            names.append(_DESCR_RH_FLAGS[flag])
    return ' '.join(names)


def describe_dt_flags(x):
    """ Space-separated names of the ENUM_DT_FLAGS entries whose value has
        a bit in common with x, ordered by value. The first three
        characters of each key are dropped (presumably the 'DF_' prefix).
    """
    names = []
    for key, val in sorted(ENUM_DT_FLAGS.items(), key=lambda t: t[1]):
        if x & val:
            names.append(key[3:])
    return ' '.join(names)


def describe_dt_flags_1(x):
    """ Space-separated names of the ENUM_DT_FLAGS_1 entries whose value has
        a bit in common with x, ordered by value. The first five
        characters of each key are dropped (presumably the 'DF_1_' prefix).
    """
    names = []
    for key, val in sorted(ENUM_DT_FLAGS_1.items(), key=lambda t: t[1]):
        if x & val:
            names.append(key[5:])
    return ' '.join(names)
return ''.join(_DESCR_SYMINFO_FLAGS[flag] for flag in ( SUNW_SYMINFO_FLAGS.SYMINFO_FLG_CAP, @@ -148,21 +178,30 @@ def describe_note(x): n_desc = x['n_desc'] desc = '' if x['n_type'] == 'NT_GNU_ABI_TAG': - desc = '\n OS: %s, ABI: %d.%d.%d' % ( - _DESCR_NOTE_ABI_TAG_OS.get(n_desc['abi_os'], _unknown), - n_desc['abi_major'], n_desc['abi_minor'], n_desc['abi_tiny']) + if x['n_name'] == 'Android': + desc = '\n description data: %s ' % ' '.join("%02x" % ord(b) for b in x['n_descdata']) + else: + desc = '\n OS: %s, ABI: %d.%d.%d' % ( + _DESCR_NOTE_ABI_TAG_OS.get(n_desc['abi_os'], _unknown), + n_desc['abi_major'], n_desc['abi_minor'], n_desc['abi_tiny']) elif x['n_type'] == 'NT_GNU_BUILD_ID': desc = '\n Build ID: %s' % (n_desc) + elif x['n_type'] == 'NT_GNU_GOLD_VERSION': + desc = '\n Version: %s' % (n_desc) else: desc = '\n description data: {}'.format(' '.join( '{:02x}'.format(ord(byte)) for byte in n_desc )) - note_type = (x['n_type'] if isinstance(x['n_type'], str) - else 'Unknown note type:') - note_type_desc = ('0x%.8x' % x['n_type'] - if isinstance(x['n_type'], int) else - _DESCR_NOTE_N_TYPE.get(x['n_type'], _unknown)) + if x['n_type'] == 'NT_GNU_ABI_TAG' and x['n_name'] == 'Android': + note_type = 'NT_VERSION' + note_type_desc = 'version' + else: + note_type = (x['n_type'] if isinstance(x['n_type'], str) + else 'Unknown note type:') + note_type_desc = ('0x%.8x' % x['n_type'] + if isinstance(x['n_type'], int) else + _DESCR_NOTE_N_TYPE.get(x['n_type'], _unknown)) return '%s (%s)%s' % (note_type, note_type_desc, desc) @@ -184,7 +223,7 @@ def describe_attr_tag_arm(tag, val, extra): elif tag == 'TAG_NODEFAULTS': return _DESCR_ATTR_TAG_ARM[tag] + 'True' - + s = _DESCR_ATTR_TAG_ARM[tag] s += '"%s"' % val if val else '' return s @@ -358,6 +397,7 @@ def describe_attr_tag_arm(tag, val, extra): SHT_MIPS_EH_REGION='MIPS_EH_REGION', SHT_MIPS_XLATE_OLD='MIPS_XLATE_OLD', SHT_MIPS_PDR_EXCEPTION='MIPS_PDR_EXCEPTION', + SHT_MIPS_ABIFLAGS='MIPS_ABIFLAGS', ) @@ -372,10 +412,31 @@ def 
describe_attr_tag_arm(tag, val, extra): SH_FLAGS.SHF_OS_NONCONFORMING: 'O', SH_FLAGS.SHF_GROUP: 'G', SH_FLAGS.SHF_TLS: 'T', + SH_FLAGS.SHF_MASKOS: 'o', SH_FLAGS.SHF_EXCLUDE: 'E', } +_DESCR_RH_FLAGS = { + RH_FLAGS.RHF_NONE: 'NONE', + RH_FLAGS.RHF_QUICKSTART: 'QUICKSTART', + RH_FLAGS.RHF_NOTPOT: 'NOTPOT', + RH_FLAGS.RHF_NO_LIBRARY_REPLACEMENT: 'NO_LIBRARY_REPLACEMENT', + RH_FLAGS.RHF_NO_MOVE: 'NO_MOVE', + RH_FLAGS.RHF_SGI_ONLY: 'SGI_ONLY', + RH_FLAGS.RHF_GUARANTEE_INIT: 'GUARANTEE_INIT', + RH_FLAGS.RHF_DELTA_C_PLUS_PLUS: 'DELTA_C_PLUS_PLUS', + RH_FLAGS.RHF_GUARANTEE_START_INIT: 'GUARANTEE_START_INIT', + RH_FLAGS.RHF_PIXIE: 'PIXIE', + RH_FLAGS.RHF_DEFAULT_DELAY_LOAD: 'DEFAULT_DELAY_LOAD', + RH_FLAGS.RHF_REQUICKSTART: 'REQUICKSTART', + RH_FLAGS.RHF_REQUICKSTARTED: 'REQUICKSTARTED', + RH_FLAGS.RHF_CORD: 'CORD', + RH_FLAGS.RHF_NO_UNRES_UNDEF: 'NO_UNRES_UNDEF', + RH_FLAGS.RHF_RLD_ORDER_SAFE: 'RLD_ORDER_SAFE', +} + + _DESCR_ST_INFO_TYPE = dict( STT_NOTYPE='NOTYPE', STT_OBJECT='OBJECT', diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 92822844..f03c6b33 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -8,10 +8,14 @@ #------------------------------------------------------------------------------- import itertools +from collections import defaultdict +from .hash import ELFHashTable, GNUHashTable from .sections import Section, Symbol +from .enums import ENUM_D_TAG from .segments import Segment +from .relocation import RelocationTable from ..common.exceptions import ELFError -from ..common.utils import struct_parse, parse_cstring_from_stream +from ..common.utils import elf_assert, struct_parse, parse_cstring_from_stream class _DynamicStringTable(object): @@ -25,8 +29,8 @@ def __init__(self, stream, table_offset): def get_string(self, offset): """ Get the string stored at the given offset in this string table. 
""" - return parse_cstring_from_stream(self._stream, - self._table_offset + offset) + s = parse_cstring_from_stream(self._stream, self._table_offset + offset) + return s.decode('utf-8') if s else '' class DynamicTag(object): @@ -69,13 +73,32 @@ def __str__(self): class Dynamic(object): """ Shared functionality between dynamic sections and segments. """ - def __init__(self, stream, elffile, stringtable, position): + def __init__(self, stream, elffile, stringtable, position, empty): + """ + stream: + The file-like object from which to load data + + elffile: + The parent elffile object + + stringtable: + A stringtable reference to use for parsing string references in + entries + + position: + The file offset of the dynamic segment/section + + empty: + Whether this is a degenerate case with zero entries. Normally, every + dynamic table will have at least one entry, the DT_NULL terminator. + """ self.elffile = elffile self.elfstructs = elffile.structs self._stream = stream - self._num_tags = -1 + self._num_tags = -1 if not empty else 0 self._offset = position self._tagsize = self.elfstructs.Elf_Dyn.sizeof() + self._empty = empty # Do not access this directly yourself; use _get_stringtable() instead. 
self._stringtable = stringtable @@ -121,6 +144,8 @@ def _get_stringtable(self): def _iter_tags(self, type=None): """ Yield all raw tags (limit to |type| if specified) """ + if self._empty: + return for n in itertools.count(): tag = self._get_tag(n) if type is None or tag['d_tag'] == type: @@ -137,6 +162,8 @@ def iter_tags(self, type=None): def _get_tag(self, n): """ Get the raw tag at index #n from the file """ + if self._num_tags != -1 and n >= self._num_tags: + raise IndexError(n) offset = self._offset + n * self._tagsize return struct_parse( self.elfstructs.Elf_Dyn, @@ -149,7 +176,7 @@ def get_tag(self, n): return DynamicTag(self._get_tag(n), self._get_stringtable()) def num_tags(self): - """ Number of dynamic tags in the file + """ Number of dynamic tags in the file, including the DT_NULL tag """ if self._num_tags != -1: return self._num_tags @@ -160,6 +187,41 @@ def num_tags(self): self._num_tags = n + 1 return self._num_tags + def get_relocation_tables(self): + """ Load all available relocation tables from DYNAMIC tags. + + Returns a dictionary mapping found table types (REL, RELA, + JMPREL) to RelocationTable objects. 
+ """ + + result = {} + + if list(self.iter_tags('DT_REL')): + result['REL'] = RelocationTable(self.elffile, + self.get_table_offset('DT_REL')[1], + next(self.iter_tags('DT_RELSZ'))['d_val'], False) + + relentsz = next(self.iter_tags('DT_RELENT'))['d_val'] + elf_assert(result['REL'].entry_size == relentsz, + 'Expected DT_RELENT to be %s' % relentsz) + + if list(self.iter_tags('DT_RELA')): + result['RELA'] = RelocationTable(self.elffile, + self.get_table_offset('DT_RELA')[1], + next(self.iter_tags('DT_RELASZ'))['d_val'], True) + + relentsz = next(self.iter_tags('DT_RELAENT'))['d_val'] + elf_assert(result['RELA'].entry_size == relentsz, + 'Expected DT_RELAENT to be %s' % relentsz) + + if list(self.iter_tags('DT_JMPREL')): + result['JMPREL'] = RelocationTable(self.elffile, + self.get_table_offset('DT_JMPREL')[1], + next(self.iter_tags('DT_PLTRELSZ'))['d_val'], + next(self.iter_tags('DT_PLTREL'))['d_val'] == ENUM_D_TAG['DT_RELA']) + + return result + class DynamicSection(Section, Dynamic): """ ELF dynamic table section. Knows how to process the list of tags. 
@@ -168,7 +230,7 @@ def __init__(self, header, name, elffile): Section.__init__(self, header, name, elffile) stringtable = elffile.get_section(header['sh_link']) Dynamic.__init__(self, self.stream, self.elffile, stringtable, - self['sh_offset']) + self['sh_offset'], self['sh_type'] == 'SHT_NOBITS') class DynamicSegment(Segment, Dynamic): @@ -188,7 +250,38 @@ def __init__(self, header, stream, elffile): stringtable = elffile.get_section(section['sh_link']) break Segment.__init__(self, header, stream) - Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset']) + Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'], + self['p_filesz'] == 0) + self._symbol_list = None + self._symbol_name_map = None + + def num_symbols(self): + """ Number of symbols in the table recovered from DT_SYMTAB + """ + if self._symbol_list is None: + self._symbol_list = list(self.iter_symbols()) + return len(self._symbol_list) + + def get_symbol(self, index): + """ Get the symbol at index #index from the table (Symbol object) + """ + if self._symbol_list is None: + self._symbol_list = list(self.iter_symbols()) + return self._symbol_list[index] + + def get_symbol_by_name(self, name): + """ Get a symbol(s) by name. Return None if no symbol by the given name + exists. + """ + # The first time this method is called, construct a name to number + # mapping + # + if self._symbol_name_map is None: + self._symbol_name_map = defaultdict(list) + for i, sym in enumerate(self.iter_symbols()): + self._symbol_name_map[sym.name].append(i) + symnums = self._symbol_name_map.get(name) + return [self.get_symbol(i) for i in symnums] if symnums else None def iter_symbols(self): """ Yield all symbols in this dynamic segment. The symbols are usually @@ -202,33 +295,55 @@ def iter_symbols(self): symbol_size = self.elfstructs.Elf_Sym.sizeof() - # Find closest higher pointer than tab_ptr. We'll use that to mark the - # end of the symbol table. 
- nearest_ptr = None - for tag in self.iter_tags(): - tag_ptr = tag['d_ptr'] - if tag['d_tag'] == 'DT_SYMENT': - if symbol_size != tag['d_val']: - # DT_SYMENT is the size of one symbol entry. It must be the - # same as returned by Elf_Sym.sizeof. - raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' % - (tag['d_val'], symbol_size)) - if (tag_ptr > tab_ptr and - (nearest_ptr is None or nearest_ptr > tag_ptr)): - nearest_ptr = tag_ptr - - if nearest_ptr is None: - # Use the end of segment that contains DT_SYMTAB. - for segment in self.elffile.iter_segments(): - if (segment['p_vaddr'] <= tab_ptr and - tab_ptr <= (segment['p_vaddr'] + segment['p_filesz'])): - nearest_ptr = segment['p_vaddr'] + segment['p_filesz'] - - if nearest_ptr is None: + end_ptr = None + + # Check if a DT_GNU_HASH tag exists and recover the number of symbols + # from the corresponding hash table + _, gnu_hash_offset = self.get_table_offset('DT_GNU_HASH') + if gnu_hash_offset is not None: + hash_section = GNUHashTable(self.elffile, gnu_hash_offset, self) + end_ptr = tab_ptr + \ + hash_section.get_number_of_symbols() * symbol_size + + # If DT_GNU_HASH did not exist, maybe we can use DT_HASH + if end_ptr is None: + _, hash_offset = self.get_table_offset('DT_HASH') + if hash_offset is not None: + # Get the hash table from the DT_HASH offset + hash_section = ELFHashTable(self.elffile, hash_offset, self) + end_ptr = tab_ptr + \ + hash_section.get_number_of_symbols() * symbol_size + + if end_ptr is None: + # Find closest higher pointer than tab_ptr. We'll use that to mark + # the end of the symbol table. + nearest_ptr = None + for tag in self.iter_tags(): + tag_ptr = tag['d_ptr'] + if tag['d_tag'] == 'DT_SYMENT': + if symbol_size != tag['d_val']: + # DT_SYMENT is the size of one symbol entry. It must be + # the same as returned by Elf_Sym.sizeof. + raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' 
% + (tag['d_val'], symbol_size)) + if (tag_ptr > tab_ptr and + (nearest_ptr is None or nearest_ptr > tag_ptr)): + nearest_ptr = tag_ptr + + if nearest_ptr is None: + # Use the end of segment that contains DT_SYMTAB. + for segment in self.elffile.iter_segments(): + if (segment['p_vaddr'] <= tab_ptr and + tab_ptr <= (segment['p_vaddr'] + segment['p_filesz'])): + nearest_ptr = segment['p_vaddr'] + segment['p_filesz'] + + end_ptr = nearest_ptr + + if end_ptr is None: raise ELFError('Cannot determine the end of DT_SYMTAB.') string_table = self._get_stringtable() - for i in range((nearest_ptr - tab_ptr) // symbol_size): + for i in range((end_ptr - tab_ptr) // symbol_size): symbol = struct_parse(self.elfstructs.Elf_Sym, self._stream, i * symbol_size + tab_offset) symbol_name = string_table.get_string(symbol['st_name']) diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 289a5f97..6a00e0a0 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -14,9 +14,13 @@ import resource PAGESIZE = resource.getpagesize() except ImportError: - # Windows system - import mmap - PAGESIZE = mmap.PAGESIZE + try: + # Windows system + import mmap + PAGESIZE = mmap.PAGESIZE + except ImportError: + # Jython + PAGESIZE = 4096 from ..common.py3compat import BytesIO from ..common.exceptions import ELFError @@ -24,8 +28,8 @@ from .structs import ELFStructs from .sections import ( Section, StringTableSection, SymbolTableSection, - SUNWSyminfoTableSection, NullSection, NoteSection, - StabSection, ARMAttributesSection) + SymbolTableIndexSection, SUNWSyminfoTableSection, NullSection, + NoteSection, StabSection, ARMAttributesSection) from .dynamic import DynamicSection, DynamicSegment from .relocation import RelocationSection, RelocationHandler from .gnuversions import ( @@ -33,7 +37,9 @@ GNUVerSymSection) from .segments import Segment, InterpSegment, NoteSegment from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig - +from ..ehabi.ehabiinfo import 
EHABIInfo +from .hash import ELFHashSection, GNUHashSection +from .constants import SHN_INDICES class ELFFile(object): """ Creation: the constructor accepts a stream (file-like object) with the @@ -78,12 +84,25 @@ def __init__(self, stream): self.stream.seek(0) self.e_ident_raw = self.stream.read(16) - self._file_stringtable_section = self._get_file_stringtable() + self._section_header_stringtable = \ + self._get_section_header_stringtable() self._section_name_map = None def num_sections(self): """ Number of sections in the file """ + if self['e_shoff'] == 0: + return 0 + # From the ELF ABI documentation at + # https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html: + # "e_shnum normally tells how many entries the section header table + # contains. [...] If the number of sections is greater than or equal to + # SHN_LORESERVE (0xff00), e_shnum has the value SHN_UNDEF (0) and the + # actual number of section header table entries is contained in the + # sh_size field of the section header at index 0 (otherwise, the sh_size + # member of the initial entry contains 0)." + if self['e_shnum'] == 0: + return self._get_section_header(0)['sh_size'] return self['e_shnum'] def get_section(self, n): @@ -116,7 +135,17 @@ def iter_sections(self): def num_segments(self): """ Number of segments in the file """ - return self['e_phnum'] + # From: https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI + # Section: 4.1.2 Number of Program Headers + # If the number of program headers is greater than or equal to + # PN_XNUM (0xffff), this member has the value PN_XNUM + # (0xffff). The actual number of program header table entries + # is contained in the sh_info field of the section header at + # index 0.
+ if self['e_phnum'] < 0xffff: + return self['e_phnum'] + else: + return self.get_section(0)['sh_info'] def get_segment(self, n): """ Get the segment at index #n from the file (Segment object) @@ -150,7 +179,7 @@ def has_dwarf_info(self): We assume that if it has the .debug_info or .zdebug_info section, it has all the other required sections as well. """ - return (self.get_section_by_name('.debug_info') or + return bool(self.get_section_by_name('.debug_info') or self.get_section_by_name('.zdebug_info') or self.get_section_by_name('.eh_frame')) @@ -167,7 +196,8 @@ def get_dwarf_info(self, relocate_dwarf_sections=True): section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str', '.debug_line', '.debug_frame', - '.debug_loc', '.debug_ranges') + '.debug_loc', '.debug_ranges', '.debug_pubtypes', + '.debug_pubnames') compressed = bool(self.get_section_by_name('.zdebug_info')) if compressed: @@ -178,8 +208,8 @@ def get_dwarf_info(self, relocate_dwarf_sections=True): (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name, debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name, - debug_loc_sec_name, debug_ranges_sec_name, - eh_frame_sec_name) = section_names + debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name, + debug_pubnames_name, eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -207,24 +237,232 @@ def get_dwarf_info(self, relocate_dwarf_sections=True): debug_str_sec=debug_sections[debug_str_sec_name], debug_loc_sec=debug_sections[debug_loc_sec_name], debug_ranges_sec=debug_sections[debug_ranges_sec_name], - debug_line_sec=debug_sections[debug_line_sec_name]) + debug_line_sec=debug_sections[debug_line_sec_name], + debug_pubtypes_sec = debug_sections[debug_pubtypes_name], + debug_pubnames_sec = debug_sections[debug_pubnames_name] + ) + + def has_ehabi_info(self): + """ Check whether this file appears to have arm exception handler index table. 
+ """ + return any(s['sh_type'] == 'SHT_ARM_EXIDX' for s in self.iter_sections()) + + def get_ehabi_infos(self): + """ Generally, shared library and executable contain 1 .ARM.exidx section. + Object file contains many .ARM.exidx sections. + So we must traverse every section and filter sections whose type is SHT_ARM_EXIDX. + """ + _ret = [] + if self['e_type'] == 'ET_REL': + # TODO: support relocatable file + assert False, "Current version of pyelftools doesn't support relocatable file." + for section in self.iter_sections(): + if section['sh_type'] == 'SHT_ARM_EXIDX': + _ret.append(EHABIInfo(section, self.little_endian)) + return _ret if len(_ret) > 0 else None def get_machine_arch(self): """ Return the machine architecture, as detected from the ELF header. - Not all architectures are supported at the moment. - """ - if self['e_machine'] == 'EM_X86_64': - return 'x64' - elif self['e_machine'] in ('EM_386', 'EM_486'): - return 'x86' - elif self['e_machine'] == 'EM_ARM': - return 'ARM' - elif self['e_machine'] == 'EM_AARCH64': - return 'AArch64' - elif self['e_machine'] == 'EM_MIPS': - return 'MIPS' + """ + architectures = { + 'EM_M32' : 'AT&T WE 32100', + 'EM_SPARC' : 'SPARC', + 'EM_386' : 'x86', + 'EM_68K' : 'Motorola 68000', + 'EM_88K' : 'Motorola 88000', + 'EM_IAMCU' : 'Intel MCU', + 'EM_860' : 'Intel 80860', + 'EM_MIPS' : 'MIPS', + 'EM_S370' : 'IBM System/370', + 'EM_MIPS_RS3_LE' : 'MIPS RS3000 Little-endian', + 'EM_PARISC' : 'Hewlett-Packard PA-RISC', + 'EM_VPP500' : 'Fujitsu VPP500', + 'EM_SPARC32PLUS' : 'Enhanced SPARC', + 'EM_960' : 'Intel 80960', + 'EM_PPC' : 'PowerPC', + 'EM_PPC64' : '64-bit PowerPC', + 'EM_S390' : 'IBM System/390', + 'EM_SPU' : 'IBM SPU/SPC', + 'EM_V800' : 'NEC V800', + 'EM_FR20' : 'Fujitsu FR20', + 'EM_RH32' : 'TRW RH-32', + 'EM_RCE' : 'Motorola RCE', + 'EM_ARM' : 'ARM', + 'EM_ALPHA' : 'Digital Alpha', + 'EM_SH' : 'Hitachi SH', + 'EM_SPARCV9' : 'SPARC Version 9', + 'EM_TRICORE' : 'Siemens TriCore embedded processor', + 'EM_ARC' : 
'Argonaut RISC Core, Argonaut Technologies Inc.', + 'EM_H8_300' : 'Hitachi H8/300', + 'EM_H8_300H' : 'Hitachi H8/300H', + 'EM_H8S' : 'Hitachi H8S', + 'EM_H8_500' : 'Hitachi H8/500', + 'EM_IA_64' : 'Intel IA-64', + 'EM_MIPS_X' : 'MIPS-X', + 'EM_COLDFIRE' : 'Motorola ColdFire', + 'EM_68HC12' : 'Motorola M68HC12', + 'EM_MMA' : 'Fujitsu MMA', + 'EM_PCP' : 'Siemens PCP', + 'EM_NCPU' : 'Sony nCPU', + 'EM_NDR1' : 'Denso NDR1', + 'EM_STARCORE' : 'Motorola Star*Core', + 'EM_ME16' : 'Toyota ME16', + 'EM_ST100' : 'STMicroelectronics ST100', + 'EM_TINYJ' : 'Advanced Logic TinyJ', + 'EM_X86_64' : 'x64', + 'EM_PDSP' : 'Sony DSP', + 'EM_PDP10' : 'Digital Equipment PDP-10', + 'EM_PDP11' : 'Digital Equipment PDP-11', + 'EM_FX66' : 'Siemens FX66', + 'EM_ST9PLUS' : 'STMicroelectronics ST9+ 8/16 bit', + 'EM_ST7' : 'STMicroelectronics ST7 8-bit', + 'EM_68HC16' : 'Motorola MC68HC16', + 'EM_68HC11' : 'Motorola MC68HC11', + 'EM_68HC08' : 'Motorola MC68HC08', + 'EM_68HC05' : 'Motorola MC68HC05', + 'EM_SVX' : 'Silicon Graphics SVx', + 'EM_ST19' : 'STMicroelectronics ST19 8-bit', + 'EM_VAX' : 'Digital VAX', + 'EM_CRIS' : 'Axis Communications 32-bit', + 'EM_JAVELIN' : 'Infineon Technologies 32-bit', + 'EM_FIREPATH' : 'Element 14 64-bit DSP', + 'EM_ZSP' : 'LSI Logic 16-bit DSP', + 'EM_MMIX' : 'Donald Knuth\'s educational 64-bit', + 'EM_HUANY' : 'Harvard University machine-independent object files', + 'EM_PRISM' : 'SiTera Prism', + 'EM_AVR' : 'Atmel AVR 8-bit', + 'EM_FR30' : 'Fujitsu FR30', + 'EM_D10V' : 'Mitsubishi D10V', + 'EM_D30V' : 'Mitsubishi D30V', + 'EM_V850' : 'NEC v850', + 'EM_M32R' : 'Mitsubishi M32R', + 'EM_MN10300' : 'Matsushita MN10300', + 'EM_MN10200' : 'Matsushita MN10200', + 'EM_PJ' : 'picoJava', + 'EM_OPENRISC' : 'OpenRISC 32-bit', + 'EM_ARC_COMPACT' : 'ARC International ARCompact', + 'EM_XTENSA' : 'Tensilica Xtensa', + 'EM_VIDEOCORE' : 'Alphamosaic VideoCore', + 'EM_TMM_GPP' : 'Thompson Multimedia', + 'EM_NS32K' : 'National Semiconductor 32000 series', + 'EM_TPC' : 'Tenor 
Network TPC', + 'EM_SNP1K' : 'Trebia SNP 1000', + 'EM_ST200' : 'STMicroelectronics ST200', + 'EM_IP2K' : 'Ubicom IP2xxx', + 'EM_MAX' : 'MAX', + 'EM_CR' : 'National Semiconductor CompactRISC', + 'EM_F2MC16' : 'Fujitsu F2MC16', + 'EM_MSP430' : 'Texas Instruments msp430', + 'EM_BLACKFIN' : 'Analog Devices Blackfin', + 'EM_SE_C33' : 'Seiko Epson S1C33', + 'EM_SEP' : 'Sharp', + 'EM_ARCA' : 'Arca RISC', + 'EM_UNICORE' : 'PKU-Unity MPRC', + 'EM_EXCESS' : 'eXcess', + 'EM_DXP' : 'Icera Semiconductor Deep Execution Processor', + 'EM_ALTERA_NIOS2' : 'Altera Nios II', + 'EM_CRX' : 'National Semiconductor CompactRISC CRX', + 'EM_XGATE' : 'Motorola XGATE', + 'EM_C166' : 'Infineon C16x/XC16x', + 'EM_M16C' : 'Renesas M16C', + 'EM_DSPIC30F' : 'Microchip Technology dsPIC30F', + 'EM_CE' : 'Freescale Communication Engine RISC core', + 'EM_M32C' : 'Renesas M32C', + 'EM_TSK3000' : 'Altium TSK3000', + 'EM_RS08' : 'Freescale RS08', + 'EM_SHARC' : 'Analog Devices SHARC', + 'EM_ECOG2' : 'Cyan Technology eCOG2', + 'EM_SCORE7' : 'Sunplus S+core7 RISC', + 'EM_DSP24' : 'New Japan Radio (NJR) 24-bit DSP', + 'EM_VIDEOCORE3' : 'Broadcom VideoCore III', + 'EM_LATTICEMICO32' : 'Lattice FPGA RISC', + 'EM_SE_C17' : 'Seiko Epson C17', + 'EM_TI_C6000' : 'TI TMS320C6000', + 'EM_TI_C2000' : 'TI TMS320C2000', + 'EM_TI_C5500' : 'TI TMS320C55x', + 'EM_TI_ARP32' : 'TI Application Specific RISC, 32bit', + 'EM_TI_PRU' : 'TI Programmable Realtime Unit', + 'EM_MMDSP_PLUS' : 'STMicroelectronics 64bit VLIW', + 'EM_CYPRESS_M8C' : 'Cypress M8C', + 'EM_R32C' : 'Renesas R32C', + 'EM_TRIMEDIA' : 'NXP Semiconductors TriMedia', + 'EM_QDSP6' : 'QUALCOMM DSP6', + 'EM_8051' : 'Intel 8051', + 'EM_STXP7X' : 'STMicroelectronics STxP7x', + 'EM_NDS32' : 'Andes Technology RISC', + 'EM_ECOG1' : 'Cyan Technology eCOG1X', + 'EM_ECOG1X' : 'Cyan Technology eCOG1X', + 'EM_MAXQ30' : 'Dallas Semiconductor MAXQ30', + 'EM_XIMO16' : 'New Japan Radio (NJR) 16-bit', + 'EM_MANIK' : 'M2000 Reconfigurable RISC', + 'EM_CRAYNV2' : 'Cray Inc. 
NV2', + 'EM_RX' : 'Renesas RX', + 'EM_METAG' : 'Imagination Technologies META', + 'EM_MCST_ELBRUS' : 'MCST Elbrus', + 'EM_ECOG16' : 'Cyan Technology eCOG16', + 'EM_CR16' : 'National Semiconductor CompactRISC CR16 16-bit', + 'EM_ETPU' : 'Freescale', + 'EM_SLE9X' : 'Infineon Technologies SLE9X', + 'EM_L10M' : 'Intel L10M', + 'EM_K10M' : 'Intel K10M', + 'EM_AARCH64' : 'AArch64', + 'EM_AVR32' : 'Atmel 32-bit', + 'EM_STM8' : 'STMicroeletronics STM8 8-bit', + 'EM_TILE64' : 'Tilera TILE64', + 'EM_TILEPRO' : 'Tilera TILEPro', + 'EM_MICROBLAZE' : 'Xilinx MicroBlaze 32-bit RISC', + 'EM_CUDA' : 'NVIDIA CUDA', + 'EM_TILEGX' : 'Tilera TILE-Gx', + 'EM_CLOUDSHIELD' : 'CloudShield', + 'EM_COREA_1ST' : 'KIPO-KAIST Core-A 1st generation', + 'EM_COREA_2ND' : 'KIPO-KAIST Core-A 2nd generation', + 'EM_ARC_COMPACT2' : 'Synopsys ARCompact V2', + 'EM_OPEN8' : 'Open8 8-bit RISC', + 'EM_RL78' : 'Renesas RL78', + 'EM_VIDEOCORE5' : 'Broadcom VideoCore V', + 'EM_78KOR' : 'Renesas 78KOR', + 'EM_56800EX' : 'Freescale 56800EX', + 'EM_BA1' : 'Beyond BA1', + 'EM_BA2' : 'Beyond BA2', + 'EM_XCORE' : 'XMOS xCORE', + 'EM_MCHP_PIC' : 'Microchip 8-bit PIC', + 'EM_INTEL205' : 'Reserved by Intel', + 'EM_INTEL206' : 'Reserved by Intel', + 'EM_INTEL207' : 'Reserved by Intel', + 'EM_INTEL208' : 'Reserved by Intel', + 'EM_INTEL209' : 'Reserved by Intel', + 'EM_KM32' : 'KM211 KM32 32-bit', + 'EM_KMX32' : 'KM211 KMX32 32-bit', + 'EM_KMX16' : 'KM211 KMX16 16-bit', + 'EM_KMX8' : 'KM211 KMX8 8-bit', + 'EM_KVARC' : 'KM211 KVARC', + 'EM_CDP' : 'Paneve CDP', + 'EM_COGE' : 'Cognitive', + 'EM_COOL' : 'Bluechip Systems CoolEngine', + 'EM_NORC' : 'Nanoradio Optimized RISC', + 'EM_CSR_KALIMBA' : 'CSR Kalimba', + 'EM_Z80' : 'Zilog Z80', + 'EM_VISIUM' : 'VISIUMcore', + 'EM_FT32' : 'FTDI Chip FT32 32-bit RISC', + 'EM_MOXIE' : 'Moxie', + 'EM_AMDGPU' : 'AMD GPU', + 'EM_RISCV' : 'RISC-V' + } + + return architectures.get(self['e_machine'], '') + + def get_shstrndx(self): + """ Find the string table section index for the section 
header table + """ + # From https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html: + # If the section name string table section index is greater than or + # equal to SHN_LORESERVE (0xff00), this member has the value SHN_XINDEX + # (0xffff) and the actual index of the section name string table section + # is contained in the sh_link field of the section header at index 0. + if self['e_shstrndx'] != SHN_INDICES.SHN_XINDEX: + return self['e_shstrndx'] else: - return '' + return self._get_section_header(0)['sh_link'] #-------------------------------- PRIVATE --------------------------------# @@ -295,7 +533,7 @@ def _get_section_name(self, section_header): string table """ name_offset = section_header['sh_name'] - return self._file_stringtable_section.get_string(name_offset) + return self._section_header_stringtable.get_string(name_offset) def _make_section(self, section_header): """ Create a section object of the appropriate type @@ -309,6 +547,8 @@ def _make_section(self, section_header): return NullSection(section_header, name, self) elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM', 'SHT_SUNW_LDYNSYM'): return self._make_symbol_table_section(section_header, name) + elif sectype == 'SHT_SYMTAB_SHNDX': + return self._make_symbol_table_index_section(section_header, name) elif sectype == 'SHT_SUNW_syminfo': return self._make_sunwsyminfo_table_section(section_header, name) elif sectype == 'SHT_GNU_verneed': @@ -327,6 +567,10 @@ def _make_section(self, section_header): return StabSection(section_header, name, self) elif sectype == 'SHT_ARM_ATTRIBUTES': return ARMAttributesSection(section_header, name, self) + elif sectype == 'SHT_HASH': + return self._make_elf_hash_section(section_header, name) + elif sectype == 'SHT_GNU_HASH': + return self._make_gnu_hash_section(section_header, name) else: return Section(section_header, name, self) @@ -340,6 +584,14 @@ def _make_symbol_table_section(self, section_header, name): elffile=self,
stringtable=strtab_section) + def _make_symbol_table_index_section(self, section_header, name): + """ Create a SymbolTableIndexSection object + """ + linked_symtab_index = section_header['sh_link'] + return SymbolTableIndexSection( + section_header, name, elffile=self, + symboltable=linked_symtab_index) + def _make_sunwsyminfo_table_section(self, section_header, name): """ Create a SUNWSyminfoTableSection """ @@ -380,6 +632,20 @@ def _make_gnu_versym_section(self, section_header, name): elffile=self, symboltable=strtab_section) + def _make_elf_hash_section(self, section_header, name): + linked_symtab_index = section_header['sh_link'] + symtab_section = self.get_section(linked_symtab_index) + return ELFHashSection( + section_header, name, self, symtab_section + ) + + def _make_gnu_hash_section(self, section_header, name): + linked_symtab_index = section_header['sh_link'] + symtab_section = self.get_section(linked_symtab_index) + return GNUHashSection( + section_header, name, self, symtab_section + ) + def _get_segment_header(self, n): """ Find the header of segment #n, parse it and return the struct """ @@ -388,10 +654,11 @@ def _get_segment_header(self, n): self.stream, stream_pos=self._segment_offset(n)) - def _get_file_stringtable(self): - """ Find the file's string table section + def _get_section_header_stringtable(self): + """ Get the string table section corresponding to the section header + table. 
""" - stringtable_section_num = self['e_shstrndx'] + stringtable_section_num = self.get_shstrndx() return StringTableSection( header=self._get_section_header(stringtable_section_num), name='', @@ -422,7 +689,7 @@ def _read_dwarf_section(self, section, relocate_dwarf_sections): stream=section_stream, name=section.name, global_offset=section['sh_offset'], - size=section['sh_size'], + size=section.data_size, address=section['sh_addr']) @staticmethod diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py index e37effa9..1af39736 100644 --- a/elftools/elf/enums.py +++ b/elftools/elf/enums.py @@ -361,7 +361,8 @@ SHT_MIPS_WHIRL=0x70000026, SHT_MIPS_EH_REGION=0x70000027, SHT_MIPS_XLATE_OLD=0x70000028, - SHT_MIPS_PDR_EXCEPTION=0x70000029)) + SHT_MIPS_PDR_EXCEPTION=0x70000029, + SHT_MIPS_ABIFLAGS=0x7000002a)) ENUM_ELFCOMPRESS_TYPE = dict( ELFCOMPRESS_ZLIB=1, @@ -396,6 +397,7 @@ PT_GNU_EH_FRAME=0x6474e550, PT_GNU_STACK=0x6474e551, PT_GNU_RELRO=0x6474e552, + PT_GNU_PROPERTY=0x6474e553, _default_=Pass, ) @@ -605,6 +607,45 @@ for k in ENUMMAP_EXTRA_D_TAG_MACHINE: ENUM_D_TAG.update(ENUMMAP_EXTRA_D_TAG_MACHINE[k]) +ENUM_DT_FLAGS = dict( + DF_ORIGIN=0x1, + DF_SYMBOLIC=0x2, + DF_TEXTREL=0x4, + DF_BIND_NOW=0x8, + DF_STATIC_TLS=0x10, +) + +ENUM_DT_FLAGS_1 = dict( + DF_1_NOW=0x1, + DF_1_GLOBAL=0x2, + DF_1_GROUP=0x4, + DF_1_NODELETE=0x8, + DF_1_LOADFLTR=0x10, + DF_1_INITFIRST=0x20, + DF_1_NOOPEN=0x40, + DF_1_ORIGIN=0x80, + DF_1_DIRECT=0x100, + DF_1_TRANS=0x200, + DF_1_INTERPOSE=0x400, + DF_1_NODEFLIB=0x800, + DF_1_NODUMP=0x1000, + DF_1_CONFALT=0x2000, + DF_1_ENDFILTEE=0x4000, + DF_1_DISPRELDNE=0x8000, + DF_1_DISPRELPND=0x10000, + DF_1_NODIRECT=0x20000, + DF_1_IGNMULDEF=0x40000, + DF_1_NOKSYMS=0x80000, + DF_1_NOHDR=0x100000, + DF_1_EDITED=0x200000, + DF_1_NORELOC=0x400000, + DF_1_SYMINTPOSE=0x800000, + DF_1_GLOBAUDIT=0x1000000, + DF_1_SINGLETON=0x2000000, + DF_1_STUB=0x4000000, + DF_1_PIE=0x8000000, +) + ENUM_RELOC_TYPE_MIPS = dict( R_MIPS_NONE=0, R_MIPS_16=1, @@ -744,6 +785,7 @@ 
R_X86_64_TLSDESC_CALL=35, R_X86_64_TLSDESC=36, R_X86_64_IRELATIVE=37, + R_X86_64_REX_GOTPCRELX=42, R_X86_64_GNU_VTINHERIT=250, R_X86_64_GNU_VTENTRY=251, _default_=Pass, diff --git a/elftools/elf/hash.py b/elftools/elf/hash.py new file mode 100644 index 00000000..c8d1e17e --- /dev/null +++ b/elftools/elf/hash.py @@ -0,0 +1,186 @@ +#------------------------------------------------------------------------------- +# elftools: elf/hash.py +# +# ELF hash table sections +# +# Andreas Ziegler (andreas.ziegler@fau.de) +# This code is in the public domain +#------------------------------------------------------------------------------- + +import struct + +from ..common.utils import struct_parse +from .sections import Section + + +class ELFHashTable(object): + """ Representation of an ELF hash table to find symbols in the + symbol table - useful for super-stripped binaries without section + headers where only the start of the symbol table is known from the + dynamic segment. The layout and contents are nicely described at + https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/. + + The symboltable argument needs to implement a get_symbol() method - + in a regular ELF file, this will be the linked symbol table section + as indicated by the sh_link attribute. For super-stripped binaries, + one should use the DynamicSegment object as the symboltable as it + supports symbol lookup without access to a symbol table section. + """ + + def __init__(self, elffile, start_offset, symboltable): + self.elffile = elffile + self._symboltable = symboltable + self.params = struct_parse(self.elffile.structs.Elf_Hash, + self.elffile.stream, + start_offset) + + def get_number_of_symbols(self): + """ Get the number of symbols from the hash table parameters. + """ + return self.params['nchains'] + + def get_symbol(self, name): + """ Look up a symbol from this hash table with the given name.
+ """ + if self.params['nbuckets'] == 0: + return None + hval = self.elf_hash(name) % self.params['nbuckets'] + symndx = self.params['buckets'][hval] + while symndx != 0: + sym = self._symboltable.get_symbol(symndx) + if sym.name == name: + return sym + symndx = self.params['chains'][symndx] + return None + + @staticmethod + def elf_hash(name): + """ Compute the hash value for a given symbol name. + """ + if not isinstance(name, bytes): + name = name.encode('utf-8') + h = 0 + x = 0 + for c in bytearray(name): + h = (h << 4) + c + x = h & 0xF0000000 + if x != 0: + h ^= (x >> 24) + h &= ~x + return h + + +class ELFHashSection(Section, ELFHashTable): + """ Section representation of an ELF hash table. In regular ELF files, this + allows us to use the common functions defined on Section objects when + dealing with the hash table. + """ + def __init__(self, header, name, elffile, symboltable): + Section.__init__(self, header, name, elffile) + ELFHashTable.__init__(self, elffile, self['sh_offset'], symboltable) + + +class GNUHashTable(object): + """ Representation of a GNU hash table to find symbols in the + symbol table - useful for super-stripped binaries without section + headers where only the start of the symbol table is known from the + dynamic segment. The layout and contents are nicely described at + https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/. + + The symboltable argument needs to implement a get_symbol() method - + in a regular ELF file, this will be the linked symbol table section + as indicated by the sh_link attribute. For super-stripped binaries, + one should use the DynamicSegment object as the symboltable as it + supports symbol lookup without access to a symbol table section.
+ """ + def __init__(self, elffile, start_offset, symboltable): + self.elffile = elffile + self._symboltable = symboltable + self.params = struct_parse(self.elffile.structs.Gnu_Hash, + self.elffile.stream, + start_offset) + # Element sizes in the hash table + self._wordsize = self.elffile.structs.Elf_word('').sizeof() + self._xwordsize = self.elffile.structs.Elf_xword('').sizeof() + self._chain_pos = start_offset + 4 * self._wordsize + \ + self.params['bloom_size'] * self._xwordsize + \ + self.params['nbuckets'] * self._wordsize + + def get_number_of_symbols(self): + """ Get the number of symbols in the hash table by finding the bucket + with the highest symbol index and walking to the end of its chain. + """ + # Find highest index in buckets array + max_idx = max(self.params['buckets']) + if max_idx < self.params['symoffset']: + return self.params['symoffset'] + + # Position the stream at the start of the corresponding chain + max_chain_pos = self._chain_pos + \ + (max_idx - self.params['symoffset']) * self._wordsize + self.elffile.stream.seek(max_chain_pos) + hash_format = 'I' + + # Walk the chain to its end (lowest bit is set) + while True: + cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0] + if cur_hash & 1: + return max_idx + 1 + + max_idx += 1 + + def _matches_bloom(self, H1): + """ Helper function to check if the given hash could be in the hash + table by testing it against the bloom filter. + """ + arch_bits = self.elffile.elfclass + H2 = H1 >> self.params['bloom_shift'] + word_idx = int(H1 / arch_bits) % self.params['bloom_size'] + BITMASK = (1 << (H1 % arch_bits)) | (1 << (H2 % arch_bits)) + return (self.params['bloom'][word_idx] & BITMASK) == BITMASK + + def get_symbol(self, name): + """ Look up a symbol from this hash table with the given name. 
+ """ + namehash = self.gnu_hash(name) + if not self._matches_bloom(namehash): + return None + + symidx = self.params['buckets'][namehash % self.params['nbuckets']] + if symidx < self.params['symoffset']: + return None + + self.elffile.stream.seek(self._chain_pos + (symidx - self.params['symoffset']) * self._wordsize) + hash_format = 'I' + while True: + cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0] + if cur_hash | 1 == namehash | 1: + symbol = self._symboltable.get_symbol(symidx) + if name == symbol.name: + return symbol + + if cur_hash & 1: + break + symidx += 1 + return None + + @staticmethod + def gnu_hash(key): + """ Compute the GNU-style hash value for a given symbol name. + """ + if not isinstance(key, bytes): + key = key.encode('utf-8') + h = 5381 + for c in bytearray(key): + h = h * 33 + c + return h & 0xFFFFFFFF + + +class GNUHashSection(Section, GNUHashTable): + """ Section representation of a GNU hash table. In regular ELF files, this + allows us to use the common functions defined on Section objects when + dealing with the hash table. 
+ """ + def __init__(self, header, name, elffile, symboltable): + Section.__init__(self, header, name, elffile) + GNUHashTable.__init__(self, elffile, self['sh_offset'], symboltable) diff --git a/elftools/elf/notes.py b/elftools/elf/notes.py index d34198b8..3e46b3a3 100644 --- a/elftools/elf/notes.py +++ b/elftools/elf/notes.py @@ -30,6 +30,7 @@ def iter_notes(elffile, offset, size): offset += disk_namesz desc_data = bytes2str(elffile.stream.read(note['n_descsz'])) + note['n_descdata'] = desc_data if note['n_type'] == 'NT_GNU_ABI_TAG': note['n_desc'] = struct_parse(elffile.structs.Elf_abi, elffile.stream, @@ -40,6 +41,10 @@ def iter_notes(elffile, offset, size): note['n_desc'] = struct_parse(elffile.structs.Elf_Prpsinfo, elffile.stream, offset) + elif note['n_type'] == 'NT_FILE': + note['n_desc'] = struct_parse(elffile.structs.Elf_Nt_File, + elffile.stream, + offset) else: note['n_desc'] = desc_data offset += roundup(note['n_descsz'], 2) diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index 6f2c4b4e..427a56f4 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -12,7 +12,8 @@ from ..common.utils import elf_assert, struct_parse from .sections import Section from .enums import ( - ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS, ENUM_RELOC_TYPE_ARM) + ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS, + ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_D_TAG) class Relocation(object): @@ -44,43 +45,44 @@ def __str__(self): return self.__repr__() -class RelocationSection(Section): - """ ELF relocation section. Serves as a collection of Relocation entries. 
+class RelocationTable(object): + """ Shared functionality between relocation sections and relocation tables """ - def __init__(self, header, name, elffile): - super(RelocationSection, self).__init__(header, name, elffile) - if self.header['sh_type'] == 'SHT_REL': - expected_size = self.structs.Elf_Rel.sizeof() - self.entry_struct = self.structs.Elf_Rel - elif self.header['sh_type'] == 'SHT_RELA': - expected_size = self.structs.Elf_Rela.sizeof() - self.entry_struct = self.structs.Elf_Rela + + def __init__(self, elffile, offset, size, is_rela): + self._stream = elffile.stream + self._elffile = elffile + self._elfstructs = elffile.structs + self._size = size + self._offset = offset + self._is_rela = is_rela + + if is_rela: + self.entry_struct = self._elfstructs.Elf_Rela else: - elf_assert(False, 'Unknown relocation type section') + self.entry_struct = self._elfstructs.Elf_Rel - elf_assert( - self.header['sh_entsize'] == expected_size, - 'Expected sh_entsize of SHT_REL section to be %s' % expected_size) + self.entry_size = self.entry_struct.sizeof() def is_RELA(self): """ Is this a RELA relocation section? If not, it's REL. """ - return self.header['sh_type'] == 'SHT_RELA' + return self._is_rela def num_relocations(self): """ Number of relocations in the section """ - return self['sh_size'] // self['sh_entsize'] + return self._size // self.entry_size def get_relocation(self, n): """ Get the relocation at index #n from the section (Relocation object) """ - entry_offset = self['sh_offset'] + n * self['sh_entsize'] + entry_offset = self._offset + n * self.entry_size entry = struct_parse( self.entry_struct, - self.stream, + self._stream, stream_pos=entry_offset) - return Relocation(entry, self.elffile) + return Relocation(entry, self._elffile) def iter_relocations(self): """ Yield all the relocations in the section @@ -89,6 +91,21 @@ def iter_relocations(self): yield self.get_relocation(i) +class RelocationSection(Section, RelocationTable): + """ ELF relocation section. 
Serves as a collection of Relocation entries. + """ + def __init__(self, header, name, elffile): + Section.__init__(self, header, name, elffile) + RelocationTable.__init__(self, self.elffile, + self['sh_offset'], self['sh_size'], header['sh_type'] == 'SHT_RELA') + + elf_assert(header['sh_type'] in ('SHT_REL', 'SHT_RELA'), + 'Unknown relocation type section') + elf_assert(header['sh_entsize'] == self.entry_size, + 'Expected sh_entsize of %s section to be %s' % ( + header['sh_type'], self.entry_size)) + + class RelocationHandler(object): """ Handles the logic of relocations in ELF files. """ @@ -156,6 +173,8 @@ def _do_apply_relocation(self, stream, reloc, symtab): raise ELFRelocationError( 'Unexpected RELA relocation for ARM: %s' % reloc) recipe = self._RELOCATION_RECIPES_ARM.get(reloc_type, None) + elif self.elffile.get_machine_arch() == 'AArch64': + recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type, None) if recipe is None: raise ELFRelocationError( @@ -219,10 +238,26 @@ def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0): def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0): return sym_value + addend - offset + def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0): + return sym_value // 4 + value - offset // 4 + _RELOCATION_RECIPES_ARM = { ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=False, calc_func=_reloc_calc_sym_plus_value), + ENUM_RELOC_TYPE_ARM['R_ARM_CALL']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=False, + calc_func=_arm_reloc_calc_sym_plus_value_pcrel), + } + + _RELOCATION_RECIPES_AARCH64 = { + ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS64']: _RELOCATION_RECIPE_TYPE( + bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS32']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + ENUM_RELOC_TYPE_AARCH64['R_AARCH64_PREL32']: _RELOCATION_RECIPE_TYPE( 
+ bytesize=4, has_addend=True, + calc_func=_reloc_calc_sym_plus_addend_pcrel), } # https://dmz-portal.mips.com/wiki/MIPS_relocation_types diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index 20e90562..9a97a09f 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -74,6 +74,10 @@ def data(self): Note that data is decompressed if the stored section data is compressed. """ + # If this section is NOBITS, there is no data. provide a dummy answer + if self.header['sh_type'] == 'SHT_NOBITS': + return b'\0'*self.data_size + # If this section is compressed, deflate it if self.compressed: c_type = self._compression_type @@ -137,7 +141,27 @@ def get_string(self, offset): """ table_offset = self['sh_offset'] s = parse_cstring_from_stream(self.stream, table_offset + offset) - return s.decode('utf-8') if s else '' + return s.decode('utf-8', errors='replace') if s else '' + + +class SymbolTableIndexSection(Section): + """ A section containing the section header table indices corresponding + to symbols in the linked symbol table. This section has to exist if the + symbol table contains an entry with a section header index set to + SHN_XINDEX (0xffff). The format of the section is described at + https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html + """ + def __init__(self, header, name, elffile, symboltable): + super(SymbolTableIndexSection, self).__init__(header, name, elffile) + self.symboltable = symboltable + + def get_section_index(self, n): + """ Get the section header table index for the symbol with index #n. + The section contains an array of Elf32_word values with one entry + for every symbol in the associated symbol table. 
+ """ + return struct_parse(self.elffile.structs.Elf_word(''), self.stream, + self['sh_offset'] + n * self['sh_entsize']) class SymbolTableSection(Section): @@ -267,7 +291,7 @@ def iter_stabs(self): while offset < end: stabs = struct_parse( self.structs.Elf_Stabs, - self.elffile.stream, + self.stream, stream_pos=offset) stabs['n_offset'] = offset offset += self.structs.Elf_Stabs.sizeof() @@ -287,14 +311,12 @@ def __init__(self, structs, stream): if self.tag != 'TAG_FILE': self.extra = [] - s_number = struct_parse(self.structs.Elf_uleb128('s_number'), - self.stream - ) + s_number = struct_parse(structs.Elf_uleb128('s_number'), stream) while s_number != 0: self.extra.append(s_number) - s_number = struct_parse(self.structs.Elf_uleb128('s_number'), - self.stream + s_number = struct_parse(structs.Elf_uleb128('s_number'), + stream ) elif self.tag in ('TAG_CPU_RAW_NAME', 'TAG_CPU_NAME', 'TAG_CONFORMANCE'): @@ -313,7 +335,7 @@ def __init__(self, structs, stream): if type(self.value.value) is not str: nul = struct_parse(structs.Elf_byte('nul'), stream) - elf_assert(null_byte == 0, + elf_assert(nul == 0, "Invalid terminating byte %r, expecting NUL." 
% nul) else: @@ -324,7 +346,7 @@ def tag(self): return self._tag['tag'] def __repr__(self): - s = '' % (self.tag, self.value) + s = '' % (self.tag, self.value) s += ' %s' % self.extra if self.extra is not None else '' return s @@ -372,7 +394,7 @@ def _make_attributes(self): yield ARMAttribute(self.structs, self.stream) def __repr__(self): - s = "" + s = "" return s % (self.header.tag[4:], self.header.value) diff --git a/elftools/elf/segments.py b/elftools/elf/segments.py index 16560bcd..0c318e17 100644 --- a/elftools/elf/segments.py +++ b/elftools/elf/segments.py @@ -40,15 +40,23 @@ def section_in_segment(self, section): sectype = section['sh_type'] secflags = section['sh_flags'] - # Only PT_LOAD, PT_GNU_RELR0 and PT_TLS segments can contain SHF_TLS + # Only PT_LOAD, PT_GNU_RELRO and PT_TLS segments can contain SHF_TLS # sections if ( secflags & SH_FLAGS.SHF_TLS and - segtype in ('PT_TLS', 'PT_GNU_RELR0', 'PT_LOAD')): - return False + segtype in ('PT_TLS', 'PT_GNU_RELRO', 'PT_LOAD')): + pass # PT_TLS segment contains only SHF_TLS sections, PT_PHDR no sections # at all - elif ( (secflags & SH_FLAGS.SHF_TLS) != 0 and + elif ( (secflags & SH_FLAGS.SHF_TLS) == 0 and segtype not in ('PT_TLS', 'PT_PHDR')): + pass + else: + return False + + # PT_LOAD and similar segments only have SHF_ALLOC sections. 
+ if ( (secflags & SH_FLAGS.SHF_ALLOC) == 0 and + segtype in ('PT_LOAD', 'PT_DYNAMIC', 'PT_GNU_EH_FRAME', + 'PT_GNU_RELRO', 'PT_GNU_STACK')): return False # In ELF_SECTION_IN_SEGMENT_STRICT the flag check_vma is on, so if diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index 660f6872..b9203c6b 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -43,6 +43,17 @@ def __init__(self, little_endian=True, elfclass=32): assert elfclass == 32 or elfclass == 64 self.little_endian = little_endian self.elfclass = elfclass + self.e_type = None + self.e_machine = None + self.e_ident_osabi = None + + def __getstate__(self): + return self.little_endian, self.elfclass, self.e_type, self.e_machine, self.e_ident_osabi + + def __setstate__(self, state): + self.little_endian, self.elfclass, e_type, e_machine, e_osabi = state + self.create_basic_structs() + self.create_advanced_structs(e_type, e_machine, e_osabi) def create_basic_structs(self): """ Create word-size related structs and ehdr struct needed for @@ -76,12 +87,16 @@ def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=Non """ Create all ELF structs except the ehdr. They may possibly depend on provided e_type and/or e_machine parsed from ehdr. 
""" - self._create_phdr(e_machine) - self._create_shdr(e_machine) + self.e_type = e_type + self.e_machine = e_machine + self.e_ident_osabi = e_ident_osabi + + self._create_phdr() + self._create_shdr() self._create_chdr() self._create_sym() self._create_rel() - self._create_dyn(e_machine, e_ident_osabi) + self._create_dyn() self._create_sunw_syminfo() self._create_gnu_verneed() self._create_gnu_verdef() @@ -90,6 +105,8 @@ def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=Non self._create_note(e_type) self._create_stabs() self._create_arm_attributes() + self._create_elf_hash() + self._create_gnu_hash() #-------------------------------- PRIVATE --------------------------------# @@ -125,13 +142,13 @@ def _create_leb128(self): def _create_ntbs(self): self.Elf_ntbs = CString - def _create_phdr(self, e_machine=None): + def _create_phdr(self): p_type_dict = ENUM_P_TYPE_BASE - if e_machine == 'EM_ARM': + if self.e_machine == 'EM_ARM': p_type_dict = ENUM_P_TYPE_ARM - elif e_machine == 'EM_AARCH64': + elif self.e_machine == 'EM_AARCH64': p_type_dict = ENUM_P_TYPE_AARCH64 - elif e_machine == 'EM_MIPS': + elif self.e_machine == 'EM_MIPS': p_type_dict = ENUM_P_TYPE_MIPS if self.elfclass == 32: @@ -157,17 +174,17 @@ def _create_phdr(self, e_machine=None): self.Elf_xword('p_align'), ) - def _create_shdr(self, e_machine=None): + def _create_shdr(self): """Section header parsing. Depends on e_machine because of machine-specific values in sh_type. 
""" sh_type_dict = ENUM_SH_TYPE_BASE - if e_machine == 'EM_ARM': + if self.e_machine == 'EM_ARM': sh_type_dict = ENUM_SH_TYPE_ARM - elif e_machine == 'EM_X86_64': + elif self.e_machine == 'EM_X86_64': sh_type_dict = ENUM_SH_TYPE_AMD64 - elif e_machine == 'EM_MIPS': + elif self.e_machine == 'EM_MIPS': sh_type_dict = ENUM_SH_TYPE_MIPS self.Elf_Shdr = Struct('Elf_Shdr', @@ -198,38 +215,63 @@ def _create_chdr(self): self.Elf_Chdr = Struct('Elf_Chdr', *fields) def _create_rel(self): - # r_info is also taken apart into r_info_sym and r_info_type. - # This is done in Value to avoid endianity issues while parsing. + # r_info is also taken apart into r_info_sym and r_info_type. This is + # done in Value to avoid endianity issues while parsing. if self.elfclass == 32: - r_info_sym = Value('r_info_sym', - lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF) - r_info_type = Value('r_info_type', - lambda ctx: ctx['r_info'] & 0xFF) - else: # 64 - r_info_sym = Value('r_info_sym', - lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF) - r_info_type = Value('r_info_type', - lambda ctx: ctx['r_info'] & 0xFFFFFFFF) + fields = [self.Elf_xword('r_info'), + Value('r_info_sym', + lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF), + Value('r_info_type', + lambda ctx: ctx['r_info'] & 0xFF)] + elif self.e_machine == 'EM_MIPS': # ELF64 MIPS + fields = [ + # The MIPS ELF64 specification + # (https://www.linux-mips.org/pub/linux/mips/doc/ABI/elf64-2.4.pdf) + # provides a non-standard relocation structure definition. + self.Elf_word('r_sym'), + self.Elf_byte('r_ssym'), + self.Elf_byte('r_type3'), + self.Elf_byte('r_type2'), + self.Elf_byte('r_type'), + + # Synthetize usual fields for compatibility with other + # architectures. This allows relocation consumers (including + # our readelf tests) to work without worrying about MIPS64 + # oddities. 
+ Value('r_info_sym', lambda ctx: ctx['r_sym']), + Value('r_info_ssym', lambda ctx: ctx['r_ssym']), + Value('r_info_type', lambda ctx: ctx['r_type']), + Value('r_info_type2', lambda ctx: ctx['r_type2']), + Value('r_info_type3', lambda ctx: ctx['r_type3']), + Value('r_info', + lambda ctx: (ctx['r_sym'] << 32) + | (ctx['r_ssym'] << 24) + | (ctx['r_type3'] << 16) + | (ctx['r_type2'] << 8) + | ctx['r_type']), + ] + else: # Other 64 ELFs + fields = [self.Elf_xword('r_info'), + Value('r_info_sym', + lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF), + Value('r_info_type', + lambda ctx: ctx['r_info'] & 0xFFFFFFFF)] self.Elf_Rel = Struct('Elf_Rel', - self.Elf_addr('r_offset'), - self.Elf_xword('r_info'), - r_info_sym, - r_info_type, - ) + self.Elf_addr('r_offset'), + *fields) + + fields_and_addend = fields + [self.Elf_sxword('r_addend')] self.Elf_Rela = Struct('Elf_Rela', - self.Elf_addr('r_offset'), - self.Elf_xword('r_info'), - r_info_sym, - r_info_type, - self.Elf_sxword('r_addend'), + self.Elf_addr('r_offset'), + *fields_and_addend ) - def _create_dyn(self, e_machine=None, e_ident_osabi=None): + def _create_dyn(self): d_tag_dict = dict(ENUM_D_TAG_COMMON) - if e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE: - d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[e_machine]) - elif e_ident_osabi == 'ELFOSABI_SOLARIS': + if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE: + d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine]) + elif self.e_ident_osabi == 'ELFOSABI_SOLARIS': d_tag_dict.update(ENUM_D_TAG_SOLARIS) self.Elf_Dyn = Struct('Elf_Dyn', @@ -372,6 +414,21 @@ def _create_note(self, e_type=None): String('pr_psargs', 80), ) + # A PT_NOTE of type NT_FILE matching the definition in + # https://chromium.googlesource.com/ + # native_client/nacl-binutils/+/upstream/master/binutils/readelf.c + # Line 15121 + self.Elf_Nt_File = Struct('Elf_Nt_File', + self.Elf_xword("num_map_entries"), + self.Elf_xword("page_size"), + Array(lambda ctx: ctx.num_map_entries, 
+ Struct('Elf_Nt_File_Entry', + self.Elf_addr('vm_start'), + self.Elf_addr('vm_end'), + self.Elf_offset('page_offset'))), + Array(lambda ctx: ctx.num_map_entries, + CString('filename'))) + def _create_stabs(self): # Structure of one stabs entry, see binutils/bfd/stabs.c # Names taken from https://sourceware.org/gdb/current/onlinedocs/stabs.html#Overview @@ -398,3 +455,29 @@ def _create_arm_attributes(self): Enum(self.Elf_uleb128('tag'), **ENUM_ATTR_TAG_ARM) ) + + def _create_elf_hash(self): + # Structure of the old SYSV-style hash table header. It is documented + # in the Oracle "Linker and Libraries Guide", Part IV ELF Application + # Binary Interface, Chapter 14 Object File Format, Section Hash Table + # Section: + # https://docs.oracle.com/cd/E53394_01/html/E54813/chapter6-48031.html + + self.Elf_Hash = Struct('Elf_Hash', + self.Elf_word('nbuckets'), + self.Elf_word('nchains'), + Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')), + Array(lambda ctx: ctx['nchains'], self.Elf_word('chains'))) + + def _create_gnu_hash(self): + # Structure of the GNU-style hash table header. 
Documentation for this + # table is mostly in the GLIBC source code, a good explanation of the + # format can be found in this blog post: + # https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ + self.Gnu_Hash = Struct('Gnu_Hash', + self.Elf_word('nbuckets'), + self.Elf_word('symoffset'), + self.Elf_word('bloom_size'), + self.Elf_word('bloom_shift'), + Array(lambda ctx: ctx['bloom_size'], self.Elf_xword('bloom')), + Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets'))) diff --git a/examples/dwarf_decode_address.py b/examples/dwarf_decode_address.py index 7b7d3e01..047ce3b5 100644 --- a/examples/dwarf_decode_address.py +++ b/examples/dwarf_decode_address.py @@ -83,7 +83,11 @@ def decode_file_line(dwarfinfo, address): prevstate = None for entry in lineprog.get_entries(): # We're interested in those entries where a new state is assigned - if entry.state is None or entry.state.end_sequence: + if entry.state is None: + continue + if entry.state.end_sequence: + # if the line number sequence ends, clear prevstate. + prevstate = None continue # Looking for a range of addresses in two consecutive states that # contain the required address. diff --git a/examples/dwarf_lineprogram_filenames.py b/examples/dwarf_lineprogram_filenames.py new file mode 100644 index 00000000..2dd0e704 --- /dev/null +++ b/examples/dwarf_lineprogram_filenames.py @@ -0,0 +1,95 @@ +#------------------------------------------------------------------------------- +# elftools example: dwarf_lineprogram_filenames.py +# +# In the .debug_line section, the Dwarf line program generates a matrix +# of address-source references. This example demonstrates accessing the state +# of each line program entry to retrieve the underlying filenames. 
+# +# William Woodruff (william@yossarian.net) +# This code is in the public domain +#------------------------------------------------------------------------------- +from __future__ import print_function +from collections import defaultdict +import os +import sys + +# If pyelftools is not installed, the example can also run from the root or +# examples/ dir of the source distribution. +sys.path[0:0] = ['.', '..'] + +from elftools.elf.elffile import ELFFile + + +def process_file(filename): + print('Processing file:', filename) + with open(filename, 'rb') as f: + elffile = ELFFile(f) + + if not elffile.has_dwarf_info(): + print(' file has no DWARF info') + return + + dwarfinfo = elffile.get_dwarf_info() + for CU in dwarfinfo.iter_CUs(): + print(' Found a compile unit at offset %s, length %s' % ( + CU.cu_offset, CU['unit_length'])) + + # Every compilation unit in the DWARF information may or may not + # have a corresponding line program in .debug_line. + line_program = dwarfinfo.line_program_for_CU(CU) + if line_program is None: + print(' DWARF info is missing a line program for this CU') + continue + + # Print a reverse mapping of filename -> #entries + line_entry_mapping(line_program) + + +def line_entry_mapping(line_program): + filename_map = defaultdict(int) + + # The line program, when decoded, returns a list of line program + # entries. Each entry contains a state, which we'll use to build + # a reverse mapping of filename -> #entries. + lp_entries = line_program.get_entries() + for lpe in lp_entries: + # We skip LPEs that don't have an associated file. + # This can happen if instructions in the compiled binary + # don't correspond directly to any original source file. 
+ if not lpe.state or lpe.state.file == 0: + continue + filename = lpe_filename(line_program, lpe.state.file) + filename_map[filename] += 1 + + for filename, lpe_count in filename_map.items(): + print(" filename=%s -> %d entries" % (filename, lpe_count)) + + +def lpe_filename(line_program, file_index): + # Retrieving the filename associated with a line program entry + # involves two levels of indirection: we take the file index from + # the LPE to grab the file_entry from the line program header, + # then take the directory index from the file_entry to grab the + # directory name from the line program header. Finally, we + # join the (base) filename from the file_entry to the directory + # name to get the absolute filename. + lp_header = line_program.header + file_entries = lp_header["file_entry"] + + # File and directory indices are 1-indexed. + file_entry = file_entries[file_index - 1] + dir_index = file_entry["dir_index"] + + # A dir_index of 0 indicates that no absolute directory was recorded during + # compilation; return just the basename. + if dir_index == 0: + return file_entry.name.decode() + + directory = lp_header["include_directory"][dir_index - 1] + return os.path.join(directory, file_entry.name).decode() + + +if __name__ == '__main__': + if sys.argv[1] == '--test': + for filename in sys.argv[2:]: + process_file(filename) diff --git a/examples/dwarf_location_lists.py b/examples/dwarf_location_info.py similarity index 56% rename from examples/dwarf_location_lists.py rename to examples/dwarf_location_info.py index 06401d34..0ec9933f 100644 --- a/examples/dwarf_location_lists.py +++ b/examples/dwarf_location_info.py @@ -1,8 +1,20 @@ #------------------------------------------------------------------------------- -# elftools example: dwarf_location_lists.py +# elftools example: dwarf_location_info.py # -# Examine DIE entries which have location list values, and decode these -# location lists. 
+# Examine DIE entries which have either location list values or location +# expression values and decode that information. +# +# Location information can either be completely contained within a DIE +# (using 'DW_FORM_exprloc' in DWARFv4 or 'DW_FORM_block1' in earlier +# versions) or be a reference to a location list contained within +# the .debug_loc section (using 'DW_FORM_sec_offset' in DWARFv4 or +# 'DW_FORM_data4' / 'DW_FORM_data8' in earlier versions). +# +# The LocationParser object parses the DIE attributes and handles both +# formats. +# +# The directory 'test/testfiles_for_location_info' contains test files with +# location information represented in both DWARFv4 and DWARFv2 forms. # # Eli Bendersky (eliben@gmail.com) # This code is in the public domain @@ -14,13 +26,12 @@ # examples/ dir of the source distribution. sys.path[0:0] = ['.', '..'] - from elftools.common.py3compat import itervalues from elftools.elf.elffile import ELFFile from elftools.dwarf.descriptions import ( describe_DWARF_expr, set_global_machine_arch) -from elftools.dwarf.locationlists import LocationEntry - +from elftools.dwarf.locationlists import ( + LocationEntry, LocationExpr, LocationParser) def process_file(filename): print('Processing file:', filename) @@ -43,6 +54,10 @@ def process_file(filename): # register names contained in DWARF expressions. set_global_machine_arch(elffile.get_machine_arch()) + # Create a LocationParser object that parses the DIE attributes and + # creates objects representing the actual location information. + loc_parser = LocationParser(location_lists) + for CU in dwarfinfo.iter_CUs(): # DWARFInfo allows to iterate over the compile units contained in # the .debug_info section. CU is a CompileUnit object, with some @@ -58,20 +73,25 @@ def process_file(filename): # AttributeValue object (from elftools.dwarf.die), which we # can examine. for attr in itervalues(DIE.attributes): - if attribute_has_location_list(attr): - # This is a location list. 
Its value is an offset into - # the .debug_loc section, so we can use the location - # lists object to decode it. - loclist = location_lists.get_location_list_at_offset( - attr.value) - - print(' DIE %s. attr %s.\n%s' % ( - DIE.tag, - attr.name, - show_loclist(loclist, dwarfinfo, indent=' '))) - - -def show_loclist(loclist, dwarfinfo, indent): + # Check if this attribute contains location information + if loc_parser.attribute_has_location(attr, CU['version']): + print(' DIE %s. attr %s.' % (DIE.tag, attr.name)) + loc = loc_parser.parse_from_attribute(attr, + CU['version']) + # We either get a list (in case the attribute is a + # reference to the .debug_loc section) or a LocationExpr + # object (in case the attribute itself contains location + # information). + if isinstance(loc, LocationExpr): + print(' %s' % ( + describe_DWARF_expr(loc.loc_expr, + dwarfinfo.structs, CU.cu_offset))) + elif isinstance(loc, list): + print(show_loclist(loc, + dwarfinfo, + ' ', CU.cu_offset)) + +def show_loclist(loclist, dwarfinfo, indent, cu_offset): """ Display a location list nicely, decoding the DWARF expressions contained within. 
""" @@ -80,26 +100,11 @@ def show_loclist(loclist, dwarfinfo, indent): if isinstance(loc_entity, LocationEntry): d.append('%s <<%s>>' % ( loc_entity, - describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs))) + describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs, cu_offset))) else: d.append(str(loc_entity)) return '\n'.join(indent + s for s in d) - -def attribute_has_location_list(attr): - """ Only some attributes can have location list values, if they have the - required DW_FORM (loclistptr "class" in DWARF spec v3) - """ - if (attr.name in ( 'DW_AT_location', 'DW_AT_string_length', - 'DW_AT_const_value', 'DW_AT_return_addr', - 'DW_AT_data_member_location', 'DW_AT_frame_base', - 'DW_AT_segment', 'DW_AT_static_link', - 'DW_AT_use_location', 'DW_AT_vtable_elem_location')): - if attr.form in ('DW_FORM_data4', 'DW_FORM_data8'): - return True - return False - - if __name__ == '__main__': if sys.argv[1] == '--test': for filename in sys.argv[2:]: diff --git a/examples/dwarf_pubnames_types.py b/examples/dwarf_pubnames_types.py new file mode 100644 index 00000000..d9daaff4 --- /dev/null +++ b/examples/dwarf_pubnames_types.py @@ -0,0 +1,116 @@ +#------------------------------------------------------------------------------- +# elftools example: dwarf_pubnames_types.py +# +# Dump the contents of .debug_pubnames and .debug_pubtypes sections from the +# ELF file. +# +# Note: sample_exe64.elf doesn't have a .debug_pubtypes section. +# +# Vijay Ramasami (rvijayc@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from __future__ import print_function +import sys + +# If pyelftools is not installed, the example can also run from the root or +# examples/ dir of the source distribution. 
+sys.path[0:0] = ['.', '..'] + +from elftools.elf.elffile import ELFFile +from elftools.common.py3compat import bytes2str + +def process_file(filename): + print('Processing file:', filename) + with open(filename, 'rb') as f: + elffile = ELFFile(f) + + if not elffile.has_dwarf_info(): + print(' file has no DWARF info') + return + + # get_dwarf_info returns a DWARFInfo context object, which is the + # starting point for all DWARF-based processing in pyelftools. + dwarfinfo = elffile.get_dwarf_info() + + # get .debug_pubtypes section. + pubnames = dwarfinfo.get_pubnames() + if pubnames is None: + print('ERROR: No .debug_pubnames section found in ELF.') + else: + print('%d entries found in .debug_pubnames' % len(pubnames)) + + print('Trying pubnames example ...') + for name, entry in pubnames.items(): + print('%s: cu_ofs = %d, die_ofs = %d' % + (name, entry.cu_ofs, entry.die_ofs)) + + # get the actual CU/DIE that has this information. + print('Fetching the actual die for %s ...' % name) + for cu in dwarfinfo.iter_CUs(): + if cu.cu_offset == entry.cu_ofs: + for die in cu.iter_DIEs(): + if die.offset == entry.die_ofs: + print('Die Name: %s' % + bytes2str(die.attributes['DW_AT_name'].value)) + + # dump all entries in .debug_pubnames section. + print('Dumping .debug_pubnames table ...') + print('-' * 66) + print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS')) + print('-' * 66) + for (name, entry) in pubnames.items(): + print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs)) + print('-' * 66) + + # get .debug_pubtypes section. + pubtypes = dwarfinfo.get_pubtypes() + if pubtypes is None: + print('ERROR: No .debug_pubtypes section found in ELF') + else: + print('%d entries found in .debug_pubtypes' % len(pubtypes)) + + for name, entry in pubtypes.items(): + print('%s: cu_ofs = %d, die_ofs = %d' % + (name, entry.cu_ofs, entry.die_ofs)) + + # get the actual CU/DIE that has this information. + print('Fetching the actual die for %s ...' 
% name) + for cu in dwarfinfo.iter_CUs(): + if cu.cu_offset == entry.cu_ofs: + for die in cu.iter_DIEs(): + if die.offset == entry.die_ofs: + print('Die Name: %s' % + bytes2str(die.attributes['DW_AT_name'].value)) + die_info_rec(die) + + # dump all entries in .debug_pubtypes section. + print('Dumping .debug_pubtypes table ...') + print('-' * 66) + print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS')) + print('-' * 66) + for (name, entry) in pubtypes.items(): + print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs)) + print('-' * 66) + + +def die_info_rec(die, indent_level=' '): + """ A recursive function for showing information about a DIE and its + children. + """ + print(indent_level + 'DIE tag=%s, attrs=' % die.tag) + for name, val in die.attributes.items(): + print(indent_level + ' %s = %s' % (name, val)) + child_indent = indent_level + ' ' + for child in die.iter_children(): + die_info_rec(child, child_indent) + + +if __name__ == '__main__': + if sys.argv[1] == '--test': + process_file(sys.argv[2]) + sys.exit(0) + + if len(sys.argv) < 2: + print('Expected usage: {0} '.format(sys.argv[0])) + sys.exit(1) + process_file(sys.argv[1]) diff --git a/examples/reference_output/dwarf_lineprogram_filenames.out b/examples/reference_output/dwarf_lineprogram_filenames.out new file mode 100644 index 00000000..b20bbdda --- /dev/null +++ b/examples/reference_output/dwarf_lineprogram_filenames.out @@ -0,0 +1,8 @@ +Processing file: ./examples/sample_exe64.elf + Found a compile unit at offset 0, length 115 + filename=../sysdeps/x86_64/elf/start.S -> 13 entries + Found a compile unit at offset 119, length 135 + Found a compile unit at offset 258, length 156 + filename=z.c -> 5 entries + Found a compile unit at offset 418, length 300 + filename=elf-init.c -> 15 entries diff --git a/examples/reference_output/dwarf_location_info.out b/examples/reference_output/dwarf_location_info.out new file mode 100644 index 00000000..01c8933b --- /dev/null +++ 
b/examples/reference_output/dwarf_location_info.out @@ -0,0 +1,33 @@ +Processing file: ./examples/sample_exe64.elf + Found a compile unit at offset 0, length 115 + Found a compile unit at offset 119, length 135 + DIE DW_TAG_variable. attr DW_AT_location. + (DW_OP_addr: 400608) + Found a compile unit at offset 258, length 156 + DIE DW_TAG_subprogram. attr DW_AT_frame_base. + LocationEntry(entry_offset=0, begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> + LocationEntry(entry_offset=20, begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>> + LocationEntry(entry_offset=40, begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + (DW_OP_fbreg: -20) + DIE DW_TAG_formal_parameter. attr DW_AT_location. + (DW_OP_fbreg: -32) + DIE DW_TAG_variable. attr DW_AT_location. + (DW_OP_addr: 601018) + Found a compile unit at offset 418, length 300 + DIE DW_TAG_subprogram. attr DW_AT_frame_base. + (DW_OP_breg7 (rsp): 8) + DIE DW_TAG_subprogram. attr DW_AT_frame_base. + LocationEntry(entry_offset=76, begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> + LocationEntry(entry_offset=96, begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + LocationEntry(entry_offset=133, begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>> + LocationEntry(entry_offset=152, begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + LocationEntry(entry_offset=187, begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>> + LocationEntry(entry_offset=206, begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. 
+ LocationEntry(entry_offset=241, begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>> + LocationEntry(entry_offset=260, begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>> + DIE DW_TAG_variable. attr DW_AT_location. + LocationEntry(entry_offset=295, begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>> diff --git a/examples/reference_output/dwarf_location_lists.out b/examples/reference_output/dwarf_location_lists.out deleted file mode 100644 index 8788755c..00000000 --- a/examples/reference_output/dwarf_location_lists.out +++ /dev/null @@ -1,23 +0,0 @@ -Processing file: ./examples/sample_exe64.elf - Found a compile unit at offset 0, length 115 - Found a compile unit at offset 119, length 135 - Found a compile unit at offset 258, length 156 - DIE DW_TAG_subprogram. attr DW_AT_frame_base. - LocationEntry(begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> - LocationEntry(begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>> - LocationEntry(begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>> - Found a compile unit at offset 418, length 300 - DIE DW_TAG_subprogram. attr DW_AT_frame_base. - LocationEntry(begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> - LocationEntry(begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>> - DIE DW_TAG_formal_parameter. attr DW_AT_location. - LocationEntry(begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>> - LocationEntry(begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>> - DIE DW_TAG_formal_parameter. attr DW_AT_location. - LocationEntry(begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>> - LocationEntry(begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>> - DIE DW_TAG_formal_parameter. attr DW_AT_location. 
- LocationEntry(begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>> - LocationEntry(begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>> - DIE DW_TAG_variable. attr DW_AT_location. - LocationEntry(begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>> diff --git a/examples/reference_output/dwarf_pubnames_types.out b/examples/reference_output/dwarf_pubnames_types.out new file mode 100644 index 00000000..b8f4040d --- /dev/null +++ b/examples/reference_output/dwarf_pubnames_types.out @@ -0,0 +1,29 @@ +Processing file: ./examples/sample_exe64.elf +5 entries found in .debug_pubnames +Trying pubnames example ... +_IO_stdin_used: cu_ofs = 119, die_ofs = 230 +Fetching the actual die for _IO_stdin_used ... +Die Name: _IO_stdin_used +main: cu_ofs = 258, die_ofs = 303 +Fetching the actual die for main ... +Die Name: main +glob: cu_ofs = 258, die_ofs = 395 +Fetching the actual die for glob ... +Die Name: glob +__libc_csu_fini: cu_ofs = 418, die_ofs = 495 +Fetching the actual die for __libc_csu_fini ... +Die Name: __libc_csu_fini +__libc_csu_init: cu_ofs = 418, die_ofs = 523 +Fetching the actual die for __libc_csu_init ... +Die Name: __libc_csu_init +Dumping .debug_pubnames table ... +------------------------------------------------------------------ + Symbol CU_OFS DIE_OFS +------------------------------------------------------------------ + _IO_stdin_used 119 230 + main 258 303 + glob 258 395 + __libc_csu_fini 418 495 + __libc_csu_init 418 523 +------------------------------------------------------------------ +ERROR: No .debug_pubtypes section found in ELF diff --git a/scripts/readelf.py b/scripts/readelf.py index 8535119f..c298aa28 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -10,6 +10,14 @@ import argparse import os, sys import string +import traceback +import itertools +# Note: zip has different behaviour between Python 2.x and 3.x. +# - Using izip ensures compatibility. 
+try: + from itertools import izip +except: + izip = zip # For running from development directory. It should take precedence over the # installed pyelftools. @@ -24,7 +32,9 @@ from elftools.elf.dynamic import DynamicSection, DynamicSegment from elftools.elf.enums import ENUM_D_TAG from elftools.elf.segments import InterpSegment -from elftools.elf.sections import NoteSection, SymbolTableSection +from elftools.elf.sections import ( + NoteSection, SymbolTableSection, SymbolTableIndexSection +) from elftools.elf.gnuversions import ( GNUVerSymSection, GNUVerDefSection, GNUVerNeedSection, @@ -34,22 +44,27 @@ describe_ei_class, describe_ei_data, describe_ei_version, describe_ei_osabi, describe_e_type, describe_e_machine, describe_e_version_numeric, describe_p_type, describe_p_flags, - describe_sh_type, describe_sh_flags, + describe_rh_flags, describe_sh_type, describe_sh_flags, describe_symbol_type, describe_symbol_bind, describe_symbol_visibility, describe_symbol_shndx, describe_reloc_type, describe_dyn_tag, - describe_ver_flags, describe_note, describe_attr_tag_arm + describe_dt_flags, describe_dt_flags_1, describe_ver_flags, describe_note, + describe_attr_tag_arm ) from elftools.elf.constants import E_FLAGS from elftools.elf.constants import E_FLAGS_MASKS +from elftools.elf.constants import SH_FLAGS +from elftools.elf.constants import SHN_INDICES from elftools.dwarf.dwarfinfo import DWARFInfo from elftools.dwarf.descriptions import ( describe_reg_name, describe_attr_value, set_global_machine_arch, describe_CFI_instructions, describe_CFI_register_rule, - describe_CFI_CFA_rule, + describe_CFI_CFA_rule, describe_DWARF_expr ) from elftools.dwarf.constants import ( DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file) +from elftools.dwarf.locationlists import LocationParser, LocationEntry from elftools.dwarf.callframe import CIE, FDE, ZERO +from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry class ReadElf(object): @@ -70,6 +85,8 @@ 
def __init__(self, file, output): self._versioninfo = None + self._shndx_sections = None + def display_file_header(self): """ Display the ELF file header """ @@ -115,10 +132,18 @@ def display_file_header(self): header['e_phnum']) self._emitline(' Size of section headers: %s (bytes)' % header['e_shentsize']) - self._emitline(' Number of section headers: %s' % + self._emit(' Number of section headers: %s' % header['e_shnum']) - self._emitline(' Section header string table index: %s' % + if header['e_shnum'] == 0 and self.elffile.num_sections() != 0: + self._emitline(' (%d)' % self.elffile.num_sections()) + else: + self._emitline('') + self._emit(' Section header string table index: %s' % header['e_shstrndx']) + if header['e_shstrndx'] == SHN_INDICES.SHN_XINDEX: + self._emitline(' (%d)' % self.elffile.get_shstrndx()) + else: + self._emitline('') def decode_flags(self, flags): description = "" @@ -207,7 +232,7 @@ def display_program_headers(self, show_heading=True): # readelf weirness - why isn't e_phoff printed as hex? (for section # headers, it is...) 
self._emitline('There are %s program headers, starting at offset %s' % ( - elfheader['e_phnum'], elfheader['e_phoff'])) + self.elffile.num_segments(), elfheader['e_phoff'])) self._emitline() self._emitline('Program Headers:') @@ -269,6 +294,9 @@ def display_program_headers(self, show_heading=True): for section in self.elffile.iter_sections(): if ( not section.is_null() and + not ((section['sh_flags'] & SH_FLAGS.SHF_TLS) != 0 and + section['sh_type'] == 'SHT_NOBITS' and + segment['p_type'] != 'PT_TLS') and segment.section_in_segment(section)): self._emit('%s ' % section.name) @@ -287,7 +315,7 @@ def display_section_headers(self, show_heading=True): return self._emitline('\nSection Header%s:' % ( - 's' if elfheader['e_shnum'] > 1 else '')) + 's' if self.elffile.num_sections() > 1 else '')) # Different formatting constraints of 32-bit and 64-bit addresses # @@ -342,7 +370,7 @@ def display_symbol_tables(self): """ self._init_versioninfo() - symbol_tables = [s for s in self.elffile.iter_sections() + symbol_tables = [(idx, s) for idx, s in enumerate(self.elffile.iter_sections()) if isinstance(s, SymbolTableSection)] if not symbol_tables and self.elffile.num_sections() == 0: @@ -350,7 +378,7 @@ def display_symbol_tables(self): self._emitline('Dynamic symbol information is not available for' ' displaying symbols.') - for section in symbol_tables: + for section_index, section in symbol_tables: if not isinstance(section, SymbolTableSection): continue @@ -387,15 +415,17 @@ def display_symbol_tables(self): version_info = '@@%(name)s' % version # symbol names are truncated to 25 chars, similarly to readelf - self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s%s' % ( + self._emitline('%6d: %s %s %-7s %-6s %-7s %4s %.25s%s' % ( nsym, self._format_hex( symbol['st_value'], fullhex=True, lead0x=False), - symbol['st_size'], + "%5d" % symbol['st_size'] if symbol['st_size'] < 100000 else hex(symbol['st_size']), describe_symbol_type(symbol['st_info']['type']), 
describe_symbol_bind(symbol['st_info']['bind']), describe_symbol_visibility(symbol['st_other']['visibility']), - describe_symbol_shndx(symbol['st_shndx']), + describe_symbol_shndx(self._get_symbol_shndx(symbol, + nsym, + section_index)), symbol.name, version_info)) @@ -425,6 +455,10 @@ def display_dynamic_tags(self): parsed = 'Library soname: [%s]' % tag.soname elif tag.entry.d_tag.endswith(('SZ', 'ENT')): parsed = '%i (bytes)' % tag['d_val'] + elif tag.entry.d_tag == 'DT_FLAGS': + parsed = describe_dt_flags(tag.entry.d_val) + elif tag.entry.d_tag == 'DT_FLAGS_1': + parsed = 'Flags: %s' % describe_dt_flags_1(tag.entry.d_val) elif tag.entry.d_tag.endswith(('NUM', 'COUNT')): parsed = '%i' % tag['d_val'] elif tag.entry.d_tag == 'DT_PLTREL': @@ -432,6 +466,11 @@ def display_dynamic_tags(self): if s.startswith('DT_'): s = s[3:] parsed = '%s' % s + elif tag.entry.d_tag == 'DT_MIPS_FLAGS': + parsed = describe_rh_flags(tag.entry.d_val) + elif tag.entry.d_tag in ('DT_MIPS_SYMTABNO', + 'DT_MIPS_LOCAL_GOTNO'): + parsed = str(tag.entry.d_val) else: parsed = '%#x' % tag['d_val'] @@ -452,7 +491,7 @@ def display_notes(self): for note in section.iter_notes(): self._emitline("\nDisplaying notes found in: {}".format( section.name)) - self._emitline(' Owner Data size Description') + self._emitline(' Owner Data size Description') self._emitline(' %s %s\t%s' % ( note['n_name'].ljust(20), self._format_hex(note['n_descsz'], fieldsize=8), @@ -467,7 +506,7 @@ def display_relocations(self): continue has_relocation_sections = True - self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % ( + self._emitline("\nRelocation section '%.128s' at offset %s contains %s entries:" % ( section.name, self._format_hex(section['sh_offset']), section.num_relocations())) @@ -490,40 +529,94 @@ def display_relocations(self): rel['r_info_type'], self.elffile))) if rel['r_info_sym'] == 0: + if section.is_RELA(): + fieldsize = 8 if self.elffile.elfclass == 32 else 16 + addend = 
self._format_hex(rel['r_addend'], lead0x=False) + self._emit(' %s %s' % (' ' * fieldsize, addend)) self._emitline() - continue - symbol = symtable.get_symbol(rel['r_info_sym']) - # Some symbols have zero 'st_name', so instead what's used is - # the name of the section they point at. Truncate symbol names - # (excluding version info) to 22 chars, similarly to readelf. - if symbol['st_name'] == 0: - symsec = self.elffile.get_section(symbol['st_shndx']) - symbol_name = symsec.name - version = '' else: - symbol_name = symbol.name - version = self._symbol_version(rel['r_info_sym']) - version = (version['name'] - if version and version['name'] else '') - symbol_name = '%.22s' % symbol_name - if version: - symbol_name += '@' + version - - self._emit(' %s %s' % ( - self._format_hex( - symbol['st_value'], - fullhex=True, lead0x=False), - symbol_name)) - if section.is_RELA(): - self._emit(' %s %x' % ( - '+' if rel['r_addend'] >= 0 else '-', - abs(rel['r_addend']))) - self._emitline() + symbol = symtable.get_symbol(rel['r_info_sym']) + # Some symbols have zero 'st_name', so instead what's used + # is the name of the section they point at. Truncate symbol + # names (excluding version info) to 22 chars, similarly to + # readelf. 
+ if symbol['st_name'] == 0: + symsecidx = self._get_symbol_shndx(symbol, + rel['r_info_sym'], + section['sh_link']) + symsec = self.elffile.get_section(symsecidx) + symbol_name = symsec.name + version = '' + else: + symbol_name = symbol.name + version = self._symbol_version(rel['r_info_sym']) + version = (version['name'] + if version and version['name'] else '') + symbol_name = '%.22s' % symbol_name + if version: + symbol_name += '@' + version + + self._emit(' %s %s' % ( + self._format_hex( + symbol['st_value'], + fullhex=True, lead0x=False), + symbol_name)) + if section.is_RELA(): + self._emit(' %s %x' % ( + '+' if rel['r_addend'] >= 0 else '-', + abs(rel['r_addend']))) + self._emitline() + + # Emit the two additional relocation types for ELF64 MIPS + # binaries. + if (self.elffile.elfclass == 64 and + self.elffile['e_machine'] == 'EM_MIPS'): + for i in (2, 3): + rtype = rel['r_info_type%s' % i] + self._emit(' Type%s: %s' % ( + i, + describe_reloc_type(rtype, self.elffile))) + self._emitline() if not has_relocation_sections: self._emitline('\nThere are no relocations in this file.') + def display_arm_unwind(self): + if not self.elffile.has_ehabi_info(): + self._emitline('There are no .ARM.idx sections in this file.') + return + for ehabi_info in self.elffile.get_ehabi_infos(): + # Unwind section '.ARM.exidx' at offset 0x203e8 contains 1009 entries: + self._emitline("\nUnwind section '%s' at offset 0x%x contains %d entries" % ( + ehabi_info.section_name(), + ehabi_info.section_offset(), + ehabi_info.num_entry() + )) + + for i in range(ehabi_info.num_entry()): + entry = ehabi_info.get_entry(i) + self._emitline() + self._emitline("Entry %d:" % i) + if isinstance(entry, CorruptEHABIEntry): + self._emitline(" [corrupt] %s" % entry.reason) + continue + self._emit(" Function offset 0x%x: " % entry.function_offset) + if isinstance(entry, CannotUnwindEHABIEntry): + self._emitline("[cantunwind]") + continue + elif entry.eh_table_offset: + self._emitline("@0x%x" % 
entry.eh_table_offset) + else: + self._emitline("Compact (inline)") + if isinstance(entry, GenericEHABIEntry): + self._emitline(" Personality: 0x%x" % entry.personality) + else: + self._emitline(" Compact model index: %d" % entry.personality) + for mnemonic_item in entry.mnmemonic_array(): + self._emit(' ') + self._emitline(mnemonic_item) + def display_version_info(self): """ Display the version info contained in the file """ @@ -748,6 +841,10 @@ def display_debug_dump(self, dump_what): self._dump_debug_frames_interp() elif dump_what == 'aranges': self._dump_debug_aranges() + elif dump_what in { 'pubtypes', 'pubnames' }: + self._dump_debug_namelut(dump_what) + elif dump_what == 'loc': + self._dump_debug_locations() else: self._emitline('debug dump not yet supported for "%s"' % dump_what) @@ -861,7 +958,7 @@ def _symbol_version(self, nsym): if self._versioninfo['type'] == 'GNU': # In GNU versioning mode, the highest bit is used to - # store wether the symbol is hidden or not + # store whether the symbol is hidden or not if index & 0x8000: index &= ~0x8000 symbol_version['hidden'] = True @@ -894,6 +991,22 @@ def _section_from_spec(self, spec): # Not a number. Must be a name then return self.elffile.get_section_by_name(spec) + def _get_symbol_shndx(self, symbol, symbol_index, symtab_index): + """ Get the index into the section header table for the "symbol" + at "symbol_index" located in the symbol table with section index + "symtab_index". 
+ """ + symbol_shndx = symbol['st_shndx'] + if symbol_shndx != SHN_INDICES.SHN_XINDEX: + return symbol_shndx + + # Check for or lazily construct index section mapping (symbol table + # index -> corresponding symbol table index section object) + if self._shndx_sections is None: + self._shndx_sections = {sec.symboltable: sec for sec in self.elffile.iter_sections() + if isinstance(sec, SymbolTableIndexSection)} + return self._shndx_sections[symtab_index].get_section_index(symbol_index) + def _note_relocs_for_section(self, section): """ If there are relocation sections pointing to the givne section, emit a note about it. @@ -946,7 +1059,10 @@ def _dump_debug_info(self): # correctly reflect the nesting depth # die_depth = 0 + current_function = None for die in cu.iter_DIEs(): + if die.tag == 'DW_TAG_subprogram': + current_function = die self._emitline(' <%s><%x>: Abbrev Number: %s%s' % ( die_depth, die.offset, @@ -961,11 +1077,19 @@ def _dump_debug_info(self): # Unknown attribute values are passed-through as integers if isinstance(name, int): name = 'Unknown AT value: %x' % name - self._emitline(' <%x> %-18s: %s' % ( + + attr_desc = describe_attr_value(attr, die, section_offset) + + if 'DW_OP_fbreg' in attr_desc and current_function and not 'DW_AT_frame_base' in current_function.attributes: + postfix = ' [without dw_at_frame_base]' + else: + postfix = '' + + self._emitline(' <%x> %-18s: %s%s' % ( attr.offset, name, - describe_attr_value( - attr, die, section_offset))) + attr_desc, + postfix)) if die.has_children: die_depth += 1 @@ -1101,6 +1225,40 @@ def _dump_debug_frames(self): self._dwarfinfo.debug_frame_sec, self._dwarfinfo.CFI_entries()) + def _dump_debug_namelut(self, what): + """ + Dump the debug pubnames section. 
+ """ + if what == 'pubnames': + namelut = self._dwarfinfo.get_pubnames() + section = self._dwarfinfo.debug_pubnames_sec + else: + namelut = self._dwarfinfo.get_pubtypes() + section = self._dwarfinfo.debug_pubtypes_sec + + # readelf prints nothing if the section is not present. + if namelut is None or len(namelut) == 0: + return + + self._emitline('Contents of the %s section:' % section.name) + self._emitline() + + cu_headers = namelut.get_cu_headers() + + # go over CU-by-CU first and item-by-item next. + for (cu_hdr, (cu_ofs, items)) in izip(cu_headers, itertools.groupby( + namelut.items(), key = lambda x: x[1].cu_ofs)): + + self._emitline(' Length: %d' % cu_hdr.unit_length) + self._emitline(' Version: %d' % cu_hdr.version) + self._emitline(' Offset into .debug_info section: 0x%x' % cu_hdr.debug_info_offset) + self._emitline(' Size of area in .debug_info section: %d' % cu_hdr.debug_info_length) + self._emitline() + self._emitline(' Offset Name') + for item in items: + self._emitline(' %x %s' % (item[1].die_ofs - cu_ofs, item[0])) + self._emitline() + def _dump_debug_aranges(self): """ Dump the aranges table """ @@ -1247,6 +1405,70 @@ def _dump_debug_frames_interp(self): self._dwarfinfo.debug_frame_sec, self._dwarfinfo.CFI_entries()) + def _dump_debug_locations(self): + """ Dump the location lists from .debug_location section + """ + def _get_cu_base(cu): + top_die = cu.get_top_DIE() + attr = top_die.attributes + if 'DW_AT_low_pc' in attr: + return attr['DW_AT_low_pc'].value + elif 'DW_AT_entry_pc' in attr: + return attr['DW_AT_entry_pc'].value + else: + raise ValueError("Can't find the base IP (low_pc) for a CU") + + di = self._dwarfinfo + loc_lists = di.location_lists() + if not loc_lists: # No locations section - readelf outputs nothing + return + + loc_lists = list(loc_lists.iter_location_lists()) + if len(loc_lists) == 0: + # Present but empty locations section - readelf outputs a message + self._emitline("\nSection '%s' has no debugging data." 
% di.debug_loc_sec.name) + return + + # To dump a location list, one needs to know the CU. + # Scroll through DIEs once, list the known location list offsets + cu_map = dict() # Loc list offset => CU + for cu in di.iter_CUs(): + for die in cu.iter_DIEs(): + for key in die.attributes: + attr = die.attributes[key] + if (LocationParser.attribute_has_location(attr, cu['version']) and + not LocationParser._attribute_has_loc_expr(attr, cu['version'])): + cu_map[attr.value] = cu + + addr_size = di.config.default_address_size # In bytes, 4 or 8 + addr_width = addr_size * 2 # In hex digits, 8 or 16 + line_template = " %%08x %%0%dx %%0%dx %%s%%s" % (addr_width, addr_width) + + self._emitline('Contents of the %s section:\n' % di.debug_loc_sec.name) + self._emitline(' Offset Begin End Expression') + for loc_list in loc_lists: + cu = cu_map.get(loc_list[0].entry_offset, False) + if not cu: + raise ValueError("Location list can't be tracked to a CU") + base_ip = _get_cu_base(cu) + for entry in loc_list: + # TODO: support BaseAddressEntry lines + expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset) + postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else '' + self._emitline(line_template % ( + entry.entry_offset, + base_ip + entry.begin_offset, + base_ip + entry.end_offset, + expr, + postfix)) + # Pyelftools doesn't store the terminating entry, + # but readelf emits its offset, so this should too. + last = loc_list[-1] + last_len = 2*addr_size + if isinstance(last, LocationEntry): + last_len += 2 + len(last.loc_expr) + self._emitline(" %08x " % (last.entry_offset + last_len)) + def _display_arch_specific_arm(self): """ Display the ARM architecture-specific info contained in the file. 
""" @@ -1318,6 +1540,9 @@ def main(stream=None): argparser.add_argument('-r', '--relocs', action='store_true', dest='show_relocs', help='Display the relocations (if present)') + argparser.add_argument('-au', '--arm-unwind', + action='store_true', dest='show_arm_unwind', + help='Display the armeabi unwind information (if present)') argparser.add_argument('-x', '--hex-dump', action='store', dest='show_hex_dump', metavar='', help='Dump the contents of section as bytes') @@ -1334,7 +1559,11 @@ def main(stream=None): action='store', dest='debug_dump_what', metavar='', help=( 'Display the contents of DWARF debug sections. can ' + - 'one of {info,decodedline,frames,frames-interp}')) + 'one of {info,decodedline,frames,frames-interp,aranges,pubtypes,pubnames,loc}')) + argparser.add_argument('--traceback', + action='store_true', dest='show_traceback', + help='Dump the Python traceback on ELFError' + ' exceptions from elftools') args = argparser.parse_args() @@ -1368,6 +1597,8 @@ def main(stream=None): readelf.display_notes() if args.show_relocs: readelf.display_relocations() + if args.show_arm_unwind: + readelf.display_arm_unwind() if args.show_version_info: readelf.display_version_info() if args.show_arch_specific: @@ -1379,7 +1610,10 @@ def main(stream=None): if args.debug_dump_what: readelf.display_debug_dump(args.debug_dump_what) except ELFError as ex: + sys.stdout.flush() sys.stderr.write('ELF error: %s\n' % ex) + if args.show_traceback: + traceback.print_exc() sys.exit(1) diff --git a/setup.py b/setup.py index e238c26e..a9ad9970 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ description='Library for analyzing ELF files and DWARF debugging information', long_description=description, license='Public domain', - version='0.25', + version='0.27', author='Eli Bendersky', maintainer='Eli Bendersky', author_email='eliben@gmail.com', @@ -41,6 +41,7 @@ 'elftools.elf', 'elftools.common', 'elftools.dwarf', + 'elftools.ehabi', 'elftools.construct', 'elftools.construct.lib', 
], diff --git a/test/run_all_unittests.py b/test/run_all_unittests.py index 0e00a4d5..5f226dd2 100755 --- a/test/run_all_unittests.py +++ b/test/run_all_unittests.py @@ -13,7 +13,7 @@ import unittest # Make it possible to run this file from the root dir of pyelftools without -# installing pyelftools; useful for Travis testing, etc. +# installing pyelftools; useful for CI testing, etc. sys.path[0:0] = ['.'] diff --git a/test/run_examples_test.py b/test/run_examples_test.py index 349f13b7..c5268f34 100755 --- a/test/run_examples_test.py +++ b/test/run_examples_test.py @@ -12,7 +12,7 @@ from utils import run_exe, is_in_rootdir, dump_output_to_temp_files # Make it possible to run this file from the root dir of pyelftools without -# installing pyelftools; useful for Travis testing, etc. +# installing pyelftools; useful for CI testing, etc. sys.path[0:0] = ['.'] # Create a global logger object @@ -63,7 +63,7 @@ def run_example_and_compare(example_path): return True else: testlog.info('.......FAIL comparison') - dump_output_to_temp_files(testlog, example_out) + dump_output_to_temp_files(testlog, example_out, ref_str) return False diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 7f4631b6..67addf72 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -20,7 +20,7 @@ from utils import run_exe, is_in_rootdir, dump_output_to_temp_files # Make it possible to run this file from the root dir of pyelftools without -# installing pyelftools; useful for Travis testing, etc. +# installing pyelftools; useful for CI testing, etc. sys.path[0:0] = ['.'] # Create a global logger object @@ -48,17 +48,27 @@ def discover_testfiles(rootdir): yield os.path.join(rootdir, filename) -def run_test_on_file(filename, verbose=False): +def run_test_on_file(filename, verbose=False, opt=None): """ Runs a test on the given input filename. Return True if all test runs succeeded. 
+ If opt is specified, rather that going over the whole + set of supported readelf options, the test will only + run for one option. """ success = True testlog.info("Test file '%s'" % filename) - for option in [ + if opt is None: + options = [ '-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V', '--debug-dump=info', '--debug-dump=decodedline', '--debug-dump=frames', '--debug-dump=frames-interp', - '--debug-dump=aranges']: + '--debug-dump=aranges', '--debug-dump=pubtypes', + '--debug-dump=pubnames', '--debug-dump=loc' + ] + else: + options = [opt] + + for option in options: if verbose: testlog.info("..option='%s'" % option) # TODO(zlobober): this is a dirty hack to make tests work for ELF core @@ -81,7 +91,7 @@ def run_test_on_file(filename, verbose=False): rc, stdout = run_exe(exe_path, args) if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,)) if rc != 0: - testlog.error("@@ aborting - '%s' returned '%s'" % (exe_path, rc)) + testlog.error("@@ aborting - '%s %s' returned '%s'" % (exe_path, option, rc)) return False stdouts.append(stdout) if verbose: testlog.info('....comparing output...') @@ -199,6 +209,9 @@ def main(): '-k', '--keep-going', action='store_true', dest='keep_going', help="Run all tests, don't stop at the first failure") + argparser.add_argument('--opt', + action='store', dest='opt', metavar='', + help= 'Limit the test one one readelf option.') args = argparser.parse_args() if args.parallel: @@ -220,14 +233,12 @@ def main(): if len(filenames) > 1 and args.parallel: pool = Pool() - results = pool.map( - run_test_on_file, - filenames) + results = pool.map(run_test_on_file, filenames) failures = results.count(False) else: failures = 0 for filename in filenames: - if not run_test_on_file(filename, verbose=args.verbose): + if not run_test_on_file(filename, args.verbose, args.opt): failures += 1 if not args.keep_going: break diff --git a/test/test_arm_call_reloc.py b/test/test_arm_call_reloc.py new file mode 100644 index 
00000000..354c3a72 --- /dev/null +++ b/test/test_arm_call_reloc.py @@ -0,0 +1,45 @@ +#------------------------------------------------------------------------------- +# elftools tests +# +# Test 'R_ARM_CALL' relocation type support. +# Compare the '.text' section data of ELF file that was relocated by elftools +# with an ELF file that was relocated by linker. +# +# Dmitry Koltunov (koltunov@ispras.ru) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import sys +import unittest + +from elftools.common.py3compat import BytesIO +from elftools.elf.elffile import ELFFile +from elftools.elf.relocation import RelocationHandler + + +def do_relocation(rel_elf): + data = rel_elf.get_section_by_name('.text').data() + rh = RelocationHandler(rel_elf) + + stream = BytesIO() + stream.write(data) + + rel = rel_elf.get_section_by_name('.rel.text') + rh.apply_section_relocations(stream, rel) + return stream.getvalue() + + +class TestARMRElocation(unittest.TestCase): + def test_reloc(self): + test_dir = os.path.join('test', 'testfiles_for_unittests') + with open(os.path.join(test_dir, 'arm_reloc_unrelocated.o'), 'rb') as rel_f, \ + open(os.path.join(test_dir, 'arm_reloc_relocated.elf'), 'rb') as f: + rel_elf = ELFFile(rel_f) + elf = ELFFile(f) + + # Comparison of '.text' section data + self.assertEquals(do_relocation(rel_elf), + elf.get_section_by_name('.text').data()) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_callframe.py b/test/test_callframe.py index 5be2717f..fc434f9b 100644 --- a/test/test_callframe.py +++ b/test/test_callframe.py @@ -9,10 +9,13 @@ from elftools.common.py3compat import BytesIO from elftools.dwarf.callframe import ( CallFrameInfo, CIE, FDE, instruction_name, CallFrameInstruction, - RegisterRule) + RegisterRule, DecodedCallFrameTable, CFARule) from elftools.dwarf.structs import DWARFStructs from elftools.dwarf.descriptions import 
(describe_CFI_instructions, set_global_machine_arch) +from elftools.dwarf.enums import DW_EH_encoding_flags +from elftools.elf.elffile import ELFFile +from os.path import join class TestCallFrame(unittest.TestCase): @@ -83,6 +86,7 @@ def test_spec_sample_d6(self): self.assertEqual(entries[1]['length'], 40) self.assertEqual(entries[1]['CIE_pointer'], 0) self.assertEqual(entries[1]['address_range'], 84) + self.assertIsNone(entries[1].lsda_pointer) self.assertIs(entries[1].cie, entries[0]) self.assertEqual(len(entries[1].instructions), 21) self.assertInstruction(entries[1].instructions[0], @@ -145,6 +149,75 @@ def test_describe_CFI_instructions(self): ( ' DW_CFA_def_cfa: r7 (edi) ofs 2\n' + ' DW_CFA_expression: r2 (edx) (DW_OP_addr: 201; DW_OP_deref; DW_OP_deref)\n')) + def test_CFIEntry_get_decoded(self): + oracle_decoded = DecodedCallFrameTable( + table = [ + {'pc': 0, 'cfa': CFARule(reg = 29, offset = 0, expr = None)} + ], + reg_order = [] + ) + + test_dir = join('test', 'testfiles_for_unittests') + with open(join(test_dir, 'simple_mipsel.elf'), 'rb') as f: + elf = ELFFile(f) + di = elf.get_dwarf_info() + entries = di.CFI_entries() + decoded = entries[0].get_decoded() + self.assertEqual(oracle_decoded.table[0]['cfa'].reg, + decoded.table[0]['cfa'].reg + ) + self.assertEqual(oracle_decoded.table[0]['cfa'].offset, + decoded.table[0]['cfa'].offset) + + def test_ehframe_fde_with_lsda_pointer(self): + # CIE and FDE dumped from exceptions_0, offset 0xcc0 + # binary is at https://github.com/angr/binaries/blob/master/tests/x86_64/exceptions_0 + data = (b'' + + # CIE + b'\x1c\x00\x00\x00' + # length + b'\x00\x00\x00\x00' + # ID + b'\x01' + # version + b'\x7a\x50\x4c\x52\x00' + # augmentation string + b'\x01' + # code alignment + b'\x78' + # data alignment + b'\x10' + # return address register + b'\x07' + # augmentation data length + b'\x9b' + # personality function pointer encoding + b'\x3d\x13\x20\x00' + # personality function pointer + b'\x1b' + 
# LSDA pointer encoding + b'\x1b' + # FDE encoding + b'\x0c\x07\x08\x90' + # initial instructions + b'\x01\x00\x00' + + # FDE + b'\x24\x00\x00\x00' + # length + b'\x24\x00\x00\x00' + # CIE reference pointer + b'\x62\xfd\xff\xff' + # pc begin + b'\x89\x00\x00\x00' + # pc range + b'\x04' + # augmentation data length + b'\xb7\x00\x00\x00' + # LSDA pointer + b'\x41\x0e\x10\x86' + # initial instructions + b'\x02\x43\x0d\x06' + + b'\x45\x83\x03\x02' + + b'\x7f\x0c\x07\x08' + + b'\x00\x00\x00' + ) + s = BytesIO(data) + + structs = DWARFStructs(little_endian=True, dwarf_format=32, address_size=8) + cfi = CallFrameInfo(s, len(data), 0, structs, for_eh_frame=True) + entries = cfi.get_entries() + + self.assertEqual(len(entries), 2) + self.assertIsInstance(entries[0], CIE) + self.assertIn('LSDA_encoding', entries[0].augmentation_dict) + # check LSDA encoding + lsda_encoding = entries[0].augmentation_dict['LSDA_encoding'] + basic_encoding = lsda_encoding & 0x0f + modifier = lsda_encoding & 0xf0 + self.assertEqual(basic_encoding, DW_EH_encoding_flags['DW_EH_PE_sdata4']) + self.assertEqual(modifier, DW_EH_encoding_flags['DW_EH_PE_pcrel']) + self.assertIsInstance(entries[1], FDE) + self.assertEqual(entries[1].lsda_pointer, 232) if __name__ == '__main__': unittest.main() diff --git a/test/test_core_notes.py b/test/test_core_notes.py index 41ee6f8d..4caef862 100644 --- a/test/test_core_notes.py +++ b/test/test_core_notes.py @@ -10,38 +10,183 @@ from elftools.elf.elffile import ELFFile from elftools.elf.segments import NoteSegment + class TestCoreNotes(unittest.TestCase): """ This test makes sure than core dump specific sections are properly analyzed. 
""" + @classmethod + def setUpClass(cls): + cls._core_file = open(os.path.join('test', + 'testfiles_for_unittests', 'core_linux64.elf'), + 'rb') def test_core_prpsinfo(self): - with open(os.path.join('test', - 'testfiles_for_unittests', 'core_linux64.elf'), - 'rb') as f: - elf = ELFFile(f) - for segment in elf.iter_segments(): - if not isinstance(segment, NoteSegment): + elf = ELFFile(self._core_file) + for segment in elf.iter_segments(): + if not isinstance(segment, NoteSegment): + continue + notes = list(segment.iter_notes()) + for note in segment.iter_notes(): + if note['n_type'] != 'NT_PRPSINFO': + continue + desc = note['n_desc'] + self.assertEqual(desc['pr_state'], 0) + self.assertEqual(desc['pr_sname'], b'R') + self.assertEqual(desc['pr_zomb'], 0) + self.assertEqual(desc['pr_nice'], 0) + self.assertEqual(desc['pr_flag'], 0x400600) + self.assertEqual(desc['pr_uid'], 1000) + self.assertEqual(desc['pr_gid'], 1000) + self.assertEqual(desc['pr_pid'], 23395) + self.assertEqual(desc['pr_ppid'], 23187) + self.assertEqual(desc['pr_pgrp'], 23395) + self.assertEqual(desc['pr_sid'], 23187) + self.assertEqual( + desc['pr_fname'], + b'coredump_self\x00\x00\x00') + self.assertEqual( + desc['pr_psargs'], + b'./coredump_self foo bar 42 ' + b'\x00' * (80 - 27)) + + def test_core_nt_file(self): + """ + Test that the parsing of the NT_FILE note within a core file is + correct. + The assertions are made against the output of eu-readelf. + + eu-readelf -n core_linux64.elf + ... 
+ CORE 621 FILE + 10 files: + 00400000-00401000 00000000 4096 + /home/max42/pyelftools/test/coredump_self + 00600000-00601000 00000000 4096 + /home/max42/pyelftools/test/coredump_self + 00601000-00602000 00001000 4096 + /home/max42/pyelftools/test/coredump_self + 7fa4593ae000-7fa45956d000 00000000 1830912 + /lib/x86_64-linux-gnu/libc-2.23.so + 7fa45956d000-7fa45976d000 001bf000 2097152 + /lib/x86_64-linux-gnu/libc-2.23.so + 7fa45976d000-7fa459771000 001bf000 16384 + /lib/x86_64-linux-gnu/libc-2.23.so + 7fa459771000-7fa459773000 001c3000 8192 + /lib/x86_64-linux-gnu/libc-2.23.so + 7fa459777000-7fa45979d000 00000000 155648 + /lib/x86_64-linux-gnu/ld-2.23.so + 7fa45999c000-7fa45999d000 00025000 4096 + /lib/x86_64-linux-gnu/ld-2.23.so + 7fa45999d000-7fa45999e000 00026000 4096 + /lib/x86_64-linux-gnu/ld-2.23.so + ... + """ + elf = ELFFile(self._core_file) + nt_file_found = False + for segment in elf.iter_segments(): + if not isinstance(segment, NoteSegment): + continue + for note in segment.iter_notes(): + if note['n_type'] != 'NT_FILE': continue - notes = list(segment.iter_notes()) - for note in segment.iter_notes(): - if note['n_type'] != 'NT_PRPSINFO': - continue - desc = note['n_desc'] - self.assertEquals(desc['pr_state'], 0) - self.assertEquals(desc['pr_sname'], b'R') - self.assertEquals(desc['pr_zomb'], 0) - self.assertEquals(desc['pr_nice'], 0) - self.assertEquals(desc['pr_flag'], 0x400600) - self.assertEquals(desc['pr_uid'], 1000) - self.assertEquals(desc['pr_gid'], 1000) - self.assertEquals(desc['pr_pid'], 23395) - self.assertEquals(desc['pr_ppid'], 23187) - self.assertEquals(desc['pr_pgrp'], 23395) - self.assertEquals(desc['pr_sid'], 23187) - self.assertEquals( - desc['pr_fname'], - b'coredump_self\x00\x00\x00') - self.assertEquals( - desc['pr_psargs'], - b'./coredump_self foo bar 42 ' + b'\x00' * (80 - 27)) + nt_file_found = True + desc = note['n_desc'] + self.assertEqual(desc['num_map_entries'], 10) + self.assertEqual(desc['page_size'], 4096) + 
self.assertEqual(len(desc['Elf_Nt_File_Entry']), 10) + self.assertEqual(len(desc['filename']), 10) + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][0], + desc['page_size'], + 0x00400000, + 0x00401000, + 0x00000000) + self.assertEqual(desc['filename'][0], + b"/home/max42/pyelftools/test/coredump_self") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][1], + desc['page_size'], + 0x00600000, + 0x00601000, + 0x00000000) + self.assertEqual(desc['filename'][1], + b"/home/max42/pyelftools/test/coredump_self") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][2], + desc['page_size'], + 0x00601000, + 0x00602000, + 0x00001000) + self.assertEqual(desc['filename'][2], + b"/home/max42/pyelftools/test/coredump_self") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][3], + desc['page_size'], + 0x7fa4593ae000, + 0x7fa45956d000, + 0x00000000) + self.assertEqual(desc['filename'][3], + b"/lib/x86_64-linux-gnu/libc-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][4], + desc['page_size'], + 0x7fa45956d000, + 0x7fa45976d000, + 0x001bf000) + self.assertEqual(desc['filename'][4], + b"/lib/x86_64-linux-gnu/libc-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][5], + desc['page_size'], + 0x7fa45976d000, + 0x7fa459771000, + 0x001bf000) + self.assertEqual(desc['filename'][5], + b"/lib/x86_64-linux-gnu/libc-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][6], + desc['page_size'], + 0x7fa459771000, + 0x7fa459773000, + 0x001c3000) + self.assertEqual(desc['filename'][6], + b"/lib/x86_64-linux-gnu/libc-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][7], + desc['page_size'], + 0x7fa459777000, + 0x7fa45979d000, + 0x00000000) + self.assertEqual(desc['filename'][7], + b"/lib/x86_64-linux-gnu/ld-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][8], + desc['page_size'], + 0x7fa45999c000, + 0x7fa45999d000, + 0x00025000) + self.assertEqual(desc['filename'][8], + 
b"/lib/x86_64-linux-gnu/ld-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][9], + desc['page_size'], + 0x7fa45999d000, + 0x7fa45999e000, + 0x00026000) + self.assertEqual(desc['filename'][9], + b"/lib/x86_64-linux-gnu/ld-2.23.so") + self.assertTrue(nt_file_found) + + def validate_nt_file_entry(self, + entry, + page_size, + expected_vm_start, + expected_vm_end, + expected_page_offset): + self.assertEqual(entry.vm_start, expected_vm_start) + self.assertEqual(entry.vm_end, expected_vm_end) + self.assertEqual(entry.page_offset * page_size, expected_page_offset) + + @classmethod + def tearDownClass(cls): + cls._core_file.close() diff --git a/test/test_dbgfile.py b/test/test_dbgfile.py new file mode 100644 index 00000000..901a19fd --- /dev/null +++ b/test/test_dbgfile.py @@ -0,0 +1,49 @@ +""" +Test that elftools does not fail to load debug symbol ELF files +""" +import unittest +import os + +from elftools.elf.elffile import ELFFile, DynamicSection +from elftools.dwarf.callframe import ZERO + +class TestDBGFile(unittest.TestCase): + def test_dynamic_segment(self): + """ Test that the degenerate case for the dynamic segment does not crash + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'debug_info.elf'), 'rb') as f: + elf = ELFFile(f) + + seen_dynamic_segment = False + for segment in elf.iter_segments(): + if segment.header.p_type == 'PT_DYNAMIC': + self.assertEqual(segment.num_tags(), 0, "The dynamic segment in this file should be empty") + seen_dynamic_segment = True + break + + self.assertTrue(seen_dynamic_segment, "There should be a dynamic segment in this file") + + def test_dynamic_section(self): + """ Test that the degenerate case for the dynamic section does not crash + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'debug_info.elf'), 'rb') as f: + elf = ELFFile(f) + section = DynamicSection(elf.get_section_by_name('.dynamic').header, '.dynamic', elf) + + self.assertEqual(section.num_tags(), 0, "The dynamic 
section in this file should be empty") + + def test_eh_frame(self): + """ Test that parsing .eh_frame with SHT_NOBITS does not crash + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'debug_info.elf'), 'rb') as f: + elf = ELFFile(f) + dwarf = elf.get_dwarf_info() + eh_frame = list(dwarf.EH_CFI_entries()) + self.assertEqual(len(eh_frame), 1, "There should only be the ZERO entry in eh_frame") + self.assertIs(type(eh_frame[0]), ZERO, "The only eh_frame entry should be the terminator") + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_die_size.py b/test/test_die_size.py new file mode 100644 index 00000000..7579ce24 --- /dev/null +++ b/test/test_die_size.py @@ -0,0 +1,32 @@ +#------------------------------------------------------------------------------ +# elftools tests +# +# Anders Dellien (anders@andersdellien.se) +# This code is in the public domain +#------------------------------------------------------------------------------ +import unittest +import os + +from elftools.elf.elffile import ELFFile + +class TestDieSize(unittest.TestCase): + """ This test verifies that null DIEs are treated correctly - i.e. + removed when we 'unflatten' the linear list and build a tree. + The test file contains a CU with two non-null DIEs (both three bytes big), + where the second one is followed by three null DIEs. + We verify that the null DIEs are discarded and that the length of the second DIE + does not include the null entries that follow it. 
+ """ + def test_die_size(self): + with open(os.path.join('test', + 'testfiles_for_unittests', 'trailing_null_dies.elf'), + 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + dwarfinfo = elffile.get_dwarf_info() + for CU in dwarfinfo.iter_CUs(): + for child in CU.get_top_DIE().iter_children(): + self.assertEquals(child.size, 3) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_aranges.py b/test/test_dwarf_aranges.py new file mode 100644 index 00000000..7b7d0cc3 --- /dev/null +++ b/test/test_dwarf_aranges.py @@ -0,0 +1,36 @@ +import os +import unittest + +from elftools.elf.elffile import ELFFile + +address_a = 0x112f; +address_b = 0x1154; + +class TestRangeLists(unittest.TestCase): + def test_arange_absent(self): + with open(os.path.join('test', 'testfiles_for_unittests', 'aranges_absent.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + aranges = elffile.get_dwarf_info().get_aranges() + self.assertIsNone(aranges) + + def test_arange_partial(self): + with open(os.path.join('test', 'testfiles_for_unittests', 'aranges_partial.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + aranges = elffile.get_dwarf_info().get_aranges() + self.assertIsNotNone(aranges) + self.assertIsNone(aranges.cu_offset_at_addr(address_a)) + self.assertIsNotNone(aranges.cu_offset_at_addr(address_b)) + + def test_arange_complete(self): + with open(os.path.join('test', 'testfiles_for_unittests', 'aranges_complete.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + aranges = elffile.get_dwarf_info().get_aranges() + self.assertIsNotNone(aranges) + self.assertIsNotNone(aranges.cu_offset_at_addr(address_a)) + self.assertIsNotNone(aranges.cu_offset_at_addr(address_b)) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_attr_form_flag_present.py b/test/test_dwarf_attr_form_flag_present.py new file mode 100644 index 
00000000..9ec9ce5d --- /dev/null +++ b/test/test_dwarf_attr_form_flag_present.py @@ -0,0 +1,25 @@ +#------------------------------------------------------------------------------- +# elftools tests +# +# Eli Bendersky (eliben@gmail.com), Santhosh Kumar Mani (santhoshmani@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import unittest + +from elftools.elf.elffile import ELFFile + + +class TestAttrFormFlagPresent(unittest.TestCase): + def test_form_flag_present_value_is_true(self): + with open(os.path.join('test', 'testfiles_for_unittests', + 'lambda.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + + dwarf = elffile.get_dwarf_info() + for cu in dwarf.iter_CUs(): + for die in cu.iter_DIEs(): + for _, attr in die.attributes.items(): + if attr.form == "DW_FORM_flag_present": + self.assertTrue(attr.value) diff --git a/test/test_dwarf_constisntloc.py b/test/test_dwarf_constisntloc.py new file mode 100644 index 00000000..e98a0ca7 --- /dev/null +++ b/test/test_dwarf_constisntloc.py @@ -0,0 +1,37 @@ +#------------------------------------------------------------------------------ +# elftools tests +# +# Seva Alekseyev (sevaa@sprynet.com) +# This code is in the public domain +#------------------------------------------------------------------------------ + +import unittest +import os, sys, io + +sys.path.insert(1, os.getcwd()) + +from elftools.elf.elffile import ELFFile +from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig +from elftools.dwarf.locationlists import LocationParser + +class TestConstWithData4IsntLocation(unittest.TestCase): + def _test_file(self, filename): + filepath = os.path.join('test', 'testfiles_for_unittests', filename) + with open(filepath, 'rb') as f: + elffile = ELFFile(f) + dwarfinfo = elffile.get_dwarf_info() + locparser = LocationParser(dwarfinfo.location_lists()) + for CU in 
dwarfinfo.iter_CUs(): + ver = CU['version'] + for DIE in CU.iter_DIEs(): + for key in DIE.attributes: + attr = DIE.attributes[key] + if LocationParser.attribute_has_location(attr, ver): + # This will crash on unpatched library on DIE at 0x9f + locparser.parse_from_attribute(attr, ver) + + def test_main(self): + self._test_file('pascalenum.o') + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_cu_and_die_cache.py b/test/test_dwarf_cu_and_die_cache.py new file mode 100644 index 00000000..bf7f4d78 --- /dev/null +++ b/test/test_dwarf_cu_and_die_cache.py @@ -0,0 +1,58 @@ +#------------------------------------------------------------------------------- +# elftools tests +# +# Eli Bendersky (eliben@gmail.com), Milton Miller +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import unittest + +from elftools.elf.elffile import ELFFile +from elftools.common.py3compat import bytes2str + +class TestCacheLUTandDIEref(unittest.TestCase): + def dprint(self, list): + if False: + self.oprint(list) + + def oprint(self, list): + if False: + print(list) + + def test_die_from_LUTentry(self): + lines = [''] + with open(os.path.join('test', 'testfiles_for_unittests', + 'lambda.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + + dwarf = elffile.get_dwarf_info() + pt = dwarf.get_pubnames() + for (k, v) in pt.items(): + ndie = dwarf.get_DIE_from_lut_entry(v) + self.dprint(ndie) + if not 'DW_AT_type' in ndie.attributes: + continue + if not 'DW_AT_name' in ndie.attributes: + continue + name = bytes2str(ndie.attributes['DW_AT_name'].value) + tlist = [] + tdie = ndie + while True: + tdie = tdie.get_DIE_from_attribute('DW_AT_type') + self.dprint(ndie) + ttag = tdie.tag + if isinstance(ttag, int): + ttag = 'TAG(0x%x)' % ttag + tlist.append(ttag) + if 'DW_AT_name' in tdie.attributes: + break + tlist.append(bytes2str(tdie.attributes['DW_AT_name'].value)) + 
tname = ' '.join(tlist) + line = "%s DIE at %s is of type %s" % ( + ndie.tag, ndie.offset, tname) + lines.append(line) + self.dprint(line) + + self.oprint('\n'.join(lines)) + self.assertGreater(len(lines), 1) diff --git a/test/test_dwarf_expr.py b/test/test_dwarf_expr.py index 747ee388..308d8728 100644 --- a/test/test_dwarf_expr.py +++ b/test/test_dwarf_expr.py @@ -7,6 +7,7 @@ import unittest from elftools.dwarf.descriptions import ExprDumper, set_global_machine_arch +from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp from elftools.dwarf.structs import DWARFStructs @@ -21,52 +22,69 @@ def setUp(self): set_global_machine_arch('x64') def test_basic_single(self): - self.visitor.process_expr([0x1b]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x1b]), 'DW_OP_div') - self.setUp() - self.visitor.process_expr([0x74, 0x82, 0x01]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x74, 0x82, 0x01]), 'DW_OP_breg4 (rsi): 130') - self.setUp() - self.visitor.process_expr([0x91, 0x82, 0x01]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x91, 0x82, 0x01]), 'DW_OP_fbreg: 130') - self.setUp() - self.visitor.process_expr([0x51]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x51]), 'DW_OP_reg1 (rdx)') - self.setUp() - self.visitor.process_expr([0x90, 16]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x90, 16]), 'DW_OP_regx: 16 (rip)') - self.setUp() - self.visitor.process_expr([0x9d, 0x8f, 0x0A, 0x90, 0x01]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x9d, 0x8f, 0x0A, 0x90, 0x01]), 'DW_OP_bit_piece: 1295 144') + self.assertEqual(self.visitor.dump_expr([0x0e, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00]), + 'DW_OP_const8u: 71777214294589695') + def test_basic_sequence(self): - self.visitor.process_expr([0x03, 0x01, 0x02, 0, 0, 
0x06, 0x06]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x03, 0x01, 0x02, 0, 0, 0x06, 0x06]), 'DW_OP_addr: 201; DW_OP_deref; DW_OP_deref') - self.setUp() - self.visitor.process_expr([0x15, 0xFF, 0x0b, 0xf1, 0xff]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x15, 0xFF, 0x0b, 0xf1, 0xff]), 'DW_OP_pick: 255; DW_OP_const2s: -15') - self.setUp() - self.visitor.process_expr([0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1e]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1e]), 'DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul') + # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so + # check for both to prevent non-determinism. + self.assertIn(self.visitor.dump_expr([0x08, 0x0f, 0xe0]), + ('DW_OP_const1u: 15; DW_OP_GNU_push_tls_address', + 'DW_OP_const1u: 15; DW_OP_lo_user')) -if __name__ == '__main__': - unittest.main() +class TestParseExpr(unittest.TestCase): + structs32 = DWARFStructs( + little_endian=True, + dwarf_format=32, + address_size=4) + + def setUp(self): + set_global_machine_arch('x64') + + def test_single(self): + p = DWARFExprParser(self.structs32) + lst = p.parse_expr([0x1b]) + self.assertEqual(lst, [DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[])]) + + lst = p.parse_expr([0x90, 16]) + self.assertEqual(lst, [DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16])]) + lst = p.parse_expr([0xe0]) + self.assertEqual(len(lst), 1) + # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so + # check for both to prevent non-determinism. 
+ self.assertIn(lst[0], [ + DWARFExprOp(op=0xe0, op_name='DW_OP_GNU_push_tls_address', args=[]), + DWARFExprOp(op=0xe0, op_name='DW_OP_lo_user', args=[])]) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_lineprogram.py b/test/test_dwarf_lineprogram.py index 5a5c65aa..2a0a19e0 100644 --- a/test/test_dwarf_lineprogram.py +++ b/test/test_dwarf_lineprogram.py @@ -18,7 +18,7 @@ def _make_program_in_stream(self, stream): """ ds = DWARFStructs(little_endian=True, dwarf_format=32, address_size=4) header = ds.Dwarf_lineprog_header.parse( - b'\x04\x10\x00\x00' + # initial lenght + b'\x04\x10\x00\x00' + # initial length b'\x03\x00' + # version b'\x20\x00\x00\x00' + # header length b'\x01\x01\x01\x0F' + # flags @@ -100,6 +100,27 @@ def test_spec_sample_60(self): self.assertLineState(linetable[7].state, address=0x24b, line=7, end_sequence=False) self.assertLineState(linetable[9].state, address=0x24d, line=7, end_sequence=True) + def test_lne_set_discriminator(self): + """ + Tests the handling of DWARFv4's new DW_LNE_set_discriminator opcode. + """ + s = BytesIO() + s.write( + b'\x00\x02\x04\x05' + # DW_LNE_set_discriminator (discriminator=0x05) + b'\x01' + # DW_LNS_copy + b'\x00\x01\x01' # DW_LNE_end_sequence + ) + + lp = self._make_program_in_stream(s) + linetable = lp.get_entries() + + # We expect two entries, since DW_LNE_set_discriminator does not add + # an entry of its own. 
+ self.assertEqual(len(linetable), 2) + self.assertEqual(linetable[0].command, DW_LNS_copy) + self.assertLineState(linetable[0].state, discriminator=0x05) + self.assertLineState(linetable[1].state, discriminator=0x00, end_sequence=True) + if __name__ == '__main__': unittest.main() diff --git a/test/test_dwarf_locexpr_on_gnucall.py b/test/test_dwarf_locexpr_on_gnucall.py new file mode 100644 index 00000000..29d57a79 --- /dev/null +++ b/test/test_dwarf_locexpr_on_gnucall.py @@ -0,0 +1,36 @@ +#------------------------------------------------------------------------------ +# elftools tests +# +# Seva Alekseyev (sevaa@sprynet.com) +# This code is in the public domain +#------------------------------------------------------------------------------ + +import unittest +import os, sys, io + +# sys.path.insert(1, os.getcwd()) + +from elftools.elf.elffile import ELFFile +from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig +from elftools.dwarf.locationlists import LocationParser + +class TestGNUCallAttributesHaveLocation(unittest.TestCase): + def _test_file(self, filename): + filepath = os.path.join('test', 'testfiles_for_unittests', filename) + with open(filepath, 'rb') as f: + elffile = ELFFile(f) + dwarfinfo = elffile.get_dwarf_info() + for CU in dwarfinfo.iter_CUs(): + ver = CU['version'] + for DIE in CU.iter_DIEs(): + for key in DIE.attributes: + attr = DIE.attributes[key] + if attr.form == 'DW_FORM_exprloc': + self.assertTrue(LocationParser.attribute_has_location(attr, CU['version']), "Attribute %s not recognized as a location" % key) + + + def test_main(self): + self._test_file('dwarf_gnuops1.o') + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dynamic.py b/test/test_dynamic.py index 1ef00809..1f48362e 100644 --- a/test/test_dynamic.py +++ b/test/test_dynamic.py @@ -49,13 +49,14 @@ def test_missing_sections(self): for t in segment.iter_tags(): if t.entry.d_tag == 'DT_NEEDED': - libs.append(t.needed.decode('utf-8')) + 
libs.append(t.needed) exp = ['libc.so.6'] self.assertEqual(libs, exp) - def test_reading_symbols(self): - """Verify we can read symbol table without SymbolTableSection""" + def test_reading_symbols_elf_hash(self): + """ Verify we can read symbol table without SymbolTableSection but with + a SYSV-style symbol hash table""" with open(os.path.join('test', 'testfiles_for_unittests', 'aarch64_super_stripped.elf'), 'rb') as f: elf = ELFFile(f) @@ -63,10 +64,41 @@ def test_reading_symbols(self): if segment.header.p_type != 'PT_DYNAMIC': continue + num_symbols = segment.num_symbols() symbol_names = [x.name for x in segment.iter_symbols()] + symbol_at_index_3 = segment.get_symbol(3) + symbols_abort = segment.get_symbol_by_name('abort') - exp = [b'', b'__libc_start_main', b'__gmon_start__', b'abort'] + self.assertEqual(num_symbols, 4) + exp = ['', '__libc_start_main', '__gmon_start__', 'abort'] self.assertEqual(symbol_names, exp) + self.assertEqual(symbol_at_index_3.name, 'abort') + self.assertIsNotNone(symbols_abort) + self.assertEqual(symbols_abort[0], symbol_at_index_3) + + def test_reading_symbols_gnu_hash(self): + """ Verify we can read symbol table without SymbolTableSection but with + a GNU symbol hash table""" + with open(os.path.join('test', 'testfiles_for_unittests', + 'android_dyntags.elf'), 'rb') as f: + elf = ELFFile(f) + for segment in elf.iter_segments(): + if segment.header.p_type != 'PT_DYNAMIC': + continue + + num_symbols = segment.num_symbols() + symbol_names = [x.name for x in segment.iter_symbols()] + symbol_at_index_3 = segment.get_symbol(3) + symbols_atfork = segment.get_symbol_by_name('__register_atfork') + + self.assertEqual(num_symbols, 212) + exp = ['', '__cxa_finalize' , '__cxa_atexit', '__register_atfork', + '__stack_chk_fail', '_ZNK7android7RefBase9decStrongEPKv', + '_ZN7android7RefBaseD2Ev', '_ZdlPv', 'pthread_mutex_lock'] + self.assertEqual(symbol_names[:9], exp) + self.assertEqual(symbol_at_index_3.name, '__register_atfork') + 
self.assertIsNotNone(symbols_atfork) + self.assertEqual(symbols_atfork[0], symbol_at_index_3) def test_sunw_tags(self): def extract_sunw(filename): diff --git a/test/test_ehabi_decoder.py b/test/test_ehabi_decoder.py new file mode 100644 index 00000000..61ad8b40 --- /dev/null +++ b/test/test_ehabi_decoder.py @@ -0,0 +1,95 @@ +# ------------------------------------------------------------------------------- +# elftools: tests +# +# LeadroyaL (leadroyal@qq.com) +# This code is in the public domain +# ------------------------------------------------------------------------------- + +import unittest + +from elftools.ehabi.decoder import EHABIBytecodeDecoder + + +class TestEHABIDecoder(unittest.TestCase): + """ Tests for the EHABI decoder. + """ + + def testLLVM(self): + # Reference: https://github.com/llvm/llvm-project/blob/master/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s + mnemonic_array = EHABIBytecodeDecoder([0xb1, 0x0f, 0xa7, 0x3f, 0xb0, 0xb0]).mnemonic_array + self.assertEqual(mnemonic_array[0].mnemonic, "pop {r0, r1, r2, r3}") + self.assertEqual(mnemonic_array[1].mnemonic, "pop {r4, r5, r6, r7, r8, r9, r10, fp}") + self.assertEqual(mnemonic_array[2].mnemonic, "vsp = vsp + 256") + self.assertEqual(mnemonic_array[3].mnemonic, "finish") + self.assertEqual(mnemonic_array[4].mnemonic, "finish") + + mnemonic_array = EHABIBytecodeDecoder([0xc9, 0x84, 0xb0]).mnemonic_array + self.assertEqual(mnemonic_array[0].mnemonic, "pop {d8, d9, d10, d11, d12}") + self.assertEqual(mnemonic_array[1].mnemonic, "finish") + + mnemonic_array = EHABIBytecodeDecoder( + [0xD7, 0xC9, 0x02, 0xC8, 0x02, 0xC7, 0x03, 0xC6, + 0x02, 0xC2, 0xBA, 0xB3, 0x12, 0xB2, 0x80, 0x04, + 0xB1, 0x01, 0xB0, 0xA9, 0xA1, 0x91, 0x84, 0xC0, + 0x80, 0xC0, 0x80, 0x01, 0x81, 0x00, 0x80, 0x00, + 0x42, 0x02, ]).mnemonic_array + self.assertEqual(mnemonic_array[0].mnemonic, "pop {d8, d9, d10, d11, d12, d13, d14, d15}") + self.assertEqual(mnemonic_array[1].mnemonic, "pop {d0, d1, d2}") +
self.assertEqual(mnemonic_array[2].mnemonic, "pop {d16, d17, d18}") + self.assertEqual(mnemonic_array[3].mnemonic, "pop {wCGR0, wCGR1}") + self.assertEqual(mnemonic_array[4].mnemonic, "pop {wR0, wR1, wR2}") + self.assertEqual(mnemonic_array[5].mnemonic, "pop {wR10, wR11, wR12}") + self.assertEqual(mnemonic_array[6].mnemonic, "pop {d8, d9, d10}") + self.assertEqual(mnemonic_array[7].mnemonic, "pop {d1, d2, d3}") + self.assertEqual(mnemonic_array[8].mnemonic, "vsp = vsp + 2564") + self.assertEqual(mnemonic_array[9].mnemonic, "pop {r0}") + self.assertEqual(mnemonic_array[10].mnemonic, "finish") + self.assertEqual(mnemonic_array[11].mnemonic, "pop {r4, r5, lr}") + self.assertEqual(mnemonic_array[12].mnemonic, "pop {r4, r5}") + self.assertEqual(mnemonic_array[13].mnemonic, "vsp = r1") + self.assertEqual(mnemonic_array[14].mnemonic, "pop {r10, fp, lr}") + self.assertEqual(mnemonic_array[15].mnemonic, "pop {r10, fp}") + self.assertEqual(mnemonic_array[16].mnemonic, "pop {r4}") + self.assertEqual(mnemonic_array[17].mnemonic, "pop {ip}") + self.assertEqual(mnemonic_array[18].mnemonic, "refuse to unwind") + self.assertEqual(mnemonic_array[19].mnemonic, "vsp = vsp - 12") + self.assertEqual(mnemonic_array[20].mnemonic, "vsp = vsp + 12") + + mnemonic_array = EHABIBytecodeDecoder( + [0xD8, 0xD0, 0xCA, 0xC9, 0x00, 0xC8, 0x00, 0xC7, + 0x10, 0xC7, 0x01, 0xC7, 0x00, 0xC6, 0x00, 0xC0, + 0xB8, 0xB4, 0xB3, 0x00, 0xB2, 0x00, 0xB1, 0x10, + 0xB1, 0x01, 0xB1, 0x00, 0xB0, 0xA8, 0xA0, 0x9F, + 0x9D, 0x91, 0x88, 0x00, 0x80, 0x00, 0x40, 0x00, + ]).mnemonic_array + self.assertEqual(mnemonic_array[0].mnemonic, "spare") + self.assertEqual(mnemonic_array[1].mnemonic, "pop {d8}") + self.assertEqual(mnemonic_array[2].mnemonic, "spare") + self.assertEqual(mnemonic_array[3].mnemonic, "pop {d0}") + self.assertEqual(mnemonic_array[4].mnemonic, "pop {d16}") + self.assertEqual(mnemonic_array[5].mnemonic, "spare") + self.assertEqual(mnemonic_array[6].mnemonic, "pop {wCGR0}") + 
self.assertEqual(mnemonic_array[7].mnemonic, "spare") + self.assertEqual(mnemonic_array[8].mnemonic, "pop {wR0}") + self.assertEqual(mnemonic_array[9].mnemonic, "pop {wR10}") + self.assertEqual(mnemonic_array[10].mnemonic, "pop {d8}") + self.assertEqual(mnemonic_array[11].mnemonic, "spare") + self.assertEqual(mnemonic_array[12].mnemonic, "pop {d0}") + self.assertEqual(mnemonic_array[13].mnemonic, "vsp = vsp + 516") + self.assertEqual(mnemonic_array[14].mnemonic, "spare") + self.assertEqual(mnemonic_array[15].mnemonic, "pop {r0}") + self.assertEqual(mnemonic_array[16].mnemonic, "spare") + self.assertEqual(mnemonic_array[17].mnemonic, "finish") + self.assertEqual(mnemonic_array[18].mnemonic, "pop {r4, lr}") + self.assertEqual(mnemonic_array[19].mnemonic, "pop {r4}") + self.assertEqual(mnemonic_array[20].mnemonic, "reserved (WiMMX MOVrr)") + self.assertEqual(mnemonic_array[21].mnemonic, "reserved (ARM MOVrr)") + self.assertEqual(mnemonic_array[22].mnemonic, "vsp = r1") + self.assertEqual(mnemonic_array[23].mnemonic, "pop {pc}") + self.assertEqual(mnemonic_array[24].mnemonic, "refuse to unwind") + self.assertEqual(mnemonic_array[25].mnemonic, "vsp = vsp - 4") + self.assertEqual(mnemonic_array[26].mnemonic, "vsp = vsp + 4") + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_ehabi_elf.py b/test/test_ehabi_elf.py new file mode 100644 index 00000000..9a0c12b7 --- /dev/null +++ b/test/test_ehabi_elf.py @@ -0,0 +1,89 @@ +# ------------------------------------------------------------------------------- +# elftools: tests +# +# LeadroyaL (leadroyal@qq.com) +# This code is in the public domain +# ------------------------------------------------------------------------------- + +import unittest +import os + +from elftools.ehabi.ehabiinfo import EHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry, CorruptEHABIEntry +from elftools.elf.elffile import ELFFile + + +class TestEHABIELF(unittest.TestCase): + """ Parse ELF and visit ARM exception handler index 
table entry. + """ + + def test_parse_object_file(self): + # FIXME: `.ARM.exidx.text.XXX` need relocation, it's too complex for current unittest. + fname = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.o') + with open(fname, 'rb') as f: + elf = ELFFile(f) + try: + elf.get_ehabi_infos() + self.assertTrue(False, "Unreachable code") + except AssertionError as e: + self.assertEqual(str(e), "Current version of pyelftools doesn't support relocatable file.") + + def test_parse_shared_library(self): + fname = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.so') + with open(fname, 'rb') as f: + elf = ELFFile(f) + self.assertTrue(elf.has_ehabi_info()) + infos = elf.get_ehabi_infos() + self.assertEqual(1, len(infos)) + info = infos[0] + + self.assertIsInstance(info.get_entry(0), EHABIEntry) + self.assertEqual(info.get_entry(0).function_offset, 0x34610) + self.assertEqual(info.get_entry(0).eh_table_offset, 0x69544) + self.assertEqual(info.get_entry(0).bytecode_array, [0x97, 0x41, 0x84, 0x0d, 0xb0, 0xb0]) + + self.assertIsInstance(info.get_entry(7), CannotUnwindEHABIEntry) + self.assertEqual(info.get_entry(7).function_offset, 0x346f8) + + self.assertIsInstance(info.get_entry(8), EHABIEntry) + self.assertEqual(info.get_entry(8).personality, 0) + self.assertEqual(info.get_entry(8).function_offset, 0x3473c) + self.assertEqual(info.get_entry(8).bytecode_array, [0x97, 0x84, 0x08]) + + self.assertIsInstance(info.get_entry(9), GenericEHABIEntry) + self.assertEqual(info.get_entry(9).function_offset, 0x3477c) + self.assertEqual(info.get_entry(9).personality, 0x31a30) + + for i in range(info.num_entry()): + self.assertNotIsInstance(info.get_entry(i), CorruptEHABIEntry) + + def test_parse_executable(self): + fname = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.elf') + with open(fname, 'rb') as f: + elf = ELFFile(f) + self.assertTrue(elf.has_ehabi_info()) + infos = elf.get_ehabi_infos() + self.assertEqual(1, len(infos)) + info = infos[0] 
+ + self.assertIsInstance(info.get_entry(0), EHABIEntry) + self.assertEqual(info.get_entry(0).function_offset, 0x4f50) + self.assertEqual(info.get_entry(0).eh_table_offset, 0x22864) + self.assertEqual(info.get_entry(0).bytecode_array, [0x97, 0x41, 0x84, 0x0d, 0xb0, 0xb0]) + + self.assertIsInstance(info.get_entry(7), CannotUnwindEHABIEntry) + self.assertEqual(info.get_entry(7).function_offset, 0x5040) + + self.assertIsInstance(info.get_entry(8), GenericEHABIEntry) + self.assertEqual(info.get_entry(8).personality, 0x15d21) + + self.assertIsInstance(info.get_entry(9), EHABIEntry) + self.assertEqual(info.get_entry(9).function_offset, 0x5144) + self.assertEqual(info.get_entry(9).personality, 0) + self.assertEqual(info.get_entry(9).bytecode_array, [0x97, 0x84, 0x08]) + + for i in range(info.num_entry()): + self.assertNotIsInstance(info.get_entry(i), CorruptEHABIEntry) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_hash.py b/test/test_hash.py new file mode 100644 index 00000000..2c2ffec4 --- /dev/null +++ b/test/test_hash.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------- +# elftools tests +# +# Andreas Ziegler (andreas.ziegler@fau.de) +# This code is in the public domain +#------------------------------------------------------------------------------- +import unittest +import os + +from elftools.elf.elffile import ELFFile +from elftools.elf.hash import ELFHashTable, GNUHashTable + +class TestELFHash(unittest.TestCase): + """ Tests for the ELF hash table. + """ + + def test_elf_hash(self): + """ Verify correctness of ELF hashing function. The expected values + were computed with the C implementation from the glibc source code. 
+ """ + self.assertEqual(ELFHashTable.elf_hash(''), 0x00000000) + self.assertEqual(ELFHashTable.elf_hash('main'), 0x000737fe) + self.assertEqual(ELFHashTable.elf_hash('printf'), 0x077905a6) + self.assertEqual(ELFHashTable.elf_hash('exit'), 0x0006cf04) + self.assertEqual(ELFHashTable.elf_hash(u'ïó®123'), 0x0efddae3) + self.assertEqual(ELFHashTable.elf_hash(b'\xe4\xbd\xa0\xe5\xa5\xbd'), + 0x0f07f00d) + + def test_get_number_of_syms(self): + """ Verify we can get the number of symbols from an ELF hash + section. + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'aarch64_super_stripped.elf'), 'rb') as f: + elf = ELFFile(f) + dynamic_segment = None + for segment in elf.iter_segments(): + if segment.header.p_type == 'PT_DYNAMIC': + dynamic_segment = segment + break + + _, hash_offset = dynamic_segment.get_table_offset('DT_HASH') + + hash_section = ELFHashTable(elf, hash_offset, dynamic_segment) + self.assertIsNotNone(hash_section) + self.assertEqual(hash_section.get_number_of_symbols(), 4) + + def test_get_symbol(self): + """ Verify we can get a specific symbol from an ELF hash section. + """ + path = os.path.join('test', 'testfiles_for_unittests', + 'simple_mipsel.elf') + with open(path, 'rb') as f: + elf = ELFFile(f) + hash_section = elf.get_section_by_name('.hash') + self.assertIsNotNone(hash_section) + symbol_main = hash_section.get_symbol('main') + self.assertIsNotNone(symbol_main) + self.assertEqual(symbol_main['st_value'], int(0x400790)) + + +class TestGNUHash(unittest.TestCase): + """ Tests for the GNU hash table. + """ + + def test_gnu_hash(self): + """ Verify correctness of GNU hashing function. The expected values + were computed with the C implementation from the glibc source code. 
+ """ + self.assertEqual(GNUHashTable.gnu_hash(''), 0x00001505) + self.assertEqual(GNUHashTable.gnu_hash('main'), 0x7c9a7f6a) + self.assertEqual(GNUHashTable.gnu_hash('printf'), 0x156b2bb8) + self.assertEqual(GNUHashTable.gnu_hash('exit'), 0x7c967e3f) + self.assertEqual(GNUHashTable.gnu_hash(u'ïó®123'), 0x8025a693) + self.assertEqual(GNUHashTable.gnu_hash(b'\xe4\xbd\xa0\xe5\xa5\xbd'), + 0x296eec2d) + + def test_get_number_of_syms(self): + """ Verify we can get the number of symbols from a GNU hash + section. + """ + + with open(os.path.join('test', 'testfiles_for_unittests', + 'lib_versioned64.so.1.elf'), 'rb') as f: + elf = ELFFile(f) + hash_section = elf.get_section_by_name('.gnu.hash') + self.assertIsNotNone(hash_section) + self.assertEqual(hash_section.get_number_of_symbols(), 24) + + def test_get_symbol(self): + """ Verify we can get a specific symbol from a GNU hash section. + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'lib_versioned64.so.1.elf'), 'rb') as f: + elf = ELFFile(f) + hash_section = elf.get_section_by_name('.gnu.hash') + self.assertIsNotNone(hash_section) + symbol_f1 = hash_section.get_symbol('function1_ver1_1') + self.assertIsNotNone(symbol_f1) + self.assertEqual(symbol_f1['st_value'], int(0x9a2)) + + def test_get_symbol_big_endian(self): + """ Verify we can get a specific symbol from a GNU hash section in a + big-endian file. 
+ """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'aarch64_be_gnu_hash.so.elf'), 'rb') as f: + elf = ELFFile(f) + self.assertFalse(elf.little_endian) + hash_section = elf.get_section_by_name('.gnu.hash') + self.assertIsNotNone(hash_section) + symbol_f1 = hash_section.get_symbol('caller') + self.assertIsNotNone(symbol_f1) + self.assertEqual(symbol_f1['st_value'], int(0x5a4)) diff --git a/test/test_pubtypes.py b/test/test_pubtypes.py new file mode 100644 index 00000000..ed7c6a16 --- /dev/null +++ b/test/test_pubtypes.py @@ -0,0 +1,25 @@ +#------------------------------------------------------------------------------- +# elftools tests +# +# Efimov Vasiliy (real@ispras.ru) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import unittest + +from elftools.elf.elffile import ELFFile + + +class TestEmptyPubtypes(unittest.TestCase): + def test_empty_pubtypes(self): + test_dir = os.path.join('test', 'testfiles_for_unittests') + with open(os.path.join(test_dir, 'empty_pubtypes', 'main.elf'), 'rb') as f: + elf = ELFFile(f) + + # This test targets `ELFParseError` caused by buggy handling + # of ".debug_pubtypes" section which only has zero terminator + # entry. + self.assertEqual(len(elf.get_dwarf_info().get_pubtypes()), 0) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_refaddr_bitness.py b/test/test_refaddr_bitness.py new file mode 100644 index 00000000..85fc56d7 --- /dev/null +++ b/test/test_refaddr_bitness.py @@ -0,0 +1,62 @@ +#------------------------------------------------------------------------------ +# elftools tests +# +# Seva Alekseyev (sevaa@sprynet.com) +# This code is in the public domain +# +# The error that motivated this fix was in an iOS binary in Mach-O format. It +# had v2 DWARF data, but it was targeting a 64 bit architecture. 
Before the fix, +# pyelftools would assume that DW_FORM_ref_addr attribute took 4 bytes and +# misparse the DWARF data in the binary. +# +# Since pyelftools doesn't work with Mach-O files, I've taken a sample binary +# apart, and saved the three relevant sections - info, abbrev, and str as flat +# files. The metadata (the fact that it's targeting ARM64) is hard-coded, since +# the Mach-O header isn't preserved. +#------------------------------------------------------------------------------ + +import unittest +import os, sys, io + +from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig + +class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase): + def test_main(self): + # Read the three saved sections as bytestreams + with open(os.path.join('test', 'testfiles_for_unittests', 'arm64_on_dwarfv2.info.dat'), 'rb') as f: + info = f.read() + with open(os.path.join('test', 'testfiles_for_unittests', 'arm64_on_dwarfv2.abbrev.dat'), 'rb') as f: + abbrev = f.read() + with open(os.path.join('test', 'testfiles_for_unittests', 'arm64_on_dwarfv2.str.dat'), 'rb') as f: + str = f.read() + + # Parse the DWARF info + di = DWARFInfo( + config = DwarfConfig(little_endian = True, default_address_size = 8, machine_arch = "ARM64"), + debug_info_sec = DebugSectionDescriptor(io.BytesIO(info), '__debug_info', None, len(info), 0), + debug_aranges_sec = None, + debug_abbrev_sec = DebugSectionDescriptor(io.BytesIO(abbrev), '__debug_abbrev', None, len(abbrev), 0), + debug_frame_sec = None, + eh_frame_sec = None, + debug_str_sec = DebugSectionDescriptor(io.BytesIO(str), '__debug_str', None, len(str), 0), + debug_loc_sec = None, + debug_ranges_sec = None, + debug_line_sec = None, + debug_pubtypes_sec = None, + debug_pubnames_sec = None + ) + + CUs = [cu for cu in di.iter_CUs()] + # Locate a CU that I know has a reference in DW_FORM_ref_addr form + CU = CUs[21] + self.assertEqual(CU['version'], 2) + # Make sure pyelftools appreciates the difference between the 
target address size and DWARF inter-DIE offset size + self.assertEqual(CU.structs.dwarf_format, 32) + self.assertEqual(CU['address_size'], 8) + DIEs = [die for die in CU.iter_DIEs()] + # Before the patch, DIE #2 is misparsed, the current offset is off, the rest are misparsed too + self.assertEqual(len(DIEs), 15) + # It was 9 before the patch, which was wrong. + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_relocations.py b/test/test_relocations.py new file mode 100644 index 00000000..f1c8f107 --- /dev/null +++ b/test/test_relocations.py @@ -0,0 +1,48 @@ +import os +import sys +import unittest + +from elftools.common.py3compat import BytesIO +from elftools.elf.elffile import ELFFile +from elftools.elf.dynamic import DynamicSegment, DynamicSection + + +class TestRelocation(unittest.TestCase): + def test_dynamic_segment(self): + """Verify that we can process relocations on the PT_DYNAMIC segment without section headers""" + + test_dir = os.path.join('test', 'testfiles_for_unittests') + with open(os.path.join(test_dir, 'x64_bad_sections.elf'), 'rb') as f: + elff = ELFFile(f) + + for seg in elff.iter_segments(): + if isinstance(seg, DynamicSegment): + relos = seg.get_relocation_tables() + self.assertEqual(set(relos), {'JMPREL', 'RELA'}) + + def test_dynamic_section(self): + """Verify that we can parse relocations from the .dynamic section""" + + test_dir = os.path.join('test', 'testfiles_for_unittests') + with open(os.path.join(test_dir, 'sample_exe64.elf'), 'rb') as f: + elff = ELFFile(f) + + for sect in elff.iter_sections(): + if isinstance(sect, DynamicSection): + relos = sect.get_relocation_tables() + self.assertEqual(set(relos), {'JMPREL', 'RELA'}) + + def test_dynamic_section_solaris(self): + """Verify that we can parse relocations from the .dynamic section""" + + test_dir = os.path.join('test', 'testfiles_for_unittests') + with open(os.path.join(test_dir, 'exe_solaris32_cc.elf'), 'rb') as f: + elff = ELFFile(f) + + for sect in 
elff.iter_sections(): + if isinstance(sect, DynamicSection): + relos = sect.get_relocation_tables() + self.assertEqual(set(relos), {'JMPREL', 'REL'}) + +if __name__ == '__main__': + unittest.main() diff --git a/test/testfiles_for_location_info/test-dwarf2.o b/test/testfiles_for_location_info/test-dwarf2.o new file mode 100755 index 00000000..9bc2a280 Binary files /dev/null and b/test/testfiles_for_location_info/test-dwarf2.o differ diff --git a/test/testfiles_for_location_info/test-dwarf4.o b/test/testfiles_for_location_info/test-dwarf4.o new file mode 100755 index 00000000..187ce70f Binary files /dev/null and b/test/testfiles_for_location_info/test-dwarf4.o differ diff --git a/test/testfiles_for_readelf/aarch64-relocs-le.o.elf b/test/testfiles_for_readelf/aarch64-relocs-le.o.elf new file mode 100644 index 00000000..3f74d229 Binary files /dev/null and b/test/testfiles_for_readelf/aarch64-relocs-le.o.elf differ diff --git a/test/testfiles_for_readelf/aarch64-relocs.c b/test/testfiles_for_readelf/aarch64-relocs.c new file mode 100644 index 00000000..c78a5629 --- /dev/null +++ b/test/testfiles_for_readelf/aarch64-relocs.c @@ -0,0 +1,16 @@ +/* This source was compiled for aarch64 (little endian). 
+ aarch64-linux-gnu-gcc -c -o aarch64-relocs-le.o.elf aarch64-relocs.c -g +*/ + +extern struct { + int i, j; +} data; + +extern int bar (void); + +int +foo (int a) +{ + data.i += a; + data.j -= bar(); +} diff --git a/test/testfiles_for_readelf/angr-eh_frame.elf b/test/testfiles_for_readelf/angr-eh_frame.elf new file mode 100644 index 00000000..f6514ad0 Binary files /dev/null and b/test/testfiles_for_readelf/angr-eh_frame.elf differ diff --git a/test/testfiles_for_readelf/dt_flags.elf b/test/testfiles_for_readelf/dt_flags.elf new file mode 100644 index 00000000..17574dd0 Binary files /dev/null and b/test/testfiles_for_readelf/dt_flags.elf differ diff --git a/test/testfiles_for_readelf/dwarf_gnuops4.so.elf b/test/testfiles_for_readelf/dwarf_gnuops4.so.elf new file mode 100644 index 00000000..d9ffe9b3 Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_gnuops4.so.elf differ diff --git a/test/testfiles_for_readelf/many_sections.o.elf b/test/testfiles_for_readelf/many_sections.o.elf new file mode 100644 index 00000000..f51fd3eb Binary files /dev/null and b/test/testfiles_for_readelf/many_sections.o.elf differ diff --git a/test/testfiles_for_readelf/mips64-relocs-be.o.elf b/test/testfiles_for_readelf/mips64-relocs-be.o.elf new file mode 100644 index 00000000..fb6d587f Binary files /dev/null and b/test/testfiles_for_readelf/mips64-relocs-be.o.elf differ diff --git a/test/testfiles_for_readelf/mips64-relocs-le.o.elf b/test/testfiles_for_readelf/mips64-relocs-le.o.elf new file mode 100644 index 00000000..028d4137 Binary files /dev/null and b/test/testfiles_for_readelf/mips64-relocs-le.o.elf differ diff --git a/test/testfiles_for_readelf/mips64-relocs.c b/test/testfiles_for_readelf/mips64-relocs.c new file mode 100644 index 00000000..707b625d --- /dev/null +++ b/test/testfiles_for_readelf/mips64-relocs.c @@ -0,0 +1,19 @@ +/* This source was compiled for MIPS64 (big endian) and MIPS64EL (little + endian): + + mips64-unknown-linux-gnu-gcc -c mips64-relocs.c -o 
mips64-relocs-be.o.elf -mabi=64 + mips64el-unknown-linux-gnu-gcc -c mips64-relocs.c -o mips64-relocs-le.o.elf -mabi=64 +*/ + +extern struct { + int i, j; +} data; + +extern int bar (void); + +int +foo (int a) +{ + data.i += a; + data.j -= bar(); +} diff --git a/test/testfiles_for_readelf/tls.c b/test/testfiles_for_readelf/tls.c new file mode 100644 index 00000000..781bf99c --- /dev/null +++ b/test/testfiles_for_readelf/tls.c @@ -0,0 +1,8 @@ +// Compile into tls.elf using: +// $ gcc -m32 -o tls.elf tls.c +// For tls64.elf, use: +// $ gcc -m64 -o tls64.elf tls.c + +__thread int i; + +int main(){} diff --git a/test/testfiles_for_readelf/tls.elf b/test/testfiles_for_readelf/tls.elf new file mode 100755 index 00000000..8c9ce8f4 Binary files /dev/null and b/test/testfiles_for_readelf/tls.elf differ diff --git a/test/testfiles_for_readelf/tls64.elf b/test/testfiles_for_readelf/tls64.elf new file mode 100755 index 00000000..ef775387 Binary files /dev/null and b/test/testfiles_for_readelf/tls64.elf differ diff --git a/test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf b/test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf new file mode 100755 index 00000000..ed368449 Binary files /dev/null and b/test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf differ diff --git a/test/testfiles_for_unittests/aranges_absent.elf b/test/testfiles_for_unittests/aranges_absent.elf new file mode 100755 index 00000000..5672d2d1 Binary files /dev/null and b/test/testfiles_for_unittests/aranges_absent.elf differ diff --git a/test/testfiles_for_unittests/aranges_complete.elf b/test/testfiles_for_unittests/aranges_complete.elf new file mode 100755 index 00000000..6767fa72 Binary files /dev/null and b/test/testfiles_for_unittests/aranges_complete.elf differ diff --git a/test/testfiles_for_unittests/aranges_partial.elf b/test/testfiles_for_unittests/aranges_partial.elf new file mode 100755 index 00000000..6ed3ea6b Binary files /dev/null and 
b/test/testfiles_for_unittests/aranges_partial.elf differ diff --git a/test/testfiles_for_unittests/aranges_partial_a.c b/test/testfiles_for_unittests/aranges_partial_a.c new file mode 100644 index 00000000..47ae2bcc --- /dev/null +++ b/test/testfiles_for_unittests/aranges_partial_a.c @@ -0,0 +1,20 @@ +/* +clang -g -c aranges_partial_a.c +clang -g -gdwarf-aranges -c aranges_partial_b.c +clang -g aranges_partial_{a,b}.o -o aranges_partial.elf + +clang -g -gdwarf-aranges -c aranges_partial_a.c +clang -g -gdwarf-aranges -c aranges_partial_b.c +clang -g aranges_partial_{a,b}.o -o aranges_complete.elf + +clang -g -c aranges_partial_a.c +clang -g -c aranges_partial_b.c +clang -g aranges_partial_{a,b}.o -o aranges_absent.elf +*/ + +extern int test(); + +int main() { + int a = test(); + return a; +} diff --git a/test/testfiles_for_unittests/aranges_partial_b.c b/test/testfiles_for_unittests/aranges_partial_b.c new file mode 100644 index 00000000..496a99d1 --- /dev/null +++ b/test/testfiles_for_unittests/aranges_partial_b.c @@ -0,0 +1,4 @@ +int test() { + int a = 0; + return a; +} diff --git a/test/testfiles_for_unittests/arm64_on_dwarfv2.abbrev.dat b/test/testfiles_for_unittests/arm64_on_dwarfv2.abbrev.dat new file mode 100644 index 00000000..16b2768f Binary files /dev/null and b/test/testfiles_for_unittests/arm64_on_dwarfv2.abbrev.dat differ diff --git a/test/testfiles_for_unittests/arm64_on_dwarfv2.info.dat b/test/testfiles_for_unittests/arm64_on_dwarfv2.info.dat new file mode 100644 index 00000000..b87d3b96 Binary files /dev/null and b/test/testfiles_for_unittests/arm64_on_dwarfv2.info.dat differ diff --git a/test/testfiles_for_unittests/arm64_on_dwarfv2.str.dat b/test/testfiles_for_unittests/arm64_on_dwarfv2.str.dat new file mode 100644 index 00000000..3bea27a7 Binary files /dev/null and b/test/testfiles_for_unittests/arm64_on_dwarfv2.str.dat differ diff --git a/test/testfiles_for_unittests/arm_exidx_test.cpp b/test/testfiles_for_unittests/arm_exidx_test.cpp new file 
mode 100644 index 00000000..4790cc0f --- /dev/null +++ b/test/testfiles_for_unittests/arm_exidx_test.cpp @@ -0,0 +1,23 @@ +#include +#include + +void func1(int i); + +void func2(int i); + +void func1(int i) { + if (i == 0) + return; + func2(i - 1); +} + +void func2(int i) { + if (i == 0) + return; + func1(i - 1); +} + +int main(int argc, char **argv) { + std::string hello = "Hello from C++"; + std::cout << hello << std::endl; +} diff --git a/test/testfiles_for_unittests/arm_exidx_test.elf b/test/testfiles_for_unittests/arm_exidx_test.elf new file mode 100644 index 00000000..94bb535c Binary files /dev/null and b/test/testfiles_for_unittests/arm_exidx_test.elf differ diff --git a/test/testfiles_for_unittests/arm_exidx_test.o b/test/testfiles_for_unittests/arm_exidx_test.o new file mode 100644 index 00000000..c13b0037 Binary files /dev/null and b/test/testfiles_for_unittests/arm_exidx_test.o differ diff --git a/test/testfiles_for_unittests/arm_exidx_test.so b/test/testfiles_for_unittests/arm_exidx_test.so new file mode 100755 index 00000000..ef45313a Binary files /dev/null and b/test/testfiles_for_unittests/arm_exidx_test.so differ diff --git a/test/testfiles_for_unittests/arm_reloc_relocated.elf b/test/testfiles_for_unittests/arm_reloc_relocated.elf new file mode 100755 index 00000000..f1cfbb8d Binary files /dev/null and b/test/testfiles_for_unittests/arm_reloc_relocated.elf differ diff --git a/test/testfiles_for_unittests/arm_reloc_source.c b/test/testfiles_for_unittests/arm_reloc_source.c new file mode 100644 index 00000000..4ea1c67c --- /dev/null +++ b/test/testfiles_for_unittests/arm_reloc_source.c @@ -0,0 +1,41 @@ +/* Generated by compiling with any LLVM version and +** with any GNU Arm Embedded Toolchain version. +** LLVM 3.8.0/5.0.0 and GNU Arm Embedded Toolchain 2.26 is fine. 
+** +** clang -O0 --target=arm-none-eabi -emit-llvm -c simple.c -o simple.bc +** llc -O0 -march=arm -filetype=obj simple.bc -o reloc_simple_arm_llvm.o +** arm-none-eabi-ld -e main reloc_simple_arm_llvm.o -o simple_arm_llvm.elf +** +** reloc_simple_arm_llvm.o is ELF file that needs call relocation. +** +** simple_arm_llvm.elf is a relocated ELF file. +*/ + +int add(int a, int b) { + return a + b; +} + +int sub(int a, int b) { + return a - b; +} + +int mul(int a, int b) { + return a * b; +} + +void triple(int a, int b) { + add(a, b); + sub(a, b); + mul(a, b); +} + +int main(void) { + int a = 0xABCD, b = 0x1234; + + add(a, b); + sub(a, b); + mul(a, b); + triple(a, b); + + return 0; +} diff --git a/test/testfiles_for_unittests/arm_reloc_unrelocated.o b/test/testfiles_for_unittests/arm_reloc_unrelocated.o new file mode 100644 index 00000000..a1bfbaa8 Binary files /dev/null and b/test/testfiles_for_unittests/arm_reloc_unrelocated.o differ diff --git a/test/testfiles_for_unittests/debug_info.elf b/test/testfiles_for_unittests/debug_info.elf new file mode 100644 index 00000000..502b9202 Binary files /dev/null and b/test/testfiles_for_unittests/debug_info.elf differ diff --git a/test/testfiles_for_unittests/dwarf_gnuops1.o b/test/testfiles_for_unittests/dwarf_gnuops1.o new file mode 100644 index 00000000..d489f64f Binary files /dev/null and b/test/testfiles_for_unittests/dwarf_gnuops1.o differ diff --git a/test/testfiles_for_unittests/empty_pubtypes/Makefile b/test/testfiles_for_unittests/empty_pubtypes/Makefile new file mode 100644 index 00000000..c53964f7 --- /dev/null +++ b/test/testfiles_for_unittests/empty_pubtypes/Makefile @@ -0,0 +1,2 @@ +main.elf: main.c + gcc -gpubnames -g -O0 main.c -o main.elf diff --git a/test/testfiles_for_unittests/empty_pubtypes/main.c b/test/testfiles_for_unittests/empty_pubtypes/main.c new file mode 100644 index 00000000..ab73b3a2 --- /dev/null +++ b/test/testfiles_for_unittests/empty_pubtypes/main.c @@ -0,0 +1 @@ +void main() {} diff --git 
a/test/testfiles_for_unittests/empty_pubtypes/main.elf b/test/testfiles_for_unittests/empty_pubtypes/main.elf new file mode 100755 index 00000000..9a3c2c13 Binary files /dev/null and b/test/testfiles_for_unittests/empty_pubtypes/main.elf differ diff --git a/test/testfiles_for_unittests/lambda.elf b/test/testfiles_for_unittests/lambda.elf new file mode 100755 index 00000000..d232a051 Binary files /dev/null and b/test/testfiles_for_unittests/lambda.elf differ diff --git a/test/testfiles_for_unittests/pascalenum.o b/test/testfiles_for_unittests/pascalenum.o new file mode 100644 index 00000000..a8520e87 Binary files /dev/null and b/test/testfiles_for_unittests/pascalenum.o differ diff --git a/test/testfiles_for_unittests/simple_mipsel.c b/test/testfiles_for_unittests/simple_mipsel.c new file mode 100644 index 00000000..601434b0 --- /dev/null +++ b/test/testfiles_for_unittests/simple_mipsel.c @@ -0,0 +1,14 @@ +/* Generated by compiling with any GCC version for MIPS Little Endian. +** GCC 5.4.0 is fine. 
+ +/usr/bin/mipsel-linux-gnu-gcc -g -O0 ./simple_mipsel.c -o ./simple_mipsel.elf +*/ + +void main(void) +{ + int a = 1, b = 2, res; + + res = a + b; + + return; +} diff --git a/test/testfiles_for_unittests/simple_mipsel.elf b/test/testfiles_for_unittests/simple_mipsel.elf new file mode 100755 index 00000000..86c25611 Binary files /dev/null and b/test/testfiles_for_unittests/simple_mipsel.elf differ diff --git a/test/testfiles_for_unittests/trailing_null_dies.elf b/test/testfiles_for_unittests/trailing_null_dies.elf new file mode 100644 index 00000000..1bc7f5ee Binary files /dev/null and b/test/testfiles_for_unittests/trailing_null_dies.elf differ diff --git a/test/testfiles_for_unittests/x64_bad_sections.elf b/test/testfiles_for_unittests/x64_bad_sections.elf new file mode 100644 index 00000000..6a30111f Binary files /dev/null and b/test/testfiles_for_unittests/x64_bad_sections.elf differ diff --git a/tox.ini b/tox.ini index 7e9b4846..1c0192f4 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,9 @@ [tox] -envlist = py27,py34,py35 +envlist = py27,py38 [testenv] +setenv = + LC_ALL = en_US.utf-8 commands = python test/run_all_unittests.py python test/run_examples_test.py