//===--- RewriteInstance.h - Interface for binary rewriting ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Interface to control an instance of a binary rewriting process.
//
//===----------------------------------------------------------------------===//
| |
| #ifndef LLVM_TOOLS_LLVM_BOLT_REWRITE_INSTANCE_H |
| #define LLVM_TOOLS_LLVM_BOLT_REWRITE_INSTANCE_H |
| |
| #include "BinaryFunction.h" |
| #include "DebugData.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" |
| #include "llvm/ExecutionEngine/SectionMemoryManager.h" |
| #include "llvm/MC/StringTableBuilder.h" |
| #include "llvm/Object/ELFObjectFile.h" |
| #include "llvm/Object/ObjectFile.h" |
| #include "llvm/Support/StringPool.h" |
| #include <map> |
| #include <set> |
| |
| namespace llvm { |
| |
| class DWARFContext; |
| class DWARFFrame; |
| class ToolOutputFile; |
| |
| namespace bolt { |
| |
| class BinaryContext; |
| class CFIReaderWriter; |
| class DataAggregator; |
| class DataReader; |
| |
/// Section information for mapping and re-writing.
///
/// Every field carries an in-class default, so a default-constructed
/// SectionInfo has all numeric fields zero, all flags false and no pending
/// relocations.
struct SectionInfo {
  uint64_t AllocAddress{0}; ///< Current location of the section in memory.
  uint64_t Size{0};         ///< Section size.
  unsigned Alignment{0};    ///< Alignment of the section.
  bool IsCode{false};       ///< Does this section contain code?
  bool IsReadOnly{false};   ///< Is the section read-only?
  bool IsLocal{false};      ///< Is this section local to a function, and
                            ///< should only be emitted with the function?
  bool IsStrTab{false};     ///< Is this a string table section.
  uint64_t FileAddress{0};  ///< Address for the output file (final address).
  uint64_t FileOffset{0};   ///< Offset in the output file.
  unsigned SectionID{0};    ///< Unique ID used for address mapping.
  bool IsELFNote{false};    ///< Is ELF note section?

  /// A relocation recorded against this section.
  ///
  /// Deliberately left as a plain aggregate: the fields have no default
  /// initializers, so a Reloc must be fully initialized by whoever creates it.
  struct Reloc {
    uint32_t Offset;
    uint8_t Size;
    uint8_t Type; // unused atm
    uint32_t Value;
  };

  /// Pending relocations for the section.
  std::vector<Reloc> PendingRelocs;

  /// Build a fully described section record.
  ///
  /// \p Address is stored as AllocAddress; every other parameter is stored in
  /// the member of the same name. IsStrTab cannot be set through this
  /// constructor and keeps its default of false; PendingRelocs starts empty.
  SectionInfo(uint64_t Address, uint64_t Size, unsigned Alignment, bool IsCode,
              bool IsReadOnly, bool IsLocal, uint64_t FileAddress = 0,
              uint64_t FileOffset = 0, unsigned SectionID = 0,
              bool IsELFNote = false)
      : AllocAddress(Address), Size(Size), Alignment(Alignment),
        IsCode(IsCode), IsReadOnly(IsReadOnly), IsLocal(IsLocal),
        FileAddress(FileAddress), FileOffset(FileOffset),
        SectionID(SectionID), IsELFNote(IsELFNote) {}

  /// Default construction relies entirely on the in-class initializers above.
  SectionInfo() = default;
};
| |
| struct SegmentInfo { |
| uint64_t Address; /// Address of the segment in memory. |
| uint64_t Size; /// Size of the segment in memory. |
| uint64_t FileOffset; /// Offset in the file. |
| uint64_t FileSize; /// Size in file. |
| |
| void print(raw_ostream &OS) const { |
| OS << "SegmentInfo { Address: 0x" |
| << Twine::utohexstr(Address) << ", Size: 0x" |
| << Twine::utohexstr(Size) << ", FileOffset: 0x" |
| << Twine::utohexstr(FileOffset) << ", FileSize: 0x" |
| << Twine::utohexstr(FileSize) << "}"; |
| }; |
| }; |
| |
| inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) { |
| SegInfo.print(OS); |
| return OS; |
| } |
| |
| /// Class responsible for allocating and managing code and data sections. |
| class ExecutableFileMemoryManager : public SectionMemoryManager { |
| private: |
| uint8_t *allocateSection(intptr_t Size, |
| unsigned Alignment, |
| unsigned SectionID, |
| StringRef SectionName, |
| bool IsCode, |
| bool IsReadOnly); |
| |
| bool AllowStubs; |
| |
| public: |
| /// [start memory address] -> [segment info] mapping. |
| std::map<uint64_t, SegmentInfo> SegmentMapInfo; |
| |
| /// Keep [section name] -> [section info] map for later remapping. |
| std::map<std::string, SectionInfo> SectionMapInfo; |
| |
| /// Information about non-allocatable sections. |
| std::map<std::string, SectionInfo> NoteSectionInfo; |
| |
| ExecutableFileMemoryManager(bool AllowStubs) : AllowStubs(AllowStubs) {} |
| |
| ~ExecutableFileMemoryManager(); |
| |
| uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, |
| unsigned SectionID, |
| StringRef SectionName) override { |
| return allocateSection(Size, Alignment, SectionID, SectionName, |
| /*IsCode=*/true, true); |
| } |
| |
| uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, |
| unsigned SectionID, StringRef SectionName, |
| bool IsReadOnly) override { |
| return allocateSection(Size, Alignment, SectionID, SectionName, |
| /*IsCode=*/false, IsReadOnly); |
| } |
| |
| uint8_t *recordNoteSection(const uint8_t *Data, uintptr_t Size, |
| unsigned Alignment, unsigned SectionID, |
| StringRef SectionName) override; |
| |
| bool allowStubAllocation() const override { return AllowStubs; } |
| |
| bool finalizeMemory(std::string *ErrMsg = nullptr) override; |
| }; |
| |
| /// This class encapsulates all data necessary to carry on binary reading, |
| /// disassembly, CFG building, BB reordering (among other binary-level |
| /// optimizations) and rewriting. It also has the logic to coordinate such |
| /// events. |
| class RewriteInstance { |
| public: |
| RewriteInstance(llvm::object::ELFObjectFileBase *File, DataReader &DR, |
| DataAggregator &DA, const int Argc, const char *const *Argv); |
| ~RewriteInstance(); |
| |
| /// Reset all state except for split hints. Used to run a second pass with |
| /// function splitting information. |
| void reset(); |
| |
| /// Run all the necessary steps to read, optimize and rewrite the binary. |
| void run(); |
| |
| /// Check that binary build ID matches the one used in perf.data to collect |
| /// profile |
| void checkBuildID(); |
| |
| /// Populate array of binary functions and other objects of interest |
| /// from meta data in the file. |
| void discoverFileObjects(); |
| |
| /// Read info from special sections. E.g. eh_frame and .gcc_except_table |
| /// for exception and stack unwinding information. |
| void readSpecialSections(); |
| |
| /// Read relocations from a given section. |
| void readRelocations(const object::SectionRef &Section); |
| |
| /// Read information from debug sections. |
| void readDebugInfo(); |
| |
| /// Associate profile data with binary objects. |
| void processProfileData(); |
| |
| /// Disassemble each function in the binary and associate it with a |
| /// BinaryFunction object, preparing all information necessary for binary |
| /// optimization. |
| void disassembleFunctions(); |
| |
| void postProcessFunctions(); |
| |
| /// Run optimizations that operate at the binary, or post-linker, level. |
| void runOptimizationPasses(); |
| |
| /// Write all functions to an intermediary object file, map virtual to real |
| /// addresses and link this object file, resolving all relocations and |
| /// performing final relaxation. |
| void emitFunctions(); |
| |
| /// Emit data \p Section, possibly with relocations. Use name \p Name if |
| /// non-empty. |
| void emitDataSection(MCStreamer *Streamer, |
| const BinarySection &Section, |
| std::string Name = ""); |
| |
| /// Emit data sections that have code references in them. |
| void emitDataSections(MCStreamer *Streamer); |
| |
| /// Update debug information in the file for re-written code. |
| void updateDebugInfo(); |
| |
| /// Recursively update debug info for all DIEs in \p Unit. |
| /// If \p Function is not empty, it points to a function corresponding |
| /// to a parent DW_TAG_subprogram node of the current \p DIE. |
| void updateUnitDebugInfo(const DWARFDie DIE, |
| std::vector<const BinaryFunction *> FunctionStack); |
| |
| /// Map all sections to their final addresses. |
| void |
| mapFileSections(orc::RTDyldObjectLinkingLayer::ObjHandleT &ObjectsHandle); |
| |
| /// Update output object's values based on the final \p Layout. |
| void updateOutputValues(const MCAsmLayout &Layout); |
| |
| /// Check which functions became larger than their original version and |
| /// annotate function splitting information. |
| /// |
| /// Returns true if any function was annotated, requiring us to perform a |
| /// second pass to emit those functions in two parts. |
| bool checkLargeFunctions(); |
| |
| /// Updates debug line information for non-simple functions, which are not |
| /// rewritten. |
| void updateDebugLineInfoForNonSimpleFunctions(); |
| |
| /// Rewrite back all functions (hopefully optimized) that fit in the original |
| /// memory footprint for that function. If the function is now larger and does |
| /// not fit in the binary, reject it and preserve the original version of the |
| /// function. If we couldn't understand the function for some reason in |
| /// disassembleFunctions(), also preserve the original version. |
| void rewriteFile(); |
| |
| /// Return address of a function in the new binary corresponding to |
| /// \p OldAddress address in the original binary. |
| uint64_t getNewFunctionAddress(uint64_t OldAddress); |
| |
| /// Return value for the symbol \p Name in the output. |
| uint64_t getNewValueForSymbol(const StringRef Name) { |
| return cantFail(OLT->findSymbol(Name, false).getAddress(), |
| "findSymbol failed"); |
| } |
| |
| /// Return BinaryFunction containing a given \p Address or nullptr if |
| /// no registered function has it. |
| /// |
| /// In a binary a function has somewhat vague boundaries. E.g. a function can |
| /// refer to the first byte past the end of the function, and it will still be |
| /// referring to this function, not the function following it in the address |
| /// space. Thus we have the following flags that allow to lookup for |
| /// a function where a caller has more context for the search. |
| /// |
| /// If \p CheckPastEnd is true and the \p Address falls on a byte |
| /// immediately following the last byte of some function and there's no other |
| /// function that starts there, then return the function as the one containing |
| /// the \p Address. This is useful when we need to locate functions for |
| /// references pointing immediately past a function body. |
| /// |
| /// If \p UseMaxSize is true, then include the space between this function |
| /// body and the next object in address ranges that we check. |
| BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address, |
| bool CheckPastEnd = false, |
| bool UseMaxSize = false); |
| |
| const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const; |
| |
| /// Produce output address ranges based on input ranges for some module. |
| DWARFAddressRangesVector translateModuleAddressRanges( |
| const DWARFAddressRangesVector &InputRanges) const; |
| |
| private: |
| /// Emit a single function. |
| void emitFunction(MCStreamer &Streamer, BinaryFunction &Function, |
| bool EmitColdPart); |
| |
| /// Detect addresses and offsets available in the binary for allocating |
| /// new sections. |
| void discoverStorage(); |
| |
| /// Read binary sections and find a gnu note section with the build-id |
| Optional<std::string> getBuildID(); |
| |
| /// Adjust function sizes and set proper maximum size values after the whole |
| /// symbol table has been processed. |
| void adjustFunctionBoundaries(); |
| |
| /// Make .eh_frame section relocatable. |
| void relocateEHFrameSection(); |
| |
| /// Analyze relocation \p Rel contained in section \p RelocatedSection. |
| /// Return true if the relocation was successfully processed, false otherwise. |
| /// The \p SymbolName, \p SymbolAddress, \p Addend and \p ExtractedValue |
| /// parameters will be set on success. |
| bool analyzeRelocation(const RelocationRef &Rel, |
| SectionRef RelocatedSection, |
| std::string &SymbolName, |
| uint64_t &SymbolAddress, |
| int64_t &Addend, |
| uint64_t &ExtractedValue) const; |
| |
| /// Rewrite non-allocatable sections with modifications. |
| void rewriteNoteSections(); |
| |
| /// Write .eh_frame_hdr. |
| void writeEHFrameHeader(SectionInfo &EHFrameSecInfo); |
| |
| /// Disassemble and create function entries for PLT. |
| void disassemblePLT(); |
| |
| /// ELF-specific part. TODO: refactor into new class. |
| #define ELF_FUNCTION(FUNC) \ |
| template <typename ELFT> void FUNC(ELFObjectFile<ELFT> *Obj); \ |
| void FUNC() { \ |
| if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(InputFile)) \ |
| return FUNC(ELF32LE); \ |
| if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(InputFile)) \ |
| return FUNC(ELF64LE); \ |
| if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(InputFile)) \ |
| return FUNC(ELF32BE); \ |
| auto *ELF64BE = cast<ELF64BEObjectFile>(InputFile); \ |
| return FUNC(ELF64BE); \ |
| } |
| |
| /// Patch ELF book-keeping info. |
| void patchELF(); |
| void patchELFPHDRTable(); |
| |
| /// Create section header table. |
| ELF_FUNCTION(patchELFSectionHeaderTable); |
| |
| /// Create the regular symbol table and patch dyn symbol tables. |
| ELF_FUNCTION(patchELFSymTabs); |
| |
| /// Patch dynamic section/segment of ELF. |
| ELF_FUNCTION(patchELFDynamic); |
| |
| /// Patch .got |
| ELF_FUNCTION(patchELFGOT); |
| |
| /// Patch .rela.plt section. |
| ELF_FUNCTION(patchELFRelaPLT); |
| |
| /// Finalize memory image of section header string table. |
| ELF_FUNCTION(finalizeSectionStringTable); |
| |
| /// Get a list of all the sections to include in the output binary along |
| /// with a map of input to output indices. |
| template <typename ELFT, |
| typename ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr> |
| std::vector<uint32_t> |
| getOutputSections(ELFObjectFile<ELFT> *File, |
| std::vector<ELFShdrTy> *OutputSections); |
| |
| /// Add a notes section containing the BOLT revision and command line options. |
| void addBoltInfoSection(); |
| |
| /// Computes output .debug_line line table offsets for each compile unit, |
| /// and updates stmt_list for a corresponding compile unit. |
| void updateLineTableOffsets(); |
| |
| /// Generate new contents for .debug_ranges and .debug_aranges section. |
| void finalizeDebugSections(); |
| |
| /// Patches the binary for DWARF address ranges (e.g. in functions and lexical |
| /// blocks) to be updated. |
| void updateDWARFAddressRanges(); |
| |
| /// Rewrite .gdb_index section if present. |
| void updateGdbIndexSection(); |
| |
| /// Patches the binary for an object's address ranges to be updated. |
| /// The object can be a anything that has associated address ranges via either |
| /// DW_AT_low/high_pc or DW_AT_ranges (i.e. functions, lexical blocks, etc). |
| /// \p DebugRangesOffset is the offset in .debug_ranges of the object's |
| /// new address ranges in the output binary. |
| /// \p Unit Compile uniit the object belongs to. |
| /// \p DIE is the object's DIE in the input binary. |
| void updateDWARFObjectAddressRanges(const DWARFDie DIE, |
| uint64_t DebugRangesOffset); |
| |
| /// Return file offset corresponding to a given virtual address. |
| uint64_t getFileOffsetFor(uint64_t Address) { |
| assert(Address >= NewTextSegmentAddress && |
| "address in not in the new text segment"); |
| return Address - NewTextSegmentAddress + NewTextSegmentOffset; |
| } |
| |
| /// Return file offset corresponding to a virtual \p Address. |
| /// Return 0 if the address has no mapping in the file, including being |
| /// part of .bss section. |
| uint64_t getFileOffsetForAddress(uint64_t Address) const; |
| |
| /// Return true if we will overwrite contents of the section instead |
| /// of appending contents to it. |
| bool willOverwriteSection(StringRef SectionName); |
| |
| /// Construct BinaryFunction object and add it to internal maps. |
| BinaryFunction *createBinaryFunction(const std::string &Name, |
| BinarySection &Section, |
| uint64_t Address, |
| uint64_t Size, |
| bool IsSimple); |
| |
| public: |
| /// When updating debug info, these are the sections we overwrite. |
| static constexpr const char *SectionsToOverwrite[] = { |
| ".shstrtab", |
| ".symtab", |
| ".strtab", |
| ".debug_aranges", |
| ".debug_line", |
| ".debug_loc", |
| ".debug_ranges", |
| ".gdb_index", |
| }; |
| |
| private: |
| |
| static const char TimerGroupName[]; |
| |
| static const char TimerGroupDesc[]; |
| |
| /// Huge page size used for alignment. |
| static constexpr unsigned PageAlign = 0x200000; |
| |
| /// Alignment value used for .eh_frame_hdr. |
| static constexpr uint64_t EHFrameHdrAlign = 4; |
| |
| /// An instance of the input binary we are processing, externally owned. |
| llvm::object::ELFObjectFileBase *InputFile; |
| |
| /// Command line args used to process binary. |
| const int Argc; |
| const char *const *Argv; |
| |
| /// Holds our data aggregator in case user supplied a raw perf data file |
| DataAggregator &DA; |
| |
| std::unique_ptr<BinaryContext> BC; |
| std::unique_ptr<CFIReaderWriter> CFIRdWrt; |
| |
| /// Memory manager for sections and segments. Used to communicate with ORC |
| /// among other things. |
| std::shared_ptr<ExecutableFileMemoryManager> EFMM; |
| |
| // Run ObjectLinkingLayer() with custom memory manager and symbol resolver. |
| std::unique_ptr<orc::RTDyldObjectLinkingLayer> OLT; |
| |
| /// Output file where we mix original code from the input binary and |
| /// optimized code for selected functions. |
| std::unique_ptr<ToolOutputFile> Out; |
| |
| /// Offset in the input file where non-allocatable sections start. |
| uint64_t FirstNonAllocatableOffset{0}; |
| |
| /// Information about program header table. |
| uint64_t PHDRTableAddress{0}; |
| uint64_t PHDRTableOffset{0}; |
| unsigned Phnum{0}; |
| |
| /// New code segment info. |
| uint64_t NewTextSegmentAddress{0}; |
| uint64_t NewTextSegmentOffset{0}; |
| uint64_t NewTextSegmentSize{0}; |
| |
| /// Track next available address for new allocatable sections. |
| uint64_t NextAvailableAddress{0}; |
| |
| /// Entry point in the file (first instructions to be executed). |
| uint64_t EntryPoint{0}; |
| |
| /// Store all non-zero symbols in this map for a quick address lookup. |
| std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs; |
| |
| /// Store all functions in the binary, sorted by original address. |
| std::map<uint64_t, BinaryFunction> BinaryFunctions; |
| |
| /// Stores and serializes information that will be put into the .debug_ranges |
| /// and .debug_aranges DWARF sections. |
| std::unique_ptr<DebugRangesSectionsWriter> RangesSectionsWriter; |
| |
| std::unique_ptr<DebugLocWriter> LocationListWriter; |
| |
| /// Patchers used to apply simple changes to sections of the input binary. |
| /// Maps section name -> patcher. |
| std::map<std::string, std::unique_ptr<BinaryPatcher>> SectionPatchers; |
| |
| uint64_t NewTextSectionStartAddress{0}; |
| |
| uint64_t NewTextSectionIndex{0}; |
| |
| /// Exception handling and stack unwinding information in this binary. |
| ArrayRef<uint8_t> LSDAData; |
| uint64_t LSDAAddress{0}; |
| const llvm::DWARFDebugFrame *EHFrame{nullptr}; |
| ErrorOr<BinarySection &> EHFrameSection{std::errc::bad_address}; |
| |
| /// .plt section. |
| ErrorOr<BinarySection &> PLTSection{std::errc::bad_address}; |
| |
| /// .got.plt sections. |
| /// |
| /// Contains jump slots (addresses) indirectly referenced by |
| /// instructions in .plt section. |
| ErrorOr<BinarySection &> GOTPLTSection{std::errc::bad_address}; |
| |
| /// .plt.got section (#clowntown). |
| /// |
| /// A section sometimes generated by BFD linker. |
| ErrorOr<BinarySection &> PLTGOTSection{std::errc::bad_address}; |
| |
| /// .rela.plt section. |
| /// |
| /// Contains relocations against .got.plt. |
| ErrorOr<BinarySection &> RelaPLTSection{std::errc::bad_address}; |
| |
| /// .gdb_index section. |
| ErrorOr<BinarySection &> GdbIndexSection{std::errc::bad_address}; |
| |
| uint64_t NewSymTabOffset{0}; |
| |
| /// Keep track of functions we fail to write in the binary. We need to avoid |
| /// rewriting CFI info for these functions. |
| std::vector<uint64_t> FailedAddresses; |
| |
| /// Size of the .debug_loc section in input. |
| uint32_t DebugLocSize{0}; |
| |
| /// Keep track of which functions didn't fit in their original space in the |
| /// last emission, so that we may either decide to split or not optimize them. |
| std::set<uint64_t> LargeFunctions; |
| |
| /// Section header string table. |
| StringTableBuilder SHStrTab; |
| StringPool SHStrTabPool; |
| std::vector<PooledStringPtr> AllSHStrTabStrings; |
| |
| /// A rewrite of strtab |
| std::string NewStrTab; |
| |
| static const std::string OrgSecPrefix; |
| |
| static const std::string BOLTSecPrefix; |
| }; |
| |
| } // namespace bolt |
| } // namespace llvm |
| |
| #endif |