blob: c9067168c7f1b32d53623913c1a1222349fa564d [file] [log] [blame]
//===--- BinaryContext.h - Interface for machine-level context -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Context for processing binary executables in files and/or memory.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_BOLT_BINARY_CONTEXT_H
#define LLVM_TOOLS_LLVM_BOLT_BINARY_CONTEXT_H
#include "BinarySection.h"
#include "DebugData.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <system_error>
#include <unordered_map>
#include <utility>
#include <vector>
namespace llvm {
class DWARFDebugInfoEntryMinimal;
using namespace object;
namespace bolt {
class BinaryFunction;
class DataReader;
class BinaryContext {
BinaryContext() = delete;
/// Set of all sections.
using SectionSetType = std::set<BinarySection>;
SectionSetType Sections;
/// Map virtual address to a section. It is possible to have more than one
/// section mapped to the same address, e.g. non-allocatable sections.
using AddressToSectionMapType = std::multimap<uint64_t, BinarySection *>;
AddressToSectionMapType AddressToSection;
/// multimap of section name to BinarySection object. Some binaries
/// have multiple sections with the same name.
using NameToSectionMapType = std::multimap<std::string, BinarySection *>;
NameToSectionMapType NameToSection;
public:
/// [name] -> [address] map used for global symbol resolution.
typedef std::map<std::string, uint64_t> SymbolMapType;
SymbolMapType GlobalSymbols;
/// [address] -> [name1], [name2], ...
/// Global addresses never change.
std::multimap<uint64_t, std::string> GlobalAddresses;
/// [MCSymbol] -> [BinaryFunction]
///
/// As we fold identical functions, multiple symbols can point
/// to the same BinaryFunction.
std::unordered_map<const MCSymbol *,
BinaryFunction *> SymbolToFunctionMap;
/// Map address to a constant island owner (constant data in code section)
std::map<uint64_t, BinaryFunction *> AddressToConstantIslandMap;
/// Set of addresses in the code that are not a function start, and are
/// referenced from outside of containing function. E.g. this could happen
/// when a function has more than a single entry point.
std::set<uint64_t> InterproceduralReferences;
std::unique_ptr<MCContext> Ctx;
std::unique_ptr<DWARFContext> DwCtx;
std::unique_ptr<Triple> TheTriple;
const Target *TheTarget;
std::string TripleName;
std::unique_ptr<MCCodeEmitter> MCE;
std::unique_ptr<MCObjectFileInfo> MOFI;
std::unique_ptr<const MCAsmInfo> AsmInfo;
std::unique_ptr<const MCInstrInfo> MII;
std::unique_ptr<const MCSubtargetInfo> STI;
std::unique_ptr<MCInstPrinter> InstPrinter;
std::unique_ptr<const MCInstrAnalysis> MIA;
std::unique_ptr<const MCRegisterInfo> MRI;
std::unique_ptr<MCDisassembler> DisAsm;
std::unique_ptr<MCAsmBackend> MAB;
std::function<void(std::error_code)> ErrorCheck;
DataReader &DR;
/// Indicates if relocations are availabe for usage.
bool HasRelocations{false};
/// Sum of execution count of all functions
uint64_t SumExecutionCount{0};
/// Number of functions with profile information
uint64_t NumProfiledFuncs{0};
/// Total hotness score according to profiling data for this binary.
uint64_t TotalScore{0};
/// Track next available address for new allocatable sections. RewriteInstance
/// sets this prior to running BOLT passes, so layout passes are aware of the
/// final addresses functions will have.
uint64_t LayoutStartAddress{0};
/// Old .text info.
uint64_t OldTextSectionAddress{0};
uint64_t OldTextSectionOffset{0};
uint64_t OldTextSectionSize{0};
/// True if the binary requires immediate relocation processing.
bool RequiresZNow{false};
BinaryContext(std::unique_ptr<MCContext> Ctx,
std::unique_ptr<DWARFContext> DwCtx,
std::unique_ptr<Triple> TheTriple,
const Target *TheTarget,
std::string TripleName,
std::unique_ptr<MCCodeEmitter> MCE,
std::unique_ptr<MCObjectFileInfo> MOFI,
std::unique_ptr<const MCAsmInfo> AsmInfo,
std::unique_ptr<const MCInstrInfo> MII,
std::unique_ptr<const MCSubtargetInfo> STI,
std::unique_ptr<MCInstPrinter> InstPrinter,
std::unique_ptr<const MCInstrAnalysis> MIA,
std::unique_ptr<const MCRegisterInfo> MRI,
std::unique_ptr<MCDisassembler> DisAsm,
DataReader &DR) :
Ctx(std::move(Ctx)),
DwCtx(std::move(DwCtx)),
TheTriple(std::move(TheTriple)),
TheTarget(TheTarget),
TripleName(TripleName),
MCE(std::move(MCE)),
MOFI(std::move(MOFI)),
AsmInfo(std::move(AsmInfo)),
MII(std::move(MII)),
STI(std::move(STI)),
InstPrinter(std::move(InstPrinter)),
MIA(std::move(MIA)),
MRI(std::move(MRI)),
DisAsm(std::move(DisAsm)),
DR(DR) {
Relocation::Arch = this->TheTriple->getArch();
}
~BinaryContext();
std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS);
/// Return a global symbol registered at a given \p Address. If no symbol
/// exists, create one with unique name using \p Prefix.
/// If there are multiple symbols registered at the \p Address, then
/// return the first one.
MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix);
/// Return MCSymbol registered at a given \p Address or nullptr if no
/// global symbol was registered at the location.
MCSymbol *getGlobalSymbolAtAddress(uint64_t Address) const;
/// Find the address of the global symbol with the given \p Name.
/// return an error if no such symbol exists.
ErrorOr<uint64_t> getAddressForGlobalSymbol(StringRef Name) const {
auto Itr = GlobalSymbols.find(Name);
if (Itr != GlobalSymbols.end())
return Itr->second;
return std::make_error_code(std::errc::bad_address);
}
/// Return MCSymbol for the given \p Name or nullptr if no
/// global symbol with that name exists.
MCSymbol *getGlobalSymbolByName(const std::string &Name) const;
/// Print the global symbol table.
void printGlobalSymbols(raw_ostream& OS) const;
/// Get the raw bytes for a given function.
ErrorOr<ArrayRef<uint8_t>>
getFunctionData(const BinaryFunction &Function) const;
/// Register information about the given section so we can look up
/// sections for addresses.
BinarySection &registerSection(SectionRef Section);
iterator_range<SectionSetType::iterator> sections() {
return make_range(Sections.begin(), Sections.end());
}
iterator_range<SectionSetType::const_iterator> sections() const {
return make_range(Sections.begin(), Sections.end());
}
/// Return largest section containing the given \p Address. These
/// functions only work for allocatable sections, i.e. ones with non-zero
/// addresses.
ErrorOr<BinarySection &> getSectionForAddress(uint64_t Address);
ErrorOr<const BinarySection &> getSectionForAddress(uint64_t Address) const;
/// Return section(s) associated with given \p Name.
iterator_range<NameToSectionMapType::iterator>
getSectionByName(StringRef Name) {
return make_range(NameToSection.equal_range(Name));
}
iterator_range<NameToSectionMapType::const_iterator>
getSectionByName(StringRef Name) const {
return make_range(NameToSection.equal_range(Name));
}
/// Return the unique (allocatable) section associated with given \p Name.
/// If there is more than one section with the same name, return an error
/// object.
ErrorOr<BinarySection &> getUniqueSectionByName(StringRef SectionName) {
auto Sections = getSectionByName(SectionName);
if (Sections.begin() != Sections.end() &&
std::next(Sections.begin()) == Sections.end())
return *Sections.begin()->second;
return std::make_error_code(std::errc::bad_address);
}
ErrorOr<const BinarySection &>
getUniqueSectionByName(StringRef SectionName) const {
auto Sections = getSectionByName(SectionName);
if (Sections.begin() != Sections.end() &&
std::next(Sections.begin()) == Sections.end())
return *Sections.begin()->second;
return std::make_error_code(std::errc::bad_address);
}
/// Given \p Address in the binary, extract and return a pointer value at that
/// address. The address has to be a valid statically allocated address for
/// the binary.
ErrorOr<uint64_t> extractPointerAtAddress(uint64_t Address) const;
/// Register a symbol with \p Name at a given \p Address.
MCSymbol *registerNameAtAddress(const std::string &Name, uint64_t Address) {
// Check if the Name was already registered.
const auto GSI = GlobalSymbols.find(Name);
if (GSI != GlobalSymbols.end()) {
assert(GSI->second == Address && "addresses do not match");
auto *Symbol = Ctx->lookupSymbol(Name);
assert(Symbol && "symbol should be registered with MCContext");
return Symbol;
}
// Add the name to global symbols map.
GlobalSymbols[Name] = Address;
// Add to the reverse map. There could multiple names at the same address.
GlobalAddresses.emplace(std::make_pair(Address, Name));
// Register the name with MCContext.
return Ctx->getOrCreateSymbol(Name);
}
/// Replaces all references to \p ChildBF with \p ParentBF. \p ChildBF is then
/// removed from the list of functions \p BFs. The profile data of \p ChildBF
/// is merged into that of \p ParentBF.
void foldFunction(BinaryFunction &ChildBF,
BinaryFunction &ParentBF,
std::map<uint64_t, BinaryFunction> &BFs);
/// Add relocation for \p Section at a given \p Offset.
void addSectionRelocation(BinarySection &Section, uint64_t Offset,
MCSymbol *Symbol, uint64_t Type,
uint64_t Addend = 0);
/// Add a relocation at a given \p Address.
void addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type,
uint64_t Addend = 0);
/// Remove registered relocation at a given \p Address.
void removeRelocationAt(uint64_t Address);
/// Return a relocation registered at a given \p Address, or nullptr if there
/// is no relocation at such address.
const Relocation *getRelocationAt(uint64_t Address);
const BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol) const {
auto BFI = SymbolToFunctionMap.find(Symbol);
return BFI == SymbolToFunctionMap.end() ? nullptr : BFI->second;
}
BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol) {
auto BFI = SymbolToFunctionMap.find(Symbol);
return BFI == SymbolToFunctionMap.end() ? nullptr : BFI->second;
}
/// Populate some internal data structures with debug info.
void preprocessDebugInfo(
std::map<uint64_t, BinaryFunction> &BinaryFunctions);
/// Add a filename entry from SrcCUID to DestCUID.
unsigned addDebugFilenameToUnit(const uint32_t DestCUID,
const uint32_t SrcCUID,
unsigned FileIndex);
/// Return functions in output layout order
static std::vector<BinaryFunction *>
getSortedFunctions(std::map<uint64_t, BinaryFunction> &BinaryFunctions);
/// Compute the native code size for a range of instructions.
/// Note: this can be imprecise wrt the final binary since happening prior to
/// relaxation, as well as wrt the original binary because of opcode
/// shortening.
template <typename Itr>
uint64_t computeCodeSize(Itr Beg, Itr End) const {
uint64_t Size = 0;
while (Beg != End) {
// Calculate the size of the instruction.
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
raw_svector_ostream VecOS(Code);
if (MIA->isCFI(*Beg) || MIA->isEHLabel(*Beg)) {
++Beg;
continue;
}
MCE->encodeInstruction(*Beg++, VecOS, Fixups, *STI);
Size += Code.size();
}
return Size;
}
/// Return a function execution count threshold for determining whether
/// the function is 'hot'. Consider it hot if count is above the average exec
/// count of profiled functions.
uint64_t getHotThreshold() const {
static uint64_t Threshold{0};
if (Threshold == 0) {
Threshold =
NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1;
}
return Threshold;
}
/// Return true if instruction \p Inst requires an offset for further
/// processing (e.g. assigning a profile).
bool keepOffsetForInstruction(const MCInst &Inst) const {
if (MIA->isCall(Inst) || MIA->isBranch(Inst) || MIA->isReturn(Inst) ||
MIA->isPrefix(Inst) || MIA->isIndirectBranch(Inst)) {
return true;
}
return false;
}
/// Print the string name for a CFI operation.
static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst);
/// Print a single MCInst in native format. If Function is non-null,
/// the instruction will be annotated with CFI and possibly DWARF line table
/// info.
/// If printMCInst is true, the instruction is also printed in the
/// architecture independent format.
void printInstruction(raw_ostream &OS,
const MCInst &Instruction,
uint64_t Offset = 0,
const BinaryFunction *Function = nullptr,
bool PrintMCInst = false,
bool PrintMemData = false,
bool PrintRelocations = false) const;
/// Print a range of instructions.
template <typename Itr>
uint64_t printInstructions(raw_ostream &OS,
Itr Begin,
Itr End,
uint64_t Offset = 0,
const BinaryFunction *Function = nullptr,
bool PrintMCInst = false,
bool PrintMemData = false,
bool PrintRelocations = false) const {
while (Begin != End) {
printInstruction(OS, *Begin, Offset, Function, PrintMCInst,
PrintMemData, PrintRelocations);
Offset += computeCodeSize(Begin, Begin + 1);
++Begin;
}
return Offset;
}
};
} // namespace bolt
} // namespace llvm
#endif