diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index a52998564ee1b..d140202ee295f 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -360,6 +360,11 @@ class BinaryFunction { /// True if the function is used for patching code at a fixed address. bool IsPatch{false}; + /// True if the original entry point of the function may get called, but the + /// original body cannot be executed and needs to be patched with code that + /// redirects execution to the new function body. + bool NeedsPatch{false}; + /// True if the function should not have an associated symbol table entry. bool IsAnonymous{false}; @@ -1372,6 +1377,9 @@ class BinaryFunction { /// Return true if this function is used for patching existing code. bool isPatch() const { return IsPatch; } + /// Return true if the function requires a patch. + bool needsPatch() const { return NeedsPatch; } + /// Return true if the function should not have associated symbol table entry. bool isAnonymous() const { return IsAnonymous; } @@ -1757,6 +1765,9 @@ class BinaryFunction { IsPatch = V; } + /// Mark the function for patching. + void setNeedsPatch(bool V) { NeedsPatch = V; } + /// Indicate if the function should have a name in the symbol table. 
void setAnonymous(bool V) { assert(isInjected() && "Only injected functions could be anonymous"); diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index 3de945f6a1507..fbb2614ba85f3 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -34,6 +34,7 @@ extern llvm::cl::opt AlignText; extern llvm::cl::opt AlignFunctions; extern llvm::cl::opt AggregateOnly; extern llvm::cl::opt BucketsPerLine; +extern llvm::cl::opt<bool> CompactCodeModel; extern llvm::cl::opt DiffOnly; extern llvm::cl::opt EnableBAT; extern llvm::cl::opt EqualizeBBCounts; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 9773e21aa7522..a7fecd085940e 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1797,8 +1797,6 @@ bool BinaryFunction::scanExternalRefs() { // Create relocation for every fixup. for (const MCFixup &Fixup : Fixups) { std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB); - // Can be skipped in case of overlow during relocation value encoding. - Rel->setOptional(); if (!Rel) { Success = false; continue; @@ -1814,6 +1812,17 @@ bool BinaryFunction::scanExternalRefs() { Success = false; continue; } + + if (BC.isAArch64()) { + // Allow the relocation to be skipped in case of the overflow during the + // relocation value encoding.
+ Rel->setOptional(); + + if (!opts::CompactCodeModel) + if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Rel->Symbol)) + TargetBF->setNeedsPatch(true); + } + Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset; FunctionRelocations.push_back(*Rel); } diff --git a/bolt/lib/Core/BinarySection.cpp b/bolt/lib/Core/BinarySection.cpp index e5def7547a187..6f07017c26060 100644 --- a/bolt/lib/Core/BinarySection.cpp +++ b/bolt/lib/Core/BinarySection.cpp @@ -186,16 +186,6 @@ void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, !Relocation::canEncodeValue(Reloc.Type, Value, SectionAddress + Reloc.Offset)) { - // A successful run of 'scanExternalRefs' means that all pending - // relocations are flushed. Otherwise, PatchEntries should run. - if (!opts::ForcePatch) { - BC.errs() - << "BOLT-ERROR: cannot encode relocation for symbol " - << Reloc.Symbol->getName() - << " as it is out-of-range. To proceed must use -force-patch\n"; - exit(1); - } - ++SkippedPendingRelocations; continue; } diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp index e6bd417705e6f..4dade161cc232 100644 --- a/bolt/lib/Passes/LongJmp.cpp +++ b/bolt/lib/Passes/LongJmp.cpp @@ -12,6 +12,7 @@ #include "bolt/Passes/LongJmp.h" #include "bolt/Core/ParallelUtilities.h" +#include "bolt/Utils/CommandLineOpts.h" #include "llvm/Support/MathExtras.h" #define DEBUG_TYPE "longjmp" @@ -26,11 +27,6 @@ extern cl::opt AlignFunctions; extern cl::opt UseOldText; extern cl::opt HotFunctionsAtEnd; -static cl::opt<bool> - CompactCodeModel("compact-code-model", - cl::desc("generate code for binaries <128MB on AArch64"), - cl::init(false), cl::cat(BoltCategory)); - static cl::opt<bool> GroupStubs("group-stubs", cl::desc("share stubs across functions"), cl::init(true), cl::cat(BoltOptCategory)); diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp index 4877e7dd8fdf3..55f7513615e7d 100644 --- a/bolt/lib/Passes/PatchEntries.cpp +++
b/bolt/lib/Passes/PatchEntries.cpp @@ -39,7 +39,8 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { bool NeedsPatching = llvm::any_of( llvm::make_second_range(BC.getBinaryFunctions()), [&](BinaryFunction &BF) { - return !BC.shouldEmit(BF) && !BF.hasExternalRefRelocations(); + return (!BC.shouldEmit(BF) && !BF.hasExternalRefRelocations()) || + BF.needsPatch(); }); if (!NeedsPatching) @@ -66,7 +67,7 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { // Check if we can skip patching the function. if (!opts::ForcePatch && !Function.hasEHRanges() && - Function.getSize() < PatchThreshold) + !Function.needsPatch() && Function.getSize() < PatchThreshold) continue; // List of patches for function entries. We either successfully patch diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index ad714371436e0..2d1d697919712 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -61,6 +61,11 @@ cl::opt cl::desc("number of entries per line (default 256)"), cl::init(256), cl::Optional, cl::cat(HeatmapCategory)); +cl::opt<bool> + CompactCodeModel("compact-code-model", + cl::desc("generate code for binaries <128MB on AArch64"), + cl::init(false), cl::cat(BoltCategory)); + cl::opt DiffOnly("diff-only", cl::desc("stop processing once we have enough to compare two binaries"), diff --git a/bolt/test/AArch64/lite-mode.s b/bolt/test/AArch64/lite-mode.s index a71edbe034669..d1e35ef75de46 100644 --- a/bolt/test/AArch64/lite-mode.s +++ b/bolt/test/AArch64/lite-mode.s @@ -2,21 +2,34 @@ ## non-optimized code.
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ +# RUN: --defsym COMPACT=1 %s -o %t.compact.o # RUN: link_fdata %s %t.o %t.fdata -# RUN: llvm-strip --strip-unneeded %t.o +# RUN: llvm-strip --strip-unneeded %t*.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static +# RUN: %clang %cflags %t.compact.o -o %t.compact.exe -Wl,-q -static # RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --lite +# RUN: llvm-bolt %t.compact.exe -o %t.compact.bolt --data %t.fdata --lite \ +# RUN: --compact-code-model # RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.exe \ # RUN: | FileCheck %s --check-prefix=CHECK-INPUT # RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.bolt \ # RUN: | FileCheck %s +# RUN: llvm-objdump -d --disassemble-symbols=_start.org.0 %t.bolt \ +# RUN: | FileCheck %s --check-prefix=CHECK-PATCH +# RUN: llvm-objdump -d %t.compact.bolt \ +# RUN: | FileCheck %s --check-prefix=CHECK-COMPACT +## In compact mode, make sure we do not create an unnecessary patch thunk. +# CHECK-COMPACT-NOT: <_start.org.0> ## Verify that the number of FDEs matches the number of functions in the output ## binary. There are three original functions and two optimized. +## NOTE: at the moment we are emitting extra FDEs for patched functions, thus +## there is one more FDE for _start. # RUN: llvm-readelf -u %t.bolt | grep -wc FDE \ # RUN: | FileCheck --check-prefix=CHECK-FDE %s -# CHECK-FDE: 5 +# CHECK-FDE: 6 ## In lite mode, optimized code will be separated from the original .text by ## over 128MB, making it impossible for call/bl instructions in cold functions @@ -28,15 +41,22 @@ _start: # FDATA: 0 [unknown] 0 1 _start 0 0 100 .cfi_startproc + +## Check that the code at the original location is converted into a +## veneer/thunk. 
+# CHECK-PATCH-LABEL: <_start.org.0> +# CHECK-PATCH-NEXT: adrp x16 +# CHECK-PATCH-NEXT: add x16, x16, +# CHECK-PATCH-NEXT: br x16 cmp x0, 1 b.eq .L0 bl cold_function .L0: ret x30 .cfi_endproc -.size _start, .-_start + .size _start, .-_start -## Cold non-optimized function with a reference to a hot function (_start). +## Cold non-optimized function with references to hot functions. # CHECK: Disassembly of section .bolt.org.text: # CHECK-LABEL: .globl cold_function @@ -97,12 +117,26 @@ cold_function: # CHECK-NEXT: nop # CHECK-NEXT: ldr x5 +## Since _start is relocated further than 128MB from the call site, we check +## that the call is converted into a call to its original version. That original +## version should contain a veneer/thunk code that we check separately. + bl _start +# CHECK-INPUT-NEXT: bl {{.*}} <_start> +# CHECK-NEXT: bl {{.*}} <_start.org.0> + +## Same as above, but the instruction is a tail call. + b _start +# CHECK-INPUT-NEXT: b {{.*}} <_start> +# CHECK-NEXT: b {{.*}} <_start.org.0> + .cfi_endproc -.size cold_function, .-cold_function + .size cold_function, .-cold_function -## Reserve 1MB of space to make functions that follow unreachable by ADRs in +.ifndef COMPACT +## Reserve 128MB of space to make functions that follow unreachable by ADRs in ## code that precedes this gap. 
-.space 0x100000 +.space 0x8000000 +.endif .globl far_func .type far_func, %function @@ -111,5 +145,4 @@ far_func: .cfi_startproc ret x30 .cfi_endproc -.size far_func, .-far_func - + .size far_func, .-far_func diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp index 377517adf03db..ba3e4ce099347 100644 --- a/bolt/unittests/Core/BinaryContext.cpp +++ b/bolt/unittests/Core/BinaryContext.cpp @@ -162,36 +162,6 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) { << "Wrong forward branch value\n"; } -TEST_P(BinaryContextTester, - FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOff) { - if (GetParam() != Triple::aarch64) - GTEST_SKIP(); - - // Tests that flushPendingRelocations exits if any pending relocation is out - // of range and PatchEntries hasn't run. Pending relocations are added by - // scanExternalRefs, so this ensures that either all scanExternalRefs - // relocations were flushed or PatchEntries ran. - - BinarySection &BS = BC->registerOrUpdateSection( - ".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); - // Create symbol 'Func0x4' - MCSymbol *RelSymbol = BC->getOrCreateGlobalSymbol(4, "Func"); - ASSERT_TRUE(RelSymbol); - Relocation Reloc{8, RelSymbol, ELF::R_AARCH64_CALL26, 0, 0}; - Reloc.setOptional(); - BS.addPendingRelocation(Reloc); - - SmallVector<char> Vect; - raw_svector_ostream OS(Vect); - - // Resolve relocation symbol to a high value so encoding will be out of range. - EXPECT_EXIT(BS.flushPendingRelocations( - OS, [&](const MCSymbol *S) { return 0x800000F; }), - ::testing::ExitedWithCode(1), - "BOLT-ERROR: cannot encode relocation for symbol Func0x4 as it is" - " out-of-range. To proceed must use -force-patch"); -} - TEST_P(BinaryContextTester, FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOn) { if (GetParam() != Triple::aarch64)