Skip to content

[BOLT][AArch64] Patch functions targeted by optional relocs #138750

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,11 @@ class BinaryFunction {
/// True if the function is used for patching code at a fixed address.
bool IsPatch{false};

/// True if the original entry point of the function may get called, but the
/// original body cannot be executed and needs to be patched with code that
/// redirects execution to the new function body.
bool NeedsPatch{false};

/// True if the function should not have an associated symbol table entry.
bool IsAnonymous{false};

Expand Down Expand Up @@ -1372,6 +1377,9 @@ class BinaryFunction {
/// Return true if this function is used for patching existing code.
bool isPatch() const { return IsPatch; }

/// Return true if the original entry point of this function may still get
/// called and therefore has to be patched with code that redirects execution
/// to the new function body.
bool needsPatch() const { return NeedsPatch; }

/// Return true if the function should not have an associated symbol table entry.
bool isAnonymous() const { return IsAnonymous; }

Expand Down Expand Up @@ -1757,6 +1765,9 @@ class BinaryFunction {
IsPatch = V;
}

/// Set whether the original entry point of this function must be patched
/// with code that redirects execution to the new function body. Passing
/// false clears a previously set mark.
void setNeedsPatch(bool V) { NeedsPatch = V; }

/// Indicate if the function should have a name in the symbol table.
void setAnonymous(bool V) {
assert(isInjected() && "Only injected functions could be anonymous");
Expand Down
1 change: 1 addition & 0 deletions bolt/include/bolt/Utils/CommandLineOpts.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ extern llvm::cl::opt<unsigned> AlignText;
extern llvm::cl::opt<unsigned> AlignFunctions;
extern llvm::cl::opt<bool> AggregateOnly;
extern llvm::cl::opt<unsigned> BucketsPerLine;
extern llvm::cl::opt<bool> CompactCodeModel;
extern llvm::cl::opt<bool> DiffOnly;
extern llvm::cl::opt<bool> EnableBAT;
extern llvm::cl::opt<bool> EqualizeBBCounts;
Expand Down
13 changes: 11 additions & 2 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1797,8 +1797,6 @@ bool BinaryFunction::scanExternalRefs() {
// Create relocation for every fixup.
for (const MCFixup &Fixup : Fixups) {
std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
// Can be skipped in case of overflow during relocation value encoding.
Rel->setOptional();
if (!Rel) {
Success = false;
continue;
Expand All @@ -1814,6 +1812,17 @@ bool BinaryFunction::scanExternalRefs() {
Success = false;
continue;
}

if (BC.isAArch64()) {
// Allow the relocation to be skipped if its value overflows while being
// encoded.
Rel->setOptional();

// A skipped relocation leaves the reference pointing at the target's
// original entry point, so mark the target function as needing a patch.
// NOTE(review): this is not done under --compact-code-model, presumably
// because targets are then expected to stay within range -- confirm.
if (!opts::CompactCodeModel)
if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Rel->Symbol))
TargetBF->setNeedsPatch(true);
}

Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
FunctionRelocations.push_back(*Rel);
}
Expand Down
10 changes: 0 additions & 10 deletions bolt/lib/Core/BinarySection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,16 +186,6 @@ void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
!Relocation::canEncodeValue(Reloc.Type, Value,
SectionAddress + Reloc.Offset)) {

// A successful run of 'scanExternalRefs' means that all pending
// relocations are flushed. Otherwise, PatchEntries should run.
if (!opts::ForcePatch) {
BC.errs()
<< "BOLT-ERROR: cannot encode relocation for symbol "
<< Reloc.Symbol->getName()
<< " as it is out-of-range. To proceed must use -force-patch\n";
exit(1);
}

++SkippedPendingRelocations;
continue;
}
Expand Down
6 changes: 1 addition & 5 deletions bolt/lib/Passes/LongJmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "bolt/Passes/LongJmp.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "longjmp"
Expand All @@ -26,11 +27,6 @@ extern cl::opt<unsigned> AlignFunctions;
extern cl::opt<bool> UseOldText;
extern cl::opt<bool> HotFunctionsAtEnd;

static cl::opt<bool>
CompactCodeModel("compact-code-model",
cl::desc("generate code for binaries <128MB on AArch64"),
cl::init(false), cl::cat(BoltCategory));

static cl::opt<bool> GroupStubs("group-stubs",
cl::desc("share stubs across functions"),
cl::init(true), cl::cat(BoltOptCategory));
Expand Down
5 changes: 3 additions & 2 deletions bolt/lib/Passes/PatchEntries.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
bool NeedsPatching = llvm::any_of(
llvm::make_second_range(BC.getBinaryFunctions()),
[&](BinaryFunction &BF) {
return !BC.shouldEmit(BF) && !BF.hasExternalRefRelocations();
return (!BC.shouldEmit(BF) && !BF.hasExternalRefRelocations()) ||
BF.needsPatch();
});

if (!NeedsPatching)
Expand All @@ -66,7 +67,7 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {

// Check if we can skip patching the function.
if (!opts::ForcePatch && !Function.hasEHRanges() &&
Function.getSize() < PatchThreshold)
!Function.needsPatch() && Function.getSize() < PatchThreshold)
continue;

// List of patches for function entries. We either successfully patch
Expand Down
5 changes: 5 additions & 0 deletions bolt/lib/Utils/CommandLineOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ cl::opt<unsigned>
cl::desc("number of entries per line (default 256)"),
cl::init(256), cl::Optional, cl::cat(HeatmapCategory));

// Off by default. Defined here and declared extern in
// bolt/Utils/CommandLineOpts.h so that code outside the LongJmp pass (e.g.
// BinaryFunction::scanExternalRefs) can query it.
cl::opt<bool>
CompactCodeModel("compact-code-model",
cl::desc("generate code for binaries <128MB on AArch64"),
cl::init(false), cl::cat(BoltCategory));

cl::opt<bool>
DiffOnly("diff-only",
cl::desc("stop processing once we have enough to compare two binaries"),
Expand Down
51 changes: 42 additions & 9 deletions bolt/test/AArch64/lite-mode.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,34 @@
## non-optimized code.

# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
# RUN: --defsym COMPACT=1 %s -o %t.compact.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: llvm-strip --strip-unneeded %t*.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static
# RUN: %clang %cflags %t.compact.o -o %t.compact.exe -Wl,-q -static
# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --lite
# RUN: llvm-bolt %t.compact.exe -o %t.compact.bolt --data %t.fdata --lite \
# RUN: --compact-code-model
# RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.exe \
# RUN: | FileCheck %s --check-prefix=CHECK-INPUT
# RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.bolt \
# RUN: | FileCheck %s
# RUN: llvm-objdump -d --disassemble-symbols=_start.org.0 %t.bolt \
# RUN: | FileCheck %s --check-prefix=CHECK-PATCH
# RUN: llvm-objdump -d %t.compact.bolt \
# RUN: | FileCheck %s --check-prefix=CHECK-COMPACT

## In compact mode, make sure we do not create an unnecessary patch thunk.
# CHECK-COMPACT-NOT: <_start.org.0>

## Verify that the number of FDEs matches the number of functions in the output
## binary. There are three original functions and two optimized.
## NOTE: at the moment we are emitting extra FDEs for patched functions, thus
## there is one more FDE for _start.
# RUN: llvm-readelf -u %t.bolt | grep -wc FDE \
# RUN: | FileCheck --check-prefix=CHECK-FDE %s
# CHECK-FDE: 5
# CHECK-FDE: 6

## In lite mode, optimized code will be separated from the original .text by
## over 128MB, making it impossible for call/bl instructions in cold functions
Expand All @@ -28,15 +41,22 @@
_start:
# FDATA: 0 [unknown] 0 1 _start 0 0 100
.cfi_startproc

## Check that the code at the original location is converted into a
## veneer/thunk.
# CHECK-PATCH-LABEL: <_start.org.0>
# CHECK-PATCH-NEXT: adrp x16
# CHECK-PATCH-NEXT: add x16, x16,
# CHECK-PATCH-NEXT: br x16
cmp x0, 1
b.eq .L0
bl cold_function
.L0:
ret x30
.cfi_endproc
.size _start, .-_start
.size _start, .-_start

## Cold non-optimized function with a reference to a hot function (_start).
## Cold non-optimized function with references to hot functions.
# CHECK: Disassembly of section .bolt.org.text:
# CHECK-LABEL: <cold_function>
.globl cold_function
Expand Down Expand Up @@ -97,12 +117,26 @@ cold_function:
# CHECK-NEXT: nop
# CHECK-NEXT: ldr x5

## Since _start is relocated further than 128MB from the call site, we check
## that the call is converted into a call to its original version. That original
## version should contain a veneer/thunk code that we check separately.
bl _start
# CHECK-INPUT-NEXT: bl {{.*}} <_start>
# CHECK-NEXT: bl {{.*}} <_start.org.0>

## Same as above, but the instruction is a tail call.
b _start
# CHECK-INPUT-NEXT: b {{.*}} <_start>
# CHECK-NEXT: b {{.*}} <_start.org.0>

.cfi_endproc
.size cold_function, .-cold_function
.size cold_function, .-cold_function

## Reserve 1MB of space to make functions that follow unreachable by ADRs in
.ifndef COMPACT
## Reserve 128MB of space to make functions that follow unreachable by ADRs in
## code that precedes this gap.
.space 0x100000
.space 0x8000000
.endif

.globl far_func
.type far_func, %function
Expand All @@ -111,5 +145,4 @@ far_func:
.cfi_startproc
ret x30
.cfi_endproc
.size far_func, .-far_func

.size far_func, .-far_func
30 changes: 0 additions & 30 deletions bolt/unittests/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,36 +162,6 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
<< "Wrong forward branch value\n";
}

TEST_P(BinaryContextTester,
FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOff) {
if (GetParam() != Triple::aarch64)
GTEST_SKIP();

// Tests that flushPendingRelocations exits if any pending relocation is out
// of range and PatchEntries hasn't run. Pending relocations are added by
// scanExternalRefs, so this ensures that either all scanExternalRefs
// relocations were flushed or PatchEntries ran.

BinarySection &BS = BC->registerOrUpdateSection(
".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
// Create symbol 'Func0x4'
MCSymbol *RelSymbol = BC->getOrCreateGlobalSymbol(4, "Func");
ASSERT_TRUE(RelSymbol);
Relocation Reloc{8, RelSymbol, ELF::R_AARCH64_CALL26, 0, 0};
Reloc.setOptional();
BS.addPendingRelocation(Reloc);

SmallVector<char> Vect;
raw_svector_ostream OS(Vect);

// Resolve relocation symbol to a high value so encoding will be out of range.
EXPECT_EXIT(BS.flushPendingRelocations(
OS, [&](const MCSymbol *S) { return 0x800000F; }),
::testing::ExitedWithCode(1),
"BOLT-ERROR: cannot encode relocation for symbol Func0x4 as it is"
" out-of-range. To proceed must use -force-patch");
}

TEST_P(BinaryContextTester,
FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOn) {
if (GetParam() != Triple::aarch64)
Expand Down
Loading