blob: 1047c2cbf864e10e5d042493b20813bc71ac617b [file] [log] [blame]
//===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
Rafael Aulerc67a7532015-11-24 01:54:188
Rafael Aulera34c7532021-10-08 18:47:109#include "bolt/Rewrite/RewriteInstance.h"
10#include "bolt/Core/BinaryContext.h"
11#include "bolt/Core/BinaryEmitter.h"
12#include "bolt/Core/BinaryFunction.h"
13#include "bolt/Core/DebugData.h"
14#include "bolt/Core/Exceptions.h"
15#include "bolt/Core/MCPlusBuilder.h"
16#include "bolt/Core/ParallelUtilities.h"
17#include "bolt/Core/Relocation.h"
18#include "bolt/Passes/CacheMetrics.h"
19#include "bolt/Passes/ReorderFunctions.h"
20#include "bolt/Profile/BoltAddressTranslation.h"
21#include "bolt/Profile/DataAggregator.h"
22#include "bolt/Profile/DataReader.h"
23#include "bolt/Profile/YAMLProfileReader.h"
24#include "bolt/Profile/YAMLProfileWriter.h"
25#include "bolt/Rewrite/BinaryPassManager.h"
26#include "bolt/Rewrite/DWARFRewriter.h"
27#include "bolt/Rewrite/ExecutableFileMemoryManager.h"
28#include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h"
29#include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
30#include "bolt/Utils/CommandLineOpts.h"
31#include "bolt/Utils/Utils.h"
Maksim Panchenkoae409f02017-07-17 18:22:2232#include "llvm/ADT/Optional.h"
Rafael Aulerc67a7532015-11-24 01:54:1833#include "llvm/DebugInfo/DWARF/DWARFContext.h"
serge-sans-paille290e4822022-02-14 15:27:0434#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
Amir Ayupovf1bfb182021-03-18 20:06:1835#include "llvm/ExecutionEngine/RuntimeDyld.h"
Rafael Aulerc67a7532015-11-24 01:54:1836#include "llvm/MC/MCAsmBackend.h"
37#include "llvm/MC/MCAsmInfo.h"
laith sakka7d428352019-07-12 14:25:5038#include "llvm/MC/MCAsmLayout.h"
Rafael Auler8a5a3012018-02-06 23:00:2339#include "llvm/MC/MCDisassembler/MCDisassembler.h"
Rafael Aulerc67a7532015-11-24 01:54:1840#include "llvm/MC/MCObjectStreamer.h"
Rafael Aulerc67a7532015-11-24 01:54:1841#include "llvm/MC/MCStreamer.h"
Rafael Aulerc67a7532015-11-24 01:54:1842#include "llvm/MC/MCSymbol.h"
Rafael Aulera34c7532021-10-08 18:47:1043#include "llvm/MC/TargetRegistry.h"
Rafael Aulerc67a7532015-11-24 01:54:1844#include "llvm/Object/ObjectFile.h"
Amir Ayupov1c5d3a02020-12-02 00:29:3945#include "llvm/Support/Alignment.h"
Rafael Aulerc67a7532015-11-24 01:54:1846#include "llvm/Support/Casting.h"
47#include "llvm/Support/CommandLine.h"
Maksim Panchenko55fc5412016-09-28 02:09:3848#include "llvm/Support/DataExtractor.h"
Rafael Aulerc67a7532015-11-24 01:54:1849#include "llvm/Support/Errc.h"
Amir Ayupov32d24732022-02-17 04:39:5950#include "llvm/Support/Error.h"
Amir Ayupov081e39a2021-03-29 23:04:5751#include "llvm/Support/FileSystem.h"
James Luo3e55dea2021-07-15 21:58:3252#include "llvm/Support/LEB128.h"
Rafael Aulerc67a7532015-11-24 01:54:1853#include "llvm/Support/ManagedStatic.h"
Bill Nell591e0ef2017-11-28 02:00:2454#include "llvm/Support/Timer.h"
Rafael Aulerc67a7532015-11-24 01:54:1855#include "llvm/Support/ToolOutputFile.h"
Bill Nell5cd58962017-05-24 21:14:1656#include "llvm/Support/raw_ostream.h"
Rafael Aulerc67a7532015-11-24 01:54:1857#include <algorithm>
Maksim Panchenko218c5f02016-01-27 00:03:5858#include <fstream>
Amir Ayupov32d24732022-02-17 04:39:5959#include <memory>
Rafael Aulerc67a7532015-11-24 01:54:1860#include <system_error>
61
62#undef DEBUG_TYPE
Maksim Panchenkod1526082016-02-05 22:42:0463#define DEBUG_TYPE "bolt"
Rafael Aulerc67a7532015-11-24 01:54:1864
65using namespace llvm;
66using namespace object;
Maksim Panchenkod1526082016-02-05 22:42:0467using namespace bolt;
Rafael Aulerc67a7532015-11-24 01:54:1868
Rafael Aulerc82e7fd2020-02-11 02:50:5369extern cl::opt<uint32_t> X86AlignBranchBoundary;
70extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
71
Rafael Aulerc67a7532015-11-24 01:54:1872namespace opts {
73
Maksim Panchenko120d2672018-04-13 22:46:1974extern cl::opt<MacroFusionType> AlignMacroOpFusion;
Rafael Aulera34c7532021-10-08 18:47:1075extern cl::list<std::string> HotTextMoveSections;
Xun Li9bd71612020-05-02 18:14:3876extern cl::opt<bool> Hugify;
Xun Li00892a5fd2020-05-21 21:28:4777extern cl::opt<bool> Instrument;
Maksim Panchenko6ff17952017-01-17 23:49:5978extern cl::opt<JumpTableSupportLevel> JumpTables;
Bill Nell729da2d2018-04-21 03:03:3179extern cl::list<std::string> ReorderData;
Maksim Panchenko492e4a52019-04-26 00:00:0580extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
laith sakka7d428352019-07-12 14:25:5081extern cl::opt<bool> TimeBuild;
Maksim Panchenko55fc5412016-09-28 02:09:3882
Fangrui Songb92436e2022-06-05 20:29:4983static cl::opt<bool> ForceToDataRelocations(
84 "force-data-relocations",
85 cl::desc("force relocations to data sections to always be processed"),
86
87 cl::Hidden, cl::cat(BoltCategory));
Bill Nell89feb842018-01-24 13:42:1188
Rafael Aulerf91d1212020-05-07 00:31:2589cl::opt<std::string>
Fangrui Songb92436e2022-06-05 20:29:4990 BoltID("bolt-id",
91 cl::desc("add any string to tag this execution in the "
92 "output binary via bolt info section"),
93 cl::cat(BoltCategory));
Rafael Aulerf91d1212020-05-07 00:31:2594
Maksim Panchenko0bde7962017-03-28 21:40:2095cl::opt<bool>
96AllowStripped("allow-stripped",
97 cl::desc("allow processing of stripped binaries"),
98 cl::Hidden,
99 cl::cat(BoltCategory));
100
Amir Ayupov6333e5d2022-06-02 07:26:23101cl::opt<bool> DumpDotAll(
102 "dump-dot-all",
103 cl::desc("dump function CFGs to graphviz format after each stage;"
104 "enable '-print-loops' for color-coded blocks"),
Fangrui Songb92436e2022-06-05 20:29:49105 cl::Hidden, cl::cat(BoltCategory));
Maksim Panchenko0bde7962017-03-28 21:40:20106
Maksim Panchenko0bde7962017-03-28 21:40:20107static cl::list<std::string>
Maksim Panchenko04c5d4f2020-05-03 20:54:45108ForceFunctionNames("funcs",
Maksim Panchenko0bde7962017-03-28 21:40:20109 cl::CommaSeparated,
Maksim Panchenko04c5d4f2020-05-03 20:54:45110 cl::desc("limit optimizations to functions from the list"),
Maksim Panchenko0bde7962017-03-28 21:40:20111 cl::value_desc("func1,func2,func3,..."),
112 cl::Hidden,
113 cl::cat(BoltCategory));
114
115static cl::opt<std::string>
116FunctionNamesFile("funcs-file",
117 cl::desc("file with list of functions to optimize"),
118 cl::Hidden,
119 cl::cat(BoltCategory));
120
Amir Ayupovd474dbd2021-06-05 01:49:29121static cl::list<std::string> ForceFunctionNamesNR(
122 "funcs-no-regex", cl::CommaSeparated,
123 cl::desc("limit optimizations to functions from the list (non-regex)"),
124 cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));
125
126static cl::opt<std::string> FunctionNamesFileNR(
127 "funcs-file-no-regex",
128 cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden,
129 cl::cat(BoltCategory));
130
Maksim Panchenko0bde7962017-03-28 21:40:20131cl::opt<bool>
Maksim Panchenko0bde7962017-03-28 21:40:20132KeepTmp("keep-tmp",
133 cl::desc("preserve intermediate .o file"),
134 cl::Hidden,
135 cl::cat(BoltCategory));
136
Fangrui Songb92436e2022-06-05 20:29:49137cl::opt<bool> Lite("lite", cl::desc("skip processing of cold functions"),
138 cl::cat(BoltCategory));
Maksim Panchenko924d0bd2020-05-03 22:49:58139
Maksim Panchenko0bde7962017-03-28 21:40:20140static cl::opt<unsigned>
Rafael Aulere3898d52020-12-30 20:23:58141LiteThresholdPct("lite-threshold-pct",
142 cl::desc("threshold (in percent) for selecting functions to process in lite "
143 "mode. Higher threshold means fewer functions to process. E.g "
144 "threshold of 90 means only top 10 percent of functions with "
145 "profile will be processed."),
146 cl::init(0),
147 cl::ZeroOrMore,
148 cl::Hidden,
149 cl::cat(BoltOptCategory));
150
Fangrui Songb92436e2022-06-05 20:29:49151static cl::opt<unsigned> LiteThresholdCount(
152 "lite-threshold-count",
153 cl::desc("similar to '-lite-threshold-pct' but specify threshold using "
154 "absolute function call count. I.e. limit processing to functions "
155 "executed at least the specified number of times."),
156 cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
Rafael Aulere3898d52020-12-30 20:23:58157
158static cl::opt<unsigned>
Fangrui Songb92436e2022-06-05 20:29:49159 MaxFunctions("max-funcs",
160 cl::desc("maximum number of functions to process"), cl::Hidden,
161 cl::cat(BoltCategory));
Maksim Panchenko0bde7962017-03-28 21:40:20162
Fangrui Songb92436e2022-06-05 20:29:49163static cl::opt<unsigned> MaxDataRelocations(
164 "max-data-relocations",
165 cl::desc("maximum number of data relocations to process"), cl::Hidden,
166 cl::cat(BoltCategory));
Bill Nell0e4d86b2017-11-15 04:05:11167
Fangrui Songb92436e2022-06-05 20:29:49168cl::opt<bool> PrintAll("print-all",
169 cl::desc("print functions after each stage"), cl::Hidden,
170 cl::cat(BoltCategory));
Maksim Panchenko0bde7962017-03-28 21:40:20171
Fangrui Songb92436e2022-06-05 20:29:49172cl::opt<bool> PrintCFG("print-cfg",
173 cl::desc("print functions after CFG construction"),
174 cl::Hidden, cl::cat(BoltCategory));
Maksim Panchenko0bde7962017-03-28 21:40:20175
Alexander Shaposhnikov5b64bf22020-02-11 22:30:33176cl::opt<bool> PrintDisasm("print-disasm",
Fangrui Songb92436e2022-06-05 20:29:49177 cl::desc("print function after disassembly"),
178 cl::Hidden, cl::cat(BoltCategory));
Maksim Panchenko0bde7962017-03-28 21:40:20179
Maksim Panchenko0bde7962017-03-28 21:40:20180static cl::opt<bool>
Fangrui Songb92436e2022-06-05 20:29:49181 PrintGlobals("print-globals",
182 cl::desc("print global symbols after disassembly"), cl::Hidden,
183 cl::cat(BoltCategory));
Bill Nell0e4d86b2017-11-15 04:05:11184
Alexander Shaposhnikov36cf37c2020-01-30 21:10:48185extern cl::opt<bool> PrintSections;
Bill Nellddefc772018-02-02 00:33:43186
Fangrui Songb92436e2022-06-05 20:29:49187static cl::opt<bool> PrintLoopInfo("print-loops",
188 cl::desc("print loop related information"),
189 cl::Hidden, cl::cat(BoltCategory));
Maksim Panchenko0bde7962017-03-28 21:40:20190
Fangrui Songb92436e2022-06-05 20:29:49191static cl::opt<bool> PrintSDTMarkers("print-sdt",
192 cl::desc("print all SDT markers"),
193 cl::Hidden, cl::cat(BoltCategory));
Laith Saed Sakka47558252019-05-16 00:19:18194
James Luodea6c242021-06-25 18:42:58195enum PrintPseudoProbesOptions {
196 PPP_None = 0,
197 PPP_Probes_Section_Decode = 0x1,
198 PPP_Probes_Address_Conversion = 0x2,
James Luo0df7bf72021-07-16 23:05:18199 PPP_Encoded_Probes = 0x3,
James Luodea6c242021-06-25 18:42:58200 PPP_All = 0xf
201};
202
203cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
204 "print-pseudo-probes", cl::desc("print pseudo probe info"),
205 cl::init(PPP_None),
206 cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode",
207 "decode probes section from binary"),
208 clEnumValN(PPP_Probes_Address_Conversion, "address_conversion",
209 "update address2ProbesMap with output block address"),
James Luo0df7bf72021-07-16 23:05:18210 clEnumValN(PPP_Encoded_Probes, "encoded_probes",
211 "display the encoded probes in binary section"),
James Luodea6c242021-06-25 18:42:58212 clEnumValN(PPP_All, "all", "enable all debugging printout")),
213 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
James Luo8a919592021-06-11 20:06:12214
Fangrui Songb92436e2022-06-05 20:29:49215static cl::opt<cl::boolOrDefault> RelocationMode(
216 "relocs", cl::desc("use relocations in the binary (default=autodetect)"),
217 cl::cat(BoltCategory));
Maksim Panchenko0bde7962017-03-28 21:40:20218
Maksim Panchenkob6cb1122017-12-14 07:12:01219static cl::opt<std::string>
220SaveProfile("w",
221 cl::desc("save recorded profile to a file"),
222 cl::cat(BoltOutputCategory));
223
Maksim Panchenko0bde7962017-03-28 21:40:20224static cl::list<std::string>
225SkipFunctionNames("skip-funcs",
226 cl::CommaSeparated,
227 cl::desc("list of functions to skip"),
228 cl::value_desc("func1,func2,func3,..."),
229 cl::Hidden,
230 cl::cat(BoltCategory));
231
232static cl::opt<std::string>
233SkipFunctionNamesFile("skip-funcs-file",
234 cl::desc("file with list of functions to skip"),
235 cl::Hidden,
236 cl::cat(BoltCategory));
237
Maksim Panchenko0bde7962017-03-28 21:40:20238cl::opt<bool>
239TrapOldCode("trap-old-code",
240 cl::desc("insert traps in old function bodies (relocation mode)"),
241 cl::Hidden,
242 cl::cat(BoltCategory));
243
Rafael Aulere485a982021-06-16 16:52:03244static cl::opt<std::string> DWPPathName("dwp",
245 cl::desc("Path and name to DWP file."),
Fangrui Song36c7d792022-06-04 07:10:42246 cl::Hidden, cl::init(""),
247 cl::cat(BoltCategory));
Rafael Aulere485a982021-06-16 16:52:03248
Maksim Panchenko0bde7962017-03-28 21:40:20249static cl::opt<bool>
250UseGnuStack("use-gnu-stack",
251 cl::desc("use GNU_STACK program header for new segment (workaround for "
252 "issues with strip/objcopy)"),
253 cl::ZeroOrMore,
254 cl::cat(BoltCategory));
255
Rafael Auler0ed144a2017-10-06 21:42:46256static cl::opt<bool>
Fangrui Songb92436e2022-06-05 20:29:49257 TimeRewrite("time-rewrite",
258 cl::desc("print time spent in rewriting passes"), cl::Hidden,
259 cl::cat(BoltCategory));
Bill Nell591e0ef2017-11-28 02:00:24260
laith sakka7d428352019-07-12 14:25:50261static cl::opt<bool>
262SequentialDisassembly("sequential-disassembly",
263 cl::desc("performs disassembly sequentially"),
264 cl::init(false),
265 cl::cat(BoltOptCategory));
266
Fangrui Songb92436e2022-06-05 20:29:49267static cl::opt<bool> WriteBoltInfoSection(
268 "bolt-info", cl::desc("write bolt info section in the output binary"),
269 cl::init(true), cl::Hidden, cl::cat(BoltOutputCategory));
laith sakkac1564a12019-07-31 00:55:27270
Rafael Aulerc67a7532015-11-24 01:54:18271} // namespace opts
272
Maksim Panchenko96adec52017-05-16 16:27:34273constexpr const char *RewriteInstance::SectionsToOverwrite[];
Amir Ayupov12e9fec2021-04-01 18:43:00274std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = {
Alexander Yermolovich014cd372022-04-21 22:47:49275 ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_line_str",
276 ".debug_loc", ".debug_loclists", ".debug_ranges", ".debug_rnglists",
277 ".gdb_index", ".debug_addr"};
Bill Nellc1d1c2e2016-07-23 03:52:57278
Rafael Auler8a5a3012018-02-06 23:00:23279const char RewriteInstance::TimerGroupName[] = "rewrite";
280const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes";
Bill Nell591e0ef2017-11-28 02:00:24281
Bill Nell5cd58962017-05-24 21:14:16282namespace llvm {
283namespace bolt {
Alexander Shaposhnikov36cf37c2020-01-30 21:10:48284
Bill Nell5cd58962017-05-24 21:14:16285extern const char *BoltRevision;
Bill Nell5cd58962017-05-24 21:14:16286
Rafael Aulera34c7532021-10-08 18:47:10287MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
288 const MCInstrAnalysis *Analysis,
289 const MCInstrInfo *Info,
290 const MCRegisterInfo *RegInfo) {
291#ifdef X86_AVAILABLE
292 if (Arch == Triple::x86_64)
293 return createX86MCPlusBuilder(Analysis, Info, RegInfo);
294#endif
295
296#ifdef AARCH64_AVAILABLE
297 if (Arch == Triple::aarch64)
298 return createAArch64MCPlusBuilder(Analysis, Info, RegInfo);
299#endif
300
301 llvm_unreachable("architecture unsupported by MCPlusBuilder");
302}
303
Vladislav Khmelevsky20e9d4c2022-01-26 20:45:46304} // namespace bolt
305} // namespace llvm
306
307namespace {
308
309bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
310 auto Itr =
Amir Ayupovd2c87692022-06-24 05:15:47311 llvm::find_if(opts::ReorderData, [&](const std::string &SectionName) {
312 return (Section && Section->getName() == SectionName);
313 });
Vladislav Khmelevsky20e9d4c2022-01-26 20:45:46314 return Itr != opts::ReorderData.end();
315}
316
Rafael Aulera34c7532021-10-08 18:47:10317} // anonymous namespace
Maksim Panchenko003d106c2016-08-11 21:23:54318
Amir Ayupov32d24732022-02-17 04:39:59319Expected<std::unique_ptr<RewriteInstance>>
320RewriteInstance::createRewriteInstance(ELFObjectFileBase *File, const int Argc,
321 const char *const *Argv,
322 StringRef ToolPath) {
323 Error Err = Error::success();
324 auto RI = std::make_unique<RewriteInstance>(File, Argc, Argv, ToolPath, Err);
325 if (Err)
326 return std::move(Err);
Vladislav Khmelevsky63686af2022-04-19 15:48:27327 return std::move(RI);
Amir Ayupov32d24732022-02-17 04:39:59328}
329
Maksim Panchenko87291712020-05-08 06:00:29330RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
Amir Ayupov32d24732022-02-17 04:39:59331 const char *const *Argv, StringRef ToolPath,
332 Error &Err)
Maksim Panchenko87291712020-05-08 06:00:29333 : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath),
Maksim Panchenko7fd48702019-04-03 22:52:01334 SHStrTab(StringTableBuilder::ELF) {
Amir Ayupov32d24732022-02-17 04:39:59335 ErrorAsOutParameter EAO(&Err);
Maksim Panchenko4f4239c2020-11-04 19:44:02336 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
337 if (!ELF64LEFile) {
Amir Ayupov32d24732022-02-17 04:39:59338 Err = createStringError(errc::not_supported,
339 "Only 64-bit LE ELF binaries are supported");
340 return;
Maksim Panchenko4f4239c2020-11-04 19:44:02341 }
342
343 bool IsPIC = false;
Amir Ayupov1c5d3a02020-12-02 00:29:39344 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
345 if (Obj.getHeader().e_type != ELF::ET_EXEC) {
Maksim Panchenko4f4239c2020-11-04 19:44:02346 outs() << "BOLT-INFO: shared object or position-independent executable "
347 "detected\n";
348 IsPIC = true;
349 }
350
Amir Ayupov32d24732022-02-17 04:39:59351 auto BCOrErr = BinaryContext::createBinaryContext(
Amir Ayupov1c5d3a02020-12-02 00:29:39352 File, IsPIC,
Rafael Aulera8cbc802021-10-06 20:03:56353 DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore,
354 nullptr, opts::DWPPathName,
355 WithColor::defaultErrorHandler,
356 WithColor::defaultWarningHandler));
Amir Ayupov32d24732022-02-17 04:39:59357 if (Error E = BCOrErr.takeError()) {
358 Err = std::move(E);
359 return;
360 }
361 BC = std::move(BCOrErr.get());
Rafael Aulera34c7532021-10-08 18:47:10362 BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(createMCPlusBuilder(
363 BC->TheTriple->getArch(), BC->MIA.get(), BC->MII.get(), BC->MRI.get())));
364
Amir Ayupov1c5d3a02020-12-02 00:29:39365 BAT = std::make_unique<BoltAddressTranslation>(*BC);
Maksim Panchenko4f4239c2020-11-04 19:44:02366
367 if (opts::UpdateDebugSections)
Amir Ayupovf1bfb182021-03-18 20:06:18368 DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC);
Maksim Panchenko4f4239c2020-11-04 19:44:02369
Maksim Panchenkoee0e9cc2021-12-23 20:38:33370 if (opts::Instrument)
Rafael Aulerb3c34d52021-03-15 23:34:25371 BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
Maksim Panchenkoee0e9cc2021-12-23 20:38:33372 else if (opts::Hugify)
Amir Ayupov1c5d3a02020-12-02 00:29:39373 BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>());
Maksim Panchenko7fd48702019-04-03 22:52:01374}
Rafael Aulerc67a7532015-11-24 01:54:18375
376RewriteInstance::~RewriteInstance() {}
377
Maksim Panchenko87291712020-05-08 06:00:29378Error RewriteInstance::setProfile(StringRef Filename) {
379 if (!sys::fs::exists(Filename))
380 return errorCodeToError(make_error_code(errc::no_such_file_or_directory));
381
382 if (ProfileReader) {
383 // Already exists
Maksim Panchenko40c2e0f2021-12-15 00:52:51384 return make_error<StringError>(Twine("multiple profiles specified: ") +
385 ProfileReader->getFilename() + " and " +
386 Filename,
387 inconvertibleErrorCode());
Maksim Panchenko87291712020-05-08 06:00:29388 }
389
390 // Spawn a profile reader based on file contents.
Maksim Panchenkoee0e9cc2021-12-23 20:38:33391 if (DataAggregator::checkPerfDataMagic(Filename))
Amir Ayupov1c5d3a02020-12-02 00:29:39392 ProfileReader = std::make_unique<DataAggregator>(Filename);
Maksim Panchenkoee0e9cc2021-12-23 20:38:33393 else if (YAMLProfileReader::isYAML(Filename))
Amir Ayupov1c5d3a02020-12-02 00:29:39394 ProfileReader = std::make_unique<YAMLProfileReader>(Filename);
Maksim Panchenkoee0e9cc2021-12-23 20:38:33395 else
Amir Ayupov1c5d3a02020-12-02 00:29:39396 ProfileReader = std::make_unique<DataReader>(Filename);
Maksim Panchenko87291712020-05-08 06:00:29397
398 return Error::success();
399}
400
Maksim Panchenko0ce0bce2020-06-15 07:15:47401/// Return true if the function \p BF should be disassembled.
402static bool shouldDisassemble(const BinaryFunction &BF) {
403 if (BF.isPseudo())
Maksim Panchenkoc6ce2ab2019-01-16 07:43:40404 return false;
Maksim Panchenkoc6ce2ab2019-01-16 07:43:40405
Maksim Panchenko0ce0bce2020-06-15 07:15:47406 if (opts::processAllFunctions())
407 return true;
408
409 return !BF.isIgnored();
Maksim Panchenkoc6ce2ab2019-01-16 07:43:40410}
411
Amir Ayupovaf6e66f2022-02-24 03:30:30412Error RewriteInstance::discoverStorage() {
Rafael Auler8a5a3012018-02-06 23:00:23413 NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName,
414 TimerGroupDesc, opts::TimeRewrite);
spupyrev48a53a72017-11-15 00:51:24415
Rafael Auler624b2d92017-09-20 17:43:01416 // Stubs are harmful because RuntimeDyld may try to increase the size of
417 // sections accounting for stubs when we need those sections to match the
418 // same size seen in the input binary, in case this section is a copy
419 // of the original one seen in the binary.
Maksim Panchenkod414acf2019-12-17 19:17:31420 BC->EFMM.reset(new ExecutableFileMemoryManager(*BC, /*AllowStubs*/ false));
Maksim Panchenko6ff17952017-01-17 23:49:59421
Maksim Panchenkod68b1c7b2016-03-03 18:13:11422 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
Amir Ayupovc7306cc2021-04-08 07:19:26423 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
Maksim Panchenko50c895a2016-02-08 18:02:48424
Amir Ayupov1c5d3a02020-12-02 00:29:39425 BC->StartFunctionAddress = Obj.getHeader().e_entry;
Maksim Panchenko55fc5412016-09-28 02:09:38426
Maksim Panchenko50c895a2016-02-08 18:02:48427 NextAvailableAddress = 0;
Maksim Panchenko7f7d4af2016-02-13 03:01:53428 uint64_t NextAvailableOffset = 0;
Amir Ayupovaf6e66f2022-02-24 03:30:30429 Expected<ELF64LE::PhdrRange> PHsOrErr = Obj.program_headers();
430 if (Error E = PHsOrErr.takeError())
431 return E;
432
433 ELF64LE::PhdrRange PHs = PHsOrErr.get();
Amir Ayupovc7306cc2021-04-08 07:19:26434 for (const ELF64LE::Phdr &Phdr : PHs) {
Vasily Leonenkoad79d512021-06-18 20:08:35435 switch (Phdr.p_type) {
436 case ELF::PT_LOAD:
Maksim Panchenkoa76b13d2018-10-03 00:16:26437 BC->FirstAllocAddress = std::min(BC->FirstAllocAddress,
438 static_cast<uint64_t>(Phdr.p_vaddr));
Maksim Panchenko50c895a2016-02-08 18:02:48439 NextAvailableAddress = std::max(NextAvailableAddress,
440 Phdr.p_vaddr + Phdr.p_memsz);
Maksim Panchenko7f7d4af2016-02-13 03:01:53441 NextAvailableOffset = std::max(NextAvailableOffset,
442 Phdr.p_offset + Phdr.p_filesz);
Maksim Panchenko6ff17952017-01-17 23:49:59443
Maksim Panchenko250ca402020-06-26 23:52:07444 BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
445 Phdr.p_memsz,
446 Phdr.p_offset,
447 Phdr.p_filesz,
448 Phdr.p_align};
Vasily Leonenkoad79d512021-06-18 20:08:35449 break;
450 case ELF::PT_INTERP:
451 BC->HasInterpHeader = true;
452 break;
Maksim Panchenko50c895a2016-02-08 18:02:48453 }
454 }
455
Amir Ayupovc7306cc2021-04-08 07:19:26456 for (const SectionRef &Section : InputFile->sections()) {
Amir Ayupovaf6e66f2022-02-24 03:30:30457 Expected<StringRef> SectionNameOrErr = Section.getName();
458 if (Error E = SectionNameOrErr.takeError())
459 return E;
460 StringRef SectionName = SectionNameOrErr.get();
Maksim Panchenko55fc5412016-09-28 02:09:38461 if (SectionName == ".text") {
Rafael Auler624b2d92017-09-20 17:43:01462 BC->OldTextSectionAddress = Section.getAddress();
463 BC->OldTextSectionSize = Section.getSize();
Maksim Panchenko2df4e7b2020-02-25 01:12:41464
Amir Ayupovaf6e66f2022-02-24 03:30:30465 Expected<StringRef> SectionContentsOrErr = Section.getContents();
466 if (Error E = SectionContentsOrErr.takeError())
467 return E;
468 StringRef SectionContents = SectionContentsOrErr.get();
Rafael Auler624b2d92017-09-20 17:43:01469 BC->OldTextSectionOffset =
Maksim Panchenko40c2e0f2021-12-15 00:52:51470 SectionContents.data() - InputFile->getData().data();
Maksim Panchenkoc89821c2017-02-07 23:31:14471 }
472
Rafael Auler6c8fc282020-07-17 00:35:55473 if (!opts::HeatmapMode &&
Rafael Auler21f43032019-04-13 00:33:46474 !(opts::AggregateOnly && BAT->enabledFor(InputFile)) &&
Maksim Panchenkoa07f1a22020-03-11 22:51:32475 (SectionName.startswith(getOrgSecPrefix()) ||
Amir Ayupovaf6e66f2022-02-24 03:30:30476 SectionName == getBOLTTextSectionName()))
477 return createStringError(
478 errc::function_not_supported,
479 "BOLT-ERROR: input file was processed by BOLT. Cannot re-optimize");
Maksim Panchenko55fc5412016-09-28 02:09:38480 }
481
Amir Ayupovaf6e66f2022-02-24 03:30:30482 if (!NextAvailableAddress || !NextAvailableOffset)
483 return createStringError(errc::executable_format_error,
484 "no PT_LOAD pheader seen");
Maksim Panchenko50c895a2016-02-08 18:02:48485
Bill Nellc27a6a52016-09-02 21:15:29486 outs() << "BOLT-INFO: first alloc address is 0x"
Maksim Panchenkoa76b13d2018-10-03 00:16:26487 << Twine::utohexstr(BC->FirstAllocAddress) << '\n';
Maksim Panchenko50c895a2016-02-08 18:02:48488
Maksim Panchenko7f7d4af2016-02-13 03:01:53489 FirstNonAllocatableOffset = NextAvailableOffset;
Maksim Panchenko50c895a2016-02-08 18:02:48490
Maksim Panchenko1387a9d2018-09-25 03:58:31491 NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign);
492 NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign);
Maksim Panchenko50c895a2016-02-08 18:02:48493
Maksim Panchenko7f7d4af2016-02-13 03:01:53494 if (!opts::UseGnuStack) {
495 // This is where the black magic happens. Creating PHDR table in a segment
496 // other than that containing ELF header is tricky. Some loaders and/or
497 // parts of loaders will apply e_phoff from ELF header assuming both are in
498 // the same segment, while others will do the proper calculation.
499 // We create the new PHDR table in such a way that both of the methods
500 // of loading and locating the table work. There's a slight file size
501 // overhead because of that.
Maksim Panchenkod68b1c7b2016-03-03 18:13:11502 //
503 // NB: bfd's strip command cannot do the above and will corrupt the
504 // binary during the process of stripping non-allocatable sections.
Maksim Panchenkoee0e9cc2021-12-23 20:38:33505 if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress)
Maksim Panchenkoa76b13d2018-10-03 00:16:26506 NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress;
Maksim Panchenkoee0e9cc2021-12-23 20:38:33507 else
Maksim Panchenkoa76b13d2018-10-03 00:16:26508 NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress;
Maksim Panchenkoee0e9cc2021-12-23 20:38:33509
Maksim Panchenko40c2e0f2021-12-15 00:52:51510 assert(NextAvailableOffset ==
511 NextAvailableAddress - BC->FirstAllocAddress &&
512 "PHDR table address calculation error");
Maksim Panchenko50c895a2016-02-08 18:02:48513
Bill Nellc27a6a52016-09-02 21:15:29514 outs() << "BOLT-INFO: creating new program header table at address 0x"
Maksim Panchenko7f7d4af2016-02-13 03:01:53515 << Twine::utohexstr(NextAvailableAddress) << ", offset 0x"
516 << Twine::utohexstr(NextAvailableOffset) << '\n';
517
518 PHDRTableAddress = NextAvailableAddress;
519 PHDRTableOffset = NextAvailableOffset;
520
521 // Reserve space for 3 extra pheaders.
Amir Ayupov1c5d3a02020-12-02 00:29:39522 unsigned Phnum = Obj.getHeader().e_phnum;
Maksim Panchenko7f7d4af2016-02-13 03:01:53523 Phnum += 3;
524
Amir Ayupovc7306cc2021-04-08 07:19:26525 NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy);
Maksim Panchenko40c2e0f2021-12-15 00:52:51526 NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy);
Maksim Panchenko50c895a2016-02-08 18:02:48527 }
528
Maksim Panchenko7f7d4af2016-02-13 03:01:53529 // Align at cache line.
Rafael Auler8a5a3012018-02-06 23:00:23530 NextAvailableAddress = alignTo(NextAvailableAddress, 64);
531 NextAvailableOffset = alignTo(NextAvailableOffset, 64);
Maksim Panchenko50c895a2016-02-08 18:02:48532
Maksim Panchenko50c895a2016-02-08 18:02:48533 NewTextSegmentAddress = NextAvailableAddress;
534 NewTextSegmentOffset = NextAvailableOffset;
Rafael Auler76d77402017-08-31 18:45:37535 BC->LayoutStartAddress = NextAvailableAddress;
Maksim Panchenko250ca402020-06-26 23:52:07536
537 // Tools such as objcopy can strip section contents but leave header
538 // entries. Check that at least .text is mapped in the file.
Amir Ayupovaf6e66f2022-02-24 03:30:30539 if (!getFileOffsetForAddress(BC->OldTextSectionAddress))
540 return createStringError(errc::executable_format_error,
541 "BOLT-ERROR: input binary is not a valid ELF "
542 "executable as its text section is not "
543 "mapped to a valid segment");
544 return Error::success();
Maksim Panchenko50c895a2016-02-08 18:02:48545}
546
Laith Saed Sakka47558252019-05-16 00:19:18547void RewriteInstance::parseSDTNotes() {
548 if (!SDTSection)
549 return;
550
551 StringRef Buf = SDTSection->getContents();
Amir Ayupovc7306cc2021-04-08 07:19:26552 DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(),
553 BC->AsmInfo->getCodePointerSize());
Amir Ayupov1c5d3a02020-12-02 00:29:39554 uint64_t Offset = 0;
Laith Saed Sakka47558252019-05-16 00:19:18555
556 while (DE.isValidOffset(Offset)) {
Amir Ayupovc7306cc2021-04-08 07:19:26557 uint32_t NameSz = DE.getU32(&Offset);
Laith Saed Sakka47558252019-05-16 00:19:18558 DE.getU32(&Offset); // skip over DescSz
Amir Ayupovc7306cc2021-04-08 07:19:26559 uint32_t Type = DE.getU32(&Offset);
Laith Saed Sakka47558252019-05-16 00:19:18560 Offset = alignTo(Offset, 4);
561
562 if (Type != 3)
563 errs() << "BOLT-WARNING: SDT note type \"" << Type
564 << "\" is not expected\n";
565
566 if (NameSz == 0)
567 errs() << "BOLT-WARNING: SDT note has empty name\n";
568
569 StringRef Name = DE.getCStr(&Offset);
570
571 if (!Name.equals("stapsdt"))
572 errs() << "BOLT-WARNING: SDT note name \"" << Name
573 << "\" is not expected\n";
574
575 // Parse description
576 SDTMarkerInfo Marker;
Laith Sakka3df2c9e2019-05-17 14:58:27577 Marker.PCOffset = Offset;
Laith Saed Sakka47558252019-05-16 00:19:18578 Marker.PC = DE.getU64(&Offset);
579 Marker.Base = DE.getU64(&Offset);
580 Marker.Semaphore = DE.getU64(&Offset);
581 Marker.Provider = DE.getCStr(&Offset);
582 Marker.Name = DE.getCStr(&Offset);
583 Marker.Args = DE.getCStr(&Offset);
Laith Saed Sakka47558252019-05-16 00:19:18584 Offset = alignTo(Offset, 4);
Laith Saed Sakkaca659e42019-05-16 19:46:32585 BC->SDTMarkers[Marker.PC] = Marker;
Laith Saed Sakka47558252019-05-16 00:19:18586 }
587
588 if (opts::PrintSDTMarkers)
589 printSDTMarkers();
590}
591
James Luo8a919592021-06-11 20:06:12592void RewriteInstance::parsePseudoProbe() {
Maksim Panchenkoee0e9cc2021-12-23 20:38:33593 if (!PseudoProbeDescSection && !PseudoProbeSection) {
James Luo8a919592021-06-11 20:06:12594 // pesudo probe is not added to binary. It is normal and no warning needed.
595 return;
Maksim Panchenkoee0e9cc2021-12-23 20:38:33596 }
597
James Luo8a919592021-06-11 20:06:12598 // If only one section is found, it might mean the ELF is corrupted.
599 if (!PseudoProbeDescSection) {
600 errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n";
601 return;
602 } else if (!PseudoProbeSection) {
603 errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n";
604 return;
605 }
606
607 StringRef Contents = PseudoProbeDescSection->getContents();
608 if (!BC->ProbeDecoder.buildGUID2FuncDescMap(
609 reinterpret_cast<const uint8_t *>(Contents.data()),
610 Contents.size())) {
611 errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n";
612 return;
613 }
614 Contents = PseudoProbeSection->getContents();
615 if (!BC->ProbeDecoder.buildAddress2ProbeMap(
616 reinterpret_cast<const uint8_t *>(Contents.data()),
617 Contents.size())) {
James Luodea6c242021-06-25 18:42:58618 BC->ProbeDecoder.getAddress2ProbesMap().clear();
James Luo8a919592021-06-11 20:06:12619 errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n";
620 return;
621 }
622
James Luodea6c242021-06-25 18:42:58623 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All ||
624 opts::PrintPseudoProbes ==
625 opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) {
626 outs() << "Report of decoding input pseudo probe binaries \n";
James Luo8a919592021-06-11 20:06:12627 BC->ProbeDecoder.printGUID2FuncDescMap(outs());
628 BC->ProbeDecoder.printProbesForAllAddresses(outs());
629 }
630}
631
Laith Saed Sakka47558252019-05-16 00:19:18632void RewriteInstance::printSDTMarkers() {
633 outs() << "BOLT-INFO: Number of SDT markers is " << BC->SDTMarkers.size()
634 << "\n";
Laith Saed Sakkaca659e42019-05-16 19:46:32635 for (auto It : BC->SDTMarkers) {
Amir Ayupovc7306cc2021-04-08 07:19:26636 SDTMarkerInfo &Marker = It.second;
Laith Saed Sakka47558252019-05-16 00:19:18637 outs() << "BOLT-INFO: PC: " << utohexstr(Marker.PC)
638 << ", Base: " << utohexstr(Marker.Base)
639 << ", Semaphore: " << utohexstr(Marker.Semaphore)
640 << ", Provider: " << Marker.Provider << ", Name: " << Marker.Name
641 << ", Args: " << Marker.Args << "\n";
642 }
643}
644
Rafael Auler9c4fcaf2018-08-09 00:55:24645void RewriteInstance::parseBuildID() {
646 if (!BuildIDSection)
647 return;
Rafael Auler0ed144a2017-10-06 21:42:46648
Rafael Auler9c4fcaf2018-08-09 00:55:24649 StringRef Buf = BuildIDSection->getContents();
Rafael Auler0ed144a2017-10-06 21:42:46650
Rafael Auler9c4fcaf2018-08-09 00:55:24651 // Reading notes section (see Portable Formats Specification, Version 1.1,
652 // pg 2-5, section "Note Section").
653 DataExtractor DE = DataExtractor(Buf, true, 8);
Amir Ayupov1c5d3a02020-12-02 00:29:39654 uint64_t Offset = 0;
Rafael Auler9c4fcaf2018-08-09 00:55:24655 if (!DE.isValidOffset(Offset))
656 return;
657 uint32_t NameSz = DE.getU32(&Offset);
658 if (!DE.isValidOffset(Offset))
659 return;
660 uint32_t DescSz = DE.getU32(&Offset);
661 if (!DE.isValidOffset(Offset))
662 return;
663 uint32_t Type = DE.getU32(&Offset);
Rafael Auler0ed144a2017-10-06 21:42:46664
Amir Ayupov1c5d3a02020-12-02 00:29:39665 LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
666 << "; Type = " << Type << "\n");
Rafael Auler0ed144a2017-10-06 21:42:46667
Rafael Auler9c4fcaf2018-08-09 00:55:24668 // Type 3 is a GNU build-id note section
669 if (Type != 3)
670 return;
Rafael Auler0ed144a2017-10-06 21:42:46671
Rafael Auler9c4fcaf2018-08-09 00:55:24672 StringRef Name = Buf.slice(Offset, Offset + NameSz);
673 Offset = alignTo(Offset + NameSz, 4);
674 if (Name.substr(0, 3) != "GNU")
675 return;
Rafael Auler0ed144a2017-10-06 21:42:46676
Rafael Auler9c4fcaf2018-08-09 00:55:24677 BuildID = Buf.slice(Offset, Offset + DescSz);
678}
Rafael Auler0ed144a2017-10-06 21:42:46679
Rafael Auler9c4fcaf2018-08-09 00:55:24680Optional<std::string> RewriteInstance::getPrintableBuildID() const {
681 if (BuildID.empty())
682 return NoneType();
683
684 std::string Str;
685 raw_string_ostream OS(Str);
Amir Ayupovc7306cc2021-04-08 07:19:26686 const unsigned char *CharIter = BuildID.bytes_begin();
Rafael Auler9c4fcaf2018-08-09 00:55:24687 while (CharIter != BuildID.bytes_end()) {
688 if (*CharIter < 0x10)
689 OS << "0";
690 OS << Twine::utohexstr(*CharIter);
691 ++CharIter;
Rafael Auler0ed144a2017-10-06 21:42:46692 }
Rafael Auler9c4fcaf2018-08-09 00:55:24693 return OS.str();
694}
695
696void RewriteInstance::patchBuildID() {
Amir Ayupovc7306cc2021-04-08 07:19:26697 raw_fd_ostream &OS = Out->os();
Rafael Auler9c4fcaf2018-08-09 00:55:24698
699 if (BuildID.empty())
700 return;
701
702 size_t IDOffset = BuildIDSection->getContents().rfind(BuildID);
703 assert(IDOffset != StringRef::npos && "failed to patch build-id");
704
Amir Ayupovc7306cc2021-04-08 07:19:26705 uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress());
Rafael Auler9c4fcaf2018-08-09 00:55:24706 if (!FileOffset) {
707 errs() << "BOLT-WARNING: Non-allocatable build-id will not be updated.\n";
708 return;
709 }
710
711 char LastIDByte = BuildID[BuildID.size() - 1];
712 LastIDByte ^= 1;
713 OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1);
714
715 outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
Rafael Auler0ed144a2017-10-06 21:42:46716}
717
Amir Ayupovaf6e66f2022-02-24 03:30:30718Error RewriteInstance::run() {
719 assert(BC && "failed to create a binary context");
Rafael Aulerc67a7532015-11-24 01:54:18720
Rafael Auler787db1c2017-07-25 16:11:42721 outs() << "BOLT-INFO: Target architecture: "
722 << Triple::getArchTypeName(
723 (llvm::Triple::ArchType)InputFile->getArch())
724 << "\n";
Rafael Aulerd7fb9982020-10-05 19:41:03725 outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n";
Rafael Auler787db1c2017-07-25 16:11:42726
Amir Ayupovaf6e66f2022-02-24 03:30:30727 if (Error E = discoverStorage())
728 return E;
Amir Ayupovced54722022-03-08 17:12:19729 if (Error E = readSpecialSections())
730 return E;
Maksim Panchenkoe9c6c732019-09-11 22:42:22731 adjustCommandLineOptions();
732 discoverFileObjects();
733
Rafael Aulere3898d52020-12-30 20:23:58734 preprocessProfileData();
735
Maksim Panchenko87291712020-05-08 06:00:29736 // Skip disassembling if we have a translation table and we are running an
737 // aggregation job.
738 if (opts::AggregateOnly && BAT->enabledFor(InputFile)) {
Maksim Panchenko87291712020-05-08 06:00:29739 processProfileData();
Amir Ayupovaf6e66f2022-02-24 03:30:30740 return Error::success();
Maksim Panchenko87291712020-05-08 06:00:29741 }
Maksim Panchenkoe9c6c732019-09-11 22:42:22742
Maksim Panchenko04c5d4f2020-05-03 20:54:45743 selectFunctionsToProcess();
744
Maksim Panchenkoe9c6c732019-09-11 22:42:22745 readDebugInfo();
746
Maksim Panchenko87291712020-05-08 06:00:29747 disassembleFunctions();
Maksim Panchenkoe9c6c732019-09-11 22:42:22748
Maksim Panchenko87291712020-05-08 06:00:29749 processProfileDataPreCFG();
750
751 buildFunctionsCFG();
Maksim Panchenkoe9c6c732019-09-11 22:42:22752
753 processProfileData();
754
Maksim Panchenkoe9c6c732019-09-11 22:42:22755 postProcessFunctions();
Maksim Panchenko4349b632016-03-31 23:38:49756
Maksim Panchenkoe9c6c732019-09-11 22:42:22757 if (opts::DiffOnly)
Amir Ayupovaf6e66f2022-02-24 03:30:30758 return Error::success();
Gabriel Poesiaf6c89292016-04-12 00:46:18759
Maksim Panchenkoe9c6c732019-09-11 22:42:22760 runOptimizationPasses();
761
762 emitAndLink();
763
Maksim Panchenkof2b257b2019-11-04 05:57:15764 updateMetadata();
Rafael Aulerc67a7532015-11-24 01:54:18765
Maksim Panchenkoa10f7992020-09-15 18:42:03766 if (opts::LinuxKernelMode) {
767 errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n";
Amir Ayupovaf6e66f2022-02-24 03:30:30768 return Error::success();
Amir Ayupov081e39a2021-03-29 23:04:57769 } else if (opts::OutputFilename == "/dev/null") {
770 outs() << "BOLT-INFO: skipping writing final binary to disk\n";
Amir Ayupovaf6e66f2022-02-24 03:30:30771 return Error::success();
Maksim Panchenkoa10f7992020-09-15 18:42:03772 }
773
Maksim Panchenkod68b1c7b2016-03-03 18:13:11774 // Rewrite allocatable contents and copy non-allocatable parts with mods.
Rafael Aulerc67a7532015-11-24 01:54:18775 rewriteFile();
Amir Ayupovaf6e66f2022-02-24 03:30:30776 return Error::success();
Rafael Aulerc67a7532015-11-24 01:54:18777}
778
Maksim Panchenkoe7e9e152016-03-11 19:09:34779void RewriteInstance::discoverFileObjects() {
Rafael Auler8a5a3012018-02-06 23:00:23780 NamedRegionTimer T("discoverFileObjects", "discover file objects",
781 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
Rafael Aulerc67a7532015-11-24 01:54:18782 FileSymRefs.clear();
Maksim Panchenko7fd48702019-04-03 22:52:01783 BC->getBinaryFunctions().clear();
Bill Nell0e4d86b2017-11-15 04:05:11784 BC->clearBinaryData();
Rafael Aulerc67a7532015-11-24 01:54:18785
Maksim Panchenkoe241e9c2016-09-29 18:19:06786 // For local symbols we want to keep track of associated FILE symbol name for
787 // disambiguation by combined name.
Maksim Panchenko40c2e0f2021-12-15 00:52:51788 StringRef FileSymbolName;
Maksim Panchenkoe241e9c2016-09-29 18:19:06789 bool SeenFileName = false;
790 struct SymbolRefHash {
Maksim Panchenkoee0371a2020-04-07 07:21:37791 size_t operator()(SymbolRef const &S) const {
Maksim Panchenkoe241e9c2016-09-29 18:19:06792 return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p);
793 }
794 };
795 std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName;
Amir Ayupovc7306cc2021-04-08 07:19:26796 for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
797 Expected<StringRef> NameOrError = Symbol.getName();
Maksim Panchenko6b0b5bb2017-02-07 23:56:00798 if (NameOrError && NameOrError->startswith("__asan_init")) {
799 errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer "
800 "support. Cannot optimize.\n";
801 exit(1);
802 }
Maksim Panchenkof7d32f72017-03-31 14:51:30803 if (NameOrError && NameOrError->startswith("__llvm_coverage_mapping")) {
804 errs() << "BOLT-ERROR: input file was compiled or linked with coverage "
805 "support. Cannot optimize.\n";
806 exit(1);
807 }
Maksim Panchenko6b0b5bb2017-02-07 23:56:00808
Amir Ayupov1c5d3a02020-12-02 00:29:39809 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
Rafael Aulerc67a7532015-11-24 01:54:18810 continue;
811
Rafael Auler8a5a3012018-02-06 23:00:23812 if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
Amir Ayupovc7306cc2021-04-08 07:19:26813 StringRef Name =
Rafael Auler8a5a3012018-02-06 23:00:23814 cantFail(std::move(NameOrError), "cannot get symbol name for file");
Maksim Panchenkof32784f2017-09-26 01:05:37815 // Ignore Clang LTO artificial FILE symbol as it is not always generated,
816 // and this uncertainty is causing havoc in function name matching.
Rafael Auler8a5a3012018-02-06 23:00:23817 if (Name == "ld-temp.o")
Maksim Panchenkof32784f2017-09-26 01:05:37818 continue;
Rafael Auler8a5a3012018-02-06 23:00:23819 FileSymbolName = Name;
Maksim Panchenko84b5b9e2016-07-12 01:51:13820 SeenFileName = true;
Rafael Aulerc67a7532015-11-24 01:54:18821 continue;
822 }
Maksim Panchenkoe241e9c2016-09-29 18:19:06823 if (!FileSymbolName.empty() &&
Maksim Panchenkoee0e9cc2021-12-23 20:38:33824 !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global))
Maksim Panchenkoe241e9c2016-09-29 18:19:06825 SymbolToFileName[Symbol] = FileSymbolName;
Maksim Panchenkoe241e9c2016-09-29 18:19:06826 }
827
Maksim Panchenko53b72d0f2018-09-05 21:36:52828 // Sort symbols in the file by value. Ignore symbols from non-allocatable
829 // sections.
830 auto isSymbolInMemory = [this](const SymbolRef &Sym) {
831 if (cantFail(Sym.getType()) == SymbolRef::ST_File)
832 return false;
Amir Ayupov1c5d3a02020-12-02 00:29:39833 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute)
Maksim Panchenko53b72d0f2018-09-05 21:36:52834 return true;
Amir Ayupov1c5d3a02020-12-02 00:29:39835 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined)
Maksim Panchenko53b72d0f2018-09-05 21:36:52836 return false;
837 BinarySection Section(*BC, *cantFail(Sym.getSection()));
838 return Section.isAllocatable();
839 };
840 std::vector<SymbolRef> SortedFileSymbols;
Amir Ayupovd2c87692022-06-24 05:15:47841 llvm::copy_if(InputFile->symbols(), std::back_inserter(SortedFileSymbols),
842 isSymbolInMemory);
Denis Revunov8579db92022-05-31 18:50:59843 auto CompareSymbols = [this](const SymbolRef &A, const SymbolRef &B) {
844 // Marker symbols have the highest precedence, while
845 // SECTIONs have the lowest.
846 auto AddressA = cantFail(A.getAddress());
847 auto AddressB = cantFail(B.getAddress());
848 if (AddressA != AddressB)
849 return AddressA < AddressB;
Maksim Panchenko53b72d0f2018-09-05 21:36:52850
Denis Revunov8579db92022-05-31 18:50:59851 bool AMarker = BC->isMarker(A);
852 bool BMarker = BC->isMarker(B);
853 if (AMarker || BMarker) {
854 return AMarker && !BMarker;
855 }
Maksim Panchenko99ef4c92019-04-16 17:24:34856
Denis Revunov8579db92022-05-31 18:50:59857 auto AType = cantFail(A.getType());
858 auto BType = cantFail(B.getType());
859 if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
860 return true;
861 if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
862 return true;
Maksim Panchenko99ef4c92019-04-16 17:24:34863
Denis Revunov8579db92022-05-31 18:50:59864 return false;
865 };
866
Amir Ayupovd2c87692022-06-24 05:15:47867 llvm::stable_sort(SortedFileSymbols, CompareSymbols);
Denis Revunov8579db92022-05-31 18:50:59868
869 auto LastSymbol = SortedFileSymbols.end() - 1;
Maksim Panchenkoe241e9c2016-09-29 18:19:06870
Rafael Auler907ca252017-11-23 00:17:36871 // For aarch64, the ABI defines mapping symbols so we identify data in the
872 // code section (see IHI0056B). $d identifies data contents.
Denis Revunov8579db92022-05-31 18:50:59873 // Compilers usually merge multiple data objects in a single $d-$x interval,
874 // but we need every data object to be marked with $d. Because of that we
875 // create a vector of MarkerSyms with all locations of data objects.
876
877 struct MarkerSym {
878 uint64_t Address;
879 MarkerSymType Type;
880 };
881
882 std::vector<MarkerSym> SortedMarkerSymbols;
883 auto addExtraDataMarkerPerSymbol =
884 [this](const std::vector<SymbolRef> &SortedFileSymbols,
885 std::vector<MarkerSym> &SortedMarkerSymbols) {
886 bool IsData = false;
887 uint64_t LastAddr = 0;
888 for (auto Sym = SortedFileSymbols.begin();
889 Sym < SortedFileSymbols.end(); ++Sym) {
890 uint64_t Address = cantFail(Sym->getAddress());
891 if (LastAddr == Address) // don't repeat markers
892 continue;
893
894 MarkerSymType MarkerType = BC->getMarkerType(*Sym);
895 if (MarkerType != MarkerSymType::NONE) {
896 SortedMarkerSymbols.push_back(MarkerSym{Address, MarkerType});
897 LastAddr = Address;
898 IsData = MarkerType == MarkerSymType::DATA;
899 continue;
900 }
901
902 if (IsData) {
903 SortedMarkerSymbols.push_back(
904 MarkerSym{cantFail(Sym->getAddress()), MarkerSymType::DATA});
905 LastAddr = Address;
906 }
907 }
908 };
909
Rafael Auler7df6a6d2018-03-20 21:34:58910 if (BC->isAArch64()) {
Denis Revunov8579db92022-05-31 18:50:59911 addExtraDataMarkerPerSymbol(SortedFileSymbols, SortedMarkerSymbols);
Maksim Panchenko8c6ea852019-10-08 18:03:33912 LastSymbol = std::stable_partition(
Rafael Auler907ca252017-11-23 00:17:36913 SortedFileSymbols.begin(), SortedFileSymbols.end(),
Denis Revunov8579db92022-05-31 18:50:59914 [this](const SymbolRef &Symbol) { return !BC->isMarker(Symbol); });
Maksim Panchenko8c6ea852019-10-08 18:03:33915 --LastSymbol;
Rafael Auler907ca252017-11-23 00:17:36916 }
917
Maksim Panchenko55fc5412016-09-28 02:09:38918 BinaryFunction *PreviousFunction = nullptr;
Bill Nell0e4d86b2017-11-15 04:05:11919 unsigned AnonymousId = 0;
920
Denis Revunov8579db92022-05-31 18:50:59921 const auto SortedSymbolsEnd = std::next(LastSymbol);
922 for (auto ISym = SortedFileSymbols.begin(); ISym != SortedSymbolsEnd;
923 ++ISym) {
Amir Ayupovc7306cc2021-04-08 07:19:26924 const SymbolRef &Symbol = *ISym;
Maksim Panchenkoe241e9c2016-09-29 18:19:06925 // Keep undefined symbols for pretty printing?
Amir Ayupov1c5d3a02020-12-02 00:29:39926 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
Maksim Panchenkoe241e9c2016-09-29 18:19:06927 continue;
928
Amir Ayupovc7306cc2021-04-08 07:19:26929 const SymbolRef::Type SymbolType = cantFail(Symbol.getType());
Maksim Panchenko8c6ea852019-10-08 18:03:33930
931 if (SymbolType == SymbolRef::ST_File)
Maksim Panchenkoe241e9c2016-09-29 18:19:06932 continue;
933
Rafael Auler8a5a3012018-02-06 23:00:23934 StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name");
935 uint64_t Address =
936 cantFail(Symbol.getAddress(), "cannot get symbol address");
Rafael Aulerc67a7532015-11-24 01:54:18937 if (Address == 0) {
Maksim Panchenko8c6ea852019-10-08 18:03:33938 if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function)
Maksim Panchenkod1526082016-02-05 22:42:04939 errs() << "BOLT-WARNING: function with 0 address seen\n";
Rafael Aulerc67a7532015-11-24 01:54:18940 continue;
941 }
942
Rafael Aulere4396c42020-10-17 07:50:27943 // Ignore input hot markers
Maksim Panchenkoee0e9cc2021-12-23 20:38:33944 if (SymName == "__hot_start" || SymName == "__hot_end")
Rafael Aulere4396c42020-10-17 07:50:27945 continue;
Rafael Aulere4396c42020-10-17 07:50:27946
Rafael Auler907ca252017-11-23 00:17:36947 FileSymRefs[Address] = Symbol;
Rafael Aulerc67a7532015-11-24 01:54:18948
Maksim Panchenkoc9f5f472021-06-30 21:41:41949 // Skip section symbols that will be registered by disassemblePLT().
950 if ((cantFail(Symbol.getType()) == SymbolRef::ST_Debug)) {
951 ErrorOr<BinarySection &> BSection = BC->getSectionForAddress(Address);
952 if (BSection && getPLTSectionInfo(BSection->getName()))
953 continue;
Amir Ayupov1c5d3a02020-12-02 00:29:39954 }
955
Maksim Panchenko84b5b9e2016-07-12 01:51:13956 /// It is possible we are seeing a globalized local. LLVM might treat it as
957 /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to
958 /// change the prefix to enforce global scope of the symbol.
Rafael Auler8a5a3012018-02-06 23:00:23959 std::string Name = SymName.startswith(BC->AsmInfo->getPrivateGlobalPrefix())
960 ? "PG" + std::string(SymName)
961 : std::string(SymName);
Maksim Panchenko84b5b9e2016-07-12 01:51:13962
Rafael Aulerc67a7532015-11-24 01:54:18963 // Disambiguate all local symbols before adding to symbol table.
Maksim Panchenko84b5b9e2016-07-12 01:51:13964 // Since we don't know if we will see a global with the same name,
Rafael Aulerc67a7532015-11-24 01:54:18965 // always modify the local name.
Maksim Panchenko84b5b9e2016-07-12 01:51:13966 //
967 // NOTE: the naming convention for local symbols should match
968 // the one we use for profile data.
Rafael Aulerc67a7532015-11-24 01:54:18969 std::string UniqueName;
Maksim Panchenko84b5b9e2016-07-12 01:51:13970 std::string AlternativeName;
Bill Nell0e4d86b2017-11-15 04:05:11971 if (Name.empty()) {
Bill Nell0e4d86b2017-11-15 04:05:11972 UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++);
Amir Ayupov1c5d3a02020-12-02 00:29:39973 } else if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global) {
Bill Nell0e4d86b2017-11-15 04:05:11974 assert(!BC->getBinaryDataByName(Name) && "global name not unique");
Maksim Panchenko84b5b9e2016-07-12 01:51:13975 UniqueName = Name;
Rafael Aulerc67a7532015-11-24 01:54:18976 } else {
Maksim Panchenko84b5b9e2016-07-12 01:51:13977 // If we have a local file name, we should create 2 variants for the
978 // function name. The reason is that perf profile might have been
979 // collected on a binary that did not have the local file name (e.g. as
980 // a side effect of stripping debug info from the binary):
981 //
982 // primary: <function>/<id>
983 // alternative: <function>/<file>/<id2>
984 //
985 // The <id> field is used for disambiguation of local symbols since there
986 // could be identical function names coming from identical file names
987 // (e.g. from different directories).
Maksim Panchenko84b5b9e2016-07-12 01:51:13988 std::string AltPrefix;
Maksim Panchenkoe241e9c2016-09-29 18:19:06989 auto SFI = SymbolToFileName.find(Symbol);
Maksim Panchenkoee0e9cc2021-12-23 20:38:33990 if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end())
Maksim Panchenko8c6ea852019-10-08 18:03:33991 AltPrefix = Name + "/" + std::string(SFI->second);
Rafael Aulerc67a7532015-11-24 01:54:18992
Alexander Shaposhnikov16630f52020-02-17 22:37:46993 UniqueName = NR.uniquify(Name);
Maksim Panchenko84b5b9e2016-07-12 01:51:13994 if (!AltPrefix.empty())
Alexander Shaposhnikov16630f52020-02-17 22:37:46995 AlternativeName = NR.uniquify(AltPrefix);
Rafael Aulerc67a7532015-11-24 01:54:18996 }
997
Bill Nell0e4d86b2017-11-15 04:05:11998 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
Bill Nell0e4d86b2017-11-15 04:05:11999 uint64_t SymbolAlignment = Symbol.getAlignment();
Amir Ayupov1c5d3a02020-12-02 00:29:391000 unsigned SymbolFlags = cantFail(Symbol.getFlags());
Bill Nell0e4d86b2017-11-15 04:05:111001
1002 auto registerName = [&](uint64_t FinalSize) {
1003 // Register names even if it's not a function, e.g. for an entry point.
Maksim Panchenko40c2e0f2021-12-15 00:52:511004 BC->registerNameAtAddress(UniqueName, Address, FinalSize, SymbolAlignment,
1005 SymbolFlags);
Bill Nell0e4d86b2017-11-15 04:05:111006 if (!AlternativeName.empty())
1007 BC->registerNameAtAddress(AlternativeName, Address, FinalSize,
Bill Nell729da2d2018-04-21 03:03:311008 SymbolAlignment, SymbolFlags);
Bill Nell0e4d86b2017-11-15 04:05:111009 };
Rafael Aulerc67a7532015-11-24 01:54:181010
Rafael Auler8a5a3012018-02-06 23:00:231011 section_iterator Section =
1012 cantFail(Symbol.getSection(), "cannot get symbol section");
Maksim Panchenkod68b1c7b2016-03-03 18:13:111013 if (Section == InputFile->section_end()) {
Rafael Aulerc67a7532015-11-24 01:54:181014 // Could be an absolute symbol. Could record for pretty printing.
Amir Ayupov1c5d3a02020-12-02 00:29:391015 LLVM_DEBUG(if (opts::Verbosity > 1) {
1016 dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n";
1017 });
Maksim Panchenkob11c8262021-03-15 19:06:561018 registerName(SymbolSize);
Rafael Aulerc67a7532015-11-24 01:54:181019 continue;
1020 }
1021
Amir Ayupov1c5d3a02020-12-02 00:29:391022 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName
1023 << " for function\n");
Maksim Panchenkoe241e9c2016-09-29 18:19:061024
1025 if (!Section->isText()) {
Maksim Panchenko8c6ea852019-10-08 18:03:331026 assert(SymbolType != SymbolRef::ST_Function &&
Maksim Panchenkoe241e9c2016-09-29 18:19:061027 "unexpected function inside non-code section");
Amir Ayupov1c5d3a02020-12-02 00:29:391028 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n");
Maksim Panchenkob11c8262021-03-15 19:06:561029 registerName(SymbolSize);
Maksim Panchenkoe241e9c2016-09-29 18:19:061030 continue;
1031 }
1032
Maksim Panchenkoe241e9c2016-09-29 18:19:061033 // Assembly functions could be ST_NONE with 0 size. Check that the
1034 // corresponding section is a code section and they are not inside any
1035 // other known function to consider them.
1036 //
1037 // Sometimes assembly functions are not marked as functions and neither are
1038 // their local labels. The only way to tell them apart is to look at
1039 // symbol scope - global vs local.
Maksim Panchenko8c6ea852019-10-08 18:03:331040 if (PreviousFunction && SymbolType != SymbolRef::ST_Function) {
1041 if (PreviousFunction->containsAddress(Address)) {
1042 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
Amir Ayupov1c5d3a02020-12-02 00:29:391043 LLVM_DEBUG(dbgs()
1044 << "BOLT-DEBUG: symbol is a function local symbol\n");
Maksim Panchenko8c6ea852019-10-08 18:03:331045 } else if (Address == PreviousFunction->getAddress() && !SymbolSize) {
Amir Ayupov1c5d3a02020-12-02 00:29:391046 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n");
Maksim Panchenko8c6ea852019-10-08 18:03:331047 } else if (opts::Verbosity > 1) {
1048 errs() << "BOLT-WARNING: symbol " << UniqueName
Amir Ayupov1c5d3a02020-12-02 00:29:391049 << " seen in the middle of function " << *PreviousFunction
1050 << ". Could be a new entry.\n";
Maksim Panchenkoe241e9c2016-09-29 18:19:061051 }
Maksim Panchenko8c6ea852019-10-08 18:03:331052 registerName(SymbolSize);
1053 continue;
1054 } else if (PreviousFunction->getSize() == 0 &&
1055 PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
Amir Ayupov1c5d3a02020-12-02 00:29:391056 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
Maksim Panchenko8c6ea852019-10-08 18:03:331057 registerName(SymbolSize);
1058 continue;
Maksim Panchenkoe241e9c2016-09-29 18:19:061059 }
1060 }
1061
Maksim Panchenko40c2e0f2021-12-15 00:52:511062 if (PreviousFunction && PreviousFunction->containsAddress(Address) &&
Maksim Panchenko55fc5412016-09-28 02:09:381063 PreviousFunction->getAddress() != Address) {
1064 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:331065 if (opts::Verbosity >= 1)
Amir Ayupov1c5d3a02020-12-02 00:29:391066 outs() << "BOLT-INFO: skipping possibly another entry for function "
Maksim Panchenko55fc5412016-09-28 02:09:381067 << *PreviousFunction << " : " << UniqueName << '\n';
Maksim Panchenko55fc5412016-09-28 02:09:381068 } else {
1069 outs() << "BOLT-INFO: using " << UniqueName << " as another entry to "
1070 << "function " << *PreviousFunction << '\n';
1071
Maksim Panchenko4946b882020-06-22 23:16:081072 registerName(0);
1073
Maksim Panchenko40c2e0f2021-12-15 00:52:511074 PreviousFunction->addEntryPointAtOffset(Address -
1075 PreviousFunction->getAddress());
Maksim Panchenko55fc5412016-09-28 02:09:381076
Maksim Panchenko55fc5412016-09-28 02:09:381077 // Remove the symbol from FileSymRefs so that we can skip it from
1078 // in the future.
1079 auto SI = FileSymRefs.find(Address);
1080 assert(SI != FileSymRefs.end() && "symbol expected to be present");
1081 assert(SI->second == Symbol && "wrong symbol found");
1082 FileSymRefs.erase(SI);
Maksim Panchenkoe241e9c2016-09-29 18:19:061083 }
Bill Nell0e4d86b2017-11-15 04:05:111084 registerName(SymbolSize);
Maksim Panchenkoe241e9c2016-09-29 18:19:061085 continue;
1086 }
1087
Maksim Panchenkoe7e9e152016-03-11 19:09:341088 // Checkout for conflicts with function data from FDEs.
1089 bool IsSimple = true;
1090 auto FDEI = CFIRdWrt->getFDEs().lower_bound(Address);
1091 if (FDEI != CFIRdWrt->getFDEs().end()) {
Amir Ayupovc7306cc2021-04-08 07:19:261092 const dwarf::FDE &FDE = *FDEI->second;
Maksim Panchenkoe7e9e152016-03-11 19:09:341093 if (FDEI->first != Address) {
1094 // There's no matching starting address in FDE. Make sure the previous
1095 // FDE does not contain this address.
1096 if (FDEI != CFIRdWrt->getFDEs().begin()) {
1097 --FDEI;
Amir Ayupovc7306cc2021-04-08 07:19:261098 const dwarf::FDE &PrevFDE = *FDEI->second;
1099 uint64_t PrevStart = PrevFDE.getInitialLocation();
1100 uint64_t PrevLength = PrevFDE.getAddressRange();
Maksim Panchenkoc4e36c12016-09-15 22:47:101101 if (Address > PrevStart && Address < PrevStart + PrevLength) {
Maksim Panchenko55fc5412016-09-28 02:09:381102 errs() << "BOLT-ERROR: function " << UniqueName
1103 << " is in conflict with FDE ["
1104 << Twine::utohexstr(PrevStart) << ", "
1105 << Twine::utohexstr(PrevStart + PrevLength)
1106 << "). Skipping.\n";
Maksim Panchenkoe7e9e152016-03-11 19:09:341107 IsSimple = false;
1108 }
1109 }
1110 } else if (FDE.getAddressRange() != SymbolSize) {
Maksim Panchenkoc4e36c12016-09-15 22:47:101111 if (SymbolSize) {
1112 // Function addresses match but sizes differ.
Maksim Panchenko4b485f42017-06-03 01:41:311113 errs() << "BOLT-WARNING: sizes differ for function " << UniqueName
Maksim Panchenko55fc5412016-09-28 02:09:381114 << ". FDE : " << FDE.getAddressRange()
Maksim Panchenko4b485f42017-06-03 01:41:311115 << "; symbol table : " << SymbolSize << ". Using max size.\n";
Maksim Panchenkoc4e36c12016-09-15 22:47:101116 }
Maksim Panchenkoe7e9e152016-03-11 19:09:341117 SymbolSize = std::max(SymbolSize, FDE.getAddressRange());
Bill Nell0e4d86b2017-11-15 04:05:111118 if (BC->getBinaryDataAtAddress(Address)) {
1119 BC->setBinaryDataSize(Address, SymbolSize);
1120 } else {
Amir Ayupov1c5d3a02020-12-02 00:29:391121 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x"
1122 << Twine::utohexstr(Address) << "\n");
Bill Nell0e4d86b2017-11-15 04:05:111123 }
Maksim Panchenkoe7e9e152016-03-11 19:09:341124 }
1125 }
Rafael Auler31fc56b2019-04-16 21:35:291126
Maksim Panchenkofe37f182021-05-13 17:50:471127 BinaryFunction *BF = nullptr;
Maksim Panchenko7fd48702019-04-03 22:52:011128 // Since function may not have yet obtained its real size, do a search
1129 // using the list of registered functions instead of calling
1130 // getBinaryFunctionAtAddress().
1131 auto BFI = BC->getBinaryFunctions().find(Address);
1132 if (BFI != BC->getBinaryFunctions().end()) {
Maksim Panchenko003d106c2016-08-11 21:23:541133 BF = &BFI->second;
Maksim Panchenko7fd48702019-04-03 22:52:011134 // Duplicate the function name. Make sure everything matches before we add
Maksim Panchenkof1192a72016-06-11 00:13:051135 // an alternative name.
Maksim Panchenkoc4e36c12016-09-15 22:47:101136 if (SymbolSize != BF->getSize()) {
1137 if (opts::Verbosity >= 1) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:331138 if (SymbolSize && BF->getSize())
Maksim Panchenkoc4e36c12016-09-15 22:47:101139 errs() << "BOLT-WARNING: size mismatch for duplicate entries "
1140 << *BF << " and " << UniqueName << '\n';
Maksim Panchenko40c2e0f2021-12-15 00:52:511141 outs() << "BOLT-INFO: adjusting size of function " << *BF << " old "
1142 << BF->getSize() << " new " << SymbolSize << "\n";
Maksim Panchenkoc4e36c12016-09-15 22:47:101143 }
1144 BF->setSize(std::max(SymbolSize, BF->getSize()));
Bill Nell0e4d86b2017-11-15 04:05:111145 BC->setBinaryDataSize(Address, BF->getSize());
Maksim Panchenkof1192a72016-06-11 00:13:051146 }
Maksim Panchenko003d106c2016-08-11 21:23:541147 BF->addAlternativeName(UniqueName);
Maksim Panchenkof1192a72016-06-11 00:13:051148 } else {
Amir Ayupovc7306cc2021-04-08 07:19:261149 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
takh48b71ad2020-06-11 06:00:391150 // Skip symbols from invalid sections
1151 if (!Section) {
1152 errs() << "BOLT-WARNING: " << UniqueName << " (0x"
Maksim Panchenko40c2e0f2021-12-15 00:52:511153 << Twine::utohexstr(Address) << ") does not have any section\n";
takh48b71ad2020-06-11 06:00:391154 continue;
1155 }
Maksim Panchenko06e7a1e2019-06-27 10:20:171156 assert(Section && "section for functions must be registered");
Wenlei He459add22019-06-26 18:06:461157
Maksim Panchenko06e7a1e2019-06-27 10:20:171158 // Skip symbols from zero-sized sections.
1159 if (!Section->getSize())
Wenlei He459add22019-06-26 18:06:461160 continue;
laith sakka7d428352019-07-12 14:25:501161
Maksim Panchenko0ce0bce2020-06-15 07:15:471162 BF = BC->createBinaryFunction(UniqueName, *Section, Address, SymbolSize);
1163 if (!IsSimple)
1164 BF->setSimple(false);
Maksim Panchenkof1192a72016-06-11 00:13:051165 }
Maksim Panchenko84b5b9e2016-07-12 01:51:131166 if (!AlternativeName.empty())
Maksim Panchenko003d106c2016-08-11 21:23:541167 BF->addAlternativeName(AlternativeName);
Maksim Panchenkoe241e9c2016-09-29 18:19:061168
Bill Nell0e4d86b2017-11-15 04:05:111169 registerName(SymbolSize);
Maksim Panchenkoe241e9c2016-09-29 18:19:061170 PreviousFunction = BF;
Maksim Panchenko84b5b9e2016-07-12 01:51:131171 }
1172
Maksim Panchenko38c58872021-06-22 20:46:061173 // Read dynamic relocation first as their presence affects the way we process
1174 // static relocations. E.g. we will ignore a static relocation at an address
1175 // that is a subject to dynamic relocation processing.
1176 processDynamicRelocations();
1177
Maksim Panchenko49d1f562017-08-04 18:21:051178 // Process PLT section.
Vladislav Khmelevsky00b6efc2022-01-25 00:22:471179 disassemblePLT();
Maksim Panchenko49d1f562017-08-04 18:21:051180
Maksim Panchenko55fc5412016-09-28 02:09:381181 // See if we missed any functions marked by FDE.
1182 for (const auto &FDEI : CFIRdWrt->getFDEs()) {
Amir Ayupovc7306cc2021-04-08 07:19:261183 const uint64_t Address = FDEI.first;
1184 const dwarf::FDE *FDE = FDEI.second;
1185 const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
Maksim Panchenko7fd48702019-04-03 22:52:011186 if (BF)
1187 continue;
1188
1189 BF = BC->getBinaryFunctionContainingAddress(Address);
1190 if (BF) {
1191 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
1192 << Twine::utohexstr(Address + FDE->getAddressRange())
1193 << ") conflicts with function " << *BF << '\n';
1194 continue;
Maksim Panchenko4b485f42017-06-03 01:41:311195 }
Maksim Panchenko7fd48702019-04-03 22:52:011196
Maksim Panchenkoee0e9cc2021-12-23 20:38:331197 if (opts::Verbosity >= 1)
Maksim Panchenko40c2e0f2021-12-15 00:52:511198 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
1199 << Twine::utohexstr(Address + FDE->getAddressRange())
Maksim Panchenko7fd48702019-04-03 22:52:011200 << ") has no corresponding symbol table entry\n";
Maksim Panchenkoee0e9cc2021-12-23 20:38:331201
Amir Ayupovc7306cc2021-04-08 07:19:261202 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
Maksim Panchenko7fd48702019-04-03 22:52:011203 assert(Section && "cannot get section for address from FDE");
1204 std::string FunctionName =
Maksim Panchenko40c2e0f2021-12-15 00:52:511205 "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str();
Maksim Panchenko7fd48702019-04-03 22:52:011206 BC->createBinaryFunction(FunctionName, *Section, Address,
Maksim Panchenko0ce0bce2020-06-15 07:15:471207 FDE->getAddressRange());
Maksim Panchenko4b485f42017-06-03 01:41:311208 }
1209
Maksim Panchenko87291712020-05-08 06:00:291210 BC->setHasSymbolsWithFileName(SeenFileName);
Maksim Panchenkoe241e9c2016-09-29 18:19:061211
1212 // Now that all the functions were created - adjust their boundaries.
1213 adjustFunctionBoundaries();
Maksim Panchenko55fc5412016-09-28 02:09:381214
Rafael Auler907ca252017-11-23 00:17:361215 // Annotate functions with code/data markers in AArch64
Denis Revunov8579db92022-05-31 18:50:591216 for (auto ISym = SortedMarkerSymbols.begin();
1217 ISym != SortedMarkerSymbols.end(); ++ISym) {
1218
1219 auto *BF =
1220 BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
1221
Rafael Auler907ca252017-11-23 00:17:361222 if (!BF) {
1223 // Stray marker
1224 continue;
1225 }
Denis Revunov8579db92022-05-31 18:50:591226 const auto EntryOffset = ISym->Address - BF->getAddress();
1227 if (ISym->Type == MarkerSymType::CODE) {
Rafael Auler907ca252017-11-23 00:17:361228 BF->markCodeAtOffset(EntryOffset);
1229 continue;
1230 }
Denis Revunov8579db92022-05-31 18:50:591231 if (ISym->Type == MarkerSymType::DATA) {
Rafael Auler907ca252017-11-23 00:17:361232 BF->markDataAtOffset(EntryOffset);
Denis Revunov8579db92022-05-31 18:50:591233 BC->AddressToConstantIslandMap[ISym->Address] = BF;
Rafael Auler907ca252017-11-23 00:17:361234 continue;
1235 }
1236 llvm_unreachable("Unknown marker");
1237 }
1238
Maksim Panchenkoa10f7992020-09-15 18:42:031239 if (opts::LinuxKernelMode) {
1240 // Read all special linux kernel sections and their relocations
1241 processLKSections();
1242 } else {
1243 // Read all relocations now that we have binary functions mapped.
1244 processRelocations();
1245 }
Maksim Panchenkoe241e9c2016-09-29 18:19:061246}
1247
Vladislav Khmelevsky00b6efc2022-01-25 00:22:471248void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
1249 uint64_t EntryAddress,
1250 uint64_t EntrySize) {
1251 if (!TargetAddress)
1252 return;
1253
Vladislav Khmelevsky8bdbcfe2022-03-02 21:34:411254 auto setPLTSymbol = [&](BinaryFunction *BF, StringRef Name) {
1255 const unsigned PtrSize = BC->AsmInfo->getCodePointerSize();
1256 MCSymbol *TargetSymbol = BC->registerNameAtAddress(
1257 Name.str() + "@GOT", TargetAddress, PtrSize, PtrSize);
1258 BF->setPLTSymbol(TargetSymbol);
1259 };
1260
1261 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(EntryAddress);
1262 if (BF && BC->isAArch64()) {
1263 // Handle IFUNC trampoline
1264 setPLTSymbol(BF, BF->getOneName());
1265 return;
1266 }
1267
Vladislav Khmelevsky00b6efc2022-01-25 00:22:471268 const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress);
1269 if (!Rel || !Rel->Symbol)
1270 return;
1271
Vladislav Khmelevsky00b6efc2022-01-25 00:22:471272 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(EntryAddress);
1273 assert(Section && "cannot get section for address");
Vladislav Khmelevsky8bdbcfe2022-03-02 21:34:411274 BF = BC->createBinaryFunction(Rel->Symbol->getName().str() + "@PLT", *Section,
1275 EntryAddress, 0, EntrySize,
1276 Section->getAlignment());
1277 setPLTSymbol(BF, Rel->Symbol->getName());
Vladislav Khmelevsky00b6efc2022-01-25 00:22:471278}
1279
1280void RewriteInstance::disassemblePLTSectionAArch64(BinarySection &Section) {
1281 const uint64_t SectionAddress = Section.getAddress();
1282 const uint64_t SectionSize = Section.getSize();
1283 StringRef PLTContents = Section.getContents();
1284 ArrayRef<uint8_t> PLTData(
1285 reinterpret_cast<const uint8_t *>(PLTContents.data()), SectionSize);
1286
1287 auto disassembleInstruction = [&](uint64_t InstrOffset, MCInst &Instruction,
1288 uint64_t &InstrSize) {
1289 const uint64_t InstrAddr = SectionAddress + InstrOffset;
1290 if (!BC->DisAsm->getInstruction(Instruction, InstrSize,
1291 PLTData.slice(InstrOffset), InstrAddr,
1292 nulls())) {
1293 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT section "
1294 << Section.getName() << " at offset 0x"
1295 << Twine::utohexstr(InstrOffset) << '\n';
1296 exit(1);
1297 }
1298 };
1299
1300 uint64_t InstrOffset = 0;
1301 // Locate new plt entry
1302 while (InstrOffset < SectionSize) {
1303 InstructionListType Instructions;
1304 MCInst Instruction;
1305 uint64_t EntryOffset = InstrOffset;
1306 uint64_t EntrySize = 0;
1307 uint64_t InstrSize;
1308 // Loop through entry instructions
1309 while (InstrOffset < SectionSize) {
1310 disassembleInstruction(InstrOffset, Instruction, InstrSize);
1311 EntrySize += InstrSize;
1312 if (!BC->MIB->isIndirectBranch(Instruction)) {
1313 Instructions.emplace_back(Instruction);
1314 InstrOffset += InstrSize;
1315 continue;
1316 }
1317
1318 const uint64_t EntryAddress = SectionAddress + EntryOffset;
1319 const uint64_t TargetAddress = BC->MIB->analyzePLTEntry(
1320 Instruction, Instructions.begin(), Instructions.end(), EntryAddress);
1321
1322 createPLTBinaryFunction(TargetAddress, EntryAddress, EntrySize);
1323 break;
1324 }
1325
1326 // Branch instruction
1327 InstrOffset += InstrSize;
1328
1329 // Skip nops if any
1330 while (InstrOffset < SectionSize) {
1331 disassembleInstruction(InstrOffset, Instruction, InstrSize);
1332 if (!BC->MIB->isNoop(Instruction))
1333 break;
1334
1335 InstrOffset += InstrSize;
1336 }
1337 }
1338}
1339
1340void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
1341 uint64_t EntrySize) {
1342 const uint64_t SectionAddress = Section.getAddress();
1343 const uint64_t SectionSize = Section.getSize();
1344 StringRef PLTContents = Section.getContents();
1345 ArrayRef<uint8_t> PLTData(
1346 reinterpret_cast<const uint8_t *>(PLTContents.data()), SectionSize);
1347
1348 auto disassembleInstruction = [&](uint64_t InstrOffset, MCInst &Instruction,
1349 uint64_t &InstrSize) {
1350 const uint64_t InstrAddr = SectionAddress + InstrOffset;
1351 if (!BC->DisAsm->getInstruction(Instruction, InstrSize,
1352 PLTData.slice(InstrOffset), InstrAddr,
1353 nulls())) {
1354 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT section "
1355 << Section.getName() << " at offset 0x"
1356 << Twine::utohexstr(InstrOffset) << '\n';
1357 exit(1);
1358 }
1359 };
1360
1361 for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= SectionSize;
1362 EntryOffset += EntrySize) {
1363 MCInst Instruction;
1364 uint64_t InstrSize, InstrOffset = EntryOffset;
1365 while (InstrOffset < EntryOffset + EntrySize) {
1366 disassembleInstruction(InstrOffset, Instruction, InstrSize);
1367 // Check if the entry size needs adjustment.
1368 if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) &&
1369 EntrySize == 8)
1370 EntrySize = 16;
1371
1372 if (BC->MIB->isIndirectBranch(Instruction))
1373 break;
1374
1375 InstrOffset += InstrSize;
1376 }
1377
1378 if (InstrOffset + InstrSize > EntryOffset + EntrySize)
1379 continue;
1380
1381 uint64_t TargetAddress;
1382 if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
1383 SectionAddress + InstrOffset,
1384 InstrSize)) {
1385 errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x"
1386 << Twine::utohexstr(SectionAddress + InstrOffset) << '\n';
1387 exit(1);
1388 }
1389
1390 createPLTBinaryFunction(TargetAddress, SectionAddress + EntryOffset,
1391 EntrySize);
1392 }
1393}
1394
Maksim Panchenko49d1f562017-08-04 18:21:051395void RewriteInstance::disassemblePLT() {
Maksim Panchenkoc9f5f472021-06-30 21:41:411396 auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) {
Vladislav Khmelevsky00b6efc2022-01-25 00:22:471397 if (BC->isAArch64())
1398 return disassemblePLTSectionAArch64(Section);
1399 return disassemblePLTSectionX86(Section, EntrySize);
Rafael Auler1f6564f2019-08-26 22:03:381400 };
1401
Maksim Panchenkoc9f5f472021-06-30 21:41:411402 for (BinarySection &Section : BC->allocatableSections()) {
1403 const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName());
1404 if (!PLTSI)
1405 continue;
Maksim Panchenko49d1f562017-08-04 18:21:051406
Maksim Panchenkoc9f5f472021-06-30 21:41:411407 analyzeOnePLTSection(Section, PLTSI->EntrySize);
1408 // If we did not register any function at the start of the section,
1409 // then it must be a general PLT entry. Add a function at the location.
1410 if (BC->getBinaryFunctions().find(Section.getAddress()) ==
Rafael Auler1f6564f2019-08-26 22:03:381411 BC->getBinaryFunctions().end()) {
Maksim Panchenkoc9f5f472021-06-30 21:41:411412 BinaryFunction *BF = BC->createBinaryFunction(
1413 "__BOLT_PSEUDO_" + Section.getName().str(), Section,
1414 Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment());
Maksim Panchenko0ce0bce2020-06-15 07:15:471415 BF->setPseudo(true);
Maksim Panchenko49d1f562017-08-04 18:21:051416 }
1417 }
1418}
1419
Maksim Panchenkoe241e9c2016-09-29 18:19:061420void RewriteInstance::adjustFunctionBoundaries() {
Maksim Panchenko7fd48702019-04-03 22:52:011421 for (auto BFI = BC->getBinaryFunctions().begin(),
1422 BFE = BC->getBinaryFunctions().end();
Maksim Panchenko2ab74722017-10-10 21:54:091423 BFI != BFE; ++BFI) {
Amir Ayupovc7306cc2021-04-08 07:19:261424 BinaryFunction &Function = BFI->second;
Maksim Panchenkofe37f182021-05-13 17:50:471425 const BinaryFunction *NextFunction = nullptr;
Maksim Panchenko078ece12019-06-28 18:53:341426 if (std::next(BFI) != BFE)
1427 NextFunction = &std::next(BFI)->second;
Maksim Panchenkoe241e9c2016-09-29 18:19:061428
Maksim Panchenko99ef4c92019-04-16 17:24:341429 // Check if it's a fragment of a function.
Amir Ayupovc7306cc2021-04-08 07:19:261430 Optional<StringRef> FragName =
1431 Function.hasRestoredNameRegex(".*\\.cold(\\.[0-9]+)?");
Maksim Panchenko9ef9a7b2019-05-30 01:33:091432 if (FragName) {
Maksim Panchenko99ef4c92019-04-16 17:24:341433 static bool PrintedWarning = false;
1434 if (BC->HasRelocations && !PrintedWarning) {
1435 errs() << "BOLT-WARNING: split function detected on input : "
Rafael Auler1f6564f2019-08-26 22:03:381436 << *FragName << ". The support is limited in relocation mode.\n";
Maksim Panchenko99ef4c92019-04-16 17:24:341437 PrintedWarning = true;
1438 }
1439 Function.IsFragment = true;
1440 }
1441
Maksim Panchenko2ab74722017-10-10 21:54:091442 // Check if there's a symbol or a function with a larger address in the
1443 // same section. If there is - it determines the maximum size for the
1444 // current function. Otherwise, it is the size of a containing section
1445 // the defines it.
Maksim Panchenkoe241e9c2016-09-29 18:19:061446 //
1447 // NOTE: ignore some symbols that could be tolerated inside the body
1448 // of a function.
1449 auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress());
1450 while (NextSymRefI != FileSymRefs.end()) {
Amir Ayupovc7306cc2021-04-08 07:19:261451 SymbolRef &Symbol = NextSymRefI->second;
1452 const uint64_t SymbolAddress = NextSymRefI->first;
1453 const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
Maksim Panchenko078ece12019-06-28 18:53:341454
1455 if (NextFunction && SymbolAddress >= NextFunction->getAddress())
1456 break;
Maksim Panchenkoe241e9c2016-09-29 18:19:061457
1458 if (!Function.isSymbolValidInScope(Symbol, SymbolSize))
1459 break;
1460
1461 // This is potentially another entry point into the function.
Amir Ayupovc7306cc2021-04-08 07:19:261462 uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
Amir Ayupov1c5d3a02020-12-02 00:29:391463 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
1464 << Function << " at offset 0x"
1465 << Twine::utohexstr(EntryOffset) << '\n');
Rafael Auler907ca252017-11-23 00:17:361466 Function.addEntryPointAtOffset(EntryOffset);
Maksim Panchenkoe241e9c2016-09-29 18:19:061467
1468 ++NextSymRefI;
1469 }
Maksim Panchenkoe241e9c2016-09-29 18:19:061470
Maksim Panchenko2ab74722017-10-10 21:54:091471 // Function runs at most till the end of the containing section.
Maksim Panchenko0465d952020-10-09 23:06:271472 uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress();
Maksim Panchenko2ab74722017-10-10 21:54:091473 // Or till the next object marked by a symbol.
Maksim Panchenkoee0e9cc2021-12-23 20:38:331474 if (NextSymRefI != FileSymRefs.end())
Maksim Panchenko2ab74722017-10-10 21:54:091475 NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress);
Maksim Panchenkoee0e9cc2021-12-23 20:38:331476
Maksim Panchenko2ab74722017-10-10 21:54:091477 // Or till the next function not marked by a symbol.
Maksim Panchenkoee0e9cc2021-12-23 20:38:331478 if (NextFunction)
Rafael Auler1f6564f2019-08-26 22:03:381479 NextObjectAddress =
1480 std::min(NextFunction->getAddress(), NextObjectAddress);
Maksim Panchenkoe241e9c2016-09-29 18:19:061481
Amir Ayupovc7306cc2021-04-08 07:19:261482 const uint64_t MaxSize = NextObjectAddress - Function.getAddress();
Maksim Panchenkoe241e9c2016-09-29 18:19:061483 if (MaxSize < Function.getSize()) {
Maksim Panchenko55fc5412016-09-28 02:09:381484 errs() << "BOLT-ERROR: symbol seen in the middle of the function "
1485 << Function << ". Skipping.\n";
Maksim Panchenkoe241e9c2016-09-29 18:19:061486 Function.setSimple(false);
Maksim Panchenko55fc5412016-09-28 02:09:381487 Function.setMaxSize(Function.getSize());
Maksim Panchenkoe241e9c2016-09-29 18:19:061488 continue;
1489 }
1490 Function.setMaxSize(MaxSize);
Maksim Panchenko55fc5412016-09-28 02:09:381491 if (!Function.getSize() && Function.isSimple()) {
Maksim Panchenkoe241e9c2016-09-29 18:19:061492 // Some assembly functions have their size set to 0, use the max
1493 // size as their real size.
Maksim Panchenkoee0e9cc2021-12-23 20:38:331494 if (opts::Verbosity >= 1)
Rafael Auler1f6564f2019-08-26 22:03:381495 outs() << "BOLT-INFO: setting size of function " << Function << " to "
1496 << Function.getMaxSize() << " (was 0)\n";
Maksim Panchenkoe241e9c2016-09-29 18:19:061497 Function.setSize(Function.getMaxSize());
1498 }
1499 }
Rafael Aulerc67a7532015-11-24 01:54:181500}
1501
Maksim Panchenkoa7fb6102016-11-11 22:33:341502void RewriteInstance::relocateEHFrameSection() {
Bill Nell2640b402018-01-23 23:10:241503 assert(EHFrameSection && "non-empty .eh_frame section expected");
Maksim Panchenkoa7fb6102016-11-11 22:33:341504
Bill Nellddefc772018-02-02 00:33:431505 DWARFDataExtractor DE(EHFrameSection->getContents(),
1506 BC->AsmInfo->isLittleEndian(),
Rafael Auler8a5a3012018-02-06 23:00:231507 BC->AsmInfo->getCodePointerSize());
Maksim Panchenkoa7fb6102016-11-11 22:33:341508 auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) {
1509 if (DwarfType == dwarf::DW_EH_PE_omit)
1510 return;
1511
Maksim Panchenko606532b2020-04-16 07:02:351512 // Only fix references that are relative to other locations.
Maksim Panchenkoa7fb6102016-11-11 22:33:341513 if (!(DwarfType & dwarf::DW_EH_PE_pcrel) &&
1514 !(DwarfType & dwarf::DW_EH_PE_textrel) &&
1515 !(DwarfType & dwarf::DW_EH_PE_funcrel) &&
Maksim Panchenkoee0e9cc2021-12-23 20:38:331516 !(DwarfType & dwarf::DW_EH_PE_datarel))
Maksim Panchenkoa7fb6102016-11-11 22:33:341517 return;
Maksim Panchenkoa7fb6102016-11-11 22:33:341518
1519 if (!(DwarfType & dwarf::DW_EH_PE_sdata4))
1520 return;
1521
1522 uint64_t RelType;
1523 switch (DwarfType & 0x0f) {
1524 default:
1525 llvm_unreachable("unsupported DWARF encoding type");
1526 case dwarf::DW_EH_PE_sdata4:
1527 case dwarf::DW_EH_PE_udata4:
Rafael Auler35632d42020-10-07 22:40:511528 RelType = Relocation::getPC32();
Rafael Auler8a5a3012018-02-06 23:00:231529 Offset -= 4;
Maksim Panchenkoa7fb6102016-11-11 22:33:341530 break;
1531 case dwarf::DW_EH_PE_sdata8:
1532 case dwarf::DW_EH_PE_udata8:
Rafael Auler35632d42020-10-07 22:40:511533 RelType = Relocation::getPC64();
Rafael Auler8a5a3012018-02-06 23:00:231534 Offset -= 8;
Maksim Panchenkoa7fb6102016-11-11 22:33:341535 break;
1536 }
1537
Maksim Panchenko606532b2020-04-16 07:02:351538 // Create a relocation against an absolute value since the goal is to
1539 // preserve the contents of the section independent of the new values
1540 // of referenced symbols.
1541 EHFrameSection->addRelocation(Offset, nullptr, RelType, Value);
Maksim Panchenkoa7fb6102016-11-11 22:33:341542 };
1543
Maksim Panchenko40c2e0f2021-12-15 00:52:511544 Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc);
Amir Ayupov1c5d3a02020-12-02 00:29:391545 check_error(std::move(E), "failed to patch EH frame");
Maksim Panchenkoa7fb6102016-11-11 22:33:341546}
1547
Bill Nell729da2d2018-04-21 03:03:311548ArrayRef<uint8_t> RewriteInstance::getLSDAData() {
1549 return ArrayRef<uint8_t>(LSDASection->getData(),
1550 LSDASection->getContents().size());
1551}
1552
Maksim Panchenko40c2e0f2021-12-15 00:52:511553uint64_t RewriteInstance::getLSDAAddress() { return LSDASection->getAddress(); }
Bill Nell729da2d2018-04-21 03:03:311554
Amir Ayupovced54722022-03-08 17:12:191555Error RewriteInstance::readSpecialSections() {
Rafael Auler8a5a3012018-02-06 23:00:231556 NamedRegionTimer T("readSpecialSections", "read special sections",
1557 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
Bill Nell591e0ef2017-11-28 02:00:241558
Maksim Panchenkod5a02642017-03-23 05:05:501559 bool HasTextRelocations = false;
Maksim Panchenko2b152332019-04-26 22:30:121560 bool HasDebugInfo = false;
Maksim Panchenkod5a02642017-03-23 05:05:501561
Rafael Aulerc67a7532015-11-24 01:54:181562 // Process special sections.
Amir Ayupovc7306cc2021-04-08 07:19:261563 for (const SectionRef &Section : InputFile->sections()) {
Amir Ayupov1c5d3a02020-12-02 00:29:391564 Expected<StringRef> SectionNameOrErr = Section.getName();
1565 check_error(SectionNameOrErr.takeError(), "cannot get section name");
1566 StringRef SectionName = *SectionNameOrErr;
Maksim Panchenkoc6d0c562016-07-21 19:45:351567
Bill Nellddefc772018-02-02 00:33:431568 // Only register sections with names.
Bill Nell729da2d2018-04-21 03:03:311569 if (!SectionName.empty()) {
Amir Ayupovced54722022-03-08 17:12:191570 if (Error E = Section.getContents().takeError())
1571 return E;
Bill Nellddefc772018-02-02 00:33:431572 BC->registerSection(Section);
Amir Ayupov1c5d3a02020-12-02 00:29:391573 LLVM_DEBUG(
1574 dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x"
1575 << Twine::utohexstr(Section.getAddress()) << ":0x"
1576 << Twine::utohexstr(Section.getAddress() + Section.getSize())
1577 << "\n");
Maksim Panchenko2b152332019-04-26 22:30:121578 if (isDebugSection(SectionName))
1579 HasDebugInfo = true;
takh48b71ad2020-06-11 06:00:391580 if (isKSymtabSection(SectionName))
1581 opts::LinuxKernelMode = true;
Bill Nellddefc772018-02-02 00:33:431582 }
1583 }
1584
Rafael Auler21f43032019-04-13 00:33:461585 if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) {
Maksim Panchenko2b152332019-04-26 22:30:121586 errs() << "BOLT-WARNING: debug info will be stripped from the binary. "
1587 "Use -update-debug-sections to keep it.\n";
1588 }
1589
Bill Nell729da2d2018-04-21 03:03:311590 HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text");
1591 LSDASection = BC->getUniqueSectionByName(".gcc_except_table");
1592 EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
Bill Nell729da2d2018-04-21 03:03:311593 GOTPLTSection = BC->getUniqueSectionByName(".got.plt");
Bill Nell729da2d2018-04-21 03:03:311594 RelaPLTSection = BC->getUniqueSectionByName(".rela.plt");
Rafael Auler1f6564f2019-08-26 22:03:381595 RelaDynSection = BC->getUniqueSectionByName(".rela.dyn");
Rafael Auler9c4fcaf2018-08-09 00:55:241596 BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");
Laith Saed Sakka47558252019-05-16 00:19:181597 SDTSection = BC->getUniqueSectionByName(".note.stapsdt");
James Luo8a919592021-06-11 20:06:121598 PseudoProbeDescSection = BC->getUniqueSectionByName(".pseudo_probe_desc");
1599 PseudoProbeSection = BC->getUniqueSectionByName(".pseudo_probe");
Bill Nell729da2d2018-04-21 03:03:311600
Amir Ayupovc7306cc2021-04-08 07:19:261601 if (ErrorOr<BinarySection &> BATSec =
Rafael Auler21f43032019-04-13 00:33:461602 BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
Rafael Auler698a4682019-10-11 20:32:141603 // Do not read BAT when plotting a heatmap
1604 if (!opts::HeatmapMode) {
1605 if (std::error_code EC = BAT->parse(BATSec->getContents())) {
1606 errs() << "BOLT-ERROR: failed to parse BOLT address translation "
Maksim Panchenko40c2e0f2021-12-15 00:52:511607 "table.\n";
Rafael Auler698a4682019-10-11 20:32:141608 exit(1);
1609 }
Rafael Auler21f43032019-04-13 00:33:461610 }
1611 }
1612
Bill Nellddefc772018-02-02 00:33:431613 if (opts::PrintSections) {
1614 outs() << "BOLT-INFO: Sections from original binary:\n";
1615 BC->printSections(outs());
Rafael Aulerc67a7532015-11-24 01:54:181616 }
1617
Maksim Panchenkob6f7c682017-12-10 05:40:391618 if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) {
Maksim Panchenkod5a02642017-03-23 05:05:501619 errs() << "BOLT-ERROR: relocations against code are missing from the input "
1620 "file. Cannot proceed in relocations mode (-relocs).\n";
1621 exit(1);
1622 }
1623
Maksim Panchenko40c2e0f2021-12-15 00:52:511624 BC->HasRelocations =
1625 HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE);
laith sakka7d428352019-07-12 14:25:501626
Wenlei He459add22019-06-26 18:06:461627 // Force non-relocation mode for heatmap generation
Maksim Panchenkoee0e9cc2021-12-23 20:38:331628 if (opts::HeatmapMode)
Wenlei He459add22019-06-26 18:06:461629 BC->HasRelocations = false;
laith sakka7d428352019-07-12 14:25:501630
Maksim Panchenkoee0e9cc2021-12-23 20:38:331631 if (BC->HasRelocations)
Maksim Panchenkoe89ad0d2019-06-28 16:21:271632 outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "")
1633 << "relocation mode\n";
Maksim Panchenkob6f7c682017-12-10 05:40:391634
Maksim Panchenko33559362021-04-21 18:24:151635 // Read EH frame for function boundaries info.
1636 Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame();
1637 if (!EHFrameOrError)
1638 report_error("expected valid eh_frame section", EHFrameOrError.takeError());
Maksim Panchenko33559362021-04-21 18:24:151639 CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get()));
Rafael Auler9c4fcaf2018-08-09 00:55:241640
1641 // Parse build-id
1642 parseBuildID();
Maksim Panchenkoee0e9cc2021-12-23 20:38:331643 if (Optional<std::string> FileBuildID = getPrintableBuildID())
Maksim Panchenko87291712020-05-08 06:00:291644 BC->setFileBuildID(*FileBuildID);
Laith Saed Sakka47558252019-05-16 00:19:181645
1646 parseSDTNotes();
Maksim Panchenko74a27772020-03-09 02:04:391647
Maksim Panchenko250ca402020-06-26 23:52:071648 // Read .dynamic/PT_DYNAMIC.
Amir Ayupov1e016c32022-03-08 17:17:411649 return readELFDynamic();
Rafael Aulerc67a7532015-11-24 01:54:181650}
1651
Maksim Panchenko120d2672018-04-13 22:46:191652void RewriteInstance::adjustCommandLineOptions() {
Maksim Panchenkoee0e9cc2021-12-23 20:38:331653 if (BC->isAArch64() && !BC->HasRelocations)
Maksim Panchenko120d2672018-04-13 22:46:191654 errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
1655 "supported\n";
Maksim Panchenko120d2672018-04-13 22:46:191656
Maksim Panchenkoee0e9cc2021-12-23 20:38:331657 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
Xun Li9bd71612020-05-02 18:14:381658 RtLibrary->adjustCommandLineOptions(*BC);
Rafael Auler0d23cba2019-06-20 03:10:491659
Maksim Panchenko120d2672018-04-13 22:46:191660 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
1661 outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
1662 opts::AlignMacroOpFusion = MFT_NONE;
1663 }
Maksim Panchenko6bcb3382019-03-15 20:43:361664
Amir Ayupov1c5d3a02020-12-02 00:29:391665 if (BC->isX86() && BC->MAB->allowAutoPadding()) {
Rafael Aulerc82e7fd2020-02-11 02:50:531666 if (!BC->HasRelocations) {
1667 errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in "
1668 "non-relocation mode\n";
1669 exit(1);
1670 }
1671 outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
1672 "may take several minutes\n";
1673 opts::AlignMacroOpFusion = MFT_NONE;
1674 }
1675
Maksim Panchenko23edb3e2020-04-19 22:02:501676 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
Maksim Panchenko120d2672018-04-13 22:46:191677 outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
1678 "mode\n";
1679 opts::AlignMacroOpFusion = MFT_NONE;
1680 }
Maksim Panchenko6bcb3382019-03-15 20:43:361681
Rafael Auler72ecd12f2018-06-25 21:55:481682 if (opts::SplitEH && !BC->HasRelocations) {
Maksim Panchenko492e4a52019-04-26 00:00:051683 errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n";
Rafael Auler72ecd12f2018-06-25 21:55:481684 opts::SplitEH = false;
1685 }
Maksim Panchenko6bcb3382019-03-15 20:43:361686
Maksim Panchenkoe89ad0d2019-06-28 16:21:271687 if (opts::StrictMode && !BC->HasRelocations) {
1688 errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation "
1689 "mode\n";
1690 opts::StrictMode = false;
1691 }
1692
Maksim Panchenko79ff4ec2019-06-11 20:24:101693 if (BC->HasRelocations && opts::AggregateOnly &&
1694 !opts::StrictMode.getNumOccurrences()) {
Rafael Auler28f91872019-11-15 00:07:111695 outs() << "BOLT-INFO: enabling strict relocation mode for aggregation "
Maksim Panchenko79ff4ec2019-06-11 20:24:101696 "purposes\n";
1697 opts::StrictMode = true;
1698 }
1699
Maksim Panchenko120d2672018-04-13 22:46:191700 if (BC->isX86() && BC->HasRelocations &&
Maksim Panchenko87291712020-05-08 06:00:291701 opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
Maksim Panchenko120d2672018-04-13 22:46:191702 outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
1703 "was specified\n";
1704 opts::AlignMacroOpFusion = MFT_ALL;
1705 }
Maksim Panchenko163adbe2019-03-15 01:51:051706
Maksim Panchenko492e4a52019-04-26 00:00:051707 if (!BC->HasRelocations &&
1708 opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
1709 errs() << "BOLT-ERROR: function reordering only works when "
1710 << "relocations are enabled\n";
1711 exit(1);
1712 }
1713
1714 if (opts::ReorderFunctions != ReorderFunctions::RT_NONE &&
1715 !opts::HotText.getNumOccurrences()) {
1716 opts::HotText = true;
1717 } else if (opts::HotText && !BC->HasRelocations) {
1718 errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n";
Maksim Panchenko163adbe2019-03-15 01:51:051719 opts::HotText = false;
1720 }
Maksim Panchenko6bcb3382019-03-15 20:43:361721
1722 if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
1723 opts::HotTextMoveSections.addValue(".stub");
1724 opts::HotTextMoveSections.addValue(".mover");
Maksim Panchenko22ba3dc2019-04-16 17:39:051725 opts::HotTextMoveSections.addValue(".never_hugify");
Maksim Panchenko6bcb3382019-03-15 20:43:361726 }
Maksim Panchenko2df4e7b2020-02-25 01:12:411727
1728 if (opts::UseOldText && !BC->OldTextSectionAddress) {
1729 errs() << "BOLT-WARNING: cannot use old .text as the section was not found"
1730 "\n";
1731 opts::UseOldText = false;
1732 }
Maksim Panchenko0ce0bce2020-06-15 07:15:471733 if (opts::UseOldText && !BC->HasRelocations) {
1734 errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n";
1735 opts::UseOldText = false;
1736 }
1737
Maksim Panchenkoee0e9cc2021-12-23 20:38:331738 if (!opts::AlignText.getNumOccurrences())
Maksim Panchenko23edb3e2020-04-19 22:02:501739 opts::AlignText = BC->PageAlign;
Maksim Panchenko924d0bd2020-05-03 22:49:581740
Vladislav Khmelevsky62a289d2022-03-15 19:17:511741 if (opts::AlignText < opts::AlignFunctions)
1742 opts::AlignText = (unsigned)opts::AlignFunctions;
1743
Maksim Panchenko40c2e0f2021-12-15 00:52:511744 if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode &&
Maksim Panchenkoee0e9cc2021-12-23 20:38:331745 !opts::UseOldText)
Maksim Panchenko924d0bd2020-05-03 22:49:581746 opts::Lite = true;
Maksim Panchenko0ce0bce2020-06-15 07:15:471747
1748 if (opts::Lite && opts::UseOldText) {
1749 errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. "
1750 "Disabling -use-old-text.\n";
1751 opts::UseOldText = false;
1752 }
1753
Maksim Panchenkod6d88392020-10-17 22:09:061754 if (opts::Lite && opts::StrictMode) {
Maksim Panchenko0ce0bce2020-06-15 07:15:471755 errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n";
1756 exit(1);
Maksim Panchenko924d0bd2020-05-03 22:49:581757 }
Rafael Auler65478132020-08-06 21:43:331758
Maksim Panchenkoee0e9cc2021-12-23 20:38:331759 if (opts::Lite)
Rafael Auler65478132020-08-06 21:43:331760 outs() << "BOLT-INFO: enabling lite mode\n";
Maksim Panchenkoaaf49b02020-08-13 01:10:411761
1762 if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) {
1763 errs() << "BOLT-ERROR: unable to save profile in YAML format for input "
1764 "file processed by BOLT. Please remove -w option and use branch "
1765 "profile.\n";
1766 exit(1);
1767 }
Maksim Panchenko120d2672018-04-13 22:46:191768}
1769
Maksim Panchenko55fc5412016-09-28 02:09:381770namespace {
1771template <typename ELFT>
1772int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,
1773 const RelocationRef &RelRef) {
Amir Ayupovc7306cc2021-04-08 07:19:261774 using ELFShdrTy = typename ELFT::Shdr;
1775 using Elf_Rela = typename ELFT::Rela;
Maksim Panchenko55fc5412016-09-28 02:09:381776 int64_t Addend = 0;
Amir Ayupov1c5d3a02020-12-02 00:29:391777 const ELFFile<ELFT> &EF = Obj->getELFFile();
Maksim Panchenko55fc5412016-09-28 02:09:381778 DataRefImpl Rel = RelRef.getRawDataRefImpl();
Amir Ayupovc7306cc2021-04-08 07:19:261779 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
Maksim Panchenko55fc5412016-09-28 02:09:381780 switch (RelocationSection->sh_type) {
Maksim Panchenko40c2e0f2021-12-15 00:52:511781 default:
1782 llvm_unreachable("unexpected relocation section type");
Maksim Panchenko55fc5412016-09-28 02:09:381783 case ELF::SHT_REL:
1784 break;
1785 case ELF::SHT_RELA: {
Amir Ayupovc7306cc2021-04-08 07:19:261786 const Elf_Rela *RelA = Obj->getRela(Rel);
Maksim Panchenko55fc5412016-09-28 02:09:381787 Addend = RelA->r_addend;
1788 break;
1789 }
1790 }
1791
1792 return Addend;
1793}
1794
1795int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
Maksim Panchenko40c2e0f2021-12-15 00:52:511796 const RelocationRef &Rel) {
Maksim Panchenko55fc5412016-09-28 02:09:381797 if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
1798 return getRelocationAddend(ELF32LE, Rel);
1799 if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
1800 return getRelocationAddend(ELF64LE, Rel);
1801 if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
1802 return getRelocationAddend(ELF32BE, Rel);
1803 auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
1804 return getRelocationAddend(ELF64BE, Rel);
1805}
Vladislav Khmelevsky729d29e2022-02-16 15:13:441806
1807template <typename ELFT>
1808uint32_t getRelocationSymbol(const ELFObjectFile<ELFT> *Obj,
1809 const RelocationRef &RelRef) {
1810 using ELFShdrTy = typename ELFT::Shdr;
1811 uint32_t Symbol = 0;
1812 const ELFFile<ELFT> &EF = Obj->getELFFile();
1813 DataRefImpl Rel = RelRef.getRawDataRefImpl();
1814 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
1815 switch (RelocationSection->sh_type) {
1816 default:
1817 llvm_unreachable("unexpected relocation section type");
1818 case ELF::SHT_REL:
1819 Symbol = Obj->getRel(Rel)->getSymbol(EF.isMips64EL());
1820 break;
1821 case ELF::SHT_RELA:
1822 Symbol = Obj->getRela(Rel)->getSymbol(EF.isMips64EL());
1823 break;
1824 }
1825
1826 return Symbol;
1827}
1828
1829uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj,
1830 const RelocationRef &Rel) {
1831 if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
1832 return getRelocationSymbol(ELF32LE, Rel);
1833 if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
1834 return getRelocationSymbol(ELF64LE, Rel);
1835 if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
1836 return getRelocationSymbol(ELF32BE, Rel);
1837 auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
1838 return getRelocationSymbol(ELF64BE, Rel);
1839}
Maksim Panchenko55fc5412016-09-28 02:09:381840} // anonymous namespace
1841
Vladislav Khmelevsky00c06592021-09-08 10:37:191842bool RewriteInstance::analyzeRelocation(
1843 const RelocationRef &Rel, uint64_t RType, std::string &SymbolName,
1844 bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend,
1845 uint64_t &ExtractedValue, bool &Skip) const {
1846 Skip = false;
Maksim Panchenkoe50e89b2019-04-12 00:11:081847 if (!Relocation::isSupported(RType))
Bill Nell89feb842018-01-24 13:42:111848 return false;
1849
Rafael Auler7df6a6d2018-03-20 21:34:581850 const bool IsAArch64 = BC->isAArch64();
Rafael Auler8a5a3012018-02-06 23:00:231851
Amir Ayupovc7306cc2021-04-08 07:19:261852 const size_t RelSize = Relocation::getSizeForType(RType);
Maksim Panchenkoa8e05d02019-04-09 19:29:401853
Amir Ayupovc7306cc2021-04-08 07:19:261854 ErrorOr<uint64_t> Value =
1855 BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
Maksim Panchenkoa8e05d02019-04-09 19:29:401856 assert(Value && "failed to extract relocated value");
Vladislav Khmelevsky00c06592021-09-08 10:37:191857 if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
1858 return true;
1859
Maksim Panchenko40c2e0f2021-12-15 00:52:511860 ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
Maksim Panchenkoce508b52018-09-21 19:00:201861 Addend = getRelocationAddend(InputFile, Rel);
1862
Amir Ayupovc7306cc2021-04-08 07:19:261863 const bool IsPCRelative = Relocation::isPCRelative(RType);
1864 const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
Maksim Panchenkoce508b52018-09-21 19:00:201865 bool SkipVerification = false;
1866 auto SymbolIter = Rel.getSymbol();
1867 if (SymbolIter == InputFile->symbol_end()) {
Maksim Panchenko06e7a1e2019-06-27 10:20:171868 SymbolAddress = ExtractedValue - Addend + PCRelOffset;
Amir Ayupovc7306cc2021-04-08 07:19:261869 MCSymbol *RelSymbol =
1870 BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat");
Amir Ayupov1c5d3a02020-12-02 00:29:391871 SymbolName = std::string(RelSymbol->getName());
Maksim Panchenkoce508b52018-09-21 19:00:201872 IsSectionRelocation = false;
1873 } else {
Amir Ayupovc7306cc2021-04-08 07:19:261874 const SymbolRef &Symbol = *SymbolIter;
Amir Ayupov1c5d3a02020-12-02 00:29:391875 SymbolName = std::string(cantFail(Symbol.getName()));
Maksim Panchenkoce508b52018-09-21 19:00:201876 SymbolAddress = cantFail(Symbol.getAddress());
1877 SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other);
1878 // Section symbols are marked as ST_Debug.
1879 IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
Vladislav Khmelevsky00b6efc2022-01-25 00:22:471880 // Check for PLT entry registered with symbol name
1881 if (!SymbolAddress && IsAArch64) {
Vladislav Khmelevsky4956e0e2022-04-03 16:11:311882 const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName);
Vladislav Khmelevsky00b6efc2022-01-25 00:22:471883 SymbolAddress = BD ? BD->getAddress() : 0;
1884 }
Maksim Panchenko06e7a1e2019-06-27 10:20:171885 }
Rafael Auler28f91872019-11-15 00:07:111886 // For PIE or dynamic libs, the linker may choose not to put the relocation
1887 // result at the address if it is a X86_64_64 one because it will emit a
1888 // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to
1889 // resolve it at run time. The static relocation result goes as the addend
1890 // of the dynamic relocation in this case. We can't verify these cases.
1891 // FIXME: perhaps we can try to find if it really emitted a corresponding
1892 // RELATIVE relocation at this offset with the correct value as the addend.
1893 if (!BC->HasFixedLoadAddress && RelSize == 8)
1894 SkipVerification = true;
Maksim Panchenko06e7a1e2019-06-27 10:20:171895
1896 if (IsSectionRelocation && !IsAArch64) {
Amir Ayupovc7306cc2021-04-08 07:19:261897 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
Maksim Panchenko06e7a1e2019-06-27 10:20:171898 assert(Section && "section expected for section relocation");
1899 SymbolName = "section " + std::string(Section->getName());
1900 // Convert section symbol relocations to regular relocations inside
1901 // non-section symbols.
1902 if (Section->containsAddress(ExtractedValue) && !IsPCRelative) {
1903 SymbolAddress = ExtractedValue;
1904 Addend = 0;
1905 } else {
1906 Addend = ExtractedValue - (SymbolAddress - PCRelOffset);
Maksim Panchenkoce508b52018-09-21 19:00:201907 }
1908 }
Bill Nell89feb842018-01-24 13:42:111909
1910 // If no symbol has been found or if it is a relocation requiring the
1911 // creation of a GOT entry, do not link against the symbol but against
1912 // whatever address was extracted from the instruction itself. We are
1913 // not creating a GOT entry as this was already processed by the linker.
Rafael Auler74a71c62018-10-12 01:12:091914 // For GOT relocs, do not subtract addend as the addend does not refer
1915 // to this instruction's target, but it refers to the target in the GOT
1916 // entry.
Maksim Panchenkoe50e89b2019-04-12 00:11:081917 if (Relocation::isGOT(RType)) {
Rafael Auler74a71c62018-10-12 01:12:091918 Addend = 0;
1919 SymbolAddress = ExtractedValue + PCRelOffset;
Vladislav Khmelevsky542c03c2021-09-02 18:04:331920 } else if (Relocation::isTLS(RType)) {
1921 SkipVerification = true;
Rafael Auler74a71c62018-10-12 01:12:091922 } else if (!SymbolAddress) {
Maksim Panchenkoce508b52018-09-21 19:00:201923 assert(!IsSectionRelocation);
Rafael Auler74a71c62018-10-12 01:12:091924 if (ExtractedValue || Addend == 0 || IsPCRelative) {
Maksim Panchenko40c2e0f2021-12-15 00:52:511925 SymbolAddress =
1926 truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize);
Bill Nell89feb842018-01-24 13:42:111927 } else {
1928 // This is weird case. The extracted value is zero but the addend is
1929 // non-zero and the relocation is not pc-rel. Using the previous logic,
1930 // the SymbolAddress would end up as a huge number. Seen in
1931 // exceptions_pic.test.
Amir Ayupov1c5d3a02020-12-02 00:29:391932 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x"
1933 << Twine::utohexstr(Rel.getOffset())
1934 << " value does not match addend for "
1935 << "relocation to undefined symbol.\n");
Bill Nell89feb842018-01-24 13:42:111936 return true;
1937 }
Bill Nell89feb842018-01-24 13:42:111938 }
1939
Maksim Panchenko39f6fcc2018-07-30 17:29:471940 auto verifyExtractedValue = [&]() {
Maksim Panchenkoce508b52018-09-21 19:00:201941 if (SkipVerification)
1942 return true;
1943
Maksim Panchenko39f6fcc2018-07-30 17:29:471944 if (IsAArch64)
1945 return true;
Bill Nell89feb842018-01-24 13:42:111946
Maksim Panchenko39f6fcc2018-07-30 17:29:471947 if (SymbolName == "__hot_start" || SymbolName == "__hot_end")
1948 return true;
Bill Nell89feb842018-01-24 13:42:111949
Maksim Panchenkoe233dec2020-07-01 02:58:431950 if (RType == ELF::R_X86_64_PLT32)
1951 return true;
1952
Maksim Panchenko39f6fcc2018-07-30 17:29:471953 return truncateToSize(ExtractedValue, RelSize) ==
1954 truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize);
1955 };
1956
Joey Thaman4c12afc2021-06-29 19:11:561957 (void)verifyExtractedValue;
Maksim Panchenko39f6fcc2018-07-30 17:29:471958 assert(verifyExtractedValue() && "mismatched extracted relocation value");
Bill Nell89feb842018-01-24 13:42:111959
1960 return true;
1961}
1962
Maksim Panchenko38c58872021-06-22 20:46:061963void RewriteInstance::processDynamicRelocations() {
Maksim Panchenko1de07462021-06-30 21:38:501964 // Read relocations for PLT - DT_JMPREL.
1965 if (PLTRelocationsSize > 0) {
1966 ErrorOr<BinarySection &> PLTRelSectionOrErr =
1967 BC->getSectionForAddress(*PLTRelocationsAddress);
Maksim Panchenkoee0e9cc2021-12-23 20:38:331968 if (!PLTRelSectionOrErr)
Maksim Panchenko1de07462021-06-30 21:38:501969 report_error("unable to find section corresponding to DT_JMPREL",
1970 PLTRelSectionOrErr.getError());
Maksim Panchenkoee0e9cc2021-12-23 20:38:331971 if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize)
Maksim Panchenko1de07462021-06-30 21:38:501972 report_error("section size mismatch for DT_PLTRELSZ",
1973 errc::executable_format_error);
Vladislav Khmelevsky729d29e2022-02-16 15:13:441974 readDynamicRelocations(PLTRelSectionOrErr->getSectionRef(),
1975 /*IsJmpRel*/ true);
Maksim Panchenko1de07462021-06-30 21:38:501976 }
1977
1978 // The rest of dynamic relocations - DT_RELA.
1979 if (DynamicRelocationsSize > 0) {
1980 ErrorOr<BinarySection &> DynamicRelSectionOrErr =
1981 BC->getSectionForAddress(*DynamicRelocationsAddress);
Maksim Panchenkoee0e9cc2021-12-23 20:38:331982 if (!DynamicRelSectionOrErr)
Maksim Panchenko1de07462021-06-30 21:38:501983 report_error("unable to find section corresponding to DT_RELA",
1984 DynamicRelSectionOrErr.getError());
Maksim Panchenkoee0e9cc2021-12-23 20:38:331985 if (DynamicRelSectionOrErr->getSize() != DynamicRelocationsSize)
Maksim Panchenko1de07462021-06-30 21:38:501986 report_error("section size mismatch for DT_RELASZ",
1987 errc::executable_format_error);
Vladislav Khmelevsky729d29e2022-02-16 15:13:441988 readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef(),
1989 /*IsJmpRel*/ false);
Maksim Panchenko4aaa8892020-06-23 19:22:581990 }
Maksim Panchenko38c58872021-06-22 20:46:061991}
1992
1993void RewriteInstance::processRelocations() {
1994 if (!BC->HasRelocations)
1995 return;
Maksim Panchenko4aaa8892020-06-23 19:22:581996
Amir Ayupovc7306cc2021-04-08 07:19:261997 for (const SectionRef &Section : InputFile->sections()) {
Amir Ayupov1c5d3a02020-12-02 00:29:391998 if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() &&
Maksim Panchenkoee0e9cc2021-12-23 20:38:331999 !BinarySection(*BC, Section).isAllocatable())
Maksim Panchenkod89bb532020-02-25 01:10:022000 readRelocations(Section);
Maksim Panchenko4aaa8892020-06-23 19:22:582001 }
Vladislav Khmelevskya1036e42021-08-21 23:44:302002
2003 if (NumFailedRelocations)
2004 errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations
2005 << " relocations\n";
Maksim Panchenko4aaa8892020-06-23 19:22:582006}
2007
Maksim Panchenkoa10f7992020-09-15 18:42:032008void RewriteInstance::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
2009 int32_t PCRelativeOffset,
2010 bool IsPCRelative, StringRef SectionName) {
Maksim Panchenko40c2e0f2021-12-15 00:52:512011 BC->LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{
2012 SectionOffset, PCRelativeOffset, IsPCRelative, SectionName});
Maksim Panchenkoa10f7992020-09-15 18:42:032013}
2014
2015void RewriteInstance::processLKSections() {
2016 assert(opts::LinuxKernelMode &&
2017 "process Linux Kernel special sections and their relocations only in "
2018 "linux kernel mode.\n");
2019
2020 processLKExTable();
2021 processLKPCIFixup();
2022 processLKKSymtab();
2023 processLKKSymtab(true);
2024 processLKBugTable();
2025 processLKSMPLocks();
2026}
2027
2028/// Process __ex_table section of Linux Kernel.
2029/// This section contains information regarding kernel level exception
2030/// handling (https://www.kernel.org/doc/html/latest/x86/exception-tables.html).
2031/// More documentation is in arch/x86/include/asm/extable.h.
2032///
2033/// The section is the list of the following structures:
2034///
2035/// struct exception_table_entry {
2036/// int insn;
2037/// int fixup;
2038/// int handler;
2039/// };
2040///
2041void RewriteInstance::processLKExTable() {
Amir Ayupovc7306cc2021-04-08 07:19:262042 ErrorOr<BinarySection &> SectionOrError =
2043 BC->getUniqueSectionByName("__ex_table");
Maksim Panchenkoa10f7992020-09-15 18:42:032044 if (!SectionOrError)
2045 return;
2046
2047 const uint64_t SectionSize = SectionOrError->getSize();
2048 const uint64_t SectionAddress = SectionOrError->getAddress();
2049 assert((SectionSize % 12) == 0 &&
2050 "The size of the __ex_table section should be a multiple of 12");
2051 for (uint64_t I = 0; I < SectionSize; I += 4) {
2052 const uint64_t EntryAddress = SectionAddress + I;
Amir Ayupovc7306cc2021-04-08 07:19:262053 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
Maksim Panchenkoa10f7992020-09-15 18:42:032054 assert(Offset && "failed reading PC-relative offset for __ex_table");
2055 int32_t SignedOffset = *Offset;
2056 const uint64_t RefAddress = EntryAddress + SignedOffset;
2057
Amir Ayupovc7306cc2021-04-08 07:19:262058 BinaryFunction *ContainingBF =
2059 BC->getBinaryFunctionContainingAddress(RefAddress);
Maksim Panchenkoa10f7992020-09-15 18:42:032060 if (!ContainingBF)
2061 continue;
2062
2063 MCSymbol *ReferencedSymbol = ContainingBF->getSymbol();
2064 const uint64_t FunctionOffset = RefAddress - ContainingBF->getAddress();
2065 switch (I % 12) {
2066 default:
2067 llvm_unreachable("bad alignment of __ex_table");
2068 break;
2069 case 0:
2070 // insn
2071 insertLKMarker(RefAddress, I, SignedOffset, true, "__ex_table");
2072 break;
2073 case 4:
2074 // fixup
2075 if (FunctionOffset)
2076 ReferencedSymbol = ContainingBF->addEntryPointAtOffset(FunctionOffset);
Rafael Auler35632d42020-10-07 22:40:512077 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(),
2078 0, *Offset);
Maksim Panchenkoa10f7992020-09-15 18:42:032079 break;
2080 case 8:
2081 // handler
2082 assert(!FunctionOffset &&
2083 "__ex_table handler entry should point to function start");
Rafael Auler35632d42020-10-07 22:40:512084 BC->addRelocation(EntryAddress, ReferencedSymbol, Relocation::getPC32(),
2085 0, *Offset);
Maksim Panchenkoa10f7992020-09-15 18:42:032086 break;
2087 }
2088 }
2089}
2090
2091/// Process .pci_fixup section of Linux Kernel.
2092/// This section contains a list of entries for different PCI devices and their
2093/// corresponding hook handler (code pointer where the fixup
2094/// code resides, usually on x86_64 it is an entry PC relative 32 bit offset).
2095/// Documentation is in include/linux/pci.h.
2096void RewriteInstance::processLKPCIFixup() {
Amir Ayupovc7306cc2021-04-08 07:19:262097 ErrorOr<BinarySection &> SectionOrError =
2098 BC->getUniqueSectionByName(".pci_fixup");
Maksim Panchenkoa10f7992020-09-15 18:42:032099 assert(SectionOrError &&
2100 ".pci_fixup section not found in Linux Kernel binary");
2101 const uint64_t SectionSize = SectionOrError->getSize();
2102 const uint64_t SectionAddress = SectionOrError->getAddress();
2103 assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16");
2104
2105 for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) {
2106 const uint64_t PC = SectionAddress + I;
Amir Ayupovc7306cc2021-04-08 07:19:262107 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(PC, 4);
Maksim Panchenkoa10f7992020-09-15 18:42:032108 assert(Offset && "cannot read value from .pci_fixup");
2109 const int32_t SignedOffset = *Offset;
2110 const uint64_t HookupAddress = PC + SignedOffset;
Amir Ayupovc7306cc2021-04-08 07:19:262111 BinaryFunction *HookupFunction =
2112 BC->getBinaryFunctionAtAddress(HookupAddress);
Maksim Panchenkoa10f7992020-09-15 18:42:032113 assert(HookupFunction && "expected function for entry in .pci_fixup");
Maksim Panchenko40c2e0f2021-12-15 00:52:512114 BC->addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0,
2115 *Offset);
Maksim Panchenkoa10f7992020-09-15 18:42:032116 }
2117}
2118
2119/// Process __ksymtab[_gpl] sections of Linux Kernel.
2120/// This section lists all the vmlinux symbols that kernel modules can access.
2121///
2122/// All the entries are 4 bytes each and hence we can read them by one by one
2123/// and ignore the ones that are not pointing to the .text section. All pointers
2124/// are PC relative offsets. Always, points to the beginning of the function.
2125void RewriteInstance::processLKKSymtab(bool IsGPL) {
2126 StringRef SectionName = "__ksymtab";
Maksim Panchenkoee0e9cc2021-12-23 20:38:332127 if (IsGPL)
Maksim Panchenkoa10f7992020-09-15 18:42:032128 SectionName = "__ksymtab_gpl";
Amir Ayupovc7306cc2021-04-08 07:19:262129 ErrorOr<BinarySection &> SectionOrError =
2130 BC->getUniqueSectionByName(SectionName);
Maksim Panchenkoa10f7992020-09-15 18:42:032131 assert(SectionOrError &&
2132 "__ksymtab[_gpl] section not found in Linux Kernel binary");
2133 const uint64_t SectionSize = SectionOrError->getSize();
2134 const uint64_t SectionAddress = SectionOrError->getAddress();
2135 assert((SectionSize % 4) == 0 &&
2136 "The size of the __ksymtab[_gpl] section should be a multiple of 4");
2137
2138 for (uint64_t I = 0; I < SectionSize; I += 4) {
2139 const uint64_t EntryAddress = SectionAddress + I;
Amir Ayupovc7306cc2021-04-08 07:19:262140 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
Maksim Panchenkoa10f7992020-09-15 18:42:032141 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
2142 const int32_t SignedOffset = *Offset;
2143 const uint64_t RefAddress = EntryAddress + SignedOffset;
Amir Ayupovc7306cc2021-04-08 07:19:262144 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(RefAddress);
Maksim Panchenkoa10f7992020-09-15 18:42:032145 if (!BF)
2146 continue;
2147
Rafael Auler35632d42020-10-07 22:40:512148 BC->addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0,
Maksim Panchenkoa10f7992020-09-15 18:42:032149 *Offset);
2150 }
2151}
2152
2153/// Process __bug_table section.
2154/// This section contains information useful for kernel debugging.
2155/// Each entry in the section is a struct bug_entry that contains a pointer to
2156/// the ud2 instruction corresponding to the bug, corresponding file name (both
2157/// pointers use PC relative offset addressing), line number, and flags.
2158/// The definition of the struct bug_entry can be found in
2159/// `include/asm-generic/bug.h`
2160void RewriteInstance::processLKBugTable() {
Amir Ayupovc7306cc2021-04-08 07:19:262161 ErrorOr<BinarySection &> SectionOrError =
2162 BC->getUniqueSectionByName("__bug_table");
Maksim Panchenkoa10f7992020-09-15 18:42:032163 if (!SectionOrError)
2164 return;
2165
2166 const uint64_t SectionSize = SectionOrError->getSize();
2167 const uint64_t SectionAddress = SectionOrError->getAddress();
2168 assert((SectionSize % 12) == 0 &&
2169 "The size of the __bug_table section should be a multiple of 12");
2170 for (uint64_t I = 0; I < SectionSize; I += 12) {
2171 const uint64_t EntryAddress = SectionAddress + I;
Amir Ayupovc7306cc2021-04-08 07:19:262172 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
Maksim Panchenkoa10f7992020-09-15 18:42:032173 assert(Offset &&
2174 "Reading valid PC-relative offset for a __bug_table entry");
2175 const int32_t SignedOffset = *Offset;
2176 const uint64_t RefAddress = EntryAddress + SignedOffset;
Joey Thaman4c12afc2021-06-29 19:11:562177 assert(BC->getBinaryFunctionContainingAddress(RefAddress) &&
2178 "__bug_table entries should point to a function");
Maksim Panchenkoa10f7992020-09-15 18:42:032179
2180 insertLKMarker(RefAddress, I, SignedOffset, true, "__bug_table");
2181 }
2182}
2183
Maksim Panchenkoa10f7992020-09-15 18:42:032184/// .smp_locks section contains PC-relative references to instructions with LOCK
2185/// prefix. The prefix can be converted to NOP at boot time on non-SMP systems.
2186void RewriteInstance::processLKSMPLocks() {
Amir Ayupovc7306cc2021-04-08 07:19:262187 ErrorOr<BinarySection &> SectionOrError =
2188 BC->getUniqueSectionByName(".smp_locks");
Maksim Panchenkoa10f7992020-09-15 18:42:032189 if (!SectionOrError)
2190 return;
2191
2192 uint64_t SectionSize = SectionOrError->getSize();
2193 const uint64_t SectionAddress = SectionOrError->getAddress();
2194 assert((SectionSize % 4) == 0 &&
2195 "The size of the .smp_locks section should be a multiple of 4");
2196
2197 for (uint64_t I = 0; I < SectionSize; I += 4) {
2198 const uint64_t EntryAddress = SectionAddress + I;
Amir Ayupovc7306cc2021-04-08 07:19:262199 ErrorOr<uint64_t> Offset = BC->getSignedValueAtAddress(EntryAddress, 4);
Maksim Panchenkoa10f7992020-09-15 18:42:032200 assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry");
2201 int32_t SignedOffset = *Offset;
2202 uint64_t RefAddress = EntryAddress + SignedOffset;
2203
Amir Ayupovc7306cc2021-04-08 07:19:262204 BinaryFunction *ContainingBF =
2205 BC->getBinaryFunctionContainingAddress(RefAddress);
Maksim Panchenkoa10f7992020-09-15 18:42:032206 if (!ContainingBF)
2207 continue;
2208
2209 insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks");
2210 }
2211}
2212
Vladislav Khmelevsky729d29e2022-02-16 15:13:442213void RewriteInstance::readDynamicRelocations(const SectionRef &Section,
2214 bool IsJmpRel) {
Maksim Panchenko4aaa8892020-06-23 19:22:582215 assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected");
2216
Joey Thaman4c12afc2021-06-29 19:11:562217 LLVM_DEBUG({
2218 StringRef SectionName = cantFail(Section.getName());
2219 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
2220 << ":\n";
2221 });
Maksim Panchenko4aaa8892020-06-23 19:22:582222
Amir Ayupovc7306cc2021-04-08 07:19:262223 for (const RelocationRef &Rel : Section.relocations()) {
Vladislav Khmelevsky729d29e2022-02-16 15:13:442224 const uint64_t RType = Rel.getType();
Vladislav Khmelevskyec9751e2021-02-17 23:36:582225 if (Relocation::isNone(RType))
2226 continue;
Maksim Panchenko4aaa8892020-06-23 19:22:582227
2228 StringRef SymbolName = "<none>";
2229 MCSymbol *Symbol = nullptr;
2230 uint64_t SymbolAddress = 0;
2231 const uint64_t Addend = getRelocationAddend(InputFile, Rel);
2232
Amir Ayupovc7306cc2021-04-08 07:19:262233 symbol_iterator SymbolIter = Rel.getSymbol();
Maksim Panchenko4aaa8892020-06-23 19:22:582234 if (SymbolIter != InputFile->symbol_end()) {
2235 SymbolName = cantFail(SymbolIter->getName());
Amir Ayupovc7306cc2021-04-08 07:19:262236 BinaryData *BD = BC->getBinaryDataByName(SymbolName);
Maksim Panchenko1de07462021-06-30 21:38:502237 Symbol = BD ? BD->getSymbol()
2238 : BC->getOrCreateUndefinedGlobalSymbol(SymbolName);
Maksim Panchenko4aaa8892020-06-23 19:22:582239 SymbolAddress = cantFail(SymbolIter->getAddress());
2240 (void)SymbolAddress;
2241 }
2242
Amir Ayupov1c5d3a02020-12-02 00:29:392243 LLVM_DEBUG(
Maksim Panchenko4aaa8892020-06-23 19:22:582244 SmallString<16> TypeName;
2245 Rel.getTypeName(TypeName);
2246 dbgs() << "BOLT-DEBUG: dynamic relocation at 0x"
2247 << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName
2248 << " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress)
2249 << " : + 0x" << Twine::utohexstr(Addend) << '\n'
2250 );
2251
Vladislav Khmelevsky729d29e2022-02-16 15:13:442252 if (IsJmpRel)
2253 IsJmpRelocation[RType] = true;
2254
2255 if (Symbol)
2256 SymbolIndex[Symbol] = getRelocationSymbol(InputFile, Rel);
2257
2258 BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend);
Maksim Panchenkod89bb532020-02-25 01:10:022259 }
2260}
2261
2262void RewriteInstance::readRelocations(const SectionRef &Section) {
Joey Thaman4c12afc2021-06-29 19:11:562263 LLVM_DEBUG({
2264 StringRef SectionName = cantFail(Section.getName());
2265 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
2266 << ":\n";
2267 });
Maksim Panchenkoce508b52018-09-21 19:00:202268 if (BinarySection(*BC, Section).isAllocatable()) {
Amir Ayupov1c5d3a02020-12-02 00:29:392269 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n");
Maksim Panchenko55fc5412016-09-28 02:09:382270 return;
2271 }
Amir Ayupovc7306cc2021-04-08 07:19:262272 section_iterator SecIter = cantFail(Section.getRelocatedSection());
Maksim Panchenko55fc5412016-09-28 02:09:382273 assert(SecIter != InputFile->section_end() && "relocated section expected");
Amir Ayupovc7306cc2021-04-08 07:19:262274 SectionRef RelocatedSection = *SecIter;
Maksim Panchenko55fc5412016-09-28 02:09:382275
Amir Ayupov1c5d3a02020-12-02 00:29:392276 StringRef RelocatedSectionName = cantFail(RelocatedSection.getName());
2277 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is "
2278 << RelocatedSectionName << '\n');
Maksim Panchenko55fc5412016-09-28 02:09:382279
Maksim Panchenkoce508b52018-09-21 19:00:202280 if (!BinarySection(*BC, RelocatedSection).isAllocatable()) {
Amir Ayupov1c5d3a02020-12-02 00:29:392281 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against "
2282 << "non-allocatable section\n");
Maksim Panchenko55fc5412016-09-28 02:09:382283 return;
2284 }
2285 const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName)
Amir Ayupov1c5d3a02020-12-02 00:29:392286 .Cases(".plt", ".rela.plt", ".got.plt",
2287 ".eh_frame", ".gcc_except_table", true)
2288 .Default(false);
Maksim Panchenko55fc5412016-09-28 02:09:382289 if (SkipRelocs) {
Amir Ayupov1c5d3a02020-12-02 00:29:392290 LLVM_DEBUG(
2291 dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n");
Maksim Panchenko55fc5412016-09-28 02:09:382292 return;
2293 }
2294
Rafael Auler7df6a6d2018-03-20 21:34:582295 const bool IsAArch64 = BC->isAArch64();
Bill Nell89feb842018-01-24 13:42:112296 const bool IsFromCode = RelocatedSection.isText();
Maksim Panchenko55fc5412016-09-28 02:09:382297
Bill Nell0e4d86b2017-11-15 04:05:112298 auto printRelocationInfo = [&](const RelocationRef &Rel,
2299 StringRef SymbolName,
2300 uint64_t SymbolAddress,
2301 uint64_t Addend,
2302 uint64_t ExtractedValue) {
2303 SmallString<16> TypeName;
2304 Rel.getTypeName(TypeName);
Amir Ayupovc7306cc2021-04-08 07:19:262305 const uint64_t Address = SymbolAddress + Addend;
2306 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
Bill Nell0e4d86b2017-11-15 04:05:112307 dbgs() << "Relocation: offset = 0x"
2308 << Twine::utohexstr(Rel.getOffset())
Maksim Panchenko771d9762018-07-12 17:13:032309 << "; type = " << TypeName
Bill Nell0e4d86b2017-11-15 04:05:112310 << "; value = 0x" << Twine::utohexstr(ExtractedValue)
2311 << "; symbol = " << SymbolName
2312 << " (" << (Section ? Section->getName() : "") << ")"
2313 << "; symbol address = 0x" << Twine::utohexstr(SymbolAddress)
2314 << "; addend = 0x" << Twine::utohexstr(Addend)
2315 << "; address = 0x" << Twine::utohexstr(Address)
2316 << "; in = ";
Amir Ayupovc7306cc2021-04-08 07:19:262317 if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress(
Maksim Panchenkoee0e9cc2021-12-23 20:38:332318 Rel.getOffset(), false, IsAArch64))
Bill Nell0e4d86b2017-11-15 04:05:112319 dbgs() << Func->getPrintName() << "\n";
Maksim Panchenkoee0e9cc2021-12-23 20:38:332320 else
Bill Nell0e4d86b2017-11-15 04:05:112321 dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName() << "\n";
Bill Nell0e4d86b2017-11-15 04:05:112322 };
2323
Amir Ayupovc7306cc2021-04-08 07:19:262324 for (const RelocationRef &Rel : Section.relocations()) {
Maksim Panchenko55fc5412016-09-28 02:09:382325 SmallString<16> TypeName;
2326 Rel.getTypeName(TypeName);
Amir Ayupovc7306cc2021-04-08 07:19:262327 uint64_t RType = Rel.getType();
Vladislav Khmelevsky6e26ffa2022-06-09 16:00:242328 if (Relocation::skipRelocationType(RType))
Vladislav Khmelevskydc4b32e2021-10-17 13:36:242329 continue;
Maksim Panchenkoe50e89b2019-04-12 00:11:082330
2331 // Adjust the relocation type as the linker might have skewed it.
2332 if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:332333 if (opts::Verbosity >= 1)
Maksim Panchenkoe50e89b2019-04-12 00:11:082334 dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n";
Maksim Panchenkoe50e89b2019-04-12 00:11:082335 RType &= ~ELF::R_X86_64_converted_reloc_bit;
2336 }
Bill Nell89feb842018-01-24 13:42:112337
Vladislav Khmelevsky542c03c2021-09-02 18:04:332338 if (Relocation::isTLS(RType)) {
2339 // No special handling required for TLS relocations on X86.
2340 if (BC->isX86())
2341 continue;
2342
2343 // The non-got related TLS relocations on AArch64 also could be skipped.
2344 if (!Relocation::isGOT(RType))
2345 continue;
2346 }
Maksim Panchenkoe89ad0d2019-06-28 16:21:272347
Vladislav Khmelevsky3b1314f2022-03-20 14:15:562348 if (!IsAArch64 && BC->getDynamicRelocationAt(Rel.getOffset())) {
Amir Ayupov1c5d3a02020-12-02 00:29:392349 LLVM_DEBUG(
2350 dbgs() << "BOLT-DEBUG: address 0x"
2351 << Twine::utohexstr(Rel.getOffset())
2352 << " has a dynamic relocation against it. Ignoring static "
2353 "relocation.\n");
Maksim Panchenko4aaa8892020-06-23 19:22:582354 continue;
2355 }
2356
Bill Nell89feb842018-01-24 13:42:112357 std::string SymbolName;
2358 uint64_t SymbolAddress;
2359 int64_t Addend;
2360 uint64_t ExtractedValue;
Maksim Panchenkoce508b52018-09-21 19:00:202361 bool IsSectionRelocation;
Vladislav Khmelevsky00c06592021-09-08 10:37:192362 bool Skip;
2363 if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation,
2364 SymbolAddress, Addend, ExtractedValue, Skip)) {
2365 LLVM_DEBUG(dbgs() << "BOLT-WARNING: failed to analyze relocation @ "
2366 << "offset = 0x" << Twine::utohexstr(Rel.getOffset())
2367 << "; type name = " << TypeName << '\n');
2368 ++NumFailedRelocations;
2369 continue;
2370 }
2371
2372 if (Skip) {
Amir Ayupov1c5d3a02020-12-02 00:29:392373 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skipping relocation @ offset = 0x"
2374 << Twine::utohexstr(Rel.getOffset())
2375 << "; type name = " << TypeName << '\n');
Maksim Panchenko29d4f4c2017-09-13 18:21:472376 continue;
2377 }
2378
Amir Ayupovc7306cc2021-04-08 07:19:262379 const uint64_t Address = SymbolAddress + Addend;
Maksim Panchenko55fc5412016-09-28 02:09:382380
Maksim Panchenko40c2e0f2021-12-15 00:52:512381 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "; printRelocationInfo(
2382 Rel, SymbolName, SymbolAddress, Addend, ExtractedValue));
Maksim Panchenko55fc5412016-09-28 02:09:382383
Maksim Panchenko55fc5412016-09-28 02:09:382384 BinaryFunction *ContainingBF = nullptr;
2385 if (IsFromCode) {
Bill Nell89feb842018-01-24 13:42:112386 ContainingBF =
Maksim Panchenko40c2e0f2021-12-15 00:52:512387 BC->getBinaryFunctionContainingAddress(Rel.getOffset(),
2388 /*CheckPastEnd*/ false,
2389 /*UseMaxSize*/ true);
Maksim Panchenko55fc5412016-09-28 02:09:382390 assert(ContainingBF && "cannot find function for address in code");
Maksim Panchenko0ce0bce2020-06-15 07:15:472391 if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:332392 if (opts::Verbosity >= 1)
Maksim Panchenko0ce0bce2020-06-15 07:15:472393 outs() << "BOLT-INFO: " << *ContainingBF
2394 << " has relocations in padding area\n";
Maksim Panchenko0ce0bce2020-06-15 07:15:472395 ContainingBF->setSize(ContainingBF->getMaxSize());
2396 ContainingBF->setSimple(false);
2397 continue;
2398 }
Maksim Panchenko55fc5412016-09-28 02:09:382399 }
2400
Maksim Panchenko4101aa132022-02-24 06:54:422401 MCSymbol *ReferencedSymbol = nullptr;
Maksim Panchenko36cb7362022-04-13 01:42:192402 if (!IsSectionRelocation)
Maksim Panchenko4101aa132022-02-24 06:54:422403 if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
2404 ReferencedSymbol = BD->getSymbol();
Maksim Panchenko4101aa132022-02-24 06:54:422405
Maksim Panchenko36cb7362022-04-13 01:42:192406 ErrorOr<BinarySection &> ReferencedSection =
2407 BC->getSectionForAddress(SymbolAddress);
2408
2409 const bool IsToCode = ReferencedSection && ReferencedSection->isText();
2410
2411 // Special handling of PC-relative relocations.
Maksim Panchenkoe50e89b2019-04-12 00:11:082412 if (!IsAArch64 && Relocation::isPCRelative(RType)) {
Maksim Panchenko36cb7362022-04-13 01:42:192413 if (!IsFromCode && IsToCode) {
2414 // PC-relative relocations from data to code are tricky since the
2415 // original information is typically lost after linking, even with
2416 // '--emit-relocs'. Such relocations are normally used by PIC-style
2417 // jump tables and they reference both the jump table and jump
2418 // targets by computing the difference between the two. If we blindly
2419 // apply the relocation, it will appear that it references an arbitrary
2420 // location in the code, possibly in a different function from the one
2421 // containing the jump table.
2422 //
2423 // For that reason, we only register the fact that there is a
2424 // PC-relative relocation at a given address against the code.
2425 // The actual referenced label/address will be determined during jump
2426 // table analysis.
Maksim Panchenko658f2702019-11-20 02:52:082427 BC->addPCRelativeDataRelocation(Rel.getOffset());
Maksim Panchenko36cb7362022-04-13 01:42:192428 } else if (ContainingBF && !IsSectionRelocation && ReferencedSymbol) {
2429 // If we know the referenced symbol, register the relocation from
2430 // the code. It's required to properly handle cases where
2431 // "symbol + addend" references an object different from "symbol".
Maksim Panchenko4101aa132022-02-24 06:54:422432 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
2433 Addend, ExtractedValue);
Maksim Panchenko36cb7362022-04-13 01:42:192434 } else {
Maksim Panchenko4101aa132022-02-24 06:54:422435 LLVM_DEBUG(
2436 dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at 0x"
2437 << Twine::utohexstr(Rel.getOffset()) << " for " << SymbolName
2438 << "\n");
Maksim Panchenko36cb7362022-04-13 01:42:192439 }
2440
Maksim Panchenko55fc5412016-09-28 02:09:382441 continue;
2442 }
2443
Maksim Panchenkodb4642d2020-06-18 18:10:412444 bool ForceRelocation = BC->forceSymbolRelocations(SymbolName);
Maksim Panchenko36cb7362022-04-13 01:42:192445 if (BC->isAArch64() && Relocation::isGOT(RType))
Vladislav Khmelevsky542c03c2021-09-02 18:04:332446 ForceRelocation = true;
Maksim Panchenko55fc5412016-09-28 02:09:382447
Maksim Panchenko36cb7362022-04-13 01:42:192448 if (!ReferencedSection && !ForceRelocation) {
2449 LLVM_DEBUG(
2450 dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n");
2451 continue;
2452 }
Maksim Panchenko55fc5412016-09-28 02:09:382453
2454 // Occasionally we may see a reference past the last byte of the function
2455 // typically as a result of __builtin_unreachable(). Check it here.
Amir Ayupovc7306cc2021-04-08 07:19:262456 BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress(
Rafael Aulerd6003e92018-04-12 17:07:112457 Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64);
Maksim Panchenko3af353732018-05-14 18:10:262458
2459 if (!IsSectionRelocation) {
Amir Ayupovc7306cc2021-04-08 07:19:262460 if (BinaryFunction *BF =
2461 BC->getBinaryFunctionContainingAddress(SymbolAddress)) {
Maksim Panchenko3af353732018-05-14 18:10:262462 if (BF != ReferencedBF) {
2463 // It's possible we are referencing a function without referencing any
2464 // code, e.g. when taking a bitmask action on a function address.
2465 errs() << "BOLT-WARNING: non-standard function reference (e.g. "
Maksim Panchenko40c2e0f2021-12-15 00:52:512466 "bitmask) detected against function "
2467 << *BF;
Maksim Panchenkoee0e9cc2021-12-23 20:38:332468 if (IsFromCode)
Maksim Panchenko3af353732018-05-14 18:10:262469 errs() << " from function " << *ContainingBF << '\n';
Maksim Panchenkoee0e9cc2021-12-23 20:38:332470 else
Maksim Panchenko3af353732018-05-14 18:10:262471 errs() << " from data section at 0x"
2472 << Twine::utohexstr(Rel.getOffset()) << '\n';
Maksim Panchenko40c2e0f2021-12-15 00:52:512473 LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend,
2474 ExtractedValue));
Maksim Panchenko3af353732018-05-14 18:10:262475 ReferencedBF = BF;
2476 }
2477 }
Maksim Panchenko06e7a1e2019-06-27 10:20:172478 } else if (ReferencedBF) {
Maksim Panchenko36cb7362022-04-13 01:42:192479 assert(ReferencedSection && "section expected for section relocation");
2480 if (*ReferencedBF->getOriginSection() != *ReferencedSection) {
Amir Ayupov1c5d3a02020-12-02 00:29:392481 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n");
Maksim Panchenko06e7a1e2019-06-27 10:20:172482 ReferencedBF = nullptr;
2483 }
Maksim Panchenko3af353732018-05-14 18:10:262484 }
2485
Maksim Panchenkoc8232202019-09-17 21:24:312486 // Workaround for a member function pointer de-virtualization bug. We check
2487 // if a non-pc-relative relocation in the code is pointing to (fptr - 1).
2488 if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) &&
2489 (!ReferencedBF || (ReferencedBF->getAddress() != Address))) {
Amir Ayupovc7306cc2021-04-08 07:19:262490 if (const BinaryFunction *RogueBF =
2491 BC->getBinaryFunctionAtAddress(Address + 1)) {
Maksim Panchenkoc8232202019-09-17 21:24:312492 // Do an extra check that the function was referenced previously.
2493 // It's a linear search, but it should rarely happen.
Maksim Panchenkofe37f182021-05-13 17:50:472494 bool Found = false;
Maksim Panchenkoc8232202019-09-17 21:24:312495 for (const auto &RelKV : ContainingBF->Relocations) {
Amir Ayupovc7306cc2021-04-08 07:19:262496 const Relocation &Rel = RelKV.second;
Maksim Panchenkoc8232202019-09-17 21:24:312497 if (Rel.Symbol == RogueBF->getSymbol() &&
2498 !Relocation::isPCRelative(Rel.Type)) {
2499 Found = true;
2500 break;
2501 }
2502 }
2503
2504 if (Found) {
2505 errs() << "BOLT-WARNING: detected possible compiler "
2506 "de-virtualization bug: -1 addend used with "
2507 "non-pc-relative relocation against function "
2508 << *RogueBF << " in function " << *ContainingBF << '\n';
2509 continue;
2510 }
2511 }
2512 }
2513
Maksim Panchenko55fc5412016-09-28 02:09:382514 if (ForceRelocation) {
Amir Ayupovc7306cc2021-04-08 07:19:262515 std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName;
Bill Nell0e4d86b2017-11-15 04:05:112516 ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
Bill Nell89feb842018-01-24 13:42:112517 SymbolAddress = 0;
Maksim Panchenkoe50e89b2019-04-12 00:11:082518 if (Relocation::isGOT(RType))
Rafael Aulerd0a80b02018-08-29 01:15:132519 Addend = Address;
Amir Ayupov1c5d3a02020-12-02 00:29:392520 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
2521 << SymbolName << " with addend " << Addend << '\n');
Maksim Panchenko55fc5412016-09-28 02:09:382522 } else if (ReferencedBF) {
Maksim Panchenko3af353732018-05-14 18:10:262523 ReferencedSymbol = ReferencedBF->getSymbol();
Maksim Panchenkoa09659f2019-11-18 22:08:172524 uint64_t RefFunctionOffset = 0;
Maksim Panchenko3af353732018-05-14 18:10:262525
2526 // Adjust the point of reference to a code location inside a function.
2527 if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */true)) {
2528 RefFunctionOffset = Address - ReferencedBF->getAddress();
2529 if (RefFunctionOffset) {
Maksim Panchenkoefce4432020-06-22 20:05:132530 if (ContainingBF && ContainingBF != ReferencedBF) {
2531 ReferencedSymbol =
Maksim Panchenko40c2e0f2021-12-15 00:52:512532 ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
Maksim Panchenkoefce4432020-06-22 20:05:132533 } else {
2534 ReferencedSymbol =
Maksim Panchenko40c2e0f2021-12-15 00:52:512535 ReferencedBF->getOrCreateLocalLabel(Address,
2536 /*CreatePastEnd =*/true);
Maksim Panchenkoefce4432020-06-22 20:05:132537 ReferencedBF->registerReferencedOffset(RefFunctionOffset);
2538 }
Amir Ayupov1c5d3a02020-12-02 00:29:392539 if (opts::Verbosity > 1 &&
Maksim Panchenkoee0e9cc2021-12-23 20:38:332540 !BinarySection(*BC, RelocatedSection).isReadOnly())
2541 errs() << "BOLT-WARNING: writable reference into the middle of "
Maksim Panchenkoe89ad0d2019-06-28 16:21:272542 << "the function " << *ReferencedBF
2543 << " detected at address 0x"
2544 << Twine::utohexstr(Rel.getOffset()) << '\n';
Maksim Panchenko3af353732018-05-14 18:10:262545 }
2546 SymbolAddress = Address;
2547 Addend = 0;
Maksim Panchenko55fc5412016-09-28 02:09:382548 }
Amir Ayupov1c5d3a02020-12-02 00:29:392549 LLVM_DEBUG(
Maksim Panchenko3af353732018-05-14 18:10:262550 dbgs() << " referenced function " << *ReferencedBF;
2551 if (Address != ReferencedBF->getAddress())
2552 dbgs() << " at offset 0x" << Twine::utohexstr(RefFunctionOffset);
2553 dbgs() << '\n'
2554 );
Maksim Panchenko55fc5412016-09-28 02:09:382555 } else {
Maksim Panchenkoe89ad0d2019-06-28 16:21:272556 if (IsToCode && SymbolAddress) {
Maksim Panchenko55fc5412016-09-28 02:09:382557 // This can happen e.g. with PIC-style jump tables.
Amir Ayupov1c5d3a02020-12-02 00:29:392558 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for "
2559 "relocation against code\n");
Maksim Panchenko55fc5412016-09-28 02:09:382560 }
Bill Nell0e4d86b2017-11-15 04:05:112561
Rafael Auler7df6a6d2018-03-20 21:34:582562 // In AArch64 there are zero reasons to keep a reference to the
2563 // "original" symbol plus addend. The original symbol is probably just a
2564 // section symbol. If we are here, this means we are probably accessing
2565 // data, so it is imperative to keep the original address.
2566 if (IsAArch64) {
2567 SymbolName = ("SYMBOLat0x" + Twine::utohexstr(Address)).str();
2568 SymbolAddress = Address;
2569 Addend = 0;
2570 }
2571
Amir Ayupovc7306cc2021-04-08 07:19:262572 if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) {
Rafael Auler7df6a6d2018-03-20 21:34:582573 // Note: this assertion is trying to check sanity of BinaryData objects
2574 // but AArch64 has inferred and incomplete object locations coming from
2575 // GOT/TLS or any other non-trivial relocation (that requires creation
2576 // of sections and whose symbol address is not really what should be
2577 // encoded in the instruction). So we essentially disabled this check
2578 // for AArch64 and live with bogus names for objects.
Maksim Panchenko40c2e0f2021-12-15 00:52:512579 assert((IsAArch64 || IsSectionRelocation ||
Bill Nell729da2d2018-04-21 03:03:312580 BD->nameStartsWith(SymbolName) ||
2581 BD->nameStartsWith("PG" + SymbolName) ||
2582 (BD->nameStartsWith("ANONYMOUS") &&
2583 (BD->getSectionName().startswith(".plt") ||
2584 BD->getSectionName().endswith(".plt")))) &&
Maksim Panchenkoa7d02512018-06-14 21:27:202585 "BOLT symbol names of all non-section relocations must match "
Bill Nell729da2d2018-04-21 03:03:312586 "up with symbol names referenced in the relocation");
2587
Maksim Panchenkoee0e9cc2021-12-23 20:38:332588 if (IsSectionRelocation)
Maksim Panchenkoa09659f2019-11-18 22:08:172589 BC->markAmbiguousRelocations(*BD, Address);
Bill Nell729da2d2018-04-21 03:03:312590
Bill Nell0e4d86b2017-11-15 04:05:112591 ReferencedSymbol = BD->getSymbol();
2592 Addend += (SymbolAddress - BD->getAddress());
2593 SymbolAddress = BD->getAddress();
2594 assert(Address == SymbolAddress + Addend);
2595 } else {
Bill Nell0e4d86b2017-11-15 04:05:112596 // These are mostly local data symbols but undefined symbols
2597 // in relocation sections can get through here too, from .plt.
Maksim Panchenko40c2e0f2021-12-15 00:52:512598 assert(
2599 (IsAArch64 || IsSectionRelocation ||
2600 BC->getSectionNameForAddress(SymbolAddress)->startswith(".plt")) &&
2601 "known symbols should not resolve to anonymous locals");
Bill Nell729da2d2018-04-21 03:03:312602
Maksim Panchenkoce508b52018-09-21 19:00:202603 if (IsSectionRelocation) {
Maksim Panchenko40c2e0f2021-12-15 00:52:512604 ReferencedSymbol =
2605 BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat");
Maksim Panchenkoce508b52018-09-21 19:00:202606 } else {
Amir Ayupovc7306cc2021-04-08 07:19:262607 SymbolRef Symbol = *Rel.getSymbol();
Maksim Panchenkoce508b52018-09-21 19:00:202608 const uint64_t SymbolSize =
2609 IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
2610 const uint64_t SymbolAlignment =
2611 IsAArch64 ? 1 : Symbol.getAlignment();
Amir Ayupovc7306cc2021-04-08 07:19:262612 const uint32_t SymbolFlags = cantFail(Symbol.getFlags());
Bill Nell0e4d86b2017-11-15 04:05:112613 std::string Name;
Maksim Panchenkoce508b52018-09-21 19:00:202614 if (SymbolFlags & SymbolRef::SF_Global) {
Bill Nell0e4d86b2017-11-15 04:05:112615 Name = SymbolName;
Rafael Auler7df6a6d2018-03-20 21:34:582616 } else {
Maksim Panchenko40c2e0f2021-12-15 00:52:512617 if (StringRef(SymbolName)
Maksim Panchenkoee0e9cc2021-12-23 20:38:332618 .startswith(BC->AsmInfo->getPrivateGlobalPrefix()))
Alexander Shaposhnikov16630f52020-02-17 22:37:462619 Name = NR.uniquify("PG" + SymbolName);
Maksim Panchenkoee0e9cc2021-12-23 20:38:332620 else
Alexander Shaposhnikov16630f52020-02-17 22:37:462621 Name = NR.uniquify(SymbolName);
Rafael Auler7df6a6d2018-03-20 21:34:582622 }
Maksim Panchenko40c2e0f2021-12-15 00:52:512623 ReferencedSymbol = BC->registerNameAtAddress(
2624 Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags);
Bill Nell729da2d2018-04-21 03:03:312625 }
2626
Maksim Panchenkocb9c9912020-03-03 23:51:242627 if (IsSectionRelocation) {
Amir Ayupovc7306cc2021-04-08 07:19:262628 BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName());
Maksim Panchenkoa09659f2019-11-18 22:08:172629 BC->markAmbiguousRelocations(*BD, Address);
Bill Nell0e4d86b2017-11-15 04:05:112630 }
2631 }
Maksim Panchenko55fc5412016-09-28 02:09:382632 }
2633
Bill Nell0e4d86b2017-11-15 04:05:112634 auto checkMaxDataRelocations = [&]() {
2635 ++NumDataRelocations;
2636 if (opts::MaxDataRelocations &&
2637 NumDataRelocations + 1 == opts::MaxDataRelocations) {
Amir Ayupov1c5d3a02020-12-02 00:29:392638 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on data relocation "
2639 << NumDataRelocations << ": ");
2640 printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress,
2641 Addend, ExtractedValue);
Bill Nell0e4d86b2017-11-15 04:05:112642 }
2643
2644 return (!opts::MaxDataRelocations ||
2645 NumDataRelocations < opts::MaxDataRelocations);
2646 };
2647
Maksim Panchenko36cb7362022-04-13 01:42:192648 if ((ReferencedSection && refersToReorderedSection(ReferencedSection)) ||
Maksim Panchenko771d9762018-07-12 17:13:032649 (opts::ForceToDataRelocations && checkMaxDataRelocations()))
2650 ForceRelocation = true;
2651
Maksim Panchenko55fc5412016-09-28 02:09:382652 if (IsFromCode) {
Maksim Panchenko40c2e0f2021-12-15 00:52:512653 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
2654 Addend, ExtractedValue);
Maksim Panchenko771d9762018-07-12 17:13:032655 } else if (IsToCode || ForceRelocation) {
Maksim Panchenkoe89ad0d2019-06-28 16:21:272656 BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend,
2657 ExtractedValue);
Maksim Panchenko55fc5412016-09-28 02:09:382658 } else {
Amir Ayupov1c5d3a02020-12-02 00:29:392659 LLVM_DEBUG(
2660 dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n");
Maksim Panchenko55fc5412016-09-28 02:09:382661 }
2662 }
2663}
2664
Maksim Panchenko04c5d4f2020-05-03 20:54:452665void RewriteInstance::selectFunctionsToProcess() {
2666 // Extend the list of functions to process or skip from a file.
2667 auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile,
2668 cl::list<std::string> &FunctionNames) {
2669 if (FunctionNamesFile.empty())
2670 return;
2671 std::ifstream FuncsFile(FunctionNamesFile, std::ios::in);
2672 std::string FuncName;
Maksim Panchenkoee0e9cc2021-12-23 20:38:332673 while (std::getline(FuncsFile, FuncName))
Maksim Panchenko04c5d4f2020-05-03 20:54:452674 FunctionNames.push_back(FuncName);
Maksim Panchenko04c5d4f2020-05-03 20:54:452675 };
2676 populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames);
2677 populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames);
Amir Ayupovd474dbd2021-06-05 01:49:292678 populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR);
Maksim Panchenko04c5d4f2020-05-03 20:54:452679
Amir Ayupovd474dbd2021-06-05 01:49:292680 // Make a set of functions to process to speed up lookups.
2681 std::unordered_set<std::string> ForceFunctionsNR(
2682 opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end());
2683
2684 if ((!opts::ForceFunctionNames.empty() ||
2685 !opts::ForceFunctionNamesNR.empty()) &&
2686 !opts::SkipFunctionNames.empty()) {
Maksim Panchenko04c5d4f2020-05-03 20:54:452687 errs() << "BOLT-ERROR: cannot select functions to process and skip at the "
2688 "same time. Please use only one type of selection.\n";
2689 exit(1);
2690 }
2691
Rafael Aulere3898d52020-12-30 20:23:582692 uint64_t LiteThresholdExecCount = 0;
2693 if (opts::LiteThresholdPct) {
2694 if (opts::LiteThresholdPct > 100)
2695 opts::LiteThresholdPct = 100;
Maksim Panchenko04c5d4f2020-05-03 20:54:452696
Rafael Aulere3898d52020-12-30 20:23:582697 std::vector<const BinaryFunction *> TopFunctions;
2698 for (auto &BFI : BC->getBinaryFunctions()) {
2699 const BinaryFunction &Function = BFI.second;
2700 if (ProfileReader->mayHaveProfileData(Function))
2701 TopFunctions.push_back(&Function);
2702 }
Amir Ayupovd2c87692022-06-24 05:15:472703 llvm::sort(
2704 TopFunctions, [](const BinaryFunction *A, const BinaryFunction *B) {
2705 return A->getKnownExecutionCount() < B->getKnownExecutionCount();
2706 });
Rafael Aulere3898d52020-12-30 20:23:582707
2708 size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100;
2709 if (Index)
2710 --Index;
2711 LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount();
2712 outs() << "BOLT-INFO: limiting processing to functions with at least "
2713 << LiteThresholdExecCount << " invocations\n";
2714 }
Maksim Panchenko40c2e0f2021-12-15 00:52:512715 LiteThresholdExecCount = std::max(
2716 LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount));
Rafael Aulere3898d52020-12-30 20:23:582717
2718 uint64_t NumFunctionsToProcess = 0;
Maksim Panchenko04c5d4f2020-05-03 20:54:452719 auto shouldProcess = [&](const BinaryFunction &Function) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:332720 if (opts::MaxFunctions && NumFunctionsToProcess > opts::MaxFunctions)
Maksim Panchenko04c5d4f2020-05-03 20:54:452721 return false;
2722
2723 // If the list is not empty, only process functions from the list.
Amir Ayupovd474dbd2021-06-05 01:49:292724 if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
2725 // Regex check (-funcs and -funcs-file options).
Maksim Panchenkoee0e9cc2021-12-23 20:38:332726 for (std::string &Name : opts::ForceFunctionNames)
2727 if (Function.hasNameRegex(Name))
Maksim Panchenko04c5d4f2020-05-03 20:54:452728 return true;
Maksim Panchenkoee0e9cc2021-12-23 20:38:332729
Amir Ayupovd474dbd2021-06-05 01:49:292730 // Non-regex check (-funcs-no-regex and -funcs-file-no-regex).
2731 Optional<StringRef> Match =
2732 Function.forEachName([&ForceFunctionsNR](StringRef Name) {
2733 return ForceFunctionsNR.count(Name.str());
2734 });
2735 return Match.hasValue();
Maksim Panchenko04c5d4f2020-05-03 20:54:452736 }
2737
Maksim Panchenkoee0e9cc2021-12-23 20:38:332738 for (std::string &Name : opts::SkipFunctionNames)
2739 if (Function.hasNameRegex(Name))
Maksim Panchenko04c5d4f2020-05-03 20:54:452740 return false;
Maksim Panchenko04c5d4f2020-05-03 20:54:452741
Maksim Panchenko924d0bd2020-05-03 22:49:582742 if (opts::Lite) {
Rafael Aulere3898d52020-12-30 20:23:582743 if (ProfileReader && !ProfileReader->mayHaveProfileData(Function))
Maksim Panchenko924d0bd2020-05-03 22:49:582744 return false;
Rafael Aulere3898d52020-12-30 20:23:582745
2746 if (Function.getKnownExecutionCount() < LiteThresholdExecCount)
2747 return false;
Maksim Panchenko924d0bd2020-05-03 22:49:582748 }
2749
Maksim Panchenko04c5d4f2020-05-03 20:54:452750 return true;
2751 };
2752
2753 for (auto &BFI : BC->getBinaryFunctions()) {
Amir Ayupovc7306cc2021-04-08 07:19:262754 BinaryFunction &Function = BFI.second;
Maksim Panchenko04c5d4f2020-05-03 20:54:452755
Rafael Aulere3898d52020-12-30 20:23:582756 // Pseudo functions are explicitly marked by us not to be processed.
Maksim Panchenko0ce0bce2020-06-15 07:15:472757 if (Function.isPseudo()) {
2758 Function.IsIgnored = true;
2759 Function.HasExternalRefRelocations = true;
Maksim Panchenko04c5d4f2020-05-03 20:54:452760 continue;
Maksim Panchenko0ce0bce2020-06-15 07:15:472761 }
Maksim Panchenko04c5d4f2020-05-03 20:54:452762
2763 if (!shouldProcess(Function)) {
Amir Ayupov1c5d3a02020-12-02 00:29:392764 LLVM_DEBUG(dbgs() << "BOLT-INFO: skipping processing of function "
2765 << Function << " per user request\n");
Maksim Panchenko0ce0bce2020-06-15 07:15:472766 Function.setIgnored();
Maksim Panchenko04c5d4f2020-05-03 20:54:452767 } else {
2768 ++NumFunctionsToProcess;
Maksim Panchenkoee0e9cc2021-12-23 20:38:332769 if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions)
Maksim Panchenko0ce0bce2020-06-15 07:15:472770 outs() << "BOLT-INFO: processing ending on " << Function << '\n';
Maksim Panchenko04c5d4f2020-05-03 20:54:452771 }
2772 }
2773}
2774
Maksim Panchenkod01172f2016-03-15 01:48:052775void RewriteInstance::readDebugInfo() {
Rafael Auler8a5a3012018-02-06 23:00:232776 NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName,
2777 TimerGroupDesc, opts::TimeRewrite);
Maksim Panchenkod01172f2016-03-15 01:48:052778 if (!opts::UpdateDebugSections)
2779 return;
2780
Maksim Panchenko7fd48702019-04-03 22:52:012781 BC->preprocessDebugInfo();
Maksim Panchenkod01172f2016-03-15 01:48:052782}
2783
Maksim Panchenkoc6ce2ab2019-01-16 07:43:402784void RewriteInstance::preprocessProfileData() {
Maksim Panchenko87291712020-05-08 06:00:292785 if (!ProfileReader)
2786 return;
2787
Maksim Panchenkoc6ce2ab2019-01-16 07:43:402788 NamedRegionTimer T("preprocessprofile", "pre-process profile data",
2789 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
Maksim Panchenko87291712020-05-08 06:00:292790
2791 outs() << "BOLT-INFO: pre-processing profile using "
2792 << ProfileReader->getReaderName() << '\n';
2793
2794 if (BAT->enabledFor(InputFile)) {
2795 outs() << "BOLT-INFO: profile collection done on a binary already "
2796 "processed by BOLT\n";
2797 ProfileReader->setBAT(&*BAT);
Rafael Auler21f43032019-04-13 00:33:462798 }
Maksim Panchenko924d0bd2020-05-03 22:49:582799
Amir Ayupovc7306cc2021-04-08 07:19:262800 if (Error E = ProfileReader->preprocessProfile(*BC.get()))
Maksim Panchenko87291712020-05-08 06:00:292801 report_error("cannot pre-process profile", std::move(E));
2802
Maksim Panchenko40c2e0f2021-12-15 00:52:512803 if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() &&
Maksim Panchenko87291712020-05-08 06:00:292804 !opts::AllowStripped) {
2805 errs() << "BOLT-ERROR: input binary does not have local file symbols "
2806 "but profile data includes function names with embedded file "
2807 "names. It appears that the input binary was stripped while a "
2808 "profiled binary was not. If you know what you are doing and "
2809 "wish to proceed, use -allow-stripped option.\n";
2810 exit(1);
Maksim Panchenko924d0bd2020-05-03 22:49:582811 }
Maksim Panchenkoc6ce2ab2019-01-16 07:43:402812}
2813
Maksim Panchenko87291712020-05-08 06:00:292814void RewriteInstance::processProfileDataPreCFG() {
2815 if (!ProfileReader)
2816 return;
2817
2818 NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG",
2819 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
2820
Amir Ayupovc7306cc2021-04-08 07:19:262821 if (Error E = ProfileReader->readProfilePreCFG(*BC.get()))
Maksim Panchenko87291712020-05-08 06:00:292822 report_error("cannot read profile pre-CFG", std::move(E));
2823}
2824
Maksim Panchenkob6cb1122017-12-14 07:12:012825void RewriteInstance::processProfileData() {
Maksim Panchenko87291712020-05-08 06:00:292826 if (!ProfileReader)
2827 return;
2828
Maksim Panchenkoc6ce2ab2019-01-16 07:43:402829 NamedRegionTimer T("processprofile", "process profile data", TimerGroupName,
2830 TimerGroupDesc, opts::TimeRewrite);
Maksim Panchenko924d0bd2020-05-03 22:49:582831
Amir Ayupovc7306cc2021-04-08 07:19:262832 if (Error E = ProfileReader->readProfile(*BC.get()))
Maksim Panchenko87291712020-05-08 06:00:292833 report_error("cannot read profile", std::move(E));
Maksim Panchenkoae409f02017-07-17 18:22:222834
Maksim Panchenkob6cb1122017-12-14 07:12:012835 if (!opts::SaveProfile.empty()) {
Maksim Panchenko87291712020-05-08 06:00:292836 YAMLProfileWriter PW(opts::SaveProfile);
Maksim Panchenko8b049d32018-04-10 02:10:192837 PW.writeProfile(*this);
Maksim Panchenkoae409f02017-07-17 18:22:222838 }
Maksim Panchenko87291712020-05-08 06:00:292839
2840 // Release memory used by profile reader.
2841 ProfileReader.reset();
2842
Maksim Panchenkoee0e9cc2021-12-23 20:38:332843 if (opts::AggregateOnly)
Maksim Panchenko87291712020-05-08 06:00:292844 exit(0);
Maksim Panchenkoae409f02017-07-17 18:22:222845}
2846
Rafael Aulerc67a7532015-11-24 01:54:182847void RewriteInstance::disassembleFunctions() {
Rafael Auler8a5a3012018-02-06 23:00:232848 NamedRegionTimer T("disassembleFunctions", "disassemble functions",
2849 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
Maksim Panchenko7fd48702019-04-03 22:52:012850 for (auto &BFI : BC->getBinaryFunctions()) {
Rafael Aulerc67a7532015-11-24 01:54:182851 BinaryFunction &Function = BFI.second;
2852
Amir Ayupovc7306cc2021-04-08 07:19:262853 ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData();
Bill Nellc4d74602017-10-20 19:11:342854 if (!FunctionData) {
Maksim Panchenko55fc5412016-09-28 02:09:382855 errs() << "BOLT-ERROR: corresponding section is non-executable or "
2856 << "empty for function " << Function << '\n';
Maksim Panchenko924d0bd2020-05-03 22:49:582857 exit(1);
Rafael Aulerc67a7532015-11-24 01:54:182858 }
2859
Maksim Panchenkoc4e36c12016-09-15 22:47:102860 // Treat zero-sized functions as non-simple ones.
2861 if (Function.getSize() == 0) {
2862 Function.setSimple(false);
2863 continue;
Rafael Aulerc67a7532015-11-24 01:54:182864 }
2865
Rafael Aulerc67a7532015-11-24 01:54:182866 // Offset of the function in the file.
Maksim Panchenkod15b93b2017-11-28 17:57:212867 const auto *FileBegin =
Maksim Panchenko40c2e0f2021-12-15 00:52:512868 reinterpret_cast<const uint8_t *>(InputFile->getData().data());
Bill Nellc4d74602017-10-20 19:11:342869 Function.setFileOffset(FunctionData->begin() - FileBegin);
Rafael Aulerc67a7532015-11-24 01:54:182870
Maksim Panchenko924d0bd2020-05-03 22:49:582871 if (!shouldDisassemble(Function)) {
2872 NamedRegionTimer T("scan", "scan functions", "buildfuncs",
2873 "Scan Binary Functions", opts::TimeBuild);
2874 Function.scanExternalRefs();
2875 Function.setSimple(false);
2876 continue;
2877 }
2878
Maksim Panchenko0ce0bce2020-06-15 07:15:472879 if (!Function.disassemble()) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:332880 if (opts::processAllFunctions())
Maksim Panchenko0ce0bce2020-06-15 07:15:472881 BC->exitWithBugReport("function cannot be properly disassembled. "
2882 "Unable to continue in relocation mode.",
2883 Function);
Maksim Panchenkoee0e9cc2021-12-23 20:38:332884 if (opts::Verbosity >= 1)
Maksim Panchenko0ce0bce2020-06-15 07:15:472885 outs() << "BOLT-INFO: could not disassemble function " << Function
2886 << ". Will ignore.\n";
Maksim Panchenko0ce0bce2020-06-15 07:15:472887 // Forcefully ignore the function.
2888 Function.setIgnored();
2889 continue;
Maksim Panchenko55fc5412016-09-28 02:09:382890 }
2891
Rafael Aulerc67a7532015-11-24 01:54:182892 if (opts::PrintAll || opts::PrintDisasm)
Bill Nellc27a6a52016-09-02 21:15:292893 Function.print(outs(), "after disassembly", true);
Rafael Aulerc67a7532015-11-24 01:54:182894
Maksim Panchenko0ce0bce2020-06-15 07:15:472895 BC->processInterproceduralReferences(Function);
Maksim Panchenkod660f8b2018-02-14 20:06:172896 }
2897
Huan Nguyen28b1dcb2022-06-17 23:17:222898 BC->clearJumpTableOffsets();
Maksim Panchenko9e2ad3f2019-06-13 01:21:022899 BC->populateJumpTables();
Amir Ayupov6aa735c2021-12-02 05:14:562900 BC->skipMarkedFragments();
Maksim Panchenko9e2ad3f2019-06-13 01:21:022901
2902 for (auto &BFI : BC->getBinaryFunctions()) {
2903 BinaryFunction &Function = BFI.second;
2904
2905 if (!shouldDisassemble(Function))
2906 continue;
2907
Rafael Auler961d3d02020-01-15 01:12:032908 Function.postProcessEntryPoints();
Maksim Panchenko9e2ad3f2019-06-13 01:21:022909 Function.postProcessJumpTables();
2910 }
2911
Maksim Panchenko0ce0bce2020-06-15 07:15:472912 BC->adjustCodePadding();
Maksim Panchenkoa9b9aa12019-07-24 03:48:412913
Maksim Panchenko7fd48702019-04-03 22:52:012914 for (auto &BFI : BC->getBinaryFunctions()) {
Maksim Panchenkod660f8b2018-02-14 20:06:172915 BinaryFunction &Function = BFI.second;
2916
Maksim Panchenkoc6ce2ab2019-01-16 07:43:402917 if (!shouldDisassemble(Function))
Maksim Panchenkod660f8b2018-02-14 20:06:172918 continue;
Maksim Panchenkod660f8b2018-02-14 20:06:172919
2920 if (!Function.isSimple()) {
Huan Nguyen82095bd2022-06-10 22:48:132921 assert((!BC->HasRelocations || Function.getSize() == 0 ||
2922 Function.hasSplitJumpTable()) &&
Maksim Panchenkod660f8b2018-02-14 20:06:172923 "unexpected non-simple function in relocation mode");
2924 continue;
2925 }
Rafael Aulerc67a7532015-11-24 01:54:182926
2927 // Fill in CFI information for this function
Maksim Panchenkoee0e9cc2021-12-23 20:38:332928 if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) {
2929 if (BC->HasRelocations) {
2930 BC->exitWithBugReport("unable to fill CFI.", Function);
2931 } else {
2932 errs() << "BOLT-WARNING: unable to fill CFI for function " << Function
2933 << ". Skipping.\n";
2934 Function.setSimple(false);
2935 continue;
Maksim Panchenko73e9afe2016-02-23 02:25:432936 }
Rafael Aulerc67a7532015-11-24 01:54:182937 }
2938
2939 // Parse LSDA.
Maksim Panchenkod660f8b2018-02-14 20:06:172940 if (Function.getLSDAAddress() != 0)
Bill Nell729da2d2018-04-21 03:03:312941 Function.parseLSDA(getLSDAData(), getLSDAAddress());
laith sakka7d428352019-07-12 14:25:502942 }
Maksim Panchenko87291712020-05-08 06:00:292943}
2944
2945void RewriteInstance::buildFunctionsCFG() {
2946 NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs",
2947 "Build Binary Functions", opts::TimeBuild);
2948
2949 // Create annotation indices to allow lock-free execution
Maksim Panchenko87291712020-05-08 06:00:292950 BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
Maksim Panchenkoccb99dd2021-12-19 01:05:002951 BC->MIB->getOrCreateAnnotationIndex("NOP");
2952 BC->MIB->getOrCreateAnnotationIndex("Size");
Maksim Panchenko87291712020-05-08 06:00:292953
2954 ParallelUtilities::WorkFuncWithAllocTy WorkFun =
2955 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
2956 if (!BF.buildCFG(AllocId))
2957 return;
2958
Amir Ayupovd1638cb2022-03-10 04:27:152959 if (opts::PrintAll) {
2960 auto L = BC->scopeLock();
Maksim Panchenko87291712020-05-08 06:00:292961 BF.print(outs(), "while building cfg", true);
Amir Ayupovd1638cb2022-03-10 04:27:152962 }
Maksim Panchenko87291712020-05-08 06:00:292963 };
2964
Maksim Panchenko40c2e0f2021-12-15 00:52:512965 ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
2966 return !shouldDisassemble(BF) || !BF.isSimple();
2967 };
Maksim Panchenko87291712020-05-08 06:00:292968
2969 ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
2970 *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
2971 SkipPredicate, "disassembleFunctions-buildCFG",
2972 /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll);
laith sakka7d428352019-07-12 14:25:502973
Bill Nell706abb62018-06-06 10:17:322974 BC->postProcessSymbolTable();
Maksim Panchenkod15b93b2017-11-28 17:57:212975}
2976
2977void RewriteInstance::postProcessFunctions() {
2978 BC->TotalScore = 0;
2979 BC->SumExecutionCount = 0;
Maksim Panchenko7fd48702019-04-03 22:52:012980 for (auto &BFI : BC->getBinaryFunctions()) {
Maksim Panchenkod15b93b2017-11-28 17:57:212981 BinaryFunction &Function = BFI.second;
2982
2983 if (Function.empty())
2984 continue;
2985
2986 Function.postProcessCFG();
2987
Rafael Aulerc67a7532015-11-24 01:54:182988 if (opts::PrintAll || opts::PrintCFG)
Bill Nellc27a6a52016-09-02 21:15:292989 Function.print(outs(), "after building cfg", true);
Rafael Aulerc67a7532015-11-24 01:54:182990
Bill Nell260f6fb2016-07-01 15:40:562991 if (opts::DumpDotAll)
Amir Ayupov5f2f96c2020-10-22 00:08:322992 Function.dumpGraphForPass("00_build-cfg");
Bill Nell260f6fb2016-07-01 15:40:562993
Theodoros Kasampalis17b84652016-05-26 17:58:012994 if (opts::PrintLoopInfo) {
2995 Function.calculateLoopInfo();
Bill Nellc27a6a52016-09-02 21:15:292996 Function.printLoopInfo(outs());
Theodoros Kasampalis17b84652016-05-26 17:58:012997 }
2998
Maksim Panchenkod15b93b2017-11-28 17:57:212999 BC->TotalScore += Function.getFunctionScore();
Rafael Aulerd850ca32017-05-01 23:52:543000 BC->SumExecutionCount += Function.getKnownExecutionCount();
Maksim Panchenko4a44d182016-01-16 22:58:223001 }
Bill Nell0e4d86b2017-11-15 04:05:113002
Bill Nell0e4d86b2017-11-15 04:05:113003 if (opts::PrintGlobals) {
3004 outs() << "BOLT-INFO: Global symbols:\n";
3005 BC->printGlobalSymbols(outs());
3006 }
Rafael Aulerc67a7532015-11-24 01:54:183007}
3008
3009void RewriteInstance::runOptimizationPasses() {
Rafael Auler8a5a3012018-02-06 23:00:233010 NamedRegionTimer T("runOptimizationPasses", "run optimization passes",
3011 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
Maksim Panchenko7fd48702019-04-03 22:52:013012 BinaryFunctionPassManager::runAllPasses(*BC);
Rafael Aulerc67a7532015-11-24 01:54:183013}
3014
Bill Nell96943d22017-05-25 01:40:293015namespace {
3016
Amir Ayupov1c5d3a02020-12-02 00:29:393017class BOLTSymbolResolver : public JITSymbolResolver {
3018 BinaryContext &BC;
Maksim Panchenko40c2e0f2021-12-15 00:52:513019
Amir Ayupov1c5d3a02020-12-02 00:29:393020public:
Rafael Aulera23726b2021-11-17 00:47:023021 BOLTSymbolResolver(BinaryContext &BC) : BC(BC) {}
Amir Ayupov1c5d3a02020-12-02 00:29:393022
3023 // We are responsible for all symbols
3024 Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override {
3025 return Symbols;
3026 }
3027
3028 // Some of our symbols may resolve to zero and this should not be an error
3029 bool allowsZeroSymbols() override { return true; }
3030
3031 /// Resolves the address of each symbol requested
3032 void lookup(const LookupSet &Symbols,
3033 OnResolvedFunction OnResolved) override {
3034 JITSymbolResolver::LookupResult AllResults;
3035
3036 if (BC.EFMM->ObjectsLoaded) {
Amir Ayupovc7306cc2021-04-08 07:19:263037 for (const StringRef &Symbol : Symbols) {
Amir Ayupov1c5d3a02020-12-02 00:29:393038 std::string SymName = Symbol.str();
3039 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n");
Rafael Aulera23726b2021-11-17 00:47:023040 // Resolve to a PLT entry if possible
Vladislav Khmelevsky4956e0e2022-04-03 16:11:313041 if (const BinaryData *I = BC.getPLTBinaryDataByName(SymName)) {
Rafael Aulera23726b2021-11-17 00:47:023042 AllResults[Symbol] =
3043 JITEvaluatedSymbol(I->getAddress(), JITSymbolFlags());
3044 continue;
Amir Ayupov1c5d3a02020-12-02 00:29:393045 }
Rafael Aulera23726b2021-11-17 00:47:023046 OnResolved(make_error<StringError>(
3047 "Symbol not found required by runtime: " + Symbol,
3048 inconvertibleErrorCode()));
3049 return;
Amir Ayupov1c5d3a02020-12-02 00:29:393050 }
3051 OnResolved(std::move(AllResults));
3052 return;
3053 }
3054
Amir Ayupovc7306cc2021-04-08 07:19:263055 for (const StringRef &Symbol : Symbols) {
Amir Ayupov1c5d3a02020-12-02 00:29:393056 std::string SymName = Symbol.str();
3057 LLVM_DEBUG(dbgs() << "BOLT: looking for " << SymName << "\n");
3058
Amir Ayupovc7306cc2021-04-08 07:19:263059 if (BinaryData *I = BC.getBinaryDataByName(SymName)) {
Amir Ayupov1c5d3a02020-12-02 00:29:393060 uint64_t Address = I->isMoved() && !I->isJumpTable()
3061 ? I->getOutputAddress()
3062 : I->getAddress();
3063 LLVM_DEBUG(dbgs() << "Resolved to address 0x"
3064 << Twine::utohexstr(Address) << "\n");
3065 AllResults[Symbol] = JITEvaluatedSymbol(Address, JITSymbolFlags());
3066 continue;
3067 }
3068 LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n");
3069 AllResults[Symbol] = JITEvaluatedSymbol(0, JITSymbolFlags());
3070 }
3071
3072 OnResolved(std::move(AllResults));
3073 }
3074};
3075
Rafael Aulerc67a7532015-11-24 01:54:183076} // anonymous namespace
3077
Rafael Auler62aa74f2019-07-24 21:03:433078void RewriteInstance::emitAndLink() {
3079 NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName,
Rafael Auler8a5a3012018-02-06 23:00:233080 TimerGroupDesc, opts::TimeRewrite);
Rafael Aulerc67a7532015-11-24 01:54:183081 std::error_code EC;
3082
3083 // This is an object file, which we keep for debugging purposes.
3084 // Once we decide it's useless, we should create it in memory.
Amir Ayupov081e39a2021-03-29 23:04:573085 SmallString<128> OutObjectPath;
3086 sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath);
Rafael Auler8a5a3012018-02-06 23:00:233087 std::unique_ptr<ToolOutputFile> TempOut =
Amir Ayupov081e39a2021-03-29 23:04:573088 std::make_unique<ToolOutputFile>(OutObjectPath, EC, sys::fs::OF_None);
Rafael Aulerc67a7532015-11-24 01:54:183089 check_error(EC, "cannot create output object file");
3090
3091 std::unique_ptr<buffer_ostream> BOS =
Maksim Panchenko40c2e0f2021-12-15 00:52:513092 std::make_unique<buffer_ostream>(TempOut->os());
Rafael Aulerc67a7532015-11-24 01:54:183093 raw_pwrite_stream *OS = BOS.get();
3094
3095 // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
3096 // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
3097 // two instances.
Amir Ayupov12e9fec2021-04-01 18:43:003098 std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(*OS);
Rafael Aulerc67a7532015-11-24 01:54:183099
Bill Nell2640b402018-01-23 23:10:243100 if (EHFrameSection) {
Maksim Panchenko0ce0bce2020-06-15 07:15:473101 if (opts::UseOldText || opts::StrictMode) {
Maksim Panchenko33e0b2a2020-04-19 19:55:433102 // The section is going to be regenerated from scratch.
3103 // Empty the contents, but keep the section reference.
Xun Li8a680742020-05-21 23:25:053104 EHFrameSection->clearContents();
Maksim Panchenko33e0b2a2020-04-19 19:55:433105 } else {
3106 // Make .eh_frame relocatable.
3107 relocateEHFrameSection();
3108 }
Maksim Panchenkoa7fb6102016-11-11 22:33:343109 }
3110
Maksim Panchenkoa07f1a22020-03-11 22:51:323111 emitBinaryContext(*Streamer, *BC, getOrgSecPrefix());
Maksim Panchenko1f3e3512020-03-06 23:06:373112
Fangrui Song15d82c62022-06-07 07:31:023113 Streamer->finish();
Amir Ayupov9b02dc62022-04-09 04:07:273114 if (Streamer->getContext().hadError()) {
3115 errs() << "BOLT-ERROR: Emission failed.\n";
3116 exit(1);
3117 }
Rafael Aulerc67a7532015-11-24 01:54:183118
Maksim Panchenko50c895a2016-02-08 18:02:483119 //////////////////////////////////////////////////////////////////////////////
Maksim Panchenko3f42fdf2017-05-09 05:51:363120 // Assign addresses to new sections.
Maksim Panchenko50c895a2016-02-08 18:02:483121 //////////////////////////////////////////////////////////////////////////////
3122
Rafael Aulerc67a7532015-11-24 01:54:183123 // Get output object as ObjectFile.
3124 std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
3125 MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false);
Rafael Auler8a5a3012018-02-06 23:00:233126 std::unique_ptr<object::ObjectFile> Obj = cantFail(
3127 object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()),
3128 "error creating in-memory object");
Rafael Aulerc67a7532015-11-24 01:54:183129
Rafael Aulera23726b2021-11-17 00:47:023130 BOLTSymbolResolver Resolver = BOLTSymbolResolver(*BC);
Maksim Panchenko55fc5412016-09-28 02:09:383131
Maksim Panchenko3f42fdf2017-05-09 05:51:363132 MCAsmLayout FinalLayout(
Maksim Panchenko40c2e0f2021-12-15 00:52:513133 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
Gabriel Poesiaffa96412016-03-29 00:45:223134
Amir Ayupov1c5d3a02020-12-02 00:29:393135 RTDyld.reset(new decltype(RTDyld)::element_type(*BC->EFMM, Resolver));
Maksim Panchenkoa2637032021-05-26 23:23:343136 RTDyld->setProcessAllSections(false);
Amir Ayupov1c5d3a02020-12-02 00:29:393137 RTDyld->loadObject(*Obj);
Rafael Auler8a5a3012018-02-06 23:00:233138
Amir Ayupov1c5d3a02020-12-02 00:29:393139 // Assign addresses to all sections. If key corresponds to the object
3140 // created by ourselves, call our regular mapping function. If we are
3141 // loading additional objects as part of runtime libraries for
3142 // instrumentation, treat them as extra sections.
3143 mapFileSections(*RTDyld);
3144
3145 RTDyld->finalizeWithMemoryManagerLocking();
3146 if (RTDyld->hasError()) {
Amir Ayupov9b02dc62022-04-09 04:07:273147 errs() << "BOLT-ERROR: RTDyld failed: " << RTDyld->getErrorString() << "\n";
Amir Ayupov1c5d3a02020-12-02 00:29:393148 exit(1);
3149 }
3150
3151 // Update output addresses based on the new section map and
3152 // layout. Only do this for the object created by ourselves.
3153 updateOutputValues(FinalLayout);
Maksim Panchenkoe1a61e12016-02-08 18:08:283154
Maksim Panchenko96bb0902021-10-13 20:19:063155 if (opts::UpdateDebugSections)
3156 DebugInfoRewriter->updateLineTableOffsets(FinalLayout);
3157
Maksim Panchenkoee0e9cc2021-12-23 20:38:333158 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
Amir Ayupov1c5d3a02020-12-02 00:29:393159 RtLibrary->link(*BC, ToolPath, *RTDyld, [this](RuntimeDyld &R) {
3160 this->mapExtraSections(*RTDyld);
3161 });
Rafael Auler62aa74f2019-07-24 21:03:433162
Maksim Panchenko163adbe2019-03-15 01:51:053163 // Once the code is emitted, we can rename function sections to actual
3164 // output sections and de-register sections used for emission.
Maksim Panchenko0465d952020-10-09 23:06:273165 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
3166 ErrorOr<BinarySection &> Section = Function->getCodeSection();
Maksim Panchenko40c2e0f2021-12-15 00:52:513167 if (Section &&
Maksim Panchenkoee0e9cc2021-12-23 20:38:333168 (Function->getImageAddress() == 0 || Function->getImageSize() == 0))
Maksim Panchenko0465d952020-10-09 23:06:273169 continue;
Maksim Panchenko0465d952020-10-09 23:06:273170
3171 // Restore origin section for functions that were emitted or supposed to
3172 // be emitted to patch sections.
3173 if (Section)
3174 BC->deregisterSection(*Section);
3175 assert(Function->getOriginSectionName() && "expected origin section");
Amir Ayupov1c5d3a02020-12-02 00:29:393176 Function->CodeSectionName = std::string(*Function->getOriginSectionName());
Maksim Panchenko0465d952020-10-09 23:06:273177 if (Function->isSplit()) {
Amir Ayupovc7306cc2021-04-08 07:19:263178 if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection())
Maksim Panchenko0465d952020-10-09 23:06:273179 BC->deregisterSection(*ColdSection);
Amir Ayupov1c5d3a02020-12-02 00:29:393180 Function->ColdCodeSectionName = std::string(getBOLTTextSectionName());
Maksim Panchenko163adbe2019-03-15 01:51:053181 }
3182 }
3183
spupyrevb77172c2017-10-16 23:53:503184 if (opts::PrintCacheMetrics) {
spupyrev48a53a72017-11-15 00:51:243185 outs() << "BOLT-INFO: cache metrics after emitting functions:\n";
Maksim Panchenko7fd48702019-04-03 22:52:013186 CacheMetrics::printAll(BC->getSortedFunctions());
spupyrevb77172c2017-10-16 23:53:503187 }
3188
Amir Ayupov081e39a2021-03-29 23:04:573189 if (opts::KeepTmp) {
Maksim Panchenkoe1a61e12016-02-08 18:08:283190 TempOut->keep();
Amir Ayupov081e39a2021-03-29 23:04:573191 outs() << "BOLT-INFO: intermediary output object file saved for debugging "
3192 "purposes: "
3193 << OutObjectPath << "\n";
3194 }
Rafael Aulerc67a7532015-11-24 01:54:183195}
3196
Maksim Panchenkof2b257b2019-11-04 05:57:153197void RewriteInstance::updateMetadata() {
3198 updateSDTMarkers();
takh0033a762020-08-04 20:50:003199 updateLKMarkers();
James Luodea6c242021-06-25 18:42:583200 parsePseudoProbe();
3201 updatePseudoProbes();
Maksim Panchenkof2b257b2019-11-04 05:57:153202
Maksim Panchenko87291712020-05-08 06:00:293203 if (opts::UpdateDebugSections) {
3204 NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName,
3205 TimerGroupDesc, opts::TimeRewrite);
3206 DebugInfoRewriter->updateDebugInfo();
3207 }
Maksim Panchenkoe9c6c732019-09-11 22:42:223208
Maksim Panchenkoee0e9cc2021-12-23 20:38:333209 if (opts::WriteBoltInfoSection)
Maksim Panchenko87291712020-05-08 06:00:293210 addBoltInfoSection();
Maksim Panchenkoe9c6c732019-09-11 22:42:223211}
3212
James Luodea6c242021-06-25 18:42:583213void RewriteInstance::updatePseudoProbes() {
James Luo3e55dea2021-07-15 21:58:323214 // check if there is pseudo probe section decoded
3215 if (BC->ProbeDecoder.getAddress2ProbesMap().empty())
3216 return;
James Luodea6c242021-06-25 18:42:583217 // input address converted to output
3218 AddressProbesMap &Address2ProbesMap = BC->ProbeDecoder.getAddress2ProbesMap();
3219 const GUIDProbeFunctionMap &GUID2Func =
3220 BC->ProbeDecoder.getGUID2FuncDescMap();
James Luo0df7bf72021-07-16 23:05:183221
James Luodea6c242021-06-25 18:42:583222 for (auto &AP : Address2ProbesMap) {
3223 BinaryFunction *F = BC->getBinaryFunctionContainingAddress(AP.first);
James Luo0df7bf72021-07-16 23:05:183224 // If F is removed, eliminate all probes inside it from inline tree
James Luodea6c242021-06-25 18:42:583225 // Setting probes' addresses as INT64_MAX means elimination
James Luo0df7bf72021-07-16 23:05:183226 if (!F) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:333227 for (MCDecodedPseudoProbe &Probe : AP.second)
James Luodea6c242021-06-25 18:42:583228 Probe.setAddress(INT64_MAX);
James Luodea6c242021-06-25 18:42:583229 continue;
3230 }
James Luo0df7bf72021-07-16 23:05:183231 // If F is not emitted, the function will remain in the same address as its
3232 // input
Maksim Panchenkoee0e9cc2021-12-23 20:38:333233 if (!F->isEmitted())
James Luo0df7bf72021-07-16 23:05:183234 continue;
Maksim Panchenkoee0e9cc2021-12-23 20:38:333235
James Luodea6c242021-06-25 18:42:583236 uint64_t Offset = AP.first - F->getAddress();
3237 const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset);
3238 uint64_t BlkOutputAddress = BB->getOutputAddressRange().first;
3239 // Check if block output address is defined.
3240 // If not, such block is removed from binary. Then remove the probes from
3241 // inline tree
3242 if (BlkOutputAddress == 0) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:333243 for (MCDecodedPseudoProbe &Probe : AP.second)
James Luodea6c242021-06-25 18:42:583244 Probe.setAddress(INT64_MAX);
James Luodea6c242021-06-25 18:42:583245 continue;
3246 }
James Luo0df7bf72021-07-16 23:05:183247
3248 unsigned ProbeTrack = AP.second.size();
3249 std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
3250 while (ProbeTrack != 0) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:333251 if (Probe->isBlock()) {
James Luo0df7bf72021-07-16 23:05:183252 Probe->setAddress(BlkOutputAddress);
Maksim Panchenkoee0e9cc2021-12-23 20:38:333253 } else if (Probe->isCall()) {
James Luo0df7bf72021-07-16 23:05:183254 // A call probe may be duplicated due to ICP
3255 // Go through output of InputOffsetToAddressMap to collect all related
3256 // probes
James Luodea6c242021-06-25 18:42:583257 const InputOffsetToAddressMapTy &Offset2Addr =
3258 F->getInputOffsetToAddressMap();
James Luo0df7bf72021-07-16 23:05:183259 auto CallOutputAddresses = Offset2Addr.equal_range(Offset);
3260 auto CallOutputAddress = CallOutputAddresses.first;
3261 if (CallOutputAddress == CallOutputAddresses.second) {
3262 Probe->setAddress(INT64_MAX);
3263 } else {
3264 Probe->setAddress(CallOutputAddress->second);
3265 CallOutputAddress = std::next(CallOutputAddress);
3266 }
3267
3268 while (CallOutputAddress != CallOutputAddresses.second) {
3269 AP.second.push_back(*Probe);
3270 AP.second.back().setAddress(CallOutputAddress->second);
3271 Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
3272 CallOutputAddress = std::next(CallOutputAddress);
3273 }
James Luodea6c242021-06-25 18:42:583274 }
James Luo0df7bf72021-07-16 23:05:183275 Probe = std::next(Probe);
3276 ProbeTrack--;
James Luodea6c242021-06-25 18:42:583277 }
3278 }
3279
3280 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All ||
3281 opts::PrintPseudoProbes ==
3282 opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) {
3283 outs() << "Pseudo Probe Address Conversion results:\n";
3284 // table that correlates address to block
3285 std::unordered_map<uint64_t, StringRef> Addr2BlockNames;
Maksim Panchenkoee0e9cc2021-12-23 20:38:333286 for (auto &F : BC->getBinaryFunctions())
3287 for (BinaryBasicBlock &BinaryBlock : F.second)
James Luodea6c242021-06-25 18:42:583288 Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] =
3289 BinaryBlock.getName();
Maksim Panchenkoee0e9cc2021-12-23 20:38:333290
James Luodea6c242021-06-25 18:42:583291 // scan all addresses -> correlate probe to block when print out
3292 std::vector<uint64_t> Addresses;
3293 for (auto &Entry : Address2ProbesMap)
3294 Addresses.push_back(Entry.first);
Amir Ayupovd2c87692022-06-24 05:15:473295 llvm::sort(Addresses);
James Luodea6c242021-06-25 18:42:583296 for (uint64_t Key : Addresses) {
3297 for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) {
3298 if (Probe.getAddress() == INT64_MAX)
3299 outs() << "Deleted Probe: ";
3300 else
3301 outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
3302 Probe.print(outs(), GUID2Func, true);
3303 // print block name only if the probe is block type and undeleted.
3304 if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
3305 outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
3306 << Addr2BlockNames[Probe.getAddress()] << "\n";
3307 }
3308 }
3309 outs() << "=======================================\n";
3310 }
James Luo3e55dea2021-07-15 21:58:323311
3312 // encode pseudo probes with updated addresses
3313 encodePseudoProbes();
3314}
3315
3316template <typename F>
3317static void emitLEB128IntValue(F encode, uint64_t Value,
3318 SmallString<8> &Contents) {
3319 SmallString<128> Tmp;
3320 raw_svector_ostream OSE(Tmp);
3321 encode(Value, OSE);
3322 Contents.append(OSE.str().begin(), OSE.str().end());
3323}
3324
3325void RewriteInstance::encodePseudoProbes() {
3326 // Buffer for new pseudo probes section
3327 SmallString<8> Contents;
3328 MCDecodedPseudoProbe *LastProbe = nullptr;
3329
Rafael Aulerae585be2021-11-12 02:14:533330 auto EmitInt = [&](uint64_t Value, uint32_t Size) {
James Luo3e55dea2021-07-15 21:58:323331 const bool IsLittleEndian = BC->AsmInfo->isLittleEndian();
3332 uint64_t Swapped = support::endian::byte_swap(
3333 Value, IsLittleEndian ? support::little : support::big);
3334 unsigned Index = IsLittleEndian ? 0 : 8 - Size;
3335 auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size);
3336 Contents.append(Entry.begin(), Entry.end());
3337 };
3338
3339 auto EmitULEB128IntValue = [&](uint64_t Value) {
3340 SmallString<128> Tmp;
3341 raw_svector_ostream OSE(Tmp);
3342 encodeULEB128(Value, OSE, 0);
3343 Contents.append(OSE.str().begin(), OSE.str().end());
3344 };
3345
3346 auto EmitSLEB128IntValue = [&](int64_t Value) {
3347 SmallString<128> Tmp;
3348 raw_svector_ostream OSE(Tmp);
3349 encodeSLEB128(Value, OSE);
3350 Contents.append(OSE.str().begin(), OSE.str().end());
3351 };
3352
3353 // Emit indiviual pseudo probes in a inline tree node
3354 // Probe index, type, attribute, address type and address are encoded
3355 // Address of the first probe is absolute.
3356 // Other probes' address are represented by delta
3357 auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) {
3358 EmitULEB128IntValue(CurProbe->getIndex());
3359 uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4);
3360 uint8_t Flag =
3361 LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0;
3362 EmitInt(Flag | PackedType, 1);
3363 if (LastProbe) {
3364 // Emit the delta between the address label and LastProbe.
3365 int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress();
3366 EmitSLEB128IntValue(Delta);
3367 } else {
3368 // Emit absolute address for encoding the first pseudo probe.
Rafael Aulerae585be2021-11-12 02:14:533369 uint32_t AddrSize = BC->AsmInfo->getCodePointerSize();
James Luo3e55dea2021-07-15 21:58:323370 EmitInt(CurProbe->getAddress(), AddrSize);
3371 }
3372 };
3373
3374 std::map<InlineSite, MCDecodedPseudoProbeInlineTree *,
3375 std::greater<InlineSite>>
3376 Inlinees;
3377
3378 // DFS of inline tree to emit pseudo probes in all tree node
3379 // Inline site index of a probe is emitted first.
3380 // Then tree node Guid, size of pseudo probes and children nodes, and detail
3381 // of contained probes are emitted Deleted probes are skipped Root node is not
3382 // encoded to binaries. It's a "wrapper" of inline trees of each function.
3383 std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes;
3384 const MCDecodedPseudoProbeInlineTree &Root =
3385 BC->ProbeDecoder.getDummyInlineRoot();
3386 for (auto Child = Root.getChildren().begin();
3387 Child != Root.getChildren().end(); ++Child)
3388 Inlinees[Child->first] = Child->second.get();
3389
3390 for (auto Inlinee : Inlinees)
3391 // INT64_MAX is "placeholder" of unused callsite index field in the pair
3392 NextNodes.push_back({INT64_MAX, Inlinee.second});
3393
3394 Inlinees.clear();
3395
3396 while (!NextNodes.empty()) {
3397 uint64_t ProbeIndex = NextNodes.back().first;
3398 MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second;
3399 NextNodes.pop_back();
3400
3401 if (Cur->Parent && !Cur->Parent->isRoot())
3402 // Emit probe inline site
3403 EmitULEB128IntValue(ProbeIndex);
3404
3405 // Emit probes grouped by GUID.
3406 LLVM_DEBUG({
3407 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
3408 dbgs() << "GUID: " << Cur->Guid << "\n";
3409 });
3410 // Emit Guid
3411 EmitInt(Cur->Guid, 8);
3412 // Emit number of probes in this node
3413 uint64_t Deleted = 0;
3414 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes())
3415 if (Probe->getAddress() == INT64_MAX)
3416 Deleted++;
3417 LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
3418 uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
3419 EmitULEB128IntValue(ProbesSize);
3420 // Emit number of direct inlinees
3421 EmitULEB128IntValue(Cur->getChildren().size());
3422 // Emit probes in this group
3423 for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) {
3424 if (Probe->getAddress() == INT64_MAX)
3425 continue;
3426 EmitDecodedPseudoProbe(Probe);
3427 LastProbe = Probe;
3428 }
3429
3430 for (auto Child = Cur->getChildren().begin();
3431 Child != Cur->getChildren().end(); ++Child)
3432 Inlinees[Child->first] = Child->second.get();
3433 for (const auto &Inlinee : Inlinees) {
3434 assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
3435 NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});
3436 LLVM_DEBUG({
3437 dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
3438 dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n";
3439 });
3440 }
3441 Inlinees.clear();
3442 }
3443
3444 // Create buffer for new contents for the section
3445 // Freed when parent section is destroyed
3446 uint8_t *Output = new uint8_t[Contents.str().size()];
3447 memcpy(Output, Contents.str().data(), Contents.str().size());
3448 addToDebugSectionsToOverwrite(".pseudo_probe");
3449 BC->registerOrUpdateSection(".pseudo_probe", PseudoProbeSection->getELFType(),
3450 PseudoProbeSection->getELFFlags(), Output,
3451 Contents.str().size(), 1);
James Luo0df7bf72021-07-16 23:05:183452 if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All ||
3453 opts::PrintPseudoProbes ==
3454 opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) {
3455 // create a dummy decoder;
3456 MCPseudoProbeDecoder DummyDecoder;
3457 StringRef DescContents = PseudoProbeDescSection->getContents();
3458 DummyDecoder.buildGUID2FuncDescMap(
3459 reinterpret_cast<const uint8_t *>(DescContents.data()),
3460 DescContents.size());
3461 StringRef ProbeContents = PseudoProbeSection->getOutputContents();
3462 DummyDecoder.buildAddress2ProbeMap(
3463 reinterpret_cast<const uint8_t *>(ProbeContents.data()),
3464 ProbeContents.size());
3465 DummyDecoder.printProbesForAllAddresses(outs());
3466 }
James Luodea6c242021-06-25 18:42:583467}
3468
Maksim Panchenkof2b257b2019-11-04 05:57:153469void RewriteInstance::updateSDTMarkers() {
3470 NamedRegionTimer T("updateSDTMarkers", "update SDT markers", TimerGroupName,
3471 TimerGroupDesc, opts::TimeRewrite);
3472
Amir Ayupovf1bfb182021-03-18 20:06:183473 if (!SDTSection)
3474 return;
3475 SDTSection->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
3476
Amir Ayupovc7306cc2021-04-08 07:19:263477 SimpleBinaryPatcher *SDTNotePatcher =
Amir Ayupovf1bfb182021-03-18 20:06:183478 static_cast<SimpleBinaryPatcher *>(SDTSection->getPatcher());
Maksim Panchenkof2b257b2019-11-04 05:57:153479 for (auto &SDTInfoKV : BC->SDTMarkers) {
Amir Ayupovc7306cc2021-04-08 07:19:263480 const uint64_t OriginalAddress = SDTInfoKV.first;
3481 SDTMarkerInfo &SDTInfo = SDTInfoKV.second;
3482 const BinaryFunction *F =
3483 BC->getBinaryFunctionContainingAddress(OriginalAddress);
Maksim Panchenkof2b257b2019-11-04 05:57:153484 if (!F)
3485 continue;
Amir Ayupovc7306cc2021-04-08 07:19:263486 const uint64_t NewAddress =
3487 F->translateInputToOutputAddress(OriginalAddress);
Maksim Panchenkof2b257b2019-11-04 05:57:153488 SDTNotePatcher->addLE64Patch(SDTInfo.PCOffset, NewAddress);
3489 }
3490}
3491
takh0033a762020-08-04 20:50:003492void RewriteInstance::updateLKMarkers() {
Maksim Panchenkoee0e9cc2021-12-23 20:38:333493 if (BC->LKMarkers.size() == 0)
takh0033a762020-08-04 20:50:003494 return;
takh0033a762020-08-04 20:50:003495
3496 NamedRegionTimer T("updateLKMarkers", "update LK markers", TimerGroupName,
3497 TimerGroupDesc, opts::TimeRewrite);
3498
3499 std::unordered_map<std::string, uint64_t> PatchCounts;
Amir Ayupovc7306cc2021-04-08 07:19:263500 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>>
3501 &LKMarkerInfoKV : BC->LKMarkers) {
Maksim Panchenkoa10f7992020-09-15 18:42:033502 const uint64_t OriginalAddress = LKMarkerInfoKV.first;
Amir Ayupovc7306cc2021-04-08 07:19:263503 const BinaryFunction *BF =
takh0033a762020-08-04 20:50:003504 BC->getBinaryFunctionContainingAddress(OriginalAddress, false, true);
Maksim Panchenkoa10f7992020-09-15 18:42:033505 if (!BF)
takh0033a762020-08-04 20:50:003506 continue;
Maksim Panchenkoa10f7992020-09-15 18:42:033507
3508 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress);
3509 if (NewAddress == 0)
takh0033a762020-08-04 20:50:003510 continue;
Maksim Panchenkoa10f7992020-09-15 18:42:033511
3512 // Apply base address.
3513 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff)
takh0033a762020-08-04 20:50:003514 NewAddress = NewAddress + 0xffffffff00000000;
Maksim Panchenkoa10f7992020-09-15 18:42:033515
3516 if (OriginalAddress == NewAddress)
takh0033a762020-08-04 20:50:003517 continue;
Maksim Panchenkoa10f7992020-09-15 18:42:033518
Amir Ayupovc7306cc2021-04-08 07:19:263519 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) {
Maksim Panchenkoa10f7992020-09-15 18:42:033520 StringRef SectionName = LKMarkerInfo.SectionName;
takh0033a762020-08-04 20:50:003521 SimpleBinaryPatcher *LKPatcher;
Amir Ayupovc7306cc2021-04-08 07:19:263522 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName);
Amir Ayupovf1bfb182021-03-18 20:06:183523 assert(BSec && "missing section info for kernel section");
3524 if (!BSec->getPatcher())
3525 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
3526 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher());
Amir Ayupov1c5d3a02020-12-02 00:29:393527 PatchCounts[std::string(SectionName)]++;
Maksim Panchenkoee0e9cc2021-12-23 20:38:333528 if (LKMarkerInfo.IsPCRelative)
takh0033a762020-08-04 20:50:003529 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset,
3530 NewAddress - OriginalAddress +
3531 LKMarkerInfo.PCRelativeOffset);
Maksim Panchenkoee0e9cc2021-12-23 20:38:333532 else
takh0033a762020-08-04 20:50:003533 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress);
takh0033a762020-08-04 20:50:003534 }
3535 }
3536 outs() << "BOLT-INFO: patching linux kernel sections. Total patches per "
3537 "section are as follows:\n";
Maksim Panchenkoee0e9cc2021-12-23 20:38:333538 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts)
takh0033a762020-08-04 20:50:003539 outs() << " Section: " << KV.first << ", patch-counts: " << KV.second
3540 << '\n';
takh0033a762020-08-04 20:50:003541}
3542
Amir Ayupov1c5d3a02020-12-02 00:29:393543void RewriteInstance::mapFileSections(RuntimeDyld &RTDyld) {
3544 mapCodeSections(RTDyld);
3545 mapDataSections(RTDyld);
Bill Nell729da2d2018-04-21 03:03:313546}
Rafael Auler35c09dc2018-06-20 19:03:243547
Maksim Panchenko40c2e0f2021-12-15 00:52:513548std::vector<BinarySection *> RewriteInstance::getCodeSections() {
Maksim Panchenko6bcb3382019-03-15 20:43:363549 std::vector<BinarySection *> CodeSections;
Maksim Panchenkoee0e9cc2021-12-23 20:38:333550 for (BinarySection &Section : BC->textSections())
Maksim Panchenko6bcb3382019-03-15 20:43:363551 if (Section.hasValidSectionID())
3552 CodeSections.emplace_back(&Section);
Maksim Panchenko6bcb3382019-03-15 20:43:363553
3554 auto compareSections = [&](const BinarySection *A, const BinarySection *B) {
3555 // Place movers before anything else.
3556 if (A->getName() == BC->getHotTextMoverSectionName())
3557 return true;
3558 if (B->getName() == BC->getHotTextMoverSectionName())
3559 return false;
3560
3561 // Depending on the option, put main text at the beginning or at the end.
Maksim Panchenkoee0e9cc2021-12-23 20:38:333562 if (opts::HotFunctionsAtEnd)
Maksim Panchenko6bcb3382019-03-15 20:43:363563 return B->getName() == BC->getMainCodeSectionName();
Maksim Panchenkoee0e9cc2021-12-23 20:38:333564 else
Maksim Panchenko6bcb3382019-03-15 20:43:363565 return A->getName() == BC->getMainCodeSectionName();
Maksim Panchenko6bcb3382019-03-15 20:43:363566 };
3567
3568 // Determine the order of sections.
Amir Ayupovd2c87692022-06-24 05:15:473569 llvm::stable_sort(CodeSections, compareSections);
Maksim Panchenko6bcb3382019-03-15 20:43:363570
3571 return CodeSections;
3572}
3573
Amir Ayupov1c5d3a02020-12-02 00:29:393574void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) {
Maksim Panchenkob6f7c682017-12-10 05:40:393575 if (BC->HasRelocations) {
Rafael Auler47ce9b32021-09-16 01:03:503576 ErrorOr<BinarySection &> TextSection =
3577 BC->getUniqueSectionByName(BC->getMainCodeSectionName());
3578 assert(TextSection && ".text section not found in output");
Maksim Panchenkod1b76f22019-03-22 04:13:453579 assert(TextSection->hasValidSectionID() && ".text section should be valid");
3580
Maksim Panchenko0465d952020-10-09 23:06:273581 // Map sections for functions with pre-assigned addresses.
Amir Ayupovc7306cc2021-04-08 07:19:263582 for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) {
Maksim Panchenko0465d952020-10-09 23:06:273583 const uint64_t OutputAddress = InjectedFunction->getOutputAddress();
3584 if (!OutputAddress)
3585 continue;
3586
3587 ErrorOr<BinarySection &> FunctionSection =
3588 InjectedFunction->getCodeSection();
3589 assert(FunctionSection && "function should have section");
3590 FunctionSection->setOutputAddress(OutputAddress);
Amir Ayupov1c5d3a02020-12-02 00:29:393591 RTDyld.reassignSectionAddress(FunctionSection->getSectionID(),
3592 OutputAddress);
Maksim Panchenko0465d952020-10-09 23:06:273593 InjectedFunction->setImageAddress(FunctionSection->getAllocAddress());
3594 InjectedFunction->setImageSize(FunctionSection->getOutputSize());
3595 }
3596
Maksim Panchenko0a550012019-03-15 03:32:043597 // Populate the list of sections to be allocated.
Maksim Panchenko0465d952020-10-09 23:06:273598 std::vector<BinarySection *> CodeSections = getCodeSections();
3599
3600 // Remove sections that were pre-allocated (patch sections).
Amir Ayupovd2c87692022-06-24 05:15:473601 llvm::erase_if(CodeSections, [](BinarySection *Section) {
3602 return Section->getOutputAddress();
3603 });
Amir Ayupov1c5d3a02020-12-02 00:29:393604 LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n";
Maksim Panchenkoee0e9cc2021-12-23 20:38:333605 for (const BinarySection *Section : CodeSections)
Maksim Panchenko0a550012019-03-15 03:32:043606 dbgs() << Section->getName() << '\n';
Maksim Panchenkoee0e9cc2021-12-23 20:38:333607 );
Maksim Panchenko163adbe2019-03-15 01:51:053608
Maksim Panchenkofe37f182021-05-13 17:50:473609 uint64_t PaddingSize = 0; // size of padding required at the end
Maksim Panchenkod1b76f22019-03-22 04:13:453610
3611 // Allocate sections starting at a given Address.
3612 auto allocateAt = [&](uint64_t Address) {
Amir Ayupovc7306cc2021-04-08 07:19:263613 for (BinarySection *Section : CodeSections) {
Maksim Panchenkod1b76f22019-03-22 04:13:453614 Address = alignTo(Address, Section->getAlignment());
3615 Section->setOutputAddress(Address);
3616 Address += Section->getOutputSize();
3617 }
3618
3619 // Make sure we allocate enough space for huge pages.
3620 if (opts::HotText) {
Amir Ayupovc7306cc2021-04-08 07:19:263621 uint64_t HotTextEnd =
3622 TextSection->getOutputAddress() + TextSection->getOutputSize();
Maksim Panchenkod1b76f22019-03-22 04:13:453623 HotTextEnd = alignTo(HotTextEnd, BC->PageAlign);
3624 if (HotTextEnd > Address) {
3625 PaddingSize = HotTextEnd - Address;
3626 Address = HotTextEnd;
3627 }
3628 }
3629 return Address;
3630 };
Maksim Panchenko0a550012019-03-15 03:32:043631
3632 // Check if we can fit code in the original .text
Maksim Panchenkofe37f182021-05-13 17:50:473633 bool AllocationDone = false;
Maksim Panchenko0a550012019-03-15 03:32:043634 if (opts::UseOldText) {
Amir Ayupovc7306cc2021-04-08 07:19:263635 const uint64_t CodeSize =
3636 allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress;
Maksim Panchenko0a550012019-03-15 03:32:043637
3638 if (CodeSize <= BC->OldTextSectionSize) {
3639 outs() << "BOLT-INFO: using original .text for new code with 0x"
Maksim Panchenko23edb3e2020-04-19 22:02:503640 << Twine::utohexstr(opts::AlignText) << " alignment\n";
Maksim Panchenkod1b76f22019-03-22 04:13:453641 AllocationDone = true;
3642 } else {
Maksim Panchenko1387a9d2018-09-25 03:58:313643 errs() << "BOLT-WARNING: original .text too small to fit the new code"
Maksim Panchenko23edb3e2020-04-19 22:02:503644 << " using 0x" << Twine::utohexstr(opts::AlignText)
Maksim Panchenko40c2e0f2021-12-15 00:52:513645 << " alignment. " << CodeSize << " bytes needed, have "
3646 << BC->OldTextSectionSize << " bytes available.\n";
Maksim Panchenko1387a9d2018-09-25 03:58:313647 opts::UseOldText = false;
Maksim Panchenko55fc5412016-09-28 02:09:383648 }
Maksim Panchenko55fc5412016-09-28 02:09:383649 }
Maksim Panchenko55fc5412016-09-28 02:09:383650
Maksim Panchenkoee0e9cc2021-12-23 20:38:333651 if (!AllocationDone)
Maksim Panchenkod1b76f22019-03-22 04:13:453652 NextAvailableAddress = allocateAt(NextAvailableAddress);
Maksim Panchenkod1b76f22019-03-22 04:13:453653
3654 // Do the mapping for ORC layer based on the allocation.
Amir Ayupovc7306cc2021-04-08 07:19:263655 for (BinarySection *Section : CodeSections) {
Amir Ayupov1c5d3a02020-12-02 00:29:393656 LLVM_DEBUG(
3657 dbgs() << "BOLT: mapping " << Section->getName() << " at 0x"
3658 << Twine::utohexstr(Section->getAllocAddress()) << " to 0x"
3659 << Twine::utohexstr(Section->getOutputAddress()) << '\n');
3660 RTDyld.reassignSectionAddress(Section->getSectionID(),
3661 Section->getOutputAddress());
Maksim Panchenko0ce0bce2020-06-15 07:15:473662 Section->setOutputFileOffset(
Maksim Panchenkod1b76f22019-03-22 04:13:453663 getFileOffsetForAddress(Section->getOutputAddress()));
Maksim Panchenko0a550012019-03-15 03:32:043664 }
Maksim Panchenko163adbe2019-03-15 01:51:053665
Maksim Panchenkod1b76f22019-03-22 04:13:453666 // Check if we need to insert a padding section for hot text.
Maksim Panchenkoee0e9cc2021-12-23 20:38:333667 if (PaddingSize && !opts::UseOldText)
Maksim Panchenkod1b76f22019-03-22 04:13:453668 outs() << "BOLT-INFO: padding code to 0x"
3669 << Twine::utohexstr(NextAvailableAddress)
3670 << " to accommodate hot text\n";
Maksim Panchenko163adbe2019-03-15 01:51:053671
Maksim Panchenkod1b76f22019-03-22 04:13:453672 return;
3673 }
Laith Saed Sakka27f30322018-07-08 19:14:083674
Maksim Panchenkod1b76f22019-03-22 04:13:453675 // Processing in non-relocation mode.
Amir Ayupovc7306cc2021-04-08 07:19:263676 uint64_t NewTextSectionStartAddress = NextAvailableAddress;
Maksim Panchenko163adbe2019-03-15 01:51:053677
Maksim Panchenko7fd48702019-04-03 22:52:013678 for (auto &BFI : BC->getBinaryFunctions()) {
Amir Ayupovc7306cc2021-04-08 07:19:263679 BinaryFunction &Function = BFI.second;
Maksim Panchenko04c5d4f2020-05-03 20:54:453680 if (!Function.isEmitted())
Maksim Panchenkod1b76f22019-03-22 04:13:453681 continue;
3682
Amir Ayupovc7306cc2021-04-08 07:19:263683 bool TooLarge = false;
3684 ErrorOr<BinarySection &> FuncSection = Function.getCodeSection();
Maksim Panchenkod1b76f22019-03-22 04:13:453685 assert(FuncSection && "cannot find section for function");
3686 FuncSection->setOutputAddress(Function.getAddress());
Amir Ayupov1c5d3a02020-12-02 00:29:393687 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
3688 << Twine::utohexstr(FuncSection->getAllocAddress())
3689 << " to 0x" << Twine::utohexstr(Function.getAddress())
3690 << '\n');
3691 RTDyld.reassignSectionAddress(FuncSection->getSectionID(),
3692 Function.getAddress());
Maksim Panchenkod1b76f22019-03-22 04:13:453693 Function.setImageAddress(FuncSection->getAllocAddress());
3694 Function.setImageSize(FuncSection->getOutputSize());
3695 if (Function.getImageSize() > Function.getMaxSize()) {
3696 TooLarge = true;
3697 FailedAddresses.emplace_back(Function.getAddress());
Laith Saed Sakka27f30322018-07-08 19:14:083698 }
3699
Maksim Panchenkod1b76f22019-03-22 04:13:453700 // Map jump tables if updating in-place.
3701 if (opts::JumpTables == JTS_BASIC) {
3702 for (auto &JTI : Function.JumpTables) {
Amir Ayupovc7306cc2021-04-08 07:19:263703 JumpTable *JT = JTI.second;
3704 BinarySection &Section = JT->getOutputSection();
Maksim Panchenkod1b76f22019-03-22 04:13:453705 Section.setOutputAddress(JT->getAddress());
Rafael Auler7b779f82021-09-10 23:19:503706 Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress()));
Amir Ayupov1c5d3a02020-12-02 00:29:393707 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping " << Section.getName()
3708 << " to 0x" << Twine::utohexstr(JT->getAddress())
3709 << '\n');
Rafael Auler7b779f82021-09-10 23:19:503710 RTDyld.reassignSectionAddress(Section.getSectionID(), JT->getAddress());
Maksim Panchenko55fc5412016-09-28 02:09:383711 }
Maksim Panchenko55fc5412016-09-28 02:09:383712 }
3713
Maksim Panchenkod1b76f22019-03-22 04:13:453714 if (!Function.isSplit())
3715 continue;
3716
Amir Ayupovc7306cc2021-04-08 07:19:263717 ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection();
Maksim Panchenkod1b76f22019-03-22 04:13:453718 assert(ColdSection && "cannot find section for cold part");
3719 // Cold fragments are aligned at 16 bytes.
3720 NextAvailableAddress = alignTo(NextAvailableAddress, 16);
Amir Ayupovc7306cc2021-04-08 07:19:263721 BinaryFunction::FragmentInfo &ColdPart = Function.cold();
Maksim Panchenkod1b76f22019-03-22 04:13:453722 if (TooLarge) {
3723 // The corresponding FDE will refer to address 0.
3724 ColdPart.setAddress(0);
3725 ColdPart.setImageAddress(0);
3726 ColdPart.setImageSize(0);
3727 ColdPart.setFileOffset(0);
3728 } else {
3729 ColdPart.setAddress(NextAvailableAddress);
3730 ColdPart.setImageAddress(ColdSection->getAllocAddress());
3731 ColdPart.setImageSize(ColdSection->getOutputSize());
3732 ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress));
3733 ColdSection->setOutputAddress(ColdPart.getAddress());
Maksim Panchenko55fc5412016-09-28 02:09:383734 }
Maksim Panchenkod1b76f22019-03-22 04:13:453735
Amir Ayupov1c5d3a02020-12-02 00:29:393736 LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x"
3737 << Twine::utohexstr(ColdPart.getImageAddress())
3738 << " to 0x" << Twine::utohexstr(ColdPart.getAddress())
3739 << " with size "
3740 << Twine::utohexstr(ColdPart.getImageSize()) << '\n');
3741 RTDyld.reassignSectionAddress(ColdSection->getSectionID(),
3742 ColdPart.getAddress());
Maksim Panchenkod1b76f22019-03-22 04:13:453743
3744 NextAvailableAddress += ColdPart.getImageSize();
3745 }
3746
3747 // Add the new text section aggregating all existing code sections.
3748 // This is pseudo-section that serves a purpose of creating a corresponding
3749 // entry in section header table.
Amir Ayupovc7306cc2021-04-08 07:19:263750 int64_t NewTextSectionSize =
3751 NextAvailableAddress - NewTextSectionStartAddress;
Maksim Panchenkod1b76f22019-03-22 04:13:453752 if (NewTextSectionSize) {
Amir Ayupovc7306cc2021-04-08 07:19:263753 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
3754 /*IsText=*/true,
3755 /*IsAllocatable=*/true);
3756 BinarySection &Section =
Maksim Panchenkoa07f1a22020-03-11 22:51:323757 BC->registerOrUpdateSection(getBOLTTextSectionName(),
3758 ELF::SHT_PROGBITS,
3759 Flags,
3760 /*Data=*/nullptr,
3761 NewTextSectionSize,
3762 16);
Maksim Panchenkod1b76f22019-03-22 04:13:453763 Section.setOutputAddress(NewTextSectionStartAddress);
Maksim Panchenko0ce0bce2020-06-15 07:15:473764 Section.setOutputFileOffset(
Maksim Panchenko40c2e0f2021-12-15 00:52:513765 getFileOffsetForAddress(NewTextSectionStartAddress));
Maksim Panchenko55fc5412016-09-28 02:09:383766 }
Bill Nell729da2d2018-04-21 03:03:313767}
Maksim Panchenko55fc5412016-09-28 02:09:383768
Amir Ayupov1c5d3a02020-12-02 00:29:393769void RewriteInstance::mapDataSections(RuntimeDyld &RTDyld) {
Maksim Panchenko55fc5412016-09-28 02:09:383770 // Map special sections to their addresses in the output image.
3771 // These are the sections that we generate via MCStreamer.
3772 // The order is important.
Rafael Auler0d23cba2019-06-20 03:10:493773 std::vector<std::string> Sections = {
Maksim Panchenkoa07f1a22020-03-11 22:51:323774 ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(),
Xun Li9bd71612020-05-02 18:14:383775 ".gcc_except_table", ".rodata", ".rodata.cold"};
Maksim Panchenkoee0e9cc2021-12-23 20:38:333776 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
Xun Li9bd71612020-05-02 18:14:383777 RtLibrary->addRuntimeLibSections(Sections);
Maksim Panchenkoee0e9cc2021-12-23 20:38:333778
Amir Ayupovc7306cc2021-04-08 07:19:263779 for (std::string &SectionName : Sections) {
3780 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
Bill Nellddefc772018-02-02 00:33:433781 if (!Section || !Section->isAllocatable() || !Section->isFinalized())
Maksim Panchenko55fc5412016-09-28 02:09:383782 continue;
Maksim Panchenko40c2e0f2021-12-15 00:52:513783 NextAvailableAddress =
3784 alignTo(NextAvailableAddress, Section->getAlignment());
Amir Ayupov1c5d3a02020-12-02 00:29:393785 LLVM_DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x"
3786 << Twine::utohexstr(Section->getAllocAddress())
3787 << ") to 0x" << Twine::utohexstr(NextAvailableAddress)
3788 << ":0x"
3789 << Twine::utohexstr(NextAvailableAddress +
3790 Section->getOutputSize())
3791 << '\n');
Maksim Panchenko55fc5412016-09-28 02:09:383792
Amir Ayupov1c5d3a02020-12-02 00:29:393793 RTDyld.reassignSectionAddress(Section->getSectionID(),
3794 NextAvailableAddress);
Maksim Panchenko163adbe2019-03-15 01:51:053795 Section->setOutputAddress(NextAvailableAddress);
Maksim Panchenko0ce0bce2020-06-15 07:15:473796 Section->setOutputFileOffset(getFileOffsetForAddress(NextAvailableAddress));
Maksim Panchenko55fc5412016-09-28 02:09:383797
Bill Nellddefc772018-02-02 00:33:433798 NextAvailableAddress += Section->getOutputSize();
Maksim Panchenko55fc5412016-09-28 02:09:383799 }
3800
3801 // Handling for sections with relocations.
Amir Ayupovc7306cc2021-04-08 07:19:263802 for (BinarySection &Section : BC->sections()) {
Maksim Panchenko0ce0bce2020-06-15 07:15:473803 if (!Section.hasSectionRef())
Bill Nell2640b402018-01-23 23:10:243804 continue;
3805
3806 StringRef SectionName = Section.getName();
Amir Ayupovc7306cc2021-04-08 07:19:263807 ErrorOr<BinarySection &> OrgSection =
3808 BC->getUniqueSectionByName((getOrgSecPrefix() + SectionName).str());
Bill Nellddefc772018-02-02 00:33:433809 if (!OrgSection ||
3810 !OrgSection->isAllocatable() ||
Maksim Panchenko0ce0bce2020-06-15 07:15:473811 !OrgSection->isFinalized() ||
3812 !OrgSection->hasValidSectionID())
Maksim Panchenko55fc5412016-09-28 02:09:383813 continue;
Maksim Panchenko55fc5412016-09-28 02:09:383814
Maksim Panchenko163adbe2019-03-15 01:51:053815 if (OrgSection->getOutputAddress()) {
Amir Ayupov1c5d3a02020-12-02 00:29:393816 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: section " << SectionName
3817 << " is already mapped at 0x"
3818 << Twine::utohexstr(OrgSection->getOutputAddress())
3819 << '\n');
Maksim Panchenko55fc5412016-09-28 02:09:383820 continue;
3821 }
Amir Ayupov1c5d3a02020-12-02 00:29:393822 LLVM_DEBUG(
3823 dbgs() << "BOLT: mapping original section " << SectionName << " (0x"
3824 << Twine::utohexstr(OrgSection->getAllocAddress()) << ") to 0x"
3825 << Twine::utohexstr(Section.getAddress()) << '\n');
Maksim Panchenko55fc5412016-09-28 02:09:383826
Amir Ayupov1c5d3a02020-12-02 00:29:393827 RTDyld.reassignSectionAddress(OrgSection->getSectionID(),
3828 Section.getAddress());
Maksim Panchenko55fc5412016-09-28 02:09:383829
Maksim Panchenko163adbe2019-03-15 01:51:053830 OrgSection->setOutputAddress(Section.getAddress());
Maksim Panchenko0ce0bce2020-06-15 07:15:473831 OrgSection->setOutputFileOffset(Section.getContents().data() -
3832 InputFile->getData().data());
Maksim Panchenko55fc5412016-09-28 02:09:383833 }
Maksim Panchenko55fc5412016-09-28 02:09:383834}
3835
Amir Ayupov1c5d3a02020-12-02 00:29:393836void RewriteInstance::mapExtraSections(RuntimeDyld &RTDyld) {
Amir Ayupovc7306cc2021-04-08 07:19:263837 for (BinarySection &Section : BC->allocatableSections()) {
Rafael Auler62aa74f2019-07-24 21:03:433838 if (Section.getOutputAddress() || !Section.hasValidSectionID())
3839 continue;
3840 NextAvailableAddress =
3841 alignTo(NextAvailableAddress, Section.getAlignment());
3842 Section.setOutputAddress(NextAvailableAddress);
3843 NextAvailableAddress += Section.getOutputSize();
3844
Amir Ayupov1c5d3a02020-12-02 00:29:393845 LLVM_DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName()
3846 << " at 0x" << Twine::utohexstr(Section.getAllocAddress())
3847 << " to 0x"
3848 << Twine::utohexstr(Section.getOutputAddress()) << '\n');
Rafael Auler62aa74f2019-07-24 21:03:433849
Amir Ayupov1c5d3a02020-12-02 00:29:393850 RTDyld.reassignSectionAddress(Section.getSectionID(),
3851 Section.getOutputAddress());
Maksim Panchenko0ce0bce2020-06-15 07:15:473852 Section.setOutputFileOffset(
Amir Ayupov1c5d3a02020-12-02 00:29:393853 getFileOffsetForAddress(Section.getOutputAddress()));
Rafael Auler62aa74f2019-07-24 21:03:433854 }
3855}
3856
Maksim Panchenko3f42fdf2017-05-09 05:51:363857void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:333858 for (BinaryFunction *Function : BC->getAllBinaryFunctions())
Maksim Panchenko0465d952020-10-09 23:06:273859 Function->updateOutputValues(Layout);
Maksim Panchenko3f42fdf2017-05-09 05:51:363860}
3861
Maksim Panchenkod68b1c7b2016-03-03 18:13:113862void RewriteInstance::patchELFPHDRTable() {
3863 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
Maksim Panchenko50c895a2016-02-08 18:02:483864 if (!ELF64LEFile) {
3865 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
3866 exit(1);
Rafael Aulerc67a7532015-11-24 01:54:183867 }
Amir Ayupovc7306cc2021-04-08 07:19:263868 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
3869 raw_fd_ostream &OS = Out->os();
Maksim Panchenko7f7d4af2016-02-13 03:01:533870
3871 // Write/re-write program headers.
Amir Ayupov1c5d3a02020-12-02 00:29:393872 Phnum = Obj.getHeader().e_phnum;
Maksim Panchenko7f7d4af2016-02-13 03:01:533873 if (PHDRTableOffset) {
3874 // Writing new pheader table.
3875 Phnum += 1; // only adding one new segment
3876 // Segment size includes the size of the PHDR area.
3877 NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
3878 } else {
3879 assert(!PHDRTableAddress && "unexpected address for program header table");
Maksim Panchenko7f7d4af2016-02-13 03:01:533880 // Update existing table.
Amir Ayupov1c5d3a02020-12-02 00:29:393881 PHDRTableOffset = Obj.getHeader().e_phoff;
Maksim Panchenko7f7d4af2016-02-13 03:01:533882 NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
3883 }
Maksim Panchenko50c895a2016-02-08 18:02:483884 OS.seek(PHDRTableOffset);
3885
Maksim Panchenko7f7d4af2016-02-13 03:01:533886 bool ModdedGnuStack = false;
Maksim Panchenko24285672017-05-25 17:29:383887 (void)ModdedGnuStack;
Maksim Panchenko7f7d4af2016-02-13 03:01:533888 bool AddedSegment = false;
Maksim Panchenko24285672017-05-25 17:29:383889 (void)AddedSegment;
Maksim Panchenko50c895a2016-02-08 18:02:483890
Maksim Panchenko250ca402020-06-26 23:52:073891 auto createNewTextPhdr = [&]() {
Amir Ayupovc7306cc2021-04-08 07:19:263892 ELF64LEPhdrTy NewPhdr;
Maksim Panchenko250ca402020-06-26 23:52:073893 NewPhdr.p_type = ELF::PT_LOAD;
3894 if (PHDRTableAddress) {
3895 NewPhdr.p_offset = PHDRTableOffset;
3896 NewPhdr.p_vaddr = PHDRTableAddress;
3897 NewPhdr.p_paddr = PHDRTableAddress;
3898 } else {
3899 NewPhdr.p_offset = NewTextSegmentOffset;
3900 NewPhdr.p_vaddr = NewTextSegmentAddress;
3901 NewPhdr.p_paddr = NewTextSegmentAddress;
3902 }
3903 NewPhdr.p_filesz = NewTextSegmentSize;
3904 NewPhdr.p_memsz = NewTextSegmentSize;
3905 NewPhdr.p_flags = ELF::PF_X | ELF::PF_R;
3906 // FIXME: Currently instrumentation is experimental and the runtime data
3907 // is emitted with code, thus everything needs to be writable
3908 if (opts::Instrument)
3909 NewPhdr.p_flags |= ELF::PF_W;
3910 NewPhdr.p_align = BC->PageAlign;
3911
3912 return NewPhdr;
3913 };
3914
Maksim Panchenko50c895a2016-02-08 18:02:483915 // Copy existing program headers with modifications.
Amir Ayupovc7306cc2021-04-08 07:19:263916 for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) {
3917 ELF64LE::Phdr NewPhdr = Phdr;
Maksim Panchenko7f7d4af2016-02-13 03:01:533918 if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) {
Maksim Panchenko50c895a2016-02-08 18:02:483919 NewPhdr.p_offset = PHDRTableOffset;
3920 NewPhdr.p_vaddr = PHDRTableAddress;
3921 NewPhdr.p_paddr = PHDRTableAddress;
3922 NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum;
3923 NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum;
Maksim Panchenko50c895a2016-02-08 18:02:483924 } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) {
Amir Ayupovc7306cc2021-04-08 07:19:263925 ErrorOr<BinarySection &> EHFrameHdrSec =
3926 BC->getUniqueSectionByName(".eh_frame_hdr");
Maksim Panchenko40c2e0f2021-12-15 00:52:513927 if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() &&
Bill Nellddefc772018-02-02 00:33:433928 EHFrameHdrSec->isFinalized()) {
Maksim Panchenko0ce0bce2020-06-15 07:15:473929 NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset();
Maksim Panchenko163adbe2019-03-15 01:51:053930 NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress();
3931 NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress();
Bill Nellddefc772018-02-02 00:33:433932 NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize();
3933 NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize();
Bill Nell674dbcc2016-07-12 23:43:533934 }
Maksim Panchenko7f7d4af2016-02-13 03:01:533935 } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) {
Maksim Panchenko250ca402020-06-26 23:52:073936 NewPhdr = createNewTextPhdr();
Maksim Panchenko7f7d4af2016-02-13 03:01:533937 ModdedGnuStack = true;
3938 } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) {
Maksim Panchenko250ca402020-06-26 23:52:073939 // Insert the new header before DYNAMIC.
Amir Ayupovc7306cc2021-04-08 07:19:263940 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
Maksim Panchenko7f7d4af2016-02-13 03:01:533941 OS.write(reinterpret_cast<const char *>(&NewTextPhdr),
3942 sizeof(NewTextPhdr));
3943 AddedSegment = true;
Maksim Panchenko50c895a2016-02-08 18:02:483944 }
Maksim Panchenko7f7d4af2016-02-13 03:01:533945 OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
Maksim Panchenko50c895a2016-02-08 18:02:483946 }
3947
Maksim Panchenko250ca402020-06-26 23:52:073948 if (!opts::UseGnuStack && !AddedSegment) {
3949 // Append the new header to the end of the table.
Amir Ayupovc7306cc2021-04-08 07:19:263950 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
Maksim Panchenko40c2e0f2021-12-15 00:52:513951 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr));
Maksim Panchenko250ca402020-06-26 23:52:073952 }
3953
Maksim Panchenko7f7d4af2016-02-13 03:01:533954 assert((!opts::UseGnuStack || ModdedGnuStack) &&
3955 "could not find GNU_STACK program header to modify");
Maksim Panchenkod68b1c7b2016-03-03 18:13:113956}
Maksim Panchenko50c895a2016-02-08 18:02:483957
Maksim Panchenko44648612016-09-16 22:54:323958namespace {
Maksim Panchenko075f0762017-04-06 17:49:593959
3960/// Write padding to \p OS such that its current \p Offset becomes aligned
3961/// at \p Alignment. Return new (aligned) offset.
Maksim Panchenko40c2e0f2021-12-15 00:52:513962uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset,
Maksim Panchenko075f0762017-04-06 17:49:593963 uint64_t Alignment) {
Maksim Panchenko69b58632017-05-17 00:29:313964 if (!Alignment)
3965 return Offset;
3966
Amir Ayupovc7306cc2021-04-08 07:19:263967 const uint64_t PaddingSize =
3968 offsetToAlignment(Offset, llvm::Align(Alignment));
Maksim Panchenko075f0762017-04-06 17:49:593969 for (unsigned I = 0; I < PaddingSize; ++I)
Maksim Panchenko44648612016-09-16 22:54:323970 OS.write((unsigned char)0);
Maksim Panchenko075f0762017-04-06 17:49:593971 return Offset + PaddingSize;
Maksim Panchenko44648612016-09-16 22:54:323972}
Maksim Panchenko075f0762017-04-06 17:49:593973
Maksim Panchenko44648612016-09-16 22:54:323974}
3975
Maksim Panchenkod68b1c7b2016-03-03 18:13:113976void RewriteInstance::rewriteNoteSections() {
3977 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
3978 if (!ELF64LEFile) {
3979 errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
3980 exit(1);
3981 }
Amir Ayupovc7306cc2021-04-08 07:19:263982 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
3983 raw_fd_ostream &OS = Out->os();
Maksim Panchenkod68b1c7b2016-03-03 18:13:113984
Maksim Panchenko6ff17952017-01-17 23:49:593985 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
Maksim Panchenko7f7d4af2016-02-13 03:01:533986 assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
3987 "next available offset calculation failure");
Maksim Panchenko7f7d4af2016-02-13 03:01:533988 OS.seek(NextAvailableOffset);
Maksim Panchenko7f7d4af2016-02-13 03:01:533989
Maksim Panchenkod68b1c7b2016-03-03 18:13:113990 // Copy over non-allocatable section contents and update file offsets.
Amir Ayupovc7306cc2021-04-08 07:19:263991 for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) {
Maksim Panchenkod68b1c7b2016-03-03 18:13:113992 if (Section.sh_type == ELF::SHT_NULL)
3993 continue;
3994 if (Section.sh_flags & ELF::SHF_ALLOC)
3995 continue;
Maksim Panchenko7f7d4af2016-02-13 03:01:533996
Maksim Panchenko1ed3ac12019-10-29 21:49:493997 StringRef SectionName =
Amir Ayupov1c5d3a02020-12-02 00:29:393998 cantFail(Obj.getSectionName(Section), "cannot get section name");
Amir Ayupovc7306cc2021-04-08 07:19:263999 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName);
Maksim Panchenko1ed3ac12019-10-29 21:49:494000
4001 if (shouldStrip(Section, SectionName))
Maksim Panchenko55fc5412016-09-28 02:09:384002 continue;
4003
Maksim Panchenkod68b1c7b2016-03-03 18:13:114004 // Insert padding as needed.
Maksim Panchenko075f0762017-04-06 17:49:594005 NextAvailableOffset =
Maksim Panchenko40c2e0f2021-12-15 00:52:514006 appendPadding(OS, NextAvailableOffset, Section.sh_addralign);
Maksim Panchenkod68b1c7b2016-03-03 18:13:114007
Maksim Panchenkof047b9d2016-05-17 00:02:174008 // New section size.
Gabriel Poesia80ea31b2016-03-11 19:30:304009 uint64_t Size = 0;
Amir Ayupov12e9fec2021-04-01 18:43:004010 bool DataWritten = false;
4011 uint8_t *SectionData = nullptr;
Maksim Panchenkoa7fb6102016-11-11 22:33:344012 // Copy over section contents unless it's one of the sections we overwrite.
Rafael Auler8a5a3012018-02-06 23:00:234013 if (!willOverwriteSection(SectionName)) {
Gabriel Poesia80ea31b2016-03-11 19:30:304014 Size = Section.sh_size;
Alexander Yermolovich1c2f4bb2021-11-16 01:19:244015 StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size);
4016 std::string Data;
4017 if (BSec && BSec->getPatcher()) {
4018 Data = BSec->getPatcher()->patchBinary(Dataref);
4019 Dataref = StringRef(Data);
4020 }
Maksim Panchenko075f0762017-04-06 17:49:594021
Amir Ayupov12e9fec2021-04-01 18:43:004022 // Section was expanded, so need to treat it as overwrite.
Alexander Yermolovich1c2f4bb2021-11-16 01:19:244023 if (Size != Dataref.size()) {
4024 BSec = BC->registerOrUpdateNoteSection(
4025 SectionName, copyByteArray(Dataref), Dataref.size());
Amir Ayupov12e9fec2021-04-01 18:43:004026 Size = 0;
4027 } else {
Alexander Yermolovich1c2f4bb2021-11-16 01:19:244028 OS << Dataref;
Amir Ayupov12e9fec2021-04-01 18:43:004029 DataWritten = true;
4030
4031 // Add padding as the section extension might rely on the alignment.
4032 Size = appendPadding(OS, Size, Section.sh_addralign);
4033 }
Gabriel Poesia80ea31b2016-03-11 19:30:304034 }
Maksim Panchenkod68b1c7b2016-03-03 18:13:114035
Maksim Panchenkof2df1a82016-03-10 00:06:414036 // Perform section post-processing.
Bill Nellddefc772018-02-02 00:33:434037 if (BSec && !BSec->isAllocatable()) {
4038 assert(BSec->getAlignment() <= Section.sh_addralign &&
Maksim Panchenkod68b1c7b2016-03-03 18:13:114039 "alignment exceeds value in file");
Maksim Panchenkof2df1a82016-03-10 00:06:414040
Bill Nellddefc772018-02-02 00:33:434041 if (BSec->getAllocAddress()) {
Amir Ayupov12e9fec2021-04-01 18:43:004042 assert(!DataWritten && "Writing section twice.");
Amir Ayupovc907d6e2022-05-17 21:30:004043 (void)DataWritten;
Bill Nellddefc772018-02-02 00:33:434044 SectionData = BSec->getOutputData();
Amir Ayupov12e9fec2021-04-01 18:43:004045
Amir Ayupov1c5d3a02020-12-02 00:29:394046 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing")
4047 << " contents to section " << SectionName << '\n');
Maksim Panchenko40c2e0f2021-12-15 00:52:514048 OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize());
Bill Nellddefc772018-02-02 00:33:434049 Size += BSec->getOutputSize();
Maksim Panchenkof2df1a82016-03-10 00:06:414050 }
4051
Maksim Panchenko0ce0bce2020-06-15 07:15:474052 BSec->setOutputFileOffset(NextAvailableOffset);
4053 BSec->flushPendingRelocations(OS,
4054 [this] (const MCSymbol *S) {
4055 return getNewValueForSymbol(S->getName());
4056 });
Maksim Panchenkod68b1c7b2016-03-03 18:13:114057 }
4058
4059 // Set/modify section info.
Amir Ayupovc7306cc2021-04-08 07:19:264060 BinarySection &NewSection =
Bill Nellddefc772018-02-02 00:33:434061 BC->registerOrUpdateNoteSection(SectionName,
4062 SectionData,
4063 Size,
4064 Section.sh_addralign,
4065 BSec ? BSec->isReadOnly() : false,
4066 BSec ? BSec->getELFType()
Maksim Panchenko97112862020-02-18 17:20:174067 : ELF::SHT_PROGBITS);
Maksim Panchenko163adbe2019-03-15 01:51:054068 NewSection.setOutputAddress(0);
Maksim Panchenko0ce0bce2020-06-15 07:15:474069 NewSection.setOutputFileOffset(NextAvailableOffset);
Maksim Panchenkod68b1c7b2016-03-03 18:13:114070
4071 NextAvailableOffset += Size;
4072 }
Maksim Panchenko69b58632017-05-17 00:29:314073
4074 // Write new note sections.
Amir Ayupovc7306cc2021-04-08 07:19:264075 for (BinarySection &Section : BC->nonAllocatableSections()) {
Maksim Panchenko0ce0bce2020-06-15 07:15:474076 if (Section.getOutputFileOffset() || !Section.getAllocAddress())
Maksim Panchenko69b58632017-05-17 00:29:314077 continue;
4078
Bill Nellddefc772018-02-02 00:33:434079 assert(!Section.hasPendingRelocations() && "cannot have pending relocs");
Maksim Panchenko69b58632017-05-17 00:29:314080
Maksim Panchenko40c2e0f2021-12-15 00:52:514081 NextAvailableOffset =
4082 appendPadding(OS, NextAvailableOffset, Section.getAlignment());
Maksim Panchenko0ce0bce2020-06-15 07:15:474083 Section.setOutputFileOffset(NextAvailableOffset);
Maksim Panchenko69b58632017-05-17 00:29:314084
Amir Ayupov1c5d3a02020-12-02 00:29:394085 LLVM_DEBUG(
4086 dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName()
4087 << " of size " << Section.getOutputSize() << " at offset 0x"
4088 << Twine::utohexstr(Section.getOutputFileOffset()) << '\n');
Maksim Panchenko69b58632017-05-17 00:29:314089
Bill Nellddefc772018-02-02 00:33:434090 OS.write(Section.getOutputContents().data(), Section.getOutputSize());
4091 NextAvailableOffset += Section.getOutputSize();
Maksim Panchenko69b58632017-05-17 00:29:314092 }
Maksim Panchenkod68b1c7b2016-03-03 18:13:114093}
4094
Maksim Panchenkoe2128052017-02-07 20:20:464095template <typename ELFT>
Maksim Panchenko69b58632017-05-17 00:29:314096void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) {
Amir Ayupovc7306cc2021-04-08 07:19:264097 using ELFShdrTy = typename ELFT::Shdr;
4098 const ELFFile<ELFT> &Obj = File->getELFFile();
Maksim Panchenkoe2128052017-02-07 20:20:464099
4100 // Pre-populate section header string table.
Amir Ayupovc7306cc2021-04-08 07:19:264101 for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
Rafael Auler8a5a3012018-02-06 23:00:234102 StringRef SectionName =
Amir Ayupov1c5d3a02020-12-02 00:29:394103 cantFail(Obj.getSectionName(Section), "cannot get section name");
Rafael Auler8a5a3012018-02-06 23:00:234104 SHStrTab.add(SectionName);
Amir Ayupov1c5d3a02020-12-02 00:29:394105 std::string OutputSectionName = getOutputSectionName(Obj, Section);
Maksim Panchenkoee0e9cc2021-12-23 20:38:334106 if (OutputSectionName != SectionName)
Amir Ayupov1c5d3a02020-12-02 00:29:394107 SHStrTabPool.emplace_back(std::move(OutputSectionName));
Maksim Panchenkoe2128052017-02-07 20:20:464108 }
Maksim Panchenkoee0e9cc2021-12-23 20:38:334109 for (const std::string &Str : SHStrTabPool)
Amir Ayupov1c5d3a02020-12-02 00:29:394110 SHStrTab.add(Str);
Maksim Panchenkoee0e9cc2021-12-23 20:38:334111 for (const BinarySection &Section : BC->sections())
Bill Nell0e4d86b2017-11-15 04:05:114112 SHStrTab.add(Section.getName());
Rafael Auler8a5a3012018-02-06 23:00:234113 SHStrTab.finalize();
Maksim Panchenkoe2128052017-02-07 20:20:464114
Amir Ayupovc7306cc2021-04-08 07:19:264115 const size_t SHStrTabSize = SHStrTab.getSize();
Maksim Panchenko69b58632017-05-17 00:29:314116 uint8_t *DataCopy = new uint8_t[SHStrTabSize];
Rafael Auler8a5a3012018-02-06 23:00:234117 memset(DataCopy, 0, SHStrTabSize);
4118 SHStrTab.write(DataCopy);
Bill Nellddefc772018-02-02 00:33:434119 BC->registerOrUpdateNoteSection(".shstrtab",
4120 DataCopy,
4121 SHStrTabSize,
4122 /*Alignment=*/1,
4123 /*IsReadOnly=*/true,
4124 ELF::SHT_STRTAB);
Maksim Panchenkoe2128052017-02-07 20:20:464125}
4126
Rafael Auler9c4fcaf2018-08-09 00:55:244127void RewriteInstance::addBoltInfoSection() {
4128 std::string DescStr;
4129 raw_string_ostream DescOS(DescStr);
4130
4131 DescOS << "BOLT revision: " << BoltRevision << ", "
4132 << "command line:";
Maksim Panchenkoee0e9cc2021-12-23 20:38:334133 for (int I = 0; I < Argc; ++I)
Rafael Auler9c4fcaf2018-08-09 00:55:244134 DescOS << " " << Argv[I];
Rafael Auler9c4fcaf2018-08-09 00:55:244135 DescOS.flush();
4136
Rafael Auler821480d2019-08-02 18:20:134137 // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n'
Amir Ayupovc7306cc2021-04-08 07:19:264138 const std::string BoltInfo =
Rafael Auler821480d2019-08-02 18:20:134139 BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/);
Rafael Auler9c4fcaf2018-08-09 00:55:244140 BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo),
4141 BoltInfo.size(),
4142 /*Alignment=*/1,
4143 /*IsReadOnly=*/true, ELF::SHT_NOTE);
Bill Nell5cd58962017-05-24 21:14:164144}
4145
Rafael Auler21f43032019-04-13 00:33:464146void RewriteInstance::addBATSection() {
4147 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr,
4148 0,
4149 /*Alignment=*/1,
4150 /*IsReadOnly=*/true, ELF::SHT_NOTE);
4151}
4152
4153void RewriteInstance::encodeBATSection() {
4154 std::string DescStr;
4155 raw_string_ostream DescOS(DescStr);
4156
4157 BAT->write(DescOS);
4158 DescOS.flush();
4159
Amir Ayupovc7306cc2021-04-08 07:19:264160 const std::string BoltInfo =
Rafael Auler821480d2019-08-02 18:20:134161 BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT);
Rafael Auler21f43032019-04-13 00:33:464162 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME,
4163 copyByteArray(BoltInfo), BoltInfo.size(),
4164 /*Alignment=*/1,
4165 /*IsReadOnly=*/true, ELF::SHT_NOTE);
4166}
4167
Maksim Panchenko40c2e0f2021-12-15 00:52:514168template <typename ELFObjType, typename ELFShdrTy>
Amir Ayupov1c5d3a02020-12-02 00:29:394169std::string RewriteInstance::getOutputSectionName(const ELFObjType &Obj,
Maksim Panchenko163adbe2019-03-15 01:51:054170 const ELFShdrTy &Section) {
4171 if (Section.sh_type == ELF::SHT_NULL)
4172 return "";
4173
4174 StringRef SectionName =
Amir Ayupov1c5d3a02020-12-02 00:29:394175 cantFail(Obj.getSectionName(Section), "cannot get section name");
Maksim Panchenko163adbe2019-03-15 01:51:054176
Maksim Panchenko2b152332019-04-26 22:30:124177 if ((Section.sh_flags & ELF::SHF_ALLOC) && willOverwriteSection(SectionName))
Maksim Panchenkoa07f1a22020-03-11 22:51:324178 return (getOrgSecPrefix() + SectionName).str();
Maksim Panchenko163adbe2019-03-15 01:51:054179
Amir Ayupov1c5d3a02020-12-02 00:29:394180 return std::string(SectionName);
Maksim Panchenko163adbe2019-03-15 01:51:054181}
4182
Maksim Panchenko1ed3ac12019-10-29 21:49:494183template <typename ELFShdrTy>
4184bool RewriteInstance::shouldStrip(const ELFShdrTy &Section,
4185 StringRef SectionName) {
4186 // Strip non-allocatable relocation sections.
4187 if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA)
4188 return true;
4189
4190 // Strip debug sections if not updating them.
4191 if (isDebugSection(SectionName) && !opts::UpdateDebugSections)
4192 return true;
4193
Vladislav Khmelevsky95ee1292021-10-16 14:02:454194 // Strip symtab section if needed
4195 if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB)
4196 return true;
4197
Maksim Panchenko1ed3ac12019-10-29 21:49:494198 return false;
4199}
4200
Amir Ayupov29fe14c2022-01-14 20:57:374201template <typename ELFT>
4202std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr>
Maksim Panchenko40c2e0f2021-12-15 00:52:514203RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
4204 std::vector<uint32_t> &NewSectionIndex) {
Amir Ayupov29fe14c2022-01-14 20:57:374205 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
Amir Ayupovc7306cc2021-04-08 07:19:264206 const ELFFile<ELFT> &Obj = File->getELFFile();
4207 typename ELFT::ShdrRange Sections = cantFail(Obj.sections());
Rafael Auler4e29afe2017-06-27 23:25:594208
Maksim Panchenko163adbe2019-03-15 01:51:054209 // Keep track of section header entries together with their name.
4210 std::vector<std::pair<std::string, ELFShdrTy>> OutputSections;
4211 auto addSection = [&](const std::string &Name, const ELFShdrTy &Section) {
Amir Ayupovc7306cc2021-04-08 07:19:264212 ELFShdrTy NewSection = Section;
Maksim Panchenko163adbe2019-03-15 01:51:054213 NewSection.sh_name = SHStrTab.getOffset(Name);
Amir Ayupov9a884542021-05-08 01:43:254214 OutputSections.emplace_back(Name, std::move(NewSection));
Maksim Panchenko163adbe2019-03-15 01:51:054215 };
Rafael Auler4e29afe2017-06-27 23:25:594216
Maksim Panchenko163adbe2019-03-15 01:51:054217 // Copy over entries for original allocatable sections using modified name.
Amir Ayupovc7306cc2021-04-08 07:19:264218 for (const ELFShdrTy &Section : Sections) {
Rafael Auler4e29afe2017-06-27 23:25:594219 // Always ignore this section.
4220 if (Section.sh_type == ELF::SHT_NULL) {
Amir Ayupov9a884542021-05-08 01:43:254221 OutputSections.emplace_back("", Section);
Rafael Auler4e29afe2017-06-27 23:25:594222 continue;
4223 }
4224
Rafael Auler4e29afe2017-06-27 23:25:594225 if (!(Section.sh_flags & ELF::SHF_ALLOC))
4226 continue;
4227
Maksim Panchenko163adbe2019-03-15 01:51:054228 addSection(getOutputSectionName(Obj, Section), Section);
Rafael Auler4e29afe2017-06-27 23:25:594229 }
4230
Amir Ayupovc7306cc2021-04-08 07:19:264231 for (const BinarySection &Section : BC->allocatableSections()) {
Bill Nell0e4d86b2017-11-15 04:05:114232 if (!Section.isFinalized())
Bill Nellddefc772018-02-02 00:33:434233 continue;
4234
Maksim Panchenkoa07f1a22020-03-11 22:51:324235 if (Section.getName().startswith(getOrgSecPrefix()) ||
Maksim Panchenko1f3e3512020-03-06 23:06:374236 Section.isAnonymous()) {
Rafael Auler4e29afe2017-06-27 23:25:594237 if (opts::Verbosity)
Maksim Panchenko297d1a42019-03-27 20:58:314238 outs() << "BOLT-INFO: not writing section header for section "
Maksim Panchenko163adbe2019-03-15 01:51:054239 << Section.getName() << '\n';
Rafael Auler4e29afe2017-06-27 23:25:594240 continue;
4241 }
4242
Rafael Auler4e29afe2017-06-27 23:25:594243 if (opts::Verbosity >= 1)
Maksim Panchenko40c2e0f2021-12-15 00:52:514244 outs() << "BOLT-INFO: writing section header for " << Section.getName()
4245 << '\n';
Rafael Auler4e29afe2017-06-27 23:25:594246 ELFShdrTy NewSection;
Rafael Auler4e29afe2017-06-27 23:25:594247 NewSection.sh_type = ELF::SHT_PROGBITS;
Maksim Panchenko163adbe2019-03-15 01:51:054248 NewSection.sh_addr = Section.getOutputAddress();
Maksim Panchenko0ce0bce2020-06-15 07:15:474249 NewSection.sh_offset = Section.getOutputFileOffset();
Maksim Panchenko163adbe2019-03-15 01:51:054250 NewSection.sh_size = Section.getOutputSize();
Rafael Auler4e29afe2017-06-27 23:25:594251 NewSection.sh_entsize = 0;
Maksim Panchenko163adbe2019-03-15 01:51:054252 NewSection.sh_flags = Section.getELFFlags();
Rafael Auler4e29afe2017-06-27 23:25:594253 NewSection.sh_link = 0;
4254 NewSection.sh_info = 0;
Maksim Panchenko163adbe2019-03-15 01:51:054255 NewSection.sh_addralign = Section.getAlignment();
Amir Ayupov1c5d3a02020-12-02 00:29:394256 addSection(std::string(Section.getName()), NewSection);
Maksim Panchenko163adbe2019-03-15 01:51:054257 }
4258
4259 // Sort all allocatable sections by their offset.
Amir Ayupovd2c87692022-06-24 05:15:474260 llvm::stable_sort(OutputSections,
4261 [](const std::pair<std::string, ELFShdrTy> &A,
4262 const std::pair<std::string, ELFShdrTy> &B) {
4263 return A.second.sh_offset < B.second.sh_offset;
4264 });
Maksim Panchenko163adbe2019-03-15 01:51:054265
4266 // Fix section sizes to prevent overlapping.
Maksim Panchenko225a8d72021-03-05 00:31:124267 ELFShdrTy *PrevSection = nullptr;
4268 StringRef PrevSectionName;
4269 for (auto &SectionKV : OutputSections) {
4270 ELFShdrTy &Section = SectionKV.second;
Maksim Panchenko163adbe2019-03-15 01:51:054271
Maksim Panchenko225a8d72021-03-05 00:31:124272 // TBSS section does not take file or memory space. Ignore it for layout
4273 // purposes.
4274 if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS))
Maksim Panchenko163adbe2019-03-15 01:51:054275 continue;
4276
Maksim Panchenko225a8d72021-03-05 00:31:124277 if (PrevSection &&
4278 PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:334279 if (opts::Verbosity > 1)
Maksim Panchenko225a8d72021-03-05 00:31:124280 outs() << "BOLT-INFO: adjusting size for section " << PrevSectionName
4281 << '\n';
Maksim Panchenko40c2e0f2021-12-15 00:52:514282 PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr
4283 ? Section.sh_addr - PrevSection->sh_addr
4284 : 0;
Maksim Panchenko163adbe2019-03-15 01:51:054285 }
Maksim Panchenko225a8d72021-03-05 00:31:124286
4287 PrevSection = &Section;
4288 PrevSectionName = SectionKV.first;
Rafael Auler4e29afe2017-06-27 23:25:594289 }
4290
4291 uint64_t LastFileOffset = 0;
4292
4293 // Copy over entries for non-allocatable sections performing necessary
4294 // adjustments.
Amir Ayupovc7306cc2021-04-08 07:19:264295 for (const ELFShdrTy &Section : Sections) {
Rafael Auler4e29afe2017-06-27 23:25:594296 if (Section.sh_type == ELF::SHT_NULL)
4297 continue;
4298 if (Section.sh_flags & ELF::SHF_ALLOC)
4299 continue;
Rafael Auler4e29afe2017-06-27 23:25:594300
Bill Nell729da2d2018-04-21 03:03:314301 StringRef SectionName =
Amir Ayupov1c5d3a02020-12-02 00:29:394302 cantFail(Obj.getSectionName(Section), "cannot get section name");
Bill Nell729da2d2018-04-21 03:03:314303
Maksim Panchenko1ed3ac12019-10-29 21:49:494304 if (shouldStrip(Section, SectionName))
Maksim Panchenko2b152332019-04-26 22:30:124305 continue;
4306
Amir Ayupovc7306cc2021-04-08 07:19:264307 ErrorOr<BinarySection &> BSec = BC->getUniqueSectionByName(SectionName);
Bill Nellddefc772018-02-02 00:33:434308 assert(BSec && "missing section info for non-allocatable section");
Rafael Auler4e29afe2017-06-27 23:25:594309
Amir Ayupovc7306cc2021-04-08 07:19:264310 ELFShdrTy NewSection = Section;
Maksim Panchenko0ce0bce2020-06-15 07:15:474311 NewSection.sh_offset = BSec->getOutputFileOffset();
Bill Nellddefc772018-02-02 00:33:434312 NewSection.sh_size = BSec->getOutputSize();
Rafael Auler4e29afe2017-06-27 23:25:594313
Maksim Panchenkoee0e9cc2021-12-23 20:38:334314 if (NewSection.sh_type == ELF::SHT_SYMTAB)
Maksim Panchenko30fd9602018-10-23 01:48:124315 NewSection.sh_info = NumLocalSymbols;
Maksim Panchenko30fd9602018-10-23 01:48:124316
Amir Ayupov1c5d3a02020-12-02 00:29:394317 addSection(std::string(SectionName), NewSection);
Rafael Auler4e29afe2017-06-27 23:25:594318
Maksim Panchenko0ce0bce2020-06-15 07:15:474319 LastFileOffset = BSec->getOutputFileOffset();
Rafael Auler4e29afe2017-06-27 23:25:594320 }
4321
Rafael Auler4e29afe2017-06-27 23:25:594322 // Create entries for new non-allocatable sections.
Amir Ayupovc7306cc2021-04-08 07:19:264323 for (BinarySection &Section : BC->nonAllocatableSections()) {
Maksim Panchenko0ce0bce2020-06-15 07:15:474324 if (Section.getOutputFileOffset() <= LastFileOffset)
Rafael Auler4e29afe2017-06-27 23:25:594325 continue;
4326
Maksim Panchenkoee0e9cc2021-12-23 20:38:334327 if (opts::Verbosity >= 1)
Maksim Panchenko40c2e0f2021-12-15 00:52:514328 outs() << "BOLT-INFO: writing section header for " << Section.getName()
4329 << '\n';
Maksim Panchenkoee0e9cc2021-12-23 20:38:334330
Rafael Auler4e29afe2017-06-27 23:25:594331 ELFShdrTy NewSection;
Bill Nellddefc772018-02-02 00:33:434332 NewSection.sh_type = Section.getELFType();
Rafael Auler4e29afe2017-06-27 23:25:594333 NewSection.sh_addr = 0;
Maksim Panchenko0ce0bce2020-06-15 07:15:474334 NewSection.sh_offset = Section.getOutputFileOffset();
Bill Nellddefc772018-02-02 00:33:434335 NewSection.sh_size = Section.getOutputSize();
Rafael Auler4e29afe2017-06-27 23:25:594336 NewSection.sh_entsize = 0;
Bill Nellddefc772018-02-02 00:33:434337 NewSection.sh_flags = Section.getELFFlags();
Rafael Auler4e29afe2017-06-27 23:25:594338 NewSection.sh_link = 0;
4339 NewSection.sh_info = 0;
Bill Nellddefc772018-02-02 00:33:434340 NewSection.sh_addralign = Section.getAlignment();
Maksim Panchenko163adbe2019-03-15 01:51:054341
Amir Ayupov1c5d3a02020-12-02 00:29:394342 addSection(std::string(Section.getName()), NewSection);
Rafael Auler4e29afe2017-06-27 23:25:594343 }
4344
Maksim Panchenko163adbe2019-03-15 01:51:054345 // Assign indices to sections.
4346 std::unordered_map<std::string, uint64_t> NameToIndex;
4347 for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) {
Amir Ayupovc7306cc2021-04-08 07:19:264348 const std::string &SectionName = OutputSections[Index].first;
Maksim Panchenko163adbe2019-03-15 01:51:054349 NameToIndex[SectionName] = Index;
Amir Ayupovc7306cc2021-04-08 07:19:264350 if (ErrorOr<BinarySection &> Section =
4351 BC->getUniqueSectionByName(SectionName))
Maksim Panchenko163adbe2019-03-15 01:51:054352 Section->setIndex(Index);
4353 }
4354
4355 // Update section index mapping
4356 NewSectionIndex.clear();
4357 NewSectionIndex.resize(Sections.size(), 0);
Amir Ayupovc7306cc2021-04-08 07:19:264358 for (const ELFShdrTy &Section : Sections) {
Maksim Panchenko163adbe2019-03-15 01:51:054359 if (Section.sh_type == ELF::SHT_NULL)
4360 continue;
4361
Amir Ayupovc7306cc2021-04-08 07:19:264362 size_t OrgIndex = std::distance(Sections.begin(), &Section);
4363 std::string SectionName = getOutputSectionName(Obj, Section);
Maksim Panchenko163adbe2019-03-15 01:51:054364
4365 // Some sections are stripped
4366 if (!NameToIndex.count(SectionName))
4367 continue;
4368
4369 NewSectionIndex[OrgIndex] = NameToIndex[SectionName];
4370 }
4371
4372 std::vector<ELFShdrTy> SectionsOnly(OutputSections.size());
Amir Ayupovd2c87692022-06-24 05:15:474373 llvm::transform(OutputSections, SectionsOnly.begin(),
4374 [](std::pair<std::string, ELFShdrTy> &SectionInfo) {
4375 return SectionInfo.second;
4376 });
Maksim Panchenko163adbe2019-03-15 01:51:054377
4378 return SectionsOnly;
Rafael Auler4e29afe2017-06-27 23:25:594379}
4380
Maksim Panchenkod68b1c7b2016-03-03 18:13:114381// Rewrite section header table inserting new entries as needed. The sections
4382// header table size itself may affect the offsets of other sections,
4383// so we are placing it at the end of the binary.
4384//
4385// As we rewrite entries we need to track how many sections were inserted
Maksim Panchenko69b58632017-05-17 00:29:314386// as it changes the sh_link value. We map old indices to new ones for
4387// existing sections.
Maksim Panchenko44648612016-09-16 22:54:324388template <typename ELFT>
4389void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
Maksim Panchenkobbbf6792020-02-27 04:43:184390 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
Amir Ayupovc7306cc2021-04-08 07:19:264391 using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr;
4392 raw_fd_ostream &OS = Out->os();
4393 const ELFFile<ELFT> &Obj = File->getELFFile();
Maksim Panchenkof9436bc2017-06-08 03:06:294394
Maksim Panchenko163adbe2019-03-15 01:51:054395 std::vector<uint32_t> NewSectionIndex;
Amir Ayupovc7306cc2021-04-08 07:19:264396 std::vector<ELFShdrTy> OutputSections =
4397 getOutputSections(File, NewSectionIndex);
Amir Ayupov1c5d3a02020-12-02 00:29:394398 LLVM_DEBUG(
Maksim Panchenkof9436bc2017-06-08 03:06:294399 dbgs() << "BOLT-DEBUG: old to new section index mapping:\n";
Maksim Panchenkoee0e9cc2021-12-23 20:38:334400 for (uint64_t I = 0; I < NewSectionIndex.size(); ++I)
Maksim Panchenkof9436bc2017-06-08 03:06:294401 dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n';
Maksim Panchenkof9436bc2017-06-08 03:06:294402 );
4403
4404 // Align starting address for section header table.
Amir Ayupovc7306cc2021-04-08 07:19:264405 uint64_t SHTOffset = OS.tell();
Maksim Panchenkobbbf6792020-02-27 04:43:184406 SHTOffset = appendPadding(OS, SHTOffset, sizeof(ELFShdrTy));
Maksim Panchenkof9436bc2017-06-08 03:06:294407
4408 // Write all section header entries while patching section references.
Amir Ayupovc7306cc2021-04-08 07:19:264409 for (ELFShdrTy &Section : OutputSections) {
Maksim Panchenkof9436bc2017-06-08 03:06:294410 Section.sh_link = NewSectionIndex[Section.sh_link];
4411 if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) {
4412 if (Section.sh_info)
4413 Section.sh_info = NewSectionIndex[Section.sh_info];
4414 }
4415 OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section));
Maksim Panchenko69b58632017-05-17 00:29:314416 }
Maksim Panchenko88a46102017-02-22 19:29:524417
Maksim Panchenko7f7d4af2016-02-13 03:01:534418 // Fix ELF header.
Amir Ayupovc7306cc2021-04-08 07:19:264419 ELFEhdrTy NewEhdr = Obj.getHeader();
Maksim Panchenko3f42fdf2017-05-09 05:51:364420
Maksim Panchenkob6f7c682017-12-10 05:40:394421 if (BC->HasRelocations) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:334422 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
Xun Li00892a5fd2020-05-21 21:28:474423 NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress();
Maksim Panchenkoee0e9cc2021-12-23 20:38:334424 else
Xun Li00892a5fd2020-05-21 21:28:474425 NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
Amir Ayupov1c5d3a02020-12-02 00:29:394426 assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) &&
Maksim Panchenko4aaa8892020-06-23 19:22:584427 "cannot find new address for entry point");
Maksim Panchenko3f42fdf2017-05-09 05:51:364428 }
Maksim Panchenko7f7d4af2016-02-13 03:01:534429 NewEhdr.e_phoff = PHDRTableOffset;
4430 NewEhdr.e_phnum = Phnum;
Maksim Panchenkod68b1c7b2016-03-03 18:13:114431 NewEhdr.e_shoff = SHTOffset;
Rafael Auler4e29afe2017-06-27 23:25:594432 NewEhdr.e_shnum = OutputSections.size();
Maksim Panchenkof9436bc2017-06-08 03:06:294433 NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx];
Maksim Panchenko7f7d4af2016-02-13 03:01:534434 OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0);
Maksim Panchenko55fc5412016-09-28 02:09:384435}
4436
Amir Ayupov29fe14c2022-01-14 20:57:374437template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy>
Maksim Panchenkobbbf6792020-02-27 04:43:184438void RewriteInstance::updateELFSymbolTable(
Amir Ayupov29fe14c2022-01-14 20:57:374439 ELFObjectFile<ELFT> *File, bool IsDynSym,
4440 const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection,
4441 const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write,
Maksim Panchenkobbbf6792020-02-27 04:43:184442 StrTabFuncTy AddToStrTab) {
Amir Ayupovc7306cc2021-04-08 07:19:264443 const ELFFile<ELFT> &Obj = File->getELFFile();
Maksim Panchenko40c2e0f2021-12-15 00:52:514444 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
Maksim Panchenkobbbf6792020-02-27 04:43:184445
Amir Ayupovc7306cc2021-04-08 07:19:264446 StringRef StringSection =
4447 cantFail(Obj.getStringTableForSymtab(SymTabSection));
Maksim Panchenkobbbf6792020-02-27 04:43:184448
4449 unsigned NumHotTextSymsUpdated = 0;
4450 unsigned NumHotDataSymsUpdated = 0;
4451
4452 std::map<const BinaryFunction *, uint64_t> IslandSizes;
4453 auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) {
4454 auto Itr = IslandSizes.find(&BF);
4455 if (Itr != IslandSizes.end())
4456 return Itr->second;
4457 return IslandSizes[&BF] = BF.estimateConstantIslandSize();
4458 };
4459
4460 // Symbols for the new symbol table.
4461 std::vector<ELFSymTy> Symbols;
4462
Maksim Panchenko6b185cc2020-10-22 23:35:294463 auto getNewSectionIndex = [&](uint32_t OldIndex) {
4464 assert(OldIndex < NewSectionIndex.size() && "section index out of bounds");
4465 const uint32_t NewIndex = NewSectionIndex[OldIndex];
4466
4467 // We may have stripped the section that dynsym was referencing due to
4468 // the linker bug. In that case return the old index avoiding marking
4469 // the symbol as undefined.
4470 if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF)
4471 return OldIndex;
4472 return NewIndex;
4473 };
4474
Maksim Panchenko0ce0bce2020-06-15 07:15:474475 // Add extra symbols for the function.
Maksim Panchenkoffaba222020-06-24 19:36:154476 //
4477 // Note that addExtraSymbols() could be called multiple times for the same
4478 // function with different FunctionSymbol matching the main function entry
4479 // point.
Maksim Panchenkobbbf6792020-02-27 04:43:184480 auto addExtraSymbols = [&](const BinaryFunction &Function,
4481 const ELFSymTy &FunctionSymbol) {
Maksim Panchenkoabda7dc2020-04-05 03:12:384482 if (Function.isFolded()) {
Amir Ayupovc7306cc2021-04-08 07:19:264483 BinaryFunction *ICFParent = Function.getFoldedIntoFunction();
Maksim Panchenkoabda7dc2020-04-05 03:12:384484 while (ICFParent->isFolded())
4485 ICFParent = ICFParent->getFoldedIntoFunction();
Amir Ayupovc7306cc2021-04-08 07:19:264486 ELFSymTy ICFSymbol = FunctionSymbol;
Maksim Panchenkoabda7dc2020-04-05 03:12:384487 SmallVector<char, 256> Buf;
4488 ICFSymbol.st_name =
Maksim Panchenko40c2e0f2021-12-15 00:52:514489 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection)))
4490 .concat(".icf.0")
4491 .toStringRef(Buf));
Maksim Panchenkoabda7dc2020-04-05 03:12:384492 ICFSymbol.st_value = ICFParent->getOutputAddress();
4493 ICFSymbol.st_size = ICFParent->getOutputSize();
Maksim Panchenko2d524fd2020-06-10 02:12:064494 ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex();
Maksim Panchenkoabda7dc2020-04-05 03:12:384495 Symbols.emplace_back(ICFSymbol);
4496 }
4497 if (Function.isSplit() && Function.cold().getAddress()) {
Amir Ayupovc7306cc2021-04-08 07:19:264498 ELFSymTy NewColdSym = FunctionSymbol;
Maksim Panchenkobbbf6792020-02-27 04:43:184499 SmallVector<char, 256> Buf;
4500 NewColdSym.st_name =
Maksim Panchenko40c2e0f2021-12-15 00:52:514501 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection)))
4502 .concat(".cold.0")
4503 .toStringRef(Buf));
Maksim Panchenkobbbf6792020-02-27 04:43:184504 NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex();
4505 NewColdSym.st_value = Function.cold().getAddress();
4506 NewColdSym.st_size = Function.cold().getImageSize();
4507 NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
4508 Symbols.emplace_back(NewColdSym);
4509 }
4510 if (Function.hasConstantIsland()) {
Amir Ayupovc7306cc2021-04-08 07:19:264511 uint64_t DataMark = Function.getOutputDataAddress();
4512 uint64_t CISize = getConstantIslandSize(Function);
4513 uint64_t CodeMark = DataMark + CISize;
4514 ELFSymTy DataMarkSym = FunctionSymbol;
Maksim Panchenkobbbf6792020-02-27 04:43:184515 DataMarkSym.st_name = AddToStrTab("$d");
4516 DataMarkSym.st_value = DataMark;
4517 DataMarkSym.st_size = 0;
4518 DataMarkSym.setType(ELF::STT_NOTYPE);
4519 DataMarkSym.setBinding(ELF::STB_LOCAL);
Amir Ayupovc7306cc2021-04-08 07:19:264520 ELFSymTy CodeMarkSym = DataMarkSym;
Maksim Panchenkobbbf6792020-02-27 04:43:184521 CodeMarkSym.st_name = AddToStrTab("$x");
4522 CodeMarkSym.st_value = CodeMark;
4523 Symbols.emplace_back(DataMarkSym);
4524 Symbols.emplace_back(CodeMarkSym);
4525 }
4526 if (Function.hasConstantIsland() && Function.isSplit()) {
Amir Ayupovc7306cc2021-04-08 07:19:264527 uint64_t DataMark = Function.getOutputColdDataAddress();
4528 uint64_t CISize = getConstantIslandSize(Function);
4529 uint64_t CodeMark = DataMark + CISize;
4530 ELFSymTy DataMarkSym = FunctionSymbol;
Maksim Panchenkobbbf6792020-02-27 04:43:184531 DataMarkSym.st_name = AddToStrTab("$d");
4532 DataMarkSym.st_value = DataMark;
4533 DataMarkSym.st_size = 0;
4534 DataMarkSym.setType(ELF::STT_NOTYPE);
4535 DataMarkSym.setBinding(ELF::STB_LOCAL);
Amir Ayupovc7306cc2021-04-08 07:19:264536 ELFSymTy CodeMarkSym = DataMarkSym;
Maksim Panchenkobbbf6792020-02-27 04:43:184537 CodeMarkSym.st_name = AddToStrTab("$x");
4538 CodeMarkSym.st_value = CodeMark;
4539 Symbols.emplace_back(DataMarkSym);
4540 Symbols.emplace_back(CodeMarkSym);
4541 }
4542 };
4543
4544 // For regular (non-dynamic) symbol table, exclude symbols referring
4545 // to non-allocatable sections.
4546 auto shouldStrip = [&](const ELFSymTy &Symbol) {
4547 if (Symbol.isAbsolute() || !Symbol.isDefined())
4548 return false;
4549
4550 // If we cannot link the symbol to a section, leave it as is.
Amir Ayupovc7306cc2021-04-08 07:19:264551 Expected<const typename ELFT::Shdr *> Section =
4552 Obj.getSection(Symbol.st_shndx);
Maksim Panchenkobbbf6792020-02-27 04:43:184553 if (!Section)
4554 return false;
4555
4556 // Remove the section symbol iif the corresponding section was stripped.
4557 if (Symbol.getType() == ELF::STT_SECTION) {
Maksim Panchenko6b185cc2020-10-22 23:35:294558 if (!getNewSectionIndex(Symbol.st_shndx))
Maksim Panchenkobbbf6792020-02-27 04:43:184559 return true;
4560 return false;
4561 }
4562
4563 // Symbols in non-allocatable sections are typically remnants of relocations
4564 // emitted under "-emit-relocs" linker option. Delete those as we delete
4565 // relocations against non-allocatable sections.
4566 if (!((*Section)->sh_flags & ELF::SHF_ALLOC))
4567 return true;
4568
4569 return false;
4570 };
4571
Amir Ayupov1c5d3a02020-12-02 00:29:394572 for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) {
Maksim Panchenkobbbf6792020-02-27 04:43:184573 // For regular (non-dynamic) symbol table strip unneeded symbols.
Maksim Panchenko6b185cc2020-10-22 23:35:294574 if (!IsDynSym && shouldStrip(Symbol))
Maksim Panchenkobbbf6792020-02-27 04:43:184575 continue;
4576
Amir Ayupovc7306cc2021-04-08 07:19:264577 const BinaryFunction *Function =
4578 BC->getBinaryFunctionAtAddress(Symbol.st_value);
Maksim Panchenkobbbf6792020-02-27 04:43:184579 // Ignore false function references, e.g. when the section address matches
4580 // the address of the function.
4581 if (Function && Symbol.getType() == ELF::STT_SECTION)
4582 Function = nullptr;
4583
4584 // For non-dynamic symtab, make sure the symbol section matches that of
4585 // the function. It can mismatch e.g. if the symbol is a section marker
4586 // in which case we treat the symbol separately from the function.
4587 // For dynamic symbol table, the section index could be wrong on the input,
4588 // and its value is ignored by the runtime if it's different from
4589 // SHN_UNDEF and SHN_ABS.
Maksim Panchenko6b185cc2020-10-22 23:35:294590 if (!IsDynSym && Function &&
Maksim Panchenko0465d952020-10-09 23:06:274591 Symbol.st_shndx !=
Maksim Panchenko40c2e0f2021-12-15 00:52:514592 Function->getOriginSection()->getSectionRef().getIndex())
Maksim Panchenkobbbf6792020-02-27 04:43:184593 Function = nullptr;
4594
4595 // Create a new symbol based on the existing symbol.
Amir Ayupovc7306cc2021-04-08 07:19:264596 ELFSymTy NewSymbol = Symbol;
Maksim Panchenkobbbf6792020-02-27 04:43:184597
Maksim Panchenko10245b52020-04-16 07:05:014598 if (Function) {
Maksim Panchenko2d524fd2020-06-10 02:12:064599 // If the symbol matched a function that was not emitted, update the
4600 // corresponding section index but otherwise leave it unchanged.
Maksim Panchenko10245b52020-04-16 07:05:014601 if (Function->isEmitted()) {
4602 NewSymbol.st_value = Function->getOutputAddress();
4603 NewSymbol.st_size = Function->getOutputSize();
4604 NewSymbol.st_shndx = Function->getCodeSection()->getIndex();
Maksim Panchenko2d524fd2020-06-10 02:12:064605 } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) {
Maksim Panchenko6b185cc2020-10-22 23:35:294606 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
Maksim Panchenko10245b52020-04-16 07:05:014607 }
Maksim Panchenkobbbf6792020-02-27 04:43:184608
4609 // Add new symbols to the symbol table if necessary.
Maksim Panchenko6b185cc2020-10-22 23:35:294610 if (!IsDynSym)
Maksim Panchenkobbbf6792020-02-27 04:43:184611 addExtraSymbols(*Function, NewSymbol);
Maksim Panchenko10245b52020-04-16 07:05:014612 } else {
Maksim Panchenkobbbf6792020-02-27 04:43:184613 // Check if the function symbol matches address inside a function, i.e.
4614 // it marks a secondary entry point.
Maksim Panchenko40c2e0f2021-12-15 00:52:514615 Function =
4616 (Symbol.getType() == ELF::STT_FUNC)
4617 ? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
4618 /*CheckPastEnd=*/false,
4619 /*UseMaxSize=*/true)
4620 : nullptr;
Maksim Panchenkobbbf6792020-02-27 04:43:184621
4622 if (Function && Function->isEmitted()) {
Amir Ayupovc7306cc2021-04-08 07:19:264623 const uint64_t OutputAddress =
4624 Function->translateInputToOutputAddress(Symbol.st_value);
Maksim Panchenkobbbf6792020-02-27 04:43:184625
4626 NewSymbol.st_value = OutputAddress;
4627 // Force secondary entry points to have zero size.
4628 NewSymbol.st_size = 0;
Maksim Panchenko40c2e0f2021-12-15 00:52:514629 NewSymbol.st_shndx =
4630 OutputAddress >= Function->cold().getAddress() &&
4631 OutputAddress < Function->cold().getImageSize()
4632 ? Function->getColdCodeSection()->getIndex()
4633 : Function->getCodeSection()->getIndex();
Maksim Panchenkobbbf6792020-02-27 04:43:184634 } else {
4635 // Check if the symbol belongs to moved data object and update it.
4636 BinaryData *BD = opts::ReorderData.empty()
Maksim Panchenko40c2e0f2021-12-15 00:52:514637 ? nullptr
4638 : BC->getBinaryDataAtAddress(Symbol.st_value);
Maksim Panchenkobbbf6792020-02-27 04:43:184639 if (BD && BD->isMoved() && !BD->isJumpTable()) {
4640 assert((!BD->getSize() || !Symbol.st_size ||
4641 Symbol.st_size == BD->getSize()) &&
4642 "sizes must match");
4643
Amir Ayupovc7306cc2021-04-08 07:19:264644 BinarySection &OutputSection = BD->getOutputSection();
Maksim Panchenkobbbf6792020-02-27 04:43:184645 assert(OutputSection.getIndex());
Amir Ayupov1c5d3a02020-12-02 00:29:394646 LLVM_DEBUG(dbgs()
4647 << "BOLT-DEBUG: moving " << BD->getName() << " from "
4648 << *BC->getSectionNameForAddress(Symbol.st_value) << " ("
4649 << Symbol.st_shndx << ") to " << OutputSection.getName()
4650 << " (" << OutputSection.getIndex() << ")\n");
Maksim Panchenkobbbf6792020-02-27 04:43:184651 NewSymbol.st_shndx = OutputSection.getIndex();
4652 NewSymbol.st_value = BD->getOutputAddress();
4653 } else {
4654 // Otherwise just update the section for the symbol.
Maksim Panchenkoee0e9cc2021-12-23 20:38:334655 if (Symbol.st_shndx < ELF::SHN_LORESERVE)
Maksim Panchenko6b185cc2020-10-22 23:35:294656 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
Maksim Panchenkobbbf6792020-02-27 04:43:184657 }
4658
4659 // Detect local syms in the text section that we didn't update
4660 // and that were preserved by the linker to support relocations against
4661 // .text. Remove them from the symtab.
4662 if (Symbol.getType() == ELF::STT_NOTYPE &&
Maksim Panchenko40c2e0f2021-12-15 00:52:514663 Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) {
Maksim Panchenkobbbf6792020-02-27 04:43:184664 if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
4665 /*CheckPastEnd=*/false,
Maksim Panchenkoa82cff02020-09-14 22:48:324666 /*UseMaxSize=*/true)) {
Maksim Panchenkobbbf6792020-02-27 04:43:184667 // Can only delete the symbol if not patching. Such symbols should
4668 // not exist in the dynamic symbol table.
Maksim Panchenko6b185cc2020-10-22 23:35:294669 assert(!IsDynSym && "cannot delete symbol");
Maksim Panchenkobbbf6792020-02-27 04:43:184670 continue;
4671 }
4672 }
4673 }
4674 }
4675
4676 // Handle special symbols based on their name.
Amir Ayupovc7306cc2021-04-08 07:19:264677 Expected<StringRef> SymbolName = Symbol.getName(StringSection);
Maksim Panchenkobbbf6792020-02-27 04:43:184678 assert(SymbolName && "cannot get symbol name");
4679
4680 auto updateSymbolValue = [&](const StringRef Name, unsigned &IsUpdated) {
4681 NewSymbol.st_value = getNewValueForSymbol(Name);
4682 NewSymbol.st_shndx = ELF::SHN_ABS;
4683 outs() << "BOLT-INFO: setting " << Name << " to 0x"
4684 << Twine::utohexstr(NewSymbol.st_value) << '\n';
4685 ++IsUpdated;
Maksim Panchenkobbbf6792020-02-27 04:43:184686 };
4687
Maksim Panchenko40c2e0f2021-12-15 00:52:514688 if (opts::HotText &&
4689 (*SymbolName == "__hot_start" || *SymbolName == "__hot_end"))
Maksim Panchenkobbbf6792020-02-27 04:43:184690 updateSymbolValue(*SymbolName, NumHotTextSymsUpdated);
4691
Maksim Panchenko40c2e0f2021-12-15 00:52:514692 if (opts::HotData &&
4693 (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end"))
Maksim Panchenkobbbf6792020-02-27 04:43:184694 updateSymbolValue(*SymbolName, NumHotDataSymsUpdated);
4695
Maksim Panchenkodb4642d2020-06-18 18:10:414696 if (*SymbolName == "_end") {
4697 unsigned Ignored;
4698 updateSymbolValue(*SymbolName, Ignored);
Maksim Panchenkobbbf6792020-02-27 04:43:184699 }
4700
Maksim Panchenkoee0e9cc2021-12-23 20:38:334701 if (IsDynSym)
Amir Ayupov1c5d3a02020-12-02 00:29:394702 Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) *
Maksim Panchenkobbbf6792020-02-27 04:43:184703 sizeof(ELFSymTy),
4704 NewSymbol);
Maksim Panchenkoee0e9cc2021-12-23 20:38:334705 else
Maksim Panchenkobbbf6792020-02-27 04:43:184706 Symbols.emplace_back(NewSymbol);
Maksim Panchenkobbbf6792020-02-27 04:43:184707 }
4708
Maksim Panchenko6b185cc2020-10-22 23:35:294709 if (IsDynSym) {
Maksim Panchenkobbbf6792020-02-27 04:43:184710 assert(Symbols.empty());
4711 return;
4712 }
4713
4714 // Add symbols of injected functions
4715 for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) {
4716 ELFSymTy NewSymbol;
Maksim Panchenko0465d952020-10-09 23:06:274717 BinarySection *OriginSection = Function->getOriginSection();
Maksim Panchenko40c2e0f2021-12-15 00:52:514718 NewSymbol.st_shndx =
4719 OriginSection
4720 ? getNewSectionIndex(OriginSection->getSectionRef().getIndex())
4721 : Function->getCodeSection()->getIndex();
Maksim Panchenkobbbf6792020-02-27 04:43:184722 NewSymbol.st_value = Function->getOutputAddress();
4723 NewSymbol.st_name = AddToStrTab(Function->getOneName());
4724 NewSymbol.st_size = Function->getOutputSize();
4725 NewSymbol.st_other = 0;
4726 NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
4727 Symbols.emplace_back(NewSymbol);
4728
4729 if (Function->isSplit()) {
Amir Ayupovc7306cc2021-04-08 07:19:264730 ELFSymTy NewColdSym = NewSymbol;
Maksim Panchenkobbbf6792020-02-27 04:43:184731 NewColdSym.setType(ELF::STT_NOTYPE);
4732 SmallVector<char, 256> Buf;
4733 NewColdSym.st_name = AddToStrTab(
Maksim Panchenko40c2e0f2021-12-15 00:52:514734 Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf));
Maksim Panchenkobbbf6792020-02-27 04:43:184735 NewColdSym.st_value = Function->cold().getAddress();
4736 NewColdSym.st_size = Function->cold().getImageSize();
4737 Symbols.emplace_back(NewColdSym);
4738 }
4739 }
4740
4741 assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) &&
4742 "either none or both __hot_start/__hot_end symbols were expected");
4743 assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) &&
4744 "either none or both __hot_data_start/__hot_data_end symbols were "
4745 "expected");
4746
4747 auto addSymbol = [&](const std::string &Name) {
4748 ELFSymTy Symbol;
4749 Symbol.st_value = getNewValueForSymbol(Name);
4750 Symbol.st_shndx = ELF::SHN_ABS;
4751 Symbol.st_name = AddToStrTab(Name);
4752 Symbol.st_size = 0;
4753 Symbol.st_other = 0;
4754 Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE);
4755
4756 outs() << "BOLT-INFO: setting " << Name << " to 0x"
4757 << Twine::utohexstr(Symbol.st_value) << '\n';
4758
4759 Symbols.emplace_back(Symbol);
4760 };
4761
4762 if (opts::HotText && !NumHotTextSymsUpdated) {
4763 addSymbol("__hot_start");
4764 addSymbol("__hot_end");
4765 }
4766
4767 if (opts::HotData && !NumHotDataSymsUpdated) {
4768 addSymbol("__hot_data_start");
4769 addSymbol("__hot_data_end");
4770 }
4771
4772 // Put local symbols at the beginning.
Amir Ayupovd2c87692022-06-24 05:15:474773 llvm::stable_sort(Symbols, [](const ELFSymTy &A, const ELFSymTy &B) {
4774 if (A.getBinding() == ELF::STB_LOCAL && B.getBinding() != ELF::STB_LOCAL)
4775 return true;
4776 return false;
4777 });
Maksim Panchenkobbbf6792020-02-27 04:43:184778
Maksim Panchenkoee0e9cc2021-12-23 20:38:334779 for (const ELFSymTy &Symbol : Symbols)
Maksim Panchenkobbbf6792020-02-27 04:43:184780 Write(0, Symbol);
Maksim Panchenkobbbf6792020-02-27 04:43:184781}
4782
Maksim Panchenko55fc5412016-09-28 02:09:384783template <typename ELFT>
4784void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) {
Amir Ayupovc7306cc2021-04-08 07:19:264785 const ELFFile<ELFT> &Obj = File->getELFFile();
Maksim Panchenkobbbf6792020-02-27 04:43:184786 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
Maksim Panchenko40c2e0f2021-12-15 00:52:514787 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
Maksim Panchenkobbbf6792020-02-27 04:43:184788
4789 // Compute a preview of how section indices will change after rewriting, so
4790 // we can properly update the symbol table based on new section indices.
4791 std::vector<uint32_t> NewSectionIndex;
4792 getOutputSections(File, NewSectionIndex);
4793
Rafael Auler4e29afe2017-06-27 23:25:594794 // Set pointer at the end of the output file, so we can pwrite old symbol
4795 // tables if we need to.
4796 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
4797 assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
4798 "next available offset calculation failure");
4799 Out->os().seek(NextAvailableOffset);
Maksim Panchenko55fc5412016-09-28 02:09:384800
Maksim Panchenko55fc5412016-09-28 02:09:384801 // Update dynamic symbol table.
Maksim Panchenkobbbf6792020-02-27 04:43:184802 const ELFShdrTy *DynSymSection = nullptr;
Amir Ayupovc7306cc2021-04-08 07:19:264803 for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
Maksim Panchenko55fc5412016-09-28 02:09:384804 if (Section.sh_type == ELF::SHT_DYNSYM) {
4805 DynSymSection = &Section;
4806 break;
4807 }
4808 }
Maksim Panchenko250ca402020-06-26 23:52:074809 assert((DynSymSection || BC->IsStaticExecutable) &&
4810 "dynamic symbol table expected");
4811 if (DynSymSection) {
4812 updateELFSymbolTable(
4813 File,
Maksim Panchenko6b185cc2020-10-22 23:35:294814 /*IsDynSym=*/true,
Maksim Panchenko250ca402020-06-26 23:52:074815 *DynSymSection,
4816 NewSectionIndex,
4817 [&](size_t Offset, const ELFSymTy &Sym) {
4818 Out->os().pwrite(reinterpret_cast<const char *>(&Sym),
4819 sizeof(ELFSymTy),
4820 DynSymSection->sh_offset + Offset);
4821 },
4822 [](StringRef) -> size_t { return 0; });
4823 }
Maksim Panchenko55fc5412016-09-28 02:09:384824
Vladislav Khmelevsky95ee1292021-10-16 14:02:454825 if (opts::RemoveSymtab)
4826 return;
4827
Rafael Auler4e29afe2017-06-27 23:25:594828 // (re)create regular symbol table.
Maksim Panchenkobbbf6792020-02-27 04:43:184829 const ELFShdrTy *SymTabSection = nullptr;
Amir Ayupovc7306cc2021-04-08 07:19:264830 for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
Maksim Panchenko55fc5412016-09-28 02:09:384831 if (Section.sh_type == ELF::SHT_SYMTAB) {
4832 SymTabSection = &Section;
4833 break;
4834 }
4835 }
4836 if (!SymTabSection) {
4837 errs() << "BOLT-WARNING: no symbol table found\n";
4838 return;
4839 }
Rafael Auler4e29afe2017-06-27 23:25:594840
Maksim Panchenkobbbf6792020-02-27 04:43:184841 const ELFShdrTy *StrTabSection =
Amir Ayupov1c5d3a02020-12-02 00:29:394842 cantFail(Obj.getSection(SymTabSection->sh_link));
Rafael Auler4e29afe2017-06-27 23:25:594843 std::string NewContents;
Amir Ayupov1c5d3a02020-12-02 00:29:394844 std::string NewStrTab = std::string(
4845 File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size));
Amir Ayupovc7306cc2021-04-08 07:19:264846 StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection));
4847 StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection));
Rafael Auler4e29afe2017-06-27 23:25:594848
Maksim Panchenko30fd9602018-10-23 01:48:124849 NumLocalSymbols = 0;
Maksim Panchenkobbbf6792020-02-27 04:43:184850 updateELFSymbolTable(
4851 File,
Maksim Panchenko6b185cc2020-10-22 23:35:294852 /*IsDynSym=*/false,
Maksim Panchenkobbbf6792020-02-27 04:43:184853 *SymTabSection,
4854 NewSectionIndex,
4855 [&](size_t Offset, const ELFSymTy &Sym) {
4856 if (Sym.getBinding() == ELF::STB_LOCAL)
4857 ++NumLocalSymbols;
4858 NewContents.append(reinterpret_cast<const char *>(&Sym),
4859 sizeof(ELFSymTy));
4860 },
4861 [&](StringRef Str) {
4862 size_t Idx = NewStrTab.size();
Maksim Panchenko0465d952020-10-09 23:06:274863 NewStrTab.append(NameResolver::restore(Str).str());
Maksim Panchenkobbbf6792020-02-27 04:43:184864 NewStrTab.append(1, '\0');
4865 return Idx;
4866 });
Rafael Auler4e29afe2017-06-27 23:25:594867
Bill Nellddefc772018-02-02 00:33:434868 BC->registerOrUpdateNoteSection(SecName,
4869 copyByteArray(NewContents),
4870 NewContents.size(),
4871 /*Alignment=*/1,
4872 /*IsReadOnly=*/true,
4873 ELF::SHT_SYMTAB);
4874
4875 BC->registerOrUpdateNoteSection(StrSecName,
4876 copyByteArray(NewStrTab),
4877 NewStrTab.size(),
4878 /*Alignment=*/1,
4879 /*IsReadOnly=*/true,
4880 ELF::SHT_STRTAB);
Maksim Panchenko55fc5412016-09-28 02:09:384881}
4882
4883template <typename ELFT>
Maksim Panchenko88bb1452018-08-16 23:53:144884void
4885RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) {
Amir Ayupovc7306cc2021-04-08 07:19:264886 using Elf_Rela = typename ELFT::Rela;
4887 raw_fd_ostream &OS = Out->os();
Vladislav Khmelevsky729d29e2022-02-16 15:13:444888 const ELFFile<ELFT> &EF = File->getELFFile();
Maksim Panchenko55fc5412016-09-28 02:09:384889
Vladislav Khmelevsky729d29e2022-02-16 15:13:444890 uint64_t RelDynOffset = 0, RelDynEndOffset = 0;
4891 uint64_t RelPltOffset = 0, RelPltEndOffset = 0;
4892
4893 auto setSectionFileOffsets = [&](uint64_t Address, uint64_t &Start,
4894 uint64_t &End) {
4895 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
4896 Start = Section->getInputFileOffset();
4897 End = Start + Section->getSize();
4898 };
4899
4900 if (!DynamicRelocationsAddress && !PLTRelocationsAddress)
4901 return;
4902
4903 if (DynamicRelocationsAddress)
4904 setSectionFileOffsets(*DynamicRelocationsAddress, RelDynOffset,
4905 RelDynEndOffset);
4906
4907 if (PLTRelocationsAddress)
4908 setSectionFileOffsets(*PLTRelocationsAddress, RelPltOffset,
4909 RelPltEndOffset);
4910
4911 DynamicRelativeRelocationsCount = 0;
4912
4913 auto writeRela = [&OS](const Elf_Rela *RelA, uint64_t &Offset) {
4914 OS.pwrite(reinterpret_cast<const char *>(RelA), sizeof(*RelA), Offset);
4915 Offset += sizeof(*RelA);
4916 };
4917
4918 auto writeRelocations = [&](bool PatchRelative) {
4919 for (BinarySection &Section : BC->allocatableSections()) {
4920 for (const Relocation &Rel : Section.dynamicRelocations()) {
4921 const bool IsRelative = Rel.isRelative();
4922 if (PatchRelative != IsRelative)
4923 continue;
4924
4925 if (IsRelative)
4926 ++DynamicRelativeRelocationsCount;
4927
4928 Elf_Rela NewRelA;
4929 uint64_t SectionAddress = Section.getOutputAddress();
4930 SectionAddress =
4931 SectionAddress == 0 ? Section.getAddress() : SectionAddress;
4932 MCSymbol *Symbol = Rel.Symbol;
4933 uint32_t SymbolIdx = 0;
4934 uint64_t Addend = Rel.Addend;
4935
4936 if (Rel.Symbol) {
4937 SymbolIdx = getOutputDynamicSymbolIndex(Symbol);
4938 } else {
4939 // Usually this case is used for R_*_(I)RELATIVE relocations
4940 const uint64_t Address = getNewFunctionOrDataAddress(Addend);
4941 if (Address)
4942 Addend = Address;
4943 }
4944
4945 NewRelA.setSymbolAndType(SymbolIdx, Rel.Type, EF.isMips64EL());
4946 NewRelA.r_offset = SectionAddress + Rel.Offset;
4947 NewRelA.r_addend = Addend;
4948
4949 const bool IsJmpRel =
4950 !!(IsJmpRelocation.find(Rel.Type) != IsJmpRelocation.end());
4951 uint64_t &Offset = IsJmpRel ? RelPltOffset : RelDynOffset;
4952 const uint64_t &EndOffset =
4953 IsJmpRel ? RelPltEndOffset : RelDynEndOffset;
4954 if (!Offset || !EndOffset) {
4955 errs() << "BOLT-ERROR: Invalid offsets for dynamic relocation\n";
4956 exit(1);
4957 }
4958
4959 if (Offset + sizeof(NewRelA) > EndOffset) {
4960 errs() << "BOLT-ERROR: Offset overflow for dynamic relocation\n";
4961 exit(1);
4962 }
4963
4964 writeRela(&NewRelA, Offset);
4965 }
Maksim Panchenko55fc5412016-09-28 02:09:384966 }
Vladislav Khmelevsky729d29e2022-02-16 15:13:444967 };
4968
4969 // The dynamic linker expects R_*_RELATIVE relocations to be emitted first
4970 writeRelocations(/* PatchRelative */ true);
4971 writeRelocations(/* PatchRelative */ false);
4972
4973 auto fillNone = [&](uint64_t &Offset, uint64_t EndOffset) {
4974 if (!Offset)
4975 return;
4976
4977 typename ELFObjectFile<ELFT>::Elf_Rela RelA;
4978 RelA.setSymbolAndType(0, Relocation::getNone(), EF.isMips64EL());
4979 RelA.r_offset = 0;
4980 RelA.r_addend = 0;
4981 while (Offset < EndOffset)
4982 writeRela(&RelA, Offset);
4983
4984 assert(Offset == EndOffset && "Unexpected section overflow");
4985 };
4986
4987 // Fill the rest of the sections with R_*_NONE relocations
4988 fillNone(RelDynOffset, RelDynEndOffset);
4989 fillNone(RelPltOffset, RelPltEndOffset);
Maksim Panchenko55fc5412016-09-28 02:09:384990}
4991
4992template <typename ELFT>
4993void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) {
Amir Ayupovc7306cc2021-04-08 07:19:264994 raw_fd_ostream &OS = Out->os();
Maksim Panchenko55fc5412016-09-28 02:09:384995
4996 SectionRef GOTSection;
Amir Ayupovc7306cc2021-04-08 07:19:264997 for (const SectionRef &Section : File->sections()) {
Amir Ayupov1c5d3a02020-12-02 00:29:394998 StringRef SectionName = cantFail(Section.getName());
Maksim Panchenko55fc5412016-09-28 02:09:384999 if (SectionName == ".got") {
5000 GOTSection = Section;
5001 break;
5002 }
5003 }
5004 if (!GOTSection.getObject()) {
Vladislav Khmelevsky729d29e2022-02-16 15:13:445005 if (!BC->IsStaticExecutable)
5006 errs() << "BOLT-INFO: no .got section found\n";
Maksim Panchenko55fc5412016-09-28 02:09:385007 return;
5008 }
5009
Amir Ayupov1c5d3a02020-12-02 00:29:395010 StringRef GOTContents = cantFail(GOTSection.getContents());
Maksim Panchenko55fc5412016-09-28 02:09:385011 for (const uint64_t *GOTEntry =
Maksim Panchenko40c2e0f2021-12-15 00:52:515012 reinterpret_cast<const uint64_t *>(GOTContents.data());
Maksim Panchenko55fc5412016-09-28 02:09:385013 GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() +
Maksim Panchenko40c2e0f2021-12-15 00:52:515014 GOTContents.size());
Maksim Panchenko55fc5412016-09-28 02:09:385015 ++GOTEntry) {
Amir Ayupovc7306cc2021-04-08 07:19:265016 if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) {
Amir Ayupov1c5d3a02020-12-02 00:29:395017 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x"
5018 << Twine::utohexstr(*GOTEntry) << " with 0x"
5019 << Twine::utohexstr(NewAddress) << '\n');
Maksim Panchenko55fc5412016-09-28 02:09:385020 OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress),
Maksim Panchenko40c2e0f2021-12-15 00:52:515021 reinterpret_cast<const char *>(GOTEntry) -
5022 File->getData().data());
Maksim Panchenko55fc5412016-09-28 02:09:385023 }
5024 }
5025}
5026
5027template <typename ELFT>
5028void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
Maksim Panchenko250ca402020-06-26 23:52:075029 if (BC->IsStaticExecutable)
5030 return;
5031
Amir Ayupovc7306cc2021-04-08 07:19:265032 const ELFFile<ELFT> &Obj = File->getELFFile();
5033 raw_fd_ostream &OS = Out->os();
Maksim Panchenko55fc5412016-09-28 02:09:385034
5035 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
Maksim Panchenko40c2e0f2021-12-15 00:52:515036 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
Maksim Panchenko55fc5412016-09-28 02:09:385037
5038 // Locate DYNAMIC by looking through program headers.
5039 uint64_t DynamicOffset = 0;
5040 const Elf_Phdr *DynamicPhdr = 0;
Amir Ayupovc7306cc2021-04-08 07:19:265041 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
Maksim Panchenko55fc5412016-09-28 02:09:385042 if (Phdr.p_type == ELF::PT_DYNAMIC) {
5043 DynamicOffset = Phdr.p_offset;
5044 DynamicPhdr = &Phdr;
5045 assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match");
5046 break;
5047 }
5048 }
5049 assert(DynamicPhdr && "missing dynamic in ELF binary");
5050
Maksim Panchenko49d1f562017-08-04 18:21:055051 bool ZNowSet = false;
5052
Maksim Panchenko55fc5412016-09-28 02:09:385053 // Go through all dynamic entries and patch functions addresses with
5054 // new ones.
Amir Ayupovc7306cc2021-04-08 07:19:265055 typename ELFT::DynRange DynamicEntries =
Amir Ayupov1c5d3a02020-12-02 00:29:395056 cantFail(Obj.dynamicEntries(), "error accessing dynamic table");
5057 auto DTB = DynamicEntries.begin();
5058 for (const Elf_Dyn &Dyn : DynamicEntries) {
Amir Ayupovc7306cc2021-04-08 07:19:265059 Elf_Dyn NewDE = Dyn;
Maksim Panchenko55fc5412016-09-28 02:09:385060 bool ShouldPatch = true;
Amir Ayupov1c5d3a02020-12-02 00:29:395061 switch (Dyn.d_tag) {
Maksim Panchenko55fc5412016-09-28 02:09:385062 default:
5063 ShouldPatch = false;
5064 break;
Vladislav Khmelevsky729d29e2022-02-16 15:13:445065 case ELF::DT_RELACOUNT:
5066 NewDE.d_un.d_val = DynamicRelativeRelocationsCount;
5067 break;
Maksim Panchenko55fc5412016-09-28 02:09:385068 case ELF::DT_INIT:
Maksim Panchenkodf288e82021-12-28 21:46:455069 case ELF::DT_FINI: {
Maksim Panchenkob6f7c682017-12-10 05:40:395070 if (BC->HasRelocations) {
Amir Ayupovc7306cc2021-04-08 07:19:265071 if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) {
Amir Ayupov1c5d3a02020-12-02 00:29:395072 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type "
5073 << Dyn.getTag() << '\n');
Maksim Panchenko49d1f562017-08-04 18:21:055074 NewDE.d_un.d_ptr = NewAddress;
5075 }
5076 }
Maksim Panchenkodf288e82021-12-28 21:46:455077 RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
5078 if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) {
5079 if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress())
5080 NewDE.d_un.d_ptr = Addr;
Xun Li00892a5fd2020-05-21 21:28:475081 }
Maksim Panchenkodf288e82021-12-28 21:46:455082 if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
5083 if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
5084 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
5085 << Twine::utohexstr(Addr) << '\n');
5086 NewDE.d_un.d_ptr = Addr;
Vasily Leonenkoad79d512021-06-18 20:08:355087 }
5088 }
Maksim Panchenko49d1f562017-08-04 18:21:055089 break;
Maksim Panchenkodf288e82021-12-28 21:46:455090 }
Maksim Panchenko49d1f562017-08-04 18:21:055091 case ELF::DT_FLAGS:
5092 if (BC->RequiresZNow) {
5093 NewDE.d_un.d_val |= ELF::DF_BIND_NOW;
5094 ZNowSet = true;
5095 }
5096 break;
5097 case ELF::DT_FLAGS_1:
5098 if (BC->RequiresZNow) {
5099 NewDE.d_un.d_val |= ELF::DF_1_NOW;
5100 ZNowSet = true;
Maksim Panchenko55fc5412016-09-28 02:09:385101 }
5102 break;
5103 }
Maksim Panchenkoee0e9cc2021-12-23 20:38:335104 if (ShouldPatch)
Maksim Panchenko55fc5412016-09-28 02:09:385105 OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE),
Amir Ayupov1c5d3a02020-12-02 00:29:395106 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn));
Maksim Panchenko55fc5412016-09-28 02:09:385107 }
Maksim Panchenko49d1f562017-08-04 18:21:055108
5109 if (BC->RequiresZNow && !ZNowSet) {
5110 errs() << "BOLT-ERROR: output binary requires immediate relocation "
5111 "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in "
5112 ".dynamic. Please re-link the binary with -znow.\n";
5113 exit(1);
5114 }
Maksim Panchenko55fc5412016-09-28 02:09:385115}
5116
Rafael Auler16a497c2019-12-14 01:27:035117template <typename ELFT>
Amir Ayupov1e016c32022-03-08 17:17:415118Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
Amir Ayupovc7306cc2021-04-08 07:19:265119 const ELFFile<ELFT> &Obj = File->getELFFile();
Rafael Auler16a497c2019-12-14 01:27:035120
5121 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
Maksim Panchenko40c2e0f2021-12-15 00:52:515122 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
Rafael Auler16a497c2019-12-14 01:27:035123
Rafael Auler16a497c2019-12-14 01:27:035124 // Locate DYNAMIC by looking through program headers.
5125 const Elf_Phdr *DynamicPhdr = 0;
Amir Ayupovc7306cc2021-04-08 07:19:265126 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
Rafael Auler16a497c2019-12-14 01:27:035127 if (Phdr.p_type == ELF::PT_DYNAMIC) {
5128 DynamicPhdr = &Phdr;
Rafael Auler16a497c2019-12-14 01:27:035129 break;
5130 }
5131 }
Rafael Auler16a497c2019-12-14 01:27:035132
Maksim Panchenko250ca402020-06-26 23:52:075133 if (!DynamicPhdr) {
5134 outs() << "BOLT-INFO: static input executable detected\n";
Vasily Leonenko9b39a822021-06-20 17:59:385135 // TODO: static PIE executable might have dynamic header
Maksim Panchenko250ca402020-06-26 23:52:075136 BC->IsStaticExecutable = true;
Amir Ayupov1e016c32022-03-08 17:17:415137 return Error::success();
Maksim Panchenko74a27772020-03-09 02:04:395138 }
5139
Amir Ayupov1e016c32022-03-08 17:17:415140 if (DynamicPhdr->p_memsz != DynamicPhdr->p_filesz)
5141 return createStringError(errc::executable_format_error,
5142 "dynamic section sizes should match");
Maksim Panchenko74a27772020-03-09 02:04:395143
5144 // Go through all dynamic entries to locate entries of interest.
Amir Ayupovd16bbc52022-03-10 04:24:205145 auto DynamicEntriesOrErr = Obj.dynamicEntries();
5146 if (!DynamicEntriesOrErr)
5147 return DynamicEntriesOrErr.takeError();
5148 typename ELFT::DynRange DynamicEntries = DynamicEntriesOrErr.get();
Amir Ayupov1c5d3a02020-12-02 00:29:395149
5150 for (const Elf_Dyn &Dyn : DynamicEntries) {
5151 switch (Dyn.d_tag) {
Vasily Leonenkoad79d512021-06-18 20:08:355152 case ELF::DT_INIT:
5153 if (!BC->HasInterpHeader) {
5154 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
5155 BC->StartFunctionAddress = Dyn.getPtr();
5156 }
5157 break;
Maksim Panchenko4aaa8892020-06-23 19:22:585158 case ELF::DT_FINI:
Amir Ayupov1c5d3a02020-12-02 00:29:395159 BC->FiniFunctionAddress = Dyn.getPtr();
Maksim Panchenko4aaa8892020-06-23 19:22:585160 break;
5161 case ELF::DT_RELA:
Maksim Panchenko1de07462021-06-30 21:38:505162 DynamicRelocationsAddress = Dyn.getPtr();
Maksim Panchenko4aaa8892020-06-23 19:22:585163 break;
5164 case ELF::DT_RELASZ:
Maksim Panchenko1de07462021-06-30 21:38:505165 DynamicRelocationsSize = Dyn.getVal();
5166 break;
5167 case ELF::DT_JMPREL:
5168 PLTRelocationsAddress = Dyn.getPtr();
5169 break;
5170 case ELF::DT_PLTRELSZ:
5171 PLTRelocationsSize = Dyn.getVal();
Maksim Panchenko4aaa8892020-06-23 19:22:585172 break;
Vladislav Khmelevsky729d29e2022-02-16 15:13:445173 case ELF::DT_RELACOUNT:
5174 DynamicRelativeRelocationsCount = Dyn.getVal();
5175 break;
Maksim Panchenko4aaa8892020-06-23 19:22:585176 }
Rafael Auler16a497c2019-12-14 01:27:035177 }
Maksim Panchenko1de07462021-06-30 21:38:505178
Vladislav Khmelevsky729d29e2022-02-16 15:13:445179 if (!DynamicRelocationsAddress || !DynamicRelocationsSize) {
5180 DynamicRelocationsAddress.reset();
Maksim Panchenko1de07462021-06-30 21:38:505181 DynamicRelocationsSize = 0;
Vladislav Khmelevsky729d29e2022-02-16 15:13:445182 }
Maksim Panchenko1de07462021-06-30 21:38:505183
Vladislav Khmelevsky729d29e2022-02-16 15:13:445184 if (!PLTRelocationsAddress || !PLTRelocationsSize) {
5185 PLTRelocationsAddress.reset();
Maksim Panchenko1de07462021-06-30 21:38:505186 PLTRelocationsSize = 0;
Vladislav Khmelevsky729d29e2022-02-16 15:13:445187 }
Amir Ayupov1e016c32022-03-08 17:17:415188 return Error::success();
Rafael Auler16a497c2019-12-14 01:27:035189}
5190
Maksim Panchenko55fc5412016-09-28 02:09:385191uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
Amir Ayupovc7306cc2021-04-08 07:19:265192 const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress);
Maksim Panchenko55fc5412016-09-28 02:09:385193 if (!Function)
5194 return 0;
Maksim Panchenkoa82cff02020-09-14 22:48:325195
Maksim Panchenko3f42fdf2017-05-09 05:51:365196 return Function->getOutputAddress();
Rafael Aulerc67a7532015-11-24 01:54:185197}
5198
Vladislav Khmelevsky729d29e2022-02-16 15:13:445199uint64_t RewriteInstance::getNewFunctionOrDataAddress(uint64_t OldAddress) {
5200 if (uint64_t Function = getNewFunctionAddress(OldAddress))
5201 return Function;
5202
5203 const BinaryData *BD = BC->getBinaryDataAtAddress(OldAddress);
5204 if (BD && BD->isMoved())
5205 return BD->getOutputAddress();
5206
5207 return 0;
5208}
5209
Rafael Aulerc67a7532015-11-24 01:54:185210void RewriteInstance::rewriteFile() {
Maksim Panchenko87291712020-05-08 06:00:295211 std::error_code EC;
Amir Ayupov1c5d3a02020-12-02 00:29:395212 Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC,
Maksim Panchenko40c2e0f2021-12-15 00:52:515213 sys::fs::OF_None);
Maksim Panchenko87291712020-05-08 06:00:295214 check_error(EC, "cannot create output executable file");
5215
Amir Ayupovc7306cc2021-04-08 07:19:265216 raw_fd_ostream &OS = Out->os();
Maksim Panchenko55fc5412016-09-28 02:09:385217
Maksim Panchenko87291712020-05-08 06:00:295218 // Copy allocatable part of the input.
5219 OS << InputFile->getData().substr(0, FirstNonAllocatableOffset);
5220
Maksim Panchenko50c895a2016-02-08 18:02:485221 // We obtain an asm-specific writer so that we can emit nops in an
5222 // architecture-specific way at the end of the function.
Amir Ayupov12e9fec2021-04-01 18:43:005223 std::unique_ptr<MCAsmBackend> MAB(
5224 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
5225 auto Streamer = BC->createStreamer(OS);
Maksim Panchenko7f7d4af2016-02-13 03:01:535226 // Make sure output stream has enough reserved space, otherwise
5227 // pwrite() will fail.
Amir Ayupovc7306cc2021-04-08 07:19:265228 uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress));
Maksim Panchenko24285672017-05-25 17:29:385229 (void)Offset;
Maksim Panchenko6ff17952017-01-17 23:49:595230 assert(Offset == getFileOffsetForAddress(NextAvailableAddress) &&
Maksim Panchenko50c895a2016-02-08 18:02:485231 "error resizing output file");
Rafael Aulerc67a7532015-11-24 01:54:185232
Rafael Auler7b779f82021-09-10 23:19:505233 // Overwrite functions with fixed output address. This is mostly used by
5234 // non-relocation mode, with one exception: injected functions are covered
5235 // here in both modes.
Maksim Panchenko0465d952020-10-09 23:06:275236 uint64_t CountOverwrittenFunctions = 0;
5237 uint64_t OverwrittenScore = 0;
5238 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
Maksim Panchenko0465d952020-10-09 23:06:275239 if (Function->getImageAddress() == 0 || Function->getImageSize() == 0)
5240 continue;
Gabriel Poesia784f6a82016-04-06 02:35:455241
Maksim Panchenko0465d952020-10-09 23:06:275242 if (Function->getImageSize() > Function->getMaxSize()) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:335243 if (opts::Verbosity >= 1)
Maksim Panchenko0465d952020-10-09 23:06:275244 errs() << "BOLT-WARNING: new function size (0x"
5245 << Twine::utohexstr(Function->getImageSize())
5246 << ") is larger than maximum allowed size (0x"
Maksim Panchenko40c2e0f2021-12-15 00:52:515247 << Twine::utohexstr(Function->getMaxSize()) << ") for function "
5248 << *Function << '\n';
Maksim Panchenkoee0e9cc2021-12-23 20:38:335249
Rafael Auler7b779f82021-09-10 23:19:505250 // Remove jump table sections that this function owns in non-reloc mode
Maksim Panchenkoba1f5032021-09-29 18:40:165251 // because we don't want to write them anymore.
Rafael Auler7b779f82021-09-10 23:19:505252 if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) {
5253 for (auto &JTI : Function->JumpTables) {
5254 JumpTable *JT = JTI.second;
5255 BinarySection &Section = JT->getOutputSection();
5256 BC->deregisterSection(Section);
5257 }
5258 }
Maksim Panchenko0465d952020-10-09 23:06:275259 continue;
5260 }
Rafael Aulerc67a7532015-11-24 01:54:185261
Maksim Panchenko0465d952020-10-09 23:06:275262 if (Function->isSplit() && (Function->cold().getImageAddress() == 0 ||
5263 Function->cold().getImageSize() == 0))
5264 continue;
Maksim Panchenkoc4c518e2016-09-08 21:52:265265
Maksim Panchenko0465d952020-10-09 23:06:275266 OverwrittenScore += Function->getFunctionScore();
5267 // Overwrite function in the output file.
Maksim Panchenkoee0e9cc2021-12-23 20:38:335268 if (opts::Verbosity >= 2)
Maksim Panchenko0465d952020-10-09 23:06:275269 outs() << "BOLT: rewriting function \"" << *Function << "\"\n";
Maksim Panchenkoee0e9cc2021-12-23 20:38:335270
Maksim Panchenko0465d952020-10-09 23:06:275271 OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()),
Maksim Panchenko40c2e0f2021-12-15 00:52:515272 Function->getImageSize(), Function->getFileOffset());
Rafael Aulerc67a7532015-11-24 01:54:185273
Maksim Panchenko0465d952020-10-09 23:06:275274 // Write nops at the end of the function.
5275 if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) {
Amir Ayupovc7306cc2021-04-08 07:19:265276 uint64_t Pos = OS.tell();
Maksim Panchenko0465d952020-10-09 23:06:275277 OS.seek(Function->getFileOffset() + Function->getImageSize());
Amir Ayupov1c5d3a02020-12-02 00:29:395278 MAB->writeNopData(OS, Function->getMaxSize() - Function->getImageSize(),
5279 &*BC->STI);
5280
Maksim Panchenko6ff17952017-01-17 23:49:595281 OS.seek(Pos);
Maksim Panchenko0465d952020-10-09 23:06:275282 }
Maksim Panchenko6ff17952017-01-17 23:49:595283
Maksim Panchenko0465d952020-10-09 23:06:275284 if (!Function->isSplit()) {
Rafael Aulerc67a7532015-11-24 01:54:185285 ++CountOverwrittenFunctions;
5286 if (opts::MaxFunctions &&
5287 CountOverwrittenFunctions == opts::MaxFunctions) {
Maksim Panchenkoa7d02512018-06-14 21:27:205288 outs() << "BOLT: maximum number of functions reached\n";
Rafael Aulerc67a7532015-11-24 01:54:185289 break;
5290 }
Maksim Panchenko0465d952020-10-09 23:06:275291 continue;
Rafael Aulerc67a7532015-11-24 01:54:185292 }
5293
Maksim Panchenko0465d952020-10-09 23:06:275294 // Write cold part
Maksim Panchenkoee0e9cc2021-12-23 20:38:335295 if (opts::Verbosity >= 2)
Maksim Panchenko0465d952020-10-09 23:06:275296 outs() << "BOLT: rewriting function \"" << *Function
5297 << "\" (cold part)\n";
Maksim Panchenkoee0e9cc2021-12-23 20:38:335298
Maksim Panchenko40c2e0f2021-12-15 00:52:515299 OS.pwrite(reinterpret_cast<char *>(Function->cold().getImageAddress()),
Maksim Panchenko0465d952020-10-09 23:06:275300 Function->cold().getImageSize(),
5301 Function->cold().getFileOffset());
5302
5303 ++CountOverwrittenFunctions;
Maksim Panchenko40c2e0f2021-12-15 00:52:515304 if (opts::MaxFunctions && CountOverwrittenFunctions == opts::MaxFunctions) {
Maksim Panchenko0465d952020-10-09 23:06:275305 outs() << "BOLT: maximum number of functions reached\n";
5306 break;
5307 }
5308 }
5309
5310 // Print function statistics for non-relocation mode.
5311 if (!BC->HasRelocations) {
Maksim Panchenko40c2e0f2021-12-15 00:52:515312 outs() << "BOLT: " << CountOverwrittenFunctions << " out of "
5313 << BC->getBinaryFunctions().size()
Maksim Panchenko55fc5412016-09-28 02:09:385314 << " functions were overwritten.\n";
Maksim Panchenkod15b93b2017-11-28 17:57:215315 if (BC->TotalScore != 0) {
Maksim Panchenko40c2e0f2021-12-15 00:52:515316 double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0;
Wenlei He615a3182019-09-04 05:24:065317 outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage)
Maksim Panchenko55fc5412016-09-28 02:09:385318 << "% of the execution count of simple functions of "
Wenlei He615a3182019-09-04 05:24:065319 "this binary\n";
Rafael Aulerc67a7532015-11-24 01:54:185320 }
5321 }
Maksim Panchenkoc9b7e3e2015-12-19 01:00:465322
Maksim Panchenkob6f7c682017-12-10 05:40:395323 if (BC->HasRelocations && opts::TrapOldCode) {
Amir Ayupovc7306cc2021-04-08 07:19:265324 uint64_t SavedPos = OS.tell();
Maksim Panchenko55fc5412016-09-28 02:09:385325 // Overwrite function body to make sure we never execute these instructions.
Maksim Panchenko7fd48702019-04-03 22:52:015326 for (auto &BFI : BC->getBinaryFunctions()) {
Amir Ayupovc7306cc2021-04-08 07:19:265327 BinaryFunction &BF = BFI.second;
Maksim Panchenko0ce0bce2020-06-15 07:15:475328 if (!BF.getFileOffset() || !BF.isEmitted())
Maksim Panchenko55fc5412016-09-28 02:09:385329 continue;
Maksim Panchenko6ff17952017-01-17 23:49:595330 OS.seek(BF.getFileOffset());
Maksim Panchenko55fc5412016-09-28 02:09:385331 for (unsigned I = 0; I < BF.getMaxSize(); ++I)
Amir Ayupov1c5d3a02020-12-02 00:29:395332 OS.write((unsigned char)BC->MIB->getTrapFillValue());
Maksim Panchenko55fc5412016-09-28 02:09:385333 }
Maksim Panchenko6ff17952017-01-17 23:49:595334 OS.seek(SavedPos);
Maksim Panchenkod68b1c7b2016-03-03 18:13:115335 }
Maksim Panchenkoc9b7e3e2015-12-19 01:00:465336
Rafael Auler7b779f82021-09-10 23:19:505337 // Write all allocatable sections - reloc-mode text is written here as well
Amir Ayupovc7306cc2021-04-08 07:19:265338 for (BinarySection &Section : BC->allocatableSections()) {
Maksim Panchenko97112862020-02-18 17:20:175339 if (!Section.isFinalized() || !Section.getOutputData())
Maksim Panchenkoc9b7e3e2015-12-19 01:00:465340 continue;
Maksim Panchenko97112862020-02-18 17:20:175341
Maksim Panchenkoee0e9cc2021-12-23 20:38:335342 if (opts::Verbosity >= 1)
Maksim Panchenkoa7d02512018-06-14 21:27:205343 outs() << "BOLT: writing new section " << Section.getName()
Maksim Panchenkoa62f4fd2018-03-20 01:32:125344 << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress())
Maksim Panchenko40c2e0f2021-12-15 00:52:515345 << "\n of size " << Section.getOutputSize() << "\n at offset "
5346 << Section.getOutputFileOffset() << '\n';
Maksim Panchenko40c2e0f2021-12-15 00:52:515347 OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()),
5348 Section.getOutputSize(), Section.getOutputFileOffset());
Maksim Panchenko0ce0bce2020-06-15 07:15:475349 }
5350
Maksim Panchenkoee0e9cc2021-12-23 20:38:335351 for (BinarySection &Section : BC->allocatableSections())
Maksim Panchenko40c2e0f2021-12-15 00:52:515352 Section.flushPendingRelocations(OS, [this](const MCSymbol *S) {
5353 return getNewValueForSymbol(S->getName());
5354 });
Rafael Aulerc67a7532015-11-24 01:54:185355
Maksim Panchenkoa7fb6102016-11-11 22:33:345356 // If .eh_frame is present create .eh_frame_hdr.
Maksim Panchenkoee0e9cc2021-12-23 20:38:335357 if (EHFrameSection && EHFrameSection->isFinalized())
Bill Nellddefc772018-02-02 00:33:435358 writeEHFrameHeader();
Maksim Panchenko50c895a2016-02-08 18:02:485359
Rafael Auler21f43032019-04-13 00:33:465360 // Add BOLT Addresses Translation maps to allow profile collection to
5361 // happen in the output binary
5362 if (opts::EnableBAT)
5363 addBATSection();
5364
Maksim Panchenkod68b1c7b2016-03-03 18:13:115365 // Patch program header table.
5366 patchELFPHDRTable();
Maksim Panchenko50c895a2016-02-08 18:02:485367
Maksim Panchenko69b58632017-05-17 00:29:315368 // Finalize memory image of section string table.
5369 finalizeSectionStringTable();
5370
Rafael Auler624b2d92017-09-20 17:43:015371 // Update symbol tables.
5372 patchELFSymTabs();
Rafael Auler4e29afe2017-06-27 23:25:595373
Rafael Auler9c4fcaf2018-08-09 00:55:245374 patchBuildID();
5375
Rafael Auler21f43032019-04-13 00:33:465376 if (opts::EnableBAT)
5377 encodeBATSection();
5378
Maksim Panchenkod68b1c7b2016-03-03 18:13:115379 // Copy non-allocatable sections once allocatable part is finished.
5380 rewriteNoteSections();
5381
Maksim Panchenkob6f7c682017-12-10 05:40:395382 if (BC->HasRelocations) {
Maksim Panchenko88bb1452018-08-16 23:53:145383 patchELFAllocatableRelaSections();
Maksim Panchenko6ff17952017-01-17 23:49:595384 patchELFGOT();
5385 }
Maksim Panchenko55fc5412016-09-28 02:09:385386
Vladislav Khmelevsky729d29e2022-02-16 15:13:445387 // Patch dynamic section/segment.
5388 patchELFDynamic();
5389
Maksim Panchenkod68b1c7b2016-03-03 18:13:115390 // Update ELF book-keeping info.
5391 patchELFSectionHeaderTable();
Rafael Aulerc67a7532015-11-24 01:54:185392
Bill Nellddefc772018-02-02 00:33:435393 if (opts::PrintSections) {
5394 outs() << "BOLT-INFO: Sections after processing:\n";
5395 BC->printSections(outs());
5396 }
5397
Rafael Aulerc67a7532015-11-24 01:54:185398 Out->keep();
Maksim Panchenko40c2e0f2021-12-15 00:52:515399 EC = sys::fs::setPermissions(opts::OutputFilename, sys::fs::perms::all_all);
Amir Ayupov1c5d3a02020-12-02 00:29:395400 check_error(EC, "cannot set permissions of output file");
Rafael Aulerc67a7532015-11-24 01:54:185401}
Gabriel Poesia73c9f0a2016-03-03 02:40:105402
Bill Nellddefc772018-02-02 00:33:435403void RewriteInstance::writeEHFrameHeader() {
Amir Ayupov1c5d3a02020-12-02 00:29:395404 DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true,
5405 EHFrameSection->getOutputAddress());
5406 Error E = NewEHFrame.parse(DWARFDataExtractor(
5407 EHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(),
5408 BC->AsmInfo->getCodePointerSize()));
5409 check_error(std::move(E), "failed to parse EH frame");
Maksim Panchenkoa7fb6102016-11-11 22:33:345410
Maksim Panchenkofe37f182021-05-13 17:50:475411 uint64_t OldEHFrameAddress = 0;
Maksim Panchenko33e0b2a2020-04-19 19:55:435412 StringRef OldEHFrameContents;
Amir Ayupovc7306cc2021-04-08 07:19:265413 ErrorOr<BinarySection &> OldEHFrameSection =
5414 BC->getUniqueSectionByName(Twine(getOrgSecPrefix(), ".eh_frame").str());
Maksim Panchenko33e0b2a2020-04-19 19:55:435415 if (OldEHFrameSection) {
5416 OldEHFrameAddress = OldEHFrameSection->getOutputAddress();
5417 OldEHFrameContents = OldEHFrameSection->getOutputContents();
5418 }
Amir Ayupov1c5d3a02020-12-02 00:29:395419 DWARFDebugFrame OldEHFrame(BC->TheTriple->getArch(), true, OldEHFrameAddress);
5420 Error Er = OldEHFrame.parse(
5421 DWARFDataExtractor(OldEHFrameContents, BC->AsmInfo->isLittleEndian(),
5422 BC->AsmInfo->getCodePointerSize()));
5423 check_error(std::move(Er), "failed to parse EH frame");
Maksim Panchenkoa7fb6102016-11-11 22:33:345424
Amir Ayupov1c5d3a02020-12-02 00:29:395425 LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n");
Maksim Panchenkoa7fb6102016-11-11 22:33:345426
Maksim Panchenko075f0762017-04-06 17:49:595427 NextAvailableAddress =
Maksim Panchenko40c2e0f2021-12-15 00:52:515428 appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign);
Maksim Panchenkoa7fb6102016-11-11 22:33:345429
Amir Ayupovc7306cc2021-04-08 07:19:265430 const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress;
5431 const uint64_t EHFrameHdrFileOffset =
5432 getFileOffsetForAddress(NextAvailableAddress);
Maksim Panchenkoa7fb6102016-11-11 22:33:345433
Amir Ayupovc7306cc2021-04-08 07:19:265434 std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader(
5435 OldEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses);
Maksim Panchenkoa7fb6102016-11-11 22:33:345436
Bill Nellddefc772018-02-02 00:33:435437 assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch");
5438 Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size());
Maksim Panchenkoa7fb6102016-11-11 22:33:345439
Amir Ayupovc7306cc2021-04-08 07:19:265440 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
5441 /*IsText=*/false,
5442 /*IsAllocatable=*/true);
5443 BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection(
5444 ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr, NewEHFrameHdr.size(),
5445 /*Alignment=*/1);
Maksim Panchenko0ce0bce2020-06-15 07:15:475446 EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset);
Maksim Panchenko163adbe2019-03-15 01:51:055447 EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress);
Maksim Panchenkoa7fb6102016-11-11 22:33:345448
Bill Nellddefc772018-02-02 00:33:435449 NextAvailableAddress += EHFrameHdrSec.getOutputSize();
Maksim Panchenkoa7fb6102016-11-11 22:33:345450
Maksim Panchenkoaf553122020-03-07 19:19:095451 // Merge new .eh_frame with original so that gdb can locate all FDEs.
Maksim Panchenko33e0b2a2020-04-19 19:55:435452 if (OldEHFrameSection) {
Amir Ayupovc7306cc2021-04-08 07:19:265453 const uint64_t EHFrameSectionSize = (OldEHFrameSection->getOutputAddress() +
5454 OldEHFrameSection->getOutputSize() -
5455 EHFrameSection->getOutputAddress());
Maksim Panchenko33e0b2a2020-04-19 19:55:435456 EHFrameSection =
5457 BC->registerOrUpdateSection(".eh_frame",
5458 EHFrameSection->getELFType(),
5459 EHFrameSection->getELFFlags(),
5460 EHFrameSection->getOutputData(),
5461 EHFrameSectionSize,
5462 EHFrameSection->getAlignment());
5463 BC->deregisterSection(*OldEHFrameSection);
5464 }
Bill Nellddefc772018-02-02 00:33:435465
Amir Ayupov1c5d3a02020-12-02 00:29:395466 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is "
5467 << EHFrameSection->getOutputSize() << '\n');
Maksim Panchenkoa7fb6102016-11-11 22:33:345468}
5469
Maksim Panchenko4946b882020-06-22 23:16:085470uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) {
Maksim Panchenko40c2e0f2021-12-15 00:52:515471 uint64_t Value = RTDyld->getSymbol(Name).getAddress();
Maksim Panchenko4946b882020-06-22 23:16:085472 if (Value != 0)
5473 return Value;
5474
5475 // Return the original value if we haven't emitted the symbol.
Amir Ayupovc7306cc2021-04-08 07:19:265476 BinaryData *BD = BC->getBinaryDataByName(Name);
Maksim Panchenko4946b882020-06-22 23:16:085477 if (!BD)
5478 return 0;
5479
5480 return BD->getAddress();
5481}
5482
Maksim Panchenko6ff17952017-01-17 23:49:595483uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const {
5484 // Check if it's possibly part of the new segment.
Maksim Panchenkoee0e9cc2021-12-23 20:38:335485 if (Address >= NewTextSegmentAddress)
Maksim Panchenko6ff17952017-01-17 23:49:595486 return Address - NewTextSegmentAddress + NewTextSegmentOffset;
Maksim Panchenko6ff17952017-01-17 23:49:595487
5488 // Find an existing segment that matches the address.
Maksim Panchenko250ca402020-06-26 23:52:075489 const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address);
5490 if (SegmentInfoI == BC->SegmentMapInfo.begin())
Maksim Panchenko6ff17952017-01-17 23:49:595491 return 0;
5492
Amir Ayupovc7306cc2021-04-08 07:19:265493 const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second;
Maksim Panchenko6ff17952017-01-17 23:49:595494 if (Address < SegmentInfo.Address ||
5495 Address >= SegmentInfo.Address + SegmentInfo.FileSize)
5496 return 0;
5497
Maksim Panchenko40c2e0f2021-12-15 00:52:515498 return SegmentInfo.FileOffset + Address - SegmentInfo.Address;
Maksim Panchenko6ff17952017-01-17 23:49:595499}
5500
Maksim Panchenkoe2128052017-02-07 20:20:465501bool RewriteInstance::willOverwriteSection(StringRef SectionName) {
Maksim Panchenkoee0e9cc2021-12-23 20:38:335502 for (const char *const &OverwriteName : SectionsToOverwrite)
Rafael Auler624b2d92017-09-20 17:43:015503 if (SectionName == OverwriteName)
5504 return true;
Maksim Panchenkoee0e9cc2021-12-23 20:38:335505 for (std::string &OverwriteName : DebugSectionsToOverwrite)
Maksim Panchenko2b152332019-04-26 22:30:125506 if (SectionName == OverwriteName)
5507 return true;
Rafael Auler21f43032019-04-13 00:33:465508
Amir Ayupovc7306cc2021-04-08 07:19:265509 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
Bill Nellddefc772018-02-02 00:33:435510 return Section && Section->isAllocatable() && Section->isFinalized();
Maksim Panchenkof047b9d2016-05-17 00:02:175511}
Maksim Panchenko2b152332019-04-26 22:30:125512
5513bool RewriteInstance::isDebugSection(StringRef SectionName) {
Vladislav Khmelevsky95ee1292021-10-16 14:02:455514 if (SectionName.startswith(".debug_") || SectionName.startswith(".zdebug_") ||
5515 SectionName == ".gdb_index" || SectionName == ".stab" ||
5516 SectionName == ".stabstr")
Maksim Panchenko2b152332019-04-26 22:30:125517 return true;
5518
5519 return false;
5520}
takh48b71ad2020-06-11 06:00:395521
5522bool RewriteInstance::isKSymtabSection(StringRef SectionName) {
5523 if (SectionName.startswith("__ksymtab"))
5524 return true;
5525
5526 return false;
5527}