blob: 2b2cf19f14b308cb88c0d5cc5371c6dc217f2c9c [file] [log] [blame]
//===- MarkLive.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8//
9// This file implements --gc-sections, which is a feature to remove unused
10// chunks from the output. Unused chunks are those that are not reachable from
11// known root symbols or chunks. This feature is implemented as a mark-sweep
12// garbage collector.
13//
// Here's how it works. Each InputChunk has a "Live" bit. The bit is off by
// default. Starting with the GC-roots, visit all reachable chunks and set their
// Live bits. The Writer will then ignore chunks whose Live bits are off, so
// that such chunks do not appear in the output.
18//
19//===----------------------------------------------------------------------===//
20
21#include "MarkLive.h"
22#include "Config.h"
23#include "InputChunks.h"
Andy Wingoa56e5742021-02-11 11:15:2424#include "InputElement.h"
Sam Clegg03626332018-01-31 01:45:4725#include "SymbolTable.h"
26#include "Symbols.h"
27
28#define DEBUG_TYPE "lld"
29
30using namespace llvm;
31using namespace llvm::wasm;
Sam Clegg03626332018-01-31 01:45:4732
Sam Cleggd32f71a2023-03-06 17:55:0033namespace lld::wasm {
Sam Cleggad2e12a2019-10-10 03:23:0634
35namespace {
36
37class MarkLive {
38public:
39 void run();
40
41private:
42 void enqueue(Symbol *sym);
Yuta Saitoba3c1f92024-02-20 18:35:3643 void enqueue(InputChunk *chunk);
Dan Gohman950ae432020-10-01 03:00:0444 void enqueueInitFunctions(const ObjFile *sym);
Yuta Saitoba3c1f92024-02-20 18:35:3645 void enqueueRetainedSegments(const ObjFile *file);
Sam Cleggad2e12a2019-10-10 03:23:0646 void mark();
Dan Gohman6cd85112020-10-01 00:21:5747 bool isCallCtorsLive();
Sam Cleggad2e12a2019-10-10 03:23:0648
49 // A list of chunks to visit.
50 SmallVector<InputChunk *, 256> queue;
51};
52
53} // namespace
54
55void MarkLive::enqueue(Symbol *sym) {
56 if (!sym || sym->isLive())
Sam Cleggffd0aaf2018-06-22 15:13:1057 return;
Sam Cleggad2e12a2019-10-10 03:23:0658 LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
Dan Gohman950ae432020-10-01 03:00:0459
60 InputFile *file = sym->getFile();
Yuta Saitoba3c1f92024-02-20 18:35:3661 bool markImplicitDeps = file && !file->isLive() && sym->isDefined();
Dan Gohman950ae432020-10-01 03:00:0462
Sam Cleggad2e12a2019-10-10 03:23:0663 sym->markLive();
Dan Gohman950ae432020-10-01 03:00:0464
Yuta Saitoba3c1f92024-02-20 18:35:3665 if (markImplicitDeps) {
Sam Clegg22b7b842024-07-12 20:26:5266 if (auto obj = dyn_cast<ObjFile>(file)) {
67 // Mark as live the ctor functions in the object that defines this symbol.
68 // The ctor functions are all referenced by the synthetic callCtors
69 // function. However, this function does not contain relocations so we
70 // have to manually mark the ctors as live.
71 enqueueInitFunctions(obj);
72 // Mark retained segments in the object that defines this symbol live.
73 enqueueRetainedSegments(obj);
74 }
Yuta Saitoba3c1f92024-02-20 18:35:3675 }
Dan Gohman950ae432020-10-01 03:00:0476
Sam Cleggad2e12a2019-10-10 03:23:0677 if (InputChunk *chunk = sym->getChunk())
78 queue.push_back(chunk);
Sam Cleggad2e12a2019-10-10 03:23:0679}
Sam Clegg03626332018-01-31 01:45:4780
Yuta Saitoba3c1f92024-02-20 18:35:3681void MarkLive::enqueue(InputChunk *chunk) {
82 LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n");
83 chunk->live = true;
84 queue.push_back(chunk);
85}
86
Dan Gohman950ae432020-10-01 03:00:0487// The ctor functions are all referenced by the synthetic callCtors
88// function. However, this function does not contain relocations so we
89// have to manually mark the ctors as live.
90void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
91 const WasmLinkingData &l = obj->getWasmObj()->linkingData();
92 for (const WasmInitFunc &f : l.InitFunctions) {
93 auto *initSym = obj->getFunctionSymbol(f.Symbol);
94 if (!initSym->isDiscarded())
95 enqueue(initSym);
96 }
97}
98
Yuta Saitoba3c1f92024-02-20 18:35:3699// Mark segments flagged by segment-level no-strip. Segment-level no-strip is
100// usually used to retain segments without having symbol table entry.
101void MarkLive::enqueueRetainedSegments(const ObjFile *file) {
102 for (InputChunk *chunk : file->segments)
103 if (chunk->isRetained())
104 enqueue(chunk);
105}
106
Sam Cleggad2e12a2019-10-10 03:23:06107void MarkLive::run() {
Sam Clegg03626332018-01-31 01:45:47108 // Add GC root symbols.
Fangrui Song3792b362025-01-03 01:08:18109 if (!ctx.arg.entry.empty())
110 enqueue(symtab->find(ctx.arg.entry));
Sam Clegg03626332018-01-31 01:45:47111
Dan Gohman7cb9c8a2019-08-29 22:41:05112 // We need to preserve any no-strip or exported symbol
Sam Clegg113b5682022-08-05 19:54:29113 for (Symbol *sym : symtab->symbols())
Dan Gohman7cb9c8a2019-08-29 22:41:05114 if (sym->isNoStrip() || sym->isExported())
Rui Ueyama136d27a2019-07-11 05:40:30115 enqueue(sym);
Sam Clegg03626332018-01-31 01:45:47116
Anutosh Bhat9cbbb742025-04-25 14:35:00117 if (ctx.sym.callDtors)
118 enqueue(ctx.sym.callDtors);
Dan Gohman6cd85112020-10-01 00:21:57119
Sam Clegg3c584572024-01-18 23:53:13120 for (const ObjFile *obj : ctx.objectFiles)
Yuta Saitoba3c1f92024-02-20 18:35:36121 if (obj->isLive()) {
122 // Enqueue constructors in objects explicitly live from the command-line.
Fangrui Song025b3092022-06-04 05:18:06123 enqueueInitFunctions(obj);
Yuta Saitoba3c1f92024-02-20 18:35:36124 // Enqueue retained segments in objects explicitly live from the
125 // command-line.
126 enqueueRetainedSegments(obj);
127 }
Dan Gohman950ae432020-10-01 03:00:04128
Sam Cleggad2e12a2019-10-10 03:23:06129 mark();
Dan Gohman950ae432020-10-01 03:00:04130
131 // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
132 // live so that we assign it an index and call it.
133 if (isCallCtorsLive())
Anutosh Bhat9cbbb742025-04-25 14:35:00134 ctx.sym.callCtors->markLive();
Sam Cleggad2e12a2019-10-10 03:23:06135}
136
137void MarkLive::mark() {
Rui Ueyama34133b232018-02-19 22:34:47138 // Follow relocations to mark all reachable chunks.
Sam Cleggad2e12a2019-10-10 03:23:06139 while (!queue.empty()) {
140 InputChunk *c = queue.pop_back_val();
Rui Ueyama34133b232018-02-19 22:34:47141
Rui Ueyama136d27a2019-07-11 05:40:30142 for (const WasmRelocation reloc : c->getRelocations()) {
143 if (reloc.Type == R_WASM_TYPE_INDEX_LEB)
Nicholas Wilson2e55ee72018-03-09 17:06:38144 continue;
Rui Ueyama136d27a2019-07-11 05:40:30145 Symbol *sym = c->file->getSymbol(reloc.Index);
Nicholas Wilson2e55ee72018-03-09 17:06:38146
147 // If the function has been assigned the special index zero in the table,
148 // the relocation doesn't pull in the function body, since the function
149 // won't actually go in the table (the runtime will trap attempts to call
150 // that index, since we don't use it). A function with a table index of
151 // zero is only reachable via "call", not via "call_indirect". The stub
152 // functions used for weak-undefined symbols have this behaviour (compare
153 // equal to null pointer, only reachable via direct call).
Rui Ueyama136d27a2019-07-11 05:40:30154 if (reloc.Type == R_WASM_TABLE_INDEX_SLEB ||
Wouter van Oortmerssencc1b9b62020-07-10 23:51:01155 reloc.Type == R_WASM_TABLE_INDEX_SLEB64 ||
156 reloc.Type == R_WASM_TABLE_INDEX_I32 ||
157 reloc.Type == R_WASM_TABLE_INDEX_I64) {
Rui Ueyama136d27a2019-07-11 05:40:30158 auto *funcSym = cast<FunctionSymbol>(sym);
Sam Clegg48ddf5e12020-11-23 23:41:07159 if (funcSym->isStub)
Nicholas Wilson2e55ee72018-03-09 17:06:38160 continue;
161 }
162
Rui Ueyama136d27a2019-07-11 05:40:30163 enqueue(sym);
Sam Clegg03626332018-01-31 01:45:47164 }
Rui Ueyama34133b232018-02-19 22:34:47165 }
Sam Cleggad2e12a2019-10-10 03:23:06166}
167
168void markLive() {
Fangrui Song3792b362025-01-03 01:08:18169 if (!ctx.arg.gcSections)
Sam Cleggad2e12a2019-10-10 03:23:06170 return;
171
172 LLVM_DEBUG(dbgs() << "markLive\n");
173
174 MarkLive marker;
175 marker.run();
Sam Clegg03626332018-01-31 01:45:47176
177 // Report garbage-collected sections.
Fangrui Song3792b362025-01-03 01:08:18178 if (ctx.arg.printGcSections) {
Sam Clegg3c584572024-01-18 23:53:13179 for (const ObjFile *obj : ctx.objectFiles) {
Rui Ueyama136d27a2019-07-11 05:40:30180 for (InputChunk *c : obj->functions)
181 if (!c->live)
182 message("removing unused section " + toString(c));
183 for (InputChunk *c : obj->segments)
184 if (!c->live)
185 message("removing unused section " + toString(c));
186 for (InputGlobal *g : obj->globals)
187 if (!g->live)
188 message("removing unused section " + toString(g));
Heejin Ahn1d891d42021-06-15 08:49:43189 for (InputTag *t : obj->tags)
190 if (!t->live)
191 message("removing unused section " + toString(t));
Andy Wingo53e3b812021-01-05 11:08:58192 for (InputTable *t : obj->tables)
193 if (!t->live)
194 message("removing unused section " + toString(t));
Sam Clegg03626332018-01-31 01:45:47195 }
Sam Clegg3c584572024-01-18 23:53:13196 for (InputChunk *c : ctx.syntheticFunctions)
Rui Ueyama136d27a2019-07-11 05:40:30197 if (!c->live)
198 message("removing unused section " + toString(c));
Sam Clegg3c584572024-01-18 23:53:13199 for (InputGlobal *g : ctx.syntheticGlobals)
Rui Ueyama136d27a2019-07-11 05:40:30200 if (!g->live)
201 message("removing unused section " + toString(g));
Sam Clegg3c584572024-01-18 23:53:13202 for (InputTable *t : ctx.syntheticTables)
Andy Wingo63393822021-01-14 09:15:56203 if (!t->live)
204 message("removing unused section " + toString(t));
Sam Clegg03626332018-01-31 01:45:47205 }
206}
Sam Cleggad2e12a2019-10-10 03:23:06207
Dan Gohman6cd85112020-10-01 00:21:57208bool MarkLive::isCallCtorsLive() {
209 // In a reloctable link, we don't call `__wasm_call_ctors`.
Fangrui Song3792b362025-01-03 01:08:18210 if (ctx.arg.relocatable)
Dan Gohman6cd85112020-10-01 00:21:57211 return false;
212
213 // In Emscripten-style PIC, we call `__wasm_call_ctors` which calls
Sam Clegg5c016482021-01-28 16:20:42214 // `__wasm_apply_data_relocs`.
Sam Clegg184c22d2024-01-18 23:01:21215 if (ctx.isPic)
Dan Gohman6cd85112020-10-01 00:21:57216 return true;
217
218 // If there are any init functions, mark `__wasm_call_ctors` live so that
219 // it can call them.
Sam Clegg3c584572024-01-18 23:53:13220 for (const ObjFile *file : ctx.objectFiles) {
Dan Gohman6cd85112020-10-01 00:21:57221 const WasmLinkingData &l = file->getWasmObj()->linkingData();
Dan Gohman950ae432020-10-01 03:00:04222 for (const WasmInitFunc &f : l.InitFunctions) {
223 auto *sym = file->getFunctionSymbol(f.Symbol);
224 if (!sym->isDiscarded() && sym->isLive())
Dan Gohman6cd85112020-10-01 00:21:57225 return true;
Dan Gohman950ae432020-10-01 03:00:04226 }
Dan Gohman6cd85112020-10-01 00:21:57227 }
228
229 return false;
230}
231
Sam Cleggd32f71a2023-03-06 17:55:00232} // namespace lld::wasm