caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 1 | //===- MapFile.cpp --------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 9 | // This file implements the -map option, which maps address ranges to their |
| 10 | // respective contents, plus the input file these contents were originally from. |
| 11 | // The contents (typically symbols) are listed in address order. Dead-stripped |
| 12 | // contents are included as well. |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 13 | // |
| 14 | // # Path: test |
| 15 | // # Arch: x86_64 |
| 16 | // # Object files: |
| 17 | // [ 0] linker synthesized |
| 18 | // [ 1] a.o |
| 19 | // # Sections: |
Jez Ng | da374d1 | 2022-10-22 02:39:57 | [diff] [blame] | 20 | // # Address Size Segment Section |
| 21 | // 0x1000005C0 0x0000004C __TEXT __text |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 22 | // # Symbols: |
Jez Ng | da374d1 | 2022-10-22 02:39:57 | [diff] [blame] | 23 | // # Address Size File Name |
| 24 | // 0x1000005C0 0x00000001 [ 1] _main |
| 25 | // # Dead Stripped Symbols: |
| 26 | // # Size File Name |
| 27 | // <<dead>> 0x00000001 [ 1] _foo |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 28 | // |
| 29 | //===----------------------------------------------------------------------===// |
| 30 | |
| 31 | #include "MapFile.h" |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 32 | #include "ConcatOutputSection.h" |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 33 | #include "Config.h" |
| 34 | #include "InputFiles.h" |
| 35 | #include "InputSection.h" |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 36 | #include "OutputSegment.h" |
| 37 | #include "Symbols.h" |
Roger Kim | 4f2c46c | 2022-02-12 00:33:23 | [diff] [blame] | 38 | #include "SyntheticSections.h" |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 39 | #include "Target.h" |
Vy Nguyen | fc7a718 | 2022-10-19 16:45:49 | [diff] [blame] | 40 | #include "lld/Common/ErrorHandler.h" |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 41 | #include "llvm/ADT/DenseMap.h" |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 42 | #include "llvm/Support/Parallel.h" |
Jez Ng | 4bcaafe | 2021-03-25 18:39:44 | [diff] [blame] | 43 | #include "llvm/Support/TimeProfiler.h" |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 44 | |
| 45 | using namespace llvm; |
| 46 | using namespace llvm::sys; |
| 47 | using namespace lld; |
| 48 | using namespace lld::macho; |
| 49 | |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 50 | struct CStringInfo { |
| 51 | uint32_t fileIndex; |
| 52 | StringRef str; |
| 53 | }; |
| 54 | |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 55 | struct MapInfo { |
| 56 | SmallVector<InputFile *> files; |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 57 | SmallVector<Defined *> deadSymbols; |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 58 | DenseMap<const OutputSection *, |
| 59 | SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>> |
| 60 | liveCStringsForSection; |
| 61 | SmallVector<CStringInfo> deadCStrings; |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 62 | }; |
| 63 | |
| 64 | static MapInfo gatherMapInfo() { |
| 65 | MapInfo info; |
Jez Ng | aa288fd | 2022-12-21 22:26:02 | [diff] [blame] | 66 | for (InputFile *file : inputFiles) { |
| 67 | bool isReferencedFile = false; |
| 68 | |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 69 | if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) { |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 70 | uint32_t fileIndex = info.files.size() + 1; |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 71 | |
| 72 | // Gather the dead symbols. We don't have to bother with the live ones |
| 73 | // because we will pick them up as we iterate over the OutputSections |
| 74 | // later. |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 75 | for (Symbol *sym : file->symbols) { |
Nico Weber | a564551 | 2021-05-07 21:10:05 | [diff] [blame] | 76 | if (auto *d = dyn_cast_or_null<Defined>(sym)) |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 77 | // Only emit the prevailing definition of a symbol. Also, don't emit |
| 78 | // the symbol if it is part of a cstring section (we use the literal |
| 79 | // value instead, similar to ld64) |
alx32 | 2a3a79c | 2024-04-18 18:42:22 | [diff] [blame] | 80 | if (d->isec() && d->getFile() == file && |
| 81 | !isa<CStringInputSection>(d->isec())) { |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 82 | isReferencedFile = true; |
| 83 | if (!d->isLive()) |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 84 | info.deadSymbols.push_back(d); |
Jez Ng | 213dbdb | 2022-11-08 21:33:22 | [diff] [blame] | 85 | } |
Jez Ng | 213dbdb | 2022-11-08 21:33:22 | [diff] [blame] | 86 | } |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 87 | |
| 88 | // Gather all the cstrings (both live and dead). A CString(Output)Section |
| 89 | // doesn't provide us a way of figuring out which InputSections its |
| 90 | // cstring contents came from, so we need to build up that mapping here. |
| 91 | for (const Section *sec : file->sections) { |
| 92 | for (const Subsection &subsec : sec->subsections) { |
| 93 | if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) { |
| 94 | auto &liveCStrings = info.liveCStringsForSection[isec->parent]; |
| 95 | for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { |
| 96 | if (piece.live) |
| 97 | liveCStrings.push_back({isec->parent->addr + piece.outSecOff, |
| 98 | {fileIndex, isec->getStringRef(i)}}); |
| 99 | else |
| 100 | info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)}); |
| 101 | isReferencedFile = true; |
| 102 | } |
| 103 | } else { |
| 104 | break; |
| 105 | } |
| 106 | } |
| 107 | } |
Jez Ng | aa288fd | 2022-12-21 22:26:02 | [diff] [blame] | 108 | } else if (const auto *dylibFile = dyn_cast<DylibFile>(file)) { |
| 109 | isReferencedFile = dylibFile->isReferenced(); |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 110 | } |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 111 | |
Jez Ng | aa288fd | 2022-12-21 22:26:02 | [diff] [blame] | 112 | if (isReferencedFile) |
| 113 | info.files.push_back(file); |
| 114 | } |
| 115 | |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 116 | // cstrings are not stored in sorted order in their OutputSections, so we sort |
| 117 | // them here. |
| 118 | for (auto &liveCStrings : info.liveCStringsForSection) |
| 119 | parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) { |
| 120 | return p1.first < p2.first; |
| 121 | }); |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 122 | return info; |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 123 | } |
| 124 | |
Jez Ng | 5b21395 | 2023-03-11 03:29:14 | [diff] [blame] | 125 | // We use this instead of `toString(const InputFile *)` as we don't want to |
| 126 | // include the dylib install name in our output. |
| 127 | static void printFileName(raw_fd_ostream &os, const InputFile *f) { |
| 128 | if (f->archiveName.empty()) |
| 129 | os << f->getName(); |
| 130 | else |
| 131 | os << f->archiveName << "(" << path::filename(f->getName()) + ")"; |
| 132 | } |
| 133 | |
Jez Ng | aa288fd | 2022-12-21 22:26:02 | [diff] [blame] | 134 | // For printing the contents of the __stubs and __la_symbol_ptr sections. |
Jez Ng | 5b21395 | 2023-03-11 03:29:14 | [diff] [blame] | 135 | static void printStubsEntries( |
Jez Ng | aa288fd | 2022-12-21 22:26:02 | [diff] [blame] | 136 | raw_fd_ostream &os, |
| 137 | const DenseMap<lld::macho::InputFile *, uint32_t> &readerToFileOrdinal, |
| 138 | const OutputSection *osec, size_t entrySize) { |
| 139 | for (const Symbol *sym : in.stubs->getEntries()) |
| 140 | os << format("0x%08llX\t0x%08zX\t[%3u] %s\n", |
| 141 | osec->addr + sym->stubsIndex * entrySize, entrySize, |
| 142 | readerToFileOrdinal.lookup(sym->getFile()), |
| 143 | sym->getName().str().data()); |
| 144 | } |
| 145 | |
Jez Ng | 5b21395 | 2023-03-11 03:29:14 | [diff] [blame] | 146 | static void printNonLazyPointerSection(raw_fd_ostream &os, |
| 147 | NonLazyPointerSectionBase *osec) { |
Jez Ng | aa288fd | 2022-12-21 22:26:02 | [diff] [blame] | 148 | // ld64 considers stubs to belong to particular files, but considers GOT |
| 149 | // entries to be linker-synthesized. Not sure why they made that decision, but |
| 150 | // I think we can follow suit unless there's demand for better symbol-to-file |
| 151 | // associations. |
| 152 | for (const Symbol *sym : osec->getEntries()) |
| 153 | os << format("0x%08llX\t0x%08zX\t[ 0] non-lazy-pointer-to-local: %s\n", |
| 154 | osec->addr + sym->gotIndex * target->wordSize, |
| 155 | target->wordSize, sym->getName().str().data()); |
| 156 | } |
| 157 | |
alx32 | 2a3a79c | 2024-04-18 18:42:22 | [diff] [blame] | 158 | static uint64_t getSymSizeForMap(Defined *sym) { |
alx32 | d175616 | 2024-09-05 23:36:21 | [diff] [blame] | 159 | if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body) |
alx32 | 2a3a79c | 2024-04-18 18:42:22 | [diff] [blame] | 160 | return 0; |
| 161 | return sym->size; |
| 162 | } |
| 163 | |
alx32 | 162814a | 2025-01-08 05:07:51 | [diff] [blame] | 164 | // Merges two vectors of input sections in order of their outSecOff values. |
| 165 | // This approach creates a new (temporary) vector which is not ideal but the |
| 166 | // ideal approach leads to a lot of code duplication. |
| 167 | static std::vector<ConcatInputSection *> |
| 168 | mergeOrderedInputs(ArrayRef<ConcatInputSection *> inputs1, |
| 169 | ArrayRef<ConcatInputSection *> inputs2) { |
| 170 | std::vector<ConcatInputSection *> vec(inputs1.size() + inputs2.size()); |
| 171 | std::merge(inputs1.begin(), inputs1.end(), inputs2.begin(), inputs2.end(), |
| 172 | vec.begin(), [](ConcatInputSection *a, ConcatInputSection *b) { |
| 173 | return a->outSecOff < b->outSecOff; |
| 174 | }); |
| 175 | return vec; |
| 176 | } |
| 177 | |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 178 | void macho::writeMapFile() { |
| 179 | if (config->mapFile.empty()) |
| 180 | return; |
| 181 | |
Jez Ng | 4bcaafe | 2021-03-25 18:39:44 | [diff] [blame] | 182 | TimeTraceScope timeScope("Write map file"); |
| 183 | |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 184 | // Open a map file for writing. |
| 185 | std::error_code ec; |
| 186 | raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None); |
| 187 | if (ec) { |
| 188 | error("cannot open " + config->mapFile + ": " + ec.message()); |
| 189 | return; |
| 190 | } |
| 191 | |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 192 | os << format("# Path: %s\n", config->outputFile.str().c_str()); |
Jez Ng | ed4a4e3 | 2021-04-21 19:43:38 | [diff] [blame] | 193 | os << format("# Arch: %s\n", |
| 194 | getArchitectureName(config->arch()).str().c_str()); |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 195 | |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 196 | MapInfo info = gatherMapInfo(); |
| 197 | |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 198 | os << "# Object files:\n"; |
| 199 | os << format("[%3u] %s\n", 0, (const char *)"linker synthesized"); |
| 200 | uint32_t fileIndex = 1; |
| 201 | DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal; |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 202 | for (InputFile *file : info.files) { |
Jez Ng | 5b21395 | 2023-03-11 03:29:14 | [diff] [blame] | 203 | os << format("[%3u] ", fileIndex); |
| 204 | printFileName(os, file); |
| 205 | os << "\n"; |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 206 | readerToFileOrdinal[file] = fileIndex++; |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 207 | } |
| 208 | |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 209 | os << "# Sections:\n"; |
| 210 | os << "# Address\tSize \tSegment\tSection\n"; |
| 211 | for (OutputSegment *seg : outputSegments) |
| 212 | for (OutputSection *osec : seg->getSections()) { |
| 213 | if (osec->isHidden()) |
| 214 | continue; |
| 215 | |
| 216 | os << format("0x%08llX\t0x%08llX\t%s\t%s\n", osec->addr, osec->getSize(), |
| 217 | seg->name.str().c_str(), osec->name.str().c_str()); |
| 218 | } |
| 219 | |
alx32 | 742a82a | 2024-03-27 21:34:27 | [diff] [blame] | 220 | // Shared function to print an array of symbols. |
| 221 | auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) { |
| 222 | for (const ConcatInputSection *isec : arr) { |
| 223 | for (Defined *sym : isec->symbols) { |
alx32 | 2a3a79c | 2024-04-18 18:42:22 | [diff] [blame] | 224 | if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0)) |
alx32 | 742a82a | 2024-03-27 21:34:27 | [diff] [blame] | 225 | os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), |
alx32 | 2a3a79c | 2024-04-18 18:42:22 | [diff] [blame] | 226 | getSymSizeForMap(sym), |
| 227 | readerToFileOrdinal[sym->getFile()], |
alx32 | 742a82a | 2024-03-27 21:34:27 | [diff] [blame] | 228 | sym->getName().str().data()); |
| 229 | } |
| 230 | } |
| 231 | }; |
| 232 | |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 233 | os << "# Symbols:\n"; |
Jez Ng | bdd0cec | 2022-10-13 20:44:29 | [diff] [blame] | 234 | os << "# Address\tSize \tFile Name\n"; |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 235 | for (const OutputSegment *seg : outputSegments) { |
| 236 | for (const OutputSection *osec : seg->getSections()) { |
alx32 | 162814a | 2025-01-08 05:07:51 | [diff] [blame] | 237 | if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) { |
| 238 | auto inputsAndThunks = |
| 239 | mergeOrderedInputs(textOsec->inputs, textOsec->getThunks()); |
| 240 | printIsecArrSyms(inputsAndThunks); |
| 241 | } else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) { |
alx32 | 742a82a | 2024-03-27 21:34:27 | [diff] [blame] | 242 | printIsecArrSyms(concatOsec->inputs); |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 243 | } else if (osec == in.cStringSection || osec == in.objcMethnameSection) { |
| 244 | const auto &liveCStrings = info.liveCStringsForSection.lookup(osec); |
| 245 | uint64_t lastAddr = 0; // strings will never start at address 0, so this |
| 246 | // is a sentinel value |
| 247 | for (const auto &[addr, info] : liveCStrings) { |
| 248 | uint64_t size = 0; |
| 249 | if (addr != lastAddr) |
| 250 | size = info.str.size() + 1; // include null terminator |
| 251 | lastAddr = addr; |
| 252 | os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size, |
| 253 | info.fileIndex); |
| 254 | os.write_escaped(info.str) << "\n"; |
| 255 | } |
Jez Ng | 41f90e9 | 2022-12-01 06:20:54 | [diff] [blame] | 256 | } else if (osec == (void *)in.unwindInfo) { |
| 257 | os << format("0x%08llX\t0x%08llX\t[ 0] compact unwind info\n", |
| 258 | osec->addr, osec->getSize()); |
Jez Ng | aa288fd | 2022-12-21 22:26:02 | [diff] [blame] | 259 | } else if (osec == in.stubs) { |
| 260 | printStubsEntries(os, readerToFileOrdinal, osec, target->stubSize); |
| 261 | } else if (osec == in.lazyPointers) { |
| 262 | printStubsEntries(os, readerToFileOrdinal, osec, target->wordSize); |
| 263 | } else if (osec == in.stubHelper) { |
| 264 | // yes, ld64 calls it "helper helper"... |
| 265 | os << format("0x%08llX\t0x%08llX\t[ 0] helper helper\n", osec->addr, |
| 266 | osec->getSize()); |
| 267 | } else if (osec == in.got) { |
| 268 | printNonLazyPointerSection(os, in.got); |
| 269 | } else if (osec == in.tlvPointers) { |
| 270 | printNonLazyPointerSection(os, in.tlvPointers); |
alx32 | 742a82a | 2024-03-27 21:34:27 | [diff] [blame] | 271 | } else if (osec == in.objcMethList) { |
| 272 | printIsecArrSyms(in.objcMethList->getInputs()); |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 273 | } |
| 274 | // TODO print other synthetic sections |
| 275 | } |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 276 | } |
| 277 | |
Roger Kim | 4220843 | 2022-01-28 18:51:27 | [diff] [blame] | 278 | if (config->deadStrip) { |
Roger Kim | 4220843 | 2022-01-28 18:51:27 | [diff] [blame] | 279 | os << "# Dead Stripped Symbols:\n"; |
Jez Ng | bdd0cec | 2022-10-13 20:44:29 | [diff] [blame] | 280 | os << "# \tSize \tFile Name\n"; |
Jez Ng | b945733 | 2022-10-22 02:48:25 | [diff] [blame] | 281 | for (Defined *sym : info.deadSymbols) { |
Roger Kim | 4220843 | 2022-01-28 18:51:27 | [diff] [blame] | 282 | assert(!sym->isLive()); |
alx32 | 2a3a79c | 2024-04-18 18:42:22 | [diff] [blame] | 283 | os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", getSymSizeForMap(sym), |
Jez Ng | bdd0cec | 2022-10-13 20:44:29 | [diff] [blame] | 284 | readerToFileOrdinal[sym->getFile()], |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 285 | sym->getName().str().data()); |
| 286 | } |
| 287 | for (CStringInfo &cstrInfo : info.deadCStrings) { |
David Spickett | 7c7e39d | 2022-12-06 10:30:38 | [diff] [blame] | 288 | os << format("<<dead>>\t0x%08zX\t[%3u] literal string: ", |
Jez Ng | 7ca32bd | 2022-12-01 05:57:16 | [diff] [blame] | 289 | cstrInfo.str.size() + 1, cstrInfo.fileIndex); |
| 290 | os.write_escaped(cstrInfo.str) << "\n"; |
Roger Kim | 4220843 | 2022-01-28 18:51:27 | [diff] [blame] | 291 | } |
| 292 | } |
caoming.roy | ed8bff1 | 2021-03-18 14:38:30 | [diff] [blame] | 293 | } |