Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 1 | //===- UnwindInfoSection.cpp ----------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "UnwindInfoSection.h" |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 10 | #include "InputSection.h" |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 11 | #include "Layout.h" |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 12 | #include "OutputSection.h" |
| 13 | #include "OutputSegment.h" |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 14 | #include "SymbolTable.h" |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 15 | #include "Symbols.h" |
| 16 | #include "SyntheticSections.h" |
| 17 | #include "Target.h" |
| 18 | |
| 19 | #include "lld/Common/ErrorHandler.h" |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 20 | #include "lld/Common/Memory.h" |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 21 | #include "llvm/ADT/DenseMap.h" |
Jez Ng | 7ca133c | 2021-04-26 05:23:32 | [diff] [blame] | 22 | #include "llvm/ADT/STLExtras.h" |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 23 | #include "llvm/BinaryFormat/MachO.h" |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 24 | #include "llvm/Support/Parallel.h" |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 25 | |
Vy Nguyen | fc7a718 | 2022-10-19 16:45:49 | [diff] [blame] | 26 | #include "mach-o/compact_unwind_encoding.h" |
| 27 | |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 28 | #include <numeric> |
| 29 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 30 | using namespace llvm; |
| 31 | using namespace llvm::MachO; |
Jez Ng | e183bf8 | 2022-06-13 01:56:45 | [diff] [blame] | 32 | using namespace llvm::support::endian; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 33 | using namespace lld; |
| 34 | using namespace lld::macho; |
| 35 | |
// Upper bounds on the number of distinct encodings.
// NOTE(review): the names suggest these cap the section-wide "common"
// encoding table and the total encodings reachable from one page; confirm
// against the code that consumes them.
#define COMMON_ENCODINGS_MAX 127
#define COMPACT_ENCODINGS_MAX 256

// Size of one second-level page, in bytes and in 32-bit words.
#define SECOND_LEVEL_PAGE_BYTES 4096
#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t))

// Entry capacity of a regular page: whatever is left of the page after its
// header, divided by the size of one regular entry.
#define REGULAR_SECOND_LEVEL_ENTRIES_MAX                                       \
  ((SECOND_LEVEL_PAGE_BYTES -                                                  \
    sizeof(unwind_info_regular_second_level_page_header)) /                    \
   sizeof(unwind_info_regular_second_level_entry))

// Entry capacity of a compressed page: whatever is left of the page after its
// header, divided into 32-bit slots.
#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX                                    \
  ((SECOND_LEVEL_PAGE_BYTES -                                                  \
    sizeof(unwind_info_compressed_second_level_page_header)) /                 \
   sizeof(uint32_t))

// Width and mask of the function-offset part of a compressed entry.
#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK                                      \
  UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
| 53 | |
Jez Ng | f7bc79c | 2023-04-04 13:26:03 | [diff] [blame] | 54 | static_assert(static_cast<uint32_t>(UNWIND_X86_64_DWARF_SECTION_OFFSET) == |
| 55 | static_cast<uint32_t>(UNWIND_ARM64_DWARF_SECTION_OFFSET) && |
| 56 | static_cast<uint32_t>(UNWIND_X86_64_DWARF_SECTION_OFFSET) == |
| 57 | static_cast<uint32_t>(UNWIND_X86_DWARF_SECTION_OFFSET)); |
| 58 | |
| 59 | constexpr uint64_t DWARF_SECTION_OFFSET = UNWIND_X86_64_DWARF_SECTION_OFFSET; |
| 60 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 61 | // Compact Unwind format is a Mach-O evolution of DWARF Unwind that |
| 62 | // optimizes space and exception-time lookup. Most DWARF unwind |
| 63 | // entries can be replaced with Compact Unwind entries, but the ones |
| 64 | // that cannot are retained in DWARF form. |
| 65 | // |
| 66 | // This comment will address macro-level organization of the pre-link |
| 67 | // and post-link compact unwind tables. For micro-level organization |
| 68 | // pertaining to the bitfield layout of the 32-bit compact unwind |
| 69 | // entries, see libunwind/include/mach-o/compact_unwind_encoding.h |
| 70 | // |
| 71 | // Important clarifying factoids: |
| 72 | // |
| 73 | // * __LD,__compact_unwind is the compact unwind format for compiler |
| 74 | // output and linker input. It is never a final output. It could be |
| 75 | // an intermediate output with the `-r` option which retains relocs. |
| 76 | // |
| 77 | // * __TEXT,__unwind_info is the compact unwind format for final |
| 78 | // linker output. It is never an input. |
| 79 | // |
| 80 | // * __TEXT,__eh_frame is the DWARF format for both linker input and output. |
| 81 | // |
| 82 | // * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd |
| 83 | // level) by ascending address, and the pages are referenced by an |
| 84 | // index (1st level) in the section header. |
| 85 | // |
| 86 | // * Following the headers in __TEXT,__unwind_info, the bulk of the |
| 87 | // section contains a vector of compact unwind entries |
| 88 | // `{functionOffset, encoding}` sorted by ascending `functionOffset`. |
| 89 | // Adjacent entries with the same encoding can be folded to great |
| 90 | // advantage, achieving a 3-order-of-magnitude reduction in the |
| 91 | // number of entries. |
| 92 | // |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 93 | // Refer to the definition of unwind_info_section_header in |
| 94 | // compact_unwind_encoding.h for an overview of the format we are encoding |
| 95 | // here. |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 96 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 97 | // TODO(gkm): how do we align the 2nd-level pages? |
| 98 | |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 99 | // The various fields in the on-disk representation of each compact unwind |
| 100 | // entry. |
| 101 | #define FOR_EACH_CU_FIELD(DO) \ |
| 102 | DO(Ptr, functionAddress) \ |
| 103 | DO(uint32_t, functionLength) \ |
| 104 | DO(compact_unwind_encoding_t, encoding) \ |
| 105 | DO(Ptr, personality) \ |
| 106 | DO(Ptr, lsda) |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 107 | |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 108 | CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD); |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 109 | |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 110 | #undef FOR_EACH_CU_FIELD |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 111 | |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 112 | // LLD's internal representation of a compact unwind entry. |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 113 | struct CompactUnwindEntry { |
| 114 | uint64_t functionAddress; |
| 115 | uint32_t functionLength; |
| 116 | compact_unwind_encoding_t encoding; |
| 117 | Symbol *personality; |
| 118 | InputSection *lsda; |
| 119 | }; |
| 120 | |
Jez Ng | 28a2102 | 2021-07-11 22:35:45 | [diff] [blame] | 121 | using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 122 | |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 123 | struct SecondLevelPage { |
| 124 | uint32_t kind; |
| 125 | size_t entryIndex; |
| 126 | size_t entryCount; |
| 127 | size_t byteCount; |
| 128 | std::vector<compact_unwind_encoding_t> localEncodings; |
| 129 | EncodingMap localEncodingIndexes; |
| 130 | }; |
| 131 | |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 132 | // UnwindInfoSectionImpl allows us to avoid cluttering our header file with a |
| 133 | // lengthy definition of UnwindInfoSection. |
Jez Ng | 3a11528 | 2021-07-02 00:33:42 | [diff] [blame] | 134 | class UnwindInfoSectionImpl final : public UnwindInfoSection { |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 135 | public: |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 136 | UnwindInfoSectionImpl() : cuLayout(target->wordSize) {} |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 137 | uint64_t getSize() const override { return unwindInfoSize; } |
Jez Ng | 7b45dfc | 2022-10-12 03:50:46 | [diff] [blame] | 138 | void prepare() override; |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 139 | void finalize() override; |
| 140 | void writeTo(uint8_t *buf) const override; |
| 141 | |
| 142 | private: |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 143 | void prepareRelocations(ConcatInputSection *); |
| 144 | void relocateCompactUnwind(std::vector<CompactUnwindEntry> &); |
| 145 | void encodePersonalities(); |
Jez Ng | 7b45dfc | 2022-10-12 03:50:46 | [diff] [blame] | 146 | Symbol *canonicalizePersonality(Symbol *); |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 147 | |
| 148 | uint64_t unwindInfoSize = 0; |
Fangrui Song | fb2a971 | 2023-07-25 05:04:03 | [diff] [blame] | 149 | SmallVector<decltype(symbols)::value_type, 0> symbolsVec; |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 150 | CompactUnwindLayout cuLayout; |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 151 | std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings; |
| 152 | EncodingMap commonEncodingIndexes; |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 153 | // The entries here will be in the same order as their originating symbols |
| 154 | // in symbolsVec. |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 155 | std::vector<CompactUnwindEntry> cuEntries; |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 156 | // Indices into the cuEntries vector. |
| 157 | std::vector<size_t> cuIndices; |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 158 | std::vector<Symbol *> personalities; |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 159 | SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *> |
| 160 | personalityTable; |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 161 | // Indices into cuEntries for CUEs with a non-null LSDA. |
| 162 | std::vector<size_t> entriesWithLsda; |
| 163 | // Map of cuEntries index to an index within the LSDA array. |
| 164 | DenseMap<size_t, uint32_t> lsdaIndex; |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 165 | std::vector<SecondLevelPage> secondLevelPages; |
| 166 | uint64_t level2PagesOffset = 0; |
Vy Nguyen | 65226d3 | 2022-11-18 20:21:23 | [diff] [blame] | 167 | // The highest-address function plus its size. The unwinder needs this to |
| 168 | // determine the address range that is covered by unwind info. |
| 169 | uint64_t cueEndBoundary = 0; |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 170 | }; |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 171 | |
Jez Ng | 3a11528 | 2021-07-02 00:33:42 | [diff] [blame] | 172 | UnwindInfoSection::UnwindInfoSection() |
| 173 | : SyntheticSection(segment_names::text, section_names::unwindInfo) { |
| 174 | align = 4; |
Jez Ng | 3a11528 | 2021-07-02 00:33:42 | [diff] [blame] | 175 | } |
| 176 | |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 177 | // Record function symbols that may need entries emitted in __unwind_info, which |
| 178 | // stores unwind data for address ranges. |
| 179 | // |
Shoaib Meenai | 56bd318 | 2022-08-28 20:09:56 | [diff] [blame] | 180 | // Note that if several adjacent functions have the same unwind encoding and |
| 181 | // personality function and no LSDA, they share one unwind entry. For this to |
| 182 | // work, functions without unwind info need explicit "no unwind info" unwind |
| 183 | // entries -- else the unwinder would think they have the unwind info of the |
| 184 | // closest function with unwind info right before in the image. Thus, we add |
| 185 | // function symbols for each unique address regardless of whether they have |
| 186 | // associated unwind info. |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 187 | void UnwindInfoSection::addSymbol(const Defined *d) { |
Greg McGary | 9cc489a | 2021-11-15 18:46:59 | [diff] [blame] | 188 | if (d->unwindEntry) |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 189 | allEntriesAreOmitted = false; |
| 190 | // We don't yet know the final output address of this symbol, but we know that |
| 191 | // they are uniquely determined by a combination of the isec and value, so |
| 192 | // we use that as the key here. |
| 193 | auto p = symbols.insert({{d->isec, d->value}, d}); |
| 194 | // If we have multiple symbols at the same address, only one of them can have |
Jez Ng | 241f62d | 2022-07-21 13:44:01 | [diff] [blame] | 195 | // an associated unwind entry. |
Greg McGary | 9cc489a | 2021-11-15 18:46:59 | [diff] [blame] | 196 | if (!p.second && d->unwindEntry) { |
Jez Ng | 7f60ed1 | 2022-12-21 22:44:45 | [diff] [blame] | 197 | assert(p.first->second == d || !p.first->second->unwindEntry); |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 198 | p.first->second = d; |
Jez Ng | 002eda7 | 2021-10-26 20:04:04 | [diff] [blame] | 199 | } |
Jez Ng | 3a11528 | 2021-07-02 00:33:42 | [diff] [blame] | 200 | } |
| 201 | |
Jez Ng | 7b45dfc | 2022-10-12 03:50:46 | [diff] [blame] | 202 | void UnwindInfoSectionImpl::prepare() { |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 203 | // This iteration needs to be deterministic, since prepareRelocations may add |
| 204 | // entries to the GOT. Hence the use of a MapVector for |
| 205 | // UnwindInfoSection::symbols. |
| 206 | for (const Defined *d : make_second_range(symbols)) |
Jez Ng | 7b45dfc | 2022-10-12 03:50:46 | [diff] [blame] | 207 | if (d->unwindEntry) { |
| 208 | if (d->unwindEntry->getName() == section_names::compactUnwind) { |
| 209 | prepareRelocations(d->unwindEntry); |
| 210 | } else { |
| 211 | // We don't have to add entries to the GOT here because FDEs have |
| 212 | // explicit GOT relocations, so Writer::scanRelocations() will add those |
| 213 | // GOT entries. However, we still need to canonicalize the personality |
| 214 | // pointers (like prepareRelocations() does for CU entries) in order |
| 215 | // to avoid overflowing the 3-personality limit. |
| 216 | FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry]; |
| 217 | fde.personality = canonicalizePersonality(fde.personality); |
| 218 | } |
| 219 | } |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 220 | } |
| 221 | |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 222 | // Compact unwind relocations have different semantics, so we handle them in a |
| 223 | // separate code path from regular relocations. First, we do not wish to add |
| 224 | // rebase opcodes for __LD,__compact_unwind, because that section doesn't |
| 225 | // actually end up in the final binary. Second, personality pointers always |
| 226 | // reside in the GOT and must be treated specially. |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 227 | void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { |
Nico Weber | d5a70db | 2021-05-06 18:47:57 | [diff] [blame] | 228 | assert(!isec->shouldOmitFromOutput() && |
| 229 | "__compact_unwind section should not be omitted"); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 230 | |
Nico Weber | 7d4c8a2 | 2021-06-13 17:30:05 | [diff] [blame] | 231 | // FIXME: Make this skip relocations for CompactUnwindEntries that |
Nico Weber | a564551 | 2021-05-07 21:10:05 | [diff] [blame] | 232 | // point to dead-stripped functions. That might save some amount of |
| 233 | // work. But since there are usually just few personality functions |
| 234 | // that are referenced from many places, at least some of them likely |
| 235 | // live, it wouldn't reduce number of got entries. |
Greg McGary | f27e454 | 2021-05-19 16:58:17 | [diff] [blame] | 236 | for (size_t i = 0; i < isec->relocs.size(); ++i) { |
| 237 | Reloc &r = isec->relocs[i]; |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 238 | assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); |
Jez Ng | 04b1dad | 2022-12-05 21:18:15 | [diff] [blame] | 239 | // Since compact unwind sections aren't part of the inputSections vector, |
| 240 | // they don't get canonicalized by scanRelocations(), so we have to do the |
| 241 | // canonicalization here. |
| 242 | if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) |
| 243 | r.referent = referentIsec->canonical(); |
Nico Weber | 8a7b5eb | 2021-07-07 15:28:27 | [diff] [blame] | 244 | |
Greg McGary | 9cc489a | 2021-11-15 18:46:59 | [diff] [blame] | 245 | // Functions and LSDA entries always reside in the same object file as the |
| 246 | // compact unwind entries that references them, and thus appear as section |
| 247 | // relocs. There is no need to prepare them. We only prepare relocs for |
| 248 | // personality functions. |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 249 | if (r.offset != cuLayout.personalityOffset) |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 250 | continue; |
| 251 | |
Greg McGary | 427d359 | 2021-03-30 00:19:29 | [diff] [blame] | 252 | if (auto *s = r.referent.dyn_cast<Symbol *>()) { |
Greg McGary | 9cc489a | 2021-11-15 18:46:59 | [diff] [blame] | 253 | // Personality functions are nearly always system-defined (e.g., |
| 254 | // ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an |
| 255 | // application provides its own personality function, it might be |
| 256 | // referenced by an extern Defined symbol reloc, or a local section reloc. |
Vy Nguyen | b428c3e | 2021-09-15 19:49:56 | [diff] [blame] | 257 | if (auto *defined = dyn_cast<Defined>(s)) { |
Fangrui Song | 640d9b3 | 2022-11-09 01:28:04 | [diff] [blame] | 258 | // XXX(vyng) This is a special case for handling duplicate personality |
Vy Nguyen | b428c3e | 2021-09-15 19:49:56 | [diff] [blame] | 259 | // symbols. Note that LD64's behavior is a bit different and it is |
| 260 | // inconsistent with how symbol resolution usually work |
| 261 | // |
| 262 | // So we've decided not to follow it. Instead, simply pick the symbol |
| 263 | // with the same name from the symbol table to replace the local one. |
| 264 | // |
| 265 | // (See discussions/alternatives already considered on D107533) |
| 266 | if (!defined->isExternal()) |
Vy Nguyen | 944071e | 2021-11-19 15:56:58 | [diff] [blame] | 267 | if (Symbol *sym = symtab->find(defined->getName())) |
Fangrui Song | 0aae2bf | 2022-01-19 18:14:49 | [diff] [blame] | 268 | if (!sym->isLazy()) |
Vy Nguyen | 944071e | 2021-11-19 15:56:58 | [diff] [blame] | 269 | r.referent = s = sym; |
Vy Nguyen | b428c3e | 2021-09-15 19:49:56 | [diff] [blame] | 270 | } |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 271 | if (auto *undefined = dyn_cast<Undefined>(s)) { |
Daniel Bertalan | f2e92cf | 2022-06-14 13:41:28 | [diff] [blame] | 272 | treatUndefinedSymbol(*undefined, isec, r.offset); |
Nico Weber | 0658fc6 | 2021-02-28 18:42:14 | [diff] [blame] | 273 | // treatUndefinedSymbol() can replace s with a DylibSymbol; re-check. |
| 274 | if (isa<Undefined>(s)) |
| 275 | continue; |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 276 | } |
Vy Nguyen | b428c3e | 2021-09-15 19:49:56 | [diff] [blame] | 277 | |
Jez Ng | 7b45dfc | 2022-10-12 03:50:46 | [diff] [blame] | 278 | // Similar to canonicalizePersonality(), but we also register a GOT entry. |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 279 | if (auto *defined = dyn_cast<Defined>(s)) { |
| 280 | // Check if we have created a synthetic symbol at the same address. |
Greg McGary | 427d359 | 2021-03-30 00:19:29 | [diff] [blame] | 281 | Symbol *&personality = |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 282 | personalityTable[{defined->isec, defined->value}]; |
| 283 | if (personality == nullptr) { |
| 284 | personality = defined; |
| 285 | in.got->addEntry(defined); |
| 286 | } else if (personality != defined) { |
| 287 | r.referent = personality; |
| 288 | } |
| 289 | continue; |
| 290 | } |
Jez Ng | 7b45dfc | 2022-10-12 03:50:46 | [diff] [blame] | 291 | |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 292 | assert(isa<DylibSymbol>(s)); |
| 293 | in.got->addEntry(s); |
| 294 | continue; |
| 295 | } |
| 296 | |
| 297 | if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { |
Jez Ng | b8bbb97 | 2021-06-16 19:23:04 | [diff] [blame] | 298 | assert(!isCoalescedWeak(referentIsec)); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 299 | // Personality functions can be referenced via section relocations |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 300 | // if they live in the same object file. Create placeholder synthetic |
| 301 | // symbols for them in the GOT. |
Greg McGary | 427d359 | 2021-03-30 00:19:29 | [diff] [blame] | 302 | Symbol *&s = personalityTable[{referentIsec, r.addend}]; |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 303 | if (s == nullptr) { |
Nico Weber | a564551 | 2021-05-07 21:10:05 | [diff] [blame] | 304 | // This runs after dead stripping, so the noDeadStrip argument does not |
| 305 | // matter. |
Nico Weber | c1b2a7b | 2021-04-22 14:44:56 | [diff] [blame] | 306 | s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec, |
| 307 | r.addend, /*size=*/0, /*isWeakDef=*/false, |
Jez Ng | 05c5363 | 2021-04-30 20:17:26 | [diff] [blame] | 308 | /*isExternal=*/false, /*isPrivateExtern=*/false, |
Jez Ng | 1cff723 | 2022-04-11 19:45:25 | [diff] [blame] | 309 | /*includeInSymtab=*/true, |
Vincent Lee | ed59b8a | 2023-05-15 09:00:29 | [diff] [blame] | 310 | /*isReferencedDynamically=*/false, |
Nico Weber | a564551 | 2021-05-07 21:10:05 | [diff] [blame] | 311 | /*noDeadStrip=*/false); |
Alex Brachet | 190b0f4 | 2022-05-20 21:39:16 | [diff] [blame] | 312 | s->used = true; |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 313 | in.got->addEntry(s); |
| 314 | } |
| 315 | r.referent = s; |
| 316 | r.addend = 0; |
| 317 | } |
| 318 | } |
| 319 | } |
| 320 | |
Jez Ng | 7b45dfc | 2022-10-12 03:50:46 | [diff] [blame] | 321 | Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) { |
| 322 | if (auto *defined = dyn_cast_or_null<Defined>(personality)) { |
| 323 | // Check if we have created a synthetic symbol at the same address. |
| 324 | Symbol *&synth = personalityTable[{defined->isec, defined->value}]; |
| 325 | if (synth == nullptr) |
| 326 | synth = defined; |
| 327 | else if (synth != defined) |
| 328 | return synth; |
| 329 | } |
| 330 | return personality; |
| 331 | } |
| 332 | |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 333 | // We need to apply the relocations to the pre-link compact unwind section |
| 334 | // before converting it to post-link form. There should only be absolute |
| 335 | // relocations here: since we are not emitting the pre-link CU section, there |
| 336 | // is no source address to make a relative location meaningful. |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 337 | void UnwindInfoSectionImpl::relocateCompactUnwind( |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 338 | std::vector<CompactUnwindEntry> &cuEntries) { |
Nico Weber | 7effcbd | 2022-06-19 16:30:06 | [diff] [blame] | 339 | parallelFor(0, symbolsVec.size(), [&](size_t i) { |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 340 | CompactUnwindEntry &cu = cuEntries[i]; |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 341 | const Defined *d = symbolsVec[i].second; |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 342 | cu.functionAddress = d->getVA(); |
Greg McGary | 9cc489a | 2021-11-15 18:46:59 | [diff] [blame] | 343 | if (!d->unwindEntry) |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 344 | return; |
Nico Weber | d5a70db | 2021-05-06 18:47:57 | [diff] [blame] | 345 | |
Vy Nguyen | e60b30d | 2023-06-06 18:00:47 | [diff] [blame] | 346 | // If we have DWARF unwind info, create a slimmed-down CU entry that points |
| 347 | // to it. |
Jez Ng | e183bf8 | 2022-06-13 01:56:45 | [diff] [blame] | 348 | if (d->unwindEntry->getName() == section_names::ehFrame) { |
Jez Ng | f7bc79c | 2023-04-04 13:26:03 | [diff] [blame] | 349 | // The unwinder will look for the DWARF entry starting at the hint, |
| 350 | // assuming the hint points to a valid CFI record start. If it |
| 351 | // fails to find the record, it proceeds in a linear search through the |
| 352 | // contiguous CFI records from the hint until the end of the section. |
| 353 | // Ideally, in the case where the offset is too large to be encoded, we |
| 354 | // would instead encode the largest possible offset to a valid CFI record, |
| 355 | // but since we don't keep track of that, just encode zero -- the start of |
| 356 | // the section is always the start of a CFI record. |
| 357 | uint64_t dwarfOffsetHint = |
| 358 | d->unwindEntry->outSecOff <= DWARF_SECTION_OFFSET |
| 359 | ? d->unwindEntry->outSecOff |
| 360 | : 0; |
| 361 | cu.encoding = target->modeDwarfEncoding | dwarfOffsetHint; |
Jez Ng | e183bf8 | 2022-06-13 01:56:45 | [diff] [blame] | 362 | const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry]; |
| 363 | cu.functionLength = fde.funcLength; |
Vy Nguyen | e60b30d | 2023-06-06 18:00:47 | [diff] [blame] | 364 | // Omit the DWARF personality from compact-unwind entry so that we |
| 365 | // don't need to encode it. |
| 366 | cu.personality = nullptr; |
Jez Ng | e183bf8 | 2022-06-13 01:56:45 | [diff] [blame] | 367 | cu.lsda = fde.lsda; |
| 368 | return; |
| 369 | } |
| 370 | |
| 371 | assert(d->unwindEntry->getName() == section_names::compactUnwind); |
| 372 | |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 373 | auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) - |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 374 | target->wordSize; |
| 375 | cu.functionLength = |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 376 | support::endian::read32le(buf + cuLayout.functionLengthOffset); |
| 377 | cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset); |
Greg McGary | 9cc489a | 2021-11-15 18:46:59 | [diff] [blame] | 378 | for (const Reloc &r : d->unwindEntry->relocs) { |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 379 | if (r.offset == cuLayout.personalityOffset) |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 380 | cu.personality = r.referent.get<Symbol *>(); |
Jez Ng | 453102a | 2023-02-16 21:18:46 | [diff] [blame] | 381 | else if (r.offset == cuLayout.lsdaOffset) |
| 382 | cu.lsda = r.getReferentInputSection(); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 383 | } |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 384 | }); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 385 | } |
| 386 | |
| 387 | // There should only be a handful of unique personality pointers, so we can |
| 388 | // encode them as 2-bit indices into a small array. |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 389 | void UnwindInfoSectionImpl::encodePersonalities() { |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 390 | for (size_t idx : cuIndices) { |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 391 | CompactUnwindEntry &cu = cuEntries[idx]; |
| 392 | if (cu.personality == nullptr) |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 393 | continue; |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 394 | // Linear search is fast enough for a small array. |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 395 | auto it = find(personalities, cu.personality); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 396 | uint32_t personalityIndex; // 1-based index |
| 397 | if (it != personalities.end()) { |
| 398 | personalityIndex = std::distance(personalities.begin(), it) + 1; |
| 399 | } else { |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 400 | personalities.push_back(cu.personality); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 401 | personalityIndex = personalities.size(); |
| 402 | } |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 403 | cu.encoding |= |
Kazu Hirata | 55e2cd1 | 2023-01-28 20:41:19 | [diff] [blame] | 404 | personalityIndex << llvm::countr_zero( |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 405 | static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK)); |
| 406 | } |
| 407 | if (personalities.size() > 3) |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 408 | error("too many personalities (" + Twine(personalities.size()) + |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 409 | ") for compact unwind to encode"); |
| 410 | } |
| 411 | |
Nico Weber | 0f24ffc | 2021-06-26 02:50:46 | [diff] [blame] | 412 | static bool canFoldEncoding(compact_unwind_encoding_t encoding) { |
| 413 | // From compact_unwind_encoding.h: |
| 414 | // UNWIND_X86_64_MODE_STACK_IND: |
| 415 | // A "frameless" (RBP not used as frame pointer) function large constant |
| 416 | // stack size. This case is like the previous, except the stack size is too |
| 417 | // large to encode in the compact unwind encoding. Instead it requires that |
| 418 | // the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact |
| 419 | // encoding contains the offset to the nnnnnnnn value in the function in |
| 420 | // UNWIND_X86_64_FRAMELESS_STACK_SIZE. |
| 421 | // Since this means the unwinder has to look at the `subq` in the function |
| 422 | // of the unwind info's unwind address, two functions that have identical |
| 423 | // unwind info can't be folded if it's using this encoding since both |
| 424 | // entries need unique addresses. |
Martin Storsjö | 59c6f41 | 2022-08-02 07:29:01 | [diff] [blame] | 425 | static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_STACK_IND) == |
Kazu Hirata | 32aa35b | 2022-09-03 18:17:47 | [diff] [blame] | 426 | static_cast<uint32_t>(UNWIND_X86_MODE_STACK_IND)); |
Nico Weber | 0f24ffc | 2021-06-26 02:50:46 | [diff] [blame] | 427 | if ((target->cpuType == CPU_TYPE_X86_64 || target->cpuType == CPU_TYPE_X86) && |
Vy Nguyen | a6d6734 | 2022-10-06 13:08:00 | [diff] [blame] | 428 | (encoding & UNWIND_MODE_MASK) == UNWIND_X86_64_MODE_STACK_IND) { |
Nico Weber | 0f24ffc | 2021-06-26 02:50:46 | [diff] [blame] | 429 | // FIXME: Consider passing in the two function addresses and getting |
| 430 | // their two stack sizes off the `subq` and only returning false if they're |
| 431 | // actually different. |
| 432 | return false; |
| 433 | } |
| 434 | return true; |
| 435 | } |
| 436 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 437 | // Scan the __LD,__compact_unwind entries and compute the space needs of |
Jez Ng | 3e95180 | 2022-02-01 18:45:38 | [diff] [blame] | 438 | // __TEXT,__unwind_info and __TEXT,__eh_frame. |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 439 | void UnwindInfoSectionImpl::finalize() { |
Jez Ng | a9353db | 2021-10-26 20:04:06 | [diff] [blame] | 440 | if (symbols.empty()) |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 441 | return; |
| 442 | |
| 443 | // At this point, the address space for __TEXT,__text has been |
| 444 | // assigned, so we can relocate the __LD,__compact_unwind entries |
| 445 | // into a temporary buffer. Relocation is necessary in order to sort |
| 446 | // the CU entries by function address. Sorting is necessary so that |
Shoaib Meenai | 56bd318 | 2022-08-28 20:09:56 | [diff] [blame] | 447 | // we can fold adjacent CU entries with identical encoding+personality |
| 448 | // and without any LSDA. Folding is necessary because it reduces the |
| 449 | // number of CU entries by as much as 3 orders of magnitude! |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 450 | cuEntries.resize(symbols.size()); |
| 451 | // The "map" part of the symbols MapVector was only needed for deduplication |
| 452 | // in addSymbol(). Now that we are done adding, move the contents to a plain |
| 453 | // std::vector for indexed access. |
| 454 | symbolsVec = symbols.takeVector(); |
| 455 | relocateCompactUnwind(cuEntries); |
Nico Weber | d6565a2 | 2021-06-22 02:29:11 | [diff] [blame] | 456 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 457 | // Rather than sort & fold the 32-byte entries directly, we create a |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 458 | // vector of indices to entries and sort & fold that instead. |
| 459 | cuIndices.resize(cuEntries.size()); |
| 460 | std::iota(cuIndices.begin(), cuIndices.end(), 0); |
| 461 | llvm::sort(cuIndices, [&](size_t a, size_t b) { |
| 462 | return cuEntries[a].functionAddress < cuEntries[b].functionAddress; |
Jez Ng | 7ca133c | 2021-04-26 05:23:32 | [diff] [blame] | 463 | }); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 464 | |
Vy Nguyen | 65226d3 | 2022-11-18 20:21:23 | [diff] [blame] | 465 | // Record the ending boundary before we fold the entries. |
| 466 | cueEndBoundary = cuEntries[cuIndices.back()].functionAddress + |
| 467 | cuEntries[cuIndices.back()].functionLength; |
| 468 | |
Shoaib Meenai | 56bd318 | 2022-08-28 20:09:56 | [diff] [blame] | 469 | // Fold adjacent entries with matching encoding+personality and without LSDA |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 470 | // We use three iterators on the same cuIndices to fold in-situ: |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 471 | // (1) `foldBegin` is the first of a potential sequence of matching entries |
| 472 | // (2) `foldEnd` is the first non-matching entry after `foldBegin`. |
| 473 | // The semi-open interval [ foldBegin .. foldEnd ) contains a range |
| 474 | // entries that can be folded into a single entry and written to ... |
| 475 | // (3) `foldWrite` |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 476 | auto foldWrite = cuIndices.begin(); |
| 477 | for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) { |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 478 | auto foldEnd = foldBegin; |
Shoaib Meenai | 56bd318 | 2022-08-28 20:09:56 | [diff] [blame] | 479 | // Common LSDA encodings (e.g. for C++ and Objective-C) contain offsets from |
| 480 | // a base address. The base address is normally not contained directly in |
| 481 | // the LSDA, and in that case, the personality function treats the starting |
| 482 | // address of the function (which is computed by the unwinder) as the base |
| 483 | // address and interprets the LSDA accordingly. The unwinder computes the |
| 484 | // starting address of a function as the address associated with its CU |
| 485 | // entry. For this reason, we cannot fold adjacent entries if they have an |
| 486 | // LSDA, because folding would make the unwinder compute the wrong starting |
| 487 | // address for the functions with the folded entries, which in turn would |
| 488 | // cause the personality function to misinterpret the LSDA for those |
| 489 | // functions. In the very rare case where the base address is encoded |
| 490 | // directly in the LSDA, two functions at different addresses would |
| 491 | // necessarily have different LSDAs, so their CU entries would not have been |
| 492 | // folded anyway. |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 493 | while (++foldEnd < cuIndices.end() && |
| 494 | cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding && |
Shoaib Meenai | 56bd318 | 2022-08-28 20:09:56 | [diff] [blame] | 495 | !cuEntries[*foldBegin].lsda && !cuEntries[*foldEnd].lsda && |
| 496 | // If we've gotten to this point, we don't have an LSDA, which should |
| 497 | // also imply that we don't have a personality function, since in all |
| 498 | // likelihood a personality function needs the LSDA to do anything |
| 499 | // useful. It can be technically valid to have a personality function |
| 500 | // and no LSDA though (e.g. the C++ personality __gxx_personality_v0 |
| 501 | // is just a no-op without LSDA), so we still check for personality |
| 502 | // function equivalence to handle that case. |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 503 | cuEntries[*foldBegin].personality == |
| 504 | cuEntries[*foldEnd].personality && |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 505 | canFoldEncoding(cuEntries[*foldEnd].encoding)) |
| 506 | ; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 507 | *foldWrite++ = *foldBegin; |
| 508 | foldBegin = foldEnd; |
| 509 | } |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 510 | cuIndices.erase(foldWrite, cuIndices.end()); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 511 | |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 512 | encodePersonalities(); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 513 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 514 | // Count frequencies of the folded encodings |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 515 | EncodingMap encodingFrequencies; |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 516 | for (size_t idx : cuIndices) |
| 517 | encodingFrequencies[cuEntries[idx].encoding]++; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 518 | |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 519 | // Make a vector of encodings, sorted by descending frequency |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 520 | for (const auto &frequency : encodingFrequencies) |
| 521 | commonEncodings.emplace_back(frequency); |
Jez Ng | 7ca133c | 2021-04-26 05:23:32 | [diff] [blame] | 522 | llvm::sort(commonEncodings, |
| 523 | [](const std::pair<compact_unwind_encoding_t, size_t> &a, |
| 524 | const std::pair<compact_unwind_encoding_t, size_t> &b) { |
| 525 | if (a.second == b.second) |
| 526 | // When frequencies match, secondarily sort on encoding |
| 527 | // to maintain parity with validate-unwind-info.py |
| 528 | return a.first > b.first; |
| 529 | return a.second > b.second; |
| 530 | }); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 531 | |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 532 | // Truncate the vector to 127 elements. |
Nico Weber | 5688247 | 2021-01-02 03:28:11 | [diff] [blame] | 533 | // Common encoding indexes are limited to 0..126, while encoding |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 534 | // indexes 127..255 are local to each second-level page |
| 535 | if (commonEncodings.size() > COMMON_ENCODINGS_MAX) |
| 536 | commonEncodings.resize(COMMON_ENCODINGS_MAX); |
| 537 | |
| 538 | // Create a map from encoding to common-encoding-table index |
| 539 | for (size_t i = 0; i < commonEncodings.size(); i++) |
| 540 | commonEncodingIndexes[commonEncodings[i].first] = i; |
| 541 | |
| 542 | // Split folded encodings into pages, where each page is limited by ... |
| 543 | // (a) 4 KiB capacity |
| 544 | // (b) 24-bit difference between first & final function address |
| 545 | // (c) 8-bit compact-encoding-table index, |
| 546 | // for which 0..126 references the global common-encodings table, |
| 547 | // and 127..255 references a local per-second-level-page table. |
| 548 | // First we try the compact format and determine how many entries fit. |
| 549 | // If more entries fit in the regular format, we use that. |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 550 | for (size_t i = 0; i < cuIndices.size();) { |
| 551 | size_t idx = cuIndices[i]; |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 552 | secondLevelPages.emplace_back(); |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 553 | SecondLevelPage &page = secondLevelPages.back(); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 554 | page.entryIndex = i; |
David Spickett | 79942d3 | 2022-07-08 11:32:44 | [diff] [blame] | 555 | uint64_t functionAddressMax = |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 556 | cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK; |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 557 | size_t n = commonEncodings.size(); |
| 558 | size_t wordsRemaining = |
| 559 | SECOND_LEVEL_PAGE_WORDS - |
| 560 | sizeof(unwind_info_compressed_second_level_page_header) / |
| 561 | sizeof(uint32_t); |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 562 | while (wordsRemaining >= 1 && i < cuIndices.size()) { |
| 563 | idx = cuIndices[i]; |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 564 | const CompactUnwindEntry *cuPtr = &cuEntries[idx]; |
Jez Ng | c4d9df9 | 2023-04-05 05:48:34 | [diff] [blame] | 565 | if (cuPtr->functionAddress >= functionAddressMax) |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 566 | break; |
Jez Ng | c4d9df9 | 2023-04-05 05:48:34 | [diff] [blame] | 567 | if (commonEncodingIndexes.count(cuPtr->encoding) || |
| 568 | page.localEncodingIndexes.count(cuPtr->encoding)) { |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 569 | i++; |
| 570 | wordsRemaining--; |
| 571 | } else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) { |
| 572 | page.localEncodings.emplace_back(cuPtr->encoding); |
| 573 | page.localEncodingIndexes[cuPtr->encoding] = n++; |
| 574 | i++; |
| 575 | wordsRemaining -= 2; |
| 576 | } else { |
| 577 | break; |
| 578 | } |
| 579 | } |
| 580 | page.entryCount = i - page.entryIndex; |
| 581 | |
Fangrui Song | 640d9b3 | 2022-11-09 01:28:04 | [diff] [blame] | 582 | // If this is not the final page, see if it's possible to fit more entries |
| 583 | // by using the regular format. This can happen when there are many unique |
| 584 | // encodings, and we saturated the local encoding table early. |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 585 | if (i < cuIndices.size() && |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 586 | page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) { |
| 587 | page.kind = UNWIND_SECOND_LEVEL_REGULAR; |
| 588 | page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX, |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 589 | cuIndices.size() - page.entryIndex); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 590 | i = page.entryIndex + page.entryCount; |
| 591 | } else { |
| 592 | page.kind = UNWIND_SECOND_LEVEL_COMPRESSED; |
| 593 | } |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 594 | } |
| 595 | |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 596 | for (size_t idx : cuIndices) { |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 597 | lsdaIndex[idx] = entriesWithLsda.size(); |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 598 | if (cuEntries[idx].lsda) |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 599 | entriesWithLsda.push_back(idx); |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 600 | } |
| 601 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 602 | // compute size of __TEXT,__unwind_info section |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 603 | level2PagesOffset = sizeof(unwind_info_section_header) + |
| 604 | commonEncodings.size() * sizeof(uint32_t) + |
| 605 | personalities.size() * sizeof(uint32_t) + |
| 606 | // The extra second-level-page entry is for the sentinel |
| 607 | (secondLevelPages.size() + 1) * |
| 608 | sizeof(unwind_info_section_header_index_entry) + |
| 609 | entriesWithLsda.size() * |
| 610 | sizeof(unwind_info_section_header_lsda_index_entry); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 611 | unwindInfoSize = |
| 612 | level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 613 | } |
| 614 | |
Nico Weber | 126f58e | 2020-12-02 01:27:33 | [diff] [blame] | 615 | // All inputs are relocated and output addresses are known, so write! |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 616 | |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 617 | void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const { |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 618 | assert(!cuIndices.empty() && "call only if there is unwind info"); |
Nico Weber | 8a7b5eb | 2021-07-07 15:28:27 | [diff] [blame] | 619 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 620 | // section header |
| 621 | auto *uip = reinterpret_cast<unwind_info_section_header *>(buf); |
| 622 | uip->version = 1; |
| 623 | uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header); |
| 624 | uip->commonEncodingsArrayCount = commonEncodings.size(); |
| 625 | uip->personalityArraySectionOffset = |
| 626 | uip->commonEncodingsArraySectionOffset + |
| 627 | (uip->commonEncodingsArrayCount * sizeof(uint32_t)); |
| 628 | uip->personalityArrayCount = personalities.size(); |
| 629 | uip->indexSectionOffset = uip->personalityArraySectionOffset + |
| 630 | (uip->personalityArrayCount * sizeof(uint32_t)); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 631 | uip->indexCount = secondLevelPages.size() + 1; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 632 | |
| 633 | // Common encodings |
| 634 | auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]); |
| 635 | for (const auto &encoding : commonEncodings) |
| 636 | *i32p++ = encoding.first; |
| 637 | |
| 638 | // Personalities |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 639 | for (const Symbol *personality : personalities) |
| 640 | *i32p++ = personality->getGotVA() - in.header->addr; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 641 | |
Vy Nguyen | 65226d3 | 2022-11-18 20:21:23 | [diff] [blame] | 642 | // FIXME: LD64 checks and warns aboutgaps or overlapse in cuEntries address |
| 643 | // ranges. We should do the same too |
| 644 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 645 | // Level-1 index |
| 646 | uint32_t lsdaOffset = |
| 647 | uip->indexSectionOffset + |
| 648 | uip->indexCount * sizeof(unwind_info_section_header_index_entry); |
| 649 | uint64_t l2PagesOffset = level2PagesOffset; |
| 650 | auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 651 | for (const SecondLevelPage &page : secondLevelPages) { |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 652 | size_t idx = cuIndices[page.entryIndex]; |
| 653 | iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 654 | iep->secondLevelPagesSectionOffset = l2PagesOffset; |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 655 | iep->lsdaIndexArraySectionOffset = |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 656 | lsdaOffset + lsdaIndex.lookup(idx) * |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 657 | sizeof(unwind_info_section_header_lsda_index_entry); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 658 | iep++; |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 659 | l2PagesOffset += SECOND_LEVEL_PAGE_BYTES; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 660 | } |
| 661 | // Level-1 sentinel |
Vy Nguyen | 65226d3 | 2022-11-18 20:21:23 | [diff] [blame] | 662 | // XXX(vyng): Note that LD64 adds +1 here. |
| 663 | // Unsure whether it's a bug or it's their workaround for something else. |
| 664 | // See comments from https://reviews.llvm.org/D138320. |
| 665 | iep->functionOffset = cueEndBoundary - in.header->addr; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 666 | iep->secondLevelPagesSectionOffset = 0; |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 667 | iep->lsdaIndexArraySectionOffset = |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 668 | lsdaOffset + entriesWithLsda.size() * |
| 669 | sizeof(unwind_info_section_header_lsda_index_entry); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 670 | iep++; |
| 671 | |
| 672 | // LSDAs |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 673 | auto *lep = |
| 674 | reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep); |
| 675 | for (size_t idx : entriesWithLsda) { |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 676 | const CompactUnwindEntry &cu = cuEntries[idx]; |
| 677 | lep->lsdaOffset = cu.lsda->getVA(/*off=*/0) - in.header->addr; |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 678 | lep->functionOffset = cu.functionAddress - in.header->addr; |
| 679 | lep++; |
| 680 | } |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 681 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 682 | // Level-2 pages |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 683 | auto *pp = reinterpret_cast<uint32_t *>(lep); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 684 | for (const SecondLevelPage &page : secondLevelPages) { |
| 685 | if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) { |
| 686 | uintptr_t functionAddressBase = |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 687 | cuEntries[cuIndices[page.entryIndex]].functionAddress; |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 688 | auto *p2p = |
| 689 | reinterpret_cast<unwind_info_compressed_second_level_page_header *>( |
| 690 | pp); |
| 691 | p2p->kind = page.kind; |
| 692 | p2p->entryPageOffset = |
| 693 | sizeof(unwind_info_compressed_second_level_page_header); |
| 694 | p2p->entryCount = page.entryCount; |
| 695 | p2p->encodingsPageOffset = |
| 696 | p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t); |
| 697 | p2p->encodingsCount = page.localEncodings.size(); |
| 698 | auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); |
| 699 | for (size_t i = 0; i < page.entryCount; i++) { |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 700 | const CompactUnwindEntry &cue = |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 701 | cuEntries[cuIndices[page.entryIndex + i]]; |
| 702 | auto it = commonEncodingIndexes.find(cue.encoding); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 703 | if (it == commonEncodingIndexes.end()) |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 704 | it = page.localEncodingIndexes.find(cue.encoding); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 705 | *ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) | |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 706 | (cue.functionAddress - functionAddressBase); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 707 | } |
Vy Nguyen | 3f35dd0 | 2021-10-26 19:14:25 | [diff] [blame] | 708 | if (!page.localEncodings.empty()) |
Fangrui Song | 791fe7a | 2020-12-21 04:01:20 | [diff] [blame] | 709 | memcpy(ep, page.localEncodings.data(), |
| 710 | page.localEncodings.size() * sizeof(uint32_t)); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 711 | } else { |
| 712 | auto *p2p = |
| 713 | reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp); |
| 714 | p2p->kind = page.kind; |
| 715 | p2p->entryPageOffset = |
| 716 | sizeof(unwind_info_regular_second_level_page_header); |
| 717 | p2p->entryCount = page.entryCount; |
| 718 | auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); |
| 719 | for (size_t i = 0; i < page.entryCount; i++) { |
Jez Ng | 82dcf30 | 2022-04-09 02:33:00 | [diff] [blame] | 720 | const CompactUnwindEntry &cue = |
Jez Ng | a2404f1 | 2021-11-11 00:31:54 | [diff] [blame] | 721 | cuEntries[cuIndices[page.entryIndex + i]]; |
| 722 | *ep++ = cue.functionAddress; |
| 723 | *ep++ = cue.encoding; |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 724 | } |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 725 | } |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 726 | pp += SECOND_LEVEL_PAGE_WORDS; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 727 | } |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 728 | } |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 729 | |
| 730 | UnwindInfoSection *macho::makeUnwindInfoSection() { |
Jez Ng | 2a66690 | 2022-04-13 20:17:29 | [diff] [blame] | 731 | return make<UnwindInfoSectionImpl>(); |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 732 | } |