Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 1 | //===- UnwindInfoSection.cpp ----------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "UnwindInfoSection.h" |
Jez Ng | 3370619 | 2021-05-25 18:57:16 | [diff] [blame] | 10 | #include "ConcatOutputSection.h" |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 11 | #include "Config.h" |
| 12 | #include "InputSection.h" |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 13 | #include "OutputSection.h" |
| 14 | #include "OutputSegment.h" |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 15 | #include "SymbolTable.h" |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 16 | #include "Symbols.h" |
| 17 | #include "SyntheticSections.h" |
| 18 | #include "Target.h" |
| 19 | |
| 20 | #include "lld/Common/ErrorHandler.h" |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 21 | #include "lld/Common/Memory.h" |
Jez Ng | 7ca133c | 2021-04-26 05:23:32 | [diff] [blame] | 22 | #include "llvm/ADT/STLExtras.h" |
Jez Ng | 9cc0d89 | 2021-05-25 18:58:06 | [diff] [blame] | 23 | #include "llvm/ADT/SmallVector.h" |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 24 | #include "llvm/BinaryFormat/MachO.h" |
| 25 | |
| 26 | using namespace llvm; |
| 27 | using namespace llvm::MachO; |
| 28 | using namespace lld; |
| 29 | using namespace lld::macho; |
| 30 | |
// Size limits and layout constants for the __TEXT,__unwind_info tables.

// Maximum number of entries kept in the section-wide common-encodings table;
// finalize() truncates the table to this many entries (indexes 0..126).
#define COMMON_ENCODINGS_MAX 127
// Exclusive upper bound on an encoding index inside a compressed page:
// indexes below commonEncodings.size() refer to the common table, the rest
// (up to 255) refer to the page-local table.
#define COMPACT_ENCODINGS_MAX 256

// Every second-level page occupies exactly this many bytes in the output.
#define SECOND_LEVEL_PAGE_BYTES 4096
// The same page size expressed in 32-bit words.
#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t))
// Number of regular-format entries that fit in one page after its header.
#define REGULAR_SECOND_LEVEL_ENTRIES_MAX                                       \
  ((SECOND_LEVEL_PAGE_BYTES -                                                  \
    sizeof(unwind_info_regular_second_level_page_header)) /                    \
   sizeof(unwind_info_regular_second_level_entry))
// Number of 32-bit compressed-format entries that fit in one page after its
// header (assuming no page-local encodings are stored).
#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX                                    \
  ((SECOND_LEVEL_PAGE_BYTES -                                                  \
    sizeof(unwind_info_compressed_second_level_page_header)) /                 \
   sizeof(uint32_t))

// A compressed entry packs the function offset into its low 24 bits; the
// encoding index is shifted left by this amount when an entry is written.
#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
// Mask obtained by applying the function-offset extractor to all-ones.
#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK                                      \
  UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
| 48 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 49 | // Compact Unwind format is a Mach-O evolution of DWARF Unwind that |
| 50 | // optimizes space and exception-time lookup. Most DWARF unwind |
| 51 | // entries can be replaced with Compact Unwind entries, but the ones |
| 52 | // that cannot are retained in DWARF form. |
| 53 | // |
| 54 | // This comment will address macro-level organization of the pre-link |
| 55 | // and post-link compact unwind tables. For micro-level organization |
| 56 | // pertaining to the bitfield layout of the 32-bit compact unwind |
| 57 | // entries, see libunwind/include/mach-o/compact_unwind_encoding.h |
| 58 | // |
| 59 | // Important clarifying factoids: |
| 60 | // |
| 61 | // * __LD,__compact_unwind is the compact unwind format for compiler |
| 62 | // output and linker input. It is never a final output. It could be |
| 63 | // an intermediate output with the `-r` option which retains relocs. |
| 64 | // |
| 65 | // * __TEXT,__unwind_info is the compact unwind format for final |
| 66 | // linker output. It is never an input. |
| 67 | // |
| 68 | // * __TEXT,__eh_frame is the DWARF format for both linker input and output. |
| 69 | // |
| 70 | // * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd |
| 71 | // level) by ascending address, and the pages are referenced by an |
| 72 | // index (1st level) in the section header. |
| 73 | // |
| 74 | // * Following the headers in __TEXT,__unwind_info, the bulk of the |
| 75 | // section contains a vector of compact unwind entries |
| 76 | // `{functionOffset, encoding}` sorted by ascending `functionOffset`. |
| 77 | // Adjacent entries with the same encoding can be folded to great |
| 78 | // advantage, achieving a 3-order-of-magnitude reduction in the |
| 79 | // number of entries. |
| 80 | // |
// * The __TEXT,__unwind_info format can accommodate up to 127 unique
//   common encodings for the space-efficient compressed format, plus
//   additional encodings local to each second-level page. In practice,
//   fewer than a dozen unique encodings are used by C++ programs of
//   all sizes. When a page would hold more entries in the regular
//   non-compressed format, we emit that page in the regular format
//   instead. Time will tell if anyone in the field ever overflows the
//   127-encodings limit.
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 87 | // |
| 88 | // Refer to the definition of unwind_info_section_header in |
| 89 | // compact_unwind_encoding.h for an overview of the format we are encoding |
| 90 | // here. |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 91 | |
Nico Weber | a564551 | 2021-05-07 21:10:05 | [diff] [blame] | 92 | // TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410 |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 93 | // TODO(gkm): how do we align the 2nd-level pages? |
| 94 | |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 95 | using EncodingMap = llvm::DenseMap<compact_unwind_encoding_t, size_t>; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 96 | |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 97 | struct SecondLevelPage { |
| 98 | uint32_t kind; |
| 99 | size_t entryIndex; |
| 100 | size_t entryCount; |
| 101 | size_t byteCount; |
| 102 | std::vector<compact_unwind_encoding_t> localEncodings; |
| 103 | EncodingMap localEncodingIndexes; |
| 104 | }; |
| 105 | |
// Implementation of UnwindInfoSection, parameterized on the integer type
// (`Ptr`) used for the pointer-sized fields of a CompactUnwindEntry.
// makeUnwindInfoSection() instantiates it with uint64_t or uint32_t according
// to the target word size.
template <class Ptr> class UnwindInfoSectionImpl : public UnwindInfoSection {
public:
  // Registers GOT entries for the personality pointers referenced by the
  // relocations of one __LD,__compact_unwind input section.
  void prepareRelocations(ConcatInputSection *) override;
  // Relocates, sorts, filters and folds the compact unwind entries, then
  // decides the page layout and computes the output section size.
  void finalize() override;
  // Serializes the tables prepared by finalize() into `buf`.
  void writeTo(uint8_t *buf) const override;

private:
  // (encoding, frequency) pairs sorted by descending frequency and truncated
  // to COMMON_ENCODINGS_MAX entries.
  std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
  // Encoding -> index within commonEncodings.
  EncodingMap commonEncodingIndexes;
  // Indices of personality functions within the GOT. The stored values are
  // 1-based, so that 0 can stand for "no personality".
  std::vector<uint32_t> personalities;
  // Maps an (input section, offset) location to the single symbol that
  // represents the personality function at that location.
  SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
      personalityTable;
  // LSDA index entries, in the order in which they will be written.
  std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries;
  // Map of function offset (from the image base) to an index within the LSDA
  // array.
  llvm::DenseMap<uint32_t, uint32_t> functionToLsdaIndex;
  // Relocated copy of the input compact unwind entries, in input order.
  std::vector<CompactUnwindEntry<Ptr>> cuVector;
  // Pointers into cuVector, sorted by function address, with dead-stripped
  // entries removed and adjacent equivalent entries folded.
  std::vector<CompactUnwindEntry<Ptr> *> cuPtrVector;
  // Layout of each second-level page, in output order.
  std::vector<SecondLevelPage> secondLevelPages;
  // Byte offset, from the start of the section, of the first second-level
  // page.
  uint64_t level2PagesOffset = 0;
};
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 128 | |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 129 | // Compact unwind relocations have different semantics, so we handle them in a |
| 130 | // separate code path from regular relocations. First, we do not wish to add |
| 131 | // rebase opcodes for __LD,__compact_unwind, because that section doesn't |
| 132 | // actually end up in the final binary. Second, personality pointers always |
| 133 | // reside in the GOT and must be treated specially. |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 134 | template <class Ptr> |
Jez Ng | 7f2ba39 | 2021-06-11 23:49:52 | [diff] [blame] | 135 | void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) { |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 136 | assert(isec->segname == segment_names::ld && |
| 137 | isec->name == section_names::compactUnwind); |
Nico Weber | d5a70db | 2021-05-06 18:47:57 | [diff] [blame] | 138 | assert(!isec->shouldOmitFromOutput() && |
| 139 | "__compact_unwind section should not be omitted"); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 140 | |
Nico Weber | 7d4c8a2 | 2021-06-13 17:30:05 | [diff] [blame^] | 141 | // FIXME: Make this skip relocations for CompactUnwindEntries that |
Nico Weber | a564551 | 2021-05-07 21:10:05 | [diff] [blame] | 142 | // point to dead-stripped functions. That might save some amount of |
| 143 | // work. But since there are usually just few personality functions |
| 144 | // that are referenced from many places, at least some of them likely |
| 145 | // live, it wouldn't reduce number of got entries. |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 146 | for (Reloc &r : isec->relocs) { |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 147 | assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 148 | if (r.offset % sizeof(CompactUnwindEntry<Ptr>) != |
| 149 | offsetof(CompactUnwindEntry<Ptr>, personality)) |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 150 | continue; |
| 151 | |
Greg McGary | 427d359 | 2021-03-30 00:19:29 | [diff] [blame] | 152 | if (auto *s = r.referent.dyn_cast<Symbol *>()) { |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 153 | if (auto *undefined = dyn_cast<Undefined>(s)) { |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 154 | treatUndefinedSymbol(*undefined); |
Nico Weber | 0658fc6 | 2021-02-28 18:42:14 | [diff] [blame] | 155 | // treatUndefinedSymbol() can replace s with a DylibSymbol; re-check. |
| 156 | if (isa<Undefined>(s)) |
| 157 | continue; |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 158 | } |
| 159 | if (auto *defined = dyn_cast<Defined>(s)) { |
| 160 | // Check if we have created a synthetic symbol at the same address. |
Greg McGary | 427d359 | 2021-03-30 00:19:29 | [diff] [blame] | 161 | Symbol *&personality = |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 162 | personalityTable[{defined->isec, defined->value}]; |
| 163 | if (personality == nullptr) { |
| 164 | personality = defined; |
| 165 | in.got->addEntry(defined); |
| 166 | } else if (personality != defined) { |
| 167 | r.referent = personality; |
| 168 | } |
| 169 | continue; |
| 170 | } |
| 171 | assert(isa<DylibSymbol>(s)); |
| 172 | in.got->addEntry(s); |
| 173 | continue; |
| 174 | } |
| 175 | |
| 176 | if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 177 | // Personality functions can be referenced via section relocations |
Jez Ng | 4a5e111 | 2021-02-24 02:42:02 | [diff] [blame] | 178 | // if they live in the same object file. Create placeholder synthetic |
| 179 | // symbols for them in the GOT. |
Greg McGary | 427d359 | 2021-03-30 00:19:29 | [diff] [blame] | 180 | Symbol *&s = personalityTable[{referentIsec, r.addend}]; |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 181 | if (s == nullptr) { |
Nico Weber | a564551 | 2021-05-07 21:10:05 | [diff] [blame] | 182 | // This runs after dead stripping, so the noDeadStrip argument does not |
| 183 | // matter. |
Nico Weber | c1b2a7b | 2021-04-22 14:44:56 | [diff] [blame] | 184 | s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec, |
| 185 | r.addend, /*size=*/0, /*isWeakDef=*/false, |
Jez Ng | 05c5363 | 2021-04-30 20:17:26 | [diff] [blame] | 186 | /*isExternal=*/false, /*isPrivateExtern=*/false, |
Nico Weber | a564551 | 2021-05-07 21:10:05 | [diff] [blame] | 187 | /*isThumb=*/false, /*isReferencedDynamically=*/false, |
| 188 | /*noDeadStrip=*/false); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 189 | in.got->addEntry(s); |
| 190 | } |
| 191 | r.referent = s; |
| 192 | r.addend = 0; |
| 193 | } |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | // Unwind info lives in __DATA, and finalization of __TEXT will occur before |
| 198 | // finalization of __DATA. Moreover, the finalization of unwind info depends on |
| 199 | // the exact addresses that it references. So it is safe for compact unwind to |
| 200 | // reference addresses in __TEXT, but not addresses in any other segment. |
Jez Ng | 7f2ba39 | 2021-06-11 23:49:52 | [diff] [blame] | 201 | static ConcatInputSection *checkTextSegment(InputSection *isec) { |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 202 | if (isec->segname != segment_names::text) |
| 203 | error("compact unwind references address in " + toString(isec) + |
| 204 | " which is not in segment __TEXT"); |
Jez Ng | 7f2ba39 | 2021-06-11 23:49:52 | [diff] [blame] | 205 | // __text should always be a ConcatInputSection. |
| 206 | return cast<ConcatInputSection>(isec); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 207 | } |
| 208 | |
| 209 | // We need to apply the relocations to the pre-link compact unwind section |
| 210 | // before converting it to post-link form. There should only be absolute |
| 211 | // relocations here: since we are not emitting the pre-link CU section, there |
| 212 | // is no source address to make a relative location meaningful. |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 213 | template <class Ptr> |
| 214 | static void |
Jez Ng | 3370619 | 2021-05-25 18:57:16 | [diff] [blame] | 215 | relocateCompactUnwind(ConcatOutputSection *compactUnwindSection, |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 216 | std::vector<CompactUnwindEntry<Ptr>> &cuVector) { |
Jez Ng | 04259cd | 2021-06-08 03:47:12 | [diff] [blame] | 217 | for (const ConcatInputSection *isec : compactUnwindSection->inputs) { |
Nico Weber | d5a70db | 2021-05-06 18:47:57 | [diff] [blame] | 218 | assert(isec->parent == compactUnwindSection); |
| 219 | |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 220 | uint8_t *buf = |
| 221 | reinterpret_cast<uint8_t *>(cuVector.data()) + isec->outSecFileOff; |
| 222 | memcpy(buf, isec->data.data(), isec->data.size()); |
| 223 | |
Greg McGary | 98fe9e4 | 2021-03-10 05:41:34 | [diff] [blame] | 224 | for (const Reloc &r : isec->relocs) { |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 225 | uint64_t referentVA = 0; |
Greg McGary | 427d359 | 2021-03-30 00:19:29 | [diff] [blame] | 226 | if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 227 | if (!isa<Undefined>(referentSym)) { |
| 228 | assert(referentSym->isInGot()); |
| 229 | if (auto *defined = dyn_cast<Defined>(referentSym)) |
| 230 | checkTextSegment(defined->isec); |
| 231 | // At this point in the link, we may not yet know the final address of |
| 232 | // the GOT, so we just encode the index. We make it a 1-based index so |
| 233 | // that we can distinguish the null pointer case. |
| 234 | referentVA = referentSym->gotIndex + 1; |
| 235 | } |
| 236 | } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { |
Jez Ng | 7f2ba39 | 2021-06-11 23:49:52 | [diff] [blame] | 237 | ConcatInputSection *concatIsec = checkTextSegment(referentIsec); |
| 238 | if (concatIsec->shouldOmitFromOutput()) |
Nico Weber | d5a70db | 2021-05-06 18:47:57 | [diff] [blame] | 239 | referentVA = UINT64_MAX; // Tombstone value |
| 240 | else |
Jez Ng | 04259cd | 2021-06-08 03:47:12 | [diff] [blame] | 241 | referentVA = referentIsec->getVA(r.addend); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 242 | } |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 243 | |
| 244 | writeAddress(buf + r.offset, referentVA, r.length); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 245 | } |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | // There should only be a handful of unique personality pointers, so we can |
| 250 | // encode them as 2-bit indices into a small array. |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 251 | template <class Ptr> |
| 252 | void encodePersonalities( |
| 253 | const std::vector<CompactUnwindEntry<Ptr> *> &cuPtrVector, |
| 254 | std::vector<uint32_t> &personalities) { |
| 255 | for (CompactUnwindEntry<Ptr> *cu : cuPtrVector) { |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 256 | if (cu->personality == 0) |
| 257 | continue; |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 258 | // Linear search is fast enough for a small array. |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 259 | auto it = find(personalities, cu->personality); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 260 | uint32_t personalityIndex; // 1-based index |
| 261 | if (it != personalities.end()) { |
| 262 | personalityIndex = std::distance(personalities.begin(), it) + 1; |
| 263 | } else { |
| 264 | personalities.push_back(cu->personality); |
| 265 | personalityIndex = personalities.size(); |
| 266 | } |
| 267 | cu->encoding |= |
| 268 | personalityIndex << countTrailingZeros( |
| 269 | static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK)); |
| 270 | } |
| 271 | if (personalities.size() > 3) |
| 272 | error("too many personalities (" + std::to_string(personalities.size()) + |
| 273 | ") for compact unwind to encode"); |
| 274 | } |
| 275 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 276 | // Scan the __LD,__compact_unwind entries and compute the space needs of |
| 277 | // __TEXT,__unwind_info and __TEXT,__eh_frame |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 278 | template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() { |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 279 | if (compactUnwindSection == nullptr) |
| 280 | return; |
| 281 | |
| 282 | // At this point, the address space for __TEXT,__text has been |
| 283 | // assigned, so we can relocate the __LD,__compact_unwind entries |
| 284 | // into a temporary buffer. Relocation is necessary in order to sort |
| 285 | // the CU entries by function address. Sorting is necessary so that |
| 286 | // we can fold adjacent CU entries with identical |
| 287 | // encoding+personality+lsda. Folding is necessary because it reduces |
| 288 | // the number of CU entries by as much as 3 orders of magnitude! |
| 289 | compactUnwindSection->finalize(); |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 290 | assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry<Ptr>) == |
| 291 | 0); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 292 | size_t cuCount = |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 293 | compactUnwindSection->getSize() / sizeof(CompactUnwindEntry<Ptr>); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 294 | cuVector.resize(cuCount); |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 295 | relocateCompactUnwind(compactUnwindSection, cuVector); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 296 | |
| 297 | // Rather than sort & fold the 32-byte entries directly, we create a |
| 298 | // vector of pointers to entries and sort & fold that instead. |
| 299 | cuPtrVector.reserve(cuCount); |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 300 | for (CompactUnwindEntry<Ptr> &cuEntry : cuVector) |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 301 | cuPtrVector.emplace_back(&cuEntry); |
Jez Ng | 7ca133c | 2021-04-26 05:23:32 | [diff] [blame] | 302 | llvm::sort(cuPtrVector, [](const CompactUnwindEntry<Ptr> *a, |
| 303 | const CompactUnwindEntry<Ptr> *b) { |
| 304 | return a->functionAddress < b->functionAddress; |
| 305 | }); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 306 | |
Nico Weber | d5a70db | 2021-05-06 18:47:57 | [diff] [blame] | 307 | // Dead-stripped functions get a functionAddress of UINT64_MAX in |
| 308 | // relocateCompactUnwind(). Filter them out here. |
Nico Weber | 7b6dd26 | 2021-05-08 17:03:17 | [diff] [blame] | 309 | // FIXME: This doesn't yet collect associated data like LSDAs kept |
| 310 | // alive only by a now-removed CompactUnwindEntry or other comdat-like |
| 311 | // data (`kindNoneGroupSubordinate*` in ld64). |
Nico Weber | d5a70db | 2021-05-06 18:47:57 | [diff] [blame] | 312 | CompactUnwindEntry<Ptr> tombstone; |
| 313 | tombstone.functionAddress = static_cast<Ptr>(UINT64_MAX); |
| 314 | cuPtrVector.erase( |
| 315 | std::lower_bound(cuPtrVector.begin(), cuPtrVector.end(), &tombstone, |
| 316 | [](const CompactUnwindEntry<Ptr> *a, |
| 317 | const CompactUnwindEntry<Ptr> *b) { |
| 318 | return a->functionAddress < b->functionAddress; |
| 319 | }), |
| 320 | cuPtrVector.end()); |
| 321 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 322 | // Fold adjacent entries with matching encoding+personality+lsda |
| 323 | // We use three iterators on the same cuPtrVector to fold in-situ: |
| 324 | // (1) `foldBegin` is the first of a potential sequence of matching entries |
| 325 | // (2) `foldEnd` is the first non-matching entry after `foldBegin`. |
| 326 | // The semi-open interval [ foldBegin .. foldEnd ) contains a range |
| 327 | // entries that can be folded into a single entry and written to ... |
| 328 | // (3) `foldWrite` |
| 329 | auto foldWrite = cuPtrVector.begin(); |
| 330 | for (auto foldBegin = cuPtrVector.begin(); foldBegin < cuPtrVector.end();) { |
| 331 | auto foldEnd = foldBegin; |
| 332 | while (++foldEnd < cuPtrVector.end() && |
| 333 | (*foldBegin)->encoding == (*foldEnd)->encoding && |
| 334 | (*foldBegin)->personality == (*foldEnd)->personality && |
| 335 | (*foldBegin)->lsda == (*foldEnd)->lsda) |
| 336 | ; |
| 337 | *foldWrite++ = *foldBegin; |
| 338 | foldBegin = foldEnd; |
| 339 | } |
| 340 | cuPtrVector.erase(foldWrite, cuPtrVector.end()); |
| 341 | |
Jez Ng | 525bfa1 | 2021-02-08 18:47:33 | [diff] [blame] | 342 | encodePersonalities(cuPtrVector, personalities); |
| 343 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 344 | // Count frequencies of the folded encodings |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 345 | EncodingMap encodingFrequencies; |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 346 | for (const CompactUnwindEntry<Ptr> *cuPtrEntry : cuPtrVector) |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 347 | encodingFrequencies[cuPtrEntry->encoding]++; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 348 | |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 349 | // Make a vector of encodings, sorted by descending frequency |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 350 | for (const auto &frequency : encodingFrequencies) |
| 351 | commonEncodings.emplace_back(frequency); |
Jez Ng | 7ca133c | 2021-04-26 05:23:32 | [diff] [blame] | 352 | llvm::sort(commonEncodings, |
| 353 | [](const std::pair<compact_unwind_encoding_t, size_t> &a, |
| 354 | const std::pair<compact_unwind_encoding_t, size_t> &b) { |
| 355 | if (a.second == b.second) |
| 356 | // When frequencies match, secondarily sort on encoding |
| 357 | // to maintain parity with validate-unwind-info.py |
| 358 | return a.first > b.first; |
| 359 | return a.second > b.second; |
| 360 | }); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 361 | |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 362 | // Truncate the vector to 127 elements. |
Nico Weber | 5688247 | 2021-01-02 03:28:11 | [diff] [blame] | 363 | // Common encoding indexes are limited to 0..126, while encoding |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 364 | // indexes 127..255 are local to each second-level page |
| 365 | if (commonEncodings.size() > COMMON_ENCODINGS_MAX) |
| 366 | commonEncodings.resize(COMMON_ENCODINGS_MAX); |
| 367 | |
| 368 | // Create a map from encoding to common-encoding-table index |
| 369 | for (size_t i = 0; i < commonEncodings.size(); i++) |
| 370 | commonEncodingIndexes[commonEncodings[i].first] = i; |
| 371 | |
| 372 | // Split folded encodings into pages, where each page is limited by ... |
| 373 | // (a) 4 KiB capacity |
| 374 | // (b) 24-bit difference between first & final function address |
| 375 | // (c) 8-bit compact-encoding-table index, |
| 376 | // for which 0..126 references the global common-encodings table, |
| 377 | // and 127..255 references a local per-second-level-page table. |
| 378 | // First we try the compact format and determine how many entries fit. |
| 379 | // If more entries fit in the regular format, we use that. |
| 380 | for (size_t i = 0; i < cuPtrVector.size();) { |
| 381 | secondLevelPages.emplace_back(); |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 382 | SecondLevelPage &page = secondLevelPages.back(); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 383 | page.entryIndex = i; |
| 384 | uintptr_t functionAddressMax = |
| 385 | cuPtrVector[i]->functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK; |
| 386 | size_t n = commonEncodings.size(); |
| 387 | size_t wordsRemaining = |
| 388 | SECOND_LEVEL_PAGE_WORDS - |
| 389 | sizeof(unwind_info_compressed_second_level_page_header) / |
| 390 | sizeof(uint32_t); |
| 391 | while (wordsRemaining >= 1 && i < cuPtrVector.size()) { |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 392 | const CompactUnwindEntry<Ptr> *cuPtr = cuPtrVector[i]; |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 393 | if (cuPtr->functionAddress >= functionAddressMax) { |
| 394 | break; |
| 395 | } else if (commonEncodingIndexes.count(cuPtr->encoding) || |
| 396 | page.localEncodingIndexes.count(cuPtr->encoding)) { |
| 397 | i++; |
| 398 | wordsRemaining--; |
| 399 | } else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) { |
| 400 | page.localEncodings.emplace_back(cuPtr->encoding); |
| 401 | page.localEncodingIndexes[cuPtr->encoding] = n++; |
| 402 | i++; |
| 403 | wordsRemaining -= 2; |
| 404 | } else { |
| 405 | break; |
| 406 | } |
| 407 | } |
| 408 | page.entryCount = i - page.entryIndex; |
| 409 | |
| 410 | // If this is not the final page, see if it's possible to fit more |
| 411 | // entries by using the regular format. This can happen when there |
| 412 | // are many unique encodings, and we we saturated the local |
| 413 | // encoding table early. |
| 414 | if (i < cuPtrVector.size() && |
| 415 | page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) { |
| 416 | page.kind = UNWIND_SECOND_LEVEL_REGULAR; |
| 417 | page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX, |
| 418 | cuPtrVector.size() - page.entryIndex); |
| 419 | i = page.entryIndex + page.entryCount; |
| 420 | } else { |
| 421 | page.kind = UNWIND_SECOND_LEVEL_COMPRESSED; |
| 422 | } |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 423 | } |
| 424 | |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 425 | for (const CompactUnwindEntry<Ptr> *cu : cuPtrVector) { |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 426 | uint32_t functionOffset = cu->functionAddress - in.header->addr; |
| 427 | functionToLsdaIndex[functionOffset] = lsdaEntries.size(); |
| 428 | if (cu->lsda != 0) |
| 429 | lsdaEntries.push_back( |
| 430 | {functionOffset, static_cast<uint32_t>(cu->lsda - in.header->addr)}); |
| 431 | } |
| 432 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 433 | // compute size of __TEXT,__unwind_info section |
| 434 | level2PagesOffset = |
| 435 | sizeof(unwind_info_section_header) + |
| 436 | commonEncodings.size() * sizeof(uint32_t) + |
| 437 | personalities.size() * sizeof(uint32_t) + |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 438 | // The extra second-level-page entry is for the sentinel |
| 439 | (secondLevelPages.size() + 1) * |
| 440 | sizeof(unwind_info_section_header_index_entry) + |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 441 | lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 442 | unwindInfoSize = |
| 443 | level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 444 | } |
| 445 | |
Nico Weber | 126f58e | 2020-12-02 01:27:33 | [diff] [blame] | 446 | // All inputs are relocated and output addresses are known, so write! |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 447 | |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 448 | template <class Ptr> |
| 449 | void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const { |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 450 | // section header |
| 451 | auto *uip = reinterpret_cast<unwind_info_section_header *>(buf); |
| 452 | uip->version = 1; |
| 453 | uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header); |
| 454 | uip->commonEncodingsArrayCount = commonEncodings.size(); |
| 455 | uip->personalityArraySectionOffset = |
| 456 | uip->commonEncodingsArraySectionOffset + |
| 457 | (uip->commonEncodingsArrayCount * sizeof(uint32_t)); |
| 458 | uip->personalityArrayCount = personalities.size(); |
| 459 | uip->indexSectionOffset = uip->personalityArraySectionOffset + |
| 460 | (uip->personalityArrayCount * sizeof(uint32_t)); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 461 | uip->indexCount = secondLevelPages.size() + 1; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 462 | |
| 463 | // Common encodings |
| 464 | auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]); |
| 465 | for (const auto &encoding : commonEncodings) |
| 466 | *i32p++ = encoding.first; |
| 467 | |
| 468 | // Personalities |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 469 | for (const uint32_t &personality : personalities) |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 470 | *i32p++ = |
| 471 | in.got->addr + (personality - 1) * target->wordSize - in.header->addr; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 472 | |
| 473 | // Level-1 index |
| 474 | uint32_t lsdaOffset = |
| 475 | uip->indexSectionOffset + |
| 476 | uip->indexCount * sizeof(unwind_info_section_header_index_entry); |
| 477 | uint64_t l2PagesOffset = level2PagesOffset; |
| 478 | auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 479 | for (const SecondLevelPage &page : secondLevelPages) { |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 480 | iep->functionOffset = |
| 481 | cuPtrVector[page.entryIndex]->functionAddress - in.header->addr; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 482 | iep->secondLevelPagesSectionOffset = l2PagesOffset; |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 483 | iep->lsdaIndexArraySectionOffset = |
| 484 | lsdaOffset + functionToLsdaIndex.lookup(iep->functionOffset) * |
| 485 | sizeof(unwind_info_section_header_lsda_index_entry); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 486 | iep++; |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 487 | l2PagesOffset += SECOND_LEVEL_PAGE_BYTES; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 488 | } |
| 489 | // Level-1 sentinel |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 490 | const CompactUnwindEntry<Ptr> &cuEnd = cuVector.back(); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 491 | iep->functionOffset = cuEnd.functionAddress + cuEnd.functionLength; |
| 492 | iep->secondLevelPagesSectionOffset = 0; |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 493 | iep->lsdaIndexArraySectionOffset = |
| 494 | lsdaOffset + |
| 495 | lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 496 | iep++; |
| 497 | |
| 498 | // LSDAs |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 499 | size_t lsdaBytes = |
| 500 | lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry); |
Jez Ng | ac9dd24 | 2021-02-08 19:50:13 | [diff] [blame] | 501 | if (lsdaBytes > 0) |
| 502 | memcpy(iep, lsdaEntries.data(), lsdaBytes); |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 503 | |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 504 | // Level-2 pages |
Jez Ng | 5112035 | 2021-02-08 18:47:34 | [diff] [blame] | 505 | auto *pp = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(iep) + |
| 506 | lsdaBytes); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 507 | for (const SecondLevelPage &page : secondLevelPages) { |
| 508 | if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) { |
| 509 | uintptr_t functionAddressBase = |
| 510 | cuPtrVector[page.entryIndex]->functionAddress; |
| 511 | auto *p2p = |
| 512 | reinterpret_cast<unwind_info_compressed_second_level_page_header *>( |
| 513 | pp); |
| 514 | p2p->kind = page.kind; |
| 515 | p2p->entryPageOffset = |
| 516 | sizeof(unwind_info_compressed_second_level_page_header); |
| 517 | p2p->entryCount = page.entryCount; |
| 518 | p2p->encodingsPageOffset = |
| 519 | p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t); |
| 520 | p2p->encodingsCount = page.localEncodings.size(); |
| 521 | auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); |
| 522 | for (size_t i = 0; i < page.entryCount; i++) { |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 523 | const CompactUnwindEntry<Ptr> *cuep = cuPtrVector[page.entryIndex + i]; |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 524 | auto it = commonEncodingIndexes.find(cuep->encoding); |
| 525 | if (it == commonEncodingIndexes.end()) |
| 526 | it = page.localEncodingIndexes.find(cuep->encoding); |
| 527 | *ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) | |
| 528 | (cuep->functionAddress - functionAddressBase); |
| 529 | } |
Fangrui Song | 791fe7a | 2020-12-21 04:01:20 | [diff] [blame] | 530 | if (page.localEncodings.size() != 0) |
| 531 | memcpy(ep, page.localEncodings.data(), |
| 532 | page.localEncodings.size() * sizeof(uint32_t)); |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 533 | } else { |
| 534 | auto *p2p = |
| 535 | reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp); |
| 536 | p2p->kind = page.kind; |
| 537 | p2p->entryPageOffset = |
| 538 | sizeof(unwind_info_regular_second_level_page_header); |
| 539 | p2p->entryCount = page.entryCount; |
| 540 | auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); |
| 541 | for (size_t i = 0; i < page.entryCount; i++) { |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 542 | const CompactUnwindEntry<Ptr> *cuep = cuPtrVector[page.entryIndex + i]; |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 543 | *ep++ = cuep->functionAddress; |
| 544 | *ep++ = cuep->encoding; |
| 545 | } |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 546 | } |
Greg McGary | 9993071 | 2020-12-07 06:33:38 | [diff] [blame] | 547 | pp += SECOND_LEVEL_PAGE_WORDS; |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 548 | } |
Greg McGary | 2124ca1 | 2020-08-20 20:05:13 | [diff] [blame] | 549 | } |
Jez Ng | 1460942 | 2021-04-16 01:14:33 | [diff] [blame] | 550 | |
| 551 | UnwindInfoSection *macho::makeUnwindInfoSection() { |
| 552 | if (target->wordSize == 8) |
| 553 | return make<UnwindInfoSectionImpl<uint64_t>>(); |
| 554 | else |
| 555 | return make<UnwindInfoSectionImpl<uint32_t>>(); |
| 556 | } |