Skip to content

Commit cbcac1b

Browse files
committed
[BOLT] Factor out MCInstReference from gadget scanner (NFC)
Move MCInstReference out of the gadget scanner into bolt/Core/MCInstUtils.h. MCInstReference represents a constant reference to an instruction inside a parent entity - either inside a basic block (which has a reference to its parent function) or directly inside the function (when CFG information is not available).
1 parent 91b67b6 commit cbcac1b

File tree

5 files changed

+272
-237
lines changed

5 files changed

+272
-237
lines changed

bolt/include/bolt/Core/MCInstUtils.h

+172
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef BOLT_CORE_MCINSTUTILS_H
10+
#define BOLT_CORE_MCINSTUTILS_H
11+
12+
#include "bolt/Core/BinaryBasicBlock.h"
13+
14+
#include <functional>
15+
#include <map>
16+
#include <variant>
17+
18+
namespace llvm {
19+
namespace bolt {
20+
21+
class BinaryFunction;
22+
23+
/// MCInstReference represents a reference to a constant MCInst as stored either
24+
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
25+
/// (after a CFG is created).
26+
class MCInstReference {
27+
using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
28+
29+
// Two cases are possible:
30+
// * functions with CFG reconstructed - a function stores a collection of
31+
// basic blocks, each basic block stores a contiguous vector of MCInst
32+
// * functions without CFG - there are no basic blocks created,
33+
// the instructions are directly stored in std::map in BinaryFunction
34+
//
35+
// In both cases, the direct parent of MCInst is stored together with an
36+
// iterator pointing to the instruction.
37+
38+
// Helper struct: CFG is available, the direct parent is a basic block,
39+
// iterator's type is `MCInst *`.
40+
struct RefInBB {
41+
RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
42+
: BB(BB), It(Inst) {}
43+
RefInBB(const RefInBB &Other) = default;
44+
RefInBB &operator=(const RefInBB &Other) = default;
45+
46+
const BinaryBasicBlock *BB;
47+
BinaryBasicBlock::const_iterator It;
48+
49+
bool operator<(const RefInBB &Other) const {
50+
if (BB != Other.BB)
51+
return std::less<const BinaryBasicBlock *>{}(BB, Other.BB);
52+
return It < Other.It;
53+
}
54+
55+
bool operator==(const RefInBB &Other) const {
56+
return BB == Other.BB && It == Other.It;
57+
}
58+
};
59+
60+
// Helper struct: CFG is *not* available, the direct parent is a function,
61+
// iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
62+
// is an instruction's offset).
63+
struct RefInBF {
64+
RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
65+
: BF(BF), It(It) {}
66+
RefInBF(const RefInBF &Other) = default;
67+
RefInBF &operator=(const RefInBF &Other) = default;
68+
69+
const BinaryFunction *BF;
70+
nocfg_const_iterator It;
71+
72+
bool operator<(const RefInBF &Other) const {
73+
if (BF != Other.BF)
74+
return std::less<const BinaryFunction *>{}(BF, Other.BF);
75+
return It->first < Other.It->first;
76+
}
77+
78+
bool operator==(const RefInBF &Other) const {
79+
return BF == Other.BF && It->first == Other.It->first;
80+
}
81+
};
82+
83+
std::variant<RefInBB, RefInBF> Reference;
84+
85+
// Utility methods to be used like this:
86+
//
87+
// if (auto *Ref = tryGetRefInBB())
88+
// return Ref->doSomething(...);
89+
// return getRefInBF().doSomethingElse(...);
90+
const RefInBB *tryGetRefInBB() const {
91+
assert(std::get_if<RefInBB>(&Reference) ||
92+
std::get_if<RefInBF>(&Reference));
93+
return std::get_if<RefInBB>(&Reference);
94+
}
95+
const RefInBF &getRefInBF() const {
96+
assert(std::get_if<RefInBF>(&Reference));
97+
return *std::get_if<RefInBF>(&Reference);
98+
}
99+
100+
public:
101+
/// Constructs an empty reference.
102+
MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
103+
/// Constructs a reference to the instruction inside the basic block.
104+
MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
105+
: Reference(RefInBB(BB, Inst)) {
106+
assert(BB && Inst && "Neither BB nor Inst should be nullptr");
107+
}
108+
/// Constructs a reference to the instruction inside the basic block.
109+
MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
110+
: Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) {
111+
assert(BB && "Basic block should not be nullptr");
112+
}
113+
/// Constructs a reference to the instruction inside the function without
114+
/// CFG information.
115+
MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It)
116+
: Reference(RefInBF(BF, It)) {
117+
assert(BF && "Function should not be nullptr");
118+
}
119+
120+
/// Locates an instruction inside a function and returns a reference.
121+
static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF);
122+
123+
bool operator<(const MCInstReference &Other) const {
124+
return Reference < Other.Reference;
125+
}
126+
127+
bool operator==(const MCInstReference &Other) const {
128+
return Reference == Other.Reference;
129+
}
130+
131+
const MCInst &getMCInst() const {
132+
if (auto *Ref = tryGetRefInBB())
133+
return *Ref->It;
134+
return getRefInBF().It->second;
135+
}
136+
137+
operator const MCInst &() const { return getMCInst(); }
138+
139+
operator bool() const {
140+
if (auto *Ref = tryGetRefInBB())
141+
return Ref->BB != nullptr;
142+
return getRefInBF().BF != nullptr;
143+
}
144+
145+
bool hasCFG() const {
146+
return static_cast<bool>(*this) && tryGetRefInBB() != nullptr;
147+
}
148+
149+
const BinaryFunction *getFunction() const {
150+
if (auto *Ref = tryGetRefInBB())
151+
return Ref->BB->getFunction();
152+
return getRefInBF().BF;
153+
}
154+
155+
const BinaryBasicBlock *getBasicBlock() const {
156+
if (auto *Ref = tryGetRefInBB())
157+
return Ref->BB;
158+
return nullptr;
159+
}
160+
161+
raw_ostream &print(raw_ostream &OS) const;
162+
};
163+
164+
/// Stream insertion for MCInstReference: forwards to MCInstReference::print()
/// and returns whatever stream reference print() returns.
static inline raw_ostream &operator<<(raw_ostream &OS,
                                      const MCInstReference &Ref) {
  return Ref.print(OS);
}
168+
169+
} // namespace bolt
170+
} // namespace llvm
171+
172+
#endif

bolt/include/bolt/Passes/PAuthGadgetScanner.h

+1-179
Original file line numberDiff line numberDiff line change
@@ -11,191 +11,13 @@
1111

1212
#include "bolt/Core/BinaryContext.h"
1313
#include "bolt/Core/BinaryFunction.h"
14+
#include "bolt/Core/MCInstUtils.h"
1415
#include "bolt/Passes/BinaryPasses.h"
1516
#include "llvm/Support/raw_ostream.h"
1617
#include <memory>
1718

1819
namespace llvm {
1920
namespace bolt {
20-
21-
/// @brief MCInstReference represents a reference to an MCInst as stored either
22-
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
23-
/// (after a CFG is created). It aims to store the necessary information to be
24-
/// able to find the specific MCInst in either the BinaryFunction or
25-
/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
26-
/// the corresponding instruction can be computed.
27-
28-
struct MCInstInBBReference {
29-
BinaryBasicBlock *BB;
30-
int64_t BBIndex;
31-
MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
32-
: BB(BB), BBIndex(BBIndex) {}
33-
MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
34-
static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
35-
for (BinaryBasicBlock &BB : BF)
36-
for (size_t I = 0; I < BB.size(); ++I)
37-
if (Inst == &BB.getInstructionAtIndex(I))
38-
return MCInstInBBReference(&BB, I);
39-
return {};
40-
}
41-
bool operator==(const MCInstInBBReference &RHS) const {
42-
return BB == RHS.BB && BBIndex == RHS.BBIndex;
43-
}
44-
bool operator<(const MCInstInBBReference &RHS) const {
45-
if (BB != RHS.BB)
46-
return BB < RHS.BB;
47-
return BBIndex < RHS.BBIndex;
48-
}
49-
operator MCInst &() const {
50-
assert(BB != nullptr);
51-
return BB->getInstructionAtIndex(BBIndex);
52-
}
53-
uint64_t getAddress() const {
54-
// 4 bytes per instruction on AArch64.
55-
// FIXME: the assumption of 4 byte per instruction needs to be fixed before
56-
// this method gets used on any non-AArch64 binaries (but should be fine for
57-
// pac-ret analysis, as that is an AArch64-specific feature).
58-
return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
59-
}
60-
};
61-
62-
raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);
63-
64-
struct MCInstInBFReference {
65-
BinaryFunction *BF;
66-
uint64_t Offset;
67-
MCInstInBFReference(BinaryFunction *BF, uint64_t Offset)
68-
: BF(BF), Offset(Offset) {}
69-
70-
static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) {
71-
for (auto &I : BF.instrs())
72-
if (Inst == &I.second)
73-
return MCInstInBFReference(&BF, I.first);
74-
return {};
75-
}
76-
77-
MCInstInBFReference() : BF(nullptr), Offset(0) {}
78-
bool operator==(const MCInstInBFReference &RHS) const {
79-
return BF == RHS.BF && Offset == RHS.Offset;
80-
}
81-
bool operator<(const MCInstInBFReference &RHS) const {
82-
if (BF != RHS.BF)
83-
return BF < RHS.BF;
84-
return Offset < RHS.Offset;
85-
}
86-
operator MCInst &() const {
87-
assert(BF != nullptr);
88-
return *BF->getInstructionAtOffset(Offset);
89-
}
90-
91-
uint64_t getOffset() const { return Offset; }
92-
93-
uint64_t getAddress() const { return BF->getAddress() + getOffset(); }
94-
};
95-
96-
raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);
97-
98-
struct MCInstReference {
99-
enum Kind { FunctionParent, BasicBlockParent };
100-
Kind ParentKind;
101-
union U {
102-
MCInstInBBReference BBRef;
103-
MCInstInBFReference BFRef;
104-
U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
105-
U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
106-
} U;
107-
MCInstReference(MCInstInBBReference BBRef)
108-
: ParentKind(BasicBlockParent), U(BBRef) {}
109-
MCInstReference(MCInstInBFReference BFRef)
110-
: ParentKind(FunctionParent), U(BFRef) {}
111-
MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex)
112-
: MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
113-
MCInstReference(BinaryFunction *BF, uint32_t Offset)
114-
: MCInstReference(MCInstInBFReference(BF, Offset)) {}
115-
116-
static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) {
117-
if (BF.hasCFG())
118-
return MCInstInBBReference::get(Inst, BF);
119-
return MCInstInBFReference::get(Inst, BF);
120-
}
121-
122-
bool operator<(const MCInstReference &RHS) const {
123-
if (ParentKind != RHS.ParentKind)
124-
return ParentKind < RHS.ParentKind;
125-
switch (ParentKind) {
126-
case BasicBlockParent:
127-
return U.BBRef < RHS.U.BBRef;
128-
case FunctionParent:
129-
return U.BFRef < RHS.U.BFRef;
130-
}
131-
llvm_unreachable("");
132-
}
133-
134-
bool operator==(const MCInstReference &RHS) const {
135-
if (ParentKind != RHS.ParentKind)
136-
return false;
137-
switch (ParentKind) {
138-
case BasicBlockParent:
139-
return U.BBRef == RHS.U.BBRef;
140-
case FunctionParent:
141-
return U.BFRef == RHS.U.BFRef;
142-
}
143-
llvm_unreachable("");
144-
}
145-
146-
operator MCInst &() const {
147-
switch (ParentKind) {
148-
case BasicBlockParent:
149-
return U.BBRef;
150-
case FunctionParent:
151-
return U.BFRef;
152-
}
153-
llvm_unreachable("");
154-
}
155-
156-
operator bool() const {
157-
switch (ParentKind) {
158-
case BasicBlockParent:
159-
return U.BBRef.BB != nullptr;
160-
case FunctionParent:
161-
return U.BFRef.BF != nullptr;
162-
}
163-
llvm_unreachable("");
164-
}
165-
166-
uint64_t getAddress() const {
167-
switch (ParentKind) {
168-
case BasicBlockParent:
169-
return U.BBRef.getAddress();
170-
case FunctionParent:
171-
return U.BFRef.getAddress();
172-
}
173-
llvm_unreachable("");
174-
}
175-
176-
BinaryFunction *getFunction() const {
177-
switch (ParentKind) {
178-
case FunctionParent:
179-
return U.BFRef.BF;
180-
case BasicBlockParent:
181-
return U.BBRef.BB->getFunction();
182-
}
183-
llvm_unreachable("");
184-
}
185-
186-
BinaryBasicBlock *getBasicBlock() const {
187-
switch (ParentKind) {
188-
case FunctionParent:
189-
return nullptr;
190-
case BasicBlockParent:
191-
return U.BBRef.BB;
192-
}
193-
llvm_unreachable("");
194-
}
195-
};
196-
197-
raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);
198-
19921
namespace PAuthGadgetScanner {
20022

20123
// The report classes are designed to be used in an immutable manner.

bolt/lib/Core/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ add_llvm_library(LLVMBOLTCore
3131
GDBIndex.cpp
3232
HashUtilities.cpp
3333
JumpTable.cpp
34+
MCInstUtils.cpp
3435
MCPlusBuilder.cpp
3536
ParallelUtilities.cpp
3637
Relocation.cpp

0 commit comments

Comments
 (0)