Skip to content

Commit d71a694

Browse files
committed
[AArch64][FastISel] Fallback on atomic stlr/cas with non-reg operands.
This has been a latent bug for almost 10 years, but is relatively hard to trigger, needing an address operand that isn't handled by getRegForValue (in the test here, constexpr casts). When that happens, it returns 0, which FastISel happily uses as a register operand, all the way to asm, where we either get a crash on an invalid register, or a silently corrupt instruction. Unfortunately, FastISel is still enabled at -O0 for at least ILP32/arm64_32.
1 parent 96d60c0 commit d71a694

File tree

2 files changed

+62
-6
lines changed

2 files changed

+62
-6
lines changed

llvm/lib/Target/AArch64/AArch64FastISel.cpp

+12-6
Original file line numberDiff line numberDiff line change
@@ -2190,6 +2190,8 @@ bool AArch64FastISel::selectStore(const Instruction *I) {
21902190
if (isReleaseOrStronger(Ord)) {
21912191
// The STLR addressing mode only supports a base reg; pass that directly.
21922192
Register AddrReg = getRegForValue(PtrV);
2193+
if (!AddrReg)
2194+
return false;
21932195
return emitStoreRelease(VT, SrcReg, AddrReg,
21942196
createMachineMemOperandFor(I));
21952197
}
@@ -5070,12 +5072,16 @@ bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
50705072

50715073
const MCInstrDesc &II = TII.get(Opc);
50725074

5073-
const Register AddrReg = constrainOperandRegClass(
5074-
II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5075-
const Register DesiredReg = constrainOperandRegClass(
5076-
II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5077-
const Register NewReg = constrainOperandRegClass(
5078-
II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5075+
Register AddrReg = getRegForValue(I->getPointerOperand());
5076+
Register DesiredReg = getRegForValue(I->getCompareOperand());
5077+
Register NewReg = getRegForValue(I->getNewValOperand());
5078+
5079+
if (!AddrReg || !DesiredReg || !NewReg)
5080+
return false;
5081+
5082+
AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
5083+
DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
5084+
NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
50795085

50805086
const Register ResultReg1 = createResultReg(ResRC);
50815087
const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=arm64_32-apple-darwin -O0 -fast-isel -verify-machineinstrs \
3+
; RUN: -aarch64-enable-atomic-cfg-tidy=0 -aarch64-enable-collect-loh=0 \
4+
; RUN: < %s | FileCheck %s
5+
6+
; FastISel doesn't support cstexprs as operands here, but make
7+
; sure it knows to fallback, at least.
8+
9+
define void @atomic_store_cstexpr_addr(i32 %val) #0 {
10+
; CHECK-LABEL: atomic_store_cstexpr_addr:
11+
; CHECK: ; %bb.0:
12+
; CHECK-NEXT: adrp x8, _g@PAGE
13+
; CHECK-NEXT: add x8, x8, _g@PAGEOFF
14+
; CHECK-NEXT: ; kill: def $w1 killed $w8 killed $x8
15+
; CHECK-NEXT: adrp x8, _g@PAGE
16+
; CHECK-NEXT: add x8, x8, _g@PAGEOFF
17+
; CHECK-NEXT: stlr w0, [x8]
18+
; CHECK-NEXT: ret
19+
store atomic i32 %val, ptr inttoptr (i32 ptrtoint (ptr @g to i32) to ptr) release, align 4
20+
ret void
21+
}
22+
23+
define i32 @cmpxchg_cstexpr_addr(i32 %cmp, i32 %new, ptr %ps) #0 {
24+
; CHECK-LABEL: cmpxchg_cstexpr_addr:
25+
; CHECK: ; %bb.0:
26+
; CHECK-NEXT: mov w8, w0
27+
; CHECK-NEXT: adrp x10, _g@PAGE
28+
; CHECK-NEXT: add x10, x10, _g@PAGEOFF
29+
; CHECK-NEXT: LBB1_1: ; =>This Inner Loop Header: Depth=1
30+
; CHECK-NEXT: ldaxr w0, [x10]
31+
; CHECK-NEXT: cmp w0, w8
32+
; CHECK-NEXT: b.ne LBB1_3
33+
; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
34+
; CHECK-NEXT: stlxr w9, w1, [x10]
35+
; CHECK-NEXT: cbnz w9, LBB1_1
36+
; CHECK-NEXT: LBB1_3:
37+
; CHECK-NEXT: subs w8, w0, w8
38+
; CHECK-NEXT: cset w8, eq
39+
; CHECK-NEXT: ; kill: def $w1 killed $w8
40+
; CHECK-NEXT: str w8, [x2]
41+
; CHECK-NEXT: ret
42+
%tmp0 = cmpxchg ptr inttoptr (i32 ptrtoint (ptr @g to i32) to ptr), i32 %cmp, i32 %new seq_cst seq_cst
43+
%tmp1 = extractvalue { i32, i1 } %tmp0, 0
44+
%tmp2 = extractvalue { i32, i1 } %tmp0, 1
45+
%tmp3 = zext i1 %tmp2 to i32
46+
store i32 %tmp3, ptr %ps
47+
ret i32 %tmp1
48+
}
49+
50+
@g = global i32 0

0 commit comments

Comments
 (0)