Skip to content

Commit 4c69f82

Browse files
authored
[OpenACC][CIR] Implement basic lowering for combined constructs (#139119)
Combined constructs are emitted a little oddly: they are the first constructs for which a single directive produces two operations. First, the compute variant is emitted with 'combined(loop)', then the loop operation is emitted with 'combined(<variant>)'. Each gets its own normal terminator. This patch does not yet implement any clauses, because doing so will require special attention: certain clauses go to different locations and need their insertion points set correctly. For now, the patch is set up so that the 'not implemented' diagnostic is emitted for all clauses.
1 parent ac4bb42 commit 4c69f82

File tree

4 files changed

+121
-6
lines changed

4 files changed

+121
-6
lines changed

clang/lib/CIR/CodeGen/CIRGenFunction.h

+6
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,12 @@ class CIRGenFunction : public CIRGenTypeCache {
718718
SourceLocation dirLoc, llvm::ArrayRef<const OpenACCClause *> clauses,
719719
const Stmt *associatedStmt);
720720

721+
// Emits an OpenACC combined construct. `Op` is the compute operation type to
// create and `TermOp` is the terminator type placed at the end of that
// operation's region. `start`/`end` are the locations used for the created
// operations and their terminators, and `loopStmt` is the statement emitted
// inside the nested loop operation. Returns the result of emitting `loopStmt`.
template <typename Op, typename TermOp>
722+
mlir::LogicalResult emitOpenACCOpCombinedConstruct(
723+
mlir::Location start, mlir::Location end, OpenACCDirectiveKind dirKind,
724+
SourceLocation dirLoc, llvm::ArrayRef<const OpenACCClause *> clauses,
725+
const Stmt *loopStmt);
726+
721727
public:
722728
mlir::LogicalResult
723729
emitOpenACCComputeConstruct(const OpenACCComputeConstruct &s);

clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp

+78-2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,65 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpAssociatedStmt(
5656
return res;
5757
}
5858

59+
namespace {
60+
template <typename Op> struct CombinedType;
61+
template <> struct CombinedType<ParallelOp> {
62+
static constexpr mlir::acc::CombinedConstructsType value =
63+
mlir::acc::CombinedConstructsType::ParallelLoop;
64+
};
65+
template <> struct CombinedType<SerialOp> {
66+
static constexpr mlir::acc::CombinedConstructsType value =
67+
mlir::acc::CombinedConstructsType::SerialLoop;
68+
};
69+
template <> struct CombinedType<KernelsOp> {
70+
static constexpr mlir::acc::CombinedConstructsType value =
71+
mlir::acc::CombinedConstructsType::KernelsLoop;
72+
};
73+
} // namespace
74+
75+
template <typename Op, typename TermOp>
76+
mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct(
77+
mlir::Location start, mlir::Location end, OpenACCDirectiveKind dirKind,
78+
SourceLocation dirLoc, llvm::ArrayRef<const OpenACCClause *> clauses,
79+
const Stmt *loopStmt) {
80+
mlir::LogicalResult res = mlir::success();
81+
82+
llvm::SmallVector<mlir::Type> retTy;
83+
llvm::SmallVector<mlir::Value> operands;
84+
85+
auto computeOp = builder.create<Op>(start, retTy, operands);
86+
computeOp.setCombinedAttr(builder.getUnitAttr());
87+
mlir::acc::LoopOp loopOp;
88+
89+
// First, emit the bodies of both operations, with the loop inside the body of
90+
// the combined construct.
91+
{
92+
mlir::Block &block = computeOp.getRegion().emplaceBlock();
93+
mlir::OpBuilder::InsertionGuard guardCase(builder);
94+
builder.setInsertionPointToEnd(&block);
95+
96+
LexicalScope ls{*this, start, builder.getInsertionBlock()};
97+
auto loopOp = builder.create<LoopOp>(start, retTy, operands);
98+
loopOp.setCombinedAttr(mlir::acc::CombinedConstructsTypeAttr::get(
99+
builder.getContext(), CombinedType<Op>::value));
100+
101+
{
102+
mlir::Block &innerBlock = loopOp.getRegion().emplaceBlock();
103+
mlir::OpBuilder::InsertionGuard guardCase(builder);
104+
builder.setInsertionPointToEnd(&innerBlock);
105+
106+
LexicalScope ls{*this, start, builder.getInsertionBlock()};
107+
res = emitStmt(loopStmt, /*useCurrentScope=*/true);
108+
109+
builder.create<mlir::acc::YieldOp>(end);
110+
}
111+
112+
builder.create<TermOp>(end);
113+
}
114+
115+
return res;
116+
}
117+
59118
template <typename Op>
60119
Op CIRGenFunction::emitOpenACCOp(
61120
mlir::Location start, OpenACCDirectiveKind dirKind, SourceLocation dirLoc,
@@ -170,8 +229,25 @@ CIRGenFunction::emitOpenACCWaitConstruct(const OpenACCWaitConstruct &s) {
170229

171230
// Dispatches a combined construct to emitOpenACCOpCombinedConstruct, choosing
// the compute operation and its region terminator from the directive kind:
// parallel and serial regions end in acc.yield, kernels regions end in
// acc.terminator. The operation locations are taken from the begin and end of
// the statement's source range.
mlir::LogicalResult CIRGenFunction::emitOpenACCCombinedConstruct(
    const OpenACCCombinedConstruct &s) {
  mlir::Location start = getLoc(s.getSourceRange().getBegin());
  mlir::Location end = getLoc(s.getSourceRange().getEnd());

  switch (s.getDirectiveKind()) {
  case OpenACCDirectiveKind::ParallelLoop:
    return emitOpenACCOpCombinedConstruct<ParallelOp, mlir::acc::YieldOp>(
        start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(),
        s.getLoop());
  case OpenACCDirectiveKind::SerialLoop:
    return emitOpenACCOpCombinedConstruct<SerialOp, mlir::acc::YieldOp>(
        start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(),
        s.getLoop());
  case OpenACCDirectiveKind::KernelsLoop:
    return emitOpenACCOpCombinedConstruct<KernelsOp, mlir::acc::TerminatorOp>(
        start, end, s.getDirectiveKind(), s.getDirectiveLoc(), s.clauses(),
        s.getLoop());
  default:
    // Any other directive kind reaching this function is a caller bug.
    llvm_unreachable("invalid combined construct kind");
  }
}
176252
mlir::LogicalResult CIRGenFunction::emitOpenACCEnterDataConstruct(
177253
const OpenACCEnterDataConstruct &s) {
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
2+
3+
// Exercises lowering of the three combined constructs (parallel loop, serial
// loop, kernels loop). For each one the expected output is a compute operation
// marked combined(loop) that wraps an acc.loop marked with the compute kind;
// parallel and serial regions end in acc.yield, kernels in acc.terminator.
extern "C" void acc_combined(int N) {
4+
// CHECK: cir.func @acc_combined(%[[ARG_N:.*]]: !s32i loc{{.*}}) {
5+
// CHECK-NEXT: %[[ALLOCA_N:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["N", init]
6+
// CHECK-NEXT: cir.store %[[ARG_N]], %[[ALLOCA_N]] : !s32i, !cir.ptr<!s32i>
7+
8+
#pragma acc parallel loop
9+
for(unsigned I = 0; I < N; ++I);
10+
// CHECK: acc.parallel combined(loop) {
11+
// CHECK: acc.loop combined(parallel) {
12+
// CHECK: acc.yield
13+
// CHECK-NEXT: } loc
14+
// CHECK: acc.yield
15+
// CHECK-NEXT: } loc
16+
17+
#pragma acc serial loop
18+
for(unsigned I = 0; I < N; ++I);
19+
// CHECK: acc.serial combined(loop) {
20+
// CHECK: acc.loop combined(serial) {
21+
// CHECK: acc.yield
22+
// CHECK-NEXT: } loc
23+
// CHECK: acc.yield
24+
// CHECK-NEXT: } loc
25+
#pragma acc kernels loop
26+
for(unsigned I = 0; I < N; ++I);
27+
28+
// CHECK: acc.kernels combined(loop) {
29+
// CHECK: acc.loop combined(kernels) {
30+
// CHECK: acc.yield
31+
// CHECK-NEXT: } loc
32+
// CHECK: acc.terminator
33+
// CHECK-NEXT: } loc
34+
}

clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp

+3-4
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@
33

44
void HelloWorld(int *A, int *B, int *C, int N) {
55

6-
// expected-error@+2{{ClangIR code gen Not Yet Implemented: OpenACC Combined Construct}}
6+
// expected-error@+2{{ClangIR code gen Not Yet Implemented: OpenACC Atomic Construct}}
77
// expected-error@+1{{ClangIR code gen Not Yet Implemented: statement}}
8-
#pragma acc parallel loop
9-
for (unsigned I = 0; I < N; ++I)
10-
A[I] = B[I] + C[I];
8+
#pragma acc atomic
9+
N = N + 1;
1110

1211
// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Declare Construct}}
1312
#pragma acc declare create(A)

0 commit comments

Comments
 (0)