Skip to content

Commit bb09f79

Browse files
authored
[OpenACC] Implement tile/collapse lowering (#138576)
These two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause just results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'.
1 parent 58d4ebb commit bb09f79

File tree

4 files changed

+201
-0
lines changed

4 files changed

+201
-0
lines changed

clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <type_traits>
1414

15+
#include "mlir/Dialect/Arith/IR/Arith.h"
1516
#include "mlir/Dialect/OpenACC/OpenACC.h"
1617
namespace clang {
1718
// Simple type-trait to see if the first template arg is one of the list, so we
@@ -82,6 +83,17 @@ class OpenACCClauseCIREmitter final
8283
return conversionOp.getResult(0);
8384
}
8485

86+
// Emits an 'arith.constant' at 'loc' that holds 'value' as a signless integer
// of 'width' bits, and returns the value produced by that operation.
mlir::Value createConstantInt(mlir::Location loc, unsigned width,
                              int64_t value) {
  mlir::MLIRContext *ctx = &cgf.getMLIRContext();
  auto signlessTy = mlir::IntegerType::get(
      ctx, width, mlir::IntegerType::SignednessSemantics::Signless);
  mlir::IntegerAttr valueAttr = builder.getIntegerAttr(signlessTy, value);

  return builder.create<mlir::arith::ConstantOp>(loc, valueAttr).getResult();
}
96+
8597
mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
8698
// '*' case leaves no identifier-info, just a nullptr.
8799
if (!ii)
@@ -336,6 +348,50 @@ class OpenACCClauseCIREmitter final
336348
return clauseNotImplemented(clause);
337349
}
338350
}
351+
352+
void VisitCollapseClause(const OpenACCCollapseClause &clause) {
353+
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
354+
llvm::APInt value =
355+
clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext());
356+
357+
value = value.sextOrTrunc(64);
358+
operation.setCollapseForDeviceTypes(builder.getContext(),
359+
lastDeviceTypeValues, value);
360+
} else {
361+
// TODO: When we've implemented this for everything, switch this to an
362+
// unreachable. Combined constructs remain.
363+
return clauseNotImplemented(clause);
364+
}
365+
}
366+
367+
void VisitTileClause(const OpenACCTileClause &clause) {
368+
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
369+
llvm::SmallVector<mlir::Value> values;
370+
371+
for (const Expr *e : clause.getSizeExprs()) {
372+
mlir::Location exprLoc = cgf.cgm.getLoc(e->getBeginLoc());
373+
374+
// We represent the * as -1. Additionally, this is a constant, so we
375+
// can always just emit it as 64 bits to avoid having to do any more
376+
// work to determine signedness or size.
377+
if (isa<OpenACCAsteriskSizeExpr>(e)) {
378+
values.push_back(createConstantInt(exprLoc, 64, -1));
379+
} else {
380+
llvm::APInt curValue =
381+
e->EvaluateKnownConstInt(cgf.cgm.getASTContext());
382+
values.push_back(createConstantInt(
383+
exprLoc, 64, curValue.sextOrTrunc(64).getSExtValue()));
384+
}
385+
}
386+
387+
operation.setTileForDeviceTypes(builder.getContext(),
388+
lastDeviceTypeValues, values);
389+
} else {
390+
// TODO: When we've implemented this for everything, switch this to an
391+
// unreachable. Combined constructs remain.
392+
return clauseNotImplemented(clause);
393+
}
394+
}
339395
};
340396

341397
template <typename OpTy>

clang/test/CIR/CodeGenOpenACC/loop.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
109109
// CHECK: acc.loop {
110110
// CHECK: acc.yield
111111
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
112+
113+
#pragma acc loop collapse(1) device_type(radeon)
114+
for(unsigned I = 0; I < N; ++I)
115+
for(unsigned J = 0; J < N; ++J)
116+
for(unsigned K = 0; K < N; ++K);
117+
// CHECK: acc.loop {
118+
// CHECK: acc.yield
119+
// CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
120+
121+
#pragma acc loop collapse(1) device_type(radeon) collapse (2)
122+
for(unsigned I = 0; I < N; ++I)
123+
for(unsigned J = 0; J < N; ++J)
124+
for(unsigned K = 0; K < N; ++K);
125+
// CHECK: acc.loop {
126+
// CHECK: acc.yield
127+
// CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
128+
129+
#pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
130+
for(unsigned I = 0; I < N; ++I)
131+
for(unsigned J = 0; J < N; ++J)
132+
for(unsigned K = 0; K < N; ++K);
133+
// CHECK: acc.loop {
134+
// CHECK: acc.yield
135+
// CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
136+
#pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
137+
for(unsigned I = 0; I < N; ++I)
138+
for(unsigned J = 0; J < N; ++J)
139+
for(unsigned K = 0; K < N; ++K);
140+
// CHECK: acc.loop {
141+
// CHECK: acc.yield
142+
// CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
143+
144+
#pragma acc loop tile(1, 2, 3)
145+
for(unsigned I = 0; I < N; ++I)
146+
for(unsigned J = 0; J < N; ++J)
147+
for(unsigned K = 0; K < N; ++K);
148+
// CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
149+
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
150+
// CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
151+
// CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
152+
// CHECK: acc.yield
153+
// CHECK-NEXT: } loc
154+
#pragma acc loop tile(2) device_type(radeon)
155+
for(unsigned I = 0; I < N; ++I)
156+
for(unsigned J = 0; J < N; ++J)
157+
for(unsigned K = 0; K < N; ++K);
158+
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
159+
// CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
160+
// CHECK: acc.yield
161+
// CHECK-NEXT: } loc
162+
#pragma acc loop tile(2) device_type(radeon) tile (1, *)
163+
for(unsigned I = 0; I < N; ++I)
164+
for(unsigned J = 0; J < N; ++J)
165+
for(unsigned K = 0; K < N; ++K);
166+
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
167+
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
168+
// CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
169+
// CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
170+
// CHECK: acc.yield
171+
// CHECK-NEXT: } loc
172+
#pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
173+
for(unsigned I = 0; I < N; ++I)
174+
for(unsigned J = 0; J < N; ++J)
175+
for(unsigned K = 0; K < N; ++K);
176+
// CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
177+
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
178+
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
179+
// CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
180+
// CHECK: acc.yield
181+
// CHECK-NEXT: } loc
182+
#pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
183+
for(unsigned I = 0; I < N; ++I)
184+
for(unsigned J = 0; J < N; ++J)
185+
for(unsigned K = 0; K < N; ++K);
186+
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
187+
// CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
188+
// CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
189+
// CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
190+
// CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
191+
// CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
192+
// CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
193+
// CHECK: acc.yield
194+
// CHECK-NEXT: } loc
195+
112196
}

mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2206,6 +2206,16 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
22062206
void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>);
22072207
// Add an entry to the 'auto' attribute for each additional device types.
22082208
void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>);
2209+
2210+
// Sets the collapse value for this 'loop' for a set of DeviceTypes. Note
2211+
// that this may only be set once per DeviceType, and will fail the verifier
2212+
// if this is set multiple times.
2213+
void setCollapseForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
2214+
llvm::APInt);
2215+
// Sets the tile values for this 'loop' for a set of DeviceTypes. All of the
2216+
// values should be integral constants, with the '*' represented as a '-1'.
2217+
void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
2218+
mlir::ValueRange);
22092219
}];
22102220

22112221
let hasCustomAssemblyFormat = 1;

mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2669,6 +2669,57 @@ void acc::LoopOp::addAuto(MLIRContext *context,
26692669
effectiveDeviceTypes));
26702670
}
26712671

2672+
// Appends 'value' to the 'collapse' attribute once per entry of
// 'effectiveDeviceTypes', keeping the parallel 'collapseDeviceType' attribute
// in step. An empty device-type list is recorded as a single
// 'DeviceType::None' entry. Entries already present on the operation are
// preserved ahead of the new ones. 'value' must be 64 bits wide.
void acc::LoopOp::setCollapseForDeviceTypes(
    MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
    llvm::APInt value) {
  // The two attributes are parallel arrays: both present or both absent.
  assert((getCollapseAttr() == nullptr) ==
         (getCollapseDeviceTypeAttr() == nullptr));
  assert(value.getBitWidth() == 64);

  llvm::SmallVector<mlir::Attribute> collapseCounts;
  llvm::SmallVector<mlir::Attribute> deviceTypes;

  // Start from whatever is already stored on the operation.
  if (getCollapseAttr()) {
    for (auto [count, deviceType] :
         llvm::zip_equal(getCollapseAttr(), getCollapseDeviceTypeAttr())) {
      collapseCounts.push_back(count);
      deviceTypes.push_back(deviceType);
    }
  }

  // Adds one (value, device-type) pair to the parallel arrays.
  const auto appendEntry = [&](DeviceType deviceType) {
    collapseCounts.push_back(
        mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
    deviceTypes.push_back(acc::DeviceTypeAttr::get(context, deviceType));
  };

  if (effectiveDeviceTypes.empty()) {
    // If the effective device-types list is empty, this is before there are
    // any being applied by device_type, so this should be added as a 'none'.
    appendEntry(DeviceType::None);
  } else {
    for (DeviceType deviceType : effectiveDeviceTypes)
      appendEntry(deviceType);
  }

  setCollapseAttr(ArrayAttr::get(context, collapseCounts));
  setCollapseDeviceTypeAttr(ArrayAttr::get(context, deviceTypes));
}
2708+
2709+
// Adds 'values' as tile operands of this 'loop' for 'effectiveDeviceTypes'.
// The existing operand segment sizes (if any) are copied, updated by
// 'addDeviceTypeAffectedOperandHelper' alongside the operand list, and then
// written back together with the device-type attribute the helper returns.
void acc::LoopOp::setTileForDeviceTypes(
    MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
    ValueRange values) {
  llvm::SmallVector<int32_t> segmentSizes;
  if (auto existingSegments = getTileOperandsSegments())
    segmentSizes.append(existingSegments->begin(), existingSegments->end());

  auto updatedDeviceTypes = addDeviceTypeAffectedOperandHelper(
      context, getTileOperandsDeviceTypeAttr(), effectiveDeviceTypes, values,
      getTileOperandsMutable(), segmentSizes);

  setTileOperandsDeviceTypeAttr(updatedDeviceTypes);
  setTileOperandsSegments(segmentSizes);
}
2722+
26722723
//===----------------------------------------------------------------------===//
26732724
// DataOp
26742725
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)