-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[OpenACC] Implement tile/collapse lowering #138576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
These two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'.
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-clang Author: Erich Keane (erichkeane) ChangesThese two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'. Full diff: https://github.com/llvm/llvm-project/pull/138576.diff 4 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index ff0bf6e7f55dd..7223a8ed15bd5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -12,6 +12,7 @@
#include <type_traits>
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
namespace clang {
// Simple type-trait to see if the first template arg is one of the list, so we
@@ -82,6 +83,17 @@ class OpenACCClauseCIREmitter final
return conversionOp.getResult(0);
}
+ mlir::Value createConstantInt(mlir::Location loc, unsigned width,
+ int64_t value) {
+ mlir::IntegerType ty = mlir::IntegerType::get(
+ &cgf.getMLIRContext(), width,
+ mlir::IntegerType::SignednessSemantics::Signless);
+ auto constOp = builder.create<mlir::arith::ConstantOp>(
+ loc, ty, builder.getIntegerAttr(ty, value));
+
+ return constOp.getResult();
+ }
+
mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
// '*' case leaves no identifier-info, just a nullptr.
if (!ii)
@@ -336,6 +348,52 @@ class OpenACCClauseCIREmitter final
return clauseNotImplemented(clause);
}
}
+
+ void VisitCollapseClause(const OpenACCCollapseClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::APInt value =
+ clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+
+ if (value.getBitWidth() != 64)
+ value = value.sext(64);
+
+ operation.setCollapseForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, value);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
+
+ void VisitTileClause(const OpenACCTileClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::SmallVector<mlir::Value> values;
+
+ for (const Expr *e : clause.getSizeExprs()) {
+ mlir::Location exprLoc = cgf.cgm.getLoc(e->getBeginLoc());
+
+ // We represent the * as -1. Additionally, this is a constant, so we
+ // can always just emit it as 64 bits to avoid having to do any more
+ // work to determine signedness or size.
+ if (isa<OpenACCAsteriskSizeExpr>(e)) {
+ values.push_back(createConstantInt(exprLoc, 64, -1));
+ } else {
+ llvm::APInt curValue =
+ e->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+ values.push_back(
+ createConstantInt(exprLoc, 64, curValue.getSExtValue()));
+ }
+ }
+
+ operation.setTileForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, values);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
};
template <typename OpTy>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp
index 2757d935e1f76..b255a01adda0e 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp
@@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+
+ #pragma acc loop collapse(1) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
+
+ #pragma acc loop collapse(1) device_type(radeon) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
+
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
+
+ #pragma acc loop tile(1, 2, 3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon) tile (1, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
}
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c3df064cf0ead..41b01a14a6498 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2206,6 +2206,16 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>);
// Add an entry to the 'auto' attribute for each additional device types.
void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>);
+
+ // Sets the collapse value for this 'loop' for a set of DeviceTypes. Note
+ // that this may only be set once per DeviceType, and will fail the verifier
+ // if this is set multiple times.
+ void setCollapseForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ llvm::APInt);
+ // Sets the tile values for this 'loop' for a set of DeviceTypes. All of the
+ // values should be integral constants, with the '*' represented as a '-1'.
+ void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ mlir::ValueRange);
}];
let hasCustomAssemblyFormat = 1;
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 39dbb0c92a309..f26b3a5143c0b 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2669,6 +2669,57 @@ void acc::LoopOp::addAuto(MLIRContext *context,
effectiveDeviceTypes));
}
+void acc::LoopOp::setCollapseForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ llvm::APInt value) {
+ llvm::SmallVector<mlir::Attribute> newValues;
+ llvm::SmallVector<mlir::Attribute> newDeviceTypes;
+
+ assert((getCollapseAttr() == nullptr) ==
+ (getCollapseDeviceTypeAttr() == nullptr));
+ assert(value.getBitWidth() == 64);
+
+ if (getCollapseAttr()) {
+ for (const auto &existing :
+ llvm::zip_equal(getCollapseAttr(), getCollapseDeviceTypeAttr())) {
+ newValues.push_back(std::get<0>(existing));
+ newDeviceTypes.push_back(std::get<1>(existing));
+ }
+ }
+
+ if (effectiveDeviceTypes.empty()) {
+ // If the effective device-types list is empty, this is before there are any
+ // being applied by device_type, so this should be added as a 'none'.
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(
+ acc::DeviceTypeAttr::get(context, DeviceType::None));
+ } else {
+ for (DeviceType DT : effectiveDeviceTypes) {
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(acc::DeviceTypeAttr::get(context, DT));
+ }
+ }
+
+ setCollapseAttr(ArrayAttr::get(context, newValues));
+ setCollapseDeviceTypeAttr(ArrayAttr::get(context, newDeviceTypes));
+}
+
+void acc::LoopOp::setTileForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ ValueRange values) {
+ llvm::SmallVector<int32_t> segments;
+ if (getTileOperandsSegments())
+ llvm::copy(*getTileOperandsSegments(), std::back_inserter(segments));
+
+ setTileOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
+ context, getTileOperandsDeviceTypeAttr(), effectiveDeviceTypes, values,
+ getTileOperandsMutable(), segments));
+
+ setTileOperandsSegments(segments);
+}
+
//===----------------------------------------------------------------------===//
// DataOp
//===----------------------------------------------------------------------===//
|
@llvm/pr-subscribers-mlir-openacc Author: Erich Keane (erichkeane) ChangesThese two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'. Full diff: https://github.com/llvm/llvm-project/pull/138576.diff 4 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index ff0bf6e7f55dd..7223a8ed15bd5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -12,6 +12,7 @@
#include <type_traits>
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
namespace clang {
// Simple type-trait to see if the first template arg is one of the list, so we
@@ -82,6 +83,17 @@ class OpenACCClauseCIREmitter final
return conversionOp.getResult(0);
}
+ mlir::Value createConstantInt(mlir::Location loc, unsigned width,
+ int64_t value) {
+ mlir::IntegerType ty = mlir::IntegerType::get(
+ &cgf.getMLIRContext(), width,
+ mlir::IntegerType::SignednessSemantics::Signless);
+ auto constOp = builder.create<mlir::arith::ConstantOp>(
+ loc, ty, builder.getIntegerAttr(ty, value));
+
+ return constOp.getResult();
+ }
+
mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
// '*' case leaves no identifier-info, just a nullptr.
if (!ii)
@@ -336,6 +348,52 @@ class OpenACCClauseCIREmitter final
return clauseNotImplemented(clause);
}
}
+
+ void VisitCollapseClause(const OpenACCCollapseClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::APInt value =
+ clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+
+ if (value.getBitWidth() != 64)
+ value = value.sext(64);
+
+ operation.setCollapseForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, value);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
+
+ void VisitTileClause(const OpenACCTileClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::SmallVector<mlir::Value> values;
+
+ for (const Expr *e : clause.getSizeExprs()) {
+ mlir::Location exprLoc = cgf.cgm.getLoc(e->getBeginLoc());
+
+ // We represent the * as -1. Additionally, this is a constant, so we
+ // can always just emit it as 64 bits to avoid having to do any more
+ // work to determine signedness or size.
+ if (isa<OpenACCAsteriskSizeExpr>(e)) {
+ values.push_back(createConstantInt(exprLoc, 64, -1));
+ } else {
+ llvm::APInt curValue =
+ e->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+ values.push_back(
+ createConstantInt(exprLoc, 64, curValue.getSExtValue()));
+ }
+ }
+
+ operation.setTileForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, values);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
};
template <typename OpTy>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp
index 2757d935e1f76..b255a01adda0e 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp
@@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+
+ #pragma acc loop collapse(1) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
+
+ #pragma acc loop collapse(1) device_type(radeon) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
+
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
+
+ #pragma acc loop tile(1, 2, 3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon) tile (1, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
}
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c3df064cf0ead..41b01a14a6498 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2206,6 +2206,16 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>);
// Add an entry to the 'auto' attribute for each additional device types.
void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>);
+
+ // Sets the collapse value for this 'loop' for a set of DeviceTypes. Note
+ // that this may only be set once per DeviceType, and will fail the verifier
+ // if this is set multiple times.
+ void setCollapseForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ llvm::APInt);
+ // Sets the tile values for this 'loop' for a set of DeviceTypes. All of the
+ // values should be integral constants, with the '*' represented as a '-1'.
+ void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ mlir::ValueRange);
}];
let hasCustomAssemblyFormat = 1;
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 39dbb0c92a309..f26b3a5143c0b 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2669,6 +2669,57 @@ void acc::LoopOp::addAuto(MLIRContext *context,
effectiveDeviceTypes));
}
+void acc::LoopOp::setCollapseForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ llvm::APInt value) {
+ llvm::SmallVector<mlir::Attribute> newValues;
+ llvm::SmallVector<mlir::Attribute> newDeviceTypes;
+
+ assert((getCollapseAttr() == nullptr) ==
+ (getCollapseDeviceTypeAttr() == nullptr));
+ assert(value.getBitWidth() == 64);
+
+ if (getCollapseAttr()) {
+ for (const auto &existing :
+ llvm::zip_equal(getCollapseAttr(), getCollapseDeviceTypeAttr())) {
+ newValues.push_back(std::get<0>(existing));
+ newDeviceTypes.push_back(std::get<1>(existing));
+ }
+ }
+
+ if (effectiveDeviceTypes.empty()) {
+ // If the effective device-types list is empty, this is before there are any
+ // being applied by device_type, so this should be added as a 'none'.
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(
+ acc::DeviceTypeAttr::get(context, DeviceType::None));
+ } else {
+ for (DeviceType DT : effectiveDeviceTypes) {
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(acc::DeviceTypeAttr::get(context, DT));
+ }
+ }
+
+ setCollapseAttr(ArrayAttr::get(context, newValues));
+ setCollapseDeviceTypeAttr(ArrayAttr::get(context, newDeviceTypes));
+}
+
+void acc::LoopOp::setTileForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ ValueRange values) {
+ llvm::SmallVector<int32_t> segments;
+ if (getTileOperandsSegments())
+ llvm::copy(*getTileOperandsSegments(), std::back_inserter(segments));
+
+ setTileOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
+ context, getTileOperandsDeviceTypeAttr(), effectiveDeviceTypes, values,
+ getTileOperandsMutable(), segments));
+
+ setTileOperandsSegments(segments);
+}
+
//===----------------------------------------------------------------------===//
// DataOp
//===----------------------------------------------------------------------===//
|
@llvm/pr-subscribers-clangir Author: Erich Keane (erichkeane) ChangesThese two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'. Full diff: https://github.com/llvm/llvm-project/pull/138576.diff 4 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index ff0bf6e7f55dd..7223a8ed15bd5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -12,6 +12,7 @@
#include <type_traits>
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
namespace clang {
// Simple type-trait to see if the first template arg is one of the list, so we
@@ -82,6 +83,17 @@ class OpenACCClauseCIREmitter final
return conversionOp.getResult(0);
}
+ mlir::Value createConstantInt(mlir::Location loc, unsigned width,
+ int64_t value) {
+ mlir::IntegerType ty = mlir::IntegerType::get(
+ &cgf.getMLIRContext(), width,
+ mlir::IntegerType::SignednessSemantics::Signless);
+ auto constOp = builder.create<mlir::arith::ConstantOp>(
+ loc, ty, builder.getIntegerAttr(ty, value));
+
+ return constOp.getResult();
+ }
+
mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
// '*' case leaves no identifier-info, just a nullptr.
if (!ii)
@@ -336,6 +348,52 @@ class OpenACCClauseCIREmitter final
return clauseNotImplemented(clause);
}
}
+
+ void VisitCollapseClause(const OpenACCCollapseClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::APInt value =
+ clause.getIntExpr()->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+
+ if (value.getBitWidth() != 64)
+ value = value.sext(64);
+
+ operation.setCollapseForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, value);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
+
+ void VisitTileClause(const OpenACCTileClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ llvm::SmallVector<mlir::Value> values;
+
+ for (const Expr *e : clause.getSizeExprs()) {
+ mlir::Location exprLoc = cgf.cgm.getLoc(e->getBeginLoc());
+
+ // We represent the * as -1. Additionally, this is a constant, so we
+ // can always just emit it as 64 bits to avoid having to do any more
+ // work to determine signedness or size.
+ if (isa<OpenACCAsteriskSizeExpr>(e)) {
+ values.push_back(createConstantInt(exprLoc, 64, -1));
+ } else {
+ llvm::APInt curValue =
+ e->EvaluateKnownConstInt(cgf.cgm.getASTContext());
+ values.push_back(
+ createConstantInt(exprLoc, 64, curValue.getSExtValue()));
+ }
+ }
+
+ operation.setTileForDeviceTypes(builder.getContext(),
+ lastDeviceTypeValues, values);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
};
template <typename OpTy>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp
index 2757d935e1f76..b255a01adda0e 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp
@@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
// CHECK: acc.loop {
// CHECK: acc.yield
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+
+ #pragma acc loop collapse(1) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
+
+ #pragma acc loop collapse(1) device_type(radeon) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
+
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
+
+ #pragma acc loop tile(1, 2, 3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(2) device_type(radeon) tile (1, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
}
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c3df064cf0ead..41b01a14a6498 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2206,6 +2206,16 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>);
// Add an entry to the 'auto' attribute for each additional device types.
void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>);
+
+ // Sets the collapse value for this 'loop' for a set of DeviceTypes. Note
+ // that this may only be set once per DeviceType, and will fail the verifier
+ // if this is set multiple times.
+ void setCollapseForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ llvm::APInt);
+ // Sets the tile values for this 'loop' for a set of DeviceTypes. All of the
+ // values should be integral constants, with the '*' represented as a '-1'.
+ void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
+ mlir::ValueRange);
}];
let hasCustomAssemblyFormat = 1;
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 39dbb0c92a309..f26b3a5143c0b 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2669,6 +2669,57 @@ void acc::LoopOp::addAuto(MLIRContext *context,
effectiveDeviceTypes));
}
+void acc::LoopOp::setCollapseForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ llvm::APInt value) {
+ llvm::SmallVector<mlir::Attribute> newValues;
+ llvm::SmallVector<mlir::Attribute> newDeviceTypes;
+
+ assert((getCollapseAttr() == nullptr) ==
+ (getCollapseDeviceTypeAttr() == nullptr));
+ assert(value.getBitWidth() == 64);
+
+ if (getCollapseAttr()) {
+ for (const auto &existing :
+ llvm::zip_equal(getCollapseAttr(), getCollapseDeviceTypeAttr())) {
+ newValues.push_back(std::get<0>(existing));
+ newDeviceTypes.push_back(std::get<1>(existing));
+ }
+ }
+
+ if (effectiveDeviceTypes.empty()) {
+ // If the effective device-types list is empty, this is before there are any
+ // being applied by device_type, so this should be added as a 'none'.
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(
+ acc::DeviceTypeAttr::get(context, DeviceType::None));
+ } else {
+ for (DeviceType DT : effectiveDeviceTypes) {
+ newValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), value));
+ newDeviceTypes.push_back(acc::DeviceTypeAttr::get(context, DT));
+ }
+ }
+
+ setCollapseAttr(ArrayAttr::get(context, newValues));
+ setCollapseDeviceTypeAttr(ArrayAttr::get(context, newDeviceTypes));
+}
+
+void acc::LoopOp::setTileForDeviceTypes(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ ValueRange values) {
+ llvm::SmallVector<int32_t> segments;
+ if (getTileOperandsSegments())
+ llvm::copy(*getTileOperandsSegments(), std::back_inserter(segments));
+
+ setTileOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
+ context, getTileOperandsDeviceTypeAttr(), effectiveDeviceTypes, values,
+ getTileOperandsMutable(), segments));
+
+ setTileOperandsSegments(segments);
+}
+
//===----------------------------------------------------------------------===//
// DataOp
//===----------------------------------------------------------------------===//
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you. LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
These two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'.
These two ended up being pretty similar in frontend implementation, and fairly trivial when doing lowering. The collapse clause jsut results in a normal device_type style attribute with some mild additional complexity, and 'tile' just uses the current infrastructure for 'with segments'.