Skip to content

[flang][fir] Lower do concurrent loop nests to fir.do_concurrent #137928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 136 additions & 92 deletions flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,11 @@ struct IncrementLoopInfo {
template <typename T>
explicit IncrementLoopInfo(Fortran::semantics::Symbol &sym, const T &lower,
const T &upper, const std::optional<T> &step,
bool isUnordered = false)
bool isConcurrent = false)
: loopVariableSym{&sym}, lowerExpr{Fortran::semantics::GetExpr(lower)},
upperExpr{Fortran::semantics::GetExpr(upper)},
stepExpr{Fortran::semantics::GetExpr(step)}, isUnordered{isUnordered} {}
stepExpr{Fortran::semantics::GetExpr(step)},
isConcurrent{isConcurrent} {}

IncrementLoopInfo(IncrementLoopInfo &&) = default;
IncrementLoopInfo &operator=(IncrementLoopInfo &&x) = default;
Expand All @@ -120,7 +121,7 @@ struct IncrementLoopInfo {
const Fortran::lower::SomeExpr *upperExpr;
const Fortran::lower::SomeExpr *stepExpr;
const Fortran::lower::SomeExpr *maskExpr = nullptr;
bool isUnordered; // do concurrent, forall
bool isConcurrent;
llvm::SmallVector<const Fortran::semantics::Symbol *> localSymList;
llvm::SmallVector<const Fortran::semantics::Symbol *> localInitSymList;
llvm::SmallVector<
Expand All @@ -130,7 +131,7 @@ struct IncrementLoopInfo {
mlir::Value loopVariable = nullptr;

// Data members for structured loops.
fir::DoLoopOp doLoop = nullptr;
mlir::Operation *loopOp = nullptr;

// Data members for unstructured loops.
bool hasRealControl = false;
Expand Down Expand Up @@ -1981,7 +1982,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
llvm_unreachable("illegal reduction operator");
}

/// Collect DO CONCURRENT or FORALL loop control information.
/// Collect DO CONCURRENT loop control information.
IncrementLoopNestInfo getConcurrentControl(
const Fortran::parser::ConcurrentHeader &header,
const std::list<Fortran::parser::LocalitySpec> &localityList = {}) {
Expand Down Expand Up @@ -2292,8 +2293,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get(
builder->getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ ua,
/*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
if (has_attrs)
info.doLoop.setLoopAnnotationAttr(la);
if (has_attrs) {
if (auto loopOp = mlir::dyn_cast<fir::DoLoopOp>(info.loopOp))
loopOp.setLoopAnnotationAttr(la);

if (auto doConcurrentOp =
mlir::dyn_cast<fir::DoConcurrentLoopOp>(info.loopOp))
doConcurrentOp.setLoopAnnotationAttr(la);
}
}

/// Generate FIR to begin a structured or unstructured increment loop nest.
Expand All @@ -2302,96 +2309,77 @@ class FirConverter : public Fortran::lower::AbstractConverter {
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
mlir::Location loc = toLocation();
mlir::Operation *boundsAndStepIP = nullptr;
mlir::arith::IntegerOverflowFlags iofBackup{};

llvm::SmallVector<mlir::Value> nestLBs;
llvm::SmallVector<mlir::Value> nestUBs;
llvm::SmallVector<mlir::Value> nestSts;
llvm::SmallVector<mlir::Value> nestReduceOperands;
llvm::SmallVector<mlir::Attribute> nestReduceAttrs;
bool genDoConcurrent = false;

for (IncrementLoopInfo &info : incrementLoopNestInfo) {
mlir::Value lowerValue;
mlir::Value upperValue;
mlir::Value stepValue;
genDoConcurrent = info.isStructured() && info.isConcurrent;

{
mlir::OpBuilder::InsertionGuard guard(*builder);
if (!genDoConcurrent)
info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
info.isConcurrent);

// Set the IP before the first loop in the nest so that all nest bounds
// and step values are created outside the nest.
if (boundsAndStepIP)
builder->setInsertionPointAfter(boundsAndStepIP);
if (!getLoweringOptions().getIntegerWrapAround()) {
iofBackup = builder->getIntegerOverflowFlags();
builder->setIntegerOverflowFlags(
mlir::arith::IntegerOverflowFlags::nsw);
}

info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
info.isUnordered);
if (!getLoweringOptions().getIntegerWrapAround()) {
iofBackup = builder->getIntegerOverflowFlags();
builder->setIntegerOverflowFlags(
mlir::arith::IntegerOverflowFlags::nsw);
}
lowerValue = genControlValue(info.lowerExpr, info);
upperValue = genControlValue(info.upperExpr, info);
bool isConst = true;
stepValue = genControlValue(info.stepExpr, info,
info.isStructured() ? nullptr : &isConst);
if (!getLoweringOptions().getIntegerWrapAround())
builder->setIntegerOverflowFlags(iofBackup);
boundsAndStepIP = stepValue.getDefiningOp();

// Use a temp variable for unstructured loops with non-const step.
if (!isConst) {
info.stepVariable =
builder->createTemporary(loc, stepValue.getType());
boundsAndStepIP =
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
nestLBs.push_back(genControlValue(info.lowerExpr, info));
nestUBs.push_back(genControlValue(info.upperExpr, info));
bool isConst = true;
nestSts.push_back(genControlValue(
info.stepExpr, info, info.isStructured() ? nullptr : &isConst));

if (!getLoweringOptions().getIntegerWrapAround())
builder->setIntegerOverflowFlags(iofBackup);

// Use a temp variable for unstructured loops with non-const step.
if (!isConst) {
mlir::Value stepValue = nestSts.back();
info.stepVariable = builder->createTemporary(loc, stepValue.getType());
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
}

if (genDoConcurrent && nestReduceOperands.empty()) {
// Create DO CONCURRENT reduce operands and attributes
for (const auto &reduceSym : info.reduceSymList) {
const fir::ReduceOperationEnum reduceOperation = reduceSym.first;
const Fortran::semantics::Symbol *sym = reduceSym.second;
fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
nestReduceOperands.push_back(fir::getBase(exv));
auto reduceAttr =
fir::ReduceAttr::get(builder->getContext(), reduceOperation);
nestReduceAttrs.push_back(reduceAttr);
}
}
}

for (auto [info, lowerValue, upperValue, stepValue] :
llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) {
// Structured loop - generate fir.do_loop.
if (info.isStructured()) {
if (genDoConcurrent)
continue;

// The loop variable is a doLoop op argument.
mlir::Type loopVarType = info.getLoopVariableType();
mlir::Value loopValue;
if (info.isUnordered) {
llvm::SmallVector<mlir::Value> reduceOperands;
llvm::SmallVector<mlir::Attribute> reduceAttrs;
// Create DO CONCURRENT reduce operands and attributes
for (const auto &reduceSym : info.reduceSymList) {
const fir::ReduceOperationEnum reduce_operation = reduceSym.first;
const Fortran::semantics::Symbol *sym = reduceSym.second;
fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr);
reduceOperands.push_back(fir::getBase(exv));
auto reduce_attr =
fir::ReduceAttr::get(builder->getContext(), reduce_operation);
reduceAttrs.push_back(reduce_attr);
}
// The loop variable value is explicitly updated.
info.doLoop = builder->create<fir::DoLoopOp>(
loc, lowerValue, upperValue, stepValue, /*unordered=*/true,
/*finalCountValue=*/false, /*iterArgs=*/std::nullopt,
llvm::ArrayRef<mlir::Value>(reduceOperands), reduceAttrs);
builder->setInsertionPointToStart(info.doLoop.getBody());
loopValue = builder->createConvert(loc, loopVarType,
info.doLoop.getInductionVar());
} else {
// The loop variable is a doLoop op argument.
info.doLoop = builder->create<fir::DoLoopOp>(
loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
/*finalCountValue=*/true,
builder->createConvert(loc, loopVarType, lowerValue));
builder->setInsertionPointToStart(info.doLoop.getBody());
loopValue = info.doLoop.getRegionIterArgs()[0];
}
auto loopOp = builder->create<fir::DoLoopOp>(
loc, lowerValue, upperValue, stepValue, /*unordered=*/false,
/*finalCountValue=*/true,
builder->createConvert(loc, loopVarType, lowerValue));
info.loopOp = loopOp;
builder->setInsertionPointToStart(loopOp.getBody());
mlir::Value loopValue = loopOp.getRegionIterArgs()[0];

// Update the loop variable value in case it has non-index references.
builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);
if (info.maskExpr) {
Fortran::lower::StatementContext stmtCtx;
mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
stmtCtx.finalizeAndReset();
mlir::Value maskCondCast =
builder->createConvert(loc, builder->getI1Type(), maskCond);
auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
/*withElseRegion=*/false);
builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
}
if (info.hasLocalitySpecs())
handleLocalitySpecs(info);

addLoopAnnotationAttr(info, dirs);
continue;
}
Expand Down Expand Up @@ -2455,6 +2443,60 @@ class FirConverter : public Fortran::lower::AbstractConverter {
builder->restoreInsertionPoint(insertPt);
}
}

if (genDoConcurrent) {
auto loopWrapperOp = builder->create<fir::DoConcurrentOp>(loc);
builder->setInsertionPointToStart(
builder->createBlock(&loopWrapperOp.getRegion()));

for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) {
info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
info.isConcurrent);
}

builder->setInsertionPointToEnd(loopWrapperOp.getBody());
auto loopOp = builder->create<fir::DoConcurrentLoopOp>(
loc, nestLBs, nestUBs, nestSts, nestReduceOperands,
nestReduceAttrs.empty()
? nullptr
: mlir::ArrayAttr::get(builder->getContext(), nestReduceAttrs),
nullptr);

llvm::SmallVector<mlir::Type> loopBlockArgTypes(
incrementLoopNestInfo.size(), builder->getIndexType());
llvm::SmallVector<mlir::Location> loopBlockArgLocs(
incrementLoopNestInfo.size(), loc);
mlir::Region &loopRegion = loopOp.getRegion();
mlir::Block *loopBlock = builder->createBlock(
&loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs);
builder->setInsertionPointToStart(loopBlock);

for (auto [info, blockArg] :
llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) {
info.loopOp = loopOp;
mlir::Value loopValue =
builder->createConvert(loc, info.getLoopVariableType(), blockArg);
builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable);

if (info.maskExpr) {
Fortran::lower::StatementContext stmtCtx;
mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx);
stmtCtx.finalizeAndReset();
mlir::Value maskCondCast =
builder->createConvert(loc, builder->getI1Type(), maskCond);
auto ifOp = builder->create<fir::IfOp>(loc, maskCondCast,
/*withElseRegion=*/false);
builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
}
}

IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back();

if (innermostInfo.hasLocalitySpecs())
handleLocalitySpecs(innermostInfo);

addLoopAnnotationAttr(innermostInfo, dirs);
}
}

/// Generate FIR to end a structured or unstructured increment loop nest.
Expand All @@ -2471,29 +2513,31 @@ class FirConverter : public Fortran::lower::AbstractConverter {
it != rend; ++it) {
IncrementLoopInfo &info = *it;
if (info.isStructured()) {
// End fir.do_loop.
if (info.isUnordered) {
builder->setInsertionPointAfter(info.doLoop);
// End fir.do_concurrent.loop.
if (info.isConcurrent) {
builder->setInsertionPointAfter(info.loopOp->getParentOp());
continue;
}

// End fir.do_loop.
// Decrement tripVariable.
builder->setInsertionPointToEnd(info.doLoop.getBody());
auto doLoopOp = mlir::cast<fir::DoLoopOp>(info.loopOp);
builder->setInsertionPointToEnd(doLoopOp.getBody());
llvm::SmallVector<mlir::Value, 2> results;
results.push_back(builder->create<mlir::arith::AddIOp>(
loc, info.doLoop.getInductionVar(), info.doLoop.getStep(),
iofAttr));
loc, doLoopOp.getInductionVar(), doLoopOp.getStep(), iofAttr));
// Step loopVariable to help optimizations such as vectorization.
// Induction variable elimination will clean up as necessary.
mlir::Value step = builder->createConvert(
loc, info.getLoopVariableType(), info.doLoop.getStep());
loc, info.getLoopVariableType(), doLoopOp.getStep());
mlir::Value loopVar =
builder->create<fir::LoadOp>(loc, info.loopVariable);
results.push_back(
builder->create<mlir::arith::AddIOp>(loc, loopVar, step, iofAttr));
builder->create<fir::ResultOp>(loc, results);
builder->setInsertionPointAfter(info.doLoop);
builder->setInsertionPointAfter(doLoopOp);
// The loop control variable may be used after the loop.
builder->create<fir::StoreOp>(loc, info.doLoop.getResult(1),
builder->create<fir::StoreOp>(loc, doLoopOp.getResult(1),
info.loopVariable);
continue;
}
Expand Down
3 changes: 3 additions & 0 deletions flang/lib/Optimizer/Builder/FIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
if (auto cufKernelOp = getRegion().getParentOfType<cuf::KernelOp>())
return &cufKernelOp.getRegion().front();

if (auto doConcurentOp = getRegion().getParentOfType<fir::DoConcurrentOp>())
return doConcurentOp.getBody();

return getEntryBlock();
}

Expand Down
39 changes: 32 additions & 7 deletions flang/test/Lower/do_concurrent.f90
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ subroutine sub1(n)
implicit none
integer :: n, m, i, j, k
integer, dimension(n) :: a
!CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFsub1En"}
!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Ea"}

!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
Expand All @@ -29,10 +32,30 @@ subroutine sub1(n)
!CHECK: %[[UB3:.*]] = arith.constant 10 : i32
!CHECK: %[[UB3_CVT:.*]] = fir.convert %[[UB3]] : (i32) -> index

!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
!CHECK: fir.do_loop %{{.*}} = %[[LB3_CVT]] to %[[UB3_CVT]] step %{{.*}} unordered
!CHECK: fir.do_concurrent
!CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i"}
!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I]]
!CHECK: %[[J:.*]] = fir.alloca i32 {bindc_name = "j"}
!CHECK: %[[J_DECL:.*]]:2 = hlfir.declare %[[J]]
!CHECK: %[[K:.*]] = fir.alloca i32 {bindc_name = "k"}
!CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[K]]

!CHECK: fir.do_concurrent.loop (%[[I_IV:.*]], %[[J_IV:.*]], %[[K_IV:.*]]) =
!CHECK-SAME: (%[[LB1_CVT]], %[[LB2_CVT]], %[[LB3_CVT]]) to
!CHECK-SAME: (%[[UB1_CVT]], %[[UB2_CVT]], %[[UB3_CVT]]) step
!CHECK-SAME: (%{{.*}}, %{{.*}}, %{{.*}}) {
!CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32
!CHECK: fir.store %[[I_IV_CVT]] to %[[I_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[J_IV_CVT:.*]] = fir.convert %[[J_IV]] : (index) -> i32
!CHECK: fir.store %[[J_IV_CVT]] to %[[J_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[K_IV_CVT:.*]] = fir.convert %[[K_IV]] : (index) -> i32
!CHECK: fir.store %[[K_IV_CVT]] to %[[K_DECL]]#0 : !fir.ref<i32>

!CHECK: %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[I_VAL:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[I_VAL_CVT:.*]] = fir.convert %[[I_VAL]] : (i32) -> i64
!CHECK: %[[A_ELEM:.*]] = hlfir.designate %[[A_DECL]]#0 (%[[I_VAL_CVT]])
!CHECK: hlfir.assign %[[N_VAL]] to %[[A_ELEM]] : i32, !fir.ref<i32>
do concurrent(i=1:n, j=1:bar(n*m, n/m), k=5:10)
a(i) = n
end do
Expand All @@ -45,22 +68,24 @@ subroutine sub2(n)
integer, dimension(n) :: a
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
!CHECK: %[[UB1:.*]] = fir.load %5#0 : !fir.ref<i32>
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index
!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
!CHECK: fir.do_concurrent
!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB1_CVT]]) to (%[[UB1_CVT]]) step (%{{.*}})

!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
!CHECK: fir.do_concurrent
!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB2_CVT]]) to (%[[UB2_CVT]]) step (%{{.*}})
do concurrent(i=1:n)
do concurrent(j=1:bar(n*m, n/m))
a(i) = n
end do
end do
end subroutine


!CHECK-LABEL: unstructured
subroutine unstructured(inner_step)
integer(4) :: i, j, inner_step
Expand Down
Loading