diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 6ed9ac47405d3..a3b19af4adc39 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -210,10 +211,7 @@ template class CodeGenPassBuilder { class AddIRPass { public: AddIRPass(ModulePassManager &MPM, const DerivedT &PB) : MPM(MPM), PB(PB) {} - ~AddIRPass() { - if (!FPM.isEmpty()) - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } + ~AddIRPass() { flushFPMToMPM(); } template void operator()(PassT &&Pass, StringRef Name = PassT::name()) { @@ -231,16 +229,40 @@ template class CodeGenPassBuilder { FPM.addPass(std::forward(Pass)); } else { // Add Module Pass - if (!FPM.isEmpty()) { - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - FPM = FunctionPassManager(); - } - + flushFPMToMPM(); MPM.addPass(std::forward(Pass)); } } + /// Wrap subsequently added function passes in a post-order CGSCC adaptor. + void requireCGSCCOrder() { + if (PB.AddInCGSCCOrder) + return; + flushFPMToMPM(); + PB.AddInCGSCCOrder = true; + } + + /// Stop wrapping newly added function passes in a CGSCC adaptor. + /// Passes already emitted in CGSCC order are left in place.
+ void stopAddingInCGSCCOrder() { + if (!PB.AddInCGSCCOrder) + return; + flushFPMToMPM(); + PB.AddInCGSCCOrder = false; + } + private: + void flushFPMToMPM() { + if (FPM.isEmpty()) + return; + if (PB.AddInCGSCCOrder) { + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createCGSCCToFunctionPassAdaptor(std::move(FPM)))); + } else { + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + FPM = FunctionPassManager(); + } ModulePassManager &MPM; FunctionPassManager FPM; const DerivedT &PB; @@ -252,13 +274,17 @@ template class CodeGenPassBuilder { AddMachinePass(ModulePassManager &MPM, const DerivedT &PB) : MPM(MPM), PB(PB) {} ~AddMachinePass() { - if (!MFPM.isEmpty()) { - FunctionPassManager FPM; - FPM.addPass( - createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))); - FPM.addPass(InvalidateAnalysisPass()); + if (MFPM.isEmpty()) + return; + + FunctionPassManager FPM; + FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))); + FPM.addPass(InvalidateAnalysisPass()); + if (this->PB.AddInCGSCCOrder) { + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createCGSCCToFunctionPassAdaptor(std::move(FPM)))); + } else MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } } template @@ -276,12 +302,7 @@ template class CodeGenPassBuilder { MFPM.addPass(std::forward(Pass)); } else { // Add Module Pass - if (!MFPM.isEmpty()) { - MPM.addPass(createModuleToFunctionPassAdaptor( - createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)))); - MFPM = MachineFunctionPassManager(); - } - + flushMFPMToMPM(); MPM.addPass(std::forward(Pass)); } @@ -289,7 +310,39 @@ template class CodeGenPassBuilder { C(Name, MFPM); } + /// Wrap subsequently added machine passes in a post-order CGSCC adaptor. + void requireCGSCCOrder() { + if (PB.AddInCGSCCOrder) + return; + flushMFPMToMPM(); + PB.AddInCGSCCOrder = true; + } + + /// Stop wrapping newly added machine passes in a CGSCC adaptor. + /// Passes already emitted in CGSCC order are left in place.
+ void stopAddingInCGSCCOrder() { + if (!PB.AddInCGSCCOrder) + return; + flushMFPMToMPM(); + PB.AddInCGSCCOrder = false; + } + private: + void flushMFPMToMPM() { + if (MFPM.isEmpty()) + return; + + if (PB.AddInCGSCCOrder) { + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createCGSCCToFunctionPassAdaptor( + createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))))); + } else { + MPM.addPass(createModuleToFunctionPassAdaptor( + createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)))); + } + MFPM = MachineFunctionPassManager(); + } + ModulePassManager &MPM; MachineFunctionPassManager MFPM; const DerivedT &PB; @@ -555,6 +608,7 @@ template class CodeGenPassBuilder { /// Helper variable for `-start-before/-start-after/-stop-before/-stop-after` mutable bool Started = true; mutable bool Stopped = true; + mutable bool AddInCGSCCOrder = false; }; template @@ -813,6 +867,9 @@ void CodeGenPassBuilder::addISelPrepare( AddIRPass &addPass) const { derived().addPreISel(addPass); + if (Opt.RequiresCodeGenSCCOrder) + addPass.requireCGSCCOrder(); + addPass(CallBrPreparePass()); // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index ccb251b730f16..680a3fb78a6e3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -2079,6 +2079,8 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const { // being run on them, which causes crashes in the resource usage analysis). 
addPass(AMDGPULowerBufferFatPointersPass(TM)); + addPass.requireCGSCCOrder(); + Base::addCodeGenPrepare(addPass); if (isPassEnabled(EnableLoadStoreVectorizer)) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll new file mode 100644 index 0000000000000..e9b57515e71e0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -0,0 +1,21 @@ +; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -O0 -print-pipeline-passes < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=GCN-O0 %s + +; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=GCN-O2 %s + +; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ +; RUN: | FileCheck -check-prefix=GCN-O3 %s + + +; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si
-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) + + +; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,require,phi-node-
elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) + +; GCN-O3: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix
-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,require,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) + + +define void @empty() { + ret void +}