| //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the ShrinkWrapping class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "bolt/Passes/ShrinkWrapping.h" |
| #include "bolt/Passes/DataflowInfoManager.h" |
| #include "bolt/Passes/MCF.h" |
| #include "bolt/Utils/CommandLineOpts.h" |
| #include <numeric> |
| #include <optional> |
| #include <stack> |
| |
| #define DEBUG_TYPE "shrinkwrapping" |
| |
| using namespace llvm; |
| |
| namespace opts { |
| |
| extern cl::opt<bool> TimeOpts; |
| extern cl::OptionCategory BoltOptCategory; |
| |
| static cl::opt<unsigned> ShrinkWrappingThreshold( |
| "shrink-wrapping-threshold", |
| cl::desc("Percentage of prologue execution count to use as threshold when" |
| " evaluating whether a block is cold enough to be profitable to" |
| " move eligible spills there"), |
| cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory)); |
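// Example (illustrative): with the default threshold of 30, a save whose
// prologue executes 1000 times is only moved to a candidate block executed
// fewer than 0.30 * 1000 = 300 times (see
// ShrinkWrapping::isBestSavePosCold()).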
| } // namespace opts |
| |
| namespace llvm { |
| namespace bolt { |
| |
| void CalleeSavedAnalysis::analyzeSaves() { |
| ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs(); |
| StackReachingUses &SRU = Info.getStackReachingUses(); |
| auto &InsnToBB = Info.getInsnToBBMap(); |
| BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false); |
| |
| LLVM_DEBUG(dbgs() << "Checking spill locations\n"); |
| for (BinaryBasicBlock &BB : BF) { |
| LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); |
| const MCInst *Prev = nullptr; |
| for (MCInst &Inst : BB) { |
| if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { |
| // Blacklist weird stores we don't understand |
| if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore && |
| FIE->IsStoreFromReg) { |
| BlacklistedRegs.set(FIE->RegOrImm); |
| CalleeSaved.reset(FIE->RegOrImm); |
| Prev = &Inst; |
| continue; |
| } |
| |
| if (!FIE->IsStore || !FIE->IsStoreFromReg || |
| BlacklistedRegs[FIE->RegOrImm]) { |
| Prev = &Inst; |
| continue; |
| } |
| |
| // If this reg is defined locally, it is not a callee-saved reg |
| if (RD.isReachedBy(FIE->RegOrImm, |
| Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) { |
| BlacklistedRegs.set(FIE->RegOrImm); |
| CalleeSaved.reset(FIE->RegOrImm); |
| Prev = &Inst; |
| continue; |
| } |
| |
        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed on the stack -- do not
        // mess with it
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                            /*IncludeLocalAccesses=*/false)) {
| BlacklistedRegs.set(FIE->RegOrImm); |
| CalleeSaved.reset(FIE->RegOrImm); |
| Prev = &Inst; |
| continue; |
| } |
| |
| // If this stack position is loaded elsewhere in another reg, we can't |
| // update it, so blacklist it. |
| if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev) |
| : SRU.expr_begin(BB))) { |
| BlacklistedRegs.set(FIE->RegOrImm); |
| CalleeSaved.reset(FIE->RegOrImm); |
| Prev = &Inst; |
| continue; |
| } |
| |
| // Ignore regs with multiple saves |
| if (CalleeSaved[FIE->RegOrImm]) { |
| BlacklistedRegs.set(FIE->RegOrImm); |
| CalleeSaved.reset(FIE->RegOrImm); |
| Prev = &Inst; |
| continue; |
| } |
| |
| CalleeSaved.set(FIE->RegOrImm); |
| SaveFIEByReg[FIE->RegOrImm] = &*FIE; |
| SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount(); |
| BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId); |
| OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset; |
| LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: " |
| << FIE->RegOrImm << "\n"); |
| } |
| Prev = &Inst; |
| } |
| } |
| } |
| |
| void CalleeSavedAnalysis::analyzeRestores() { |
| ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses(); |
| |
| // Now compute all restores of these callee-saved regs |
| for (BinaryBasicBlock &BB : BF) { |
| const MCInst *Prev = nullptr; |
| for (MCInst &Inst : llvm::reverse(BB)) { |
| if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { |
| if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) { |
| Prev = &Inst; |
| continue; |
| } |
| |
| // If this reg is used locally after a restore, then we are probably |
| // not dealing with a callee-saved reg. Except if this use is by |
| // another store, but we don't cover this case yet. |
| // Also not callee-saved if this load accesses caller stack or isn't |
| // simple. |
| if (!FIE->IsSimple || FIE->StackOffset >= 0 || |
| RU.isReachedBy(FIE->RegOrImm, |
| Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) { |
| CalleeSaved.reset(FIE->RegOrImm); |
| Prev = &Inst; |
| continue; |
| } |
        // If stack offsets between save and restore don't agree with each
        // other, we don't completely understand what's happening here
| if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) { |
| CalleeSaved.reset(FIE->RegOrImm); |
| LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a " |
| "mismatching restore: " |
| << FIE->RegOrImm << "\n"); |
| Prev = &Inst; |
| continue; |
| } |
| |
| LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm |
| << "\n"); |
| if (LoadFIEByReg[FIE->RegOrImm] == nullptr) |
| LoadFIEByReg[FIE->RegOrImm] = &*FIE; |
| BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm, |
| AllocatorId); |
| HasRestores.set(FIE->RegOrImm); |
| } |
| Prev = &Inst; |
| } |
| } |
| } |
| |
| std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) { |
| std::vector<MCInst *> Results; |
| for (BinaryBasicBlock &BB : BF) |
| for (MCInst &Inst : BB) |
| if (getSavedReg(Inst) == Reg) |
| Results.push_back(&Inst); |
| return Results; |
| } |
| |
| std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) { |
| std::vector<MCInst *> Results; |
| for (BinaryBasicBlock &BB : BF) |
| for (MCInst &Inst : BB) |
| if (getRestoredReg(Inst) == Reg) |
| Results.push_back(&Inst); |
| return Results; |
| } |
| |
| CalleeSavedAnalysis::~CalleeSavedAnalysis() { |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : BB) { |
| BC.MIB->removeAnnotation(Inst, getSaveTag()); |
| BC.MIB->removeAnnotation(Inst, getRestoreTag()); |
| } |
| } |
| } |
| |
| void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) { |
| if (BlacklistedRegions[Offset] < Size) |
| BlacklistedRegions[Offset] = Size; |
| } |
| |
| bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) { |
| for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions) |
| if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second) |
| return true; |
| return false; |
| } |
| |
| bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset, |
| int64_t Size) { |
| bool HasConflict = false; |
| for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) { |
| std::pair<const int64_t, int64_t> &Elem = *Iter; |
| if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second && |
| (Offset != Elem.first || Size != Elem.second)) { |
| Iter = AvailableRegions.erase(Iter); |
| HasConflict = true; |
| continue; |
| } |
| ++Iter; |
| } |
| if (HasConflict) { |
| blacklistRegion(Offset, Size); |
| return true; |
| } |
| return false; |
| } |
| |
| void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) { |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
| if (!BC.MII->get(Point.getOpcode()) |
| .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI)) |
| return; |
| |
| int SPVal, FPVal; |
| std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); |
| std::pair<MCPhysReg, int64_t> FP; |
| |
| if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) |
| FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); |
| else |
| FP = std::make_pair(0, 0); |
| std::pair<MCPhysReg, int64_t> SP; |
| |
| if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) |
| SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); |
| else |
| SP = std::make_pair(0, 0); |
| |
| int64_t Output; |
| if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) |
| return; |
| |
| // Not your regular frame pointer initialization... bail |
| if (Output != SPVal) |
| blacklistRegion(0, 0); |
| } |
| |
| void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) { |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
| if (!BC.MII->get(Point.getOpcode()) |
| .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI)) |
| return; |
| // Check if the definition of SP comes from FP -- in this case, this |
| // value may need to be updated depending on our stack layout changes |
| bool UsesFP = llvm::any_of(BC.MIB->useOperands(Point), [&](MCOperand &Op) { |
| return Op.isReg() && Op.getReg() == BC.MIB->getFramePointer(); |
| }); |
| if (!UsesFP) |
| return; |
| |
| // Setting up evaluation |
| int SPVal, FPVal; |
| std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); |
| std::pair<MCPhysReg, int64_t> FP; |
| |
| if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) |
| FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); |
| else |
| FP = std::make_pair(0, 0); |
| std::pair<MCPhysReg, int64_t> SP; |
| |
| if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) |
| SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); |
| else |
| SP = std::make_pair(0, 0); |
| |
| int64_t Output; |
| if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) |
| return; |
| |
  // If the value is the same as FP, no need to adjust it
| if (Output == FPVal) |
| return; |
| |
| // If an allocation happened through FP, bail |
| if (Output <= SPVal) { |
| blacklistRegion(0, 0); |
| return; |
| } |
| |
| // We are restoring SP to an old value based on FP. Mark it as a stack |
| // access to be fixed later. |
| BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId); |
| } |
| |
| void StackLayoutModifier::classifyStackAccesses() { |
| // Understand when stack slots are being used non-locally |
| StackReachingUses &SRU = Info.getStackReachingUses(); |
| |
| for (BinaryBasicBlock &BB : BF) { |
| const MCInst *Prev = nullptr; |
| for (MCInst &Inst : llvm::reverse(BB)) { |
| checkFramePointerInitialization(Inst); |
| checkStackPointerRestore(Inst); |
| ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst); |
| if (!FIEX) { |
| Prev = &Inst; |
| continue; |
| } |
| if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) { |
| blacklistRegion(FIEX->StackOffset, FIEX->Size); |
| Prev = &Inst; |
| continue; |
| } |
| // If this stack position is accessed in another function, we are |
| // probably dealing with a parameter passed in a stack -- do not mess |
| // with it |
| if (SRU.isStoreUsed(*FIEX, |
| Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB), |
| /*IncludeLocalAccesses=*/false)) { |
| blacklistRegion(FIEX->StackOffset, FIEX->Size); |
| Prev = &Inst; |
| continue; |
| } |
      // Now we have a clear stack slot access. Check if it's blacklisted or
      // if it conflicts with another chunk.
| if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) || |
| blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) { |
| Prev = &Inst; |
| continue; |
| } |
      // We are free to go. Add it as an available stack slot that we know
      // how to move.
| AvailableRegions[FIEX->StackOffset] = FIEX->Size; |
| BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId); |
| RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm); |
| RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset); |
| LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size " |
| << (int)FIEX->Size << "\n"); |
| } |
| } |
| } |
| |
| void StackLayoutModifier::classifyCFIs() { |
| std::stack<std::pair<int64_t, uint16_t>> CFIStack; |
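  // Initial CFA state, assuming the x86-64 psABI: at function entry the
  // CFA is %rsp + 8 and DWARF register 7 is RSP. BOLT negates DWARF
  // offsets to match its SP-relative convention, hence the -8 below.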
| int64_t CfaOffset = -8; |
| uint16_t CfaReg = 7; |
| |
| auto recordAccess = [&](MCInst *Inst, int64_t Offset) { |
| const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false); |
| if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) { |
| BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId); |
| LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n"); |
| } else { |
| IsSimple = false; |
| return; |
| } |
| }; |
| |
| for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { |
| for (MCInst &Inst : *BB) { |
| if (!BC.MIB->isCFI(Inst)) |
| continue; |
| const MCCFIInstruction *CFI = BF.getCFIFor(Inst); |
| switch (CFI->getOperation()) { |
| case MCCFIInstruction::OpDefCfa: |
| CfaOffset = -CFI->getOffset(); |
| recordAccess(&Inst, CfaOffset); |
| [[fallthrough]]; |
| case MCCFIInstruction::OpDefCfaRegister: |
| CfaReg = CFI->getRegister(); |
| break; |
| case MCCFIInstruction::OpDefCfaOffset: |
| CfaOffset = -CFI->getOffset(); |
| recordAccess(&Inst, CfaOffset); |
| break; |
| case MCCFIInstruction::OpOffset: |
| recordAccess(&Inst, CFI->getOffset()); |
| BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), |
| BC.MRI->getLLVMRegNum(CFI->getRegister(), |
| /*isEH=*/false), |
| AllocatorId); |
| break; |
| case MCCFIInstruction::OpSameValue: |
| BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), |
| BC.MRI->getLLVMRegNum(CFI->getRegister(), |
| /*isEH=*/false), |
| AllocatorId); |
| break; |
| case MCCFIInstruction::OpRememberState: |
| CFIStack.push(std::make_pair(CfaOffset, CfaReg)); |
| break; |
| case MCCFIInstruction::OpRestoreState: { |
| assert(!CFIStack.empty() && "Corrupt CFI stack"); |
| std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); |
| CFIStack.pop(); |
| CfaOffset = Elem.first; |
| CfaReg = Elem.second; |
| break; |
| } |
| case MCCFIInstruction::OpRelOffset: |
| case MCCFIInstruction::OpAdjustCfaOffset: |
        llvm_unreachable("Unhandled RelOffset/AdjustCfaOffset");
| break; |
| default: |
| break; |
| } |
| } |
| } |
| } |
| |
| void StackLayoutModifier::scheduleChange( |
| MCInst &Inst, StackLayoutModifier::WorklistItem Item) { |
| auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>( |
| Inst, getTodoTag(), AllocatorId); |
| WList.push_back(Item); |
| } |
| |
| bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) { |
| if (!IsSimple || !BC.MIB->isPush(*DeletedPush)) |
| return false; |
| |
| ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); |
| if (!FIE) |
| return false; |
| |
| return canCollapseRegion(FIE->StackOffset); |
| } |
| |
| bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) { |
| if (!IsInitialized) |
| initialize(); |
| if (!IsSimple) |
| return false; |
| |
| if (CollapsedRegions.count(RegionAddr)) |
| return true; |
| |
| // Check if it is possible to readjust all accesses below RegionAddr |
| if (!BlacklistedRegions.empty()) |
| return false; |
| |
| return true; |
| } |
| |
| bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) { |
| ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); |
| if (!FIE) |
| return false; |
| int64_t RegionAddr = FIE->StackOffset; |
| int64_t RegionSz = FIE->Size; |
| return collapseRegion(DeletedPush, RegionAddr, RegionSz); |
| } |
| |
| bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr, |
| int64_t RegionSz) { |
| if (!canCollapseRegion(RegionAddr)) |
| return false; |
| |
| assert(IsInitialized); |
| StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis(); |
| |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : BB) { |
| if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) |
| continue; |
| auto Slot = |
| BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( |
| Inst, getSlotTag()); |
| if (!AvailableRegions.count(Slot)) |
| continue; |
| // We need to ensure this access is affected by the deleted push |
| if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]]) |
| continue; |
| |
| if (BC.MIB->isCFI(Inst)) { |
| if (Slot > RegionAddr) |
| continue; |
| scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz)); |
| continue; |
| } |
| ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); |
| if (!FIE) { |
| if (Slot > RegionAddr) |
| continue; |
| // SP update based on frame pointer |
| scheduleChange( |
| Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); |
| continue; |
| } |
| |
| if (Slot == RegionAddr) { |
| BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId); |
| continue; |
| } |
| if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) |
| continue; |
| |
| if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) |
| continue; |
| |
| if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr) |
| continue; |
| |
| scheduleChange( |
| Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); |
| } |
| } |
| |
| CollapsedRegions.insert(RegionAddr); |
| return true; |
| } |
| |
| void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) { |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : BB) { |
| if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) |
| continue; |
| BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); |
| scheduleChange( |
| Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset)); |
| } |
| } |
| } |
| |
| bool StackLayoutModifier::canInsertRegion(ProgramPoint P) { |
| if (!IsInitialized) |
| initialize(); |
| if (!IsSimple) |
| return false; |
| |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
| int64_t RegionAddr = SPT.getStateBefore(P)->first; |
| if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) |
| return false; |
| |
| if (InsertedRegions.count(RegionAddr)) |
| return true; |
| |
| // Check if we are going to screw up stack accesses at call sites that |
| // pass parameters via stack |
| if (!BlacklistedRegions.empty()) |
| return false; |
| |
| return true; |
| } |
| |
| bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) { |
| if (!canInsertRegion(P)) |
| return false; |
| |
| assert(IsInitialized); |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
  // This RegionAddr is slightly different from the one seen in
  // collapseRegion: it is the value of SP before the allocation the user
  // wants to make.
| int64_t RegionAddr = SPT.getStateBefore(P)->first; |
| if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) |
| return false; |
| |
| DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
| |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : BB) { |
| if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) |
| continue; |
| auto Slot = |
| BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( |
| Inst, getSlotTag()); |
| if (!AvailableRegions.count(Slot)) |
| continue; |
| |
| if (!(DA.doesADominateB(P, Inst))) |
| continue; |
| |
| if (BC.MIB->isCFI(Inst)) { |
| if (Slot >= RegionAddr) |
| continue; |
| scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz)); |
| continue; |
| } |
| ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); |
| if (!FIE) { |
| if (Slot >= RegionAddr) |
| continue; |
| scheduleChange( |
| Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); |
| continue; |
| } |
| |
| if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) |
| continue; |
| if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr) |
| continue; |
| if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) |
| continue; |
| scheduleChange( |
| Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); |
| } |
| } |
| |
| InsertedRegions.insert(RegionAddr); |
| return true; |
| } |
| |
| void StackLayoutModifier::performChanges() { |
| std::set<uint32_t> ModifiedCFIIndices; |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : llvm::reverse(BB)) { |
| if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) { |
| assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst)); |
| BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); |
| } |
| if (!BC.MIB->hasAnnotation(Inst, getTodoTag())) |
| continue; |
| auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>( |
| Inst, getTodoTag()); |
| int64_t Adjustment = 0; |
| WorklistItem::ActionType AdjustmentType = WorklistItem::None; |
| for (WorklistItem &WI : WList) { |
| if (WI.Action == WorklistItem::None) |
| continue; |
| assert(WI.Action == WorklistItem::AdjustLoadStoreOffset || |
| WI.Action == WorklistItem::AdjustCFI); |
| assert((AdjustmentType == WorklistItem::None || |
| AdjustmentType == WI.Action) && |
| "Conflicting actions requested at the same program point"); |
| AdjustmentType = WI.Action; |
| Adjustment += WI.OffsetUpdate; |
| } |
| if (!Adjustment) |
| continue; |
| if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) { |
| assert(BC.MIB->isCFI(Inst)); |
| uint32_t CFINum = Inst.getOperand(0).getImm(); |
| if (ModifiedCFIIndices.count(CFINum)) |
| continue; |
| ModifiedCFIIndices.insert(CFINum); |
| const MCCFIInstruction *CFI = BF.getCFIFor(Inst); |
| const MCCFIInstruction::OpType Operation = CFI->getOperation(); |
| if (Operation == MCCFIInstruction::OpDefCfa || |
| Operation == MCCFIInstruction::OpDefCfaOffset) |
          Adjustment = -Adjustment;
| LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset() |
| << " to " << (CFI->getOffset() + Adjustment) << "\n"); |
| BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment); |
| continue; |
| } |
| int32_t SrcImm = 0; |
| MCPhysReg Reg = 0; |
| MCPhysReg StackPtrReg = 0; |
| int64_t StackOffset = 0; |
| bool IsIndexed = false; |
| bool IsLoad = false; |
| bool IsStore = false; |
| bool IsSimple = false; |
| bool IsStoreFromReg = false; |
| uint8_t Size = 0; |
| bool Success = false; |
| Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, |
| Reg, SrcImm, StackPtrReg, StackOffset, |
| Size, IsSimple, IsIndexed); |
| if (!Success) { |
| // SP update based on FP value |
| Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx); |
| assert(Success); |
| continue; |
| } |
| assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg)); |
| if (StackPtrReg != BC.MIB->getFramePointer()) |
| Adjustment = -Adjustment; |
| if (IsLoad) |
| BC.MIB->createRestoreFromStack(Inst, StackPtrReg, |
| StackOffset + Adjustment, Reg, Size); |
| else if (IsStore) |
| BC.MIB->createSaveToStack(Inst, StackPtrReg, StackOffset + Adjustment, |
| Reg, Size); |
| LLVM_DEBUG({ |
| dbgs() << "Adjusted instruction: "; |
| Inst.dump(); |
| }); |
| } |
| } |
| } |
| |
| void StackLayoutModifier::initialize() { |
| classifyStackAccesses(); |
| classifyCFIs(); |
| IsInitialized = true; |
| } |
| |
| std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0}; |
| std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0}; |
| std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0}; |
| std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0}; |
| std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0}; |
| std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0}; |
| |
| using BBIterTy = BinaryBasicBlock::iterator; |
| |
| void ShrinkWrapping::classifyCSRUses() { |
| DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
| UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(), |
| BitVector(DA.NumInstrs, false)); |
| |
| const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer()); |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : BB) { |
| if (BC.MIB->isCFI(Inst)) |
| continue; |
| BitVector BV = BitVector(BC.MRI->getNumRegs(), false); |
| BC.MIB->getTouchedRegs(Inst, BV); |
| BV &= CSA.CalleeSaved; |
| for (int I : BV.set_bits()) { |
| if (I == 0) |
| continue; |
| if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I) |
| UsesByReg[I].set(DA.ExprToIdx[&Inst]); |
| } |
| if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst)) |
| continue; |
| BV = CSA.CalleeSaved; |
| BV &= FPAliases; |
| for (int I : BV.set_bits()) |
| UsesByReg[I].set(DA.ExprToIdx[&Inst]); |
| } |
| } |
| } |
| |
| void ShrinkWrapping::pruneUnwantedCSRs() { |
| BitVector ParamRegs = BC.MIB->getRegsUsedAsParams(); |
| for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
| if (!CSA.CalleeSaved[I]) |
| continue; |
| if (ParamRegs[I]) { |
| CSA.CalleeSaved.reset(I); |
| continue; |
| } |
    if (UsesByReg[I].none()) {
      LLVM_DEBUG(
          dbgs()
          << "Dismissing Callee-Saved Reg because we found no uses of it: "
          << I << "\n");
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (!CSA.HasRestores[I]) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because it does not have "
                    "restores: "
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
    }
| } |
| } |
| |
| void ShrinkWrapping::computeSaveLocations() { |
| BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs()); |
| ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); |
| DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
| |
| LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n"); |
| for (BinaryBasicBlock &BB : BF) { |
| LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); |
| |
| MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr; |
| if (!First) |
| continue; |
| |
| // Use reaching instructions to detect if we are inside a loop - if we |
| // are, do not consider this BB as valid placement for saves. |
| if (RI.isInLoop(BB)) |
| continue; |
| |
| const std::pair<int, int> SPFP = *SPT.getStateBefore(*First); |
| // If we don't know stack state at this point, bail |
| if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && |
| (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) |
| continue; |
| |
| for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
| if (!CSA.CalleeSaved[I]) |
| continue; |
| |
| BitVector BBDominatedUses = BitVector(DA.NumInstrs, false); |
| for (int J : UsesByReg[I].set_bits()) |
| if (DA.doesADominateB(*First, J)) |
| BBDominatedUses.set(J); |
| LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates " |
| << BBDominatedUses.count() << " uses for reg " << I |
| << ". Total uses for reg is " << UsesByReg[I].count() |
| << "\n"); |
| BBDominatedUses &= UsesByReg[I]; |
| if (BBDominatedUses == UsesByReg[I]) { |
| LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName() |
| << " as a save pos for " << I << "\n"); |
| BestSavePos[I].push_back(First); |
| LLVM_DEBUG({ |
| dbgs() << "Dominated uses are:\n"; |
| for (int J : UsesByReg[I].set_bits()) { |
| dbgs() << "Idx " << J << ": "; |
| BC.printInstruction(dbgs(), *DA.Expressions[J]); |
| DA.Expressions[J]->dump(); |
| } |
| }); |
| } |
| } |
| } |
| |
| BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs()); |
| |
| auto &InsnToBB = Info.getInsnToBBMap(); |
| for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
| if (!CSA.CalleeSaved[I]) |
| continue; |
| |
| std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(), |
| [&](const MCInst *A, const MCInst *B) { |
| const BinaryBasicBlock *BBA = InsnToBB[A]; |
| const BinaryBasicBlock *BBB = InsnToBB[B]; |
| const uint64_t CountA = BBA->getKnownExecutionCount(); |
| const uint64_t CountB = BBB->getKnownExecutionCount(); |
| return CountB < CountA; |
| }); |
| |
| for (MCInst *Pos : BestSavePos[I]) { |
| const BinaryBasicBlock *BB = InsnToBB[Pos]; |
| const uint64_t Count = BB->getKnownExecutionCount(); |
| BestSaveCount[I].push_back(Count); |
| } |
| } |
| } |
| |
| void ShrinkWrapping::computeDomOrder() { |
| DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0); |
| std::vector<MCPhysReg> Order; |
| for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
| Order.push_back(I); |
| } |
| |
| DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
| auto &InsnToBB = Info.getInsnToBBMap(); |
| llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) { |
| BinaryBasicBlock *BBA = |
| BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr; |
| BinaryBasicBlock *BBB = |
| BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr; |
| if (BBA == BBB) |
| return A < B; |
| if (!BBA && BBB) |
| return false; |
| if (BBA && !BBB) |
| return true; |
| if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back())) |
| return true; |
| if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back())) |
| return false; |
| return A < B; |
| }); |
| |
| for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) |
| DomOrder[Order[I]] = I; |
| } |
| |
| bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave, |
| uint64_t &TotalEstimatedWin) { |
| const uint64_t CurSavingCost = CSA.SavingCost[CSR]; |
| if (!CSA.CalleeSaved[CSR]) |
| return false; |
| |
| assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() && |
| "save position vectors out of sync"); |
| if (BestSaveCount[CSR].empty()) |
| return false; |
| |
| const uint64_t BestCount = BestSaveCount[CSR].back(); |
| BestPosSave = BestSavePos[CSR].back(); |
| if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) |
| return false; |
| |
| LLVM_DEBUG({ |
| auto &InsnToBB = Info.getInsnToBBMap(); |
| dbgs() << "Better position for saves found in func " << BF.getPrintName() |
| << " count << " << BF.getKnownExecutionCount() << "\n"; |
| dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName() |
| << " Freq reduction: " << (CurSavingCost - BestCount) << "\n"; |
| }); |
| |
| TotalEstimatedWin = CurSavingCost - BestCount; |
| return true; |
| } |
| |
/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations
| void ShrinkWrapping::splitFrontierCritEdges( |
| BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier, |
| const SmallVector<bool, 4> &IsCritEdge, |
| const SmallVector<BinaryBasicBlock *, 4> &From, |
| const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) { |
| LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func " |
| << BF.getPrintName() << "\n"); |
| // For every FromBB, there might be one or more critical edges, with |
| // To[I] containing destination BBs. It's important to memorize |
| // the original size of the Frontier as we may append to it while splitting |
  // critical edges originating from blocks with multiple destinations.
| for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) { |
| if (!IsCritEdge[I]) |
| continue; |
| if (To[I].empty()) |
| continue; |
| BinaryBasicBlock *FromBB = From[I]; |
| LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName() |
| << "\n"); |
| // Split edge for every DestinationBBs |
| for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) { |
| BinaryBasicBlock *DestinationBB = To[I][DI]; |
| LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB->getName() << "\n"); |
| BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB); |
| // Insert dummy instruction so this BB is never empty (we need this for |
| // PredictiveStackPointerTracking to work, since it annotates instructions |
| // and not BBs). |
| if (NewBB->empty()) { |
| MCInst NewInst; |
| BC.MIB->createNoop(NewInst); |
| NewBB->addInstruction(std::move(NewInst)); |
| scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0)); |
| } |
| |
| // Update frontier |
| ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB); |
| if (DI == 0) { |
| // Update frontier inplace |
| Frontier[I] = NewFrontierPP; |
| LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB->getName() |
| << '\n'); |
| } else { |
| // Append new frontier to the end of the list |
| Frontier.push_back(NewFrontierPP); |
| LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB->getName() |
| << '\n'); |
| } |
| } |
| } |
| } |
| |
| SmallVector<ProgramPoint, 4> |
| ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR, |
| uint64_t TotalEstimatedWin) { |
| SmallVector<ProgramPoint, 4> Frontier; |
| SmallVector<bool, 4> IsCritEdge; |
| DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
| |
| SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom; |
| SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo; |
| // In case of a critical edge, we need to create extra BBs to host restores |
| // into edges transitioning to the dominance frontier, otherwise we pull these |
| // restores to inside the dominated area. |
| Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector(); |
| LLVM_DEBUG({ |
| dbgs() << "Dumping dominance frontier for "; |
| BC.printInstruction(dbgs(), *BestPosSave); |
| for (ProgramPoint &PP : Frontier) |
| if (PP.isInst()) |
| BC.printInstruction(dbgs(), *PP.getInst()); |
| else |
| dbgs() << PP.getBB()->getName() << "\n"; |
| }); |
| for (ProgramPoint &PP : Frontier) { |
| bool HasCritEdges = false; |
| if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) && |
| doesInstUsesCSR(*PP.getInst(), CSR)) { |
| Frontier.clear(); |
| return Frontier; |
| } |
| BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); |
| CritEdgesFrom.emplace_back(FrontierBB); |
| CritEdgesTo.emplace_back(0); |
| SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back(); |
| // Check for invoke instructions at the dominance frontier, which indicates |
| // the landing pad is not dominated. |
| if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) { |
| Frontier.clear(); |
| return Frontier; |
| } |
| doForAllSuccs(*FrontierBB, [&](ProgramPoint P) { |
| if (!DA.doesADominateB(*BestPosSave, P)) { |
| Dests.emplace_back(Info.getParentBB(P)); |
| return; |
| } |
| HasCritEdges = true; |
| }); |
| IsCritEdge.push_back(HasCritEdges); |
| } |
| // Restores cannot be placed in empty BBs because we have a dataflow |
| // analysis that depends on insertions happening before real instructions |
| // (PredictiveStackPointerTracking). Detect now for empty BBs and add a |
| // dummy nop that is scheduled to be removed later. |
| bool InvalidateRequired = false; |
| for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { |
| if (BB->size() != 0) |
| continue; |
| MCInst NewInst; |
| BC.MIB->createNoop(NewInst); |
| auto II = BB->addInstruction(std::move(NewInst)); |
| scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0)); |
| InvalidateRequired = true; |
| } |
| if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) { |
| LLVM_DEBUG({ |
| dbgs() << "Now detected critical edges in the following frontier:\n"; |
| for (ProgramPoint &PP : Frontier) { |
| if (PP.isBB()) { |
| dbgs() << " BB: " << PP.getBB()->getName() << "\n"; |
| } else { |
| dbgs() << " Inst: "; |
| PP.getInst()->dump(); |
| } |
| } |
| }); |
| splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom, |
| CritEdgesTo); |
| InvalidateRequired = true; |
| } |
| if (InvalidateRequired) { |
| // BitVectors that represent all insns of the function are invalid now |
| // since we changed BBs/Insts. Re-run steps that depend on pointers being |
| // valid |
| Info.invalidateAll(); |
| classifyCSRUses(); |
| } |
| return Frontier; |
| } |
| |
| bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave, |
| int64_t SaveOffset) { |
| if (FA.requiresAlignment(BF)) { |
| LLVM_DEBUG({ |
| dbgs() << "Reg " << CSR |
| << " is not using push/pops due to function " |
| "alignment requirements.\n"; |
| }); |
| return false; |
| } |
| if (FA.hasStackArithmetic(BF)) { |
| LLVM_DEBUG({ |
| dbgs() << "Reg " << CSR |
| << " is not using push/pops due to function " |
| "taking the address of a stack position.\n"; |
| }); |
| return false; |
| } |
| for (MCInst *Save : CSA.getSavesByReg(CSR)) { |
| if (!SLM.canCollapseRegion(Save)) { |
| LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n"); |
| return false; |
| } |
| } |
| // Abort if one of the restores for this CSR is not a POP. |
| for (MCInst *Load : CSA.getRestoresByReg(CSR)) { |
| if (!BC.MIB->isPop(*Load)) { |
| LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n"); |
| return false; |
| } |
| } |
| |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
| // Abort if we are inserting a push into an entry BB (offset -8) and this |
| // func sets up a frame pointer. |
| if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION || |
| SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) { |
| LLVM_DEBUG({ |
| dbgs() << "Reg " << CSR |
| << " cannot insert region or we are " |
| "trying to insert a push into entry bb.\n"; |
| }); |
| return false; |
| } |
| return true; |
| } |
| |
| SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements( |
| const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset, |
| unsigned CSR) { |
| SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints; |
| // Moving pop locations to the correct sp offset |
| ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
| for (ProgramPoint &PP : FixedRestorePoints) { |
| BinaryBasicBlock *BB = Info.getParentBB(PP); |
| bool Found = false; |
| if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first == |
| SaveOffset) { |
| BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB)); |
| BV &= UsesByReg[CSR]; |
| if (!BV.any()) { |
| Found = true; |
| PP = BB; |
| continue; |
| } |
| } |
| for (MCInst &Inst : llvm::reverse(*BB)) { |
| if (SPT.getStateBefore(Inst)->first == SaveOffset) { |
| BitVector BV = *RI.getStateAt(Inst); |
| BV &= UsesByReg[CSR]; |
| if (!BV.any()) { |
| Found = true; |
| PP = &Inst; |
| break; |
| } |
| } |
| } |
| if (!Found) { |
| LLVM_DEBUG({ |
| dbgs() << "Could not find restore insertion point for " << CSR |
| << ", falling back to load/store mode\n"; |
| }); |
| FixedRestorePoints.clear(); |
| return FixedRestorePoints; |
| } |
| } |
| return FixedRestorePoints; |
| } |
| |
| void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR, |
| bool UsePushPops) { |
| for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { |
| std::vector<MCInst *> CFIs; |
| for (MCInst &Inst : llvm::reverse(*BB)) { |
| if (BC.MIB->isCFI(Inst)) { |
| // Delete all offset CFIs related to this CSR |
| if (SLM.getOffsetCFIReg(Inst) == CSR) { |
| HasDeletedOffsetCFIs[CSR] = true; |
| scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR)); |
| continue; |
| } |
| CFIs.push_back(&Inst); |
| continue; |
| } |
| |
| uint16_t SavedReg = CSA.getSavedReg(Inst); |
| uint16_t RestoredReg = CSA.getRestoredReg(Inst); |
| if (SavedReg != CSR && RestoredReg != CSR) { |
| CFIs.clear(); |
| continue; |
| } |
| |
| scheduleChange(&Inst, WorklistItem(UsePushPops |
| ? WorklistItem::Erase |
| : WorklistItem::ChangeToAdjustment, |
| CSR)); |
| |
| // Delete associated CFIs |
| const bool RecordDeletedPushCFIs = |
| SavedReg == CSR && DeletedPushCFIs[CSR].empty(); |
| const bool RecordDeletedPopCFIs = |
| RestoredReg == CSR && DeletedPopCFIs[CSR].empty(); |
| for (MCInst *CFI : CFIs) { |
| const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI); |
| // Do not touch these... |
| if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState || |
| MCCFI->getOperation() == MCCFIInstruction::OpRememberState) |
| continue; |
| scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR)); |
| if (RecordDeletedPushCFIs) { |
| // Do not record this to be replayed later because we are going to |
| // rebuild it. |
| if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) |
| continue; |
| DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm()); |
| } |
| if (RecordDeletedPopCFIs) { |
| if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) |
| continue; |
| DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm()); |
| } |
| } |
| CFIs.clear(); |
| } |
| } |
| } |
| |
| bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) { |
| if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR || |
| CSA.getRestoredReg(Inst) == CSR) |
| return false; |
| BitVector BV = BitVector(BC.MRI->getNumRegs(), false); |
| BC.MIB->getTouchedRegs(Inst, BV); |
| return BV[CSR]; |
| } |
| |
| void ShrinkWrapping::scheduleSaveRestoreInsertions( |
| unsigned CSR, MCInst *BestPosSave, |
| SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) { |
| auto &InsnToBB = Info.getInsnToBBMap(); |
| const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR]; |
| const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR]; |
| assert(FIESave && FIELoad && "Invalid CSR"); |
| |
| LLVM_DEBUG({ |
| dbgs() << "Scheduling save insertion at: "; |
| BestPosSave->dump(); |
| }); |
| |
| scheduleChange(BestPosSave, |
| UsePushPops ? WorklistItem::InsertPushOrPop |
| : WorklistItem::InsertLoadOrStore, |
| *FIESave, CSR); |
| |
| for (ProgramPoint &PP : RestorePoints) { |
| BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); |
| LLVM_DEBUG({ |
| dbgs() << "Scheduling restore insertion at: "; |
| if (PP.isInst()) |
| PP.getInst()->dump(); |
| else |
| dbgs() << PP.getBB()->getName() << "\n"; |
| }); |
| MCInst *Term = |
| FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr); |
| if (Term) |
| PP = Term; |
| bool PrecededByPrefix = false; |
| if (PP.isInst()) { |
| auto Iter = FrontierBB->findInstruction(PP.getInst()); |
| if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) { |
| --Iter; |
| PrecededByPrefix = BC.MIB->isPrefix(*Iter); |
| } |
| } |
| if (PP.isInst() && |
| (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) { |
| assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) && |
| "cannot move to end of bb"); |
| scheduleChange(InsnToBB[PP.getInst()], |
| UsePushPops ? WorklistItem::InsertPushOrPop |
| : WorklistItem::InsertLoadOrStore, |
| *FIELoad, CSR); |
| continue; |
| } |
| scheduleChange(PP, |
| UsePushPops ? WorklistItem::InsertPushOrPop |
| : WorklistItem::InsertLoadOrStore, |
| *FIELoad, CSR); |
| } |
| } |
| |
| void ShrinkWrapping::moveSaveRestores() { |
| bool DisablePushPopMode = false; |
| bool UsedPushPopMode = false; |
| // Keeps info about successfully moved regs: reg index, save position and |
| // save size |
| std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs; |
| uint64_t TotalEstimatedWin = 0; |
| |
| computeDomOrder(); |
| for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
| MCInst *BestPosSave = nullptr; |
| uint64_t EstimatedWin = 0; |
| SmallVector<ProgramPoint, 4> RestorePoints; |
| while (RestorePoints.empty() && |
| isBestSavePosCold(I, BestPosSave, EstimatedWin)) { |
| RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin); |
| if (RestorePoints.empty()) { |
| LLVM_DEBUG({ |
| dbgs() << "Dropping opportunity because restore placement failed" |
| " -- total est. freq reduc: " |
| << EstimatedWin << ". Will try " |
| << (BestSaveCount[I].size() - 1) << " more times.\n"; |
| }); |
| BestSaveCount[I].pop_back(); |
| BestSavePos[I].pop_back(); |
| computeDomOrder(); |
| } |
| } |
| if (RestorePoints.empty()) { |
| SpillsFailedDynamicCount += EstimatedWin; |
| continue; |
| } |
| |
| const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I]; |
| const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I]; |
| (void)FIELoad; |
| assert(FIESave && FIELoad); |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
| const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave); |
| int SaveOffset = SPFP.first; |
| uint8_t SaveSize = FIESave->Size; |
| |
| // If we don't know stack state at this point, bail |
| if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && |
| (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) { |
| SpillsFailedDynamicCount += EstimatedWin; |
| continue; |
| } |
| |
    // Operation mode: if true, insert push/pops instead of loads/stores
| bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset); |
| |
| if (UsePushPops) { |
| SmallVector<ProgramPoint, 4> FixedRestorePoints = |
| fixPopsPlacements(RestorePoints, SaveOffset, I); |
| if (FixedRestorePoints.empty()) |
| UsePushPops = false; |
| else |
| RestorePoints = FixedRestorePoints; |
| } |
| |
| // Disable push-pop mode for all CSRs in this function |
| if (!UsePushPops) |
| DisablePushPopMode = true; |
| else |
| UsedPushPopMode = true; |
| |
| scheduleOldSaveRestoresRemoval(I, UsePushPops); |
| scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops); |
| MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize)); |
| TotalEstimatedWin += EstimatedWin; |
| } |
| |
  // Revert push-pop mode if it failed for even a single CSR
| if (DisablePushPopMode && UsedPushPopMode) { |
| UsedPushPopMode = false; |
| for (BinaryBasicBlock &BB : BF) { |
| auto WRI = Todo.find(&BB); |
| if (WRI != Todo.end()) { |
| std::vector<WorklistItem> &TodoList = WRI->second; |
| for (WorklistItem &Item : TodoList) |
| if (Item.Action == WorklistItem::InsertPushOrPop) |
| Item.Action = WorklistItem::InsertLoadOrStore; |
| } |
| for (MCInst &Inst : llvm::reverse(BB)) { |
| auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( |
| Inst, getAnnotationIndex()); |
| if (!TodoList) |
| continue; |
| bool isCFI = BC.MIB->isCFI(Inst); |
| for (WorklistItem &Item : *TodoList) { |
| if (Item.Action == WorklistItem::InsertPushOrPop) |
| Item.Action = WorklistItem::InsertLoadOrStore; |
| if (!isCFI && Item.Action == WorklistItem::Erase) |
| Item.Action = WorklistItem::ChangeToAdjustment; |
| } |
| } |
| } |
| } |
| SpillsMovedDynamicCount += TotalEstimatedWin; |
| |
| // Update statistics |
| if (!UsedPushPopMode) { |
| SpillsMovedRegularMode += MovedRegs.size(); |
| return; |
| } |
| |
| // Schedule modifications to stack-accessing instructions via |
| // StackLayoutModifier. |
| SpillsMovedPushPopMode += MovedRegs.size(); |
| for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) { |
| unsigned RegNdx; |
| MCInst *SavePos; |
| size_t SaveSize; |
| std::tie(RegNdx, SavePos, SaveSize) = I; |
| for (MCInst *Save : CSA.getSavesByReg(RegNdx)) |
| SLM.collapseRegion(Save); |
| SLM.insertRegion(SavePos, SaveSize); |
| } |
| } |
| |
| namespace { |
| /// Helper function to identify whether two basic blocks created by splitting |
| /// a critical edge have the same contents. |
| bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A, |
| const BinaryBasicBlock &B) { |
| if (A.succ_size() != B.succ_size()) |
| return false; |
| if (A.succ_size() != 1) |
| return false; |
| |
| if (*A.succ_begin() != *B.succ_begin()) |
| return false; |
| |
| if (A.size() != B.size()) |
| return false; |
| |
| // Compare instructions |
| auto I = A.begin(), E = A.end(); |
| auto OtherI = B.begin(), OtherE = B.end(); |
| while (I != E && OtherI != OtherE) { |
| if (I->getOpcode() != OtherI->getOpcode()) |
| return false; |
| if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) { |
| return true; |
| })) |
| return false; |
| ++I; |
| ++OtherI; |
| } |
| return true; |
| } |
| } // namespace |
| |
| bool ShrinkWrapping::foldIdenticalSplitEdges() { |
| bool Changed = false; |
| for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) { |
| BinaryBasicBlock &BB = *Iter; |
| if (!BB.getName().starts_with(".LSplitEdge")) |
| continue; |
| for (BinaryBasicBlock &RBB : llvm::reverse(BF)) { |
| if (&RBB == &BB) |
| break; |
| if (!RBB.getName().starts_with(".LSplitEdge") || !RBB.isValid() || |
| !isIdenticalSplitEdgeBB(BC, *Iter, RBB)) |
| continue; |
| assert(RBB.pred_size() == 1 && "Invalid split edge BB"); |
| BinaryBasicBlock *Pred = *RBB.pred_begin(); |
| uint64_t OrigCount = Pred->branch_info_begin()->Count; |
| uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount; |
| BF.replaceJumpTableEntryIn(Pred, &RBB, &BB); |
| Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds); |
| Changed = true; |
| // Remove the block from CFG |
| RBB.markValid(false); |
| } |
| } |
| |
| return Changed; |
| } |
| |
| namespace { |
| |
// A special StackPointerTracking that compensates for our future plans to
// remove/add instructions.
| class PredictiveStackPointerTracking |
| : public StackPointerTrackingBase<PredictiveStackPointerTracking> { |
| friend class DataflowAnalysis<PredictiveStackPointerTracking, |
| std::pair<int, int>>; |
| decltype(ShrinkWrapping::Todo) &TodoMap; |
| DataflowInfoManager &Info; |
| |
| std::optional<unsigned> AnnotationIndex; |
| |
| protected: |
| void compNextAux(const MCInst &Point, |
| const std::vector<ShrinkWrapping::WorklistItem> &TodoItems, |
| std::pair<int, int> &Res) { |
| for (const ShrinkWrapping::WorklistItem &Item : TodoItems) { |
| if (Item.Action == ShrinkWrapping::WorklistItem::Erase && |
| BC.MIB->isPush(Point)) { |
| Res.first += BC.MIB->getPushSize(Point); |
| continue; |
| } |
| if (Item.Action == ShrinkWrapping::WorklistItem::Erase && |
| BC.MIB->isPop(Point)) { |
| Res.first -= BC.MIB->getPopSize(Point); |
| continue; |
| } |
| if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && |
| Item.FIEToInsert.IsStore) { |
| Res.first -= Item.FIEToInsert.Size; |
| continue; |
| } |
| if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && |
| Item.FIEToInsert.IsLoad) { |
| Res.first += Item.FIEToInsert.Size; |
| continue; |
| } |
| } |
| } |
| |
| std::pair<int, int> computeNext(const MCInst &Point, |
| const std::pair<int, int> &Cur) { |
| std::pair<int, int> Res = |
| StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext( |
| Point, Cur); |
| if (Res.first == StackPointerTracking::SUPERPOSITION || |
| Res.first == StackPointerTracking::EMPTY) |
| return Res; |
| auto TodoItems = |
| BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>( |
| Point, ShrinkWrapping::getAnnotationName()); |
| if (TodoItems) |
| compNextAux(Point, *TodoItems, Res); |
| auto &InsnToBBMap = Info.getInsnToBBMap(); |
| if (&*InsnToBBMap[&Point]->rbegin() != &Point) |
| return Res; |
| auto WRI = TodoMap.find(InsnToBBMap[&Point]); |
| if (WRI == TodoMap.end()) |
| return Res; |
| compNextAux(Point, WRI->second, Res); |
| return Res; |
| } |
| |
| StringRef getAnnotationName() const { |
| return StringRef("PredictiveStackPointerTracking"); |
| } |
| |
| public: |
| PredictiveStackPointerTracking(BinaryFunction &BF, |
| decltype(ShrinkWrapping::Todo) &TodoMap, |
| DataflowInfoManager &Info, |
| MCPlusBuilder::AllocatorIdTy AllocatorId = 0) |
| : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF, |
| AllocatorId), |
| TodoMap(TodoMap), Info(Info) {} |
| |
| void run() { |
| StackPointerTrackingBase<PredictiveStackPointerTracking>::run(); |
| } |
| }; |
| |
| } // end anonymous namespace |
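// Illustrative: if a block's final "push %rbx" is scheduled for erasure,
// PredictiveStackPointerTracking already reports the post-deletion SP (8
// bytes higher on x86-64), so insertion points are computed against the
// stack state of the rewritten function rather than the current one.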
| |
| void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush, |
| int SPValPop) { |
| MCInst *SavePoint = nullptr; |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : llvm::reverse(BB)) { |
| int32_t SrcImm = 0; |
| MCPhysReg Reg = 0; |
| MCPhysReg StackPtrReg = 0; |
| int64_t StackOffset = 0; |
| bool IsIndexed = false; |
| bool IsLoad = false; |
| bool IsStore = false; |
| bool IsSimple = false; |
| bool IsStoreFromReg = false; |
| uint8_t Size = 0; |
| if (!BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, |
| SrcImm, StackPtrReg, StackOffset, Size, |
| IsSimple, IsIndexed)) |
| continue; |
| if (Reg != CSR || !IsStore || !IsSimple) |
| continue; |
| SavePoint = &Inst; |
| break; |
| } |
| if (SavePoint) |
| break; |
| } |
| assert(SavePoint); |
| LLVM_DEBUG({ |
| dbgs() << "Now using as save point for reg " << CSR << " :"; |
| SavePoint->dump(); |
| }); |
| bool PrevAffectedZone = false; |
| BinaryBasicBlock *PrevBB = nullptr; |
| DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
| for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { |
| if (BB->size() == 0) |
| continue; |
| const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint); |
| const bool InAffectedZoneAtBegin = |
| (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]]; |
| bool InAffectedZone = InAffectedZoneAtBegin; |
| for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) { |
| const bool CurZone = DA.count(*InstIter, *SavePoint); |
| if (InAffectedZone != CurZone) { |
| auto InsertionIter = InstIter; |
| ++InsertionIter; |
| InAffectedZone = CurZone; |
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPush);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPop);
| --InstIter; |
| } |
| } |
| // Are we at the first basic block or hot-cold split point? |
| if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) { |
| if (InAffectedZoneAtBegin) |
| insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush); |
| } else if (InAffectedZoneAtBegin != PrevAffectedZone) { |
| if (InAffectedZoneAtBegin) |
| insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush); |
| else |
| insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop); |
| } |
| PrevAffectedZone = InAffectedZoneAtEnd; |
| PrevBB = BB; |
| } |
| } |
| |
| void ShrinkWrapping::rebuildCFIForSP() { |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : BB) { |
| if (!BC.MIB->isCFI(Inst)) |
| continue; |
| const MCCFIInstruction *CFI = BF.getCFIFor(Inst); |
| if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) |
| BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId); |
| } |
| } |
| |
| int PrevSPVal = -8; |
| BinaryBasicBlock *PrevBB = nullptr; |
| StackPointerTracking &SPT = Info.getStackPointerTracking(); |
| for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { |
| if (BB->size() == 0) |
| continue; |
| const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first; |
| const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first; |
| int SPVal = SPValAtBegin; |
| for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) { |
| const int CurVal = SPT.getStateAt(*Iter)->first; |
| if (SPVal != CurVal) { |
| auto InsertionIter = Iter; |
| ++InsertionIter; |
| Iter = BF.addCFIInstruction( |
| BB, InsertionIter, |
| MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal)); |
| SPVal = CurVal; |
| } |
| } |
| if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold()) |
| BF.addCFIInstruction( |
| BB, BB->begin(), |
| MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); |
| else if (SPValAtBegin != PrevSPVal) |
| BF.addCFIInstruction( |
| PrevBB, PrevBB->end(), |
| MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); |
| PrevSPVal = SPValAtEnd; |
| PrevBB = BB; |
| } |
| |
| for (BinaryBasicBlock &BB : BF) |
| for (auto I = BB.begin(); I != BB.end();) |
| if (BC.MIB->hasAnnotation(*I, "DeleteMe")) |
| I = BB.eraseInstruction(I); |
| else |
| ++I; |
| } |
| |
| Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal, |
| const FrameIndexEntry &FIE, |
| bool CreatePushOrPop) { |
| MCInst NewInst; |
| if (SPVal != StackPointerTracking::SUPERPOSITION && |
| SPVal != StackPointerTracking::EMPTY) { |
| if (FIE.IsLoad) { |
| BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(), |
| FIE.StackOffset - SPVal, FIE.RegOrImm, |
| FIE.Size); |
| } else { |
| BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(), |
| FIE.StackOffset - SPVal, FIE.RegOrImm, |
| FIE.Size); |
| } |
| if (CreatePushOrPop) |
| BC.MIB->changeToPushOrPop(NewInst); |
| return NewInst; |
| } |
| assert(FPVal != StackPointerTracking::SUPERPOSITION && |
| FPVal != StackPointerTracking::EMPTY); |
| |
| if (FIE.IsLoad) { |
| BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(), |
| FIE.StackOffset - FPVal, FIE.RegOrImm, |
| FIE.Size); |
| } else { |
| BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(), |
| FIE.StackOffset - FPVal, FIE.RegOrImm, FIE.Size); |
| } |
| return NewInst; |
| } |
| |
| void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) { |
| const MCCFIInstruction *CFI = BF.getCFIFor(Inst); |
| if (UpdatedCFIs.count(CFI)) |
| return; |
| |
| switch (CFI->getOperation()) { |
| case MCCFIInstruction::OpDefCfa: |
| case MCCFIInstruction::OpDefCfaRegister: |
| case MCCFIInstruction::OpDefCfaOffset: |
| CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset); |
| break; |
| case MCCFIInstruction::OpOffset: |
| default: |
| break; |
| } |
| |
| UpdatedCFIs.insert(CFI); |
| } |
| |
| BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB, |
| BBIterTy Pos, unsigned Reg, |
| bool isPush, int Sz, |
| int64_t NewOffset) { |
| if (isPush) { |
| for (uint32_t Idx : DeletedPushCFIs[Reg]) { |
| Pos = BF.addCFIPseudo(&BB, Pos, Idx); |
| updateCFIInstOffset(*Pos++, NewOffset); |
| } |
| if (HasDeletedOffsetCFIs[Reg]) { |
| Pos = BF.addCFIInstruction( |
| &BB, Pos, |
| MCCFIInstruction::createOffset( |
| nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset)); |
| ++Pos; |
| } |
| } else { |
| for (uint32_t Idx : DeletedPopCFIs[Reg]) { |
| Pos = BF.addCFIPseudo(&BB, Pos, Idx); |
| updateCFIInstOffset(*Pos++, NewOffset); |
| } |
| if (HasDeletedOffsetCFIs[Reg]) { |
| Pos = BF.addCFIInstruction( |
| &BB, Pos, |
| MCCFIInstruction::createSameValue( |
| nullptr, BC.MRI->getDwarfRegNum(Reg, false))); |
| ++Pos; |
| } |
| } |
| return Pos; |
| } |
| |
| Expected<BBIterTy> ShrinkWrapping::processInsertion(BBIterTy InsertionPoint, |
| BinaryBasicBlock *CurBB, |
| const WorklistItem &Item, |
| int64_t SPVal, |
| int64_t FPVal) { |
|   // Trigger CFI reconstruction for this CSR if necessary: writing to |
|   // PushOffsetByReg/PopOffsetByReg will cause rebuildCFI() to re-insert |
|   // the CFIs deleted for this reg, with updated offsets. |
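|   // For an inserted push, the saved value lands at SPVal - Size because |
|   // the push itself first decrements SP: e.g., with SPVal at -8, an 8-byte |
|   // push leaves the slot at offset -16. |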
| if ((Item.FIEToInsert.IsStore && |
| !DeletedPushCFIs[Item.AffectedReg].empty()) || |
| (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) || |
| HasDeletedOffsetCFIs[Item.AffectedReg]) { |
| if (Item.Action == WorklistItem::InsertPushOrPop) { |
| if (Item.FIEToInsert.IsStore) |
| PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size; |
| else |
| PopOffsetByReg[Item.AffectedReg] = SPVal; |
| } else { |
| if (Item.FIEToInsert.IsStore) |
| PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; |
| else |
| PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; |
| } |
| } |
| |
| LLVM_DEBUG({ |
| dbgs() << "Creating stack access with SPVal = " << SPVal |
| << "; stack offset = " << Item.FIEToInsert.StackOffset |
|            << "; is push = " << (Item.Action == WorklistItem::InsertPushOrPop) |
| << "\n"; |
| }); |
| Expected<MCInst> NewInstOrErr = |
| createStackAccess(SPVal, FPVal, Item.FIEToInsert, |
| Item.Action == WorklistItem::InsertPushOrPop); |
| if (auto E = NewInstOrErr.takeError()) |
| return Error(std::move(E)); |
| MCInst &NewInst = *NewInstOrErr; |
| if (InsertionPoint != CurBB->end()) { |
| LLVM_DEBUG({ |
| dbgs() << "Adding before Inst: "; |
| InsertionPoint->dump(); |
| dbgs() << "the following inst: "; |
| NewInst.dump(); |
| }); |
| BBIterTy Iter = |
| CurBB->insertInstruction(InsertionPoint, std::move(NewInst)); |
| return ++Iter; |
| } |
| CurBB->addInstruction(std::move(NewInst)); |
| LLVM_DEBUG(dbgs() << "Adding to BB!\n"); |
| return CurBB->end(); |
| } |
| |
| Expected<BBIterTy> ShrinkWrapping::processInsertionsList( |
| BBIterTy InsertionPoint, BinaryBasicBlock *CurBB, |
| std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) { |
| bool HasInsertions = llvm::any_of(TodoList, [&](WorklistItem &Item) { |
| return Item.Action == WorklistItem::InsertLoadOrStore || |
| Item.Action == WorklistItem::InsertPushOrPop; |
| }); |
| |
| if (!HasInsertions) |
| return InsertionPoint; |
| |
| assert(((SPVal != StackPointerTracking::SUPERPOSITION && |
| SPVal != StackPointerTracking::EMPTY) || |
| (FPVal != StackPointerTracking::SUPERPOSITION && |
| FPVal != StackPointerTracking::EMPTY)) && |
| "Cannot insert if we have no idea of the stack state here"); |
| |
|   // Revert the effect of PSPT for this location; we want the SP value as |
|   // it is before the scheduled insertions take place. |
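|   // PSPT models the stack as if the end-of-block insertions had already |
|   // executed, so undo their effect here: e.g., one pending 8-byte push |
|   // means the predicted SP is 8 bytes lower than the pre-insertion SP. |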
| if (InsertionPoint == CurBB->end()) { |
| for (WorklistItem &Item : TodoList) { |
| if (Item.Action != WorklistItem::InsertPushOrPop) |
| continue; |
| if (Item.FIEToInsert.IsStore) |
| SPVal += Item.FIEToInsert.Size; |
| if (Item.FIEToInsert.IsLoad) |
| SPVal -= Item.FIEToInsert.Size; |
| } |
| } |
| |
| // Reorder POPs to obey the correct dominance relation between them |
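|   // Pops are sorted by descending DomOrder so restores appear in the |
|   // reverse order of their saves, preserving LIFO stack discipline; the |
|   // stable sort keeps non-pop items ahead in their original order. |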
| llvm::stable_sort(TodoList, [&](const WorklistItem &A, |
| const WorklistItem &B) { |
| if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) && |
| (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad)) |
| return false; |
| if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad)) |
| return true; |
| if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad)) |
| return false; |
| return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg]; |
| }); |
| |
| // Process insertions |
| for (WorklistItem &Item : TodoList) { |
| if (Item.Action == WorklistItem::Erase || |
| Item.Action == WorklistItem::ChangeToAdjustment) |
| continue; |
| |
| auto InsertionPointOrErr = |
| processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal); |
| if (auto E = InsertionPointOrErr.takeError()) |
| return Error(std::move(E)); |
| InsertionPoint = *InsertionPointOrErr; |
| if (Item.Action == WorklistItem::InsertPushOrPop && |
| Item.FIEToInsert.IsStore) |
| SPVal -= Item.FIEToInsert.Size; |
| if (Item.Action == WorklistItem::InsertPushOrPop && |
| Item.FIEToInsert.IsLoad) |
| SPVal += Item.FIEToInsert.Size; |
| } |
| return InsertionPoint; |
| } |
| |
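| // Execute all scheduled insertions: per BB, worklists annotated on an |
| // instruction are materialized right before it, and worklists attached to |
| // the BB itself (in Todo) are appended at the end of the block, with PSPT |
| // supplying the predicted SP/FP state at each insertion point. |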
| Expected<bool> ShrinkWrapping::processInsertions() { |
| PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId); |
| PSPT.run(); |
| |
| bool Changes = false; |
| for (BinaryBasicBlock &BB : BF) { |
|     // Process insertions scheduled to happen before a given instruction. |
| for (auto I = BB.begin(); I != BB.end(); ++I) { |
| MCInst &Inst = *I; |
| auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( |
| Inst, getAnnotationIndex()); |
| if (!TodoList) |
| continue; |
| Changes = true; |
| std::vector<WorklistItem> List = *TodoList; |
| LLVM_DEBUG({ |
| dbgs() << "Now processing insertions in " << BB.getName() |
| << " before inst: "; |
| Inst.dump(); |
| }); |
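|       // Query the predicted SP/FP state just before Inst: at the previous |
|       // instruction, or at the BB entry if Inst is the block's first. |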
| auto Iter = I; |
| std::pair<int, int> SPTState = |
| *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter)); |
| auto IterOrErr = |
| processInsertionsList(I, &BB, List, SPTState.first, SPTState.second); |
| if (auto E = IterOrErr.takeError()) |
| return Error(std::move(E)); |
| I = *IterOrErr; |
| } |
|     // Process insertions scheduled at the end of the BB. |
| auto WRI = Todo.find(&BB); |
| if (WRI != Todo.end()) { |
| std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin()); |
| if (auto E = processInsertionsList(BB.end(), &BB, WRI->second, |
| SPTState.first, SPTState.second) |
| .takeError()) |
| return Error(std::move(E)); |
| Changes = true; |
| } |
| } |
| return Changes; |
| } |
| |
| void ShrinkWrapping::processDeletions() { |
| LivenessAnalysis &LA = Info.getLivenessAnalysis(); |
| for (BinaryBasicBlock &BB : BF) { |
| for (auto II = BB.begin(); II != BB.end(); ++II) { |
| MCInst &Inst = *II; |
| auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( |
| Inst, getAnnotationIndex()); |
| if (!TodoList) |
| continue; |
| // Process all deletions |
| for (WorklistItem &Item : *TodoList) { |
| if (Item.Action != WorklistItem::Erase && |
| Item.Action != WorklistItem::ChangeToAdjustment) |
| continue; |
| |
| if (Item.Action == WorklistItem::ChangeToAdjustment) { |
|           // Is the flags reg live at this program point? |
| bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg()); |
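|           // The deleted push/pop still moved SP, so substitute an explicit |
|           // SP adjustment of the same size, in a flags-preserving form |
|           // (e.g., LEA on x86) when flags are live. |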
| if (int Sz = BC.MIB->getPushSize(Inst)) { |
| BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags); |
| continue; |
| } |
| if (int Sz = BC.MIB->getPopSize(Inst)) { |
| BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags); |
| continue; |
| } |
| } |
| |
| LLVM_DEBUG({ |
| dbgs() << "Erasing: "; |
| BC.printInstruction(dbgs(), Inst); |
| }); |
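|         // Erasing Inst invalidates the iterator and the annotation list, |
|         // so rewind and break out to resume with the next instruction. |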
| II = std::prev(BB.eraseInstruction(II)); |
| break; |
| } |
| } |
| } |
| } |
| |
| void ShrinkWrapping::rebuildCFI() { |
| const bool FP = Info.getStackPointerTracking().HasFramePointer; |
| Info.invalidateAll(); |
| if (!FP) { |
| rebuildCFIForSP(); |
| Info.invalidateAll(); |
| } |
| for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { |
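|     // A zero recorded offset means no CFI update was requested for this |
|     // reg (see processInsertion), so skip it. |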
| if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0) |
| continue; |
| const int64_t SPValPush = PushOffsetByReg[I]; |
| const int64_t SPValPop = PopOffsetByReg[I]; |
| insertUpdatedCFI(I, SPValPush, SPValPop); |
| Info.invalidateAll(); |
| } |
| } |
| |
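| // Top-level driver for one function: collect execution-weighted (dynamic) |
| // instruction and store counts, bail out on functions we cannot or should |
| // not touch, then compute save locations, move save/restores, and |
| // materialize the changes (insertions, deletions, CFI rebuild). |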
| Expected<bool> ShrinkWrapping::perform(bool HotOnly) { |
| HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false); |
| PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); |
| PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); |
| |
| // Update pass statistics |
| uint64_t TotalInstrs = 0ULL; |
| uint64_t TotalStoreInstrs = 0ULL; |
| for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { |
| uint64_t BBExecCount = BB->getExecutionCount(); |
| if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) |
| continue; |
| for (const auto &Instr : *BB) { |
| if (BC.MIB->isPseudo(Instr)) |
| continue; |
| if (BC.MIB->mayStore(Instr)) |
| TotalStoreInstrs += BBExecCount; |
| TotalInstrs += BBExecCount; |
| } |
| } |
| InstrDynamicCount += TotalInstrs; |
| StoreDynamicCount += TotalStoreInstrs; |
| |
| if (!FA.hasFrameInfo(BF)) |
| return false; |
| |
| if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold())) |
| return false; |
| |
| if (opts::EqualizeBBCounts) |
| equalizeBBCounts(Info, BF); |
| |
| if (BF.checkForAmbiguousJumpTables()) { |
| LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName() |
| << ".\n"); |
|     // We could call disambiguateJumpTables here, but it is probably not |
|     // worth the cost (duplicating potentially large jump tables could |
|     // regress dcache misses). Moreover, ambiguous JTs are rare and |
|     // typically come from hand-written assembly. Just bail. |
| return false; |
| } |
| SLM.initialize(); |
| CSA.compute(); |
| classifyCSRUses(); |
| pruneUnwantedCSRs(); |
| computeSaveLocations(); |
| moveSaveRestores(); |
| LLVM_DEBUG({ |
| dbgs() << "Func before shrink-wrapping: \n"; |
| BF.dump(); |
| }); |
| SLM.performChanges(); |
| // Early exit if processInsertions doesn't detect any todo items |
| auto ModifiedOrErr = processInsertions(); |
| if (auto E = ModifiedOrErr.takeError()) |
| return Error(std::move(E)); |
| const bool Modified = *ModifiedOrErr; |
| if (!Modified) |
| return false; |
| processDeletions(); |
| if (foldIdenticalSplitEdges()) { |
| const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs(); |
| (void)Stats; |
| LLVM_DEBUG(dbgs() << "Deleted " << Stats.first |
| << " redundant split edge BBs (" << Stats.second |
| << " bytes) for " << BF.getPrintName() << "\n"); |
| } |
| rebuildCFI(); |
| // We may have split edges, creating BBs that need correct branching |
| BF.fixBranches(); |
| LLVM_DEBUG({ |
| dbgs() << "Func after shrink-wrapping: \n"; |
| BF.dump(); |
| }); |
| return true; |
| } |
| |
| void ShrinkWrapping::printStats(BinaryContext &BC) { |
| BC.outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode |
| << " spills inserting load/stores and " << SpillsMovedPushPopMode |
| << " spills inserting push/pops\n"; |
| if (!InstrDynamicCount || !StoreDynamicCount) |
| return; |
| BC.outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount |
| << " store executions (" |
| << format("%.1lf%%", |
| (100.0 * SpillsMovedDynamicCount / InstrDynamicCount)) |
| << " total instructions executed, " |
| << format("%.1lf%%", |
| (100.0 * SpillsMovedDynamicCount / StoreDynamicCount)) |
| << " store instructions)\n"; |
| BC.outs() << "BOLT-INFO: Shrink wrapping failed at reducing " |
| << SpillsFailedDynamicCount << " store executions (" |
| << format("%.1lf%%", |
| (100.0 * SpillsFailedDynamicCount / InstrDynamicCount)) |
| << " total instructions executed, " |
| << format("%.1lf%%", |
| (100.0 * SpillsFailedDynamicCount / StoreDynamicCount)) |
| << " store instructions)\n"; |
| } |
| |
| // Stream and equality operators required because WorklistItem vectors are |
| // stored as MCAnnotations. |
| raw_ostream &operator<<(raw_ostream &OS, |
| const std::vector<ShrinkWrapping::WorklistItem> &Vec) { |
| OS << "SWTodo["; |
| const char *Sep = ""; |
| for (const ShrinkWrapping::WorklistItem &Item : Vec) { |
| OS << Sep; |
| switch (Item.Action) { |
| case ShrinkWrapping::WorklistItem::Erase: |
| OS << "Erase"; |
| break; |
| case ShrinkWrapping::WorklistItem::ChangeToAdjustment: |
| OS << "ChangeToAdjustment"; |
| break; |
| case ShrinkWrapping::WorklistItem::InsertLoadOrStore: |
| OS << "InsertLoadOrStore"; |
| break; |
| case ShrinkWrapping::WorklistItem::InsertPushOrPop: |
| OS << "InsertPushOrPop"; |
| break; |
| } |
| Sep = ", "; |
| } |
| OS << "]"; |
| return OS; |
| } |
| |
| raw_ostream & |
| operator<<(raw_ostream &OS, |
| const std::vector<StackLayoutModifier::WorklistItem> &Vec) { |
| OS << "SLMTodo["; |
| const char *Sep = ""; |
| for (const StackLayoutModifier::WorklistItem &Item : Vec) { |
| OS << Sep; |
| switch (Item.Action) { |
| case StackLayoutModifier::WorklistItem::None: |
| OS << "None"; |
| break; |
| case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset: |
| OS << "AdjustLoadStoreOffset"; |
| break; |
| case StackLayoutModifier::WorklistItem::AdjustCFI: |
| OS << "AdjustCFI"; |
| break; |
| } |
| Sep = ", "; |
| } |
| OS << "]"; |
| return OS; |
| } |
| |
| bool operator==(const ShrinkWrapping::WorklistItem &A, |
| const ShrinkWrapping::WorklistItem &B) { |
| return (A.Action == B.Action && A.AffectedReg == B.AffectedReg && |
| A.Adjustment == B.Adjustment && |
| A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad && |
| A.FIEToInsert.IsStore == B.FIEToInsert.IsStore && |
| A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm && |
| A.FIEToInsert.Size == B.FIEToInsert.Size && |
| A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple && |
| A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset); |
| } |
| |
| bool operator==(const StackLayoutModifier::WorklistItem &A, |
| const StackLayoutModifier::WorklistItem &B) { |
| return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate); |
| } |
| |
| } // end namespace bolt |
| } // end namespace llvm |