#define DEBUG_TYPE "si-lower-control-flow"

class SILowerControlFlow {
  bool EnableOptimizeEndCf = false;
    while (I != End && !I->isUnconditionalBranch())

  void optimizeEndCf();
  SILowerControlFlow(const GCNSubtarget *ST, LiveIntervals *LIS,
                     LiveVariables *LV, MachineDominatorTree *MDT,
                     MachinePostDominatorTree *PDT)
      : LIS(LIS), LV(LV), MDT(MDT), PDT(PDT),
        LMC(AMDGPU::LaneMaskConstants::get(*ST)) {}

  bool run(MachineFunction &MF);
  SILowerControlFlowLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI Lower control flow pseudo instructions";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
char SILowerControlFlowLegacy::ID = 0;
  while (!Worklist.empty()) {
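
// An SI_IF is "simple" when the saved exec mask has exactly one non-debug use
// and that use is the matching SI_END_CF; in that case the saved copy of exec
// can feed the END_CF directly and the XOR in emitIf can be skipped.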
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) {
  Register SaveExecReg = MI.getOperand(0).getReg();
  auto U = MRI->use_instr_nodbg_begin(SaveExecReg);

  if (U == MRI->use_instr_nodbg_end() ||
      std::next(U) != MRI->use_instr_nodbg_end() ||
      U->getOpcode() != AMDGPU::SI_END_CF)
    return false;
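
// Lower SI_IF: copy the current exec mask, AND exec with the condition so only
// the lanes taking the 'then' side stay active, record the mask of lanes that
// were turned off (skipped in the simple-if case), and emit S_CBRANCH_EXECZ to
// jump over the region when no lanes remain.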
void SILowerControlFlow::emitIf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  Register SaveExecReg = MI.getOperand(0).getReg();
  MachineOperand &Cond = MI.getOperand(1);
  assert(Cond.getSubReg() == AMDGPU::NoSubRegister);

  MachineOperand &ImpDefSCC = MI.getOperand(4);

  auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg);

  Register CopyReg = SimpleIf ? SaveExecReg
                              : MRI->createVirtualRegister(BoolRC);
  MachineInstr *CopyExec =
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)

  LoweredIf.insert(CopyReg);

  setImpSCCDefDead(*And, true);

  MachineInstr *Xor = nullptr;

  setImpSCCDefDead(*Xor, ImpDefSCC.isDead());

  MachineInstr *SetExec =

  I = skipToUncondBrOrEnd(MBB, I);

  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
                            .add(MI.getOperand(2));

  MI.eraseFromParent();

  MI.eraseFromParent();

  RecomputeRegs.insert(SaveExecReg);
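
// Lower SI_ELSE: roughly, re-enable the saved 'if' lanes with S_OR_SAVEEXEC,
// flip exec over to the lanes that must run the 'else' side, and branch past
// the region with S_CBRANCH_EXECZ when none remain.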
void SILowerControlFlow::emitElse(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  Register SaveReg = MRI->createVirtualRegister(BoolRC);
  MachineInstr *OrSaveExec =
          .add(MI.getOperand(1));

  MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();

  ElsePt = skipToUncondBrOrEnd(MBB, ElsePt);

  MI.eraseFromParent();

  MI.eraseFromParent();

  RecomputeRegs.insert(SrcReg);
  RecomputeRegs.insert(DstReg);
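
// Lower SI_IF_BREAK: accumulate the break condition into the loop's break
// mask. Unless the condition is already known to come from the guarding SI_IF
// in the same block, it is first AND'ed with exec so that inactive lanes
// cannot set their break bit, and the result is OR'ed into the existing mask.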
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  auto Dst = MI.getOperand(0).getReg();

  bool SkipAnding = false;
  if (MI.getOperand(1).isReg()) {
    if (MachineInstr *Def = MRI->getUniqueVRegDef(MI.getOperand(1).getReg())) {
      SkipAnding = Def->getParent() == MI.getParent()

  MachineInstr *And = nullptr, *Or = nullptr;

    AndReg = MRI->createVirtualRegister(BoolRC);
        .add(MI.getOperand(1));
        .add(MI.getOperand(2));

        .add(MI.getOperand(1))
        .add(MI.getOperand(2));

    RecomputeRegs.insert(And->getOperand(2).getReg());

  MI.eraseFromParent();
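
// Lower SI_LOOP: clear the lanes that have taken a break (S_ANDN2 of exec with
// the accumulated break mask) and branch back to the loop header while any
// lanes remain active (S_CBRANCH_EXECNZ).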
void SILowerControlFlow::emitLoop(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  MachineInstr *AndN2 =
          .add(MI.getOperand(0));

  auto BranchPt = skipToUncondBrOrEnd(MBB, MI.getIterator());
          .add(MI.getOperand(1));

  RecomputeRegs.insert(MI.getOperand(0).getReg());

  MI.eraseFromParent();
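
// Skip forward to the first instruction that actually reads exec, following
// trivial single-successor blocks; instructions that ignore exec can be
// stepped over when deciding whether an exec-restore is still needed.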
SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
  SmallPtrSet<const MachineBasicBlock *, 4> Visited;
  MachineBasicBlock *B = &MBB;

    for ( ; It != E; ++It) {
      if (TII->mayReadEXEC(*MRI, *It))

    if (B->succ_size() != 1)

    MachineBasicBlock *Succ = *B->succ_begin();
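
// Lower SI_END_CF: OR the saved mask back into exec to re-enable the lanes
// that were disabled for the region. If an earlier instruction clobbers the
// data register before the insertion point, the block is split first and the
// dominator / post-dominator trees are updated for the new edge.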
MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  bool NeedBlockSplit = false;
    if (I->modifiesRegister(DataReg, TRI)) {
      NeedBlockSplit = true;

  unsigned Opcode = LMC.OrOpc;
  MachineBasicBlock *SplitBB = &MBB;
  if (NeedBlockSplit) {
    if (SplitBB != &MBB && (MDT || PDT)) {
      for (MachineBasicBlock *Succ : SplitBB->successors()) {
        DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});

          .add(MI.getOperand(0));

  if (SplitBB != &MBB) {
    DenseSet<Register> DefInOrigBlock;

    for (MachineBasicBlock *BlockPiece : {&MBB, SplitBB}) {
      for (MachineInstr &X : *BlockPiece) {
        for (MachineOperand &Op : X.all_defs()) {
          if (Op.getReg().isVirtual())

    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {

      for (MachineInstr *Kill : VI.Kills) {

  LoweredEndCf.insert(NewMI);

  MI.eraseFromParent();
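
// Collect the source operands feeding operand OpNo of a mask AND/OR, looking
// through a full copy or an identical-opcode def in the same block, but only
// if exec is not redefined between that def and the use.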
void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
                                          SmallVectorImpl<MachineOperand> &Src) const {
  MachineOperand &Op = MI.getOperand(OpNo);
  if (!Op.isReg() || !Op.getReg().isVirtual()) {

  MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
  if (!Def || Def->getParent() != MI.getParent() ||
      !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))

  for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
    if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
        !(I->isCopy() && I->getOperand(0).getReg() != LMC.ExecReg))

  for (const auto &SrcOp : Def->explicit_operands())
    if (SrcOp.isReg() && SrcOp.isUse() &&
        (SrcOp.getReg().isVirtual() || SrcOp.getReg() == LMC.ExecReg))
      Src.push_back(SrcOp);
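
// Fold a redundant mask operation: if one operand of this S_AND/S_OR is itself
// produced by an equivalent operation on the same value, replace it with the
// unique source operand and erase the now-dead def.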
void SILowerControlFlow::combineMasks(MachineInstr &MI) {
  assert(MI.getNumExplicitOperands() == 3);

  unsigned OpToReplace = 1;
  findMaskOperands(MI, 1, Ops);
  if (Ops.size() == 1) OpToReplace = 2;
  findMaskOperands(MI, 2, Ops);
  if (Ops.size() != 3) return;

  unsigned UniqueOpndIdx;
  if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
  else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
  else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;

  MI.removeOperand(OpToReplace);
  MI.addOperand(Ops[UniqueOpndIdx]);

  MRI->getUniqueVRegDef(Reg)->eraseFromParent();
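
// Drop an exec-restore whose only exec-reading follower is the restore of an
// enclosing region: re-enabling the inner lanes is pointless when the outer
// mask is OR'ed in immediately afterwards. Gated by EnableOptimizeEndCf
// (-amdgpu-remove-redundant-endcf).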
void SILowerControlFlow::optimizeEndCf() {
  if (!EnableOptimizeEndCf)

  for (MachineInstr *MI : reverse(LoweredEndCf)) {
    MachineBasicBlock &MBB = *MI->getParent();
        skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
        = TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
    const MachineInstr *Def = MRI->getUniqueVRegDef(SavedExec);
    if (Def && LoweredIf.count(SavedExec)) {
        Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg();
      MI->eraseFromParent();
      removeMBBifRedundant(MBB);
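
// Dispatch one control-flow pseudo to its lowering routine. SI_WATERFALL_LOOP
// is simply retargeted to S_CBRANCH_EXECNZ in place; afterwards neighbouring
// S_AND/S_OR mask instructions are cleaned up with combineMasks().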
MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;

  MachineBasicBlock *SplitBB = &MBB;

  switch (MI.getOpcode()) {
  case AMDGPU::SI_ELSE:
  case AMDGPU::SI_IF_BREAK:
  case AMDGPU::SI_LOOP:
  case AMDGPU::SI_WATERFALL_LOOP:
    MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ));
  case AMDGPU::SI_END_CF:
    SplitBB = emitEndCf(MI);
    assert(false && "Attempt to process unsupported instruction");

    MachineInstr &MaskMI = *I;
    case AMDGPU::S_AND_B64:
    case AMDGPU::S_OR_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_OR_B32:
      combineMasks(MaskMI);
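
// Erase a block that contains nothing but debug instructions and an
// unconditional branch, rewiring its predecessors (and any fall-through) to
// its single successor and updating the dominator trees accordingly.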
bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
    if (!I.isDebugInstr() && !I.isUnconditionalBranch())

  MachineBasicBlock *FallThrough = nullptr;

    if (P->getFallThrough(false) == &MBB)

      DTUpdates.push_back({DomTreeT::Insert, P, Succ});

  MachineInstr *BranchMI = BuildMI(*FallThrough, FallThrough->end(),
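
// Main entry point: first record which blocks contain kill terminators (or,
// when demotion is possible, SI_DEMOTE_I1), then walk every block and lower
// each SI_IF/SI_ELSE/SI_IF_BREAK/SI_LOOP/SI_WATERFALL_LOOP/SI_END_CF pseudo
// via process(), resuming in the split block when lowering divides a block.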
bool SILowerControlFlow::run(MachineFunction &MF) {
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  BoolRC = TRI->getBoolRC();

  const bool CanDemote =

  for (auto &MBB : MF) {
    bool IsKillBlock = false;
      if (TII->isKillTerminator(Term.getOpcode())) {

    if (CanDemote && !IsKillBlock) {
      for (auto &MI : MBB) {
        if (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {

       BI != MF.end(); BI = NextBB) {
    NextBB = std::next(BI);
    MachineBasicBlock *MBB = &*BI;

      MachineInstr &MI = *I;
      MachineBasicBlock *SplitMBB = MBB;

      switch (MI.getOpcode()) {
      case AMDGPU::SI_ELSE:
      case AMDGPU::SI_IF_BREAK:
      case AMDGPU::SI_WATERFALL_LOOP:
      case AMDGPU::SI_LOOP:
      case AMDGPU::SI_END_CF:
        SplitMBB = process(MI);

      if (SplitMBB != MBB) {

  RecomputeRegs.clear();
  LoweredEndCf.clear();
bool SILowerControlFlowLegacy::runOnMachineFunction(MachineFunction &MF) {
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
  LiveVariables *LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
  MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
  auto *PDTWrapper =
      getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
  MachinePostDominatorTree *PDT =
      PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
  return SILowerControlFlow(ST, LIS, LV, MDT, PDT).run(MF);

// From the new pass manager entry point (SILowerControlFlowPass::run):
  bool Changed = SILowerControlFlow(ST, LIS, LV, MDT, PDT).run(MF);