26#define DEBUG_TYPE "amdgpu-set-wave-priority"
29 "amdgpu-set-wave-priority-valu-insts-threshold",
30 cl::desc(
"VALU instruction count threshold for adjusting wave priority"),
37 unsigned NumVALUInstsAtStart = 0;
38 bool MayReachVMEMLoad =
false;
44class AMDGPUSetWavePriority {
46 bool run(MachineFunction &MF);
49 MachineInstr *BuildSetprioMI(MachineBasicBlock &
MBB,
51 unsigned priority)
const;
53 const SIInstrInfo *TII;
60 AMDGPUSetWavePriorityLegacy() : MachineFunctionPass(ID) {}
62 StringRef getPassName()
const override {
return "Set wave priority"; }
64 bool runOnMachineFunction(MachineFunction &MF)
override {
68 return AMDGPUSetWavePriority().run(MF);
77char AMDGPUSetWavePriorityLegacy::
ID = 0;
80 return new AMDGPUSetWavePriorityLegacy();
86 unsigned priority)
const {
94 MBBInfoSet &MBBInfos) {
96 if (!MBBInfos[Pred].MayReachVMEMLoad)
99 if (MBBInfos[Succ].MayReachVMEMLoad)
113 if (!AMDGPUSetWavePriority().
run(MF))
120 const unsigned HighPriority = 3;
121 const unsigned LowPriority = 0;
128 TII = ST.getInstrInfo();
131 Attribute A =
F.getFnAttribute(
"amdgpu-wave-priority-threshold");
133 A.getValueAsString().getAsInteger(0, VALUInstsThreshold);
144 unsigned MaxNumVALUInstsInMiddle = 0;
145 unsigned NumVALUInstsAtEnd = 0;
149 MBBInfo &Info = MBBInfos[
MBB];
150 Info.NumVALUInstsAtStart = 0;
151 MaxNumVALUInstsInMiddle = 0;
152 NumVALUInstsAtEnd = 0;
153 Info.LastVMEMLoad = &
MI;
156 MaxNumVALUInstsInMiddle =
157 std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
158 NumVALUInstsAtEnd = 0;
161 ++MBBInfos[
MBB].NumVALUInstsAtStart;
166 bool SuccsMayReachVMEMLoad =
false;
167 unsigned NumFollowingVALUInsts = 0;
169 const MBBInfo &SuccInfo = MBBInfos[Succ];
170 SuccsMayReachVMEMLoad |= SuccInfo.MayReachVMEMLoad;
171 NumFollowingVALUInsts =
172 std::max(NumFollowingVALUInsts, SuccInfo.NumVALUInstsAtStart);
176 Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
177 NumVALUInstsAtEnd += NumFollowingVALUInsts;
179 unsigned MaxNumVALUInsts =
180 std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
181 Info.MayReachVMEMLoad =
182 SuccsMayReachVMEMLoad ||
183 (
Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
187 if (!MBBInfos[&Entry].MayReachVMEMLoad)
194 BuildSetprioMI(Entry,
I, HighPriority);
198 SmallPtrSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
199 for (MachineBasicBlock &
MBB : MF) {
200 if (MBBInfos[&
MBB].MayReachVMEMLoad) {
208 if (MBBInfos[Pred].MayReachVMEMLoad)
209 PriorityLoweringBlocks.
insert(Pred);
223 for (MachineBasicBlock *
MBB : PriorityLoweringBlocks) {
224 MachineInstr *LastVMEMLoad = MBBInfos[
MBB].LastVMEMLoad;
Provides AMDGPU-specific target descriptions.
static cl::opt< unsigned > DefaultVALUInstsThreshold("amdgpu-set-wave-priority-valu-insts-threshold", cl::desc("VALU instruction count threshold for adjusting wave priority"), cl::init(100), cl::Hidden)
static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, MBBInfoSet &MBBInfos)
static bool isVMEMLoad(const MachineInstr &MI)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Interface definition for SIInstrInfo.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Functions, function parameters, and return types can have attributes to indicate how they should be treated by optimizations and code generation.
FunctionPass class - This class is used to implement most global optimizations.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static bool isDS(const MachineInstr &MI)
static bool isVMEM(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPUSetWavePriorityPass()
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< po_iterator< T > > post_order(const T &G)
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.