38#define DEBUG_TYPE "amdgpu-preload-kern-arg-prolog"
50class AMDGPUPreloadKernArgProlog {
66 void createBackCompatBlock(
unsigned NumKernArgPreloadSGPRs);
71 unsigned NumKernArgPreloadSGPRs);
81 return "AMDGPU Preload Kernel Arguments Prolog";
89char AMDGPUPreloadKernArgPrologLegacy::ID = 0;
92 "AMDGPU Preload Kernel Arguments Prolog",
false,
false)
95 AMDGPUPreloadKernArgPrologLegacy::
ID;
98 return new AMDGPUPreloadKernArgPrologLegacy();
101bool AMDGPUPreloadKernArgPrologLegacy::runOnMachineFunction(
103 return AMDGPUPreloadKernArgProlog(MF).run();
/// Construct the prolog helper for a single machine function.
///
/// Stores a reference to \p MF and looks up the objects the helper works with:
/// the GCNSubtarget of the function, its SIMachineFunctionInfo, and the
/// instruction and register info objects returned by that subtarget.
///
/// \param MF The machine function this helper is bound to.
//
// NOTE(review): TII and TRI are initialized from ST. Members are initialized
// in declaration order, so ST presumably has to be declared before TII and
// TRI in the class - confirm against the class definition.
106AMDGPUPreloadKernArgProlog::AMDGPUPreloadKernArgProlog(MachineFunction &MF)
107 : MF(MF),
ST(MF.getSubtarget<GCNSubtarget>()),
108 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
TII(*
ST.getInstrInfo()),
109 TRI(*
ST.getRegisterInfo()) {}
111bool AMDGPUPreloadKernArgProlog::run() {
116 if (!NumKernArgPreloadSGPRs)
119 createBackCompatBlock(NumKernArgPreloadSGPRs);
123void AMDGPUPreloadKernArgProlog::createBackCompatBlock(
124 unsigned NumKernArgPreloadSGPRs) {
125 auto KernelEntryMBB = MF.
begin();
127 MF.
insert(KernelEntryMBB, BackCompatMBB);
130 "Kernel argument segment pointer register not set.");
132 BackCompatMBB->
addLiveIn(KernArgSegmentPtr);
135 addBackCompatLoads(BackCompatMBB, KernArgSegmentPtr, NumKernArgPreloadSGPRs);
146 .
addMBB(&*KernelEntryMBB);
159 unsigned NumKernArgPreloadSGPRs) {
160 static constexpr LoadConfig Configs[] = {
161 {8, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM},
162 {4, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM},
163 {2, &AMDGPU::SReg_64RegClass, AMDGPU::S_LOAD_DWORDX2_IMM}};
165 for (
const auto &Config : Configs) {
166 if (NumKernArgPreloadSGPRs >= Config.Size) {
167 Register LoadReg =
TRI.getMatchingSuperReg(KernArgPreloadSGPR,
168 AMDGPU::sub0, Config.RegClass);
170 LoadConfig
C(Config);
178 return LoadConfig{1, &AMDGPU::SReg_32RegClass, AMDGPU::S_LOAD_DWORD_IMM,
182void AMDGPUPreloadKernArgProlog::addBackCompatLoads(
183 MachineBasicBlock *BackCompatMBB,
Register KernArgSegmentPtr,
184 unsigned NumKernArgPreloadSGPRs) {
189 while (NumKernArgPreloadSGPRs > 0) {
194 .
addReg(KernArgSegmentPtr)
198 Offset += 4 * Config.Size;
199 KernArgPreloadSGPR = KernArgPreloadSGPR.
asMCReg() + Config.Size;
200 NumKernArgPreloadSGPRs -= Config.Size;
207 if (!AMDGPUPreloadKernArgProlog(MF).
run())
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static LoadConfig getLoadParameters(const TargetRegisterInfo &TRI, Register KernArgPreloadSGPR, unsigned NumKernArgPreloadSGPRs)
Find the largest possible load size that fits with SGPR alignment.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static const uint32_t IV[8]
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &AM)
FunctionPass class - This class is used to implement most global optimizations.
bool needsKernArgPreloadProlog() const
bool hasKernargSegmentPtr() const
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
GCNUserSGPRUsageInfo & getUserSGPRInfo()
unsigned getNumKernargPreloadedSGPRs() const
AMDGPUFunctionArgInfo & getArgInfo()
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
self_iterator getIterator()
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
char & AMDGPUPreloadKernArgPrologLegacyID
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
FunctionPass * createAMDGPUPreloadKernArgPrologLegacyPass()
ArgDescriptor KernargSegmentPtr
Register FirstKernArgPreloadReg
MCRegister getRegister() const