Thanks for visiting codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
SIInsertWaitcnts.cpp File Reference

Insert wait instructions for memory reads and writes. More...

Go to the source code of this file.

Classes

struct  llvm::enum_iteration_traits< InstCounterType >

Namespaces

namespace  llvm
 This is an optimization pass for GlobalISel generic memory operations.

Macros

#define DEBUG_TYPE   "si-insert-waitcnts"
#define AMDGPU_DECLARE_WAIT_EVENTS(DECL)
#define AMDGPU_EVENT_ENUM(Name)
#define AMDGPU_EVENT_NAME(Name)

Functions

 DEBUG_COUNTER (ForceExpCounter, DEBUG_TYPE "-forceexp", "Force emit s_waitcnt expcnt(0) instrs")
 DEBUG_COUNTER (ForceLgkmCounter, DEBUG_TYPE "-forcelgkm", "Force emit s_waitcnt lgkmcnt(0) instrs")
 DEBUG_COUNTER (ForceVMCounter, DEBUG_TYPE "-forcevm", "Force emit s_waitcnt vmcnt(0) instrs")
 INITIALIZE_PASS_BEGIN (SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts", false, false) INITIALIZE_PASS_END(SIInsertWaitcntsLegacy
static bool updateOperandIfDifferent (MachineInstr &MI, AMDGPU::OpName OpName, unsigned NewEnc)
static std::optional< InstCounterType > counterTypeForInstr (unsigned Opcode)
 Determine if Opcode is that of a gfx12+ single-counter S_WAIT_*CNT instruction, and if so, which counter it is waiting on.
static bool readsVCCZ (const MachineInstr &MI)
static bool callWaitsOnFunctionEntry (const MachineInstr &MI)
static bool callWaitsOnFunctionReturn (const MachineInstr &MI)
static bool isGFX12CacheInvOrWBInst (MachineInstr &Inst)
static bool isWaitInstr (MachineInstr &Inst)

Variables

static cl::opt< bool > ForceEmitZeroFlag ("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as " "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(false), cl::Hidden)
static cl::opt< bool > ForceEmitZeroLoadFlag ("amdgpu-waitcnt-load-forcezero", cl::desc("Force all waitcnt load counters to wait until 0"), cl::init(false), cl::Hidden)
 DEBUG_TYPE
SI Insert Waitcnts
SI Insert false

Detailed Description

Insert wait instructions for memory reads and writes.

Memory reads and writes are issued asynchronously, so we need to insert S_WAITCNT instructions when we want to access any of their results or overwrite any register that's used asynchronously.

TODO: This pass currently keeps one timeline per hardware counter. A more finely-grained approach that keeps one timeline per event type could sometimes get away with generating weaker s_waitcnt instructions. For example, when both SMEM and LDS are in flight and we need to wait for the i-th-last LDS instruction, then an lgkmcnt(i) is actually sufficient, but the pass will currently generate a conservative lgkmcnt(0) because multiple event types are in flight.

Definition in file SIInsertWaitcnts.cpp.

Macro Definition Documentation

◆ AMDGPU_DECLARE_WAIT_EVENTS

#define AMDGPU_DECLARE_WAIT_EVENTS ( DECL)
Value:
DECL(VMEM_ACCESS) /* vmem read & write */ \
DECL(VMEM_READ_ACCESS) /* vmem read */ \
DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
DECL(SCRATCH_WRITE_ACCESS) /* vmem write that may be scratch */ \
DECL(VMEM_GROUP) /* vmem group */ \
DECL(LDS_ACCESS) /* lds read & write */ \
DECL(GDS_ACCESS) /* gds read & write */ \
DECL(SQ_MESSAGE) /* send message */ \
DECL(SCC_WRITE) /* write to SCC from barrier */ \
DECL(SMEM_ACCESS) /* scalar-memory read & write */ \
DECL(SMEM_GROUP) /* scalar-memory group */ \
DECL(EXP_GPR_LOCK) /* export holding on its data src */ \
DECL(GDS_GPR_LOCK) /* GDS holding on its data and addr src */ \
DECL(EXP_POS_ACCESS) /* write to export position */ \
DECL(EXP_PARAM_ACCESS) /* write to export parameter */ \
DECL(VMW_GPR_LOCK) /* vmem write holding on its data src */ \
DECL(EXP_LDS_ACCESS) /* read by ldsdir counting as export */

Definition at line 113 of file SIInsertWaitcnts.cpp.

◆ AMDGPU_EVENT_ENUM

#define AMDGPU_EVENT_ENUM ( Name)
Value:
Name,

Definition at line 135 of file SIInsertWaitcnts.cpp.

◆ AMDGPU_EVENT_NAME

#define AMDGPU_EVENT_NAME ( Name)
Value:
#Name,

Definition at line 142 of file SIInsertWaitcnts.cpp.

◆ DEBUG_TYPE

#define DEBUG_TYPE   "si-insert-waitcnts"

Definition at line 46 of file SIInsertWaitcnts.cpp.

Function Documentation

◆ callWaitsOnFunctionEntry()

bool callWaitsOnFunctionEntry ( const MachineInstr & MI)
static
Returns
true if the callee inserts an s_waitcnt 0 on function entry.

Definition at line 1841 of file SIInsertWaitcnts.cpp.

References MI.

◆ callWaitsOnFunctionReturn()

bool callWaitsOnFunctionReturn ( const MachineInstr & MI)
static
Returns
true if the callee is expected to wait for any outstanding waits before returning.

Definition at line 1851 of file SIInsertWaitcnts.cpp.

References MI.

◆ counterTypeForInstr()

std::optional< InstCounterType > counterTypeForInstr ( unsigned Opcode)
static

Determine if Opcode is that of a gfx12+ single-counter S_WAIT_*CNT instruction, and if so, which counter it is waiting on.

Definition at line 1361 of file SIInsertWaitcnts.cpp.

Referenced by isWaitInstr().

◆ DEBUG_COUNTER() [1/3]

DEBUG_COUNTER ( ForceExpCounter ,
DEBUG_TYPE "-forceexp" ,
"Force emit s_waitcnt expcnt(0) instrs"  )

References DEBUG_TYPE.

◆ DEBUG_COUNTER() [2/3]

DEBUG_COUNTER ( ForceLgkmCounter ,
DEBUG_TYPE "-forcelgkm" ,
"Force emit s_waitcnt lgkmcnt(0) instrs"  )

References DEBUG_TYPE.

◆ DEBUG_COUNTER() [3/3]

DEBUG_COUNTER ( ForceVMCounter ,
DEBUG_TYPE "-forcevm" ,
"Force emit s_waitcnt vmcnt(0) instrs"  )

References DEBUG_TYPE.

◆ INITIALIZE_PASS_BEGIN()

INITIALIZE_PASS_BEGIN ( SIInsertWaitcntsLegacy ,
DEBUG_TYPE ,
"SI Insert Waitcnts" ,
false ,
false  )

◆ isGFX12CacheInvOrWBInst()

bool isGFX12CacheInvOrWBInst ( MachineInstr & Inst)
static

Definition at line 2251 of file SIInsertWaitcnts.cpp.

References llvm::MachineInstr::getOpcode(), and Opc.

◆ isWaitInstr()

◆ readsVCCZ()

bool readsVCCZ ( const MachineInstr & MI)
static

Definition at line 1834 of file SIInsertWaitcnts.cpp.

References MI, and Opc.

◆ updateOperandIfDifferent()

bool updateOperandIfDifferent ( MachineInstr & MI,
AMDGPU::OpName OpName,
unsigned NewEnc )
static

Variable Documentation

◆ DEBUG_TYPE

DEBUG_TYPE

Definition at line 1334 of file SIInsertWaitcnts.cpp.

◆ false

SI Insert false

Definition at line 1335 of file SIInsertWaitcnts.cpp.

◆ ForceEmitZeroFlag

cl::opt< bool > ForceEmitZeroFlag ( "amdgpu-waitcnt-forcezero" ,
cl::desc("Force all waitcnt instrs to be emitted as " "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)") ,
cl::init(false) ,
cl::Hidden  )
static

◆ ForceEmitZeroLoadFlag

cl::opt< bool > ForceEmitZeroLoadFlag ( "amdgpu-waitcnt-load-forcezero" ,
cl::desc("Force all waitcnt load counters to wait until 0") ,
cl::init(false) ,
cl::Hidden  )
static

◆ Waitcnts

SI Insert Waitcnts

Definition at line 1334 of file SIInsertWaitcnts.cpp.