Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

[AMDGPU] Classify FLAT instructions as VMEM #137148

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2420,17 +2420,15 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
Result = true;

else if (((SGMask & SchedGroupMask::VMEM) != SchedGroupMask::NONE) &&
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
TII->isVMEM(MI))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I right in thinking that the isDS test was redundant, since no instructions are both FLAT and DS? @kerbowa

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Otherwise, it should check for !isLDSDMA if we specifically want to avoid DS accesses, I think

Result = true;

else if (((SGMask & SchedGroupMask::VMEM_READ) != SchedGroupMask::NONE) &&
MI.mayLoad() &&
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
MI.mayLoad() && TII->isVMEM(MI))
Result = true;

else if (((SGMask & SchedGroupMask::VMEM_WRITE) != SchedGroupMask::NONE) &&
MI.mayStore() &&
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
MI.mayStore() && TII->isVMEM(MI))
Result = true;

else if (((SGMask & SchedGroupMask::DS) != SchedGroupMask::NONE) &&
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,9 @@ class AMDGPUWaitSGPRHazards {
State.ActiveFlat = true;

// SMEM or VMEM clears hazards
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSMRD(*MI)) {
// FIXME: adapt to add FLAT without VALU (so !isLDSDMA())?
if ((SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI)) ||
SIInstrInfo::isSMRD(*MI)) {
State.VCCHazard = HazardState::None;
State.SALUHazards.reset();
State.VALUHazards.reset();
Expand Down
57 changes: 24 additions & 33 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (ST.hasNoDataDepHazard())
return NoHazard;

// FIXME: Should flat be considered vmem?
if ((SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI))
&& checkVMEMHazards(MI) > 0)
if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
return HazardType;

if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
Expand All @@ -202,8 +199,8 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return HazardType;

if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
SIInstrInfo::isDS(*MI) || SIInstrInfo::isEXP(*MI)) &&
checkMAIVALUHazards(MI) > 0)
return HazardType;

if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
Expand All @@ -229,9 +226,8 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
return HazardType;

if ((SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
if ((SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isDS(*MI)) &&
checkMAILdStHazards(MI) > 0)
return HazardType;

if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
Expand Down Expand Up @@ -324,7 +320,7 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (ST.hasNoDataDepHazard())
return WaitStates;

if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
if (SIInstrInfo::isVMEM(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

if (SIInstrInfo::isVALU(*MI))
Expand All @@ -340,8 +336,8 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
SIInstrInfo::isDS(*MI) || SIInstrInfo::isEXP(*MI)) &&
checkMAIVALUHazards(MI) > 0)
WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI));

if (MI->isInlineAsm())
Expand Down Expand Up @@ -369,9 +365,7 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (SIInstrInfo::isMAI(*MI))
return std::max(WaitStates, checkMAIHazards(MI));

if (SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI))
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isDS(*MI))
return std::max(WaitStates, checkMAILdStHazards(MI));

if (ST.hasGFX950Insts() && isPermlane(*MI))
Expand Down Expand Up @@ -598,7 +592,7 @@ static bool breaksSMEMSoftClause(MachineInstr *MI) {
}

// Returns true when MI is not a VMEM-class instruction (per the function name,
// such an instruction breaks a VMEM soft clause).
// Two variants of the return appear below: the first yields true only when MI
// is neither VMEM nor FLAT; the second only requires that MI is not VMEM.
// NOTE(review): this reads like a diff view with its +/- markers stripped --
// presumably the first return is the removed line and the second is its
// replacement; confirm against the PR. As literally written, the second
// return is unreachable.
static bool breaksVMEMSoftClause(MachineInstr *MI) {
return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI);
return !SIInstrInfo::isVMEM(*MI);
}

int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
Expand Down Expand Up @@ -1250,8 +1244,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();

auto IsHazardFn = [TRI, MI](const MachineInstr &I) {
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) &&
!SIInstrInfo::isFLAT(I))
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I))
return false;

for (const MachineOperand &Def : MI->defs()) {
Expand Down Expand Up @@ -1425,8 +1418,8 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
for (auto &MBB : MF) {
for (auto &MI : MBB) {
HasLds |= SIInstrInfo::isDS(MI);
HasVmem |=
SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI);
HasVmem |= (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
SIInstrInfo::isSegmentSpecificFLAT(MI);
Comment on lines +1421 to +1422
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks wrong; it's ignoring FLAT_ instructions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is addressed separately by #137170, because fixing it here would mean this PR is no longer NFC (no functional change).

if (HasLds && HasVmem)
return true;
}
Expand All @@ -1450,7 +1443,8 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
auto IsHazardInst = [](const MachineInstr &MI) {
if (SIInstrInfo::isDS(MI))
return 1;
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
SIInstrInfo::isSegmentSpecificFLAT(MI))
return 2;
return 0;
};
Expand Down Expand Up @@ -1517,8 +1511,8 @@ bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
if (WaitStates >= NoHazardWaitStates)
return true;
// Instructions which cause va_vdst==0 expire hazard
return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I);
return SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I);
};
auto GetWaitStatesFn = [](const MachineInstr &MI) {
return SIInstrInfo::isVALU(MI) ? 1 : 0;
Expand Down Expand Up @@ -1549,8 +1543,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
const Register VDSTReg = VDST->getReg();

auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) {
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) &&
!SIInstrInfo::isDS(I))
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I))
return false;
return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
};
Expand Down Expand Up @@ -1635,8 +1628,8 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
return HazardExpired;

// Instructions which cause va_vdst==0 expire hazard
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
return HazardExpired;
Expand Down Expand Up @@ -1772,8 +1765,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
return HazardExpired;

// Instructions which cause va_vdst==0 expire hazard
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
I.getOperand(0).getImm() == 0x0fff))
return HazardExpired;
Expand Down Expand Up @@ -2003,7 +1996,7 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
return 0;

auto IsHazardFn = [](const MachineInstr &I) {
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I))
if (!SIInstrInfo::isVMEM(I))
return false;
return SIInstrInfo::isFPAtomic(I);
};
Expand Down Expand Up @@ -2625,9 +2618,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {

int WaitStatesNeeded = 0;

bool IsMem = SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI);
bool IsMem = SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isDS(*MI);
bool IsMemOrExport = IsMem || SIInstrInfo::isEXP(*MI);
bool IsVALU = SIInstrInfo::isVALU(*MI);

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ void AMDGPUCustomBehaviour::generateWaitCntInfo() {
// Classifies an instruction descriptor as VMEM by testing encoding bits in
// MCID.TSFlags. The return statement that ends at the MIMG test accepts MUBUF,
// MTBUF and MIMG; the line after it repeats the MIMG test and adds FLAT.
// NOTE(review): this reads like a diff view with its +/- markers stripped --
// presumably the line ending in "MIMG;" is the removed line and the line
// containing FLAT is its replacement; confirm against the PR. As literally
// written, the FLAT line is a separate expression statement after the return
// and has no effect.
bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
return MCID.TSFlags & SIInstrFlags::MUBUF ||
MCID.TSFlags & SIInstrFlags::MTBUF ||
MCID.TSFlags & SIInstrFlags::MIMG;
// Variant that also treats FLAT-encoded instructions as VMEM.
MCID.TSFlags & SIInstrFlags::MIMG || MCID.TSFlags & SIInstrFlags::FLAT;
}

// taken from SIInstrInfo::hasModifiersSet()
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ FunctionPass *llvm::createSIFormMemoryClausesLegacyPass() {
}

// Returns true when MI counts as a VMEM clause instruction.
// Two variants of the return appear below: the first accepts anything that is
// FLAT or VMEM; the second relies on SIInstrInfo::isVMEM alone.
// NOTE(review): this reads like a diff view with its +/- markers stripped --
// presumably the first return is the removed line and the second is its
// replacement; confirm against the PR. As literally written, the second
// return is unreachable.
static bool isVMEMClauseInst(const MachineInstr &MI) {
return SIInstrInfo::isFLAT(MI) || SIInstrInfo::isVMEM(MI);
return SIInstrInfo::isVMEM(MI);
}

static bool isSMEMClauseInst(const MachineInstr &MI) {
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ class SIInsertHardClauses {
HardClauseType getHardClauseType(const MachineInstr &MI) {
if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
SIInstrInfo::isSegmentSpecificFLAT(MI)) {
if (ST->hasNSAClauseBug()) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
Expand All @@ -121,7 +122,8 @@ class SIInsertHardClauses {
: HARDCLAUSE_MIMG_LOAD
: HARDCLAUSE_MIMG_STORE;
}
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
SIInstrInfo::isSegmentSpecificFLAT(MI)) {
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
: HARDCLAUSE_VMEM_LOAD
: HARDCLAUSE_VMEM_STORE;
Expand Down
15 changes: 8 additions & 7 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ static const unsigned instrsForExtendedCounterTypes[NUM_EXTENDED_INST_CNTS] = {
AMDGPU::S_WAIT_KMCNT};

// Returns true for instructions that (per the function name) update only the
// VM counter: VMEM instructions, FLAT global instructions and FLAT scratch
// instructions.
// Two variants of the return appear below. The second differs only in that a
// VMEM match must additionally not be FLAT before it is accepted by the first
// term; FLAT global/scratch are still accepted by the remaining terms.
// NOTE(review): this reads like a diff view with its +/- markers stripped --
// presumably the first return is the removed statement and the second is its
// replacement; confirm against the PR. As literally written, the second
// return is unreachable.
static bool updateVMCntOnly(const MachineInstr &Inst) {
return SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLATGlobal(Inst) ||
SIInstrInfo::isFLATScratch(Inst);
return (SIInstrInfo::isVMEM(Inst) && !SIInstrInfo::isFLAT(Inst)) ||
SIInstrInfo::isFLATScratch(Inst);
}

#ifndef NDEBUG
Expand Down Expand Up @@ -695,14 +695,14 @@ class SIInsertWaitcnts {
#endif // NDEBUG
}

// Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM or
// FLAT instruction.
// Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
// instruction.
WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
// Maps VMEM access types to their corresponding WaitEventType.
static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};

assert(SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLAT(Inst));
assert(SIInstrInfo::isVMEM(Inst));
// LDS DMA loads are also stores, but on the LDS side. On the VMEM side
// these should use VM_CNT.
if (!ST->hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst))
Expand Down Expand Up @@ -2454,8 +2454,9 @@ bool SIInsertWaitcnts::isPreheaderToFlush(
}

// Returns true when MI is a VMEM instruction, or a FLAT instruction for which
// mayAccessVMEMThroughFlat(MI) holds.
// Two variants of the body appear below. The first is a single return that
// ORs the VMEM test with the guarded FLAT test. The second checks FLAT first
// and answers with mayAccessVMEMThroughFlat(MI) alone for FLAT instructions,
// falling back to SIInstrInfo::isVMEM(MI) otherwise.
// NOTE(review): this reads like a diff view with its +/- markers stripped --
// presumably the first return is the removed statement and the if/return pair
// is its replacement; confirm against the PR. As literally written, everything
// after the first return is unreachable.
bool SIInsertWaitcnts::isVMEMOrFlatVMEM(const MachineInstr &MI) const {
return SIInstrInfo::isVMEM(MI) ||
(SIInstrInfo::isFLAT(MI) && mayAccessVMEMThroughFlat(MI));
// FLAT instructions are decided solely by mayAccessVMEMThroughFlat.
if (SIInstrInfo::isFLAT(MI))
return mayAccessVMEMThroughFlat(MI);
return SIInstrInfo::isVMEM(MI);
}

// Return true if it is better to flush the vmcnt counter in the preheader of
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
}

// Returns true when MI belongs to one of the VMEM instruction classes.
// Two variants of the return appear below: the first accepts MUBUF, MTBUF and
// image instructions; the second additionally accepts FLAT instructions.
// NOTE(review): this reads like a diff view with its +/- markers stripped --
// presumably the first return is the removed line and the second is its
// replacement; confirm against the PR. As literally written, the second
// return is unreachable.
static bool isVMEM(const MachineInstr &MI) {
return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI);
}

bool isVMEM(uint16_t Opcode) const {
Expand Down