Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 017f7bf

Browse files
committed
[AMDGPU] Classify FLAT instructions as VMEM
Also adapt hazard and wait handling.
1 parent c60f24d commit 017f7bf

10 files changed

+63 -56 lines changed

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

+6-6
Original file line number | Diff line number | Diff line change
@@ -2420,29 +2420,29 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
24202420
Result = true;
24212421

24222422
else if (((SGMask & SchedGroupMask::VMEM) != SchedGroupMask::NONE) &&
2423-
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
2423+
TII->isVMEM(MI))
24242424
Result = true;
24252425

24262426
else if (((SGMask & SchedGroupMask::VMEM_READ) != SchedGroupMask::NONE) &&
24272427
MI.mayLoad() &&
2428-
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
2428+
TII->isVMEM(MI))
24292429
Result = true;
24302430

24312431
else if (((SGMask & SchedGroupMask::VMEM_WRITE) != SchedGroupMask::NONE) &&
24322432
MI.mayStore() &&
2433-
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
2433+
TII->isVMEM(MI))
24342434
Result = true;
24352435

24362436
else if (((SGMask & SchedGroupMask::DS) != SchedGroupMask::NONE) &&
2437-
TII->isDS(MI))
2437+
(TII->isDS(MI) || TII->isLDSDMA(MI)))
24382438
Result = true;
24392439

24402440
else if (((SGMask & SchedGroupMask::DS_READ) != SchedGroupMask::NONE) &&
2441-
MI.mayLoad() && TII->isDS(MI))
2441+
MI.mayLoad() && (TII->isDS(MI) || TII->isLDSDMA(MI)))
24422442
Result = true;
24432443

24442444
else if (((SGMask & SchedGroupMask::DS_WRITE) != SchedGroupMask::NONE) &&
2445-
MI.mayStore() && TII->isDS(MI))
2445+
MI.mayStore() && (TII->isDS(MI) || TII->isLDSDMA(MI)))
24462446
Result = true;
24472447

24482448
else if (((SGMask & SchedGroupMask::TRANS) != SchedGroupMask::NONE) &&

llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,8 @@ class AMDGPUWaitSGPRHazards {
232232
State.ActiveFlat = true;
233233

234234
// SMEM or VMEM clears hazards
235-
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSMRD(*MI)) {
235+
// FIXME: adapt to add FLAT without VALU (so !isLDSDMA())?
236+
if ((SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI)) || SIInstrInfo::isSMRD(*MI)) {
236237
State.VCCHazard = HazardState::None;
237238
State.SALUHazards.reset();
238239
State.VALUHazards.reset();

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

+20-29
Original file line number | Diff line number | Diff line change
@@ -183,10 +183,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
183183
if (ST.hasNoDataDepHazard())
184184
return NoHazard;
185185

186-
// FIXME: Should flat be considered vmem?
187-
if ((SIInstrInfo::isVMEM(*MI) ||
188-
SIInstrInfo::isFLAT(*MI))
189-
&& checkVMEMHazards(MI) > 0)
186+
if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
190187
return HazardType;
191188

192189
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
@@ -202,8 +199,8 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
202199
return HazardType;
203200

204201
if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
205-
SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
206-
SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
202+
SIInstrInfo::isDS(*MI) || SIInstrInfo::isEXP(*MI)) &&
203+
checkMAIVALUHazards(MI) > 0)
207204
return HazardType;
208205

209206
if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
@@ -230,7 +227,6 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
230227
return HazardType;
231228

232229
if ((SIInstrInfo::isVMEM(*MI) ||
233-
SIInstrInfo::isFLAT(*MI) ||
234230
SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
235231
return HazardType;
236232

@@ -324,7 +320,7 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
324320
if (ST.hasNoDataDepHazard())
325321
return WaitStates;
326322

327-
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
323+
if (SIInstrInfo::isVMEM(*MI))
328324
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
329325

330326
if (SIInstrInfo::isVALU(*MI))
@@ -340,8 +336,8 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
340336
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
341337

342338
if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
343-
SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
344-
SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
339+
SIInstrInfo::isDS(*MI) || SIInstrInfo::isEXP(*MI)) &&
340+
checkMAIVALUHazards(MI) > 0)
345341
WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI));
346342

347343
if (MI->isInlineAsm())
@@ -370,7 +366,6 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
370366
return std::max(WaitStates, checkMAIHazards(MI));
371367

372368
if (SIInstrInfo::isVMEM(*MI) ||
373-
SIInstrInfo::isFLAT(*MI) ||
374369
SIInstrInfo::isDS(*MI))
375370
return std::max(WaitStates, checkMAILdStHazards(MI));
376371

@@ -598,7 +593,7 @@ static bool breaksSMEMSoftClause(MachineInstr *MI) {
598593
}
599594

600595
static bool breaksVMEMSoftClause(MachineInstr *MI) {
601-
return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI);
596+
return !SIInstrInfo::isVMEM(*MI);
602597
}
603598

604599
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
@@ -1250,8 +1245,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
12501245
const SIRegisterInfo *TRI = ST.getRegisterInfo();
12511246

12521247
auto IsHazardFn = [TRI, MI](const MachineInstr &I) {
1253-
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) &&
1254-
!SIInstrInfo::isFLAT(I))
1248+
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I))
12551249
return false;
12561250

12571251
for (const MachineOperand &Def : MI->defs()) {
@@ -1424,9 +1418,8 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
14241418
bool HasVmem = false;
14251419
for (auto &MBB : MF) {
14261420
for (auto &MI : MBB) {
1427-
HasLds |= SIInstrInfo::isDS(MI);
1428-
HasVmem |=
1429-
SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI);
1421+
HasLds |= SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI);
1422+
HasVmem |= SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI);
14301423
if (HasLds && HasVmem)
14311424
return true;
14321425
}
@@ -1448,9 +1441,9 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
14481441
assert(!ST.hasExtendedWaitCounts());
14491442

14501443
auto IsHazardInst = [](const MachineInstr &MI) {
1451-
if (SIInstrInfo::isDS(MI))
1444+
if (SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI))
14521445
return 1;
1453-
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
1446+
if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI))
14541447
return 2;
14551448
return 0;
14561449
};
@@ -1517,8 +1510,8 @@ bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
15171510
if (WaitStates >= NoHazardWaitStates)
15181511
return true;
15191512
// Instructions which cause va_vdst==0 expire hazard
1520-
return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
1521-
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I);
1513+
return SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
1514+
SIInstrInfo::isEXP(I);
15221515
};
15231516
auto GetWaitStatesFn = [](const MachineInstr &MI) {
15241517
return SIInstrInfo::isVALU(MI) ? 1 : 0;
@@ -1549,8 +1542,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
15491542
const Register VDSTReg = VDST->getReg();
15501543

15511544
auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) {
1552-
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) &&
1553-
!SIInstrInfo::isDS(I))
1545+
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I))
15541546
return false;
15551547
return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
15561548
};
@@ -1635,8 +1627,8 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
16351627
return HazardExpired;
16361628

16371629
// Instructions which cause va_vdst==0 expire hazard
1638-
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
1639-
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
1630+
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
1631+
SIInstrInfo::isEXP(I) ||
16401632
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
16411633
AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
16421634
return HazardExpired;
@@ -1772,8 +1764,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
17721764
return HazardExpired;
17731765

17741766
// Instructions which cause va_vdst==0 expire hazard
1775-
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
1776-
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
1767+
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
1768+
SIInstrInfo::isEXP(I) ||
17771769
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
17781770
I.getOperand(0).getImm() == 0x0fff))
17791771
return HazardExpired;
@@ -2003,7 +1995,7 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
20031995
return 0;
20041996

20051997
auto IsHazardFn = [](const MachineInstr &I) {
2006-
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I))
1998+
if (!SIInstrInfo::isVMEM(I))
20071999
return false;
20082000
return SIInstrInfo::isFPAtomic(I);
20092001
};
@@ -2626,7 +2618,6 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
26262618
int WaitStatesNeeded = 0;
26272619

26282620
bool IsMem = SIInstrInfo::isVMEM(*MI) ||
2629-
SIInstrInfo::isFLAT(*MI) ||
26302621
SIInstrInfo::isDS(*MI);
26312622
bool IsMemOrExport = IsMem || SIInstrInfo::isEXP(*MI);
26322623
bool IsVALU = SIInstrInfo::isVALU(*MI);

llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,8 @@ void AMDGPUCustomBehaviour::generateWaitCntInfo() {
303303
bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
304304
return MCID.TSFlags & SIInstrFlags::MUBUF ||
305305
MCID.TSFlags & SIInstrFlags::MTBUF ||
306-
MCID.TSFlags & SIInstrFlags::MIMG;
306+
MCID.TSFlags & SIInstrFlags::MIMG ||
307+
MCID.TSFlags & SIInstrFlags::FLAT;
307308
}
308309

309310
// taken from SIInstrInfo::hasModifiersSet()

llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ FunctionPass *llvm::createSIFormMemoryClausesLegacyPass() {
100100
}
101101

102102
static bool isVMEMClauseInst(const MachineInstr &MI) {
103-
return SIInstrInfo::isFLAT(MI) || SIInstrInfo::isVMEM(MI);
103+
return SIInstrInfo::isVMEM(MI);
104104
}
105105

106106
static bool isSMEMClauseInst(const MachineInstr &MI) {

llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ class SIInsertHardClauses {
9797
HardClauseType getHardClauseType(const MachineInstr &MI) {
9898
if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
9999
if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
100-
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
100+
if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI)) {
101101
if (ST->hasNSAClauseBug()) {
102102
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
103103
if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
@@ -121,7 +121,7 @@ class SIInsertHardClauses {
121121
: HARDCLAUSE_MIMG_LOAD
122122
: HARDCLAUSE_MIMG_STORE;
123123
}
124-
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
124+
if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI)) {
125125
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
126126
: HARDCLAUSE_VMEM_LOAD
127127
: HARDCLAUSE_VMEM_STORE;

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

+7-7
Original file line number | Diff line number | Diff line change
@@ -168,8 +168,8 @@ static const unsigned instrsForExtendedCounterTypes[NUM_EXTENDED_INST_CNTS] = {
168168
AMDGPU::S_WAIT_KMCNT};
169169

170170
static bool updateVMCntOnly(const MachineInstr &Inst) {
171-
return SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLATGlobal(Inst) ||
172-
SIInstrInfo::isFLATScratch(Inst);
171+
return (SIInstrInfo::isVMEM(Inst) && !SIInstrInfo::isFLAT(Inst)) ||
172+
SIInstrInfo::isFLATGlobal(Inst) || SIInstrInfo::isFLATScratch(Inst);
173173
}
174174

175175
#ifndef NDEBUG
@@ -695,14 +695,14 @@ class SIInsertWaitcnts {
695695
#endif // NDEBUG
696696
}
697697

698-
// Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM or
699-
// FLAT instruction.
698+
// Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
699+
// instruction.
700700
WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
701701
// Maps VMEM access types to their corresponding WaitEventType.
702702
static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
703703
VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
704704

705-
assert(SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLAT(Inst));
705+
assert(SIInstrInfo::isVMEM(Inst));
706706
// LDS DMA loads are also stores, but on the LDS side. On the VMEM side
707707
// these should use VM_CNT.
708708
if (!ST->hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst))
@@ -2454,8 +2454,8 @@ bool SIInsertWaitcnts::isPreheaderToFlush(
24542454
}
24552455

24562456
bool SIInsertWaitcnts::isVMEMOrFlatVMEM(const MachineInstr &MI) const {
2457-
return SIInstrInfo::isVMEM(MI) ||
2458-
(SIInstrInfo::isFLAT(MI) && mayAccessVMEMThroughFlat(MI));
2457+
return (SIInstrInfo::isFLAT(MI) && mayAccessVMEMThroughFlat(MI)) ||
2458+
SIInstrInfo::isVMEM(MI);
24592459
}
24602460

24612461
// Return true if it is better to flush the vmcnt counter in the preheader of

llvm/lib/Target/AMDGPU/SIInstrInfo.h

+3-1
Original file line number | Diff line number | Diff line change
@@ -449,7 +449,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
449449
}
450450

451451
static bool isVMEM(const MachineInstr &MI) {
452-
return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
452+
if (isFLAT(MI))
453+
assert(usesVM_CNT(MI) && "oh no");
454+
return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI);
453455
}
454456

455457
bool isVMEM(uint16_t Opcode) const {

llvm/test/CodeGen/AMDGPU/hard-clauses.mir

+15-6
Original file line number | Diff line number | Diff line change
@@ -630,20 +630,29 @@ body: |
630630
; CHECK-LABEL: name: flat_global_load
631631
; CHECK: liveins: $vgpr0_vgpr1
632632
; CHECK-NEXT: {{ $}}
633-
; CHECK-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
634-
; CHECK-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
633+
; CHECK-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
634+
; CHECK-NEXT: S_CLAUSE 1
635+
; CHECK-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
636+
; CHECK-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
637+
; CHECK-NEXT: }
635638
;
636639
; GFX11-LABEL: name: flat_global_load
637640
; GFX11: liveins: $vgpr0_vgpr1
638641
; GFX11-NEXT: {{ $}}
639-
; GFX11-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
640-
; GFX11-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
642+
; GFX11-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
643+
; GFX11-NEXT: S_CLAUSE 1
644+
; GFX11-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
645+
; GFX11-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
646+
; GFX11-NEXT: }
641647
;
642648
; GFX12-LABEL: name: flat_global_load
643649
; GFX12: liveins: $vgpr0_vgpr1
644650
; GFX12-NEXT: {{ $}}
645-
; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
646-
; GFX12-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
651+
; GFX12-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
652+
; GFX12-NEXT: S_CLAUSE 1
653+
; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
654+
; GFX12-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
655+
; GFX12-NEXT: }
647656
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
648657
$vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
649658
...

llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir

+5-2
Original file line number | Diff line number | Diff line change
@@ -269,11 +269,14 @@ body: |
269269
S_ENDPGM 0
270270
...
271271

272-
# GCN-LABEL: name: no_hazard_lds_branch_flat
272+
# FLAT_* instructions are "based on per-thread address (VGPR), can load/store:
273+
# global memory, LDS or scratch memory" (RDNA4 ISA)
274+
# GCN-LABEL: name: hazard_lds_branch_flat
273275
# GCN: bb.1:
276+
# GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
274277
# GCN-NEXT: FLAT_LOAD_DWORD
275278
---
276-
name: no_hazard_lds_branch_flat
279+
name: hazard_lds_branch_flat
277280
body: |
278281
bb.0:
279282
successors: %bb.1

0 commit comments

Comments
 (0)