#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"

    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
           return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
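// resultDependsOnExec (below) conservatively decides whether an instruction's
// result is sensitive to the EXEC mask: compares whose users are the usual
// EXEC-masking patterns (S_AND_SAVEEXEC_*, S_AND_* reading EXEC) are treated
// specially, and a small opcode switch handles cases such as V_READFIRSTLANE_B32.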
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {

  if (MI.isCompare()) {

      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:

      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {

  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
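// The checks below compare two load opcodes through their named 'offset'
// operands; getNamedOperandIdx returns instruction operand indices that include
// the defs, so NumDefs is subtracted before the indices are applied to the
// load nodes' operand lists.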
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
          DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);

    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;
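// Clustering heuristic below: the per-access size is rounded up to dwords and
// scaled by the cluster size, and the ops are clustered only while that total
// stays within MaxMemoryClusterDWords.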
                                  int64_t Offset1, bool OffsetIsScalable1,
                                  int64_t Offset2, bool OffsetIsScalable2,
                                  unsigned ClusterSize,
                                  unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
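// The default diagnostic for an unlowerable VGPR-to-SGPR copy follows. After
// that comes the GFX908-only AGPR copy path: it first tries to propagate a
// feeding V_ACCVGPR_WRITE_B32_e64 so the copy can be rematerialized, and
// otherwise routes the value through a reserved VGPR temporary.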
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

    for (auto Def = MI, E = MBB.begin(); Def != E; ) {

      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

    LastMI->addRegisterKilled(SrcReg, &RI);
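// What follows is the physical-register copy path: a move opcode is chosen
// from the source/destination register classes (SGPR, VGPR, AGPR, 16-bit
// halves), and wide copies are decomposed into 32-bit (or aligned 64-bit)
// subregister moves, killing the source only on the final piece.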
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {

  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {

    assert(ST.useRealTrue16Insts());

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {

    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {

    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                               *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {

  return &AMDGPU::VGPR_32RegClass;
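// Below, the branch predicate in Cond is first materialized into a lane-mask
// register of the Bool-x-EXEC class (the EXEC tests additionally use a plain
// bool register), and that mask then drives the 32-bit VGPR select result.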
  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
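// The switch below recognizes move-immediate style opcodes (plain moves,
// bit-reversed S_BREV/V_BFREV forms, and bitwise-not forms) and reports the
// constant value that the instruction defines in the queried register.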
                                          int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;

  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;

                                                     bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                                   bool IsSGPR) const {
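// Register indexing only supports 32-bit elements (asserted below); the long
// if-chains that follow simply map a spill size to the matching SI_SPILL_*
// save/restore pseudo for the SGPR, VGPR, and AV register classes.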
  assert(EltSize == 32 && "invalid reg indexing elt size");

    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
                                     bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                        bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = TRI->getSpillSize(*RC);

      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {

    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
                             unsigned Quantity) const {

  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);
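// Populate the trap block: the doorbell value is masked down to its ID bits
// (DoorbellIDMask), the ECQueueWaveAbort bit is OR'd in, and the result is
// handed to the trap handler through M0, with TTMP2 used as scratch around
// the sequence.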
  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;
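// Pseudo expansion below: the *_term terminators are rewritten back to their
// ordinary S_* forms once control flow is final, the lane-spill pseudos become
// V_WRITELANE/V_READLANE, and the 64-bit move pseudos are split into per-half
// moves when the subtarget has no real 64-bit move.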
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
    MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
                                               &AMDGPU::SReg_32_XM0RegClass);
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {

    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {

      Op.setOffset(Op.getOffset() + 4);

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {

  case AMDGPU::ENTER_STRICT_WQM: {

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {

    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
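// The pair-returning helper below expands V_MOV_B64_DPP_PSEUDO: when the
// subtarget has a 64-bit move and FeatureDPALU_DPP it simply becomes
// V_MOV_B64_dpp, otherwise it is split into two 32-bit DPP moves over
// sub0/sub1 with the immediate or register source split accordingly.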
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);

std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;
                                       AMDGPU::OpName Src0OpName,
                                       AMDGPU::OpName Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);

                                                  unsigned OpIdx1) const {

  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                   unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);
                             AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                     int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.hasAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {

  if (Cond[0].isImm()) {

                               Register FalseReg, int &CondCycles,
                               int &TrueCycles, int &FalseCycles) const {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

  if (MRI.getRegClass(FalseReg) != RC)

  if (NumInsts % 2 == 0)

  CondCycles = TrueCycles = FalseCycles = NumInsts;
  return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
    SelOp = AMDGPU::S_CSELECT_B32;
    EltRC = &AMDGPU::SGPR_32RegClass;
    SelOp = AMDGPU::S_CSELECT_B64;
    EltRC = &AMDGPU::SGPR_64RegClass;
    SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

                                              unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
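// The folding path below takes a materialized constant and tries to sink it
// into its user: either the user becomes an immediate move of the (possibly
// sub-register) value, or MAD/FMA-style users are rewritten into the
// MADMK/MADAK (FMAMK/FMAAK) forms selected above, within the constant-bus
// limit of the opcode.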
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {

      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())
    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

      if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&
        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {

        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

    LocationSize Width0 = MIa.memoperands().front()->getSize();
    LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
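// The conversion below rewrites two-address MAC/FMAC instructions into the
// three-address MAD/FMA opcodes chosen above; when no source modifiers are
// present and the constant-bus budget allows it, an immediate operand can
// instead yield the FMAMK/FMAAK forms, and live-range def slots are updated
// when the defining instruction moves.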
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    if (Def.isEarlyClobber() && Def.isReg() &&

    auto UpdateDefIndex = [&](LiveRange &LR) {
      auto *S = LR.find(OldIndex);
      if (S != LR.end() && S->start == OldIndex) {
        assert(S->valno && S->valno->def == OldIndex);
        S->start = NewIndex;
        S->valno->def = NewIndex;

    for (auto &SR : LI.subranges())

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
       !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {

    const auto killDef = [&]() -> void {

      if (MRI.hasOneNonDBGUse(DefReg)) {
        DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
        DefMI->getOperand(0).setIsDead(true);
        for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

          MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())

  MIB.addImm(OpSel ? OpSel->getImm() : 0);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  if (ST.isTgSplitEnabled())

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);

  switch (Imm.getBitWidth()) {
                                        ST.hasInv2PiInlineImm());
                                        ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                                        ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();
    bool HasInv2Pi = ST.hasInv2PiInlineImm();
    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();

                                     int64_t ImmVal) const {

  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))
  return RI.opCanUseInlineConstant(OpInfo.OperandType);

         "unexpected imm-like operand kind");

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                              AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();

  switch (MI.getOpcode()) {
  default: return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

    if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {

      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;

    if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)

  return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;

  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
4868 switch (MO.getReg()) {
4870 case AMDGPU::VCC_LO:
4871 case AMDGPU::VCC_HI:
4873 case AMDGPU::FLAT_SCR:
4886 switch (
MI.getOpcode()) {
4887 case AMDGPU::V_READLANE_B32:
4888 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4889 case AMDGPU::V_WRITELANE_B32:
4890 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4897 if (
MI.isPreISelOpcode() ||
4898 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
4913 if (
SubReg.getReg().isPhysical())
4916 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
4927 if (RI.isVectorRegister(
MRI, SrcReg) && RI.isSGPRReg(
MRI, DstReg)) {
4928 ErrInfo =
"illegal copy from vector register to SGPR";
4946 if (!
MRI.isSSA() &&
MI.isCopy())
4947 return verifyCopy(
MI,
MRI, ErrInfo);
  if (SIInstrInfo::isGenericOpcode(Opcode))

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {

    if (!Reg.isVirtual() && !RC->contains(Reg)) {
      ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";
5000 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5003 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5004 "all fp values to integers.";
5008 int RegClass =
Desc.operands()[i].RegClass;
5011 switch (OpInfo.OperandType) {
5013 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5014 ErrInfo =
"Illegal immediate value for operand.";
5048 ErrInfo =
"Illegal immediate value for operand.";
5055 ErrInfo =
"Expected inline constant for operand.";
5070 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5071 ErrInfo =
"Expected immediate, but got non-immediate";
5080 if (OpInfo.isGenericType())
5095 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
5097 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5099 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5100 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5107 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5108 ErrInfo =
"Subtarget requires even aligned vector registers";
5113 if (RegClass != -1) {
5114 if (Reg.isVirtual())
5119 ErrInfo =
"Operand has incorrect register class.";
5127 if (!ST.hasSDWA()) {
5128 ErrInfo =
"SDWA is not supported on this target";
5132 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5133 AMDGPU::OpName::dst_sel}) {
5137 int64_t Imm = MO->
getImm();
5139 ErrInfo =
"Invalid SDWA selection";
5144 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5146 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5151 if (!ST.hasSDWAScalar()) {
5153 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(
MRI, MO.
getReg()))) {
5154 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5161 "Only reg allowed as operands in SDWA instructions on GFX9+";
5167 if (!ST.hasSDWAOmod()) {
5170 if (OMod !=
nullptr &&
5172 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5177 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5178 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5179 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5180 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5183 unsigned Mods = Src0ModsMO->
getImm();
5186 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5192 if (
isVOPC(BasicOpcode)) {
5193 if (!ST.hasSDWASdst() && DstIdx != -1) {
5196 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5197 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5200 }
else if (!ST.hasSDWAOutModsVOPC()) {
5203 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5204 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5210 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5211 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5218 if (DstUnused && DstUnused->isImm() &&
5221 if (!Dst.isReg() || !Dst.isTied()) {
5222 ErrInfo =
"Dst register should have tied register";
5227 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5230 "Dst register should be tied to implicit use of preserved register";
5234 ErrInfo =
"Dst register should use same physical register as preserved";
5241 if (
isImage(Opcode) && !
MI.mayStore()) {
5253 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5261 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5265 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5266 if (RegCount > DstSize) {
5267 ErrInfo =
"Image instruction returns too many registers for dst "
5276 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5277 unsigned ConstantBusCount = 0;
5278 bool UsesLiteral =
false;
5281 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5285 LiteralVal = &
MI.getOperand(ImmIdx);
5294 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5305 }
else if (!MO.
isFI()) {
5312 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5322 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5323 return !RI.regsOverlap(SGPRUsed, SGPR);
5332 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5333 Opcode != AMDGPU::V_WRITELANE_B32) {
5334 ErrInfo =
"VOP* instruction violates constant bus restriction";
5338 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5339 ErrInfo =
"VOP3 instruction uses literal";
5346 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5347 unsigned SGPRCount = 0;
5350 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5358 if (MO.
getReg() != SGPRUsed)
5363 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5364 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5371 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5372 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5379 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5389 ErrInfo =
"ABS not allowed in VOP3B instructions";
5402 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5409 if (
Desc.isBranch()) {
5411 ErrInfo =
"invalid branch target for SOPK instruction";
5418 ErrInfo =
"invalid immediate for SOPK instruction";
5423 ErrInfo =
"invalid immediate for SOPK instruction";
5430 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5431 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5432 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5433 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5434 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5435 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5437 const unsigned StaticNumOps =
5438 Desc.getNumOperands() +
Desc.implicit_uses().size();
5439 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5444 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5445 ErrInfo =
"missing implicit register operands";
5451 if (!Dst->isUse()) {
5452 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5457 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5458 UseOpIdx != StaticNumOps + 1) {
5459 ErrInfo =
"movrel implicit operands should be tied";
5466 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5468 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5469 ErrInfo =
"src0 should be subreg of implicit vector use";
5477 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5478 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5484 if (
MI.mayStore() &&
5489 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5490 ErrInfo =
"scalar stores must use m0 as offset register";
5496 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5498 if (
Offset->getImm() != 0) {
5499 ErrInfo =
"subtarget does not support offsets in flat instructions";
5504 if (
isDS(
MI) && !ST.hasGDS()) {
5506 if (GDSOp && GDSOp->
getImm() != 0) {
5507 ErrInfo =
"GDS is not supported on this subtarget";
5515 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5516 AMDGPU::OpName::vaddr0);
5517 AMDGPU::OpName RSrcOpName =
5518 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5519 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5527 ErrInfo =
"dim is out of range";
5532 if (ST.hasR128A16()) {
5534 IsA16 = R128A16->
getImm() != 0;
5535 }
else if (ST.hasA16()) {
5537 IsA16 = A16->
getImm() != 0;
5540 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5542 unsigned AddrWords =
5545 unsigned VAddrWords;
5547 VAddrWords = RsrcIdx - VAddr0Idx;
5548 if (ST.hasPartialNSAEncoding() &&
5550 unsigned LastVAddrIdx = RsrcIdx - 1;
5551 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5559 if (VAddrWords != AddrWords) {
5561 <<
" but got " << VAddrWords <<
"\n");
5562 ErrInfo =
"bad vaddr size";
    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST &&
          !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "

      if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
5623 AMDGPU::OpName DataName =
5624 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5630 if (ST.hasGFX90AInsts()) {
5631 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5632 (RI.isAGPR(
MRI, Dst->getReg()) != RI.isAGPR(
MRI,
Data->getReg()))) {
5633 ErrInfo =
"Invalid register class: "
5634 "vdata and vdst should be both VGPR or AGPR";
5637 if (
Data && Data2 &&
5639 ErrInfo =
"Invalid register class: "
5640 "both data operands should be VGPR or AGPR";
5644 if ((Dst && RI.isAGPR(
MRI, Dst->getReg())) ||
5646 (Data2 && RI.isAGPR(
MRI, Data2->
getReg()))) {
5647 ErrInfo =
"Invalid register class: "
5648 "agpr loads and stores not supported on this GPU";
5654 if (ST.needsAlignedVGPRs()) {
5655 const auto isAlignedReg = [&
MI, &
MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5660 if (Reg.isPhysical())
5661 return !(RI.getHWRegIndex(Reg) & 1);
5663 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5664 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5667 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5668 Opcode == AMDGPU::DS_GWS_BARRIER) {
5670 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5671 ErrInfo =
"Subtarget requires even aligned vector registers "
5672 "for DS_GWS instructions";
5678 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5679 ErrInfo =
"Subtarget requires even aligned vector registers "
5680 "for vaddr operand of image instructions";
5686 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5688 if (Src->isReg() && RI.isSGPRReg(
MRI, Src->getReg())) {
5689 ErrInfo =
"Invalid register class: "
5690 "v_accvgpr_write with an SGPR is not supported on this GPU";
5695 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5698 ErrInfo =
"pseudo expects only physical SGPRs";
5705 if (!ST.hasScaleOffset()) {
5706 ErrInfo =
"Subtarget does not support offset scaling";
5710 ErrInfo =
"Instruction does not support offset scaling";
5719 for (
unsigned I = 0;
I < 3; ++
I) {
  switch (MI.getOpcode()) {
  default:
    return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE:
    return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY:
    return AMDGPU::COPY;
  case AMDGPU::PHI:
    return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG:
    return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM:
    return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM:
    return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM:
    return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM:
    return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
                   RI.isAGPR(MRI, MI.getOperand(0).getReg())
               ? AMDGPU::COPY
               : AMDGPU::V_MOV_B32_e32;
  }
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U64_PSEUDO:
    return AMDGPU::V_ADD_U64_PSEUDO;
  case AMDGPU::S_SUB_U64_PSEUDO:
    return AMDGPU::V_SUB_U64_PSEUDO;
  case AMDGPU::S_SUBB_U32:
    return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32:
    return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32:
    return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32:
    return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32:
    return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32:
    return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32:
    return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32:
    return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32:
    return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32:
    return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32:
    return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32:
    return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64:
    return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32:
    return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64:
    return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32:
    return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64:
    return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8:
    return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16:
    return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32:
    return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32:
    return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32:
    return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32:
    return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32:
    return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64:
    return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32:
    return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32:
    return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32:
    return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32:
    return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32:
    return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32:
    return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32:
    return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32:
    return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32:
    return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32:
    return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32:
    return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32:
    return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64:
    return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64:
    return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32:
    return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32:
    return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32:
    return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32:
    return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0:
    return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32:
    return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32:
    return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32:
    return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32:
    return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16:
  case AMDGPU::S_CVT_HI_F32_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
                                   : AMDGPU::V_CVT_F32_F16_fake16_e64;
  case AMDGPU::S_CVT_F16_F32:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
                                   : AMDGPU::V_CVT_F16_F32_fake16_e64;
  case AMDGPU::S_CEIL_F32:
    return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32:
    return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32:
    return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32:
    return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
                                   : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
                                   : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
                                   : AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
                                   : AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32:
    return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32:
    return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32:
    return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32:
    return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32:
    return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32:
    return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32:
    return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
                                   : AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
                                   : AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
                                   : AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
                                   : AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
                                   : AMDGPU::V_MINIMUM_F16_fake16_e64;
  case AMDGPU::S_MAXIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
                                   : AMDGPU::V_MAXIMUM_F16_fake16_e64;
  case AMDGPU::S_MUL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
                                   : AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32:
    return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32:
    return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
                                   : AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32:
    return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32:
    return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32:
    return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32:
    return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32:
    return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32:
    return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32:
    return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32:
    return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32:
    return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32:
    return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32:
    return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32:
    return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32:
    return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32:
    return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
                                   : AMDGPU::V_CMP_LT_F16_fake16_e64;
  case AMDGPU::S_CMP_EQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
                                   : AMDGPU::V_CMP_EQ_F16_fake16_e64;
  case AMDGPU::S_CMP_LE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
                                   : AMDGPU::V_CMP_LE_F16_fake16_e64;
  case AMDGPU::S_CMP_GT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
                                   : AMDGPU::V_CMP_GT_F16_fake16_e64;
  case AMDGPU::S_CMP_LG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
                                   : AMDGPU::V_CMP_LG_F16_fake16_e64;
  case AMDGPU::S_CMP_GE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
                                   : AMDGPU::V_CMP_GE_F16_fake16_e64;
  case AMDGPU::S_CMP_O_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
                                   : AMDGPU::V_CMP_O_F16_fake16_e64;
  case AMDGPU::S_CMP_U_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
                                   : AMDGPU::V_CMP_U_F16_fake16_e64;
  case AMDGPU::S_CMP_NGE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
                                   : AMDGPU::V_CMP_NGE_F16_fake16_e64;
  case AMDGPU::S_CMP_NLG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
                                   : AMDGPU::V_CMP_NLG_F16_fake16_e64;
  case AMDGPU::S_CMP_NGT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
                                   : AMDGPU::V_CMP_NGT_F16_fake16_e64;
  case AMDGPU::S_CMP_NLE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
                                   : AMDGPU::V_CMP_NLE_F16_fake16_e64;
  case AMDGPU::S_CMP_NEQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
                                   : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
  case AMDGPU::S_CMP_NLT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
                                   : AMDGPU::V_CMP_NLT_F16_fake16_e64;
  case AMDGPU::V_S_EXP_F32_e64:
    return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
                                   : AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64:
    return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
                                   : AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64:
    return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
                                   : AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64:
    return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
                                   : AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64:
    return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
                                   : AMDGPU::V_SQRT_F16_fake16_e64;
  }
  llvm_unreachable(
      "Unexpected scalar opcode without corresponding vector one!");
6000 "Not a whole wave func");
6003 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6004 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
  case AMDGPU::AV_32RegClassID:
    RCID = AMDGPU::VGPR_32RegClassID;
    break;
  case AMDGPU::AV_64RegClassID:
    RCID = AMDGPU::VReg_64RegClassID;
    break;
  case AMDGPU::AV_96RegClassID:
    RCID = AMDGPU::VReg_96RegClassID;
    break;
  case AMDGPU::AV_128RegClassID:
    RCID = AMDGPU::VReg_128RegClassID;
    break;
  case AMDGPU::AV_160RegClassID:
    RCID = AMDGPU::VReg_160RegClassID;
    break;
  case AMDGPU::AV_512RegClassID:
    RCID = AMDGPU::VReg_512RegClassID;
    break;
6046 auto RegClass = TID.
operands()[OpNum].RegClass;
6049 return RI.getRegClass(RegClass);
6055 unsigned OpNo)
const {
6057 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6058 Desc.operands()[OpNo].RegClass == -1) {
6061 if (Reg.isVirtual()) {
6063 MI.getParent()->getParent()->getRegInfo();
6064 return MRI.getRegClass(Reg);
6066 return RI.getPhysRegBaseClass(Reg);
6069 unsigned RCID =
Desc.operands()[OpNo].RegClass;
6078 unsigned RCID =
get(
MI.getOpcode()).operands()[
OpIdx].RegClass;
6080 unsigned Size = RI.getRegSizeInBits(*RC);
6081 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6082 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6083 : AMDGPU::V_MOV_B32_e32;
6085 Opcode = AMDGPU::COPY;
6086 else if (RI.isSGPRClass(RC))
6087 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
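// The code above materializes a value by picking a move opcode from the
// destination register size and class: SGPR destinations use scalar moves,
// 64-bit and 16-bit VGPR destinations use their dedicated pseudos/encodings.
// The standalone sketch below models that decision; the opcode names are the
// strings used above, and the additional fall-back to a plain COPY present
// in the surrounding code is deliberately omitted here.
namespace mov_select_sketch {

const char *selectMovOpcode(unsigned SizeInBits, bool IsSGPRDest) {
  if (IsSGPRDest)
    return SizeInBits == 64 ? "S_MOV_B64" : "S_MOV_B32";
  if (SizeInBits == 64)
    return "V_MOV_B64_PSEUDO";
  if (SizeInBits == 16)
    return "V_MOV_B16_t16_e64";
  return "V_MOV_B32_e32";
}

} // namespace mov_select_sketch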
6101 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6107 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6118 if (SubIdx == AMDGPU::sub0)
6120 if (SubIdx == AMDGPU::sub1)
6132void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6148 if (Reg.isPhysical())
6158 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6161 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6168 unsigned Opc =
MI.getOpcode();
6174 constexpr const AMDGPU::OpName OpNames[] = {
6175 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6178 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6179 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6189 bool IsAGPR = RI.isAGPR(
MRI, MO.
getReg());
6190 if (IsAGPR && !ST.hasMAIInsts())
6192 if (IsAGPR && (!ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
6196 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6197 const int DataIdx = AMDGPU::getNamedOperandIdx(
6198 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6199 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6200 MI.getOperand(DataIdx).isReg() &&
6201 RI.isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6203 if ((
int)
OpIdx == DataIdx) {
6204 if (VDstIdx != -1 &&
6205 RI.isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6208 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6209 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6210 RI.isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6215 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6216 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6236 constexpr const unsigned NumOps = 3;
6237 constexpr const AMDGPU::OpName OpNames[
NumOps * 2] = {
6238 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6239 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6240 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6245 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6248 MO = &
MI.getOperand(SrcIdx);
6255 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6259 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6263 return !OpSel && !OpSelHi;
6273 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) :
nullptr;
6282 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6283 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6287 if (!LiteralLimit--)
6297 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6305 if (--ConstantBusLimit <= 0)
6317 if (!LiteralLimit--)
6319 if (--ConstantBusLimit <= 0)
6325 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6329 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6331 !
Op.isIdenticalTo(*MO))
6341 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6355 bool Is64BitOp = Is64BitFPOp ||
6362 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6371 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6389 bool IsGFX950Only = ST.hasGFX950Insts();
6390 bool IsGFX940Only = ST.hasGFX940Insts();
6392 if (!IsGFX950Only && !IsGFX940Only)
6410 unsigned Opcode =
MI.getOpcode();
  case AMDGPU::V_CVT_PK_BF8_F32_e64:
  case AMDGPU::V_CVT_PK_FP8_F32_e64:
  case AMDGPU::V_MQSAD_PK_U16_U8_e64:
  case AMDGPU::V_MQSAD_U32_U8_e64:
  case AMDGPU::V_PK_ADD_F16:
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_I16:
  case AMDGPU::V_PK_ADD_U16:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_FMA_F16:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMAC_F16_e32:
  case AMDGPU::V_PK_FMAC_F16_e64:
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_MAD_I16:
  case AMDGPU::V_PK_MAD_U16:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_PK_MAX_I16:
  case AMDGPU::V_PK_MAX_U16:
  case AMDGPU::V_PK_MIN_F16:
  case AMDGPU::V_PK_MIN_I16:
  case AMDGPU::V_PK_MIN_U16:
  case AMDGPU::V_PK_MOV_B32:
  case AMDGPU::V_PK_MUL_F16:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_LO_U16:
  case AMDGPU::V_PK_SUB_I16:
  case AMDGPU::V_PK_SUB_U16:
  case AMDGPU::V_QSAD_PK_U16_U8_e64:
6450 unsigned Opc =
MI.getOpcode();
6453 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6456 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6462 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6469 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6472 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6478 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6488 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6489 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6490 if (!RI.isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6502 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6504 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6516 if (HasImplicitSGPR || !
MI.isCommutable()) {
6533 if (CommutedOpc == -1) {
6538 MI.setDesc(
get(CommutedOpc));
6542 bool Src0Kill = Src0.
isKill();
6546 else if (Src1.
isReg()) {
6561 unsigned Opc =
MI.getOpcode();
6564 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6565 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6566 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6569 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6570 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6571 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6572 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6573 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6574 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6575 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6579 if (Src1.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()))) {
6580 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6585 if (VOP3Idx[2] != -1) {
6587 if (Src2.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src2.
getReg()))) {
6588 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6597 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6598 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6600 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6602 SGPRsUsed.
insert(SGPRReg);
6606 for (
int Idx : VOP3Idx) {
6615 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6627 if (!RI.isSGPRClass(RI.getRegClassForReg(
MRI, MO.
getReg())))
6634 if (ConstantBusLimit > 0) {
6646 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6647 !RI.isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6653 for (
unsigned I = 0;
I < 3; ++
I) {
6666 SRC = RI.getCommonSubClass(SRC, DstRC);
6669 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6671 if (RI.hasAGPRs(VRC)) {
6672 VRC = RI.getEquivalentVGPRClass(VRC);
6673 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6675 get(TargetOpcode::COPY), NewSrcReg)
6682 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6688 for (
unsigned i = 0; i < SubRegs; ++i) {
6689 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6691 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6692 .
addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6698 get(AMDGPU::REG_SEQUENCE), DstReg);
6699 for (
unsigned i = 0; i < SubRegs; ++i) {
6701 MIB.
addImm(RI.getSubRegFromChannel(i));
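// When a uniform (SGPR) value is required but only a vector register source
// is available, the loop above reads the value back one 32-bit channel at a
// time with V_READFIRSTLANE_B32 and reassembles it with REG_SEQUENCE. The
// standalone sketch below models that decomposition over plain data
// stand-ins for registers; it is an illustration, not machine IR.
#include <cstdint>
#include <vector>

namespace readfirstlane_sketch {

// Models V_READFIRSTLANE_B32: take the value held by the first active lane.
uint32_t readFirstLane(const std::vector<uint32_t> &LaneValues) {
  return LaneValues.empty() ? 0u : LaneValues.front();
}

// Broadcast each 32-bit channel of a wide per-lane value into a "scalar"
// result, channel by channel (the per-subregister loop in the real code).
std::vector<uint32_t>
readFirstLaneWide(const std::vector<std::vector<uint32_t>> &Channels) {
  std::vector<uint32_t> Scalar;
  Scalar.reserve(Channels.size());
  for (const auto &Chan : Channels) // one V_READFIRSTLANE_B32 per channel
    Scalar.push_back(readFirstLane(Chan));
  return Scalar;
}

} // namespace readfirstlane_sketch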
6714 if (SBase && !RI.isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6716 SBase->setReg(SGPR);
6719 if (SOff && !RI.isSGPRReg(
MRI, SOff->
getReg())) {
6727 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6728 if (OldSAddrIdx < 0)
6744 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6745 if (NewVAddrIdx < 0)
6748 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6752 if (OldVAddrIdx >= 0) {
6754 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6766 if (OldVAddrIdx == NewVAddrIdx) {
6769 MRI.removeRegOperandFromUseList(&NewVAddr);
6770 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6774 MRI.removeRegOperandFromUseList(&NewVAddr);
6775 MRI.addRegOperandToUseList(&NewVAddr);
6777 assert(OldSAddrIdx == NewVAddrIdx);
6779 if (OldVAddrIdx >= 0) {
6780 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6781 AMDGPU::OpName::vdst_in);
6785 if (NewVDstIn != -1) {
6786 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6792 if (NewVDstIn != -1) {
6793 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6814 if (!SAddr || RI.isSGPRClass(
MRI.getRegClass(SAddr->
getReg())))
6834 unsigned OpSubReg =
Op.getSubReg();
6837 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6843 Register DstReg =
MRI.createVirtualRegister(DstRC);
6853 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6856 bool ImpDef = Def->isImplicitDef();
6857 while (!ImpDef && Def && Def->isCopy()) {
6858 if (Def->getOperand(1).getReg().isPhysical())
6860 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6861 ImpDef = Def && Def->isImplicitDef();
6863 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6882 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6888 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6889 unsigned NumSubRegs =
RegSize / 32;
6890 Register VScalarOp = ScalarOp->getReg();
6892 if (NumSubRegs == 1) {
6893 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6895 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6898 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6900 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6906 CondReg = NewCondReg;
6908 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6916 ScalarOp->setReg(CurReg);
6917 ScalarOp->setIsKill();
6921 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6922 "Unhandled register size");
6924 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
6926 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6928 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6931 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6932 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
6935 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6936 .
addReg(VScalarOp, VScalarOpUndef,
6937 TRI->getSubRegFromChannel(Idx + 1));
6943 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6944 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6950 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6951 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6954 if (NumSubRegs <= 2)
6955 Cmp.addReg(VScalarOp);
6957 Cmp.addReg(VScalarOp, VScalarOpUndef,
6958 TRI->getSubRegFromChannel(Idx, 2));
6962 CondReg = NewCondReg;
6964 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6972 const auto *SScalarOpRC =
6973 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6974 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6978 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6979 unsigned Channel = 0;
6980 for (
Register Piece : ReadlanePieces) {
6981 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6985 ScalarOp->setReg(SScalarOp);
6986 ScalarOp->setIsKill();
6990 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6991 MRI.setSimpleHint(SaveExec, CondReg);
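// The loop being built above is the classic "waterfall": read the value of
// the first still-active lane, form a mask of every lane holding the same
// value (V_CMP_EQ + saved EXEC), run the operation with EXEC restricted to
// that mask, retire those lanes, and repeat. The standalone sketch below
// models only the partitioning step for a 32-lane wave over plain arrays;
// it is a host-side illustration, not machine IR, and uses a GCC/Clang
// builtin for the first-set-bit scan.
#include <cstdint>
#include <utility>
#include <vector>

namespace waterfall_sketch {

// Returns the (uniform value, lane mask) groups the waterfall would iterate.
std::vector<std::pair<uint32_t, uint32_t>>
partitionLanes(const std::vector<uint32_t> &LaneValues) {
  std::vector<std::pair<uint32_t, uint32_t>> Groups;
  uint32_t Active = (LaneValues.size() >= 32)
                        ? 0xffffffffu
                        : ((1u << LaneValues.size()) - 1);
  while (Active) {
    unsigned First = __builtin_ctz(Active); // first active lane
    uint32_t Value = LaneValues[First];     // V_READFIRSTLANE_B32
    uint32_t Mask = 0;
    for (unsigned L = 0; L < LaneValues.size() && L < 32; ++L)
      if ((Active & (1u << L)) && LaneValues[L] == Value)
        Mask |= 1u << L;                    // V_CMP_EQ against the value
    Groups.emplace_back(Value, Mask);
    Active &= ~Mask;                        // drop the handled lanes
  }
  return Groups;
}

} // namespace waterfall_sketch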
7022 if (!Begin.isValid())
7024 if (!End.isValid()) {
7030 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7038 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7039 std::numeric_limits<unsigned>::max()) !=
7042 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7048 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
7057 for (
auto I = Begin;
I != AfterMI;
I++) {
7058 for (
auto &MO :
I->all_uses())
7059 MRI.clearKillFlags(MO.getReg());
7084 MBB.addSuccessor(LoopBB);
7094 for (
auto &Succ : RemainderBB->
successors()) {
7118static std::tuple<unsigned, unsigned>
7126 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7127 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7130 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7131 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7132 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7133 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7134 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7151 .
addImm(AMDGPU::sub0_sub1)
7157 return std::tuple(RsrcPtr, NewSRsrc);
7194 if (
MI.getOpcode() == AMDGPU::PHI) {
7196 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7197 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7200 MRI.getRegClass(
MI.getOperand(i).getReg());
7201 if (RI.hasVectorRegisters(OpRC)) {
7215 VRC = &AMDGPU::VReg_1RegClass;
7218 ? RI.getEquivalentAGPRClass(SRC)
7219 : RI.getEquivalentVGPRClass(SRC);
7222 ? RI.getEquivalentAGPRClass(VRC)
7223 : RI.getEquivalentVGPRClass(VRC);
7231 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7233 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7249 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7252 if (RI.hasVGPRs(DstRC)) {
7256 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7258 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7276 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7281 if (DstRC != Src0RC) {
7290 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7292 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7298 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7299 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7300 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7301 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7302 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7303 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7304 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7306 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7319 ? AMDGPU::OpName::rsrc
7320 : AMDGPU::OpName::srsrc;
7322 if (SRsrc && !RI.isSGPRClass(
MRI.getRegClass(SRsrc->
getReg())))
7325 AMDGPU::OpName SampOpName =
7326 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7328 if (SSamp && !RI.isSGPRClass(
MRI.getRegClass(SSamp->
getReg())))
7335 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7337 if (!RI.isSGPRClass(
MRI.getRegClass(Dest->
getReg()))) {
7341 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7342 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7347 while (Start->getOpcode() != FrameSetupOpcode)
7350 while (End->getOpcode() != FrameDestroyOpcode)
7354 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7355 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7363 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7365 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7367 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7377 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7378 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7379 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7380 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7382 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7389 bool isSoffsetLegal =
true;
7391 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7392 if (SoffsetIdx != -1) {
7395 !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
7396 isSoffsetLegal =
false;
7400 bool isRsrcLegal =
true;
7402 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7403 if (RsrcIdx != -1) {
7406 isRsrcLegal =
false;
7410 if (isRsrcLegal && isSoffsetLegal)
7434 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7435 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7436 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7438 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7439 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7440 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7442 unsigned RsrcPtr, NewSRsrc;
7449 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7456 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7470 }
else if (!VAddr && ST.hasAddr64()) {
7474 "FIXME: Need to emit flat atomics here");
7476 unsigned RsrcPtr, NewSRsrc;
7479 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7502 MIB.
addImm(CPol->getImm());
7507 MIB.
addImm(TFE->getImm());
7527 MI.removeFromParent();
7532 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7534 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7538 if (!isSoffsetLegal) {
7550 if (!isSoffsetLegal) {
7562 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7563 if (RsrcIdx != -1) {
7564 DeferredList.insert(
MI);
7569 return DeferredList.contains(
MI);
7579 if (!ST.useRealTrue16Insts())
7582 unsigned Opcode =
MI.getOpcode();
7586 OpIdx >=
get(Opcode).getNumOperands() ||
7587 get(Opcode).operands()[
OpIdx].RegClass == -1)
7591 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7595 if (!RI.isVGPRClass(CurrRC))
7598 unsigned RCID =
get(Opcode).operands()[
OpIdx].RegClass;
7600 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7601 Op.setSubReg(AMDGPU::lo16);
7602 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7604 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7605 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7612 Op.setReg(NewDstReg);
7624 while (!Worklist.
empty()) {
7638 "Deferred MachineInstr are not supposed to re-populate worklist");
7656 case AMDGPU::S_ADD_I32:
7657 case AMDGPU::S_SUB_I32: {
7661 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7669 case AMDGPU::S_MUL_U64:
7670 if (ST.hasVectorMulU64()) {
7671 NewOpcode = AMDGPU::V_MUL_U64_e64;
7675 splitScalarSMulU64(Worklist, Inst, MDT);
7679 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7680 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7683 splitScalarSMulPseudo(Worklist, Inst, MDT);
7687 case AMDGPU::S_AND_B64:
7688 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7692 case AMDGPU::S_OR_B64:
7693 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7697 case AMDGPU::S_XOR_B64:
7698 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7702 case AMDGPU::S_NAND_B64:
7703 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7707 case AMDGPU::S_NOR_B64:
7708 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7712 case AMDGPU::S_XNOR_B64:
7713 if (ST.hasDLInsts())
7714 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7716 splitScalar64BitXnor(Worklist, Inst, MDT);
7720 case AMDGPU::S_ANDN2_B64:
7721 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7725 case AMDGPU::S_ORN2_B64:
7726 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7730 case AMDGPU::S_BREV_B64:
7731 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7735 case AMDGPU::S_NOT_B64:
7736 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7740 case AMDGPU::S_BCNT1_I32_B64:
7741 splitScalar64BitBCNT(Worklist, Inst);
7745 case AMDGPU::S_BFE_I64:
7746 splitScalar64BitBFE(Worklist, Inst);
7750 case AMDGPU::S_FLBIT_I32_B64:
7751 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7754 case AMDGPU::S_FF1_I32_B64:
7755 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7759 case AMDGPU::S_LSHL_B32:
7760 if (ST.hasOnlyRevVALUShifts()) {
7761 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7765 case AMDGPU::S_ASHR_I32:
7766 if (ST.hasOnlyRevVALUShifts()) {
7767 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7771 case AMDGPU::S_LSHR_B32:
7772 if (ST.hasOnlyRevVALUShifts()) {
7773 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7777 case AMDGPU::S_LSHL_B64:
7778 if (ST.hasOnlyRevVALUShifts()) {
7780 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7781 : AMDGPU::V_LSHLREV_B64_e64;
7785 case AMDGPU::S_ASHR_I64:
7786 if (ST.hasOnlyRevVALUShifts()) {
7787 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7791 case AMDGPU::S_LSHR_B64:
7792 if (ST.hasOnlyRevVALUShifts()) {
7793 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7798 case AMDGPU::S_ABS_I32:
7799 lowerScalarAbs(Worklist, Inst);
7803 case AMDGPU::S_CBRANCH_SCC0:
7804 case AMDGPU::S_CBRANCH_SCC1: {
7807 bool IsSCC = CondReg == AMDGPU::SCC;
7815 case AMDGPU::S_BFE_U64:
7816 case AMDGPU::S_BFM_B64:
7819 case AMDGPU::S_PACK_LL_B32_B16:
7820 case AMDGPU::S_PACK_LH_B32_B16:
7821 case AMDGPU::S_PACK_HL_B32_B16:
7822 case AMDGPU::S_PACK_HH_B32_B16:
7823 movePackToVALU(Worklist,
MRI, Inst);
7827 case AMDGPU::S_XNOR_B32:
7828 lowerScalarXnor(Worklist, Inst);
7832 case AMDGPU::S_NAND_B32:
7833 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7837 case AMDGPU::S_NOR_B32:
7838 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7842 case AMDGPU::S_ANDN2_B32:
7843 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7847 case AMDGPU::S_ORN2_B32:
7848 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7856 case AMDGPU::S_ADD_CO_PSEUDO:
7857 case AMDGPU::S_SUB_CO_PSEUDO: {
7858 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7859 ? AMDGPU::V_ADDC_U32_e64
7860 : AMDGPU::V_SUBB_U32_e64;
7861 const auto *CarryRC = RI.getWaveMaskRegClass();
7864 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7865 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7872 Register DestReg =
MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
7883 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7887 case AMDGPU::S_UADDO_PSEUDO:
7888 case AMDGPU::S_USUBO_PSEUDO: {
7895 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7896 ? AMDGPU::V_ADD_CO_U32_e64
7897 : AMDGPU::V_SUB_CO_U32_e64;
7899 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest0.
getReg()));
7900 Register DestReg =
MRI.createVirtualRegister(NewRC);
7908 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
  case AMDGPU::S_CSELECT_B32:
  case AMDGPU::S_CSELECT_B64:
    lowerSelect(Worklist, Inst, MDT);

  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMP_LT_I32:
  case AMDGPU::S_CMP_LE_I32:
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMP_LT_U32:
  case AMDGPU::S_CMP_LE_U32:
  case AMDGPU::S_CMP_EQ_U64:
  case AMDGPU::S_CMP_LG_U64:
  case AMDGPU::S_CMP_LT_F32:
  case AMDGPU::S_CMP_EQ_F32:
  case AMDGPU::S_CMP_LE_F32:
  case AMDGPU::S_CMP_GT_F32:
  case AMDGPU::S_CMP_LG_F32:
  case AMDGPU::S_CMP_GE_F32:
  case AMDGPU::S_CMP_O_F32:
  case AMDGPU::S_CMP_U_F32:
  case AMDGPU::S_CMP_NGE_F32:
  case AMDGPU::S_CMP_NLG_F32:
  case AMDGPU::S_CMP_NGT_F32:
  case AMDGPU::S_CMP_NLE_F32:
  case AMDGPU::S_CMP_NEQ_F32:
  case AMDGPU::S_CMP_NLT_F32: {
    Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());

    if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=

    addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);

  case AMDGPU::S_CMP_LT_F16:
  case AMDGPU::S_CMP_EQ_F16:
  case AMDGPU::S_CMP_LE_F16:
  case AMDGPU::S_CMP_GT_F16:
  case AMDGPU::S_CMP_LG_F16:
  case AMDGPU::S_CMP_GE_F16:
  case AMDGPU::S_CMP_O_F16:
  case AMDGPU::S_CMP_U_F16:
  case AMDGPU::S_CMP_NGE_F16:
  case AMDGPU::S_CMP_NLG_F16:
  case AMDGPU::S_CMP_NGT_F16:
  case AMDGPU::S_CMP_NLE_F16:
  case AMDGPU::S_CMP_NEQ_F16:
  case AMDGPU::S_CMP_NLT_F16: {
    Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());

    addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8010 case AMDGPU::S_CVT_HI_F32_F16: {
8012 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8013 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8014 if (ST.useRealTrue16Insts()) {
8019 .
addReg(TmpReg, 0, AMDGPU::hi16)
8035 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8039 case AMDGPU::S_MINIMUM_F32:
8040 case AMDGPU::S_MAXIMUM_F32: {
8042 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8053 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8057 case AMDGPU::S_MINIMUM_F16:
8058 case AMDGPU::S_MAXIMUM_F16: {
8060 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8061 ? &AMDGPU::VGPR_16RegClass
8062 : &AMDGPU::VGPR_32RegClass);
8074 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8078 case AMDGPU::V_S_EXP_F16_e64:
8079 case AMDGPU::V_S_LOG_F16_e64:
8080 case AMDGPU::V_S_RCP_F16_e64:
8081 case AMDGPU::V_S_RSQ_F16_e64:
8082 case AMDGPU::V_S_SQRT_F16_e64: {
8084 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8085 ? &AMDGPU::VGPR_16RegClass
8086 : &AMDGPU::VGPR_32RegClass);
8098 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8104 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8112 if (NewOpcode == Opcode) {
8121 if (
MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
8123 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
8127 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8129 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8147 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8149 MRI.replaceRegWith(DstReg, NewDstReg);
8150 MRI.clearKillFlags(NewDstReg);
8164 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8168 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8169 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8170 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8172 get(AMDGPU::IMPLICIT_DEF), Undef);
8174 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8180 MRI.replaceRegWith(DstReg, NewDstReg);
8181 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8183 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8186 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8187 MRI.replaceRegWith(DstReg, NewDstReg);
8188 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8193 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8194 MRI.replaceRegWith(DstReg, NewDstReg);
8196 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8206 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8207 AMDGPU::OpName::src0_modifiers) >= 0)
8211 NewInstr->addOperand(Src);
8214 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8217 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8219 NewInstr.addImm(
Size);
8220 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8224 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8229 "Scalar BFE is only implemented for constant width and offset");
8237 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8238 AMDGPU::OpName::src1_modifiers) >= 0)
8240 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8242 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8243 AMDGPU::OpName::src2_modifiers) >= 0)
8245 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8247 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8249 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8251 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8257 NewInstr->addOperand(
Op);
8264 if (
Op.getReg() == AMDGPU::SCC) {
8266 if (
Op.isDef() && !
Op.isDead())
8267 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8269 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8274 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8275 Register DstReg = NewInstr->getOperand(0).getReg();
8280 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8281 MRI.replaceRegWith(DstReg, NewDstReg);
8290 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
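// The move-to-VALU rewriting above is worklist-driven: once an SALU result
// is rewritten into a VGPR-class value, every user of the old result may in
// turn need rewriting, so the users are pushed back onto the worklist until
// a fixed point is reached. The standalone sketch below shows that
// propagation over a toy def-use graph instead of MachineRegisterInfo.
#include <queue>
#include <set>
#include <vector>

namespace move_to_valu_sketch {

struct Value {
  bool IsVGPR = false;
  std::vector<int> Users; // indices of values computed from this one
};

// Marks `Root` and, transitively, everything computed from it as VGPR.
void moveToVALU(std::vector<Value> &Values, int Root) {
  std::queue<int> Worklist;
  std::set<int> Seen;
  Worklist.push(Root);
  while (!Worklist.empty()) {
    int Cur = Worklist.front();
    Worklist.pop();
    if (!Seen.insert(Cur).second)
      continue;                          // already rewritten
    Values[Cur].IsVGPR = true;           // rewrite this def to a VALU result
    for (int User : Values[Cur].Users)   // push the users for a revisit
      Worklist.push(User);
  }
}

} // namespace move_to_valu_sketch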
8294std::pair<bool, MachineBasicBlock *>
8306 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8309 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8311 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8312 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8320 MRI.replaceRegWith(OldDstReg, ResultReg);
8323 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8324 return std::pair(
true, NewBB);
8327 return std::pair(
false,
nullptr);
8344 bool IsSCC = (CondReg == AMDGPU::SCC);
8352 MRI.replaceRegWith(Dest.
getReg(), CondReg);
8358 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8359 NewCondReg =
MRI.createVirtualRegister(TC);
8363 bool CopyFound =
false;
8364 for (MachineInstr &CandI :
8367 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8369 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8371 .
addReg(CandI.getOperand(1).getReg());
8383 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8391 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg())));
8392 MachineInstr *NewInst;
8393 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8394 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8407 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
8409 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
8421 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8422 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8424 unsigned SubOp = ST.hasAddNoCarry() ?
8425 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8435 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8436 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
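// The scalar-abs lowering above builds |x| from a subtract and a signed max:
// negate by subtracting from zero, then take the larger of the original and
// the negated value. The standalone sketch below is the arithmetic identity
// it relies on (wrapping behavior for INT_MIN matches the hardware ops).
#include <algorithm>
#include <cstdint>

namespace scalar_abs_sketch {

int32_t lowerAbs(int32_t X) {
  int32_t Neg = 0 - X;     // V_SUB_U32 / V_SUB_CO_U32 in the real lowering
  return std::max(X, Neg); // V_MAX_I32
}

} // namespace scalar_abs_sketch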
8450 if (ST.hasDLInsts()) {
8451 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8459 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8460 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8466 bool Src0IsSGPR = Src0.
isReg() &&
8467 RI.isSGPRClass(
MRI.getRegClass(Src0.
getReg()));
8468 bool Src1IsSGPR = Src1.
isReg() &&
8469 RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()));
8471 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8472 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8482 }
else if (Src1IsSGPR) {
8496 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8500 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
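// The xnor lowering above either uses the native V_XNOR (when the subtarget
// has DL instructions) or expands xnor into a NOT folded around an XOR,
// keeping the NOT on whichever operand is already uniform. The standalone
// sketch below states the identities that make both forms equivalent.
#include <cstdint>

namespace xnor_sketch {

uint32_t xnorDirect(uint32_t A, uint32_t B) { return ~(A ^ B); } // V_XNOR_B32

uint32_t xnorExpanded(uint32_t A, uint32_t B) {
  return (~A) ^ B; // S_NOT_B32 on the uniform operand + V_XOR_B32
}

} // namespace xnor_sketch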
8506 unsigned Opcode)
const {
8516 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8517 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8529 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8530 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8535 unsigned Opcode)
const {
8545 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8546 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8558 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8559 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8574    const MCInstrDesc &InstDesc = get(Opcode);
8575    const TargetRegisterClass *Src0RC = Src0.isReg() ?
8577        &AMDGPU::SGPR_32RegClass;
8579    const TargetRegisterClass *Src0SubRC =
8580        RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8583        AMDGPU::sub0, Src0SubRC);
8585    const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8586    const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8587    const TargetRegisterClass *NewDestSubRC =
8588        RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8590    Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8591    MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8594        AMDGPU::sub1, Src0SubRC);
8596    Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8597    MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8602    Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8609    MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8611    Worklist.insert(&LoHalf);
8612    Worklist.insert(&HiHalf);
8618    addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
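// 64-bit unary split: the source is read as sub0/sub1 halves, the 32-bit
// opcode is emitted once per half (LoHalf/HiHalf), the halves are recombined
// into FullDestReg on the elided lines, and both new instructions plus the
// users of the result are queued for further legalization.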
8629    Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8630    Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8631    Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8639    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8640    const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8641    const TargetRegisterClass *Src0SubRC =
8642        RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8643    if (RI.isSGPRClass(Src0SubRC))
8644      Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8645    const TargetRegisterClass *Src1SubRC =
8646        RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8647    if (RI.isSGPRClass(Src1SubRC))
8648      Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8652    MachineOperand Op0L =
8654    MachineOperand Op1L =
8656    MachineOperand Op0H =
8658    MachineOperand Op1H =
8676    Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8677    MachineInstr *Op1L_Op0H =
8682    Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8683    MachineInstr *Op1H_Op0L =
8688    Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8689    MachineInstr *Carry =
8694    MachineInstr *LoHalf =
8699    Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8704    MachineInstr *HiHalf =
8715    MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8727    addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
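// Full 64 x 64 -> 64 multiply: with the operands split into 32-bit halves, the
// high half of the result is assembled from the two cross products
// (Op1L*Op0H and Op1H*Op0L) plus the carry/high bits of the low product; the
// individual V_MUL / V_ADD instructions sit on the elided lines.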
8738    Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8739    Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8740    Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8748    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8749    const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8750    const TargetRegisterClass *Src0SubRC =
8751        RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8752    if (RI.isSGPRClass(Src0SubRC))
8753      Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8754    const TargetRegisterClass *Src1SubRC =
8755        RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8756    if (RI.isSGPRClass(Src1SubRC))
8757      Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8761    MachineOperand Op0L =
8763    MachineOperand Op1L =
8767    unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8768                          ? AMDGPU::V_MUL_HI_U32_e64
8769                          : AMDGPU::V_MUL_HI_I32_e64;
8770    MachineInstr *HiHalf =
8773    MachineInstr *LoHalf =
8784    MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8792    addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8808    const MCInstrDesc &InstDesc = get(Opcode);
8809    const TargetRegisterClass *Src0RC = Src0.isReg() ?
8811        &AMDGPU::SGPR_32RegClass;
8813    const TargetRegisterClass *Src0SubRC =
8814        RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8815    const TargetRegisterClass *Src1RC = Src1.isReg() ?
8817        &AMDGPU::SGPR_32RegClass;
8819    const TargetRegisterClass *Src1SubRC =
8820        RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8823        AMDGPU::sub0, Src0SubRC);
8825        AMDGPU::sub0, Src1SubRC);
8827        AMDGPU::sub1, Src0SubRC);
8829        AMDGPU::sub1, Src1SubRC);
8831    const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8832    const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8833    const TargetRegisterClass *NewDestSubRC =
8834        RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8836    Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8837    MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
8841    Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8842    MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
8846    Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8853    MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8855    Worklist.insert(&LoHalf);
8856    Worklist.insert(&HiHalf);
8859    addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
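// Above: the S_MUL_U64_U32 / S_MUL_I64_I32 pseudos treat the operands as 32-bit
// values, so one V_MUL_LO plus one V_MUL_HI_{U32,I32} pair is enough; the
// generic 64-bit binary split (lines 8808-8859) then mirrors the unary case
// with one 32-bit instruction per half and a recombine into FullDestReg.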
8875    const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8877    Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8879    MachineOperand *Op0;
8880    MachineOperand *Op1;
8893    Register NewDest = MRI.createVirtualRegister(DestRC);
8899    MRI.replaceRegWith(Dest.getReg(), NewDest);
8915    const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
8916    const TargetRegisterClass *SrcRC = Src.isReg() ?
8917        MRI.getRegClass(Src.getReg()) :
8918        &AMDGPU::SGPR_32RegClass;
8920    Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8921    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8923    const TargetRegisterClass *SrcSubRC =
8924        RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8927        AMDGPU::sub0, SrcSubRC);
8929        AMDGPU::sub1, SrcSubRC);
8935    MRI.replaceRegWith(Dest.getReg(), ResultReg);
8939    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8958    Offset == 0 && "Not implemented");
8961    Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8962    Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8963    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8980    MRI.replaceRegWith(Dest.getReg(), ResultReg);
8981    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8986    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8987    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8991        .addReg(Src.getReg(), 0, AMDGPU::sub0);
8994        .addReg(Src.getReg(), 0, AMDGPU::sub0)
8999    MRI.replaceRegWith(Dest.getReg(), ResultReg);
9000    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9019    const MCInstrDesc &InstDesc = get(Opcode);
9021    bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9022    unsigned OpcodeAdd =
9023        ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
9025    const TargetRegisterClass *SrcRC =
9026        Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9027    const TargetRegisterClass *SrcSubRC =
9028        RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9030    MachineOperand SrcRegSub0 =
9032    MachineOperand SrcRegSub1 =
9035    Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9036    Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9037    Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9038    Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9045        .addReg(IsCtlz ? MidReg1 : MidReg2)
9051        .addReg(IsCtlz ? MidReg2 : MidReg1);
9053    MRI.replaceRegWith(Dest.getReg(), MidReg4);
9055    addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
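// 64-bit count ops: each 32-bit half is counted separately (MidReg1/MidReg2),
// OpcodeAdd adds 32 to the half that is logically searched second, and the two
// candidates appear to be combined with a min into MidReg4, which then replaces
// the original destination.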
9058 void SIInstrInfo::addUsersToMoveToVALUWorklist(
9062    MachineInstr &UseMI = *MO.getParent();
9066    switch (UseMI.getOpcode()) {
9069    case AMDGPU::SOFT_WQM:
9070    case AMDGPU::STRICT_WWM:
9071    case AMDGPU::STRICT_WQM:
9072    case AMDGPU::REG_SEQUENCE:
9074    case AMDGPU::INSERT_SUBREG:
9077      OpNo = MO.getOperandNo();
9082      MRI.constrainRegClass(DstReg, OpRC);
9084      if (!RI.hasVectorRegisters(OpRC))
9095    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9102    case AMDGPU::S_PACK_LL_B32_B16: {
9103      Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9104      Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9121    case AMDGPU::S_PACK_LH_B32_B16: {
9122      Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9131    case AMDGPU::S_PACK_HL_B32_B16: {
9132      Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9142    case AMDGPU::S_PACK_HH_B32_B16: {
9143      Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9144      Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9161    MRI.replaceRegWith(Dest.getReg(), ResultReg);
9162    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
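// addUsersToMoveToVALUWorklist revisits every user of a register that has just
// become a VGPR, constraining pass-through operations (COPY, REG_SEQUENCE,
// INSERT_SUBREG, *_WQM/WWM) in place and queueing the rest. The
// S_PACK_*_B32_B16 cases above expand each pack into a VGPR shift/mask sequence
// that assembles the two 16-bit halves into ResultReg.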
9171    assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9172           !Op.isDead() && Op.getParent() == &SCCDefInst);
9173    SmallVector<MachineInstr *, 4> CopyToDelete;
9176    for (MachineInstr &MI :
9180      int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9183      MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9184      Register DestReg = MI.getOperand(0).getReg();
9186      MRI.replaceRegWith(DestReg, NewCond);
9191      MI.getOperand(SCCIdx).setReg(NewCond);
9197      if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9200    for (auto &Copy : CopyToDelete)
9201      Copy->eraseFromParent();
9209 void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9215    for (MachineInstr &MI :
9218      if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9220      if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9229    const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9237    case AMDGPU::REG_SEQUENCE:
9238    case AMDGPU::INSERT_SUBREG:
9240    case AMDGPU::SOFT_WQM:
9241    case AMDGPU::STRICT_WWM:
9242    case AMDGPU::STRICT_WQM: {
9244      if (RI.isAGPRClass(SrcRC)) {
9245        if (RI.isAGPRClass(NewDstRC))
9250        case AMDGPU::REG_SEQUENCE:
9251        case AMDGPU::INSERT_SUBREG:
9252          NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9255          NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9261      if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9264      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
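// SCC bookkeeping: users of the old SCC definition are redirected to the new
// VALU condition register (plain copies of SCC are collected in CopyToDelete
// and erased), defs feeding a moved SCC use are queued as well, and the final
// block selects the VGPR or AGPR class the moved instruction's destination
// must take.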
9278                                  int OpIndices[3]) const {
9279    const MCInstrDesc &Desc = MI.getDesc();
9295    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9297    for (unsigned i = 0; i < 3; ++i) {
9298      int Idx = OpIndices[i];
9302      const MachineOperand &MO = MI.getOperand(Idx);
9308      const TargetRegisterClass *OpRC =
9309          RI.getRegClass(Desc.operands()[Idx].RegClass);
9310      bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9316      const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9317      if (RI.isSGPRClass(RegRC))
9335    if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9336      SGPRReg = UsedSGPRs[0];
9339    if (!SGPRReg && UsedSGPRs[1]) {
9340      if (UsedSGPRs[1] == UsedSGPRs[2])
9341        SGPRReg = UsedSGPRs[1];
9348                                            AMDGPU::OpName OperandName) const {
9349    if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9352    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9356    return &MI.getOperand(Idx);
9370    if (ST.isAmdHsaOS()) {
9373      RsrcDataFormat |= (1ULL << 56);
9378      RsrcDataFormat |= (2ULL << 59);
9381    return RsrcDataFormat;
9391    uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9396    uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9403    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9409    unsigned Opc = MI.getOpcode();
9415    return get(Opc).mayLoad() &&
9420                                          int &FrameIndex) const {
9422    if (!Addr || !Addr->isFI())
9433                                         int &FrameIndex) const {
9441                                        int &FrameIndex) const {
9455                                          int &FrameIndex) const {
9472    while (++I != E && I->isInsideBundle()) {
9473      assert(!I->isBundle() && "No nested bundle!");
9481    unsigned Opc = MI.getOpcode();
9483    unsigned DescSize = Desc.getSize();
9488    unsigned Size = DescSize;
9492    if (MI.isBranch() && ST.hasOffset3fBug())
9503    bool HasLiteral = false;
9504    unsigned LiteralSize = 4;
9505    for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9510      if (ST.has64BitLiterals()) {
9511        switch (OpInfo.OperandType) {
9527    return HasLiteral ? DescSize + LiteralSize : DescSize;
9532    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9536    int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9537    return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9541    case TargetOpcode::BUNDLE:
9543    case TargetOpcode::INLINEASM:
9544    case TargetOpcode::INLINEASM_BR: {
9546      const char *AsmStr = MI.getOperand(0).getSymbolName();
9550    if (MI.isMetaInstruction())
9554    const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9557    unsigned LoInstOpcode = D16Info->LoOp;
9559    DescSize = Desc.getSize();
9563    if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9566      DescSize = Desc.getSize();
9577    if (MI.memoperands_empty())
9589    static const std::pair<int, const char *> TargetIndices[] = {
9627 std::pair<unsigned, unsigned>
9634    static const std::pair<unsigned, const char *> TargetFlags[] = {
9652    static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9667    return AMDGPU::WWM_COPY;
9669    return AMDGPU::COPY;
9681    bool IsNullOrVectorRegister = true;
9684    IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9689    return IsNullOrVectorRegister &&
9691           (Opcode == AMDGPU::IMPLICIT_DEF &&
9693           (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
9694            MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9702    if (ST.hasAddNoCarry())
9706    Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9707    MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9718    if (ST.hasAddNoCarry())
9725        *RI.getBoolRC(), I, false,
9738    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9739    case AMDGPU::SI_KILL_I1_TERMINATOR:
9748    case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9749      return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9750    case AMDGPU::SI_KILL_I1_PSEUDO:
9751      return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9763    const unsigned OffsetBits =
9765    return (1 << OffsetBits) - 1;
9772    if (MI.isInlineAsm())
9775    for (auto &Op : MI.implicit_operands()) {
9776      if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
9777        Op.setReg(AMDGPU::VCC_LO);
9786    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
9790    const auto RCID = MI.getDesc().operands()[Idx].RegClass;
9791    return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
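// Offset splitting: the fragments below take an immediate addressing offset
// that cannot be encoded directly and break it into the part that fits the
// instruction's offset field (ImmField) and a remainder to be added to the
// base (RemainderOffset); on subtargets with the negative-unaligned-scratch
// bug the split is additionally kept 4-byte aligned.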
9808    if (Imm <= MaxImm + 64) {
9810      Overflow = Imm - MaxImm;
9837    if (ST.hasRestrictedSOffset())
9880    if (!ST.hasFlatInstOffsets())
9888    if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9900 std::pair<int64_t, int64_t>
9903    int64_t RemainderOffset = COffsetVal;
9904    int64_t ImmField = 0;
9909    if (AllowNegative) {
9911      int64_t D = 1LL << NumBits;
9912      RemainderOffset = (COffsetVal / D) * D;
9913      ImmField = COffsetVal - RemainderOffset;
9915      if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9917          (ImmField % 4) != 0) {
9919        RemainderOffset += ImmField % 4;
9920        ImmField -= ImmField % 4;
9922    } else if (COffsetVal >= 0) {
9924      RemainderOffset = COffsetVal - ImmField;
9928    assert(RemainderOffset + ImmField == COffsetVal);
9929    return {ImmField, RemainderOffset};
9933    if (ST.hasNegativeScratchOffsetBug() &&
9941 switch (ST.getGeneration()) {
9967 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9968 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9969 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9970 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9971 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9972 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9973 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9974 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9981#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9982 case OPCODE##_dpp: \
9983 case OPCODE##_e32: \
9984 case OPCODE##_e64: \
9985 case OPCODE##_e64_dpp: \
10000 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10001 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10002 case AMDGPU::V_FMA_F16_gfx9_e64:
10003 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10004 case AMDGPU::V_INTERP_P2_F16:
10005 case AMDGPU::V_MAD_F16_e64:
10006 case AMDGPU::V_MAD_U16_e64:
10007 case AMDGPU::V_MAD_I16_e64:
10029 switch (ST.getGeneration()) {
10042    if (isMAI(Opcode)) {
10050    if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
10057    if (ST.hasGFX90AInsts()) {
10059    if (ST.hasGFX940Insts())
10090    for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
10091      if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10092        auto &RegOp = MI.getOperand(1 + 2 * I);
10104    switch (MI.getOpcode()) {
10106    case AMDGPU::REG_SEQUENCE:
10110    case AMDGPU::INSERT_SUBREG:
10111      if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10128    if (!P.Reg.isVirtual())
10132    auto *DefInst = MRI.getVRegDef(RSR.Reg);
10133    while (auto *MI = DefInst) {
10135      switch (MI->getOpcode()) {
10137      case AMDGPU::V_MOV_B32_e32: {
10138        auto &Op1 = MI->getOperand(1);
10143        DefInst = MRI.getVRegDef(RSR.Reg);
10151        DefInst = MRI.getVRegDef(RSR.Reg);
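// The EXEC scans below walk at most MaxInstScan (20) instructions after the
// def, and at most MaxUseScan (10) uses of the register, looking for a write
// to EXEC between the def and its use(s); hitting either limit conservatively
// reports that EXEC may be modified.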
10164    assert(MRI.isSSA() && "Must be run on SSA");
10166    auto *TRI = MRI.getTargetRegisterInfo();
10167    auto *DefBB = DefMI.getParent();
10171    if (UseMI.getParent() != DefBB)
10174    const int MaxInstScan = 20;
10178    auto E = UseMI.getIterator();
10179    for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10180      if (I->isDebugInstr())
10183      if (++NumInst > MaxInstScan)
10186      if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10196    assert(MRI.isSSA() && "Must be run on SSA");
10198    auto *TRI = MRI.getTargetRegisterInfo();
10199    auto *DefBB = DefMI.getParent();
10201    const int MaxUseScan = 10;
10204    for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10205      auto &UseInst = *Use.getParent();
10208      if (UseInst.getParent() != DefBB || UseInst.isPHI())
10211      if (++NumUse > MaxUseScan)
10218    const int MaxInstScan = 20;
10222    for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10225      if (I->isDebugInstr())
10228      if (++NumInst > MaxInstScan)
10241      if (Reg == VReg && --NumUse == 0)
10243      } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10252    auto Cur = MBB.begin();
10253    if (Cur != MBB.end())
10255      if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10258    } while (Cur != MBB.end() && Cur != LastPHIIt);
10267    if (InsPt != MBB.end() &&
10268        (InsPt->getOpcode() == AMDGPU::SI_IF ||
10269         InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10270         InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10271        InsPt->definesRegister(Src, nullptr)) {
10275        .addReg(Src, 0, SrcSubReg)
10300    if (isFullCopyInstr(MI)) {
10301      Register DstReg = MI.getOperand(0).getReg();
10302      Register SrcReg = MI.getOperand(1).getReg();
10309      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10313      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10324                                       unsigned *PredCost) const {
10325    if (MI.isBundle()) {
10328      unsigned Lat = 0, Count = 0;
10329      for (++I; I != E && I->isBundledWithPred(); ++I) {
10331        Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10333      return Lat + Count - 1;
10336    return SchedModel.computeInstrLatency(&MI);
10342    unsigned Opcode = MI.getOpcode();
10347        : MI.getOperand(1).getReg();
10348    LLT DstTy = MRI.getType(Dst);
10349    LLT SrcTy = MRI.getType(Src);
10351    unsigned SrcAS = SrcTy.getAddressSpace();
10354    ST.hasGloballyAddressableScratch()
10362    if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10363      return HandleAddrSpaceCast(MI);
10366    auto IID = GI->getIntrinsicID();
10373    case Intrinsic::amdgcn_addrspacecast_nonnull:
10374      return HandleAddrSpaceCast(MI);
10375    case Intrinsic::amdgcn_if:
10376    case Intrinsic::amdgcn_else:
10390    if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10391        Opcode == AMDGPU::G_SEXTLOAD) {
10392      if (MI.memoperands_empty())
10396      return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10397             mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10405    if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10406        Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10407        Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10420    unsigned opcode = MI.getOpcode();
10421    if (opcode == AMDGPU::V_READLANE_B32 ||
10422        opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10423        opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10426    if (isCopyInstr(MI)) {
10430      RI.getPhysRegBaseClass(srcOp.getReg());
10438    if (MI.isPreISelOpcode())
10453    if (MI.memoperands_empty())
10457    return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10458           mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10473    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10475      if (!SrcOp.isReg())
10479      if (!Reg || !SrcOp.readsReg())
10485      if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10512        F, "ds_ordered_count unsupported for this calling conv"));
10526                                   Register &SrcReg2, int64_t &CmpMask,
10527                                   int64_t &CmpValue) const {
10528    if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10531    switch (MI.getOpcode()) {
10534 case AMDGPU::S_CMP_EQ_U32:
10535 case AMDGPU::S_CMP_EQ_I32:
10536 case AMDGPU::S_CMP_LG_U32:
10537 case AMDGPU::S_CMP_LG_I32:
10538 case AMDGPU::S_CMP_LT_U32:
10539 case AMDGPU::S_CMP_LT_I32:
10540 case AMDGPU::S_CMP_GT_U32:
10541 case AMDGPU::S_CMP_GT_I32:
10542 case AMDGPU::S_CMP_LE_U32:
10543 case AMDGPU::S_CMP_LE_I32:
10544 case AMDGPU::S_CMP_GE_U32:
10545 case AMDGPU::S_CMP_GE_I32:
10546 case AMDGPU::S_CMP_EQ_U64:
10547 case AMDGPU::S_CMP_LG_U64:
10548      SrcReg = MI.getOperand(0).getReg();
10549      if (MI.getOperand(1).isReg()) {
10550        if (MI.getOperand(1).getSubReg())
10552        SrcReg2 = MI.getOperand(1).getReg();
10554      } else if (MI.getOperand(1).isImm()) {
10556        CmpValue = MI.getOperand(1).getImm();
10562 case AMDGPU::S_CMPK_EQ_U32:
10563 case AMDGPU::S_CMPK_EQ_I32:
10564 case AMDGPU::S_CMPK_LG_U32:
10565 case AMDGPU::S_CMPK_LG_I32:
10566 case AMDGPU::S_CMPK_LT_U32:
10567 case AMDGPU::S_CMPK_LT_I32:
10568 case AMDGPU::S_CMPK_GT_U32:
10569 case AMDGPU::S_CMPK_GT_I32:
10570 case AMDGPU::S_CMPK_LE_U32:
10571 case AMDGPU::S_CMPK_LE_I32:
10572 case AMDGPU::S_CMPK_GE_U32:
10573 case AMDGPU::S_CMPK_GE_I32:
10574      SrcReg = MI.getOperand(0).getReg();
10576      CmpValue = MI.getOperand(1).getImm();
10585                                      Register SrcReg2, int64_t CmpMask,
10594    const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10595                                 this](int64_t ExpectedValue, unsigned SrcSize,
10596                                       bool IsReversible, bool IsSigned) -> bool {
10621      if (!Def || Def->getParent() != CmpInstr.getParent())
10624      if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10625          Def->getOpcode() != AMDGPU::S_AND_B64)
10629      const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10640        SrcOp = &Def->getOperand(2);
10641      else if (isMask(&Def->getOperand(2)))
10642        SrcOp = &Def->getOperand(1);
10650      if (IsSigned && BitNo == SrcSize - 1)
10653      ExpectedValue <<= BitNo;
10655      bool IsReversedCC = false;
10656      if (CmpValue != ExpectedValue) {
10659        IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10664      Register DefReg = Def->getOperand(0).getReg();
10665      if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10668      for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
10670        if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
10671            I->killsRegister(AMDGPU::SCC, &RI))
10676      Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
10680      if (!MRI->use_nodbg_empty(DefReg)) {
10688      unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10689                                                       : AMDGPU::S_BITCMP1_B32
10690                                        : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10691                                                       : AMDGPU::S_BITCMP1_B64;
10696      Def->eraseFromParent();
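// optimizeCmpAnd folds a compare of (s_and reg, single-bit mask) against its
// expected value into S_BITCMP0/S_BITCMP1 of that bit, provided nothing between
// the AND and the compare clobbers SCC; the AND is erased once its result has
// no remaining uses. The switch below maps each S_CMP/S_CMPK opcode onto the
// (ExpectedValue, SrcSize, IsReversible, IsSigned) arguments.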
10704    case AMDGPU::S_CMP_EQ_U32:
10705    case AMDGPU::S_CMP_EQ_I32:
10706    case AMDGPU::S_CMPK_EQ_U32:
10707    case AMDGPU::S_CMPK_EQ_I32:
10708      return optimizeCmpAnd(1, 32, true, false);
10709    case AMDGPU::S_CMP_GE_U32:
10710    case AMDGPU::S_CMPK_GE_U32:
10711      return optimizeCmpAnd(1, 32, false, false);
10712    case AMDGPU::S_CMP_GE_I32:
10713    case AMDGPU::S_CMPK_GE_I32:
10714      return optimizeCmpAnd(1, 32, false, true);
10715    case AMDGPU::S_CMP_EQ_U64:
10716      return optimizeCmpAnd(1, 64, true, false);
10717    case AMDGPU::S_CMP_LG_U32:
10718    case AMDGPU::S_CMP_LG_I32:
10719    case AMDGPU::S_CMPK_LG_U32:
10720    case AMDGPU::S_CMPK_LG_I32:
10721      return optimizeCmpAnd(0, 32, true, false);
10722    case AMDGPU::S_CMP_GT_U32:
10723    case AMDGPU::S_CMPK_GT_U32:
10724      return optimizeCmpAnd(0, 32, false, false);
10725    case AMDGPU::S_CMP_GT_I32:
10726    case AMDGPU::S_CMPK_GT_I32:
10727      return optimizeCmpAnd(0, 32, false, true);
10728    case AMDGPU::S_CMP_LG_U64:
10729      return optimizeCmpAnd(0, 64, true, false);
10736                                          AMDGPU::OpName OpName) const {
10737    if (!ST.needsAlignedVGPRs())
10740    int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10752    bool IsAGPR = RI.isAGPR(MRI, DataReg);
10754        IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10757        MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10758                                         : &AMDGPU::VReg_64_Align2RegClass);
10760        .addReg(DataReg, 0, Op.getSubReg())
10765    Op.setSubReg(AMDGPU::sub0);
10787    unsigned Opcode = MI.getOpcode();
10793    Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
10794        Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
10797    if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MCInstrDesc &TID, unsigned RCID)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
bool isSpill(uint16_t Opcode) const
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset of a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
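A minimal usage sketch under the assumption of an in-tree AMDGPU build; the include path and the helper name are illustrative, not part of the API:
#include "SIInstrInfo.h" // AMDGPU target-internal header; path is an assumption
using namespace llvm;
// Illustrative helper only: read an immediate offset operand, if the instruction has one.
static int64_t getImmOffsetOrZero(const SIInstrInfo &TII, const MachineInstr &MI) {
  const MachineOperand *Off = TII.getNamedOperand(MI, AMDGPU::OpName::offset);
  return (Off && Off->isImm()) ? Off->getImm() : 0;
}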
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
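A hedged illustration of isInlinableLiteral32; the assumption (not stated in this listing) is that small integers in [-16, 64], plus a handful of FP bit patterns, are AMDGPU inline constants:
#include "Utils/AMDGPUBaseInfo.h" // target-internal header; include path is an assumption
// Assumed behavior: 64 is an inline constant, 65 would need an encoded literal.
bool Inline64 = llvm::AMDGPU::isInlinableLiteral32(64, /*HasInv2Pi=*/true); // expected true
bool Inline65 = llvm::AMDGPU::isInlinableLiteral32(65, /*HasInv2Pi=*/true); // expected false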
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU-specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating-point operands.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
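A quick sketch, relying only on the constexpr signature shown above:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::maxUIntN(8) == 255, "2^8 - 1");
static_assert(llvm::maxUIntN(16) == 65535, "2^16 - 1");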
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
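In the header this is a function template, so the sketch below assumes the llvm::isInt<N>(x) spelling:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isInt<16>(32767), "fits in a signed 16-bit field");
static_assert(!llvm::isInt<16>(32768), "one past the signed 16-bit maximum");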
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
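A small sketch of alignDown, using unsigned arguments since the helper is documented for unsigned values; the Skew case is included:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::alignDown(10u, 4u) == 8u, "largest multiple of 4 that is <= 10");
static_assert(llvm::alignDown(10u, 4u, 1u) == 9u, "largest value <= 10 that is 1 mod 4");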
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
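For example:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isPowerOf2_64(64), "64 is a power of two");
static_assert(!llvm::isPowerOf2_64(0), "zero is explicitly excluded");
static_assert(!llvm::isPowerOf2_64(96), "96 has two bits set");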
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
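A brief runtime illustration (the listing does not mark this helper constexpr, so plain assignments are used); the wrapper function is illustrative only:
#include "llvm/ADT/bit.h"
inline void countrZeroExamples() {
  int TZ8 = llvm::countr_zero(8u); // 3: three zero bits below the lowest set bit
  int TZ1 = llvm::countr_zero(1u); // 0: bit 0 is already set
  (void)TZ8;
  (void)TZ1;
}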
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
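For instance (the wrapper function is illustrative only):
#include "llvm/Support/MathExtras.h"
inline void log2Examples() {
  unsigned A = llvm::Log2_32(32); // 5
  unsigned B = llvm::Log2_32(33); // still 5: the result is the floor of log2
  (void)A;
  (void)B;
}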
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
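Lo_32 and Hi_32 simply split a 64-bit value into its halves; a compact check:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::Hi_32(0x1122334455667788ULL) == 0x11223344u, "upper half");
static_assert(llvm::Lo_32(0x1122334455667788ULL) == 0x55667788u, "lower half");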
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
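For example (unsigned arguments, per the description above):
#include "llvm/Support/MathExtras.h"
static_assert(llvm::divideCeil(7u, 2u) == 4u, "rounds up");
static_assert(llvm::divideCeil(8u, 2u) == 4u, "exact division is unchanged");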
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
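The dynamic-width counterpart of the isInt template; a short sketch:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isIntN(8, 127), "fits in a signed byte");
static_assert(!llvm::isIntN(8, 128), "needs at least 9 signed bits");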
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
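The listing shows the single-argument form, so the sketch assumes the template spelling llvm::SignExtend64<B>(x):
#include "llvm/Support/MathExtras.h"
static_assert(llvm::SignExtend64<8>(0x80) == -128, "bit 7 is the sign bit");
static_assert(llvm::SignExtend64<8>(0x7F) == 127, "positive values pass through");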
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
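For example:
#include "llvm/Support/MathExtras.h"
#include <cstdint>
static_assert(llvm::maskTrailingOnes<uint32_t>(4) == 0xFu, "four low bits set");
static_assert(llvm::maskTrailingOnes<uint32_t>(0) == 0u, "empty mask");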
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
int popcount(T Value) noexcept
Count the number of set bits in a value.
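A brief runtime illustration (the wrapper function is illustrative only):
#include "llvm/ADT/bit.h"
inline void popcountExamples() {
  int HighNibble = llvm::popcount(0xF0u); // 4 set bits
  int None = llvm::popcount(0u);          // 0
  (void)HighNibble;
  (void)None;
}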
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.