25#include "llvm/IR/IntrinsicsAMDGPU.h"
27#define DEBUG_TYPE "amdgpu-regbanklegalize"
35 : ST(B.getMF().getSubtarget<
GCNSubtarget>()), B(B), MRI(*B.getMRI()),
36 MUI(MUI), RBI(RBI), RBLRules(RBLRules), IsWave32(ST.isWave32()),
37 SgprRB(&RBI.getRegBank(
AMDGPU::SGPRRegBankID)),
38 VgprRB(&RBI.getRegBank(
AMDGPU::VGPRRegBankID)),
39 VccRB(&RBI.getRegBank(
AMDGPU::VCCRegBankID)) {}
48 B.setInsertPt(*
MI.getParent(), std::next(
MI.getIterator()));
56 lower(
MI, Mapping, WaterfallSgprs);
59bool RegBankLegalizeHelper::executeInWaterfallLoop(
71 unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
73 MovExecOpc = AMDGPU::S_MOV_B32;
74 MovExecTermOpc = AMDGPU::S_MOV_B32_term;
75 XorTermOpc = AMDGPU::S_XOR_B32_term;
76 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
77 ExecReg = AMDGPU::EXEC_LO;
79 MovExecOpc = AMDGPU::S_MOV_B64;
80 MovExecTermOpc = AMDGPU::S_MOV_B64_term;
81 XorTermOpc = AMDGPU::S_XOR_B64_term;
82 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
83 ExecReg = AMDGPU::EXEC;
87 const int OrigRangeSize = std::distance(
Range.begin(),
Range.end());
91 Register SaveExecReg =
MRI.createVirtualRegister(WaveRC);
92 Register InitSaveExecReg =
MRI.createVirtualRegister(WaveRC);
95 B.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(InitSaveExecReg);
97 Register SavedExec =
MRI.createVirtualRegister(WaveRC);
121 MBB.addSuccessor(LoopBB);
124 B.setInsertPt(*LoopBB, LoopBB->
end());
175 auto NewEnd = BodyBB->
end();
176 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
184 if (!SGPROperandRegs.
count(OldReg))
189 auto OldVal = WaterfalledRegMap.
find(OldReg);
190 if (OldVal != WaterfalledRegMap.
end()) {
191 Op.setReg(OldVal->second);
196 LLT OpTy = MRI.getType(OpReg);
199 assert(MRI.getRegBank(OpReg) == VgprRB);
200 Register CurrentLaneReg = MRI.createVirtualRegister({SgprRB, OpTy});
205 unsigned PartSize = (OpSize % 64 == 0) ? 64 : 32;
207 unsigned NumParts = OpSize / PartSize;
213 CurrentLaneParts.
push_back(CurrentLaneReg);
215 auto UnmergeOp = B.buildUnmerge({VgprRB, PartTy}, OpReg);
216 auto UnmergeCurrLane = B.buildUnmerge({SgprRB, PartTy}, CurrentLaneReg);
217 for (
unsigned i = 0; i < NumParts; ++i) {
219 CurrentLaneParts.
push_back(UnmergeCurrLane.getReg(i));
223 for (
unsigned i = 0; i < NumParts; ++i) {
224 Register CmpReg = MRI.createVirtualRegister(VccRB_S1);
230 CondReg = B.buildAnd(VccRB_S1, CondReg, CmpReg).getReg(0);
233 Op.setReg(CurrentLaneReg);
236 WaterfalledRegMap.
insert(std::pair(OldReg,
Op.getReg()));
242 MRI.createVirtualRegister({WaveRC,
LLT::scalar(IsWave32 ? 32 : 64)});
243 B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg);
246 B.buildInstr(AndSaveExecOpc)
249 MRI.setSimpleHint(SavedExec, CondRegLM);
251 B.setInsertPt(*BodyBB, BodyBB->
end());
254 B.buildInstr(XorTermOpc).addDef(ExecReg).addReg(ExecReg).addReg(SavedExec);
260 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
264 B.buildInstr(MovExecOpc).addDef(SaveExecReg).addReg(ExecReg);
267 B.setInsertPt(*RestoreExecBB, RestoreExecBB->
begin());
268 B.buildInstr(MovExecTermOpc).addDef(ExecReg).addReg(SaveExecReg);
272 B.setInsertPt(*RemainderBB, RemainderBB->
begin());
277void RegBankLegalizeHelper::splitLoad(MachineInstr &
MI,
279 MachineFunction &MF = B.getMF();
280 assert(
MI.getNumMemOperands() == 1);
281 MachineMemOperand &BaseMMO = **
MI.memoperands_begin();
283 const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
285 LLT PtrTy = MRI.getType(
Base);
286 const RegisterBank *PtrRB = MRI.getRegBankOrNull(
Base);
290 unsigned ByteOffset = 0;
291 for (LLT PartTy : LLTBreakdown) {
293 if (ByteOffset == 0) {
294 BasePlusOffset =
Base;
296 auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
301 auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
302 LoadPartRegs.
push_back(LoadPart.getReg(0));
308 B.buildMergeLikeInstr(Dst, LoadPartRegs);
314 if (MRI.getType(
Reg) == MergeTy) {
317 auto Unmerge = B.buildUnmerge({DstRB, MergeTy},
Reg);
318 for (
unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i)
319 MergeTyParts.
push_back(Unmerge.getReg(i));
322 B.buildMergeLikeInstr(Dst, MergeTyParts);
324 MI.eraseFromParent();
327void RegBankLegalizeHelper::widenLoad(MachineInstr &
MI, LLT WideTy,
329 MachineFunction &MF = B.getMF();
330 assert(
MI.getNumMemOperands() == 1);
331 MachineMemOperand &BaseMMO = **
MI.memoperands_begin();
333 const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
337 auto WideLoad = B.buildLoad({DstRB, WideTy},
Base, *WideMMO);
340 B.buildTrunc(Dst, WideLoad);
343 auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad);
345 LLT DstTy = MRI.getType(Dst);
347 for (
unsigned i = 0; i < NumElts; ++i) {
348 MergeTyParts.
push_back(Unmerge.getReg(i));
350 B.buildMergeLikeInstr(Dst, MergeTyParts);
352 MI.eraseFromParent();
355void RegBankLegalizeHelper::widenMMOToS32(GAnyLoad &
MI)
const {
358 MachineMemOperand &MMO =
MI.getMMO();
361 MachineMemOperand *WideMMO = B.getMF().getMachineMemOperand(&MMO, 0, S32);
363 if (
MI.getOpcode() == G_LOAD) {
364 B.buildLoad(Dst,
Ptr, *WideMMO);
366 auto Load = B.buildLoad(SgprRB_S32,
Ptr, *WideMMO);
368 if (
MI.getOpcode() == G_ZEXTLOAD) {
370 auto MaskCst = B.buildConstant(SgprRB_S32, Mask);
371 B.buildAnd(Dst, Load, MaskCst);
373 assert(
MI.getOpcode() == G_SEXTLOAD);
374 B.buildSExtInReg(Dst, Load, MemSize);
378 MI.eraseFromParent();
381void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &
MI) {
383 LLT Ty = MRI.getType(Dst);
385 unsigned Opc =
MI.getOpcode();
386 int TrueExtCst =
Opc == G_SEXT ? -1 : 1;
387 if (Ty == S32 || Ty == S16) {
388 auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
389 auto False = B.buildConstant({VgprRB, Ty}, 0);
390 B.buildSelect(Dst, Src, True, False);
391 }
else if (Ty == S64) {
392 auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
393 auto False = B.buildConstant({VgprRB_S32}, 0);
394 auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
395 MachineInstrBuilder
Hi;
404 Hi = B.buildUndef({VgprRB_S32});
410 B.buildMergeValues(Dst, {
Lo.getReg(0),
Hi.getReg(0)});
415 MI.eraseFromParent();
418std::pair<Register, Register> RegBankLegalizeHelper::unpackZExt(
Register Reg) {
419 auto PackedS32 = B.buildBitcast(SgprRB_S32,
Reg);
420 auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
421 auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
422 auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
423 return {
Lo.getReg(0),
Hi.getReg(0)};
426std::pair<Register, Register> RegBankLegalizeHelper::unpackSExt(
Register Reg) {
427 auto PackedS32 = B.buildBitcast(SgprRB_S32,
Reg);
428 auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
429 auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
430 return {
Lo.getReg(0),
Hi.getReg(0)};
433std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(
Register Reg) {
434 auto PackedS32 = B.buildBitcast(SgprRB_S32,
Reg);
436 auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
437 return {
Lo.getReg(0),
Hi.getReg(0)};
440void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &
MI) {
442 switch (
MI.getOpcode()) {
443 case AMDGPU::G_SHL: {
444 auto [Val0, Val1] = unpackAExt(
MI.getOperand(1).getReg());
445 auto [Amt0, Amt1] = unpackAExt(
MI.getOperand(2).getReg());
446 Lo = B.buildInstr(
MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
447 Hi = B.buildInstr(
MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
450 case AMDGPU::G_LSHR: {
451 auto [Val0, Val1] = unpackZExt(
MI.getOperand(1).getReg());
452 auto [Amt0, Amt1] = unpackZExt(
MI.getOperand(2).getReg());
453 Lo = B.buildInstr(
MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
454 Hi = B.buildInstr(
MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
457 case AMDGPU::G_ASHR: {
458 auto [Val0, Val1] = unpackSExt(
MI.getOperand(1).getReg());
459 auto [Amt0, Amt1] = unpackSExt(
MI.getOperand(2).getReg());
460 Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
461 Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
467 B.buildBuildVectorTrunc(
MI.getOperand(0).getReg(), {Lo, Hi});
468 MI.eraseFromParent();
473 return (GI->is(Intrinsic::amdgcn_sbfe));
475 return MI.getOpcode() == AMDGPU::G_SBFX;
478void RegBankLegalizeHelper::lowerV_BFE(MachineInstr &
MI) {
485 Register Src =
MI.getOperand(FirstOpnd).getReg();
486 Register LSBit =
MI.getOperand(FirstOpnd + 1).getReg();
487 Register Width =
MI.getOperand(FirstOpnd + 2).getReg();
492 unsigned SHROpc =
Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
493 auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});
501 auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
502 auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
503 B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
504 MI.eraseFromParent();
508 uint64_t WidthImm = ConstWidth->Value.getZExtValue();
509 auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
510 Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
511 Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
512 auto Zero = B.buildConstant({VgprRB, S32}, 0);
513 unsigned BFXOpc =
Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;
515 if (WidthImm <= 32) {
517 auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo,
Zero, Width});
518 MachineInstrBuilder
Hi;
521 Hi = B.buildAShr(VgprRB_S32,
Lo, B.buildConstant(VgprRB_S32, 31));
526 B.buildMergeLikeInstr(Dst, {
Lo,
Hi});
528 auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
530 auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi,
Zero, Amt});
531 B.buildMergeLikeInstr(Dst, {SHRSrcLo,
Hi});
534 MI.eraseFromParent();
537void RegBankLegalizeHelper::lowerS_BFE(MachineInstr &
MI) {
539 LLT Ty = MRI.getType(DstReg);
542 Register Src =
MI.getOperand(FirstOpnd).getReg();
543 Register LSBit =
MI.getOperand(FirstOpnd + 1).getReg();
544 Register Width =
MI.getOperand(FirstOpnd + 2).getReg();
551 auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
552 auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
553 auto Src1 = B.buildOr(SgprRB_S32, FieldOffset,
Size);
554 unsigned Opc32 =
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
555 unsigned Opc64 =
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
556 unsigned Opc = Ty == S32 ? Opc32 : Opc64;
560 auto S_BFE = B.buildInstr(
Opc, {{SgprRB, Ty}},
561 {B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
563 *ST.getRegisterInfo(), RBI))
566 B.buildCopy(DstReg,
S_BFE->getOperand(0).getReg());
567 MI.eraseFromParent();
570void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &
MI) {
572 LLT DstTy = MRI.getType(Dst);
573 assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64);
574 LLT Ty = DstTy == V4S16 ? V2S16 : S32;
575 auto Op1 = B.buildUnmerge({VgprRB, Ty},
MI.getOperand(1).
getReg());
576 auto Op2 = B.buildUnmerge({VgprRB, Ty},
MI.getOperand(2).
getReg());
577 unsigned Opc =
MI.getOpcode();
580 B.buildInstr(
Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)},
Flags);
582 B.buildInstr(
Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)},
Flags);
583 B.buildMergeLikeInstr(Dst, {
Lo,
Hi});
584 MI.eraseFromParent();
587void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &
MI) {
589 LLT DstTy = MRI.getType(Dst);
590 assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64 ||
592 LLT Ty = DstTy == V4S16 ? V2S16 : S32;
593 auto Op2 = B.buildUnmerge({VgprRB, Ty},
MI.getOperand(2).
getReg());
594 auto Op3 = B.buildUnmerge({VgprRB, Ty},
MI.getOperand(3).
getReg());
598 B.buildSelect({VgprRB, Ty},
Cond, Op2.getReg(0), Op3.getReg(0), Flags);
600 B.buildSelect({VgprRB, Ty},
Cond, Op2.getReg(1), Op3.getReg(1), Flags);
602 B.buildMergeLikeInstr(Dst, {
Lo,
Hi});
603 MI.eraseFromParent();
606void RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &
MI) {
607 auto Op1 = B.buildUnmerge(VgprRB_S32,
MI.getOperand(1).getReg());
608 int Amt =
MI.getOperand(2).getImm();
612 auto Freeze = B.buildFreeze(VgprRB_S32, Op1.getReg(0));
615 Lo = Freeze.getReg(0);
618 Lo = B.buildSExtInReg(VgprRB_S32, Freeze, Amt).getReg(0);
621 auto SignExtCst = B.buildConstant(SgprRB_S32, 31);
622 Hi = B.buildAShr(VgprRB_S32,
Lo, SignExtCst).getReg(0);
626 Hi = B.buildSExtInReg(VgprRB_S32, Op1.getReg(1), Amt - 32).getReg(0);
629 B.buildMergeLikeInstr(
MI.getOperand(0).getReg(), {Lo, Hi});
630 MI.eraseFromParent();
633void RegBankLegalizeHelper::lower(MachineInstr &
MI,
635 SmallSet<Register, 4> &WaterfallSgprs) {
641 return lowerVccExtToSel(
MI);
643 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
644 auto True = B.buildConstant({SgprRB, Ty},
645 MI.getOpcode() == AMDGPU::G_SEXT ? -1 : 1);
646 auto False = B.buildConstant({SgprRB, Ty}, 0);
650 B.buildSelect(
MI.getOperand(0).getReg(),
MI.getOperand(1).getReg(), True,
652 MI.eraseFromParent();
656 return lowerUnpackBitShift(
MI);
658 const RegisterBank *RB = MRI.getRegBank(
MI.getOperand(0).getReg());
659 MachineInstrBuilder
Hi;
660 switch (
MI.getOpcode()) {
661 case AMDGPU::G_ZEXT: {
662 Hi = B.buildConstant({RB, S32}, 0);
665 case AMDGPU::G_SEXT: {
667 auto ShiftAmt = B.buildConstant({RB, S32}, 31);
668 Hi = B.buildAShr({RB, S32},
MI.getOperand(1).
getReg(), ShiftAmt);
671 case AMDGPU::G_ANYEXT: {
672 Hi = B.buildUndef({RB, S32});
679 B.buildMergeLikeInstr(
MI.getOperand(0).getReg(),
680 {MI.getOperand(1).getReg(), Hi});
681 MI.eraseFromParent();
685 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
686 B.buildConstant(
MI.getOperand(0).getReg(), ConstVal);
688 MI.eraseFromParent();
693 LLT Ty = MRI.getType(Src);
697 Register BoolSrc = MRI.createVirtualRegister({VgprRB, Ty});
699 auto Src64 = B.buildUnmerge(VgprRB_S32, Src);
700 auto One = B.buildConstant(VgprRB_S32, 1);
701 auto AndLo = B.buildAnd(VgprRB_S32, Src64.getReg(0), One);
702 auto Zero = B.buildConstant(VgprRB_S32, 0);
703 auto AndHi = B.buildAnd(VgprRB_S32, Src64.getReg(1), Zero);
704 B.buildMergeLikeInstr(BoolSrc, {AndLo, AndHi});
706 assert(Ty == S32 || Ty == S16);
707 auto One = B.buildConstant({VgprRB, Ty}, 1);
708 B.buildAnd(BoolSrc, Src, One);
710 auto Zero = B.buildConstant({VgprRB, Ty}, 0);
712 MI.eraseFromParent();
716 return lowerV_BFE(
MI);
718 return lowerS_BFE(
MI);
720 return lowerSplitTo32(
MI);
722 return lowerSplitTo32Select(
MI);
724 return lowerSplitTo32SExtInReg(
MI);
726 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
739 else if (
Size / 128 == 4)
747 else if (DstTy == S96)
748 splitLoad(
MI, {S64, S32}, S32);
749 else if (DstTy == V3S32)
750 splitLoad(
MI, {V2S32, S32}, S32);
751 else if (DstTy == V6S16)
752 splitLoad(
MI, {V4S16, V2S16}, V2S16);
760 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
763 else if (DstTy == V3S32)
764 widenLoad(
MI, V4S32, S32);
765 else if (DstTy == V6S16)
766 widenLoad(
MI, V8S16, V2S16);
777 if (!WaterfallSgprs.
empty()) {
779 executeInWaterfallLoop(B,
make_range(
I, std::next(
I)), WaterfallSgprs);
852 return isAnyPtr(Ty, 32) ? Ty : LLT();
855 return isAnyPtr(Ty, 64) ? Ty : LLT();
858 return isAnyPtr(Ty, 128) ? Ty : LLT();
972void RegBankLegalizeHelper::applyMappingDst(
973 MachineInstr &
MI,
unsigned &
OpIdx,
974 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs) {
979 MachineOperand &
Op =
MI.getOperand(
OpIdx);
981 LLT Ty = MRI.getType(
Reg);
982 [[maybe_unused]]
const RegisterBank *RB = MRI.getRegBank(
Reg);
984 switch (MethodIDs[
OpIdx]) {
1041 Register NewDst = MRI.createVirtualRegister(VccRB_S1);
1044 B.buildInstr(AMDGPU::G_AMDGPU_COPY_SCC_VCC, {SgprRB_S32}, {NewDst});
1045 B.buildTrunc(
Reg, CopyS32_Vcc);
1051 Register NewVgprDstS16 = MRI.createVirtualRegister({VgprRB, S16});
1052 Register NewVgprDstS32 = MRI.createVirtualRegister({VgprRB, S32});
1053 Register NewSgprDstS32 = MRI.createVirtualRegister({SgprRB, S32});
1054 Op.setReg(NewVgprDstS16);
1055 B.buildAnyExt(NewVgprDstS32, NewVgprDstS16);
1057 B.buildTrunc(
Reg, NewSgprDstS32);
1065 Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
1066 Op.setReg(NewVgprDst);
1078 Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
1079 Op.setReg(NewVgprDst);
1087 Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
1089 B.buildTrunc(
Reg, NewDst);
1102void RegBankLegalizeHelper::applyMappingSrc(
1103 MachineInstr &
MI,
unsigned &
OpIdx,
1104 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
1105 SmallSet<Register, 4> &SgprWaterfallOperandRegs) {
1106 for (
unsigned i = 0; i < MethodIDs.
size(); ++
OpIdx, ++i) {
1107 if (MethodIDs[i] ==
None || MethodIDs[i] ==
IntrId || MethodIDs[i] ==
Imm)
1110 MachineOperand &
Op =
MI.getOperand(
OpIdx);
1112 LLT Ty = MRI.getType(
Reg);
1113 const RegisterBank *RB = MRI.getRegBank(
Reg);
1115 switch (MethodIDs[i]) {
1118 assert(RB == VccRB || RB == SgprRB);
1120 auto Aext = B.buildAnyExt(SgprRB_S32,
Reg);
1122 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {VccRB_S1}, {Aext});
1123 Op.setReg(CopyVcc_Scc.getReg(0));
1139 assert(Ty == getTyFromID(MethodIDs[i]));
1140 assert(RB == getRegBankFromID(MethodIDs[i]));
1153 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
1154 assert(RB == getRegBankFromID(MethodIDs[i]));
1170 assert(Ty == getTyFromID(MethodIDs[i]));
1172 auto CopyToVgpr = B.buildCopy({VgprRB, Ty},
Reg);
1173 Op.setReg(CopyToVgpr.getReg(0));
1187 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
1189 auto CopyToVgpr = B.buildCopy({VgprRB, Ty},
Reg);
1190 Op.setReg(CopyToVgpr.getReg(0));
1197 assert(Ty == getTyFromID(MethodIDs[i]));
1207 auto Aext = B.buildAnyExt(SgprRB_S32,
Reg);
1208 Op.setReg(Aext.getReg(0));
1215 auto Aext = B.buildAnyExt(SgprRB_S32,
Reg);
1218 auto Cst1 = B.buildConstant(SgprRB_S32, 1);
1219 auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1);
1220 Op.setReg(BoolInReg.getReg(0));
1226 auto Sext = B.buildSExt(SgprRB_S32,
Reg);
1227 Op.setReg(Sext.getReg(0));
1233 auto Zext = B.buildZExt({SgprRB, S32},
Reg);
1234 Op.setReg(Zext.getReg(0));
1241 auto Sext = B.buildSExt({VgprRB, S32},
Reg);
1242 Op.setReg(Sext.getReg(0));
1249 auto Zext = B.buildZExt({VgprRB, S32},
Reg);
1250 Op.setReg(Zext.getReg(0));
1261 LLT Ty = MRI.getType(Dst);
1264 B.setInsertPt(*
MI.getParent(),
MI.getParent()->getFirstNonPHI());
1266 Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
1267 MI.getOperand(0).setReg(NewDst);
1268 B.buildTrunc(Dst, NewDst);
1270 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1273 auto DefMI = MRI.getVRegDef(
UseReg)->getIterator();
1278 auto NewUse = B.buildAnyExt(SgprRB_S32,
UseReg);
1279 MI.getOperand(i).setReg(NewUse.getReg(0));
1288 if (Ty ==
LLT::scalar(1) && MUI.isDivergent(Dst)) {
1291 "before RegBankLegalize to lower lane mask(vcc) phis");
1309 unsigned StartOpIdx,
1310 unsigned EndOpIdx) {
1311 for (
unsigned i = StartOpIdx; i <= EndOpIdx; ++i) {
1312 if (
MRI.getRegBankOrNull(
MI.getOperand(i).getReg()) != RB)
1319 const RegisterBank *RB = MRI.getRegBank(
MI.getOperand(0).getReg());
1321 unsigned NumDefs =
MI.getNumDefs();
1322 unsigned NumOperands =
MI.getNumOperands();
1330 for (
unsigned i = NumDefs; i < NumOperands; ++i) {
1332 if (MRI.getRegBank(Reg) != RB) {
1333 auto Copy = B.buildCopy({VgprRB, MRI.getType(Reg)}, Reg);
1334 MI.getOperand(i).setReg(Copy.getReg(0));
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Provides AMDGPU specific target descriptions.
static bool isSignedBFE(MachineInstr &MI)
static bool verifyRegBankOnOperands(MachineInstr &MI, const RegisterBank *RB, MachineRegisterInfo &MRI, unsigned StartOpIdx, unsigned EndOpIdx)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
static Register UseReg(const MachineOperand &MO)
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
void applyMappingTrivial(MachineInstr &MI)
RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI, const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
void findRuleAndApplyMapping(MachineInstr &MI)
void applyMappingPHI(MachineInstr &MI)
const RegBankLLTMapping & findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineUniformityInfo &MUI) const
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
const SIRegisterInfo * getRegisterInfo() const override
Represents a call to an intrinsic.
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
TypeSize getValue() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
BasicBlockListType::iterator iterator
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Helper class to build MachineInstr.
Representation of each machine instruction.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Holds all the information related to register banks.
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isAnyPtr(LLT Ty, unsigned Width)
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc, const RegisterBankInfo &RBI)
void buildReadFirstLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc, const RegisterBankInfo &RBI)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LoweringMethodID LoweringMethod
SmallVector< RegBankLLTMappingApplyID, 2 > DstOpMapping
SmallVector< RegBankLLTMappingApplyID, 4 > SrcOpMapping