@@ -62,7 +62,6 @@ class SIPeepholeSDWA {
6262 std::unique_ptr<SDWAOperand> matchSDWAOperand (MachineInstr &MI);
6363 void pseudoOpConvertToVOP2 (MachineInstr &MI,
6464 const GCNSubtarget &ST) const ;
65- void convertVcndmaskToVOP2 (MachineInstr &MI, const GCNSubtarget &ST) const ;
6665 MachineInstr *createSDWAVersion (MachineInstr &MI);
6766 bool convertToSDWA (MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
6867 void legalizeScalarOperands (MachineInstr &MI, const GCNSubtarget &ST) const ;
@@ -1038,8 +1037,7 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
10381037 return ;
10391038 // Make sure VCC or its subregs are dead before MI.
10401039 MachineBasicBlock &MBB = *MI.getParent ();
1041- MachineBasicBlock::LivenessQueryResult Liveness =
1042- MBB.computeRegisterLiveness (TRI, AMDGPU::VCC, MI, 25 );
1040+ auto Liveness = MBB.computeRegisterLiveness (TRI, AMDGPU::VCC, MI, 25 );
10431041 if (Liveness != MachineBasicBlock::LQR_Dead)
10441042 return ;
10451043 // Check if VCC is referenced in range of (MI,MISucc].
@@ -1063,53 +1061,6 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
10631061 MISucc.substituteRegister (CarryIn->getReg (), TRI->getVCC (), 0 , *TRI);
10641062}
10651063
1066- // / Try to convert an \p MI in VOP3 which takes an src2 carry-in
1067- // / operand into the corresponding VOP2 form which expects the
1068- // / argument in VCC. To this end, add a copy from the carry-in to
1069- // / VCC. The conversion will only be applied if \p MI can be shrunk
1070- // / to VOP2 and if VCC can be proven to be dead before \p MI.
1071- void SIPeepholeSDWA::convertVcndmaskToVOP2 (MachineInstr &MI,
1072- const GCNSubtarget &ST) const {
1073- assert (MI.getOpcode () == AMDGPU::V_CNDMASK_B32_e64);
1074-
1075- LLVM_DEBUG (dbgs () << " Attempting VOP2 conversion: " << MI);
1076- if (!TII->canShrink (MI, *MRI)) {
1077- LLVM_DEBUG (dbgs () << " Cannot shrink instruction\n " );
1078- return ;
1079- }
1080-
1081- const MachineOperand &CarryIn =
1082- *TII->getNamedOperand (MI, AMDGPU::OpName::src2);
1083- Register CarryReg = CarryIn.getReg ();
1084- MachineInstr *CarryDef = MRI->getVRegDef (CarryReg);
1085- if (!CarryDef) {
1086- LLVM_DEBUG (dbgs () << " Missing carry-in operand definition\n " );
1087- return ;
1088- }
1089-
1090- // Make sure VCC or its subregs are dead before MI.
1091- MCRegister Vcc = TRI->getVCC ();
1092- MachineBasicBlock &MBB = *MI.getParent ();
1093- MachineBasicBlock::LivenessQueryResult Liveness =
1094- MBB.computeRegisterLiveness (TRI, Vcc, MI);
1095- if (Liveness != MachineBasicBlock::LQR_Dead) {
1096- LLVM_DEBUG (dbgs () << " VCC not known to be dead before instruction\n " );
1097- return ;
1098- }
1099-
1100- BuildMI (MBB, MI, MI.getDebugLoc (), TII->get (AMDGPU::COPY), Vcc).add (CarryIn);
1101-
1102- auto Converted = BuildMI (MBB, MI, MI.getDebugLoc (),
1103- TII->get (AMDGPU::getVOPe32 (MI.getOpcode ())))
1104- .add (*TII->getNamedOperand (MI, AMDGPU::OpName::vdst))
1105- .add (*TII->getNamedOperand (MI, AMDGPU::OpName::src0))
1106- .add (*TII->getNamedOperand (MI, AMDGPU::OpName::src1))
1107- .setMIFlags (MI.getFlags ());
1108- TII->fixImplicitOperands (*Converted);
1109- LLVM_DEBUG (dbgs () << " Converted to VOP2: " << *Converted);
1110- MI.eraseFromParent ();
1111- }
1112-
11131064namespace {
11141065bool isConvertibleToSDWA (MachineInstr &MI,
11151066 const GCNSubtarget &ST,
@@ -1119,11 +1070,6 @@ bool isConvertibleToSDWA(MachineInstr &MI,
11191070 if (TII->isSDWA (Opc))
11201071 return true ;
11211072
1122- // Can only be handled after earlier conversion to
1123- // AMDGPU::V_CNDMASK_B32_e32 which is not always possible.
1124- if (Opc == AMDGPU::V_CNDMASK_B32_e64)
1125- return false ;
1126-
11271073 // Check if this instruction has opcode that supports SDWA
11281074 if (AMDGPU::getSDWAOp (Opc) == -1 )
11291075 Opc = AMDGPU::getVOPe32 (Opc);
@@ -1162,6 +1108,10 @@ bool isConvertibleToSDWA(MachineInstr &MI,
11621108 if (TII->pseudoToMCOpcode (Opc) == -1 )
11631109 return false ;
11641110
1111+ // FIXME: has SDWA but requires handling of implicit VCC use
1112+ if (Opc == AMDGPU::V_CNDMASK_B32_e32)
1113+ return false ;
1114+
11651115 if (MachineOperand *Src0 = TII->getNamedOperand (MI, AMDGPU::OpName::src0)) {
11661116 if (!Src0->isReg () && !Src0->isImm ())
11671117 return false ;
@@ -1316,9 +1266,7 @@ MachineInstr *SIPeepholeSDWA::createSDWAVersion(MachineInstr &MI) {
13161266 SDWAInst->tieOperands (PreserveDstIdx, SDWAInst->getNumOperands () - 1 );
13171267 }
13181268
1319- MachineInstr *Ret = SDWAInst.getInstr ();
1320- TII->fixImplicitOperands (*Ret);
1321- return Ret;
1269+ return SDWAInst.getInstr ();
13221270}
13231271
13241272bool SIPeepholeSDWA::convertToSDWA (MachineInstr &MI,
@@ -1436,18 +1384,10 @@ bool SIPeepholeSDWA::run(MachineFunction &MF) {
14361384 for (const auto &OperandPair : SDWAOperands) {
14371385 const auto &Operand = OperandPair.second ;
14381386 MachineInstr *PotentialMI = Operand->potentialToConvert (TII, ST);
1439- if (!PotentialMI)
1440- continue ;
1441-
1442- switch (PotentialMI->getOpcode ()) {
1443- case AMDGPU::V_ADD_CO_U32_e64:
1444- case AMDGPU::V_SUB_CO_U32_e64:
1387+ if (PotentialMI &&
1388+ (PotentialMI->getOpcode () == AMDGPU::V_ADD_CO_U32_e64 ||
1389+ PotentialMI->getOpcode () == AMDGPU::V_SUB_CO_U32_e64))
14451390 pseudoOpConvertToVOP2 (*PotentialMI, ST);
1446- break ;
1447- case AMDGPU::V_CNDMASK_B32_e64:
1448- convertVcndmaskToVOP2 (*PotentialMI, ST);
1449- break ;
1450- };
14511391 }
14521392 SDWAOperands.clear ();
14531393
0 commit comments