#include "llvm/IR/IntrinsicsRISCV.h"

#define DEBUG_TYPE "riscvtti"

    "riscv-v-register-bit-width-lmul",
        "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
        "by autovectorized code. Fractional LMULs are not supported."),
        "Overrides result used for getMaximumVF query which is used "
        "exclusively by SLP vectorizer."),
    cl::desc("Set the lower bound of a trip count to decide on "
             "vectorization while tail-folding."),

  size_t NumInstr = OpCodes.size();
    return LMULCost * NumInstr;
  for (auto Op : OpCodes) {
    case RISCV::VRGATHER_VI:
    case RISCV::VRGATHER_VV:
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS: {
    case RISCV::VFREDOSUM_VS: {
    case RISCV::VFMV_F_S:
    case RISCV::VFMV_S_F:
    case RISCV::VMXOR_MM:
    case RISCV::VMAND_MM:
    case RISCV::VMANDN_MM:
    case RISCV::VMNAND_MM:
    case RISCV::VFIRST_M:
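// Illustrative note (not part of the original source): getRISCVInstructionCost
// sums a per-opcode pseudo cost for a list of RVV opcodes and scales it by the
// LMUL of the legalized type.  A hedged sketch of how it is invoked elsewhere
// in this file, assuming an already-legalized type pair `LT`:
//
//   InstructionCost C =
//       getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX, RISCV::VMERGE_VVM},
//                               LT.second, CostKind);
//
// Permutes such as VRGATHER_VV are modeled as noticeably more expensive than a
// plain LMUL-linear op, while mask-register ops (VMAND_MM, VMXOR_MM, ...) and
// scalar moves stay cheap regardless of LMUL.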
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");
  if (!BO || !BO->hasOneUse())
  if (BO->getOpcode() != Instruction::Shl)
  if (ShAmt == Trailing)

  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");
  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;
  case Instruction::GetElementPtr:
  case Instruction::Store: {
    if (Idx == 1 || !Inst)
    if (!getTLI()->allowsMemoryAccessForAlignment(
  case Instruction::Load:
  case Instruction::And:
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
    if (Imm == UINT64_C(0xffffffff) &&
        ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
    Takes12BitImm = true;
  case Instruction::Add:
    Takes12BitImm = true;
  case Instruction::Or:
  case Instruction::Xor:
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Mul:
    if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;
  if (Imm.getSignificantBits() <= 64 &&

  return ST->hasVInstructions();
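// Illustrative note (not part of the original source): most base RISC-V ALU
// instructions (addi, andi, ori, xori, slti, ...) accept a sign-extended
// 12-bit immediate, which is what the "Takes12BitImm" classification above
// captures.  For example, `x & 15` folds into a single andi and its immediate
// is free, while `x & 0x12345` first needs the constant materialized into a
// register (typically a lui/addi pair), so that immediate is charged.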
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
  if (!ST->hasStdExtZvqdotq() || ST->getELen() < 64 ||
      Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
      InputTypeA != InputTypeB || !InputTypeA->isIntegerTy(8) ||
         getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second, CostKind);
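// Illustrative note (not part of the original source): this models the
// Zvqdotq dot-product path.  A partial reduction of the shape
//   add(acc, mul(sext(<N x i8> a), sext(<N x i8> b)))
// accumulating into i32 lanes can be lowered to vqdot.vv, so it is costed as
// a single RVV instruction per legalized register rather than as the generic
// widen-multiply-then-reduce sequence.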
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:

  if (ST->hasVInstructions())
  if (ST->hasVInstructions())
    if (unsigned MinVLen = ST->getRealMinVLen();
        ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
    (ST->hasVInstructions() &&

RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,
  unsigned Size = Mask.size();
  for (unsigned I = 0; I != Size; ++I) {
    if (static_cast<unsigned>(Mask[I]) == I)
    for (unsigned J = I + 1; J != Size; ++J)
      if (static_cast<unsigned>(Mask[J]) != J % I)

         "Expected fixed vector type and non-empty mask");
  unsigned NumOfDests = divideCeil(Mask.size(), LegalNumElts);
  if (NumOfDests <= 1 ||
      Tp->getElementType()->getPrimitiveSizeInBits() ||
      LegalNumElts >= Tp->getElementCount().getFixedValue())
  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
  unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
  unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
  unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");
      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
        Cost += TTI.getShuffleCost(
            SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
        Cost += TTI.getShuffleCost(
            SingleOpTy, RegMask, CostKind, 0, nullptr);
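// Illustrative note (not part of the original source): when a fixed-length
// shuffle is wider than one vector register, the mask is normalized and split
// into per-register sub-masks; the helper then sums the cost of one
// single-source or two-source shuffle per destination register.  For example,
// a 32-element shuffle that legalizes to four registers is charged roughly as
// the sum of the per-register shuffles it decomposes into, with repeated
// single-source sub-masks counted only once.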
  if (!VLen || Mask.empty())
  LegalVT = TTI.getTypeLegalizationCost(
  if (NumOfDests <= 1 ||
      Tp->getElementType()->getPrimitiveSizeInBits() ||
  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
  unsigned NormalizedVF =
  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");
      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
            SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
            SingleOpTy, RegMask, CostKind, 0, nullptr);
  if ((NumOfDestRegs > 2 && NumShuffles <= static_cast<int>(NumOfDestRegs)) ||
      (NumOfDestRegs <= 2 && NumShuffles < 4))

  if (!LT.second.isFixedLengthVector())
  auto GetSlideOpcode = [&](int SlideAmt) {
    bool IsVI = isUInt<5>(std::abs(SlideAmt));
      return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
    return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;
  std::array<std::pair<int, int>, 2> SrcInfo;
  if (SrcInfo[1].second == 0)
  if (SrcInfo[0].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
    FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
  if (SrcInfo[1].first == -1)
    return FirstSlideCost;
  if (SrcInfo[1].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
    SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
      getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
  return FirstSlideCost + SecondSlideCost + MaskCost;
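// Illustrative note (not part of the original source): many two-source
// shuffles can be lowered as at most one vslidedown, one vslideup and a
// vmerge under a precomputed mask.  For example, a <4 x i32> mask of
// {1, 2, 3, 4} takes lanes 1..3 from the first source slid down by one and
// lane 4 from the second source slid up, followed by one vmerge.vvm, which
// is exactly the FirstSlideCost + SecondSlideCost + MaskCost sum above.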
         "Expected the Mask to match the return size if given");
         "Expected the same scalar types");
      FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
        *this, LT.second, ST->getRealVLen(),
    if (VRegSplittingCost.isValid())
      return VRegSplittingCost;
    if (Mask.size() >= 2) {
      MVT EltTp = LT.second.getVectorElementType();
          return 2 * LT.first * TLI->getLMULCost(LT.second);
        if (Mask[0] == 0 || Mask[0] == 1) {
          if (equal(DeinterleaveMask, Mask))
            return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
      if (LT.second.getScalarSizeInBits() != 1 &&
        unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
        for (unsigned I = 0; I != NumSlides; ++I) {
          unsigned InsertIndex = SubVectorSize * (1 << I);
          std::pair<InstructionCost, MVT> DestLT =
          Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
    if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                          LT.second.getVectorNumElements() <= 256)) {
             getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
    if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                          LT.second.getVectorNumElements() <= 256)) {
      auto &C = SrcTy->getContext();
      auto EC = SrcTy->getElementCount();
      return 2 * IndexCost +
             getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
  if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
        SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
      if (std::optional<unsigned> VLen = ST->getRealVLen();
          VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
          SubLT.second.getSizeInBits() <= *VLen)
           getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
           getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
        (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
                                Instruction::InsertElement);
    if (LT.second.getScalarSizeInBits() == 1) {
          (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
        (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
                                      RISCV::VMV_X_S, RISCV::VMV_V_X,
           getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
           getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
    unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
    if (Index >= 0 && Index < 32)
      Opcodes[0] = RISCV::VSLIDEDOWN_VI;
    else if (Index < 0 && Index > -32)
      Opcodes[1] = RISCV::VSLIDEUP_VI;
    return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    if (!LT.second.isVector())
    if (SrcTy->getElementType()->isIntegerTy(1)) {
    MVT ContainerVT = LT.second;
    if (LT.second.isFixedLengthVector())
      ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
    if (ContainerVT.bitsLE(M1VT)) {
      if (LT.second.isFixedLengthVector())
        LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
      unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
      if (LT.second.isFixedLengthVector() &&
          isInt<5>(LT.second.getVectorNumElements() - 1))
        Opcodes[1] = RISCV::VRSUB_VI;
          getRISCVInstructionCost(Opcodes, LT.second, CostKind);
      return LT.first * (LenCost + GatherCost);
    unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
        getRISCVInstructionCost(M1Opcodes, M1VT, CostKind) + 3;
        getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT, CostKind) * Ratio;
        getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second, CostKind);
    return FixedCost + LT.first * (GatherCost + SlideCost);
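// Illustrative note (not part of the original source): a vector reverse is
// modeled as building the reversed index vector (vid.v plus vrsub.vx, or
// vrsub.vi when the element count minus one fits a 5-bit immediate) followed
// by a vrgather.vv.  For types wider than one register, the gather is done
// per-M1 chunk and the chunks are recombined with vslidedown.vx, which is
// what the FixedCost, GatherCost and SlideCost terms above add up.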
                                          Ty, DemandedElts, Insert, Extract,
                                          CostKind);
  if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
    if (Ty->getScalarSizeInBits() == 1) {
    assert(LT.second.isFixedLengthVector());
    MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
        getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
    bool UseMaskForCond, bool UseMaskForGaps) const {
  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    if (LT.second.isVector()) {
          VTy->getElementCount().divideCoefficientBy(Factor));
      if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
          TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
        if (ST->hasOptimizedSegmentLoadStore(Factor)) {
          MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
          Cost += Factor * TLI->getLMULCost(SubVecVT);
          return LT.first * Cost;
                        CostKind, {TTI::OK_AnyValue, TTI::OP_None});
    unsigned NumLoads = getEstimatedVLFor(VTy);
    return NumLoads * MemOpCost;
  unsigned VF = FVTy->getNumElements() / Factor;
  if (Opcode == Instruction::Load) {
    for (unsigned Index : Indices) {
      Mask.resize(VF * Factor, -1);
      Cost += ShuffleCost;
                                     UseMaskForCond, UseMaskForGaps);
  assert(Opcode == Instruction::Store && "Opcode must be a store");
  return MemCost + ShuffleCost;
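// Illustrative note (not part of the original source): with a legal segment
// access, e.g. a factor-2 deinterleaving load of <8 x i32> on a machine where
// that type fits one register, the cost collapses to roughly one vlseg2e32,
// charged above as Factor times the LMUL cost of the <4 x i32> sub-vector.
// When segment instructions are not usable, the fallback is a wide load or
// store plus the explicit (de)interleaving shuffles costed here.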
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  if ((Opcode == Instruction::Load &&
      (Opcode == Instruction::Store &&
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
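// Illustrative note (not part of the original source): an indexed gather or
// scatter is priced as "estimated VL x cost of one scalar access of the
// element type".  E.g. a masked gather with an estimated VL of 8 is charged
// as roughly eight scalar loads, reflecting that the hardware has to touch
// each address independently.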
    unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
  bool IsLegal = (Opcode == Instruction::Store &&
                 (Opcode == Instruction::Load &&
  if (Opcode == Instruction::Store)
    Opcodes.append({RISCV::VCOMPRESS_VM});
    Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
         LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
      (Opcode != Instruction::Load && Opcode != Instruction::Store))
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;

  for (auto *Ty : Tys) {
    if (!Ty->isVectorTy())
    {Intrinsic::floor, MVT::f32, 9},
    {Intrinsic::floor, MVT::f64, 9},
    {Intrinsic::ceil, MVT::f32, 9},
    {Intrinsic::ceil, MVT::f64, 9},
    {Intrinsic::trunc, MVT::f32, 7},
    {Intrinsic::trunc, MVT::f64, 7},
    {Intrinsic::round, MVT::f32, 9},
    {Intrinsic::round, MVT::f64, 9},
    {Intrinsic::roundeven, MVT::f32, 9},
    {Intrinsic::roundeven, MVT::f64, 9},
    {Intrinsic::rint, MVT::f32, 7},
    {Intrinsic::rint, MVT::f64, 7},
    {Intrinsic::nearbyint, MVT::f32, 9},
    {Intrinsic::nearbyint, MVT::f64, 9},
    {Intrinsic::bswap, MVT::i16, 3},
    {Intrinsic::bswap, MVT::i32, 12},
    {Intrinsic::bswap, MVT::i64, 31},
    {Intrinsic::vp_bswap, MVT::i16, 3},
    {Intrinsic::vp_bswap, MVT::i32, 12},
    {Intrinsic::vp_bswap, MVT::i64, 31},
    {Intrinsic::vp_fshl, MVT::i8, 7},
    {Intrinsic::vp_fshl, MVT::i16, 7},
    {Intrinsic::vp_fshl, MVT::i32, 7},
    {Intrinsic::vp_fshl, MVT::i64, 7},
    {Intrinsic::vp_fshr, MVT::i8, 7},
    {Intrinsic::vp_fshr, MVT::i16, 7},
    {Intrinsic::vp_fshr, MVT::i32, 7},
    {Intrinsic::vp_fshr, MVT::i64, 7},
    {Intrinsic::bitreverse, MVT::i8, 17},
    {Intrinsic::bitreverse, MVT::i16, 24},
    {Intrinsic::bitreverse, MVT::i32, 33},
    {Intrinsic::bitreverse, MVT::i64, 52},
    {Intrinsic::vp_bitreverse, MVT::i8, 17},
    {Intrinsic::vp_bitreverse, MVT::i16, 24},
    {Intrinsic::vp_bitreverse, MVT::i32, 33},
    {Intrinsic::vp_bitreverse, MVT::i64, 52},
    {Intrinsic::ctpop, MVT::i8, 12},
    {Intrinsic::ctpop, MVT::i16, 19},
    {Intrinsic::ctpop, MVT::i32, 20},
    {Intrinsic::ctpop, MVT::i64, 21},
    {Intrinsic::ctlz, MVT::i8, 19},
    {Intrinsic::ctlz, MVT::i16, 28},
    {Intrinsic::ctlz, MVT::i32, 31},
    {Intrinsic::ctlz, MVT::i64, 35},
    {Intrinsic::cttz, MVT::i8, 16},
    {Intrinsic::cttz, MVT::i16, 23},
    {Intrinsic::cttz, MVT::i32, 24},
    {Intrinsic::cttz, MVT::i64, 25},
    {Intrinsic::vp_ctpop, MVT::i8, 12},
    {Intrinsic::vp_ctpop, MVT::i16, 19},
    {Intrinsic::vp_ctpop, MVT::i32, 20},
    {Intrinsic::vp_ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctlz, MVT::i8, 19},
    {Intrinsic::vp_ctlz, MVT::i16, 28},
    {Intrinsic::vp_ctlz, MVT::i32, 31},
    {Intrinsic::vp_ctlz, MVT::i64, 35},
    {Intrinsic::vp_cttz, MVT::i8, 16},
    {Intrinsic::vp_cttz, MVT::i16, 23},
    {Intrinsic::vp_cttz, MVT::i32, 24},
    {Intrinsic::vp_cttz, MVT::i64, 25},
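// Illustrative note (not part of the original source): this table holds the
// approximate number of vector instructions the generic expansion emits for
// intrinsics that lack a dedicated RVV instruction on the configured
// subtarget.  Lookups are keyed on (intrinsic, legalized element type); for
// example a vector bswap on i32 elements is charged 12 because the shift and
// mask expansion needs roughly a dozen vector ops per register.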
  switch (ICA.getID()) {
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::lround:
  case Intrinsic::llround: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      unsigned SrcEltSz = DL.getTypeSizeInBits(SrcTy->getScalarType());
      unsigned DstEltSz = DL.getTypeSizeInBits(RetTy->getScalarType());
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (!ST->hasVInstructionsBF16Minimal())
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (!ST->hasVInstructionsF16Minimal())
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (SrcEltSz > DstEltSz) {
        Ops = {RISCV::VFNCVT_X_F_W};
      } else if (SrcEltSz < DstEltSz) {
        Ops = {RISCV::VFWCVT_X_F_V};
        Ops = {RISCV::VFCVT_X_F_V};
      if (SrcEltSz > DstEltSz)
        return SrcLT.first *
               getRISCVInstructionCost(Ops, SrcLT.second, CostKind);
      return LT.first * getRISCVInstructionCost(Ops, LT.second, CostKind);
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {
    if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
      return LT.first * 8;
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::umin:
        Op = RISCV::VMINU_VV;
      case Intrinsic::umax:
        Op = RISCV::VMAXU_VV;
      case Intrinsic::smin:
        Op = RISCV::VMIN_VV;
      case Intrinsic::smax:
        Op = RISCV::VMAX_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::sadd_sat:
        Op = RISCV::VSADD_VV;
      case Intrinsic::ssub_sat:
        Op = RISCV::VSSUBU_VV;
      case Intrinsic::uadd_sat:
        Op = RISCV::VSADDU_VV;
      case Intrinsic::usub_sat:
        Op = RISCV::VSSUBU_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::fma:
  case Intrinsic::fmuladd: {
    if (ST->hasVInstructions() && LT.second.isVector())
             getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
  case Intrinsic::fabs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      if (LT.second.getVectorElementType() == MVT::bf16 ||
          (LT.second.getVectorElementType() == MVT::f16 &&
           !ST->hasVInstructionsF16()))
        return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
             getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
  case Intrinsic::sqrt: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      MVT ConvType = LT.second;
      MVT FsqrtType = LT.second;
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (LT.second == MVT::nxv32bf16) {
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
                    RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (LT.second == MVT::nxv32f16) {
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
                    RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
        FsqrtOp = {RISCV::VFSQRT_V};
      return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
                         getRISCVInstructionCost(ConvOp, ConvType, CostKind));
  case Intrinsic::cttz:
  case Intrinsic::ctlz:
  case Intrinsic::ctpop: {
    if (ST->hasStdExtZvbb() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::cttz:
      case Intrinsic::ctlz:
      case Intrinsic::ctpop:
        Op = RISCV::VCPOP_V;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::abs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
          getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
  case Intrinsic::get_active_lane_mask: {
    if (ST->hasVInstructions()) {
          getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
  case Intrinsic::stepvector: {
    if (ST->hasVInstructions())
      return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
    return 1 + (LT.first - 1);
  case Intrinsic::experimental_cttz_elts: {
    EVT ArgType = TLI->getValueType(DL, ArgTy, true);
    if (getTLI()->shouldExpandCttzElements(ArgType))
  case Intrinsic::experimental_vp_splat: {
    if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
    return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
  case Intrinsic::experimental_vp_splice: {
  case Intrinsic::fptoui_sat:
  case Intrinsic::fptosi_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    if (!SrcTy->isVectorTy())
    if (!SrcLT.first.isValid() || !DstLT.first.isValid())
  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
        LT.second.isVector()) {
      MVT EltTy = LT.second.getVectorElementType();
                                              ICA.getID(), EltTy))
        return LT.first * Entry->Cost;
  if (ST->hasVInstructions() && PtrTy->isVectorTy())

  if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
      Dst->getScalarSizeInBits() > ST->getELen())
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
    if (Src->getScalarSizeInBits() == 1) {
      return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
             DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
    if (Dst->getScalarSizeInBits() == 1) {
      return SrcLT.first *
                 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
  if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
      !SrcLT.first.isValid() || !DstLT.first.isValid() ||
                            SrcLT.second.getSizeInBits()) ||
                            DstLT.second.getSizeInBits()))
  assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
    int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
                  (int)Log2_32(SrcLT.second.getScalarSizeInBits());
    if ((PowDiff < 1) || (PowDiff > 3))
    unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
    unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
    return getRISCVInstructionCost(Op, DstLT.second, CostKind);
  case ISD::FP_EXTEND:
    unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
    unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
                       : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
                                                 : RISCV::VFNCVT_F_F_W;
    for (; SrcEltSize != DstEltSize;) {
      MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
          (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
    unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((SrcEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
          VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize)
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
      Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
      if ((SrcEltSize / 2) > DstEltSize) {
    unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
    unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
    unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((DstEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
      Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize) {
      if ((DstEltSize / 2) > SrcEltSize) {
        unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
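// Illustrative note (not part of the original source): integer extensions are
// costed directly from the width ratio (vsext.vf2/vf4/vf8 and vzext.vf2/...),
// while fp/int casts are built from chains of single-step widening or
// narrowing converts.  For example, sitofp <N x i8> to <N x double> first
// sign-extends i8 to i32 and then uses a widening vfwcvt.f.x.v, so the
// returned cost is the extension step plus the widening convert.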
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
  const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
  const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();

  if (Ty->getScalarSizeInBits() > ST->getELen())
  if (Ty->getElementType()->isIntegerTy(1)) {
    if (IID == Intrinsic::umax || IID == Intrinsic::smin)
  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
    case Intrinsic::maximum:
        Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
    case Intrinsic::minimum:
        Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
    const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  case Intrinsic::smax:
    SplitOp = RISCV::VMAX_VV;
    Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
  case Intrinsic::smin:
    SplitOp = RISCV::VMIN_VV;
    Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
  case Intrinsic::umax:
    SplitOp = RISCV::VMAXU_VV;
    Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
  case Intrinsic::umin:
    SplitOp = RISCV::VMINU_VV;
    Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
  case Intrinsic::maxnum:
    SplitOp = RISCV::VFMAX_VV;
    Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
  case Intrinsic::minnum:
    SplitOp = RISCV::VFMIN_VV;
    Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    std::optional<FastMathFlags> FMF,
  if (Ty->getScalarSizeInBits() > ST->getELen())
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  Type *ElementTy = Ty->getElementType();
    if (LT.second == MVT::v1i1)
      return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
      return ((LT.first > 2) ? (LT.first - 2) : 0) *
                 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
    SplitOp = RISCV::VADD_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VOR_VV;
    Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VXOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VAND_VV;
    Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
    if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
        LT.second.getScalarType() == MVT::bf16)
      for (unsigned i = 0; i < LT.first.getValue(); i++)
      return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    SplitOp = RISCV::VFADD_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
  if (IsUnsigned && Opcode == Instruction::Add &&
      LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
  return (LT.first - 1) +

  assert(OpInfo.isConstant() && "non constant operand?");
  if (OpInfo.isUniform())
  return getConstantPoolLoadCost(Ty, CostKind);

  EVT VT = TLI->getValueType(DL, Src, true);
  if (VT == MVT::Other)
  if (Opcode == Instruction::Store && OpInfo.isConstant())
  if (Src->isVectorTy() && LT.second.isVector() &&
                            LT.second.getSizeInBits()))
    BaseCost *= TLI->getLMULCost(LT.second);
  return Cost + BaseCost;
                                     Op1Info, Op2Info, I);
                                     Op1Info, Op2Info, I);
  if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
                                     Op1Info, Op2Info, I);
  auto GetConstantMatCost =
    if (OpInfo.isUniform())
    return getConstantPoolLoadCost(ValTy, CostKind);
    ConstantMatCost += GetConstantMatCost(Op1Info);
    ConstantMatCost += GetConstantMatCost(Op2Info);
  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
    if (ValTy->getScalarSizeInBits() == 1) {
      return ConstantMatCost +
             getRISCVInstructionCost(
                 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
    if (ValTy->getScalarSizeInBits() == 1) {
      MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
      return ConstantMatCost +
                 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(
                          {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
  if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
    return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
  if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
    if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
        (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
        (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
                                       Op1Info, Op2Info, I);
      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
      return ConstantMatCost +
             getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
      ValTy->isIntegerTy() && !I->user_empty()) {
      return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
             U->getType()->isIntegerTy() &&
             !isa<ConstantData>(U->getOperand(1)) &&
             !isa<ConstantData>(U->getOperand(2));
                                     Op1Info, Op2Info, I);

  return Opcode == Instruction::PHI ? 0 : 1;
                                             const Value *Op1) const {
  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)
  if (!LT.second.isVector()) {
    Type *ElemTy = FixedVecTy->getElementType();
    auto NumElems = FixedVecTy->getNumElements();
    auto Align = DL.getPrefTypeAlign(ElemTy);
    return Opcode == Instruction::ExtractElement
               ? StoreCost * NumElems + LoadCost
               : (StoreCost + LoadCost) * NumElems + StoreCost;
  if (LT.second.isScalableVector() && !LT.first.isValid())
    if (Opcode == Instruction::ExtractElement) {
      return ExtendCost + ExtractCost;
    return ExtendCost + InsertCost + TruncCost;
  unsigned BaseCost = 1;
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
    if (LT.second.isFixedLengthVector()) {
      unsigned Width = LT.second.getVectorNumElements();
      Index = Index % Width;
    if (auto VLEN = ST->getRealVLen()) {
      unsigned EltSize = LT.second.getScalarSizeInBits();
      unsigned M1Max = *VLEN / EltSize;
      Index = Index % M1Max;
  } else if (ST->hasVendorXRivosVisni() && isUInt<5>(Index) &&
  else if (Opcode == Instruction::InsertElement)
      ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
                          LT.second.isScalableVector()))) {
    Align VecAlign = DL.getPrefTypeAlign(Val);
    Align SclAlign = DL.getPrefTypeAlign(ScalarType);
    if (Opcode == Instruction::ExtractElement)
    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
  return BaseCost + SlideCost;
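// Illustrative note (not part of the original source): an element access at a
// non-zero index is modeled as a slide plus a scalar move: extractelement
// becomes roughly vslidedown.vx + vmv.x.s (BaseCost plus one slide), while
// insertelement needs a scalar move plus a vslideup and is therefore charged
// one extra slide.  Indices are first folded modulo the per-register element
// count, since only the register containing the element has to move.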
                                                 unsigned Index) const {
  assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");
                           EC.getKnownMinValue() - 1 - Index, nullptr,

  if (!LT.second.isVector())
  unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  if ((LT.second.getVectorElementType() == MVT::f16 ||
       LT.second.getVectorElementType() == MVT::bf16) &&
      TLI->getOperationAction(ISDOpcode, LT.second) ==
    MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
    CastCost += LT.first * Args.size() *
    LT.second = PromotedVT;
  auto getConstantMatCost =
        return getConstantPoolLoadCost(Ty, CostKind);
    ConstantMatCost += getConstantMatCost(0, Op1Info);
    ConstantMatCost += getConstantMatCost(1, Op2Info);
  switch (ISDOpcode) {
    Op = RISCV::VADD_VV;
    Op = RISCV::VSLL_VV;
    Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
    Op = RISCV::VMUL_VV;
    Op = RISCV::VDIV_VV;
    Op = RISCV::VREM_VV;
    Op = RISCV::VFADD_VV;
    Op = RISCV::VFMUL_VV;
    Op = RISCV::VFDIV_VV;
    Op = RISCV::VFSGNJN_VV;
  return CastCost + ConstantMatCost +
    if (Ty->isFPOrFPVectorTy())
  return CastCost + ConstantMatCost + LT.first * InstrCost;
  if (Info.isSameBase() && V != Base) {
    if (GEP->hasAllConstantIndices())
      unsigned Stride = DL.getTypeStoreSize(AccessTy);
      if (Info.isUnitStride() &&
              GEP->getType()->getPointerAddressSpace()))
                                {TTI::OK_AnyValue, TTI::OP_None},
                                {TTI::OK_AnyValue, TTI::OP_None}, {});

  if (ST->enableDefaultUnroll())
  if (L->getHeader()->getParent()->hasOptSize())
  L->getExitingBlocks(ExitingBlocks);
             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");
  if (ExitingBlocks.size() > 2)
  if (L->getNumBlocks() > 4)
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (IsVectorized && I.getType()->isVectorTy())
  bool HasMask = false;
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vse_mask:
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vse: {
    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 1 - HasMask;
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Alignment, Mask, EVL);
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vsse: {
    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 2 - HasMask;
      Alignment = Align(1);
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Alignment, Mask, EVL, Stride);
  if (Ty->isVectorTy()) {
    if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
    if (Size.isScalable() && ST->hasVInstructions())
    if (ST->useRVVForFixedLengthVectors())
  return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);

  return ST->enableUnalignedVectorMem();
  if (ST->hasVendorXCVmem() && !ST->is64Bit())

                                              Align Alignment) const {
  if (!VTy || VTy->isScalableTy())
  if (VTy->getElementType()->isIntegerTy(8))
    if (VTy->getElementCount().getFixedValue() > 256)
  return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
         ST->getMaxLMULForFixedLengthVectors();

                                                Align Alignment) const {
  if (!VTy || VTy->isScalableTy())
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)
  for (const User *U : I.users()) {
      Considerable = true;
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
  return Considerable;

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::Select:
    return Operand == 1;
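// Illustrative note (not part of the original source): canSplatOperand
// answers whether a vector operand that is a splat of a scalar can be folded
// into a .vx/.vf/.vi instruction form.  Ops in the first group above accept a
// splat on either side, whereas for shifts, integer division/remainder and
// select only the second operand (the shift amount, divisor, or selected
// values) has a scalar form, hence the `Operand == 1` check.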
  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
  case Intrinsic::fmuladd:
  case Intrinsic::vp_fmuladd:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
  case Intrinsic::ssub_sat:
  case Intrinsic::vp_ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::vp_usub_sat:
  case Intrinsic::vp_select:
    return Operand == 1;
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  case Intrinsic::smin:
  case Intrinsic::vp_smin:
  case Intrinsic::umin:
  case Intrinsic::vp_umin:
  case Intrinsic::smax:
  case Intrinsic::vp_smax:
  case Intrinsic::umax:
  case Intrinsic::vp_umax:
  case Intrinsic::sadd_sat:
  case Intrinsic::vp_sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::vp_uadd_sat:
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  if (I->isBitwiseLogicOp()) {
    if (!I->getType()->isVectorTy()) {
      if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
        for (auto &Op : I->operands()) {
    } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
      for (auto &Op : I->operands()) {
          Ops.push_back(&Not);
          Ops.push_back(&InsertElt);

  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
  if (!ST->sinkSplatOperands())
    for (Use &U : Op->uses()) {
      Ops.push_back(&Op->getOperandUse(0));
      Use *InsertEltUse = &Op->getOperandUse(0);
      Ops.push_back(&InsertElt->getOperandUse(1));
      Ops.push_back(InsertEltUse);
  if (!ST->enableUnalignedScalarMem())
  if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  if (ST->is64Bit()) {
    Options.LoadSizes = {8, 4, 2, 1};
    Options.AllowedTailExpansions = {3, 5, 6};
    Options.LoadSizes = {4, 2, 1};
    Options.AllowedTailExpansions = {3};
  if (IsZeroCmp && ST->hasVInstructions()) {
    unsigned VLenB = ST->getRealMinVLen() / 8;
    unsigned MinSize = ST->getXLen() / 8 + 1;
    unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
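// Illustrative note (not part of the original source): memcmp expansion is
// only enabled when unaligned scalar accesses are cheap, and equality-only
// compares (IsZeroCmp) additionally get vector block sizes from XLEN/8 + 1 up
// to VLENB times the maximum fixed-length LMUL.  So with VLEN = 128 a short
// memcmp-for-equality can be expanded into vector loads plus a compare/popcnt
// style check instead of a libcall.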
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
std::optional< unsigned > getMaxVScale() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
bool isLegalAddImmediate(int64_t imm) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
Value * getArgOperand(unsigned i) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isFPPredicate(Predicate P)
static bool isIntPredicate(Predicate P)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
A parsed version of the target data layout string in and methods for querying it.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
unsigned getMinTripCountTailFoldingThreshold() const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getAddressComputationCost(Type *PTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind) const
Return the cost of materializing an immediate for a value operand of a store instruction.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
bool hasActiveVectorLength() const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *Src, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Estimate the overhead of scalarizing an instruction.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
bool preferAlternateOpcodeVectorization() const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector register.
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
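The overrides listed above are normally reached through the TargetTransformInfo facade rather than by constructing RISCVTTIImpl directly. A minimal sketch, not taken from this file, of how a target-independent pass might consult two of these hooks; the helper name canUseScalableVectors is invented for illustration:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical helper: ask the target whether scalable vectors are worth
// forming, using hooks that RISCVTTIImpl overrides above.
static bool canUseScalableVectors(const TargetTransformInfo &TTI) {
  // getRegisterBitWidth(RGK_ScalableVector) reports the minimum known width
  // of a scalable vector register (0 when scalable vectors are unsupported).
  TypeSize Width =
      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_ScalableVector);
  // getMaxVScale() is empty when the target gives no upper bound on vscale.
  return Width.getKnownMinValue() != 0 && TTI.getMaxVScale().has_value();
}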
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
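A small sketch, not from this page, of what these two classifiers accept, using the masks that interleaved-access lowering typically produces; the function name classifyMasks is invented:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include <cassert>

using namespace llvm;

static void classifyMasks() {
  // <0, 1, 2, 3> selects every lane of one 4-element source in order.
  int Identity[] = {0, 1, 2, 3};
  assert(ShuffleVectorInst::isIdentityMask(Identity, /*NumSrcElts=*/4));

  // <0, 4, 1, 5, 2, 6, 3, 7> re-interleaves two 4-element inputs
  // (factor 2); StartIndexes receives the first mask index of each group.
  int Interleave[] = {0, 4, 1, 5, 2, 6, 3, 7};
  SmallVector<unsigned> StartIndexes;
  assert(ShuffleVectorInst::isInterleaveMask(Interleave, /*Factor=*/2,
                                             /*NumInputElts=*/8, StartIndexes));
}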
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given a vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of the scalar value RHS.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
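A brief sketch, not from this page, tying together a few of the Type, VectorType, ElementCount and TypeSize queries listed above; typeAndCountQueries is an invented name:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/TypeSize.h"
#include <cassert>

using namespace llvm;

static void typeAndCountQueries() {
  LLVMContext Ctx;
  Type *I64 = Type::getInt64Ty(Ctx);
  assert(I64->isIntegerTy() && I64->getScalarSizeInBits() == 64);

  // <vscale x 4 x i64>, then shrink each lane to i32 keeping the lane count.
  VectorType *VT = VectorType::get(I64, ElementCount::getScalable(4));
  Type *Narrow = VT->getWithNewBitWidth(32);
  assert(Narrow->isVectorTy() && Narrow->getScalarSizeInBits() == 32);

  // ElementCount arithmetic operates on the known-minimum coefficient.
  ElementCount EC = VT->getElementCount();              // vscale x 4
  assert(EC.isKnownMultipleOf(2));
  assert(EC.divideCoefficientBy(2).getKnownMinValue() == 2);

  // Fixed 64 bits is always <= vscale x 64 bits, since vscale >= 1.
  assert(TypeSize::isKnownLE(TypeSize::getFixed(64),
                             TypeSize::getScalable(64)));
}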
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types and value types.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN that holds the high half of the full 2N-bit product.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
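A short sketch, not from this page, of how the IR-level PatternMatch helpers corresponding to the matchers above compose; matchSplatOrFabs is an invented name, and the first check only recognises the insert-plus-shuffle shape (a real splat check would also inspect the mask, e.g. with m_ZeroMask()):

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

static bool matchSplatOrFabs(Value *V) {
  Value *X = nullptr;

  // shufflevector(insertelement(undef/poison, %x, 0), undef, <mask>):
  // the usual first step of recognising a splat of %x.
  if (match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(X), m_ZeroInt()),
                         m_Undef())))
    return true;

  // A call to llvm.fabs with any operand, as in the m_Intrinsic example above.
  return match(V, m_Intrinsic<Intrinsic::fabs>(m_Value(X)));
}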
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
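A minimal sketch, not from this file, of the usual CostTableLookup idiom in TTI implementations; the table contents and costs are made up for illustration:

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"

using namespace llvm;

// Hypothetical per-type costs keyed by ISD opcode and MVT.
static const CostTblEntry ExampleCostTbl[] = {
    {ISD::ADD, MVT::v4i32, 1},
    {ISD::MUL, MVT::v4i32, 3},
};

static unsigned lookupExampleCost(int ISDOpcode, MVT VT) {
  if (const auto *Entry = CostTableLookup(ExampleCostTbl, ISDOpcode, VT))
    return Entry->Cost;
  return 0; // No table entry: the caller falls back to a generic estimate.
}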
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
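For example (a sketch, not from this page), the 12-bit signed immediate range that the RISC-V cost code cares about can be probed like this:

#include "llvm/Support/MathExtras.h"
#include <cassert>

using namespace llvm;

static void immediateRangeChecks() {
  // RISC-V I-type instructions take a sign-extended 12-bit immediate.
  assert(isInt<12>(2047));    // largest simm12
  assert(isInt<12>(-2048));   // smallest simm12
  assert(!isInt<12>(2048));   // does not fit a single 12-bit immediate

  // Unsigned variant, e.g. a 5-bit field.
  assert(isUInt<5>(31) && !isUInt<5>(32));
}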
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, ...), where A is the 0-based index of the item in the sequence and B, ... are the corresponding values from each input range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
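A few concrete values for the bit-manipulation helpers above (a sketch, not from this page):

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

using namespace llvm;

static void bitMathExamples() {
  assert(Log2_32(32) == 5);          // floor(log2(32))
  assert(Log2_32_Ceil(33) == 6);     // ceil(log2(33))
  assert(isPowerOf2_32(64));
  assert(countr_zero(8u) == 3);      // 0b1000 has three trailing zeros
  assert(isShiftedMask_64(0x0ff0));  // a contiguous run of ones
  assert(divideCeil(10, 4) == 3);    // ceil(10 / 4)
}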
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
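For example (a sketch, not from this page; showMaskBuilders is an invented name), the two mask builders above produce:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
#include <cassert>

using namespace llvm;

static void showMaskBuilders() {
  // De-interleave factor 2: pick every other element of a wide vector.
  assert(equal(createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4),
               SmallVector<int>{0, 2, 4, 6}));

  // Re-interleave two 4-element vectors for an interleaved store.
  assert(equal(createInterleaveMask(/*VF=*/4, /*NumVecs=*/2),
               SmallVector<int>{0, 4, 1, 5, 2, 6, 3, 7}));
}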
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes a shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Information about a load/store intrinsic defined by the target.