#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
class VectorCombine {
public:
  VectorCombine(/* ... */
                bool TryEarlyFoldsOnly)
      : /* ... */
        TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  // ...

private:
  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  // ...
  const SimplifyQuery SQ;
  // ...
  bool TryEarlyFoldsOnly;
  // ...
  InstructionWorklist Worklist;
  bool vectorizeLoadInsert(Instruction &I);
  bool widenSubvectorLoad(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldExtractExtract(Instruction &I);
  bool foldInsExtFNeg(Instruction &I);
  bool foldInsExtBinop(Instruction &I);
  bool foldInsExtVectorToShuffle(Instruction &I);
  bool foldBitOpOfCastops(Instruction &I);
  bool foldBitOpOfCastConstant(Instruction &I);
  bool foldBitcastShuffle(Instruction &I);
  bool scalarizeOpOrCmp(Instruction &I);
  bool scalarizeVPIntrinsic(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldBinopOfReductions(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoadExtract(Instruction &I);
  bool scalarizeExtExtract(Instruction &I);
  bool foldConcatOfBoolMasks(Instruction &I);
  bool foldPermuteOfBinops(Instruction &I);
  bool foldShuffleOfBinops(Instruction &I);
  bool foldShuffleOfSelects(Instruction &I);
  bool foldShuffleOfCastops(Instruction &I);
  bool foldShuffleOfShuffles(Instruction &I);
  bool foldShuffleOfIntrinsics(Instruction &I);
  bool foldShuffleToIdentity(Instruction &I);
  bool foldShuffleFromReductions(Instruction &I);
  bool foldShuffleChainsToReduce(Instruction &I);
  bool foldCastFromReductions(Instruction &I);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);
  bool shrinkPhiOfShuffles(Instruction &I);
  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
    // ...
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);
    // ...
  }
  void eraseInstruction(Instruction &I) {
    // ...
    SmallPtrSet<Value *, 4> Visited;
    // ...
        OpI, nullptr, nullptr, [&](Value *V) {
          // ...
        });
    // ...
      NextInst = NextInst->getNextNode();
    // ...
    Worklist.pushUsersToWorkList(*OpI);
    Worklist.pushValue(OpI);
    // ...
  }
};
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      /* ... */)
    return false;
  // ...
  Type *ScalarTy = Load->getType()->getScalarType();
  // ...
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
      /* ... */)
    return false;
  return true;
}
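
// Illustrative sketch of the fold below (example IR assumed from the fold's
// name, not taken verbatim from the original source): a scalar load feeding
// an insertelement becomes a vector load plus a shuffle when the wider load
// is known to be dereferenceable and the cost model agrees:
//   %s = load float, ptr %p
//   %r = insertelement <4 x float> poison, float %s, i32 0
// -->
//   %v = load <4 x float>, ptr %p
//   %r = shufflevector <4 x float> %v, <4 x float> poison,
//                      <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>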
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // ...
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  // ...
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  // ...
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  APInt Offset(OffsetBitWidth, 0);
  // ...
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
    return false;
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
    return false;
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  unsigned OutputNumElts = Ty->getNumElements();
  // ...
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *VecLd);
  return true;
}
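
// Sketch of the fold below (illustrative IR, assumed from the identity-with-
// padding check): a one-use load whose only user pads it out to a wider
// vector can be replaced by a single wider load, when dereferenceability and
// cost allow:
//   %v = load <2 x i64>, ptr %p
//   %r = shufflevector <2 x i64> %v, <2 x i64> poison,
//                      <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// -->
//   %r = load <4 x i64>, ptr %p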
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  // ...
  if (!Shuf->isIdentityWithPadding())
    return false;
  // ...
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  });
  // ...
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *VecLd);
  return true;
}
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    unsigned PreferredExtractIndex) const {
  // ...
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  // ...
  if (Index0 == Index1)
    return nullptr;
  // ...
  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;
  // ...
  return Index0 > Index1 ? Ext0 : Ext1;
}
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  // ...
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");

  unsigned Opcode = I.getOpcode();
  // ...
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  // ...
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  // ...
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
  // ...
  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    // ...
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : /* ... */;
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // ...
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              /* ... */;
  }

  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    // ...
    SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(), /* ... */);
    ShuffleMask[BestInsIndex] = BestExtIndex;
    // ...
        VecTy, VecTy, ShuffleMask, CostKind, 0, nullptr, {ConvertToShuffle});
    // ...
        VecTy, VecTy, {}, CostKind, 0, nullptr, /* ... */);
  }
  // ...
  return OldCost < NewCost;
}
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilderBase &Builder) {
  // ...
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}

// ... (foldExtExtBinop)
      /* ... */ V1, "foldExtExtBinop");
  // ...
  VecBOInst->copyIRFlags(&I);
  // ...
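
// Sketch of the extract/extract fold handled below (illustrative IR, not
// from the original source): a scalar binop of two extracts from the same
// vector can be rewritten as a vector binop followed by a single extract,
// shifting one lane into place first:
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %e1 = extractelement <4 x i32> %x, i32 1
//   %r  = add i32 %e0, %e1
// -->
//   %s  = shufflevector <4 x i32> %x, <4 x i32> poison,
//                       <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
//   %v  = add <4 x i32> %x, %s
//   %r  = extractelement <4 x i32> %v, i32 0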
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // ...
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
    return false;
  // ...
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    // ...
    if (ExtractToChange == Ext0)
      /* ... */;
    // ...
  }
  // ...
      ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
      : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  // ...
  replaceValue(I, *NewExt);
  return true;
}
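
// Sketch of the fold below (illustrative IR, assumed semantics): inserting
// the negation of an extracted lane back into the same position becomes a
// single shuffle selecting between the source and its vector fneg:
//   %e = extractelement <4 x float> %x, i32 1
//   %n = fneg float %e
//   %r = insertelement <4 x float> %x, float %n, i32 1
// -->
//   %f = fneg <4 x float> %x
//   %r = shufflevector <4 x float> %x, <4 x float> %f,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 3>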
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  // ...
  if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
    return false;
  // ...
  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
    return false;
  // ...
  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  // ...
  bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
  // ...
  SmallVector<int> SrcMask;
  // ...
      VecTy, SrcVecTy, SrcMask, CostKind);
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  replaceValue(I, *NewShuf);
  return true;
}
bool VectorCombine::foldInsExtBinop(Instruction &I) {
  BinaryOperator *VecBinOp, *SclBinOp;
  // ...
  LLVM_DEBUG(dbgs() << /* ... */ << "\n OldCost: " << OldCost
                    << " vs NewCost: " << NewCost << /* ... */);
  if (NewCost > OldCost)
    return false;
  // ...
  NewInst->copyIRFlags(VecBinOp);
  NewInst->andIRFlags(SclBinOp);
  // ...
  replaceValue(I, *NewBO);
  return true;
}
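
// Sketch of the fold below (illustrative IR, not from the original source):
// a bitwise logic op of two identical casts can be performed on the narrower
// source type, leaving a single cast of the result:
//   %a = zext <4 x i16> %x to <4 x i32>
//   %b = zext <4 x i16> %y to <4 x i32>
//   %r = and <4 x i32> %a, %b
// -->
//   %i = and <4 x i16> %x, %y
//   %r = zext <4 x i16> %i to <4 x i32>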
bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
  // ...
  if (!BinOp || !BinOp->isBitwiseLogicOp())
    return false;
  // ...
  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << " One or both operands are not cast instructions\n");
    return false;
  }
  // ...
  if (CastOpcode != RHSCast->getOpcode())
    return false;
  // ...
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:
    break;
  default:
    return false;
  }

  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);
  // ...
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  // ...
  if (CastOpcode != Instruction::BitCast &&
      /* ... */)
    return false;
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
    return false;
  // ...
      LHSCastCost + RHSCastCost;
  // ...
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;

  LLVM_DEBUG(dbgs() << /* ... */ << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
    return false;
  // ...
      BinOp->getName() + ".inner");
  // ...
  NewBinOp->copyIRFlags(BinOp);
  // ...
  replaceValue(I, *Result);
  return true;
}
bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  // ...
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::Trunc:
    break;
  default:
    return false;
  }

  Value *LHSSrc = LHSCast->getOperand(0);
  // ...
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  // ...
  if (CastOpcode != Instruction::BitCast &&
      /* ... */)
    return false;
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
    return false;
  // ...
  PreservedCastFlags RHSFlags;
  // ...
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;

  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
    return false;
  // ...
      LHSSrc, InvC, I.getName() + ".inner");
  // ...
  NewBinOp->copyIRFlags(&I);
  // ...
  replaceValue(I, *Result);
  return true;
}
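
// Sketch of the fold below (illustrative IR, assumed from the fold's name):
// moving a shuffle after a bitcast, scaling the mask by the element-size
// ratio, can expose further shuffle combines:
//   %s = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 1, i32 0>
//   %r = bitcast <2 x i32> %s to <4 x i16>
// -->
//   %b = bitcast <4 x i32> %x to <8 x i16>
//   %r = shufflevector <8 x i16> %b, <8 x i16> poison,
//                      <4 x i32> <i32 2, i32 3, i32 0, i32 1>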
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  // ...
  if (!DestTy || !SrcTy)
    return false;

  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
    return false;
  // ...
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
    return false;
  // ...
  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    // ...
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    // ...
  } else {
    // ...
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
    // ...
  }
  // ...
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy = /* ... */;
  auto *OldShuffleTy = /* ... */;
  unsigned NumOps = IsUnary ? 1 : 2;
  // ...
      TargetTransformInfo::CastContextHint::None, /* ... */);
  // ...
      TargetTransformInfo::CastContextHint::None, /* ... */);

  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *Shuf);
  return true;
}
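
// Sketch of the scalarization below (illustrative IR, assumed): a VP
// intrinsic whose vector operands are both splats can be computed on the
// scalars and then splatted, provided speculation or an all-true mask makes
// that safe:
//   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %xsplat, <4 x i32> %ysplat,
//                                          <4 x i1> %m, i32 %evl)
// -->
//   %s = add i32 %x, %y
//   %r = ...broadcast %s back to <4 x i32>...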
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  // ...
  if (!ScalarOp0 || !ScalarOp1)
    return false;
  // ...
  auto IsAllTrueMask = [](Value *MaskVal) {
    // ...
      return ConstValue->isAllOnesValue();
    return false;
  };
  // ...
  SmallVector<int> Mask;
  // ...
    Mask.resize(FVTy->getNumElements(), 0);
  // ...
    Args.push_back(V->getType());
  IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
  // ...
  std::optional<unsigned> FunctionalOpcode = /* ... */;
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    // ...
    IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
    // ...
  }
  // ...
  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;

  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
                    << /* ... */
                    << ", Cost of scalarizing:" << NewCost << "\n");
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  bool SafeToSpeculate;
  // ...
      *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
      /* ... */)
    return false;
  // ...
  Value *ScalarVal =
      ScalarIntrID
          ? Builder.CreateIntrinsic(/* ... */, {ScalarOp0, ScalarOp1})
          : Builder.CreateBinOp(/* ... */, ScalarOp0, ScalarOp1);
  // ...
}
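
// Sketch of the scalarization below (illustrative IR, not from the original
// source): an op whose vector operands are insertions at the same lane can
// be done on the scalars and inserted once:
//   %x = insertelement <4 x i32> %xb, i32 %xs, i32 2
//   %y = insertelement <4 x i32> %yb, i32 %ys, i32 2
//   %r = add <4 x i32> %x, %y
// -->
//   %s = add i32 %xs, %ys
//   %r = insertelement <4 x i32> %vb, i32 %s, i32 2
// where %vb stands for the (possibly constant-folded) op on the base vectors.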
bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
  // ...
  if (!UO && !BO && !CI && !II)
    return false;
  // ...
  if (Arg->getType() != II->getType() &&
      /* ... */)
    return false;
  // ...
  for (User *U : I.users())
    /* ... */;
  // ...
  std::optional<uint64_t> Index;
  // ...
  auto Ops = II ? II->args() : I.operands();
  // ...
  uint64_t InsIdx = 0;
  // ...
  if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
    return false;
  // ...
  else if (InsIdx != *Index)
    return false;
  // ...
  if (!Index.has_value())
    return false;
  // ...
  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         /* ... */
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  if (CI) {
    // ...
  } else if (UO || BO) {
    // ...
  } else {
    IntrinsicCostAttributes ScalarICA(
        II->getIntrinsicID(), ScalarTy, /* ... */);
    // ...
    IntrinsicCostAttributes VectorICA(
        II->getIntrinsicID(), VecTy, /* ... */);
    // ...
  }
  // ...
  Value *NewVecC = nullptr;
  if (CI)
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
  else if (UO)
    NewVecC =
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
  else if (BO)
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
  // ...
  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
    // ...
            II->getIntrinsicID(), Idx, &TTI)))
      continue;
    // ...
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;
  }
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  ++NumScalarIntrinsic;
  // ...
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  // ...
  Scalar->setName(I.getName() + ".scalar");
  // ...
  ScalarInst->copyIRFlags(&I);
  // ...
  replaceValue(I, *Insert);
  return true;
}
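
// Sketch of the fold below (illustrative IR, assumed): a logic op of two
// compares of extracts from one vector can become a single vector compare
// followed by a shuffle, a vector logic op, and one extract:
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %e1 = extractelement <4 x i32> %x, i32 1
//   %c0 = icmp sgt i32 %e0, 42
//   %c1 = icmp sgt i32 %e1, 99
//   %r  = and i1 %c0, %c1
// -->
//   %vc = icmp sgt <4 x i32> %x, <i32 42, i32 99, i32 poison, i32 poison>
//   ...shuffle %vc to align lanes, vector and, extract one lane...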
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  // ...
  if (!BI || !I.getType()->isIntegerTy(1))
    return false;
  // ...
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  // ...
  CmpPredicate P0, P1;
  // ...
  uint64_t Index0, Index1;
  // ...
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  // ...
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");
  // ...
  unsigned CmpOpcode = /* ... */;
  // ...
      Ext0Cost + Ext1Cost + CmpCost * 2 +
      /* ... */;
  // ...
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  // ...
  ShufMask[CheapIndex] = ExpensiveIndex;
  // ...
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  // ...
  replaceValue(I, *NewExt);
  return true;
}
static void analyzeCostOfVecReduction(const IntrinsicInst &II,
                                      TTI::TargetCostKind CostKind,
                                      const TargetTransformInfo &TTI,
                                      InstructionCost &CostBeforeReduction,
                                      InstructionCost &CostAfterReduction) {
  // ...
  unsigned ReductionOpc = /* ... */;
  // ...
    CostBeforeReduction =
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
                             /* ... */);
    CostAfterReduction =
        TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),
                                     /* ... */);
  // ...
  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
      /* ... */
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
    // ...
        TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
                             /* ... */);
    // ...
        TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
    // ...
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
                             /* ... */);
    // ...
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
    CostAfterReduction = TTI.getMulAccReductionCost(
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
    return;
  }
  CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
                                                      /* ... */);
}
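
// Sketch of the fold below (illustrative IR, assumed from the fold's name):
// a binop of two like reductions becomes one reduction of a vector binop:
//   %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
//   %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %y)
//   %r  = add i32 %r0, %r1
// -->
//   %v = add <4 x i32> %x, %y
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)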
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  // ...
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;
  // ...
  auto checkIntrinsicAndGetItsArgument = [](Value *V,
                                            Intrinsic::ID IID) -> Value * {
    // ...
    if (II->getIntrinsicID() == IID && II->hasOneUse())
      return II->getArgOperand(0);
    return nullptr;
  };

  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  // ...
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
  // ...
  unsigned ReductionOpc = /* ... */;
  // ...
      CostOfRedOperand0 + CostOfRedOperand1 +
      /* ... */;
  // ...
  if (NewCost >= OldCost || !NewCost.isValid())
    return false;

  LLVM_DEBUG(dbgs() << /* ... */ << "\n OldCost: " << OldCost
                    << " vs NewCost: " << NewCost << /* ... */);
  // ...
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "",
                                /* ... */);
  // ...
  replaceValue(I, *Rdx);
  return true;
}
static bool isMemModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
    // ...
  });
}
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
  // ...
  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

public:
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");
  }

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};
  }

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

  void discard() {
    // ...
    Status = StatusTy::Unsafe;
  }

  void freeze(IRBuilderBase &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
    assert(/* ... */ &&
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
    // ...
      if (U.get() == ToFreeze)
        /* ... */;
    // ...
  }
};

static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx,
                                              Instruction *CtxI,
                                              AssumptionCache &AC,
                                              const DominatorTree &DT) {
  // ...
  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
  // ...
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  // ...
    return ScalarizationResult::unsafe();

  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
  // ...
      true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  // ...
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
}

static Align computeAlignmentAfterScalarization(Align VectorAlignment,
                                                Type *ScalarType, Value *Idx,
                                                const DataLayout &DL) {
  // ...
      C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
  // ...
}
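
// Sketch of the fold below (illustrative IR, assumed): storing a vector that
// only updates one lane of a value just loaded from the same address can be
// narrowed to a scalar store through a GEP, when the index is known in range:
//   %v = load <4 x i32>, ptr %p
//   %i = insertelement <4 x i32> %v, i32 %s, i32 2
//   store <4 x i32> %i, ptr %p
// -->
//   %g = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 2
//   store i32 %s, ptr %g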
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  // ...
  if (!match(SI->getValueOperand(),
             /* ... */))
    return false;
  // ...
  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
  // ...
  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
      !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
      SrcAddr != SI->getPointerOperand()->stripPointerCasts())
    return false;
  // ...
  if (ScalarizableIdx.isUnsafe() ||
      /* ... */)
    return false;
  // ...
  Worklist.push(Load);
  // ...
  if (ScalarizableIdx.isSafeWithFreeze())
    /* ... */;
  // ...
      SI->getValueOperand()->getType(), SI->getPointerOperand(),
      {ConstantInt::get(Idx->getType(), 0), Idx});
  // ...
      std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
      /* ... */);
  // ...
  replaceValue(I, *NSI);
  return true;
}
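
// Sketch of the scalarization below (illustrative IR, assumed): when a
// vector load is only used by in-range extracts, each extract can load its
// own lane directly:
//   %v = load <4 x i32>, ptr %p
//   %e = extractelement <4 x i32> %v, i32 1
// -->
//   %g = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 1
//   %e = load i32, ptr %g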
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  // ...
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
    return false;
  // ...
      LI->getPointerAddressSpace(), CostKind);
  // ...
  unsigned NumInstChecked = 0;
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
  // ...
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  // ...
  for (User *U : LI->users()) {
    // ...
    if (!UI || UI->getParent() != LI->getParent())
      return false;
    // ...
    if (UI->use_empty())
      continue;
    // ...
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
      // ...
    }
    LastCheckedInst = UI;
    // ...
    if (ScalarIdx.isUnsafe())
      return false;
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
    }
    // ...
        Index ? Index->getZExtValue() : -1);
    // ...
  }

  LLVM_DEBUG(dbgs() << /* ... */ << "\n LoadExtractCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
    return false;
  // ...
  Type *ElemType = VecTy->getElementType();
  // ...
  for (User *U : LI->users()) {
    // ...
    Value *Idx = EI->getIndexOperand();
    // ...
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
      /* ... */;
    // ...
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));

    Align ScalarOpAlignment = /* ... */;
    NewLoad->setAlignment(ScalarOpAlignment);
    // ...
    size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
    AAMDNodes OldAAMD = LI->getAAMetadata();
    // ...
    replaceValue(*EI, *NewLoad, false);
  }

  FailureGuard.release();
  return true;
}
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  // ...
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
    return false;
  // ...
  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {
    // ...
  }
  // ...
      Instruction::And, ScalarDstTy, CostKind,
      /* ... */
      (ExtCnt - ExtLane0) *
      /* ... */
      Instruction::LShr, ScalarDstTy, CostKind,
      /* ... */;
  if (ScalarCost > VectorCost)
    return false;

  Value *ScalarV = Ext->getOperand(0);
  // ...
  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
  // ...
  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
  for (User *U : Ext->users()) {
    // ...
        ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
        : (Idx * SrcEltSizeInBits);
    // ...
    U->replaceAllUsesWith(And);
  }
  return true;
}
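
// Sketch of the fold below (illustrative IR, assumed from the checks): an or
// of two zext'd bool-mask bitcasts, one shifted into the high half, is just
// the concatenation of the two masks, so it can be built as one shuffle of
// the i1 vectors followed by a single bitcast:
//   %lo = bitcast <8 x i1> %x to i8
//   %hi = bitcast <8 x i1> %y to i8
//   %zl = zext i8 %lo to i16
//   %zh = zext i8 %hi to i16
//   %sh = shl i16 %zh, 8
//   %r  = or i16 %zl, %sh
// -->
//   %c = shufflevector <8 x i1> %x, <8 x i1> %y,
//                      <16 x i32> <i32 0, i32 1, ..., i32 15>
//   %r = bitcast <16 x i1> %c to i16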
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  // ...
  if (DL->isBigEndian())
    return false;
  // ...
  uint64_t ShAmtX = 0;
  // ...
  uint64_t ShAmtY = 0;
  // ...
  if (ShAmtX > ShAmtY) {
    // ...
  }
  // ...
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
  // ...
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
    return false;
  // ...
      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);
  // ...
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  // ...
  if (Ty != ConcatIntTy)
    /* ... */;

  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << /* ... */);
  if (NewCost > OldCost)
    return false;
  // ...
  if (Ty != ConcatIntTy) {
    // ...
  }
  // ...
  replaceValue(I, *Result);
  return true;
}
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  BinaryOperator *BinOp;
  ArrayRef<int> OuterMask;
  // ...
  Value *Op00, *Op01, *Op10, *Op11;
  ArrayRef<int> Mask0, Mask1;
  // ...
  if (!Match0 && !Match1)
    return false;
  // ...
  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();
  // ...
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
    return false;
  // ...
  SmallVector<int> NewMask0, NewMask1;
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
      // ...
    } else {
      NewMask0.push_back(Match0 ? Mask0[M] : M);
      NewMask1.push_back(Match1 ? Mask1[M] : M);
    }
  }
  // ...
  unsigned NumOpElts = Op0Ty->getNumElements();
  bool IsIdentity0 =
      ShuffleDstTy == Op0Ty &&
      all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
      /* ... */;
  bool IsIdentity1 =
      ShuffleDstTy == Op1Ty &&
      all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
      /* ... */;
  // ...
      BinOpTy, OuterMask, CostKind, 0, nullptr, {BinOp}, &I);
  // ...
      Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});
  // ...
      Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << /* ... */);
  if (NewCost > OldCost)
    return false;
  // ...
  NewInst->copyIRFlags(BinOp);
  // ...
  replaceValue(I, *NewBO);
  return true;
}
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  ArrayRef<int> OldMask;
  // ...
  if (LHS->getOpcode() != RHS->getOpcode())
    return false;

  bool IsCommutative = false;
  // ...
    IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
  // ...
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();
  // ...
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
    /* ... */;

  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
      /* ... */;
  };

  SmallVector<int> NewMask0(OldMask);
  // ...
  SmallVector<int> NewMask1(OldMask);
  // ...
  ArrayRef<int> InnerMask;
  // ...
          m_Mask(InnerMask)))) &&
      // ...
      [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
    // ...
  }
  // ...
  bool ReducedInstCount = false;
  ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
  ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);

  auto *ShuffleCmpTy = /* ... */;
  // ...
  LLVM_DEBUG(dbgs() << /* ... */ << "\n OldCost: " << OldCost
                    << " vs NewCost: " << NewCost << /* ... */);
  // ...
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
    return false;
  // ...
      : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
  // ...
  NewInst->copyIRFlags(LHS);
  NewInst->andIRFlags(RHS);
  // ...
  replaceValue(I, *NewBO);
  return true;
}
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
  // ...
  Value *C1, *T1, *F1, *C2, *T2, *F2;
  // ...
  if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
    return false;
  // ...
  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
      ((SI0FOp != nullptr) &&
       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
    return false;
  // ...
  auto SelOp = Instruction::Select;
  // ...
      {I.getOperand(0), I.getOperand(1)}, &I);
  // ...
      Mask, CostKind, 0, nullptr, {C1, C2});
  // ...
      toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
  // ...
  LLVM_DEBUG(dbgs() << /* ... */ << "\n OldCost: " << OldCost
                    << " vs NewCost: " << NewCost << /* ... */);
  if (NewCost > OldCost)
    return false;
  // ...
    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
                                     SI0FOp->getFastMathFlags());
  else
    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
  // ...
  replaceValue(I, *NewSel);
  return true;
}
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  // ...
  ArrayRef<int> OldMask;
  // ...
  if (!C0 || (IsBinaryShuffle && !C1))
    return false;
  // ...
  if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
    return false;

  if (IsBinaryShuffle) {
    if (C0->getSrcTy() != C1->getSrcTy())
      return false;
    if (Opcode != C1->getOpcode()) {
      // ...
        Opcode = Instruction::SExt;
      // ...
    }
  }
  // ...
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
    return false;

  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");
  // ...
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
    return false;

  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    // ...
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    // ...
  } else {
    // ...
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
    // ...
  }

  auto *NewShuffleDstTy = /* ... */;
  // ...
  if (IsBinaryShuffle)
    /* ... */;
  // ...
  if (IsBinaryShuffle) {
    // ...
  }
  // ...
  LLVM_DEBUG(dbgs() << /* ... */ << "\n OldCost: " << OldCost
                    << " vs NewCost: " << NewCost << /* ... */);
  if (NewCost > OldCost)
    return false;
  // ...
  if (IsBinaryShuffle)
    /* ... */;
  // ...
  NewInst->copyIRFlags(C0);
  if (IsBinaryShuffle)
    NewInst->andIRFlags(C1);
  // ...
  replaceValue(I, *Cast);
  return true;
}
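
// Sketch of the fold below (illustrative IR, assumed from the fold's name):
// a shuffle of shuffles folds to a single shuffle of the innermost sources
// by composing the masks:
//   %s = shufflevector <4 x i32> %x, <4 x i32> %y,
//                      <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %r = shufflevector <4 x i32> %s, <4 x i32> poison,
//                      <4 x i32> <i32 2, i32 3, i32 0, i32 1>
// -->
//   %r = shufflevector <4 x i32> %x, <4 x i32> %y,
//                      <4 x i32> <i32 1, i32 5, i32 0, i32 4>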
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  ArrayRef<int> OuterMask;
  Value *OuterV0, *OuterV1;
  // ...
  ArrayRef<int> InnerMask0, InnerMask1;
  Value *X0, *X1, *Y0, *Y1;
  // ...
  if (!Match0 && !Match1)
    return false;
  // ...
  SmallVector<int, 16> PoisonMask1;
  // ...
    InnerMask1 = PoisonMask1;
  // ...
  X0 = Match0 ? X0 : OuterV0;
  Y0 = Match0 ? Y0 : OuterV0;
  X1 = Match1 ? X1 : OuterV1;
  Y1 = Match1 ? Y1 : OuterV1;
  // ...
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
      /* ... */)
    return false;

  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();
  // ...
  SmallVector<int, 16> NewMask(OuterMask);
  Value *NewX = nullptr, *NewY = nullptr;
  for (int &M : NewMask) {
    Value *Src = nullptr;
    if (0 <= M && M < (int)NumImmElts) {
      // ...
      Src = M >= (int)NumSrcElts ? Y0 : X0;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
      // ...
    } else if (M >= (int)NumImmElts) {
      // ...
      Src = M >= (int)NumSrcElts ? Y1 : X1;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
      // ...
    }
    // ...
    assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
    // ...
    if (!NewX || NewX == Src) {
      // ...
    }
    if (!NewY || NewY == Src) {
      // ...
    }
    // ...
  }
  // ...
  if (/* ... */) {
    // ...
    replaceValue(I, *NewX);
    return true;
  }
  // ...
  bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
  // ...
      nullptr, {NewX, NewY});
  // ...
    NewCost += InnerCost0;
  // ...
    NewCost += InnerCost1;

  LLVM_DEBUG(dbgs() << /* ... */ << "\n OldCost: " << OldCost
                    << " vs NewCost: " << NewCost << /* ... */);
  if (NewCost > OldCost)
    return false;
  // ...
  replaceValue(I, *Shuf);
  return true;
}
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  // ...
  ArrayRef<int> OldMask;
  // ...
  if (IID != II1->getIntrinsicID())
    return false;
  // ...
  if (!ShuffleDstTy || !II0Ty)
    return false;
  // ...
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
    // ...
        II0->getArgOperand(I) != II1->getArgOperand(I))
      return false;
  // ...
      II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);
  // ...
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
    // ...
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
    // ...
        ShuffleDstTy->getNumElements());
    // ...
  }
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
  // ...
  LLVM_DEBUG(dbgs() << /* ... */ << "\n OldCost: " << OldCost
                    << " vs NewCost: " << NewCost << /* ... */);
  if (NewCost > OldCost)
    return false;
  // ...
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
    // ...
        II1->getArgOperand(I), OldMask);
  // ...
  NewInst->copyIRFlags(II0);
  NewInst->andIRFlags(II1);
  // ...
  replaceValue(I, *NewIntrinsic);
  return true;
}
using InstLane = std::pair<Use *, int>;

static InstLane lookThroughShuffles(Use *U, int Lane) {
  // ...
    int M = SV->getMaskValue(Lane);
    // ...
    if (static_cast<unsigned>(M) < NumElts) {
      U = &SV->getOperandUse(0);
      // ...
    } else {
      U = &SV->getOperandUse(1);
      // ...
    }
  // ...
}

static SmallVector<InstLane>
generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
  // ...
    auto [U, Lane] = IL;
    // ...
}

static bool isFreeConcat(ArrayRef<InstLane> Item, TTI::TargetCostKind CostKind,
                         const TargetTransformInfo &TTI) {
  // ...
  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
    return false;
  // ...
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  // ...
  unsigned NumSlices = Item.size() / NumElts;
  // ...
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
      return false;
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
        return false;
    }
  }
  return true;
}

static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
                                  const SmallPtrSet<Use *, 4> &IdentityLeafs,
                                  const SmallPtrSet<Use *, 4> &SplatLeafs,
                                  const SmallPtrSet<Use *, 4> &ConcatLeafs,
                                  IRBuilderBase &Builder,
                                  const TargetTransformInfo *TTI) {
  auto [FrontU, FrontLane] = Item.front();

  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
  }
  // ...
    return Builder.CreateShuffleVector(FrontU->get(), Mask);
  // ...
  if (ConcatLeafs.contains(FrontU)) {
    // ...
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    // ...
    while (Values.size() > 1) {
      // ...
      std::iota(Mask.begin(), Mask.end(), 0);
      // ...
      for (unsigned S = 0; S < NewValues.size(); ++S)
        // ...
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
      // ...
    }
    // ...
  }

  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  // ...
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
    // ...
      Ops[Idx] = II->getOperand(Idx);
    // ...
        Ty, IdentityLeafs, SplatLeafs, ConcatLeafs, /* ... */);
    // ...
  }
  // ...
  for (const auto &Lane : Item)
    /* ... */;

  if (auto *CI = dyn_cast<CmpInst>(I)) {
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
    // ...
  }
  // ...
  if (auto *CI = dyn_cast<CastInst>(I)) {
    auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);
    // ...
  }
  if (II) {
    auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
    // ...
  }
  // ...
}
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  // ...
  if (!Ty || I.use_empty())
    return false;
  // ...
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
    /* ... */;

  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;
  // ...
  while (!Worklist.empty()) {
    // ...
    auto [FrontU, FrontLane] = Item.front();
    // ...
      return X->getType() == Y->getType() &&
             /* ... */;
    // ...
    if (FrontLane == 0 &&
        /* ... */ Ty->getNumElements() &&
        all_of(/* ... */, [&](auto E) {
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
        })) {
      IdentityLeafs.insert(FrontU);
      continue;
    }
    // ...
    if (/* ... */ C && C->getSplatValue() &&
        /* ... */) {
      // ...
      SplatLeafs.insert(FrontU);
      continue;
    }
    // ...
    if (all_of(/* ... */, [Item](InstLane IL) {
          auto [FrontU, FrontLane] = Item.front();
          auto [U, Lane] = IL;
          return !U || (U->get() == FrontU->get() && Lane == FrontLane);
        })) {
      SplatLeafs.insert(FrontU);
      continue;
    }
    // ...
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      // ...
      Value *V = IL.first->get();
      // ...
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
          return false;
      // ...
        if (CI->getSrcTy()->getScalarType() !=
            /* ... */)
          return false;
      // ...
          SI->getOperand(0)->getType() !=
          /* ... */;
      // ...
          II->getIntrinsicID() ==
          /* ... */ &&
          !II->hasOperandBundles());
      // ...
    };
    // ...
    if (/* ... */ BO && BO->isIntDivRem())
      return false;
    // ...
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
                   FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
      // ...
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
        // ...
      }
    // ...
        !II->hasOperandBundles()) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
        // ...
      }
      // ...
      ConcatLeafs.insert(FrontU);
      continue;
    }
    // ...
  }

  if (NumVisited <= 1)
    return false;

  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
  // ...
                                 ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
  return true;
}
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  // ...
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
    break;
  default:
    return false;
  }
  // ...
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  // ...
  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    // ...
    if (CI->isBinaryOp()) {
      for (auto *Op : CI->operand_values())
        /* ... */;
    } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
      if (Shuffle && Shuffle != SV)
        return false;
      // ...
    }
    // ...
  }
  // ...
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
        return false;

  FixedVectorType *VecType = /* ... */;
  // ...
  FixedVectorType *ShuffleInputType = /* ... */;
  if (!ShuffleInputType)
    return false;
  // ...
  SmallVector<int> ConcatMask;
  // ...
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
  // ...
      ShuffleInputType, ConcatMask, CostKind);

  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
                    << /* ... */);
  LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << /* ... */);
  bool MadeChanges = false;
  if (NewCost < OldCost) {
    // ...
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
    // ...
  }
  // ...
  MadeChanges |= foldSelectShuffle(*Shuffle, true);
  return MadeChanges;
}
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  // ...
  std::queue<Value *> InstWorklist;
  // ...
  std::optional<unsigned int> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;

  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;
  // ...
  SmallVector<Value *, 2> PrevVecV(2, nullptr);
  // ...
  int64_t VecSize = FVT->getNumElements();
  // ...
  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
  // ...
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
    // ...
      ExpectedParityMask |= (1ll << Mask);
  }

  InstWorklist.push(VecOpEE);

  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
    // ...
    if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
      if (!ShouldBeCallOrBinInst)
        return false;
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
        return false;
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
        return false;
      IsFirstCallOrBinInst = false;

      if (!CommonCallOp)
        CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
        return false;

      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
        // ...
      }
      // ...
      }
      ShouldBeCallOrBinInst ^= 1;

      IntrinsicCostAttributes ICA(
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      // ...
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    } else if (auto *BinOp = dyn_cast<BinaryOperator>(CI)) {
      if (!ShouldBeCallOrBinInst)
        return false;
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
        return false;
      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
        return false;
      IsFirstCallOrBinInst = false;
      // ...
      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
        // ...
      }
      // ...
      }
      ShouldBeCallOrBinInst ^= 1;
      // ...
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    } else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
      if (ShouldBeCallOrBinInst ||
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
        return false;
      if (SVInst != PrevVecV[1])
        return false;
      // ...
      ArrayRef<int> CurMask;
      // ...
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
          return false;
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
          return false;
      }
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;
      // ...
          SVInst->getType(), SVInst->getType(),
          /* ... */);
      // ...
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
        break;
      ShouldBeCallOrBinInst ^= 1;
    }
    // ...
  }

  if (ShouldBeCallOrBinInst)
    return false;

  assert(VecSize != -1 && "Expected Match for Vector Size");

  Value *FinalVecV = PrevVecV[0];
  // ...
  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  // ...
  if (NewCost >= OrigCost)
    return false;

  auto *ReducedResult = /* ... */;
  replaceValue(I, *ReducedResult);
  return true;
}
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  // ...
  bool TruncOnly = false;
  // ...
  switch (/* ... */) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
    // ...
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
    // ...
  default:
    return false;
  }

  Value *ReductionSrc = I.getOperand(0);
  // ...
  Type *ResultTy = I.getType();
  // ...
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  // ...
  if (OldCost <= NewCost || !NewCost.isValid())
    return false;
  // ...
      II->getIntrinsicID(), {Src});
  // ...
  replaceValue(I, *NewCast);
  return true;
}
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI) {
  constexpr unsigned MaxVisited = 32;
  // ...
  bool FoundReduction = false;
  // ...
  while (!WorkList.empty()) {
    // ...
    for (User *U : I->users()) {
      // ...
      if (!UI || !Visited.insert(UI).second)
        continue;
      if (Visited.size() > MaxVisited)
        return false;
      // ...
      switch (II->getIntrinsicID()) {
      case Intrinsic::vector_reduce_add:
      case Intrinsic::vector_reduce_mul:
      case Intrinsic::vector_reduce_and:
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_xor:
      case Intrinsic::vector_reduce_smin:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_umax:
        FoundReduction = true;
        break;
      // ...
      }
      // ...
    }
  }
  return FoundReduction;
}
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  // ...
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
      /* ... */)
    return false;
  // ...
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
  // ...
    if (!I || I->getOperand(0)->getType() != VT)
      return true;
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
    });
  // ...
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    return false;
  // ...
    for (auto *U : I->users()) {
      // ...
      if (!SV || SV->getType() != VT)
        return false;
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
        return false;
      // ...
    }
  // ...
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
    return false;
  // ...
  if (FromReduction && Shuffles.size() > 1)
    return false;
  // ...
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
        // ...
          Shuffles.push_back(SSV);
        // ...
      }
    }
  }
  // ...
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);
    // ...
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    // ...
    if (SVOp0 == Op1 && SVOp1 == Op0) {
      // ...
      for (int &Elem : Mask) {
        // ...
      }
    }
    if (SVOp0 != Op0 || SVOp1 != Op1)
      return false;
    // ...
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      // ...
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        });
        // ...
      } else {
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
        });
        // ...
      }
    }
    // ...
    sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
  }
  // ...
  if (/* ... */ ||
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    return false;
  // ...
  auto GetBaseMaskValue = [&](Instruction *I, int M) {
    // ...
    if (InputShuffles.contains(SSV))
      /* ... */;
    return SV->getMaskValue(M);
  };
  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
    return MXA < MYA;
  };
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  });
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  });
  // ...
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
      auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      };
      // ...
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
      } else {
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
      }
    }
    ReconstructMasks.push_back(std::move(ReconstructMask));
  }
  // ...
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  }
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  }
  while (V1A.size() < NumElts) {
    // ...
  }
  while (V2A.size() < NumElts) {
    // ...
  }
  // ...
      VT, VT, SV->getShuffleMask(), CostKind);
  // ...
  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize = /* ... */;
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  if (MaxElementsInVector == 0)
    return false;
  // ...
  std::set<SmallVector<int, 4>> UniqueShuffles;
  auto AddShuffleMaskAdjustedCost = [&](InstructionCost C,
                                        ArrayRef<int> Mask) {
    // ...
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    SmallVector<int, 4> SubShuffle(MaxElementsInVector);
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    // ...
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    }
    return C + ShuffleCost * NumUniqueGroups / NumGroups;
  };
  auto AddShuffleAdjustedCost = [&](InstructionCost C, Instruction *I) {
    // ...
    SmallVector<int, 16> Mask;
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);
  };
  // ...
  auto AllShufflesHaveSameOperands =
      [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
        if (InputShuffles.size() < 2)
          return false;
        ShuffleVectorInst *FirstSV = /* ... */;
        // ...
        return std::all_of(
            std::next(InputShuffles.begin()), InputShuffles.end(),
            [&](Instruction *I) {
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
            });
      };
  // ...
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
                                /* ... */);
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                  /* ... */);
  } else {
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                  /* ... */);
  }
  // ...
  FixedVectorType *Op0SmallVT = /* ... */;
  FixedVectorType *Op1SmallVT = /* ... */;
  // ...
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
                               /* ... */);
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
                      /* ... */);

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
  LLVM_DEBUG(dbgs() << /* ... */ << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||
      /* ... */)
    return false;
  // ...
  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
    // ...
    if (InputShuffles.contains(SSV))
      /* ... */;
    return SV->getOperand(Op);
  };
  // ...
                                     GetShuffleOperand(SVI0A, 1), V1A);
  // ...
                                     GetShuffleOperand(SVI0B, 1), V1B);
  // ...
                                     GetShuffleOperand(SVI1A, 1), V2A);
  // ...
                                     GetShuffleOperand(SVI1B, 1), V2B);
  // ...
    I->copyIRFlags(Op0, true);
  // ...
    I->copyIRFlags(Op1, true);

  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    // ...
    replaceValue(*Shuffles[S], *NSV, false);
  }

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  // ...
}
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  // ...
  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);
  // ...
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
  // ...
  if (I.getOpcode() == Instruction::LShr) {
    // ...
  }
  // ...
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  // ...
  for (User *U : ZExtOperand->users()) {
    // ...
      ShrinkCost += ZExtCost;
    // ...
      ShrinkCost += ZExtCost;
    // ...
  }
  // ...
      Instruction::Trunc, SmallTy, BigTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  // ...
  if (ShrinkCost > CurrentCost)
    return false;

  Value *Op0 = ZExted;
  // ...
  if (I.getOperand(0) == OtherOperand)
    /* ... */;
  // ...
  replaceValue(I, *NewZExtr);
  return true;
}
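
// Sketch of the fold below (illustrative IR, assumed): inserting an element
// extracted from another vector is a two-source shuffle:
//   %e = extractelement <4 x i32> %src, i64 0
//   %r = insertelement <4 x i32> %dst, i32 %e, i64 2
// -->
//   %r = shufflevector <4 x i32> %dst, <4 x i32> %src,
//                      <4 x i32> <i32 0, i32 1, i32 4, i32 3>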
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  // ...
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())
    return false;

  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
    return false;
  // ...
  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
  // ...
  if (NeedDstSrcSwap) {
    // ...
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      /* ... */;
    else
      Mask[InsIdx] = ExtIdx;
    // ...
  } else {
    std::iota(Mask.begin(), Mask.end(), 0);
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = NumDstElts;
    else
      Mask[InsIdx] = ExtIdx + NumDstElts;
  }
  // ...
  SmallVector<int> ExtToVecMask;
  if (!NeedExpOrNarrow) {
    // ...
        nullptr, {DstVec, SrcVec});
  } else {
    // ...
    if (IsExtIdxInBounds)
      ExtToVecMask[ExtIdx] = ExtIdx;
    else
      ExtToVecMask[0] = ExtIdx;
    // ...
        DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
  }
  // ...
  if (!Ext->hasOneUse())
    /* ... */;

  LLVM_DEBUG(dbgs() << "Found an insert/extract shuffle-like pair: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << /* ... */);
  if (OldCost < NewCost)
    return false;

  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
      /* ... */;
    // ...
  }
  // ...
  replaceValue(I, *Shuf);
  return true;
}
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  const APInt *SplatVal0, *SplatVal1;
  // ...
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();
  // ...
  LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                    << *I.getType() << " is too high.\n");
  // ...
  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
  // ...
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
  // ...
}
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  // ...
  if (!OldLoad || !OldLoad->isSimple())
    return false;
  // ...
  unsigned const OldNumElements = OldLoadTy->getNumElements();
  // ...
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (llvm::Use &Use : I.uses()) {
      // ...
      User *Shuffle = Use.getUser();
      // ...
        return std::nullopt;
      // ...
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
        }
        // ...
      }
    }
    if (OutputRange.second < OutputRange.first)
      return std::nullopt;
    // ...
  };
  // ...
  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;
    // ...
    if (NewNumElements < OldNumElements) {
      // ...
      Type *ElemTy = OldLoadTy->getElementType();
      // ...
      Value *PtrOp = OldLoad->getPointerOperand();
      // ...
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
      // ...
          OldLoad->getPointerAddressSpace(), CostKind);

      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      // ...
      unsigned const MaxIndex = NewNumElements * 2u;
      // ...
      for (llvm::Use &Use : I.uses()) {
        // ...
        ArrayRef<int> OldMask = Shuffle->getShuffleMask();
        // ...
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex))
            /* ... */;
        }
        // ...
      }

      LLVM_DEBUG(
          dbgs() << "Found a load used only by shufflevector instructions: "
                 << I << "\n OldCost: " << OldCost
                 << " vs NewCost: " << NewCost << "\n");
      if (OldCost < NewCost || !NewCost.isValid())
        return false;
      // ...
      NewLoad->copyMetadata(I);
      // ...
      for (UseEntry &Use : NewUses) {
        ShuffleVectorInst *Shuffle = Use.first;
        std::vector<int> &NewMask = Use.second;
        // ...
        replaceValue(*Shuffle, *NewShuffle, false);
      }
      // ...
      return true;
    }
  }
  return false;
}
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  // ...
  if (!Phi || Phi->getNumIncomingValues() != 2u)
    return false;

  ArrayRef<int> Mask0;
  ArrayRef<int> Mask1;
  // ...
  auto const InputNumElements = InputVT->getNumElements();
  // ...
  if (InputNumElements >= ResultVT->getNumElements())
    return false;
  // ...
  SmallVector<int, 16> NewMask;
  // ...
  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
      /* ... */;
    else if (M0 == -1 && M1 == -1)
      /* ... */;
    // ...
  }
  // ...
  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
  // ...
  for (unsigned I = 0u; I < InputNumElements; ++I) {
    // ...
  }
  // ...
  LLVM_DEBUG(dbgs() << /* ... */ << "\n OldCost: " << OldCost
                    << " vs NewCost: " << NewCost << /* ... */);
  if (NewCost > OldCost)
    return false;
  // ...
  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  // ...
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));
  // ...
  replaceValue(*Phi, *NewShuf1);
  return true;
}
bool VectorCombine::run() {
  // ...
  auto FoldInst = [&](Instruction &I) {
    // ...
    auto Opcode = I.getOpcode();
    // ...
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
          return true;
        break;
      default:
        break;
      }
    }

    // ...
    if (scalarizeOpOrCmp(I))
      return true;
    if (scalarizeLoadExtract(I))
      return true;
    if (scalarizeExtExtract(I))
      return true;
    if (scalarizeVPIntrinsic(I))
      return true;
    if (foldInterleaveIntrinsics(I))
      return true;

    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
        return true;

    // ...
    if (TryEarlyFoldsOnly)
      return false;

    // ...
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
          return true;
        if (foldInsExtBinop(I))
          return true;
        if (foldInsExtVectorToShuffle(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
          return true;
        if (foldShuffleOfBinops(I))
          return true;
        if (foldShuffleOfSelects(I))
          return true;
        if (foldShuffleOfCastops(I))
          return true;
        if (foldShuffleOfShuffles(I))
          return true;
        if (foldShuffleOfIntrinsics(I))
          return true;
        if (foldSelectShuffle(I))
          return true;
        if (foldShuffleToIdentity(I))
          return true;
        break;
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
          return true;
        break;
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
          return true;
        break;
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
          return true;
        if (foldBitOpOfCastConstant(I))
          return true;
        break;
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
          return true;
        break;
      default:
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        if (foldShuffleFromReductions(I))
          return true;
        if (foldCastFromReductions(I))
          return true;
        break;
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
          return true;
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
          return true;
        break;
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
          return true;
        [[fallthrough]];
      default:
        if (foldExtractExtract(I))
          return true;
        if (foldExtractedCmps(I))
          return true;
        if (foldBinopOfReductions(I))
          return true;
        break;
      }
    }
    return false;
  };

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // ...
      if (!I->isDebugOrPseudoInst())
        MadeChange |= FoldInst(*I);
    // ...
  }
  // ...
  while (!Worklist.isEmpty()) {
    // ...
    MadeChange |= FoldInst(*I);
    // ...
  }

  return MadeChange;
}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
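A small, self-contained illustration of the APInt entries above (not code from this file):

#include "llvm/ADT/APInt.h"
using namespace llvm;

void apintDemo() {
  APInt Bit = APInt::getOneBitSet(/*numBits=*/8, /*BitNo=*/3); // 0b00001000 == 8
  APInt Wide = Bit.zext(32);                // zero-extend; the value stays 8
  bool AtLeast16 = Wide.uge(APInt(32, 16)); // unsigned 8 >= 16 -> false
  (void)AtLeast16;
}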
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if they match, or std::nullopt otherwise.
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of a value in this range and a value in Other.
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this range by a value in Other.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
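A hedged sketch of how these ConstantRange queries can prove a variable extract index in bounds, in the spirit of canScalarizeAccess above (the helper name is made up):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

bool indexProvablyInBounds(const Value *Idx, unsigned NumElts) {
  ConstantRange IdxRange = computeConstantRange(Idx, /*ForSigned=*/false);
  unsigned BW = IdxRange.getBitWidth();
  // Valid lane numbers are [0, NumElts); assumes NumElts fits in BW bits.
  ConstantRange ValidLanes(APInt::getZero(BW), APInt(BW, NumElts));
  return ValidLanes.contains(IdxRange);
}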
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
A parsed version of the target data layout string and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
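An assumed, minimal use of the IRBuilder calls listed above: scalarizing one lane of a vector add, roughly the shape of the rewrites this pass produces (names are illustrative):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *scalarizeLane0Add(IRBuilderBase &B, Value *X, Value *Y) {
  Value *X0 = B.CreateExtractElement(X, B.getInt64(0));
  Value *Y0 = B.CreateExtractElement(Y, B.getInt64(0));
  Value *Sum = B.CreateBinOp(Instruction::Add, X0, Y0);
  // Write the scalar result back into lane 0 of the original vector.
  return B.CreateInsertElement(X, Sum, B.getInt64(0));
}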
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this instruction.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instruction comes before Other.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have swapped position.
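A short sketch using the ShuffleVectorInst helpers above: read a shuffle's mask and test whether it is a no-op permutation of its first operand (the helper name is illustrative):

#include "llvm/IR/Instructions.h"
using namespace llvm;

bool isNopShuffle(ShuffleVectorInst *SVI) {
  SmallVector<int, 16> Mask;
  SVI->getShuffleMask(Mask);
  int NumSrcElts =
      cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
  return ShuffleVectorInst::isIdentityMask(Mask, NumSrcElts);
}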
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to false.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
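A hedged example of the PatternMatch entries above: matching "insertelement Dst, (extractelement Src, C1), C2" with both lane numbers constant (function and capture names are made up):

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

bool matchInsertOfExtract(Value *V, Value *&Dst, Value *&Src,
                          ConstantInt *&ExtIdx, ConstantInt *&InsIdx) {
  return match(V, m_InsertElt(m_Value(Dst),
                              m_ExtractElt(m_Value(Src), m_ConstantInt(ExtIdx)),
                              m_ConstantInt(InsIdx)));
}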
initializer< Ty > init(const Ty &Val)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence and B, C, ... are the values from the original input ranges.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
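For instance (a generic sketch, not this pass's worklist logic), early-increment iteration lets a loop erase the instruction it is visiting:

#include "llvm/ADT/STLExtras.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

void dropDeadInstructions(BasicBlock &BB) {
  for (Instruction &I : make_early_inc_range(BB))
    if (isInstructionTriviallyDead(&I))
      I.eraseFromParent(); // safe: the iterator already advanced past I
}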
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with an active sanitizer.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) that will be converted into a vector (I).
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed elements.
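A concrete example of the mask scaling above: narrowing a mask over 2 wide lanes to an equivalent mask over 4 narrow lanes (each wide index M becomes Scale consecutive indices starting at Scale * M):

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

void demoNarrowMask() {
  SmallVector<int, 8> Scaled;
  narrowShuffleMaskElts(/*Scale=*/2, /*Mask=*/{1, 0}, Scaled);
  // Scaled is now {2, 3, 0, 1}.
}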
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined value of C.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
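Finally, a hedged sketch combining the KnownBits entries above with computeKnownBits: bound how large a value can possibly be, e.g. to show a variable lane index stays below a vector's lane count (the helper name is made up):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

bool maxValueBelow(const Value *Idx, unsigned NumElts, const DataLayout &DL) {
  KnownBits Known(Idx->getType()->getScalarSizeInBits());
  computeKnownBits(Idx, Known, DL);
  return Known.getMaxValue().ult(NumElts);
}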