1//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallBitVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/InstructionSimplify.h"
23#include "llvm/Analysis/VectorUtils.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constant.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Instructions.h"
31#include "llvm/IR/Operator.h"
32#include "llvm/IR/PatternMatch.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/User.h"
35#include "llvm/IR/Value.h"
36#include "llvm/Support/Casting.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Transforms/InstCombine/InstCombiner.h"
39#include <cassert>
40#include <cstdint>
41#include <iterator>
42#include <utility>
43
44#define DEBUG_TYPE "instcombine"
45
46using namespace llvm;
47using namespace PatternMatch;
48
49STATISTIC(NumAggregateReconstructionsSimplified,
50 "Number of aggregate reconstructions turned into reuse of the "
51 "original aggregate");
52
53/// Return true if the value is cheaper to scalarize than it is to leave as a
54/// vector operation. If the extract index \p EI is a constant integer then
55/// some operations may be cheap to scalarize.
56///
57/// FIXME: It's possible to create more instructions than previously existed.
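/// For example, a single-use load, a single-use unary op, or a single-use
/// binop/cmp with an operand that is itself cheap to scalarize all qualify.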
58static bool cheapToScalarize(Value *V, Value *EI) {
59 ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
60
61 // If we can pick a scalar constant value out of a vector, that is free.
62 if (auto *C = dyn_cast<Constant>(V))
63 return CEI || C->getSplatValue();
64
65 if (CEI && match(V, m_Intrinsic<Intrinsic::stepvector>())) {
66 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
67 // Index needs to be lower than the minimum size of the vector, because
68 // for scalable vector, the vector size is known at run time.
69 return CEI->getValue().ult(EC.getKnownMinValue());
70 }
71
72 // An insertelement to the same constant index as our extract will simplify
73 // to the scalar inserted element. An insertelement to a different constant
74 // index is irrelevant to our extract.
75 if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
76 return CEI;
77
78 if (match(V, m_OneUse(m_Load(m_Value()))))
79 return true;
80
81 if (match(V, m_OneUse(m_UnOp())))
82 return true;
83
84 Value *V0, *V1;
85 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
86 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
87 return true;
88
89 CmpPredicate UnusedPred;
90 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
91 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
92 return true;
93
94 return false;
95}
96
97// If we have a PHI node with a vector type that is only used to feed
98// itself and be an operand of extractelement at a constant location,
99// try to replace the PHI of the vector type with a PHI of a scalar type.
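// For example (illustrative IR), a loop-carried vector PHI such as:
//   %vec = phi <4 x float> [ %init, %entry ], [ %vec.next, %loop ]
//   %vec.next = fadd <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0>
//   %x = extractelement <4 x float> %vec, i64 0
// can become a scalar PHI feeding a scalar fadd, with the constant operand
// scalarized to its element at index 0.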
100Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101 PHINode *PN) {
102 SmallVector<Instruction *, 2> Extracts;
103 // The users we want the PHI to have are:
104 // 1) The EI ExtractElement (we already know this)
105 // 2) Possibly more ExtractElements with the same index.
106 // 3) Another operand, which will feed back into the PHI.
107 Instruction *PHIUser = nullptr;
108 for (auto *U : PN->users()) {
109 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
110 if (EI.getIndexOperand() == EU->getIndexOperand())
111 Extracts.push_back(EU);
112 else
113 return nullptr;
114 } else if (!PHIUser) {
115 PHIUser = cast<Instruction>(U);
116 } else {
117 return nullptr;
118 }
119 }
120
121 if (!PHIUser)
122 return nullptr;
123
124 // Verify that this PHI user has one use, which is the PHI itself,
125 // and that it is a binary operation which is cheap to scalarize.
126 // otherwise return nullptr.
127 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
128 !(isa<BinaryOperator>(PHIUser)) ||
129 !cheapToScalarize(PHIUser, EI.getIndexOperand()))
130 return nullptr;
131
132 // Create a scalar PHI node that will replace the vector PHI node
133 // just before the current PHI node.
134 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
135 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), PN->getIterator()));
136 // Scalarize each PHI operand.
137 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
138 Value *PHIInVal = PN->getIncomingValue(i);
139 BasicBlock *inBB = PN->getIncomingBlock(i);
140 Value *Elt = EI.getIndexOperand();
141 // If the operand is the PHI induction variable:
142 if (PHIInVal == PHIUser) {
143 // Scalarize the binary operation. Its first operand is the
144 // scalar PHI, and the second operand is extracted from the other
145 // vector operand.
146 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
147 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
148 Value *Op = InsertNewInstWith(
149 ExtractElementInst::Create(B0->getOperand(opId), Elt,
150 B0->getOperand(opId)->getName() + ".Elt"),
151 B0->getIterator());
152 Value *newPHIUser = InsertNewInstWith(
153 BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
154 scalarPHI, Op, B0), B0->getIterator());
155 scalarPHI->addIncoming(newPHIUser, inBB);
156 } else {
157 // Scalarize PHI input:
158 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
159 // Insert the new instruction into the predecessor basic block.
160 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
161 BasicBlock::iterator InsertPos;
162 if (pos && !isa<PHINode>(pos)) {
163 InsertPos = ++pos->getIterator();
164 } else {
165 InsertPos = inBB->getFirstInsertionPt();
166 }
167
168 InsertNewInstWith(newEI, InsertPos);
169
170 scalarPHI->addIncoming(newEI, inBB);
171 }
172 }
173
174 for (auto *E : Extracts) {
175 replaceInstUsesWith(*E, scalarPHI);
176 // Add old extract to worklist for DCE.
177 addToWorklist(E);
178 }
179
180 return &EI;
181}
182
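/// Fold an extractelement whose vector operand is a bitcast: e.g. pull a
/// narrow scalar directly out of a bitcast from a wide integer (as a
/// shift + truncate), or out of the scalar that was inserted into the
/// source vector of the bitcast.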
183Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
184 Value *X;
185 uint64_t ExtIndexC;
186 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
187 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
188 return nullptr;
189
190 ElementCount NumElts =
191 cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
192 Type *DestTy = Ext.getType();
193 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
194 bool IsBigEndian = DL.isBigEndian();
195
196 // If we are casting an integer to vector and extracting a portion, that is
197 // a shift-right and truncate.
198 if (X->getType()->isIntegerTy()) {
199 assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
200 "Expected fixed vector type for bitcast from scalar integer");
201
202 // Big endian requires adjusting the extract index since MSB is at index 0.
203 // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
204 // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
205 if (IsBigEndian)
206 ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
207 unsigned ShiftAmountC = ExtIndexC * DestWidth;
208 if ((!ShiftAmountC ||
209 isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
210 Ext.getVectorOperand()->hasOneUse()) {
211 if (ShiftAmountC)
212 X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
213 if (DestTy->isFloatingPointTy()) {
214 Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
215 Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
216 return new BitCastInst(Trunc, DestTy);
217 }
218 return new TruncInst(X, DestTy);
219 }
220 }
221
222 if (!X->getType()->isVectorTy())
223 return nullptr;
224
225 // If this extractelement is using a bitcast from a vector of the same number
226 // of elements, see if we can find the source element from the source vector:
227 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
228 auto *SrcTy = cast<VectorType>(X->getType());
229 ElementCount NumSrcElts = SrcTy->getElementCount();
230 if (NumSrcElts == NumElts)
231 if (Value *Elt = findScalarElement(X, ExtIndexC))
232 return new BitCastInst(Elt, DestTy);
233
234 assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
235 "Src and Dst must be the same sort of vector type");
236
237 // If the source elements are wider than the destination, try to shift and
238 // truncate a subset of scalar bits of an insert op.
239 if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
240 Value *Scalar;
241 Value *Vec;
242 uint64_t InsIndexC;
243 if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
244 m_ConstantInt(InsIndexC))))
245 return nullptr;
246
247 // The extract must be from the subset of vector elements that we inserted
248 // into. Example: if we inserted element 1 of a <2 x i64> and we are
249 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
250 // of elements 4-7 of the bitcasted vector.
251 unsigned NarrowingRatio =
252 NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
253
254 if (ExtIndexC / NarrowingRatio != InsIndexC) {
255 // Remove insertelement, if we don't use the inserted element.
256 // extractelement (bitcast (insertelement (Vec, b)), a) ->
257 // extractelement (bitcast (Vec), a)
258 // FIXME: this should be moved to SimplifyDemandedVectorElts,
259 // once scalable vectors are supported.
260 if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
261 Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
262 return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
263 }
264 return nullptr;
265 }
266
267 // We are extracting part of the original scalar. How that scalar is
268 // inserted into the vector depends on the endian-ness. Example:
269 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
270 // +--+--+--+--+--+--+--+--+
271 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
272 // extelt <4 x i16> V', 3: | |S2|S3|
273 // +--+--+--+--+--+--+--+--+
274 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
275 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
276 // In this example, we must right-shift little-endian. Big-endian is just a
277 // truncate.
278 unsigned Chunk = ExtIndexC % NarrowingRatio;
279 if (IsBigEndian)
280 Chunk = NarrowingRatio - 1 - Chunk;
281
282 // Bail out if this is an FP vector to FP vector sequence. That would take
283 // more instructions than we started with unless there is no shift, and it
284 // may not be handled as well in the backend.
285 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
286 bool NeedDestBitcast = DestTy->isFloatingPointTy();
287 if (NeedSrcBitcast && NeedDestBitcast)
288 return nullptr;
289
290 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
291 unsigned ShAmt = Chunk * DestWidth;
292
293 // TODO: This limitation is more strict than necessary. We could sum the
294 // number of new instructions and subtract the number eliminated to know if
295 // we can proceed.
296 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
297 if (NeedSrcBitcast || NeedDestBitcast)
298 return nullptr;
299
300 if (NeedSrcBitcast) {
301 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
302 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
303 }
304
305 if (ShAmt) {
306 // Bail out if we could end with more instructions than we started with.
307 if (!Ext.getVectorOperand()->hasOneUse())
308 return nullptr;
309 Scalar = Builder.CreateLShr(Scalar, ShAmt);
310 }
311
312 if (NeedDestBitcast) {
313 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
314 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
315 }
316 return new TruncInst(Scalar, DestTy);
317 }
318
319 return nullptr;
320}
321
322/// Find elements of V demanded by UserInstr. If this returns false, we were
323/// not able to determine all elements.
324static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
325 APInt &UnionUsedElts) {
326 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
327
328 switch (UserInstr->getOpcode()) {
329 case Instruction::ExtractElement: {
330 ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
331 assert(EEI->getVectorOperand() == V);
332 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
333 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
334 UnionUsedElts.setBit(EEIIndexC->getZExtValue());
335 return true;
336 }
337 break;
338 }
339 case Instruction::ShuffleVector: {
340 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
341 unsigned MaskNumElts =
342 cast<FixedVectorType>(UserInstr->getType())->getNumElements();
343
344 for (auto I : llvm::seq(MaskNumElts)) {
345 unsigned MaskVal = Shuffle->getMaskValue(I);
346 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
347 continue;
348 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
349 UnionUsedElts.setBit(MaskVal);
350 if (Shuffle->getOperand(1) == V &&
351 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
352 UnionUsedElts.setBit(MaskVal - VWidth);
353 }
354 return true;
355 }
356 default:
357 break;
358 }
359
360 return false;
361}
362
363/// Find union of elements of V demanded by all its users.
364/// If it is known by querying findDemandedEltsBySingleUser that
365/// no user demands an element of V, then the corresponding bit
366/// remains unset in the returned value.
367static APInt findDemandedEltsByAllUsers(Value *V) {
368 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
369
370 APInt UnionUsedElts(VWidth, 0);
371 for (const Use &U : V->uses()) {
372 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
373 if (!findDemandedEltsBySingleUser(V, I, UnionUsedElts))
374 return APInt::getAllOnes(VWidth);
375 } else {
376 UnionUsedElts = APInt::getAllOnes(VWidth);
377 break;
378 }
379
380 if (UnionUsedElts.isAllOnes())
381 break;
382 }
383
384 return UnionUsedElts;
385}
386
387/// Given a constant index for an extractelement or insertelement instruction,
388/// return it with the canonical type if it isn't already canonical. We
389/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
390/// matter, we just want a consistent type to simplify CSE.
391static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
392 const unsigned IndexBW = IndexC->getBitWidth();
393 if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
394 return nullptr;
395 return ConstantInt::get(IndexC->getContext(),
396 IndexC->getValue().zextOrTrunc(64));
397}
398
399Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
400 Value *SrcVec = EI.getVectorOperand();
401 Value *Index = EI.getIndexOperand();
402 if (Value *V = simplifyExtractElementInst(SrcVec, Index,
403 SQ.getWithInstruction(&EI)))
404 return replaceInstUsesWith(EI, V);
405
406 // extractelt (select %x, %vec1, %vec2), %const ->
407 // select %x, %vec1[%const], %vec2[%const]
408 // TODO: Support constant folding of multiple select operands:
409 // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
410 // If the extractelement will for instance try to do out of bounds accesses
411 // because of the values of %c1 and/or %c2, the sequence could be optimized
412 // early. This is currently not possible because constant folding will reach
413 // an unreachable assertion if it doesn't find a constant operand.
414 if (auto *SI = dyn_cast<SelectInst>(SrcVec))
415 if (SI->getCondition()->getType()->isIntegerTy() &&
416 isGuaranteedNotToBePoison(SI->getCondition(), &AC, &EI, &DT))
417 if (Instruction *R = FoldOpIntoSelect(EI, SI))
418 return R;
419
420 // If extracting a specified index from the vector, see if we can recursively
421 // find a previously computed scalar that was inserted into the vector.
422 auto *IndexC = dyn_cast<ConstantInt>(Index);
423 bool HasKnownValidIndex = false;
424 if (IndexC) {
425 // Canonicalize type of constant indices to i64 to simplify CSE
426 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
427 return replaceOperand(EI, 1, NewIdx);
428
429 ElementCount EC = EI.getVectorOperandType()->getElementCount();
430 unsigned NumElts = EC.getKnownMinValue();
431 HasKnownValidIndex = IndexC->getValue().ult(NumElts);
432
433 if (auto *II = dyn_cast<IntrinsicInst>(SrcVec)) {
434 Intrinsic::ID IID = II->getIntrinsicID();
435 // Index needs to be lower than the minimum size of the vector, because
436 // for scalable vector, the vector size is known at run time.
437 if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
438 Type *Ty = EI.getType();
439 unsigned BitWidth = Ty->getIntegerBitWidth();
440 Value *Idx;
441 // Return index when its value does not exceed the allowed limit
442 // for the element type of the vector, otherwise return undefined.
443 if (IndexC->getValue().getActiveBits() <= BitWidth)
444 Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
445 else
446 Idx = PoisonValue::get(Ty);
447 return replaceInstUsesWith(EI, Idx);
448 }
449 }
450
451 // InstSimplify should handle cases where the index is invalid.
452 // For fixed-length vector, it's invalid to extract out-of-range element.
453 if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
454 return nullptr;
455
456 if (Instruction *I = foldBitcastExtElt(EI))
457 return I;
458
459 // If there's a vector PHI feeding a scalar use through this extractelement
460 // instruction, try to scalarize the PHI.
461 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
462 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
463 return ScalarPHI;
464 }
465
466 // If SrcVec is a subvector starting at index 0, extract from the
467 // wider source vector
468 Value *V;
469 if (match(SrcVec,
470 m_Intrinsic<Intrinsic::vector_extract>(m_Value(V), m_Zero())))
471 return ExtractElementInst::Create(V, Index);
472
473 // TODO come up with a n-ary matcher that subsumes both unary and
474 // binary matchers.
475 UnaryOperator *UO;
476 if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
477 // extelt (unop X), Index --> unop (extelt X, Index)
478 Value *X = UO->getOperand(0);
479 Value *E = Builder.CreateExtractElement(X, Index);
480 return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
481 }
482
483 // If the binop is not speculatable, we cannot hoist the extractelement if
484 // it may make the operand poison.
485 BinaryOperator *BO;
486 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
487 (HasKnownValidIndex ||
488 isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {
489 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
490 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
491 Value *E0 = Builder.CreateExtractElement(X, Index);
492 Value *E1 = Builder.CreateExtractElement(Y, Index);
493 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
494 }
495
496 Value *X, *Y;
497 CmpPredicate Pred;
498 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
499 cheapToScalarize(SrcVec, Index)) {
500 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
501 Value *E0 = Builder.CreateExtractElement(X, Index);
502 Value *E1 = Builder.CreateExtractElement(Y, Index);
503 CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
504 return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
505 SrcCmpInst);
506 }
507
508 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
509 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
510 // InstSimplify already handled the case where the indices are constants
511 // and equal by value. If both are constants, they must not be the same
512 // value; extract from the pre-inserted value instead.
513 if (isa<Constant>(IE->getOperand(2)) && IndexC)
514 return replaceOperand(EI, 0, IE->getOperand(0));
515 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
516 auto *VecType = cast<VectorType>(GEP->getType());
517 ElementCount EC = VecType->getElementCount();
518 uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
519 if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
520 // Find out why we have a vector result - these are a few examples:
521 // 1. We have a scalar pointer and a vector of indices, or
522 // 2. We have a vector of pointers and a scalar index, or
523 // 3. We have a vector of pointers and a vector of indices, etc.
524 // Here we only consider combining when there is exactly one vector
525 // operand, since the optimization is less obviously a win due to
526 // needing more than one extractelement.
527
528 unsigned VectorOps =
529 llvm::count_if(GEP->operands(), [](const Value *V) {
530 return isa<VectorType>(V->getType());
531 });
532 if (VectorOps == 1) {
533 Value *NewPtr = GEP->getPointerOperand();
534 if (isa<VectorType>(NewPtr->getType()))
535 NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
536
537 SmallVector<Value *> NewOps;
538 for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
539 Value *Op = GEP->getOperand(I);
540 if (isa<VectorType>(Op->getType()))
541 NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
542 else
543 NewOps.push_back(Op);
544 }
545
546 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
547 GEP->getSourceElementType(), NewPtr, NewOps);
548 NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
549 return NewGEP;
550 }
551 }
552 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
553 int SplatIndex = getSplatIndex(SVI->getShuffleMask());
554 // We know the all-0 splat must be reading from the first operand, even
555 // in the case of scalable vectors (vscale is always > 0).
556 if (SplatIndex == 0)
557 return ExtractElementInst::Create(SVI->getOperand(0),
558 Builder.getInt64(0));
559
560 if (isa<FixedVectorType>(SVI->getType())) {
561 std::optional<int> SrcIdx;
562 // getSplatIndex returns -1 to mean not-found.
563 if (SplatIndex != -1)
564 SrcIdx = SplatIndex;
565 else if (ConstantInt *CI = dyn_cast<ConstantInt>(Index))
566 SrcIdx = SVI->getMaskValue(CI->getZExtValue());
567
568 if (SrcIdx) {
569 Value *Src;
570 unsigned LHSWidth =
571 cast<FixedVectorType>(SVI->getOperand(0)->getType())
572 ->getNumElements();
573
574 if (*SrcIdx < 0)
575 return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
576 if (*SrcIdx < (int)LHSWidth)
577 Src = SVI->getOperand(0);
578 else {
579 *SrcIdx -= LHSWidth;
580 Src = SVI->getOperand(1);
581 }
582 Type *Int64Ty = Type::getInt64Ty(EI.getContext());
583 return ExtractElementInst::Create(
584 Src, ConstantInt::get(Int64Ty, *SrcIdx, false));
585 }
586 }
587 } else if (auto *CI = dyn_cast<CastInst>(I)) {
588 // Canonicalize extractelement(cast) -> cast(extractelement).
589 // Bitcasts can change the number of vector elements, and they cost
590 // nothing.
591 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
592 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
593 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
594 }
595 }
596 }
597
598 // Run demanded elements after other transforms as this can drop flags on
599 // binops. If there are two paths to the same final result, we prefer the
600 // one which doesn't force us to drop flags.
601 if (IndexC) {
602 ElementCount EC = EI.getVectorOperandType()->getElementCount();
603 unsigned NumElts = EC.getKnownMinValue();
604 // This instruction only demands the single element from the input vector.
605 // Skip for scalable type, the number of elements is unknown at
606 // compile-time.
607 if (!EC.isScalable() && NumElts != 1) {
608 // If the input vector has a single use, simplify it based on this use
609 // property.
610 if (SrcVec->hasOneUse()) {
611 APInt PoisonElts(NumElts, 0);
612 APInt DemandedElts(NumElts, 0);
613 DemandedElts.setBit(IndexC->getZExtValue());
614 if (Value *V =
615 SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
616 return replaceOperand(EI, 0, V);
617 } else {
618 // If the input vector has multiple uses, simplify it based on a union
619 // of all elements used.
620 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
621 if (!DemandedElts.isAllOnes()) {
622 APInt PoisonElts(NumElts, 0);
623 if (Value *V = SimplifyDemandedVectorElts(
624 SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
625 true /* AllowMultipleUsers */)) {
626 if (V != SrcVec) {
627 Worklist.addValue(SrcVec);
628 SrcVec->replaceAllUsesWith(V);
629 return &EI;
630 }
631 }
632 }
633 }
634 }
635 }
636 return nullptr;
637}
638
639/// If V is a shuffle of values that ONLY returns elements from either LHS or
640/// RHS, return the shuffle mask and true. Otherwise, return false.
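/// For example, with <4 x i32> operands:
///   %e = extractelement <4 x i32> %RHS, i64 2
///   %v = insertelement <4 x i32> %LHS, i32 %e, i64 0
/// is equivalent to a single shuffle of LHS and RHS with mask <6, 1, 2, 3>.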
641static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
642 SmallVectorImpl<int> &Mask) {
643 assert(LHS->getType() == RHS->getType() &&
644 "Invalid CollectSingleShuffleElements");
645 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
646
647 if (match(V, m_Poison())) {
648 Mask.assign(NumElts, -1);
649 return true;
650 }
651
652 if (V == LHS) {
653 for (unsigned i = 0; i != NumElts; ++i)
654 Mask.push_back(i);
655 return true;
656 }
657
658 if (V == RHS) {
659 for (unsigned i = 0; i != NumElts; ++i)
660 Mask.push_back(i + NumElts);
661 return true;
662 }
663
664 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
665 // If this is an insert of an extract from some other vector, include it.
666 Value *VecOp = IEI->getOperand(0);
667 Value *ScalarOp = IEI->getOperand(1);
668 Value *IdxOp = IEI->getOperand(2);
669
670 if (!isa<ConstantInt>(IdxOp))
671 return false;
672 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
673
674 if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
675 // We can handle this if the vector we are inserting into is
676 // transitively ok.
677 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
678 // If so, update the mask to reflect the inserted poison.
679 Mask[InsertedIdx] = -1;
680 return true;
681 }
682 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
683 if (isa<ConstantInt>(EI->getOperand(1))) {
684 unsigned ExtractedIdx =
685 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
686 unsigned NumLHSElts =
687 cast<FixedVectorType>(LHS->getType())->getNumElements();
688
689 // This must be extracting from either LHS or RHS.
690 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
691 // We can handle this if the vector we are inserting into is
692 // transitively ok.
693 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
694 // If so, update the mask to reflect the inserted value.
695 if (EI->getOperand(0) == LHS) {
696 Mask[InsertedIdx % NumElts] = ExtractedIdx;
697 } else {
698 assert(EI->getOperand(0) == RHS);
699 Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
700 }
701 return true;
702 }
703 }
704 }
705 }
706 }
707
708 return false;
709}
710
711/// If we have insertion into a vector that is wider than the vector that we
712/// are extracting from, try to widen the source vector to allow a single
713/// shufflevector to replace one or more insert/extract pairs.
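/// For example (illustrative IR), if extracts from a <2 x i8> source feed
/// inserts into a <4 x i8> vector, the source is first widened with
///   shufflevector <2 x i8> %src, <2 x i8> poison,
///                 <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
/// so that the insert/extract pairs can later collapse into one shuffle.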
714static bool replaceExtractElements(InsertElementInst *InsElt,
715 ExtractElementInst *ExtElt,
716 InstCombinerImpl &IC) {
717 auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
718 auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
719 unsigned NumInsElts = InsVecType->getNumElements();
720 unsigned NumExtElts = ExtVecType->getNumElements();
721
722 // The inserted-to vector must be wider than the extracted-from vector.
723 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
724 NumExtElts >= NumInsElts)
725 return false;
726
727 Value *ExtVecOp = ExtElt->getVectorOperand();
728 // Bail out on constant vectors.
729 if (isa<ConstantData>(ExtVecOp))
730 return false;
731
732 // Create a shuffle mask to widen the extended-from vector using poison
733 // values. The mask selects all of the values of the original vector followed
734 // by as many poison values as needed to create a vector of the same length
735 // as the inserted-to vector.
736 SmallVector<int, 16> ExtendMask;
737 for (unsigned i = 0; i < NumExtElts; ++i)
738 ExtendMask.push_back(i);
739 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
740 ExtendMask.push_back(-1);
741
742 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
743 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
744 ? ExtVecOpInst->getParent()
745 : ExtElt->getParent();
746
747 // TODO: This restriction matches the basic block check below when creating
748 // new extractelement instructions. If that limitation is removed, this one
749 // could also be removed. But for now, we just bail out to ensure that we
750 // will replace the extractelement instruction that is feeding our
751 // insertelement instruction. This allows the insertelement to then be
752 // replaced by a shufflevector. If the insertelement is not replaced, we can
753 // induce infinite looping because there's an optimization for extractelement
754 // that will delete our widening shuffle. This would trigger another attempt
755 // here to create that shuffle, and we spin forever.
756 if (InsertionBlock != InsElt->getParent())
757 return false;
758
759 // TODO: This restriction matches the check in visitInsertElementInst() and
760 // prevents an infinite loop caused by not turning the extract/insert pair
761 // into a shuffle. We really should not need either check, but we're lacking
762 // folds for shufflevectors because we're afraid to generate shuffle masks
763 // that the backend can't handle.
764 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
765 return false;
766
767 auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
768
769 // Insert the new shuffle after the vector operand of the extract is defined
770 // (as long as it's not a PHI) or at the start of the basic block of the
771 // extract, so any subsequent extracts in the same basic block can use it.
772 // TODO: Insert before the earliest ExtractElementInst that is replaced.
773 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
774 WideVec->insertAfter(ExtVecOpInst->getIterator());
775 else
776 IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());
777
778 // Replace extracts from the original narrow vector with extracts from the new
779 // wide vector.
780 for (User *U : ExtVecOp->users()) {
781 auto *OldExt = dyn_cast<ExtractElementInst>(U);
782 if (!OldExt || OldExt->getParent() != WideVec->getParent())
783 continue;
784 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
785 IC.InsertNewInstWith(NewExt, OldExt->getIterator());
786 IC.replaceInstUsesWith(*OldExt, NewExt);
787 // Add the old extracts to the worklist for DCE. We can't remove the
788 // extracts directly, because they may still be used by the calling code.
789 IC.addToWorklist(OldExt);
790 }
791
792 return true;
793}
794
795/// We are building a shuffle to create V, which is a sequence of insertelement,
796/// extractelement pairs. If PermittedRHS is set, then we must either use it or
797/// not rely on the second vector source. Return a std::pair containing the
798/// left and right vectors of the proposed shuffle (or 0), and set the Mask
799/// parameter as required.
800///
801/// Note: we intentionally don't try to fold earlier shuffles since they have
802/// often been chosen carefully to be efficiently implementable on the target.
803using ShuffleOps = std::pair<Value *, Value *>;
804
805static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
806 Value *PermittedRHS,
807 InstCombinerImpl &IC, bool &Rerun) {
808 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
809 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
810
811 if (match(V, m_Poison())) {
812 Mask.assign(NumElts, -1);
813 return std::make_pair(
814 PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
815 }
816
817 if (isa<ConstantAggregateZero>(V)) {
818 Mask.assign(NumElts, 0);
819 return std::make_pair(V, nullptr);
820 }
821
822 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
823 // If this is an insert of an extract from some other vector, include it.
824 Value *VecOp = IEI->getOperand(0);
825 Value *ScalarOp = IEI->getOperand(1);
826 Value *IdxOp = IEI->getOperand(2);
827
828 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
829 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
830 unsigned ExtractedIdx =
831 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
832 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
833
834 // Either the extracted from or inserted into vector must be RHSVec,
835 // otherwise we'd end up with a shuffle of three inputs.
836 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
837 Value *RHS = EI->getOperand(0);
838 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
839 assert(LR.second == nullptr || LR.second == RHS);
840
841 if (LR.first->getType() != RHS->getType()) {
842 // Although we are giving up for now, see if we can create extracts
843 // that match the inserts for another round of combining.
844 if (replaceExtractElements(IEI, EI, IC))
845 Rerun = true;
846
847 // We tried our best, but we can't find anything compatible with RHS
848 // further up the chain. Return a trivial shuffle.
849 for (unsigned i = 0; i < NumElts; ++i)
850 Mask[i] = i;
851 return std::make_pair(V, nullptr);
852 }
853
854 unsigned NumLHSElts =
855 cast<FixedVectorType>(RHS->getType())->getNumElements();
856 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
857 return std::make_pair(LR.first, RHS);
858 }
859
860 if (VecOp == PermittedRHS) {
861 // We've gone as far as we can: anything on the other side of the
862 // extractelement will already have been converted into a shuffle.
863 unsigned NumLHSElts =
864 cast<FixedVectorType>(EI->getOperand(0)->getType())
865 ->getNumElements();
866 for (unsigned i = 0; i != NumElts; ++i)
867 Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
868 return std::make_pair(EI->getOperand(0), PermittedRHS);
869 }
870
871 // If this insertelement is a chain that comes from exactly these two
872 // vectors, return the vector and the effective shuffle.
873 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
874 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
875 Mask))
876 return std::make_pair(EI->getOperand(0), PermittedRHS);
877 }
878 }
879 }
880
881 // Otherwise, we can't do anything fancy. Return an identity vector.
882 for (unsigned i = 0; i != NumElts; ++i)
883 Mask.push_back(i);
884 return std::make_pair(V, nullptr);
885}
886
887/// Look for chain of insertvalue's that fully define an aggregate, and trace
888/// back the values inserted, see if they all were extractvalue'd from
889/// the same source aggregate from the exact same element indexes.
890/// If they were, just reuse the source aggregate.
891/// This potentially deals with PHI indirections.
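/// For example:
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } poison, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// merely rebuilds %agg, so %i1 can be replaced by %agg itself.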
892Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
893 InsertValueInst &OrigIVI) {
894 Type *AggTy = OrigIVI.getType();
895 unsigned NumAggElts;
896 switch (AggTy->getTypeID()) {
897 case Type::StructTyID:
898 NumAggElts = AggTy->getStructNumElements();
899 break;
900 case Type::ArrayTyID:
901 NumAggElts = AggTy->getArrayNumElements();
902 break;
903 default:
904 llvm_unreachable("Unhandled aggregate type?");
905 }
906
907 // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
908 // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
909 // FIXME: any interesting patterns to be caught with larger limit?
910 assert(NumAggElts > 0 && "Aggregate should have elements.");
911 if (NumAggElts > 2)
912 return nullptr;
913
914 static constexpr auto NotFound = std::nullopt;
915 static constexpr auto FoundMismatch = nullptr;
916
917 // Try to find a value of each element of an aggregate.
918 // FIXME: deal with more complex, not one-dimensional, aggregate types
919 SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
920
921 // Do we know values for each element of the aggregate?
922 auto KnowAllElts = [&AggElts]() {
923 return !llvm::is_contained(AggElts, NotFound);
924 };
925
926 int Depth = 0;
927
928 // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
929 // every element being overwritten twice, which should never happen.
930 static const int DepthLimit = 2 * NumAggElts;
931
932 // Recurse up the chain of `insertvalue` aggregate operands until either we've
933 // reconstructed full initializer or can't visit any more `insertvalue`'s.
934 for (InsertValueInst *CurrIVI = &OrigIVI;
935 Depth < DepthLimit && CurrIVI && !KnowAllElts();
936 CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
937 ++Depth) {
938 auto *InsertedValue =
939 dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
940 if (!InsertedValue)
941 return nullptr; // Inserted value must be produced by an instruction.
942
943 ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
944
945 // Don't bother with more than single-level aggregates.
946 if (Indices.size() != 1)
947 return nullptr; // FIXME: deal with more complex aggregates?
948
949 // Now, we may have already previously recorded the value for this element
950 // of an aggregate. If we did, that means the CurrIVI will later be
951 // overwritten with the already-recorded value. But if not, let's record it!
952 std::optional<Instruction *> &Elt = AggElts[Indices.front()];
953 Elt = Elt.value_or(InsertedValue);
954
955 // FIXME: should we handle chain-terminating undef base operand?
956 }
957
958 // Was that sufficient to deduce the full initializer for the aggregate?
959 if (!KnowAllElts())
960 return nullptr; // Give up then.
961
962 // We now want to find the source[s] of the aggregate elements we've found.
963 // And with "source" we mean the original aggregate[s] from which
964 // the inserted elements were extracted. This may require PHI translation.
965
966 enum class AggregateDescription {
967 /// When analyzing the value that was inserted into an aggregate, we did
968 /// not manage to find defining `extractvalue` instruction to analyze.
969 NotFound,
970 /// When analyzing the value that was inserted into an aggregate, we did
971 /// manage to find defining `extractvalue` instruction[s], and everything
972 /// matched perfectly - aggregate type, element insertion/extraction index.
973 Found,
974 /// When analyzing the value that was inserted into an aggregate, we did
975 /// manage to find defining `extractvalue` instruction, but there was
976 /// a mismatch: either the source type from which the extraction was didn't
977 /// match the aggregate type into which the insertion was,
978 /// or the extraction/insertion channels mismatched,
979 /// or different elements had different source aggregates.
980 FoundMismatch
981 };
982 auto Describe = [](std::optional<Value *> SourceAggregate) {
983 if (SourceAggregate == NotFound)
984 return AggregateDescription::NotFound;
985 if (*SourceAggregate == FoundMismatch)
986 return AggregateDescription::FoundMismatch;
987 return AggregateDescription::Found;
988 };
989
990 // If an aggregate element is defined in UseBB, we can't use it in PredBB.
991 bool EltDefinedInUseBB = false;
992
993 // Given the value \p Elt that was being inserted into element \p EltIdx of an
994 // aggregate AggTy, see if \p Elt was originally defined by an
995 // appropriate extractvalue (same element index, same aggregate type).
996 // If found, return the source aggregate from which the extraction was.
997 // If \p PredBB is provided, does PHI translation of an \p Elt first.
998 auto FindSourceAggregate =
999 [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
1000 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1001 // For now(?), only deal with, at most, a single level of PHI indirection.
1002 if (UseBB && PredBB) {
1003 Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
1004 if (Elt && Elt->getParent() == *UseBB)
1005 EltDefinedInUseBB = true;
1006 }
1007 // FIXME: deal with multiple levels of PHI indirection?
1008
1009 // Did we find an extraction?
1010 auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
1011 if (!EVI)
1012 return NotFound;
1013
1014 Value *SourceAggregate = EVI->getAggregateOperand();
1015
1016 // Is the extraction from the same type into which the insertion was?
1017 if (SourceAggregate->getType() != AggTy)
1018 return FoundMismatch;
1019 // And the element index doesn't change between extraction and insertion?
1020 if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
1021 return FoundMismatch;
1022
1023 return SourceAggregate; // AggregateDescription::Found
1024 };
1025
1026 // Given elements AggElts that were constructing an aggregate OrigIVI,
1027 // see if we can find appropriate source aggregate for each of the elements,
1028 // and see if it's the same aggregate for each element. If so, return it.
1029 auto FindCommonSourceAggregate =
1030 [&](std::optional<BasicBlock *> UseBB,
1031 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1032 std::optional<Value *> SourceAggregate;
1033
1034 for (auto I : enumerate(AggElts)) {
1035 assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1036 "We don't store nullptr in SourceAggregate!");
1037 assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1038 (I.index() != 0) &&
1039 "SourceAggregate should be valid after the first element,");
1040
1041 // For this element, is there a plausible source aggregate?
1042 // FIXME: we could special-case undef element, IFF we know that in the
1043 // source aggregate said element isn't poison.
1044 std::optional<Value *> SourceAggregateForElement =
1045 FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
1046
1047 // Okay, what have we found? Does that correlate with previous findings?
1048
1049 // Regardless of whether or not we have previously found source
1050 // aggregate for previous elements (if any), if we didn't find one for
1051 // this element, passthrough whatever we have just found.
1052 if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
1053 return SourceAggregateForElement;
1054
1055 // Okay, we have found source aggregate for this element.
1056 // Let's see what we already know from previous elements, if any.
1057 switch (Describe(SourceAggregate)) {
1058 case AggregateDescription::NotFound:
1059 // This is apparently the first element that we have examined.
1060 SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1061 continue; // Great, now look at next element.
1062 case AggregateDescription::Found:
1063 // We have previously already successfully examined other elements.
1064 // Is this the same source aggregate we've found for other elements?
1065 if (*SourceAggregateForElement != *SourceAggregate)
1066 return FoundMismatch;
1067 continue; // Still the same aggregate, look at next element.
1068 case AggregateDescription::FoundMismatch:
1069 llvm_unreachable("Can't happen. We would have early-exited then.");
1070 };
1071 }
1072
1073 assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1074 "Must be a valid Value");
1075 return *SourceAggregate;
1076 };
1077
1078 std::optional<Value *> SourceAggregate;
1079
1080 // Can we find the source aggregate without looking at predecessors?
1081 SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
1082 /*PredBB=*/std::nullopt);
1083 if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
1084 if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
1085 return nullptr; // Conflicting source aggregates!
1086 ++NumAggregateReconstructionsSimplified;
1087 return replaceInstUsesWith(OrigIVI, *SourceAggregate);
1088 }
1089
1090 // Okay, apparently we need to look at predecessors.
1091
1092 // We should be smart about picking the "use" basic block, which will be the
1093 // merge point for aggregate, where we'll insert the final PHI that will be
1094 // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
1095 // We should look in which blocks each of the AggElts is being defined,
1096 // they all should be defined in the same basic block.
1097 BasicBlock *UseBB = nullptr;
1098
1099 for (const std::optional<Instruction *> &I : AggElts) {
1100 BasicBlock *BB = (*I)->getParent();
1101 // If it's the first instruction we've encountered, record the basic block.
1102 if (!UseBB) {
1103 UseBB = BB;
1104 continue;
1105 }
1106 // Otherwise, this must be the same basic block we've seen previously.
1107 if (UseBB != BB)
1108 return nullptr;
1109 }
1110
1111 // If *all* of the elements are basic-block-independent, meaning they are
1112 // either function arguments, or constant expressions, then if we didn't
1113 // handle them without predecessor-aware handling, we won't handle them now.
1114 if (!UseBB)
1115 return nullptr;
1116
1117 // If we didn't manage to find source aggregate without looking at
1118 // predecessors, and there are no predecessors to look at, then we're done.
1119 if (pred_empty(UseBB))
1120 return nullptr;
1121
1122 // Arbitrary predecessor count limit.
1123 static const int PredCountLimit = 64;
1124
1125 // Cache the (non-uniqified!) list of predecessors in a vector,
1126 // checking the limit at the same time for efficiency.
1127 SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1128 for (BasicBlock *Pred : predecessors(UseBB)) {
1129 // Don't bother if there are too many predecessors.
1130 if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1131 return nullptr;
1132 Preds.emplace_back(Pred);
1133 }
1134
1135 // For each predecessor, what is the source aggregate,
1136 // from which all the elements were originally extracted?
1137 // Note that we want the map to have stable iteration order!
1138 SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates;
1139 bool FoundSrcAgg = false;
1140 for (BasicBlock *Pred : Preds) {
1141 std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1142 SourceAggregates.try_emplace(Pred);
1143 // Did we already evaluate this predecessor?
1144 if (!IV.second)
1145 continue;
1146
1147 // Let's hope that when coming from predecessor Pred, all elements of the
1148 // aggregate produced by OrigIVI must have been originally extracted from
1149 // the same aggregate. Is that so? Can we find said original aggregate?
1150 SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1151 if (Describe(SourceAggregate) == AggregateDescription::Found) {
1152 FoundSrcAgg = true;
1153 IV.first->second = *SourceAggregate;
1154 } else {
1155 // If UseBB is the single successor of Pred, we can add InsertValue to
1156 // Pred.
1157 auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1158 if (!BI || !BI->isUnconditional())
1159 return nullptr;
1160 }
1161 }
1162
1163 if (!FoundSrcAgg)
1164 return nullptr;
1165
1166 // Do some sanity check if we need to add insertvalue into predecessors.
1167 auto OrigBB = OrigIVI.getParent();
1168 for (auto &It : SourceAggregates) {
1169 if (Describe(It.second) == AggregateDescription::Found)
1170 continue;
1171
1172 // Element is defined in UseBB, so it can't be used in predecessors.
1173 if (EltDefinedInUseBB)
1174 return nullptr;
1175
1176 // Doing this transformation across a loop boundary may create a dead
1177 // loop, so we should avoid this situation. But LoopInfo is not generally
1178 // available, so we must be conservative here.
1179 // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
1180 // can't be in inner loop.
1181 if (UseBB != OrigBB)
1182 return nullptr;
1183
1184 // Avoid constructing constant aggregate because constant value may expose
1185 // more optimizations.
1186 bool ConstAgg = true;
1187 for (auto Val : AggElts) {
1188 Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
1189 if (!isa<Constant>(Elt)) {
1190 ConstAgg = false;
1191 break;
1192 }
1193 }
1194 if (ConstAgg)
1195 return nullptr;
1196 }
1197
1198 // For predecessors without appropriate source aggregate, create one in the
1199 // predecessor.
1200 for (auto &It : SourceAggregates) {
1201 if (Describe(It.second) == AggregateDescription::Found)
1202 continue;
1203
1204 BasicBlock *Pred = It.first;
1205 Builder.SetInsertPoint(Pred->getTerminator());
1206 Value *V = PoisonValue::get(AggTy);
1207 for (auto [Idx, Val] : enumerate(AggElts)) {
1208 Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
1209 V = Builder.CreateInsertValue(V, Elt, Idx);
1210 }
1211
1212 It.second = V;
1213 }
1214
1215 // All good! Now we just need to thread the source aggregates here.
1216 // Note that we have to insert the new PHI here, ourselves, because we can't
1217 // rely on InstCombinerImpl::run() inserting it into the right basic block.
1218 // Note that the same block can be a predecessor more than once,
1219 // and we need to preserve that invariant for the PHI node.
1221 Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
1222 auto *PHI =
1223 Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1224 for (BasicBlock *Pred : Preds)
1225 PHI->addIncoming(SourceAggregates[Pred], Pred);
1226
1227 ++NumAggregateReconstructionsSimplified;
1228 return replaceInstUsesWith(OrigIVI, PHI);
1229}
1230
1231/// Try to find redundant insertvalue instructions, like the following ones:
1232/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1233/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1234/// Here the second instruction inserts values at the same indices, as the
1235/// first one, making the first one redundant.
1236/// It should be transformed to:
1237/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1238Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
1239 if (Value *V = simplifyInsertValueInst(
1240 I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
1241 SQ.getWithInstruction(&I)))
1242 return replaceInstUsesWith(I, V);
1243
1244 bool IsRedundant = false;
1245 ArrayRef<unsigned int> FirstIndices = I.getIndices();
1246
1247 // If there is a chain of insertvalue instructions (each of them except the
1248 // last one has only one use and it's another insertvalue insn from this
1249 // chain), check if any of the 'children' uses the same indices as the first
1250 // instruction. In this case, the first one is redundant.
1251 Value *V = &I;
1252 unsigned Depth = 0;
1253 while (V->hasOneUse() && Depth < 10) {
1254 User *U = V->user_back();
1255 auto UserInsInst = dyn_cast<InsertValueInst>(U);
1256 if (!UserInsInst || U->getOperand(0) != V)
1257 break;
1258 if (UserInsInst->getIndices() == FirstIndices) {
1259 IsRedundant = true;
1260 break;
1261 }
1262 V = UserInsInst;
1263 Depth++;
1264 }
1265
1266 if (IsRedundant)
1267 return replaceInstUsesWith(I, I.getOperand(0));
1268
1269 if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
1270 return NewI;
1271
1272 return nullptr;
1273}
1274
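/// Return true if the shuffle only picks, lane by lane, between the
/// corresponding elements of its two operands (i.e. it behaves like a vector
/// select and never moves an element across lanes).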
1275static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
1276 // Can not analyze scalable type, the number of elements is not a compile-time
1277 // constant.
1278 if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
1279 return false;
1280
1281 int MaskSize = Shuf.getShuffleMask().size();
1282 int VecSize =
1283 cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1284
1285 // A vector select does not change the size of the operands.
1286 if (MaskSize != VecSize)
1287 return false;
1288
1289 // Each mask element must be undefined or choose a vector element from one of
1290 // the source operands without crossing vector lanes.
1291 for (int i = 0; i != MaskSize; ++i) {
1292 int Elt = Shuf.getMaskValue(i);
1293 if (Elt != -1 && Elt != i && Elt != i + VecSize)
1294 return false;
1295 }
1296
1297 return true;
1298}
1299
1300/// Turn a chain of inserts that splats a value into an insert + shuffle:
1301/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1302/// shufflevector(insertelt(X, %k, 0), poison, zero)
1303static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
1304 // We are interested in the last insert in a chain. So if this insert has a
1305 // single user and that user is an insert, bail.
1306 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1307 return nullptr;
1308
1309 VectorType *VecTy = InsElt.getType();
1310 // Can not handle scalable type, the number of elements is not a compile-time
1311 // constant.
1312 if (isa<ScalableVectorType>(VecTy))
1313 return nullptr;
1314 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1315
1316 // Do not try to do this for a one-element vector, since that's a nop,
1317 // and will cause an inf-loop.
1318 if (NumElements == 1)
1319 return nullptr;
1320
1321 Value *SplatVal = InsElt.getOperand(1);
1322 InsertElementInst *CurrIE = &InsElt;
1323 SmallBitVector ElementPresent(NumElements, false);
1324 InsertElementInst *FirstIE = nullptr;
1325
1326 // Walk the chain backwards, keeping track of which indices we inserted into,
1327 // until we hit something that isn't an insert of the splatted value.
1328 while (CurrIE) {
1329 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1330 if (!Idx || CurrIE->getOperand(1) != SplatVal)
1331 return nullptr;
1332
1333 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1334 // Check none of the intermediate steps have any additional uses, except
1335 // for the root insertelement instruction, which can be re-used, if it
1336 // inserts at position 0.
1337 if (CurrIE != &InsElt &&
1338 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1339 return nullptr;
1340
1341 ElementPresent[Idx->getZExtValue()] = true;
1342 FirstIE = CurrIE;
1343 CurrIE = NextIE;
1344 }
1345
1346 // If this is just a single insertelement (not a sequence), we are done.
1347 if (FirstIE == &InsElt)
1348 return nullptr;
1349
1350 // If we are not inserting into a poison vector, make sure we've seen an
1351 // insert into every element.
1352 // TODO: If the base vector is not undef, it might be better to create a splat
1353 // and then a select-shuffle (blend) with the base vector.
1354 if (!match(FirstIE->getOperand(0), m_Poison()))
1355 if (!ElementPresent.all())
1356 return nullptr;
1357
1358 // Create the insert + shuffle.
1359 Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
1360 PoisonValue *PoisonVec = PoisonValue::get(VecTy);
1361 Constant *Zero = ConstantInt::get(Int64Ty, 0);
1362 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1363 FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
1364 InsElt.getIterator());
1365
1366 // Splat from element 0, but replace absent elements with poison in the mask.
1367 SmallVector<int, 16> Mask(NumElements, 0);
1368 for (unsigned i = 0; i != NumElements; ++i)
1369 if (!ElementPresent[i])
1370 Mask[i] = -1;
1371
1372 return new ShuffleVectorInst(FirstIE, Mask);
1373}
1374
1375/// Try to fold an insert element into an existing splat shuffle by changing
1376/// the shuffle's mask to include the index of this insert element.
1377static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
1378 // Check if the vector operand of this insert is a canonical splat shuffle.
1379 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1380 if (!Shuf || !Shuf->isZeroEltSplat())
1381 return nullptr;
1382
1383 // Bail out early if shuffle is scalable type. The number of elements in
1384 // shuffle mask is unknown at compile-time.
1385 if (isa<ScalableVectorType>(Shuf->getType()))
1386 return nullptr;
1387
1388 // Check for a constant insertion index.
1389 uint64_t IdxC;
1390 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1391 return nullptr;
1392
1393 // Check if the splat shuffle's input is the same as this insert's scalar op.
1394 Value *X = InsElt.getOperand(1);
1395 Value *Op0 = Shuf->getOperand(0);
1396 if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1397 return nullptr;
1398
1399 // Replace the shuffle mask element at the index of this insert with a zero.
1400 // For example:
1401 // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1402 // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1403 unsigned NumMaskElts =
1404 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1405 SmallVector<int, 16> NewMask(NumMaskElts);
1406 for (unsigned i = 0; i != NumMaskElts; ++i)
1407 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1408
1409 return new ShuffleVectorInst(Op0, NewMask);
1410}
1411
1412/// Try to fold an extract+insert element into an existing identity shuffle by
1413/// changing the shuffle's mask to include the index of this insert element.
1414static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
1415 // Check if the vector operand of this insert is an identity shuffle.
1416 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1417 if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
1418 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1419 return nullptr;
1420
1421 // Bail out early if shuffle is scalable type. The number of elements in
1422 // shuffle mask is unknown at compile-time.
1423 if (isa<ScalableVectorType>(Shuf->getType()))
1424 return nullptr;
1425
1426 // Check for a constant insertion index.
1427 uint64_t IdxC;
1428 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1429 return nullptr;
1430
1431 // Check if this insert's scalar op is extracted from the identity shuffle's
1432 // input vector.
1433 Value *Scalar = InsElt.getOperand(1);
1434 Value *X = Shuf->getOperand(0);
1435 if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1436 return nullptr;
1437
1438 // Replace the shuffle mask element at the index of this extract+insert with
1439 // that same index value.
1440 // For example:
1441 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1442 unsigned NumMaskElts =
1443 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1444 SmallVector<int, 16> NewMask(NumMaskElts);
1445 ArrayRef<int> OldMask = Shuf->getShuffleMask();
1446 for (unsigned i = 0; i != NumMaskElts; ++i) {
1447 if (i != IdxC) {
1448 // All mask elements besides the inserted element remain the same.
1449 NewMask[i] = OldMask[i];
1450 } else if (OldMask[i] == (int)IdxC) {
1451 // If the mask element was already set, there's nothing to do
1452 // (demanded elements analysis may unset it later).
1453 return nullptr;
1454 } else {
1455 assert(OldMask[i] == PoisonMaskElem &&
1456 "Unexpected shuffle mask element for identity shuffle");
1457 NewMask[i] = IdxC;
1458 }
1459 }
1460
1461 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1462}
1463
1464/// If we have an insertelement instruction feeding into another insertelement
1465/// and the 2nd is inserting a constant into the vector, canonicalize that
1466/// constant insertion before the insertion of a variable:
1467///
1468/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1469/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1470///
1471/// This has the potential of eliminating the 2nd insertelement instruction
1472/// via constant folding of the scalar constant into a vector constant.
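///
/// For example (illustrative IR; %x and %y are placeholder values):
///   %i1 = insertelement <4 x i32> %x, i32 %y, i64 0
///   %i2 = insertelement <4 x i32> %i1, i32 7, i64 1
/// -->
///   %t  = insertelement <4 x i32> %x, i32 7, i64 1
///   %i2 = insertelement <4 x i32> %t, i32 %y, i64 0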
1473static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1474 InstCombiner::BuilderTy &Builder) {
1475 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1476 if (!InsElt1 || !InsElt1->hasOneUse())
1477 return nullptr;
1478
1479 Value *X, *Y;
1480 Constant *ScalarC;
1481 ConstantInt *IdxC1, *IdxC2;
1482 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1483 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1484 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1485 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1486 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1487 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1488 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1489 }
1490
1491 return nullptr;
1492}
1493
1494/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1495/// --> shufflevector X, CVec', Mask'
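///
/// For example (illustrative IR; %x is a placeholder value):
///   %v = insertelement <4 x i32> %x, i32 1, i64 0
///   %r = insertelement <4 x i32> %v, i32 2, i64 1
/// -->
///   %r = shufflevector <4 x i32> %x,
///                      <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>,
///                      <4 x i32> <i32 4, i32 5, i32 2, i32 3>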
1496static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1497 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1498 // Bail out if the parent has more than one use. In that case, we'd be
1499 // replacing the insertelt with a shuffle, and that's not a clear win.
1500 if (!Inst || !Inst->hasOneUse())
1501 return nullptr;
1502 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1503 // The shuffle must have a constant vector operand. The insertelt must have
1504 // a constant scalar being inserted at a constant position in the vector.
1505 Constant *ShufConstVec, *InsEltScalar;
1506 uint64_t InsEltIndex;
1507 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1508 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1509 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1510 return nullptr;
1511
1512 // Adding an element to an arbitrary shuffle could be expensive, but a
1513 // shuffle that selects elements from vectors without crossing lanes is
1514 // assumed cheap.
1515 // If we're just adding a constant into that shuffle, it will still be
1516 // cheap.
1517 if (!isShuffleEquivalentToSelect(*Shuf))
1518 return nullptr;
1519
1520 // From the above 'select' check, we know that the mask has the same number
1521 // of elements as the vector input operands. We also know that each constant
1522 // input element is used in its lane and cannot be used more than once by
1523 // the shuffle. Therefore, replace the constant in the shuffle's constant
1524 // vector with the insertelt constant. Replace the constant in the shuffle's
1525 // mask vector with the insertelt index plus the length of the vector
1526 // (because the constant vector operand of a shuffle is always the 2nd
1527 // operand).
1528 ArrayRef<int> Mask = Shuf->getShuffleMask();
1529 unsigned NumElts = Mask.size();
1530 SmallVector<Constant *, 16> NewShufElts(NumElts);
1531 SmallVector<int, 16> NewMaskElts(NumElts);
1532 for (unsigned I = 0; I != NumElts; ++I) {
1533 if (I == InsEltIndex) {
1534 NewShufElts[I] = InsEltScalar;
1535 NewMaskElts[I] = InsEltIndex + NumElts;
1536 } else {
1537 // Copy over the existing values.
1538 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1539 NewMaskElts[I] = Mask[I];
1540 }
1541
1542 // Bail if we failed to find an element.
1543 if (!NewShufElts[I])
1544 return nullptr;
1545 }
1546
1547 // Create new operands for a shuffle that includes the constant of the
1548 // original insertelt. The old shuffle will be dead now.
1549 return new ShuffleVectorInst(Shuf->getOperand(0),
1550 ConstantVector::get(NewShufElts), NewMaskElts);
1551 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1552 // Transform sequences of insertelement ops with constant data/indices into
1553 // a single shuffle op.
1554 // Cannot handle a scalable type; the number of elements needed to create
1555 // the shuffle mask is not a compile-time constant.
1556 if (isa<ScalableVectorType>(InsElt.getType()))
1557 return nullptr;
1558 unsigned NumElts =
1559 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1560
1561 uint64_t InsertIdx[2];
1562 Constant *Val[2];
1563 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1564 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1565 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1566 !match(IEI->getOperand(1), m_Constant(Val[1])))
1567 return nullptr;
1568 SmallVector<Constant *, 16> Values(NumElts);
1569 SmallVector<int, 16> Mask(NumElts);
1570 auto ValI = std::begin(Val);
1571 // Generate new constant vector and mask.
1572 // We have 2 values/masks from the insertelement instructions. Insert them
1573 // into new value/mask vectors.
1574 for (uint64_t I : InsertIdx) {
1575 if (!Values[I]) {
1576 Values[I] = *ValI;
1577 Mask[I] = NumElts + I;
1578 }
1579 ++ValI;
1580 }
1581 // Remaining values are filled with 'poison' values.
1582 for (unsigned I = 0; I < NumElts; ++I) {
1583 if (!Values[I]) {
1584 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1585 Mask[I] = I;
1586 }
1587 }
1588 // Create new operands for a shuffle that includes the constant of the
1589 // original insertelt.
1590 return new ShuffleVectorInst(IEI->getOperand(0),
1591 ConstantVector::get(Values), Mask);
1592 }
1593 return nullptr;
1594}
1595
1596/// If both the base vector and the inserted element are extended from the same
1597/// type, do the insert element in the narrow source type followed by extend.
1598/// TODO: This can be extended to include other cast opcodes, but particularly
1599/// if we create a wider insertelement, make sure codegen is not harmed.
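///
/// For example (illustrative IR; %x and %y are placeholder values):
///   %v = fpext <2 x float> %x to <2 x double>
///   %s = fpext float %y to double
///   %r = insertelement <2 x double> %v, double %s, i64 0
/// -->
///   %i = insertelement <2 x float> %x, float %y, i64 0
///   %r = fpext <2 x float> %i to <2 x double>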
1600static Instruction *narrowInsElt(InsertElementInst &InsElt,
1601 InstCombiner::BuilderTy &Builder) {
1602 // We are creating a vector extend. If the original vector extend has another
1603 // use, that would mean we end up with 2 vector extends, so avoid that.
1604 // TODO: We could ease the use-clause to "if at least one op has one use"
1605 // (assuming that the source types match - see next TODO comment).
1606 Value *Vec = InsElt.getOperand(0);
1607 if (!Vec->hasOneUse())
1608 return nullptr;
1609
1610 Value *Scalar = InsElt.getOperand(1);
1611 Value *X, *Y;
1612 CastInst::CastOps CastOpcode;
1613 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1614 CastOpcode = Instruction::FPExt;
1615 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1616 CastOpcode = Instruction::SExt;
1617 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1618 CastOpcode = Instruction::ZExt;
1619 else
1620 return nullptr;
1621
1622 // TODO: We can allow mismatched types by creating an intermediate cast.
1623 if (X->getType()->getScalarType() != Y->getType())
1624 return nullptr;
1625
1626 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1627 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1628 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1629}
1630
1631/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1632/// try to convert to a single insert with appropriate bitcasts.
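///
/// For example, on a little-endian target (illustrative IR; %x is a
/// placeholder i32 value):
///   %hi = lshr i32 %x, 16
///   %t0 = trunc i32 %x to i16
///   %t1 = trunc i32 %hi to i16
///   %v0 = insertelement <4 x i16> undef, i16 %t0, i64 0
///   %v1 = insertelement <4 x i16> %v0, i16 %t1, i64 1
/// -->
///   %bc = bitcast <4 x i16> undef to <2 x i32>
///   %in = insertelement <2 x i32> %bc, i32 %x, i64 0
///   %v1 = bitcast <2 x i32> %in to <4 x i16>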
1633static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1634 bool IsBigEndian,
1635 InstCombiner::BuilderTy &Builder) {
1636 Value *VecOp = InsElt.getOperand(0);
1637 Value *ScalarOp = InsElt.getOperand(1);
1638 Value *IndexOp = InsElt.getOperand(2);
1639
1640 // Pattern depends on endianness because we expect the lower index to be inserted first.
1641 // Big endian:
1642 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1643 // Little endian:
1644 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1645 // Note: It is not safe to do this transform with an arbitrary base vector
1646 // because the bitcast of that vector to fewer/larger elements could
1647 // allow poison to spill into an element that was not poison before.
1648 // TODO: Detect smaller fractions of the scalar.
1649 // TODO: One-use checks are conservative.
1650 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1651 Value *Scalar0, *BaseVec;
1652 uint64_t Index0, Index1;
1653 if (!VTy || (VTy->getNumElements() & 1) ||
1654 !match(IndexOp, m_ConstantInt(Index1)) ||
1655 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1656 m_ConstantInt(Index0))) ||
1657 !match(BaseVec, m_Undef()))
1658 return nullptr;
1659
1660 // The first insert must be to the index one less than this one, and
1661 // the first insert must be to an even index.
1662 if (Index0 + 1 != Index1 || Index0 & 1)
1663 return nullptr;
1664
1665 // For big endian, the high half of the value should be inserted first.
1666 // For little endian, the low half of the value should be inserted first.
1667 Value *X;
1668 uint64_t ShAmt;
1669 if (IsBigEndian) {
1670 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1671 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1672 return nullptr;
1673 } else {
1674 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1675 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1676 return nullptr;
1677 }
1678
1679 Type *SrcTy = X->getType();
1680 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1681 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1682 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1683 return nullptr;
1684
1685 // Bitcast the base vector to a vector type with the source element type.
1686 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1687 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1688
1689 // Scale the insert index for a vector with half as many elements.
1690 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1691 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1692 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1693 return new BitCastInst(NewInsert, VTy);
1694}
1695
1696Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1697 Value *VecOp = IE.getOperand(0);
1698 Value *ScalarOp = IE.getOperand(1);
1699 Value *IdxOp = IE.getOperand(2);
1700
1701 if (auto *V = simplifyInsertElementInst(
1702 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1703 return replaceInstUsesWith(IE, V);
1704
1705 // Canonicalize type of constant indices to i64 to simplify CSE
1706 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1707 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1708 return replaceOperand(IE, 2, NewIdx);
1709
1710 Value *BaseVec, *OtherScalar;
1711 uint64_t OtherIndexVal;
1712 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1713 m_Value(OtherScalar),
1714 m_ConstantInt(OtherIndexVal)))) &&
1715 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1716 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1717 return InsertElementInst::Create(NewIns, OtherScalar,
1718 Builder.getInt64(OtherIndexVal));
1719 }
1720 }
1721
1722 // If the scalar is bitcast and inserted into undef, do the insert in the
1723 // source type followed by bitcast.
1724 // TODO: Generalize for insert into any constant, not just undef?
1725 Value *ScalarSrc;
1726 if (match(VecOp, m_Undef()) &&
1727 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1728 (ScalarSrc->getType()->isIntegerTy() ||
1729 ScalarSrc->getType()->isFloatingPointTy())) {
1730 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1731 // bitcast (inselt undef, ScalarSrc, IdxOp)
1732 Type *ScalarTy = ScalarSrc->getType();
1733 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1734 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1735 : UndefValue::get(VecTy);
1736 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1737 return new BitCastInst(NewInsElt, IE.getType());
1738 }
1739
1740 // If the vector and scalar are both bitcast from the same element type, do
1741 // the insert in that source type followed by bitcast.
1742 Value *VecSrc;
1743 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1744 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1745 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1746 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1747 cast<VectorType>(VecSrc->getType())->getElementType() ==
1748 ScalarSrc->getType()) {
1749 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1750 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1751 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1752 return new BitCastInst(NewInsElt, IE.getType());
1753 }
1754
1755 // If the inserted element was extracted from some other fixed-length vector
1756 // and both indexes are valid constants, try to turn this into a shuffle.
1757 // Cannot handle a scalable vector type; the number of elements needed to
1758 // create the shuffle mask is not a compile-time constant.
1759 uint64_t InsertedIdx, ExtractedIdx;
1760 Value *ExtVecOp;
1761 if (isa<FixedVectorType>(IE.getType()) &&
1762 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1763 match(ScalarOp,
1764 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1765 isa<FixedVectorType>(ExtVecOp->getType()) &&
1766 ExtractedIdx <
1767 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1768 // TODO: Looking at the user(s) to determine if this insert is a
1769 // fold-to-shuffle opportunity does not match the usual instcombine
1770 // constraints. We should decide if the transform is worthy based only
1771 // on this instruction and its operands, but that may not work currently.
1772 //
1773 // Here, we are trying to avoid creating shuffles before reaching
1774 // the end of a chain of extract-insert pairs. This is complicated because
1775 // we do not generally form arbitrary shuffle masks in instcombine
1776 // (because those may codegen poorly), but collectShuffleElements() does
1777 // exactly that.
1778 //
1779 // The rules for determining what is an acceptable target-independent
1780 // shuffle mask are fuzzy because they evolve based on the backend's
1781 // capabilities and real-world impact.
1782 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1783 if (!Insert.hasOneUse())
1784 return true;
1785 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1786 if (!InsertUser)
1787 return true;
1788 return false;
1789 };
1790
1791 // Try to form a shuffle from a chain of extract-insert ops.
1792 if (isShuffleRootCandidate(IE)) {
1793 bool Rerun = true;
1794 while (Rerun) {
1795 Rerun = false;
1796
1797 SmallVector<int, 16> Mask;
1798 ShuffleOps LR =
1799 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1800
1801 // The proposed shuffle may be trivial, in which case we shouldn't
1802 // perform the combine.
1803 if (LR.first != &IE && LR.second != &IE) {
1804 // We now have a shuffle of LHS, RHS, Mask.
1805 if (LR.second == nullptr)
1806 LR.second = PoisonValue::get(LR.first->getType());
1807 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1808 }
1809 }
1810 }
1811 }
1812
1813 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1814 unsigned VWidth = VecTy->getNumElements();
1815 APInt PoisonElts(VWidth, 0);
1816 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1817 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1818 PoisonElts)) {
1819 if (V != &IE)
1820 return replaceInstUsesWith(IE, V);
1821 return &IE;
1822 }
1823 }
1824
1825 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1826 return Shuf;
1827
1828 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1829 return NewInsElt;
1830
1831 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1832 return Broadcast;
1833
1834 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1835 return Splat;
1836
1837 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1838 return IdentityShuf;
1839
1840 if (Instruction *Ext = narrowInsElt(IE, Builder))
1841 return Ext;
1842
1843 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1844 return Ext;
1845
1846 return nullptr;
1847}
1848
1849/// Return true if we can evaluate the specified expression tree if the vector
1850/// elements were shuffled in a different order.
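///
/// For example (illustrative; %x is a placeholder value), the result of
///   shufflevector (add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>),
///                 <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
/// can instead be computed as
///   add (shufflevector %x, poison, <i32 3, i32 2, i32 1, i32 0>),
///       <i32 4, i32 3, i32 2, i32 1>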
1851static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1852 unsigned Depth = 5) {
1853 // We can always reorder the elements of a constant.
1854 if (isa<Constant>(V))
1855 return true;
1856
1857 // We won't reorder vector arguments. No IPO here.
1858 Instruction *I = dyn_cast<Instruction>(V);
1859 if (!I) return false;
1860
1861 // Two users may expect different orders of the elements. Don't try it.
1862 if (!I->hasOneUse())
1863 return false;
1864
1865 if (Depth == 0) return false;
1866
1867 switch (I->getOpcode()) {
1868 case Instruction::UDiv:
1869 case Instruction::SDiv:
1870 case Instruction::URem:
1871 case Instruction::SRem:
1872 // Propagating an undefined shuffle mask element to integer div/rem is not
1873 // allowed because those opcodes can create immediate undefined behavior
1874 // from an undefined element in an operand.
1875 if (llvm::is_contained(Mask, -1))
1876 return false;
1877 [[fallthrough]];
1878 case Instruction::Add:
1879 case Instruction::FAdd:
1880 case Instruction::Sub:
1881 case Instruction::FSub:
1882 case Instruction::Mul:
1883 case Instruction::FMul:
1884 case Instruction::FDiv:
1885 case Instruction::FRem:
1886 case Instruction::Shl:
1887 case Instruction::LShr:
1888 case Instruction::AShr:
1889 case Instruction::And:
1890 case Instruction::Or:
1891 case Instruction::Xor:
1892 case Instruction::ICmp:
1893 case Instruction::FCmp:
1894 case Instruction::Trunc:
1895 case Instruction::ZExt:
1896 case Instruction::SExt:
1897 case Instruction::FPToUI:
1898 case Instruction::FPToSI:
1899 case Instruction::UIToFP:
1900 case Instruction::SIToFP:
1901 case Instruction::FPTrunc:
1902 case Instruction::FPExt:
1903 case Instruction::GetElementPtr: {
1904 // Bail out if we would create longer vector ops. We could allow creating
1905 // longer vector ops, but that may result in more expensive codegen.
1906 Type *ITy = I->getType();
1907 if (ITy->isVectorTy() &&
1908 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1909 return false;
1910 for (Value *Operand : I->operands()) {
1911 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1912 return false;
1913 }
1914 return true;
1915 }
1916 case Instruction::InsertElement: {
1917 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1918 if (!CI) return false;
1919 int ElementNumber = CI->getLimitedValue();
1920
1921 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1922 // can't put an element into multiple indices.
1923 bool SeenOnce = false;
1924 for (int I : Mask) {
1925 if (I == ElementNumber) {
1926 if (SeenOnce)
1927 return false;
1928 SeenOnce = true;
1929 }
1930 }
1931 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1932 }
1933 }
1934 return false;
1935}
1936
1937/// Build a new instruction just like 'I' but with the new operands given.
1938/// In the event of a type mismatch, the types of the new operands are taken to be correct.
1939static Value *buildNew(Instruction *I, ArrayRef<Value *> NewOps,
1940 IRBuilderBase &Builder) {
1941 Builder.SetInsertPoint(I);
1942 switch (I->getOpcode()) {
1943 case Instruction::Add:
1944 case Instruction::FAdd:
1945 case Instruction::Sub:
1946 case Instruction::FSub:
1947 case Instruction::Mul:
1948 case Instruction::FMul:
1949 case Instruction::UDiv:
1950 case Instruction::SDiv:
1951 case Instruction::FDiv:
1952 case Instruction::URem:
1953 case Instruction::SRem:
1954 case Instruction::FRem:
1955 case Instruction::Shl:
1956 case Instruction::LShr:
1957 case Instruction::AShr:
1958 case Instruction::And:
1959 case Instruction::Or:
1960 case Instruction::Xor: {
1961 BinaryOperator *BO = cast<BinaryOperator>(I);
1962 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1963 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1964 NewOps[0], NewOps[1]);
1965 if (auto *NewI = dyn_cast<Instruction>(New)) {
1966 if (isa<OverflowingBinaryOperator>(BO)) {
1967 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1968 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1969 }
1970 if (isa<PossiblyExactOperator>(BO)) {
1971 NewI->setIsExact(BO->isExact());
1972 }
1973 if (isa<FPMathOperator>(BO))
1974 NewI->copyFastMathFlags(I);
1975 }
1976 return New;
1977 }
1978 case Instruction::ICmp:
1979 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1980 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1981 NewOps[1]);
1982 case Instruction::FCmp:
1983 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1984 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1985 NewOps[1]);
1986 case Instruction::Trunc:
1987 case Instruction::ZExt:
1988 case Instruction::SExt:
1989 case Instruction::FPToUI:
1990 case Instruction::FPToSI:
1991 case Instruction::UIToFP:
1992 case Instruction::SIToFP:
1993 case Instruction::FPTrunc:
1994 case Instruction::FPExt: {
1995 // It's possible that the mask has a different number of elements from
1996 // the original cast. We recompute the destination type to match the mask.
1997 Type *DestTy = VectorType::get(
1998 I->getType()->getScalarType(),
1999 cast<VectorType>(NewOps[0]->getType())->getElementCount());
2000 assert(NewOps.size() == 1 && "cast with #ops != 1");
2001 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
2002 DestTy);
2003 }
2004 case Instruction::GetElementPtr: {
2005 Value *Ptr = NewOps[0];
2006 ArrayRef<Value*> Idx = NewOps.slice(1);
2007 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
2008 Ptr, Idx, "",
2009 cast<GEPOperator>(I)->getNoWrapFlags());
2010 }
2011 }
2012 llvm_unreachable("failed to rebuild vector instructions");
2013}
2014
2015static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
2016 IRBuilderBase &Builder) {
2017 // Mask.size() does not need to be equal to the number of vector elements.
2018
2019 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2020 Type *EltTy = V->getType()->getScalarType();
2021
2022 if (isa<PoisonValue>(V))
2023 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
2024
2025 if (match(V, m_Undef()))
2026 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
2027
2028 if (isa<ConstantAggregateZero>(V))
2029 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
2030
2031 if (Constant *C = dyn_cast<Constant>(V))
2032 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
2033 Mask);
2034
2035 Instruction *I = cast<Instruction>(V);
2036 switch (I->getOpcode()) {
2037 case Instruction::Add:
2038 case Instruction::FAdd:
2039 case Instruction::Sub:
2040 case Instruction::FSub:
2041 case Instruction::Mul:
2042 case Instruction::FMul:
2043 case Instruction::UDiv:
2044 case Instruction::SDiv:
2045 case Instruction::FDiv:
2046 case Instruction::URem:
2047 case Instruction::SRem:
2048 case Instruction::FRem:
2049 case Instruction::Shl:
2050 case Instruction::LShr:
2051 case Instruction::AShr:
2052 case Instruction::And:
2053 case Instruction::Or:
2054 case Instruction::Xor:
2055 case Instruction::ICmp:
2056 case Instruction::FCmp:
2057 case Instruction::Trunc:
2058 case Instruction::ZExt:
2059 case Instruction::SExt:
2060 case Instruction::FPToUI:
2061 case Instruction::FPToSI:
2062 case Instruction::UIToFP:
2063 case Instruction::SIToFP:
2064 case Instruction::FPTrunc:
2065 case Instruction::FPExt:
2066 case Instruction::Select:
2067 case Instruction::GetElementPtr: {
2068 SmallVector<Value *, 8> NewOps;
2069 bool NeedsRebuild =
2070 (Mask.size() !=
2071 cast<FixedVectorType>(I->getType())->getNumElements());
2072 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2073 Value *V;
2074 // Recursively call evaluateInDifferentElementOrder on vector arguments
2075 // as well. E.g. GetElementPtr may have scalar operands even if the
2076 // return value is a vector, so we need to examine the operand type.
2077 if (I->getOperand(i)->getType()->isVectorTy())
2078 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
2079 else
2080 V = I->getOperand(i);
2081 NewOps.push_back(V);
2082 NeedsRebuild |= (V != I->getOperand(i));
2083 }
2084 if (NeedsRebuild)
2085 return buildNew(I, NewOps, Builder);
2086 return I;
2087 }
2088 case Instruction::InsertElement: {
2089 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
2090
2091 // The insertelement was inserting at Element. Figure out which element
2092 // that becomes after shuffling. The answer is guaranteed to be unique
2093 // by canEvaluateShuffled.
2094 bool Found = false;
2095 int Index = 0;
2096 for (int e = Mask.size(); Index != e; ++Index) {
2097 if (Mask[Index] == Element) {
2098 Found = true;
2099 break;
2100 }
2101 }
2102
2103 // If element is not in Mask, no need to handle the operand 1 (element to
2104 // be inserted). Just evaluate values in operand 0 according to Mask.
2105 if (!Found)
2106 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2107
2108 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2109 Builder);
2110 Builder.SetInsertPoint(I);
2111 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2112 }
2113 }
2114 llvm_unreachable("failed to reorder elements of vector instruction!");
2115}
2116
2117// Returns true if the shuffle is extracting a contiguous range of values from
2118// LHS, for example:
2119// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2120// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2121// Shuffles to: |EE|FF|GG|HH|
2122// +--+--+--+--+
2123static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2124 ArrayRef<int> Mask) {
2125 unsigned LHSElems =
2126 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2127 unsigned MaskElems = Mask.size();
2128 unsigned BegIdx = Mask.front();
2129 unsigned EndIdx = Mask.back();
2130 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2131 return false;
2132 for (unsigned I = 0; I != MaskElems; ++I)
2133 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2134 return false;
2135 return true;
2136}
2137
2138/// These are the ingredients in an alternate form binary operator as described
2139/// below.
2140struct BinopElts {
2141 BinaryOperator::BinaryOps Opcode;
2142 Value *Op0;
2143 Value *Op1;
2144 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2145 Value *V0 = nullptr, Value *V1 = nullptr) :
2146 Opcode(Opc), Op0(V0), Op1(V1) {}
2147 operator bool() const { return Opcode != 0; }
2148};
2149
2150/// Binops may be transformed into binops with different opcodes and operands.
2151/// Reverse the usual canonicalization to enable folds with the non-canonical
2152/// form of the binop. If a transform is possible, return the elements of the
2153/// new binop. If not, return invalid elements.
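///
/// For example (illustrative): `shl X, <3,3,3,3>` can be viewed as
/// `mul X, <8,8,8,8>`, which may then fold with a matching `mul` in the
/// other shuffle operand.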
2154static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2155 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2156 Type *Ty = BO->getType();
2157 switch (BO->getOpcode()) {
2158 case Instruction::Shl: {
2159 // shl X, C --> mul X, (1 << C)
2160 Constant *C;
2161 if (match(BO1, m_ImmConstant(C))) {
2162 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2163 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2164 assert(ShlOne && "Constant folding of immediate constants failed");
2165 return {Instruction::Mul, BO0, ShlOne};
2166 }
2167 break;
2168 }
2169 case Instruction::Or: {
2170 // or disjoint X, C --> add X, C
2171 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2172 return {Instruction::Add, BO0, BO1};
2173 break;
2174 }
2175 case Instruction::Sub:
2176 // sub 0, X --> mul X, -1
2177 if (match(BO0, m_ZeroInt()))
2178 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2179 break;
2180 default:
2181 break;
2182 }
2183 return {};
2184}
2185
2186/// A select shuffle of a select shuffle with a shared operand can be reduced
2187/// to a single select shuffle. This is an obvious improvement in IR, and the
2188/// backend is expected to lower select shuffles efficiently.
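///
/// For example (illustrative IR; %x and %y are placeholder values):
///   %s1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
///   %s2 = shufflevector <4 x i32> %x, <4 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
/// -->
///   %s2 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 7>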
2190 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2191
2192 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2193 SmallVector<int, 16> Mask;
2194 Shuf.getShuffleMask(Mask);
2195 unsigned NumElts = Mask.size();
2196
2197 // Canonicalize a select shuffle with common operand as Op1.
2198 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2199 if (ShufOp && ShufOp->isSelect() &&
2200 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2201 std::swap(Op0, Op1);
2202 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2203 }
2204
2205 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2206 if (!ShufOp || !ShufOp->isSelect() ||
2207 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2208 return nullptr;
2209
2210 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2211 SmallVector<int, 16> Mask1;
2212 ShufOp->getShuffleMask(Mask1);
2213 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2214
2215 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2216 if (Y == Op0) {
2217 std::swap(X, Y);
2218 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2219 }
2220
2221 // If the mask chooses from X (operand 0), it stays the same.
2222 // If the mask chooses from the earlier shuffle, the other mask value is
2223 // transferred to the combined select shuffle:
2224 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2225 SmallVector<int, 16> NewMask(NumElts);
2226 for (unsigned i = 0; i != NumElts; ++i)
2227 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2228
2229 // A select mask with undef elements might look like an identity mask.
2230 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2231 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2232 "Unexpected shuffle mask");
2233 return new ShuffleVectorInst(X, Y, NewMask);
2234}
2235
2236static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2237 const SimplifyQuery &SQ) {
2238 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2239
2240 // Are we shuffling together some value and that same value after it has been
2241 // modified by a binop with a constant?
2242 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2243 Constant *C;
2244 bool Op0IsBinop;
2245 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2246 Op0IsBinop = true;
2247 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2248 Op0IsBinop = false;
2249 else
2250 return nullptr;
2251
2252 // The identity constant for a binop leaves a variable operand unchanged. For
2253 // a vector, this is a splat of something like 0, -1, or 1.
2254 // If there's no identity constant for this binop, we're done.
2255 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2256 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2257 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2258 if (!IdC)
2259 return nullptr;
2260
2261 Value *X = Op0IsBinop ? Op1 : Op0;
2262
2263 // Prevent folding in the case the non-binop operand might have NaN values.
2264 // If X can have NaN elements then we have that the floating point math
2265 // operation in the transformed code may not preserve the exact NaN
2266 // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2267 // This makes the transformation incorrect since the original program would
2268 // have preserved the exact NaN bit-pattern.
2269 // Avoid the folding if X can have NaN elements.
2270 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2271 !isKnownNeverNaN(X, SQ))
2272 return nullptr;
2273
2274 // Shuffle identity constants into the lanes that return the original value.
2275 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2276 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2277 // The existing binop constant vector remains in the same operand position.
2278 ArrayRef<int> Mask = Shuf.getShuffleMask();
2279 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2280 ConstantExpr::getShuffleVector(IdC, C, Mask);
2281
2282 bool MightCreatePoisonOrUB =
2283 is_contained(Mask, PoisonMaskElem) &&
2284 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2285 if (MightCreatePoisonOrUB)
2286 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2287
2288 // shuf (bop X, C), X, M --> bop X, C'
2289 // shuf X, (bop X, C), M --> bop X, C'
2290 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2291 NewBO->copyIRFlags(BO);
2292
2293 // An undef shuffle mask element may propagate as an undef constant element in
2294 // the new binop. That would produce poison where the original code might not.
2295 // If we already made a safe constant, then there's no danger.
2296 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2297 NewBO->dropPoisonGeneratingFlags();
2298 return NewBO;
2299}
2300
2301/// If we have an insert of a scalar to a non-zero element of an undefined
2302/// vector and then shuffle that value, that's the same as inserting to the zero
2303/// element and shuffling. Splatting from the zero element is recognized as the
2304/// canonical form of splat.
2305static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2306 InstCombiner::BuilderTy &Builder) {
2307 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2308 ArrayRef<int> Mask = Shuf.getShuffleMask();
2309 Value *X;
2310 uint64_t IndexC;
2311
2312 // Match a shuffle that is a splat to a non-zero element.
2313 if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
2314 m_ConstantInt(IndexC)))) ||
2315 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2316 return nullptr;
2317
2318 // Insert into element 0 of a poison vector.
2319 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2320 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2321
2322 // Splat from element 0. Any mask element that is poison remains poison.
2323 // For example:
2324 // shuf (inselt poison, X, 2), _, <2,2,undef>
2325 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2326 unsigned NumMaskElts =
2327 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2328 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2329 for (unsigned i = 0; i != NumMaskElts; ++i)
2330 if (Mask[i] == PoisonMaskElem)
2331 NewMask[i] = Mask[i];
2332
2333 return new ShuffleVectorInst(NewIns, NewMask);
2334}
2335
2336/// Try to fold shuffles that are the equivalent of a vector select.
2337Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2338 if (!Shuf.isSelect())
2339 return nullptr;
2340
2341 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2342 // Commuting undef to operand 0 conflicts with another canonicalization.
2343 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2344 if (!match(Shuf.getOperand(1), m_Undef()) &&
2345 Shuf.getMaskValue(0) >= (int)NumElts) {
2346 // TODO: Can we assert that both operands of a shuffle-select are not undef
2347 // (otherwise, it would have been folded by instsimplify)?
2348 Shuf.commute();
2349 return &Shuf;
2350 }
2351
2353 return I;
2354
2355 if (Instruction *I = foldSelectShuffleWith1Binop(
2356 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2357 return I;
2358
2359 BinaryOperator *B0, *B1;
2360 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2361 !match(Shuf.getOperand(1), m_BinOp(B1)))
2362 return nullptr;
2363
2364 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2365 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2366 // with a multiply, we will exit because C0/C1 will not be set.
2367 Value *X, *Y;
2368 Constant *C0 = nullptr, *C1 = nullptr;
2369 bool ConstantsAreOp1;
2370 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2371 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2372 ConstantsAreOp1 = false;
2373 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2374 m_Neg(m_Value(X)))) &&
2375 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2376 m_Neg(m_Value(Y)))))
2377 ConstantsAreOp1 = true;
2378 else
2379 return nullptr;
2380
2381 // We need matching binops to fold the lanes together.
2382 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2383 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2384 bool DropNSW = false;
2385 if (ConstantsAreOp1 && Opc0 != Opc1) {
2386 // TODO: We drop "nsw" if shift is converted into multiply because it may
2387 // not be correct when the shift amount is BitWidth - 1. We could examine
2388 // each vector element to determine if it is safe to keep that flag.
2389 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2390 DropNSW = true;
2391 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2392 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2393 Opc0 = AltB0.Opcode;
2394 C0 = cast<Constant>(AltB0.Op1);
2395 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2396 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2397 Opc1 = AltB1.Opcode;
2398 C1 = cast<Constant>(AltB1.Op1);
2399 }
2400 }
2401
2402 if (Opc0 != Opc1 || !C0 || !C1)
2403 return nullptr;
2404
2405 // The opcodes must be the same. Use a new name to make that clear.
2406 BinaryOperator::BinaryOps BOpc = Opc0;
2407
2408 // Select the constant elements needed for the single binop.
2409 ArrayRef<int> Mask = Shuf.getShuffleMask();
2410 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2411
2412 // We are moving a binop after a shuffle. When a shuffle has an undefined
2413 // mask element, the result is undefined, but it is not poison or undefined
2414 // behavior. That is not necessarily true for div/rem/shift.
2415 bool MightCreatePoisonOrUB =
2416 is_contained(Mask, PoisonMaskElem) &&
2417 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2418 if (MightCreatePoisonOrUB)
2419 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2420 ConstantsAreOp1);
2421
2422 Value *V;
2423 if (X == Y) {
2424 // Remove a binop and the shuffle by rearranging the constant:
2425 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2426 // shuffle (op C0, V), (op C1, V), M --> op C', V
2427 V = X;
2428 } else {
2429 // If there are 2 different variable operands, we must create a new shuffle
2430 // (select) first, so check uses to ensure that we don't end up with more
2431 // instructions than we started with.
2432 if (!B0->hasOneUse() && !B1->hasOneUse())
2433 return nullptr;
2434
2435 // If we use the original shuffle mask and op1 is *variable*, we would be
2436 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2437 // poison. We do not have to guard against UB when *constants* are op1
2438 // because safe constants guarantee that we do not overflow sdiv/srem (and
2439 // there's no danger for other opcodes).
2440 // TODO: To allow this case, create a new shuffle mask with no undefs.
2441 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2442 return nullptr;
2443
2444 // Note: In general, we do not create new shuffles in InstCombine because we
2445 // do not know if a target can lower an arbitrary shuffle optimally. In this
2446 // case, the shuffle uses the existing mask, so there is no additional risk.
2447
2448 // Select the variable vectors first, then perform the binop:
2449 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2450 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2451 V = Builder.CreateShuffleVector(X, Y, Mask);
2452 }
2453
2454 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2455 Builder.CreateBinOp(BOpc, NewC, V);
2456
2457 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2458 // 1. If we changed an opcode, poison conditions might have changed.
2459 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2460 // where the original code did not. But if we already made a safe constant,
2461 // then there's no danger.
2462 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2463 NewI->copyIRFlags(B0);
2464 NewI->andIRFlags(B1);
2465 if (DropNSW)
2466 NewI->setHasNoSignedWrap(false);
2467 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2468 NewI->dropPoisonGeneratingFlags();
2469 }
2470 return replaceInstUsesWith(Shuf, NewBO);
2471}
2472
2473/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2474/// Example (little endian):
2475/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2476static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2477 bool IsBigEndian) {
2478 // This must be a bitcasted shuffle of 1 vector integer operand.
2479 Type *DestType = Shuf.getType();
2480 Value *X;
2481 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2482 !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2483 return nullptr;
2484
2485 // The source type must have the same number of elements as the shuffle,
2486 // and the source element type must be larger than the shuffle element type.
2487 Type *SrcType = X->getType();
2488 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2489 cast<FixedVectorType>(SrcType)->getNumElements() !=
2490 cast<FixedVectorType>(DestType)->getNumElements() ||
2491 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2492 return nullptr;
2493
2494 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2495 "Expected a shuffle that decreases length");
2496
2497 // Last, check that the mask chooses the correct low bits for each narrow
2498 // element in the result.
2499 uint64_t TruncRatio =
2500 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2501 ArrayRef<int> Mask = Shuf.getShuffleMask();
2502 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2503 if (Mask[i] == PoisonMaskElem)
2504 continue;
2505 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2506 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2507 if (Mask[i] != (int)LSBIndex)
2508 return nullptr;
2509 }
2510
2511 return new TruncInst(X, DestType);
2512}
2513
2514/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2515/// narrowing (concatenating with poison and extracting back to the original
2516/// length). This allows replacing the wide select with a narrow select.
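///
/// For example (illustrative IR; %cond, %x, %y are placeholder values):
///   %wc = shufflevector <2 x i1> %cond, <2 x i1> poison,
///                       <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
///   %ws = select <4 x i1> %wc, <4 x i32> %x, <4 x i32> %y
///   %r  = shufflevector <4 x i32> %ws, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
/// -->
///   %nx = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
///   %ny = shufflevector <4 x i32> %y, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
///   %r  = select <2 x i1> %cond, <2 x i32> %nx, <2 x i32> %ny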
2517static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2518 InstCombiner::BuilderTy &Builder) {
2519 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2520 // of the 1st vector operand of a shuffle.
2521 if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2522 return nullptr;
2523
2524 // The vector being shuffled must be a vector select that we can eliminate.
2525 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2526 Value *Cond, *X, *Y;
2527 if (!match(Shuf.getOperand(0),
2528 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2529 return nullptr;
2530
2531 // We need a narrow condition value. It must be extended with poison elements
2532 // and have the same number of elements as this shuffle.
2533 unsigned NarrowNumElts =
2534 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2535 Value *NarrowCond;
2536 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2537 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2538 NarrowNumElts ||
2539 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2540 return nullptr;
2541
2542 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2543 // -->
2544 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2545 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2546 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2547 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2548}
2549
2550/// Canonicalize FP negate/abs after shuffle.
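///
/// For example (illustrative IR; %x and %y are placeholder values):
///   %n0 = fneg <4 x float> %x
///   %n1 = fneg <4 x float> %y
///   %r  = shufflevector <4 x float> %n0, <4 x float> %n1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
/// -->
///   %s  = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
///   %r  = fneg <4 x float> %s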
2552 InstCombiner::BuilderTy &Builder) {
2553 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2554 Value *X;
2555 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2556 return nullptr;
2557
2558 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2559
2560 // Match 2-input (binary) shuffle.
2561 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2562 Value *Y;
2563 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2564 S0->getOpcode() != S1->getOpcode() ||
2565 (!S0->hasOneUse() && !S1->hasOneUse()))
2566 return nullptr;
2567
2568 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2569 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2570 Instruction *NewF;
2571 if (IsFNeg) {
2572 NewF = UnaryOperator::CreateFNeg(NewShuf);
2573 } else {
2574 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2575 Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
2576 NewF = CallInst::Create(FAbs, {NewShuf});
2577 }
2578 NewF->copyIRFlags(S0);
2579 NewF->andIRFlags(S1);
2580 return NewF;
2581}
2582
2583/// Canonicalize casts after shuffle.
2584static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2585 InstCombiner::BuilderTy &Builder) {
2586 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2587 if (!Cast0)
2588 return nullptr;
2589
2590 // TODO: Allow other opcodes? That would require easing the type restrictions
2591 // below here.
2592 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2593 switch (CastOpcode) {
2594 case Instruction::SExt:
2595 case Instruction::ZExt:
2596 case Instruction::FPToSI:
2597 case Instruction::FPToUI:
2598 case Instruction::SIToFP:
2599 case Instruction::UIToFP:
2600 break;
2601 default:
2602 return nullptr;
2603 }
2604
2605 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2606 VectorType *ShufTy = Shuf.getType();
2607 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2608
2609 // TODO: Allow length-increasing shuffles?
2610 if (ShufTy->getElementCount().getKnownMinValue() >
2611 ShufOpTy->getElementCount().getKnownMinValue())
2612 return nullptr;
2613
2614 // shuffle (cast X), Poison, identity-with-extract-mask -->
2615 // cast (shuffle X, Poison, identity-with-extract-mask).
2616 if (isa<PoisonValue>(Shuf.getOperand(1)) && Cast0->hasOneUse() &&
2617 Shuf.isIdentityWithExtract()) {
2618 auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
2619 PoisonValue::get(CastSrcTy),
2620 Shuf.getShuffleMask());
2621 return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
2622 }
2623
2624 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2625 // Do we have 2 matching cast operands?
2626 if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2627 Cast0->getSrcTy() != Cast1->getSrcTy())
2628 return nullptr;
2629
2630 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2631 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2632 "Expected fixed vector operands for casts and binary shuffle");
2633 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2634 return nullptr;
2635
2636 // At least one of the operands must have only one use (the shuffle).
2637 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2638 return nullptr;
2639
2640 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2641 Value *X = Cast0->getOperand(0);
2642 Value *Y = Cast1->getOperand(0);
2643 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2644 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2645}
2646
2647/// Try to fold an extract subvector operation.
2648static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2649 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2650 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2651 return nullptr;
2652
2653 // Check if we are extracting all bits of an inserted scalar:
2654 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2655 Value *X;
2656 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2657 X->getType()->getPrimitiveSizeInBits() ==
2658 Shuf.getType()->getPrimitiveSizeInBits())
2659 return new BitCastInst(X, Shuf.getType());
2660
2661 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2662 Value *Y;
2663 ArrayRef<int> Mask;
2664 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2665 return nullptr;
2666
2667 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2668 // then combining may result in worse codegen.
2669 if (!Op0->hasOneUse())
2670 return nullptr;
2671
2672 // We are extracting a subvector from a shuffle. Remove excess elements from
2673 // the 1st shuffle mask to eliminate the extract.
2674 //
2675 // This transform is conservatively limited to identity extracts because we do
2676 // not allow arbitrary shuffle mask creation as a target-independent transform
2677 // (because we can't guarantee that will lower efficiently).
2678 //
2679 // If the extracting shuffle has a poison mask element, it transfers to the
2680 // new shuffle mask. Otherwise, copy the original mask element. Example:
2681 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2682 // shuf X, Y, <C0, poison, C2, poison>
2683 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2684 SmallVector<int, 16> NewMask(NumElts);
2685 assert(NumElts < Mask.size() &&
2686 "Identity with extract must have less elements than its inputs");
2687
2688 for (unsigned i = 0; i != NumElts; ++i) {
2689 int ExtractMaskElt = Shuf.getMaskValue(i);
2690 int MaskElt = Mask[i];
2691 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2692 }
2693 return new ShuffleVectorInst(X, Y, NewMask);
2694}
2695
2696/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2697/// operand with the operand of an insertelement.
2698static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2699 InstCombinerImpl &IC) {
2700 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2701 SmallVector<int, 16> Mask;
2702 Shuf.getShuffleMask(Mask);
2703
2704 int NumElts = Mask.size();
2705 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2706
2707 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2708 // not be able to handle it there if the insertelement has >1 use.
2709 // If the shuffle has an insertelement operand but does not choose the
2710 // inserted scalar element from that value, then we can replace that shuffle
2711 // operand with the source vector of the insertelement.
2712 Value *X;
2713 uint64_t IdxC;
2714 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2715 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2716 if (!is_contained(Mask, (int)IdxC))
2717 return IC.replaceOperand(Shuf, 0, X);
2718 }
2719 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2720 // Offset the index constant by the vector width because we are checking for
2721 // accesses to the 2nd vector input of the shuffle.
2722 IdxC += InpNumElts;
2723 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2724 if (!is_contained(Mask, (int)IdxC))
2725 return IC.replaceOperand(Shuf, 1, X);
2726 }
2727 // For the rest of the transform, the shuffle must not change vector sizes.
2728 // TODO: This restriction could be removed if the insert has only one use
2729 // (because the transform would require a new length-changing shuffle).
2730 if (NumElts != InpNumElts)
2731 return nullptr;
2732
2733 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2734 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2735 // We need an insertelement with a constant index.
2736 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2737 m_ConstantInt(IndexC))))
2738 return false;
2739
2740 // Test the shuffle mask to see if it splices the inserted scalar into the
2741 // operand 1 vector of the shuffle.
2742 int NewInsIndex = -1;
2743 for (int i = 0; i != NumElts; ++i) {
2744 // Ignore undef mask elements.
2745 if (Mask[i] == -1)
2746 continue;
2747
2748 // The shuffle takes elements of operand 1 without lane changes.
2749 if (Mask[i] == NumElts + i)
2750 continue;
2751
2752 // The shuffle must choose the inserted scalar exactly once.
2753 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2754 return false;
2755
2756 // The shuffle is placing the inserted scalar into element i.
2757 NewInsIndex = i;
2758 }
2759
2760 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2761
2762 // Index is updated to the potentially translated insertion lane.
2763 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2764 return true;
2765 };
2766
2767 // If the shuffle is unnecessary, insert the scalar operand directly into
2768 // operand 1 of the shuffle. Example:
2769 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2770 Value *Scalar;
2771 ConstantInt *IndexC;
2772 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2773 return InsertElementInst::Create(V1, Scalar, IndexC);
2774
2775 // Try again after commuting shuffle. Example:
2776 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2777 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2778 std::swap(V0, V1);
2779 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2780 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2781 return InsertElementInst::Create(V1, Scalar, IndexC);
2782
2783 return nullptr;
2784}
2785
2786static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2787 // Match the operands as identity with padding (also known as concatenation
2788 // with undef) shuffles of the same source type. The backend is expected to
2789 // recreate these concatenations from a shuffle of narrow operands.
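 // For example (illustrative IR; %x and %y are placeholder values):
 //   %w0 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 //   %w1 = shufflevector <2 x i32> %y, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 //   %r  = shufflevector <4 x i32> %w0, <4 x i32> %w1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 // -->
 //   %r  = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>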
2790 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2791 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2792 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2793 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2794 return nullptr;
2795
2796 // We limit this transform to power-of-2 types because we expect that the
2797 // backend can convert the simplified IR patterns to identical nodes as the
2798 // original IR.
2799 // TODO: If we can verify the same behavior for arbitrary types, the
2800 // power-of-2 checks can be removed.
2801 Value *X = Shuffle0->getOperand(0);
2802 Value *Y = Shuffle1->getOperand(0);
2803 if (X->getType() != Y->getType() ||
2804 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2805 !isPowerOf2_32(
2806 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2807 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2808 match(X, m_Undef()) || match(Y, m_Undef()))
2809 return nullptr;
2810 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2811 match(Shuffle1->getOperand(1), m_Undef()) &&
2812 "Unexpected operand for identity shuffle");
2813
2814 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2815 // operands directly by adjusting the shuffle mask to account for the narrower
2816 // types:
2817 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2818 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2819 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2820 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2821
2822 ArrayRef<int> Mask = Shuf.getShuffleMask();
2823 SmallVector<int, 16> NewMask(Mask.size(), -1);
2824 for (int i = 0, e = Mask.size(); i != e; ++i) {
2825 if (Mask[i] == -1)
2826 continue;
2827
2828 // If this shuffle is choosing an undef element from 1 of the sources, that
2829 // element is undef.
2830 if (Mask[i] < WideElts) {
2831 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2832 continue;
2833 } else {
2834 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2835 continue;
2836 }
2837
2838 // If this shuffle is choosing from the 1st narrow op, the mask element is
2839 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2840 // element is offset down to adjust for the narrow vector widths.
2841 if (Mask[i] < WideElts) {
2842 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2843 NewMask[i] = Mask[i];
2844 } else {
2845 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2846 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2847 }
2848 }
2849 return new ShuffleVectorInst(X, Y, NewMask);
2850}
2851
2852// Splatting the first element of the result of a BinOp, where any of the
2853// BinOp's operands are the result of a first-element splat, can be simplified
2854// to splatting the first element of the BinOp's result.
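//
// For example (illustrative IR; %xi and %y are placeholder values):
//   %xs = shufflevector <4 x i32> %xi, <4 x i32> poison, <4 x i32> zeroinitializer
//   %b  = add <4 x i32> %xs, %y
//   %r  = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// -->
//   %b2 = add <4 x i32> %xi, %y
//   %r  = shufflevector <4 x i32> %b2, <4 x i32> poison, <4 x i32> zeroinitializer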
2855Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2856 if (!match(SVI.getOperand(1), m_Poison()) ||
2857 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2858 !SVI.getOperand(0)->hasOneUse())
2859 return nullptr;
2860
2861 Value *Op0 = SVI.getOperand(0);
2862 Value *X, *Y;
2863 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2864 m_Value(Y))) &&
2865 !match(Op0, m_BinOp(m_Value(X),
2866 m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2867 return nullptr;
2868 if (X->getType() != Y->getType())
2869 return nullptr;
2870
2871 auto *BinOp = cast<BinaryOperator>(Op0);
2872 if (!isSafeToSpeculativelyExecute(BinOp))
2873 return nullptr;
2874
2875 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2876 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2877 NewBOI->copyIRFlags(BinOp);
2878
2879 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2880}
2881
2882Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2883 Value *LHS = SVI.getOperand(0);
2884 Value *RHS = SVI.getOperand(1);
2885 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2886 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2887 SVI.getType(), ShufQuery))
2888 return replaceInstUsesWith(SVI, V);
2889
2890 if (Instruction *I = simplifyBinOpSplats(SVI))
2891 return I;
2892
2893 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2894 // order to support scalable vectors.
2895 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2896 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2897
2898 if (isa<ScalableVectorType>(LHS->getType()))
2899 return nullptr;
2900
2901 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2902 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2903
2904 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2905 //
2906 // if X and Y are of the same (vector) type, and the element size is not
2907 // changed by the bitcasts, we can distribute the bitcasts through the
2908 // shuffle, hopefully reducing the number of instructions. We make sure that
2909 // at least one bitcast only has one use, so we don't *increase* the number of
2910 // instructions here.
2911 Value *X, *Y;
2912 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2913 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2914 X->getType()->getScalarSizeInBits() ==
2915 SVI.getType()->getScalarSizeInBits() &&
2916 (LHS->hasOneUse() || RHS->hasOneUse())) {
2917 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2918 SVI.getName() + ".uncasted");
2919 return new BitCastInst(V, SVI.getType());
2920 }
2921
2922 ArrayRef<int> Mask = SVI.getShuffleMask();
2923
2924 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2925 // simulated shuffle can simplify, then this shuffle is unnecessary:
2926 // shuf (bitcast X), undef, Mask --> bitcast X'
2927 // TODO: This could be extended to allow length-changing shuffles.
2928 // The transform might also be obsoleted if we allowed canonicalization
2929 // of bitcasted shuffles.
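// An illustrative case (hypothetical names): the <4 x i32> identity mask
// scales to a <2 x i64> identity mask, so the shuffle disappears entirely:
//   %bc = bitcast <2 x i64> %x to <4 x i32>
//   %s  = shufflevector <4 x i32> %bc, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// becomes
//   %s  = bitcast <2 x i64> %x to <4 x i32>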
2930 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2931 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2932 // Try to create a scaled mask constant.
2933 auto *XType = cast<FixedVectorType>(X->getType());
2934 unsigned XNumElts = XType->getNumElements();
2935 SmallVector<int, 16> ScaledMask;
2936 if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2937 // If the shuffled source vector simplifies, cast that value to this
2938 // shuffle's type.
2939 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2940 ScaledMask, XType, ShufQuery))
2941 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2942 }
2943 }
2944
2945 // shuffle x, x, mask --> shuffle x, undef, mask'
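// For example (hypothetical names), with both operands being the same %x:
//   shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
//   --> shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 0>
// Indices >= 4 simply wrap back into the first (identical) operand.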
2946 if (LHS == RHS) {
2947 assert(!match(RHS, m_Undef()) &&
2948 "Shuffle with 2 undef ops not simplified?");
2949 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2950 }
2951
2952 // shuffle undef, x, mask --> shuffle x, undef, mask'
2953 if (match(LHS, m_Undef())) {
2954 SVI.commute();
2955 return &SVI;
2956 }
2957
2958 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2959 return I;
2960
2961 if (Instruction *I = foldSelectShuffle(SVI))
2962 return I;
2963
2964 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2965 return I;
2966
2968 return I;
2969
2971 return I;
2972
2973 if (Instruction *I = foldCastShuffle(SVI, Builder))
2974 return I;
2975
2976 APInt PoisonElts(VWidth, 0);
2977 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2978 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2979 if (V != &SVI)
2980 return replaceInstUsesWith(SVI, V);
2981 return &SVI;
2982 }
2983
2984 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2985 return I;
2986
2987 // These transforms have the potential to lose undef knowledge, so they are
2988 // intentionally placed after SimplifyDemandedVectorElts().
2989 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2990 return I;
2991 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2992 return I;
2993
2994 if (match(RHS, m_Constant())) {
2995 if (auto *SI = dyn_cast<SelectInst>(LHS)) {
2996 // We cannot do this fold for elementwise select since ShuffleVector is
2997 // not elementwise.
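// An illustrative instance of the fold (hypothetical names); it typically
// requires at least one shuffled select arm to fold to a constant:
//   %sel = select i1 %c, <2 x i32> <i32 1, i32 2>, <2 x i32> %b
//   %s   = shufflevector <2 x i32> %sel, <2 x i32> <i32 7, i32 7>, <2 x i32> <i32 0, i32 3>
// becomes
//   %sb  = shufflevector <2 x i32> %b, <2 x i32> <i32 7, i32 7>, <2 x i32> <i32 0, i32 3>
//   %s   = select i1 %c, <2 x i32> <i32 1, i32 7>, <2 x i32> %sb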
2998 if (SI->getCondition()->getType()->isIntegerTy() &&
2999 (isa<PoisonValue>(RHS) ||
3000 isGuaranteedNotToBePoison(SI->getCondition()))) {
3001 if (Instruction *I = FoldOpIntoSelect(SVI, SI))
3002 return I;
3003 }
3004 }
3005 if (auto *PN = dyn_cast<PHINode>(LHS)) {
3006 if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
3007 return I;
3008 }
3009 }
3010
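// An illustrative rewrite (hypothetical names): the expression feeding the
// shuffle is re-evaluated with its lanes permuted, and the shuffle goes away:
//   %ins = insertelement <4 x i32> poison, i32 %a, i64 0
//   %add = add <4 x i32> %ins, <i32 1, i32 2, i32 3, i32 4>
//   %s   = shufflevector <4 x i32> %add, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// becomes
//   %ins.s = insertelement <4 x i32> poison, i32 %a, i64 3
//   %s     = add <4 x i32> %ins.s, <i32 4, i32 3, i32 2, i32 1>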
3011 if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
3012 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
3013 return replaceInstUsesWith(SVI, V);
3014 }
3015
3016 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3017 // a non-vector type. We can instead bitcast the original vector followed by
3018 // an extract of the desired element:
3019 //
3020 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3021 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3022 // %1 = bitcast <4 x i8> %sroa to i32
3023 // Becomes:
3024 // %bc = bitcast <16 x i8> %in to <4 x i32>
3025 // %ext = extractelement <4 x i32> %bc, i32 0
3026 //
3027 // If the shuffle is extracting a contiguous range of values from the input
3028 // vector then each use which is a bitcast of the extracted size can be
3029 // replaced. This will work if the vector types are compatible, and the begin
3030 // index is aligned to a value in the casted vector type. If the begin index
3031 // isn't aligned then we can shuffle the original vector (keeping the same
3032 // vector type) before extracting.
3033 //
3034 // This code will bail out if the target type is fundamentally incompatible
3035 // with vectors of the source type.
3036 //
3037 // Example of <16 x i8>, target type i32:
3038 // Index range [4,8): v-----------v Will work.
3039 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3040 // <16 x i8>: | | | | | | | | | | | | | | | | |
3041 // <4 x i32>: | | | | |
3042 // +-----------+-----------+-----------+-----------+
3043 // Index range [6,10): ^-----------^ Needs an extra shuffle.
3044 // Target type i40: ^--------------^ Won't work, bail.
3045 bool MadeChange = false;
3046 if (isShuffleExtractingFromLHS(SVI, Mask)) {
3047 Value *V = LHS;
3048 unsigned MaskElems = Mask.size();
3049 auto *SrcTy = cast<FixedVectorType>(V->getType());
3050 unsigned VecBitWidth = DL.getTypeSizeInBits(SrcTy);
3051 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
3052 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3053 unsigned SrcNumElems = SrcTy->getNumElements();
3054 SmallVector<BitCastInst *, 8> BCs;
3055 DenseMap<Type *, Value *> NewBCs;
3056 for (User *U : SVI.users())
3057 if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
3058 // Only visit bitcasts that weren't previously handled.
3059 if (BC->use_empty())
3060 continue;
3061 // Prefer to combine bitcasts of bitcasts before attempting this fold.
3062 if (BC->hasOneUse()) {
3063 auto *BC2 = dyn_cast<BitCastInst>(BC->user_back());
3064 if (BC2 && isEliminableCastPair(BC, BC2))
3065 continue;
3066 }
3067 BCs.push_back(BC);
3068 }
3069 for (BitCastInst *BC : BCs) {
3070 unsigned BegIdx = Mask.front();
3071 Type *TgtTy = BC->getDestTy();
3072 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
3073 if (!TgtElemBitWidth)
3074 continue;
3075 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3076 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3077 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3078 if (!VecBitWidthsEqual)
3079 continue;
3080 if (!VectorType::isValidElementType(TgtTy))
3081 continue;
3082 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
3083 if (!BegIsAligned) {
3084 // Shuffle the input so [0,MaskElems) contains the output, and
3085 // [MaskElems,SrcNumElems) is undef.
3086 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3087 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3088 ShuffleMask[I] = Idx;
3089 V = Builder.CreateShuffleVector(V, ShuffleMask,
3090 SVI.getName() + ".extract");
3091 BegIdx = 0;
3092 }
3093 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3094 assert(SrcElemsPerTgtElem);
3095 BegIdx /= SrcElemsPerTgtElem;
3096 auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
3097 if (Inserted)
3098 It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
3099 auto *Ext = Builder.CreateExtractElement(It->second, BegIdx,
3100 SVI.getName() + ".extract");
3101 // The shufflevector isn't being replaced: the bitcast that used it
3102 // is. InstCombine will visit the newly-created instructions.
3103 replaceInstUsesWith(*BC, Ext);
3104 MadeChange = true;
3105 }
3106 }
3107
3108 // If the LHS is a shufflevector itself, see if we can combine it with this
3109 // one without producing an unusual shuffle.
3110 // Cases that might be simplified:
3111 // 1.
3112 // x1=shuffle(v1,v2,mask1)
3113 // x=shuffle(x1,undef,mask)
3114 // ==>
3115 // x=shuffle(v1,undef,newMask)
3116 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3117 // 2.
3118 // x1=shuffle(v1,undef,mask1)
3119 // x=shuffle(x1,x2,mask)
3120 // where v1.size() == mask1.size()
3121 // ==>
3122 // x=shuffle(v1,x2,newMask)
3123 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3124 // 3.
3125 // x2=shuffle(v2,undef,mask2)
3126 // x=shuffle(x1,x2,mask)
3127 // where v2.size() == mask2.size()
3128 // ==>
3129 // x=shuffle(x1,v2,newMask)
3130 // newMask[i] = (mask[i] < x1.size())
3131 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3132 // 4.
3133 // x1=shuffle(v1,undef,mask1)
3134 // x2=shuffle(v2,undef,mask2)
3135 // x=shuffle(x1,x2,mask)
3136 // where v1.size() == v2.size()
3137 // ==>
3138 // x=shuffle(v1,v2,newMask)
3139 // newMask[i] = (mask[i] < x1.size())
3140 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3141 //
3142 // Here we are really conservative:
3143 // we are absolutely afraid of producing a shuffle mask not in the input
3144 // program, because the code gen may not be smart enough to turn a merged
3145 // shuffle into two specific shuffles: it may produce worse code. As such,
3146 // we only merge two shuffles if the result is either a splat or one of the
3147 // input shuffle masks. In this case, merging the shuffles just removes
3148 // one instruction, which we know is safe. This is good for things like
3149 // turning: (splat(splat)) -> splat, or
3150 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
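// A concrete instance of case 1 (hypothetical names), where the merged mask
// is a splat and is therefore acceptable:
//   %x1 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %x  = shufflevector <4 x i32> %x1, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// becomes
//   %x  = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 4, i32 4, i32 4, i32 4>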
3151 auto *LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3152 auto *RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3153 if (LHSShuffle)
3154 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3155 !match(RHS, m_Poison()))
3156 LHSShuffle = nullptr;
3157 if (RHSShuffle)
3158 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3159 RHSShuffle = nullptr;
3160 if (!LHSShuffle && !RHSShuffle)
3161 return MadeChange ? &SVI : nullptr;
3162
3163 Value* LHSOp0 = nullptr;
3164 Value* LHSOp1 = nullptr;
3165 Value* RHSOp0 = nullptr;
3166 unsigned LHSOp0Width = 0;
3167 unsigned RHSOp0Width = 0;
3168 if (LHSShuffle) {
3169 LHSOp0 = LHSShuffle->getOperand(0);
3170 LHSOp1 = LHSShuffle->getOperand(1);
3171 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3172 }
3173 if (RHSShuffle) {
3174 RHSOp0 = RHSShuffle->getOperand(0);
3175 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3176 }
3177 Value* newLHS = LHS;
3178 Value* newRHS = RHS;
3179 if (LHSShuffle) {
3180 // case 1
3181 if (match(RHS, m_Poison())) {
3182 newLHS = LHSOp0;
3183 newRHS = LHSOp1;
3184 }
3185 // case 2 or 4
3186 else if (LHSOp0Width == LHSWidth) {
3187 newLHS = LHSOp0;
3188 }
3189 }
3190 // case 3 or 4
3191 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3192 newRHS = RHSOp0;
3193 }
3194 // case 4
3195 if (LHSOp0 == RHSOp0) {
3196 newLHS = LHSOp0;
3197 newRHS = nullptr;
3198 }
3199
3200 if (newLHS == LHS && newRHS == RHS)
3201 return MadeChange ? &SVI : nullptr;
3202
3203 ArrayRef<int> LHSMask;
3204 ArrayRef<int> RHSMask;
3205 if (newLHS != LHS)
3206 LHSMask = LHSShuffle->getShuffleMask();
3207 if (RHSShuffle && newRHS != RHS)
3208 RHSMask = RHSShuffle->getShuffleMask();
3209
3210 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3211 SmallVector<int, 16> newMask;
3212 bool isSplat = true;
3213 int SplatElt = -1;
3214 // Create a new mask for the new ShuffleVectorInst so that the new
3215 // ShuffleVectorInst is equivalent to the original one.
3216 for (unsigned i = 0; i < VWidth; ++i) {
3217 int eltMask;
3218 if (Mask[i] < 0) {
3219 // This element is a poison value.
3220 eltMask = -1;
3221 } else if (Mask[i] < (int)LHSWidth) {
3222 // This element is from left hand side vector operand.
3223 //
3224 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3225 // new mask value for the element.
3226 if (newLHS != LHS) {
3227 eltMask = LHSMask[Mask[i]];
3228 // If the value selected is a poison value, explicitly specify it
3229 // with a -1 mask value.
3230 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3231 eltMask = -1;
3232 } else
3233 eltMask = Mask[i];
3234 } else {
3235 // This element is from right hand side vector operand
3236 //
3237 // If the value selected is a poison value, explicitly specify it
3238 // with a -1 mask value. (case 1)
3239 if (match(RHS, m_Poison()))
3240 eltMask = -1;
3241 // If RHS is going to be replaced (case 3 or 4), calculate the
3242 // new mask value for the element.
3243 else if (newRHS != RHS) {
3244 eltMask = RHSMask[Mask[i]-LHSWidth];
3245 // If the value selected is a poison value, explicitly specify it
3246 // with a -1 mask value.
3247 if (eltMask >= (int)RHSOp0Width) {
3248 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3249 "should have been check above");
3250 eltMask = -1;
3251 }
3252 } else
3253 eltMask = Mask[i]-LHSWidth;
3254
3255 // If LHS's width is changed, shift the mask value accordingly.
3256 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3257 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3258 // If newRHS == newLHS, we want to remap any references from newRHS to
3259 // newLHS so that we can properly identify splats that may occur due to
3260 // obfuscation across the two vectors.
3261 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3262 eltMask += newLHSWidth;
3263 }
3264
3265 // Check if this could still be a splat.
3266 if (eltMask >= 0) {
3267 if (SplatElt >= 0 && SplatElt != eltMask)
3268 isSplat = false;
3269 SplatElt = eltMask;
3270 }
3271
3272 newMask.push_back(eltMask);
3273 }
3274
3275 // If the result mask is equal to one of the original shuffle masks,
3276 // or is a splat, do the replacement.
3277 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3278 if (!newRHS)
3279 newRHS = PoisonValue::get(newLHS->getType());
3280 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3281 }
3282
3283 return MadeChange ? &SVI : nullptr;
3284}