Thanks for visiting codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cstddef>
84#include <cstdint>
85#include <iterator>
86#include <map>
87#include <optional>
88#include <set>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "simplifycfg"
97
99 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
100
101 cl::desc(
102 "Temporary development switch used to gradually uplift SimplifyCFG "
103 "into preserving DomTree,"));
104
105// Chosen as 2 so as to be cheap, but still to have enough power to fold
106// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
107// To catch this, we need to fold a compare and a select, hence '2' being the
108// minimum reasonable default.
110 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
111 cl::desc(
112 "Control the amount of phi node folding to perform (default = 2)"));
113
115 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
116 cl::desc("Control the maximal total instruction cost that we are willing "
117 "to speculatively execute to fold a 2-entry PHI node into a "
118 "select (default = 4)"));
119
120static cl::opt<bool>
121 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
122 cl::desc("Hoist common instructions up to the parent block"));
123
125 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
126 cl::desc("Hoist loads if the target supports conditional faulting"));
127
129 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
130 cl::desc("Hoist stores if the target supports conditional faulting"));
131
133 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
134 cl::desc("Control the maximal conditional load/store that we are willing "
135 "to speculatively execute to eliminate conditional branch "
136 "(default = 6)"));
137
139 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
140 cl::init(20),
141 cl::desc("Allow reordering across at most this many "
142 "instructions when hoisting"));
143
144static cl::opt<bool>
145 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
146 cl::desc("Sink common instructions down to the end block"));
147
149 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
150 cl::desc("Hoist conditional stores if an unconditional store precedes"));
151
153 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
154 cl::desc("Hoist conditional stores even if an unconditional store does not "
155 "precede - hoist multiple conditional stores into a single "
156 "predicated store"));
157
159 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
160 cl::desc("When merging conditional stores, do so even if the resultant "
161 "basic blocks are unlikely to be if-converted as a result"));
162
164 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
165 cl::desc("Allow exactly one expensive instruction to be speculatively "
166 "executed"));
167
169 "max-speculation-depth", cl::Hidden, cl::init(10),
170 cl::desc("Limit maximum recursion depth when calculating costs of "
171 "speculatively executed instructions"));
172
173static cl::opt<int>
174 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
175 cl::init(10),
176 cl::desc("Max size of a block which is still considered "
177 "small enough to thread through"));
178
179// Two is chosen to allow one negation and a logical combine.
181 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
182 cl::init(2),
183 cl::desc("Maximum cost of combining conditions when "
184 "folding branches"));
185
187 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
188 cl::init(2),
189 cl::desc("Multiplier to apply to threshold when determining whether or not "
190 "to fold branch to common destination when vector operations are "
191 "present"));
192
194 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
195 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
196
198 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
199 cl::desc("Limit cases to analyze when converting a switch to select"));
200
202 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
203 cl::desc("Limit number of blocks a define in a threaded block is allowed "
204 "to be live in"));
205
207
208STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
209STATISTIC(NumLinearMaps,
210 "Number of switch instructions turned into linear mapping");
211STATISTIC(NumLookupTables,
212 "Number of switch instructions turned into lookup tables");
214 NumLookupTablesHoles,
215 "Number of switch instructions turned into lookup tables (holes checked)");
216STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
217STATISTIC(NumFoldValueComparisonIntoPredecessors,
218 "Number of value comparisons folded into predecessor basic blocks");
219STATISTIC(NumFoldBranchToCommonDest,
220 "Number of branches folded into predecessor basic block");
222 NumHoistCommonCode,
223 "Number of common instruction 'blocks' hoisted up to the begin block");
224STATISTIC(NumHoistCommonInstrs,
225 "Number of common instructions hoisted up to the begin block");
226STATISTIC(NumSinkCommonCode,
227 "Number of common instruction 'blocks' sunk down to the end block");
228STATISTIC(NumSinkCommonInstrs,
229 "Number of common instructions sunk down to the end block");
230STATISTIC(NumSpeculations, "Number of speculative executed instructions");
231STATISTIC(NumInvokes,
232 "Number of invokes with empty resume blocks simplified into calls");
233STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
234STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
235
236namespace {
237
238// The first field contains the value that the switch produces when a certain
239// case group is selected, and the second field is a vector containing the
240// cases composing the case group.
241using SwitchCaseResultVectorTy =
243
244// The first field contains the phi node that generates a result of the switch
245// and the second field contains the value generated for a certain case in the
246// switch for that PHI.
247using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
248
/// ValueEqualityComparisonCase - Represents a case of a switch (or of a
/// branch acting like a two-case switch): the constant value compared
/// against, and the destination block taken when the comparison matches.
struct ValueEqualityComparisonCase {
  // NOTE(review): the declaration of the `ConstantInt *Value` member appears
  // to be elided from this view; the constructor and operator< below both
  // reference it.
  BasicBlock *Dest;

  ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
      : Value(Value), Dest(Dest) {}

  // Order solely by the case value's pointer identity.
  bool operator<(ValueEqualityComparisonCase RHS) const {
    // Comparing pointers is ok as we only rely on the order for uniquing.
    return Value < RHS.Value;
  }

  // Lets llvm::erase(Cases, BB) drop all cases targeting a given block
  // (see eliminateBlockCases below).
  bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
};
264
/// Driver for the peephole CFG simplifications implemented in this file.
/// An instance bundles the analyses, options, and update machinery that the
/// individual simplify* helpers need, and applies them to one basic block at
/// a time via run()/simplifyOnce().
class SimplifyCFGOpt {
  const TargetTransformInfo &TTI;    // Target cost/feature queries.
  DomTreeUpdater *DTU;               // May be null; never holds a PostDomTree
                                     // (enforced by the ctor assert below).
  const DataLayout &DL;
  ArrayRef<WeakVH> LoopHeaders;      // Weak handles to loop-header blocks,
                                     // supplied by the caller.
  const SimplifyCFGOptions &Options;
  // Set by requestResimplify() when a transform wants the block visited
  // again; presumably consumed by run() (definition not visible here).
  bool Resimplify;

  // Helpers for recognizing value-equality comparisons (switches and
  // equality branches) and folding them into predecessor blocks.
  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *getValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool foldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  // Per-terminator-kind simplification entry points.
  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);

  // Hoisting, sinking, and speculation of code across conditional edges.
  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs);
  bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
  bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  bool simplifyOnce(BasicBlock *BB);
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};
334
335// we synthesize a || b as select a, true, b
336// we synthesize a && b as select a, b, false
337// this function determines if SI is playing one of those roles.
338[[maybe_unused]] bool
339isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
340 return ((isa<ConstantInt>(SI->getTrueValue()) &&
341 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
342 (isa<ConstantInt>(SI->getFalseValue()) &&
343 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
344}
345
346} // end anonymous namespace
347
348/// Return true if all the PHI nodes in the basic block \p BB
349/// receive compatible (identical) incoming values when coming from
350/// all of the predecessor blocks that are specified in \p IncomingBlocks.
351///
352/// Note that if the values aren't exactly identical, but \p EquivalenceSet
353/// is provided, and *both* of the values are present in the set,
354/// then they are considered equal.
356 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
357 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
358 assert(IncomingBlocks.size() == 2 &&
359 "Only for a pair of incoming blocks at the time!");
360
361 // FIXME: it is okay if one of the incoming values is an `undef` value,
362 // iff the other incoming value is guaranteed to be a non-poison value.
363 // FIXME: it is okay if one of the incoming values is a `poison` value.
364 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
365 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
366 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
367 if (IV0 == IV1)
368 return true;
369 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
370 EquivalenceSet->contains(IV1))
371 return true;
372 return false;
373 });
374}
375
/// Return true if it is safe to merge these two
/// terminator instructions together.
///
/// Merging is unsafe iff the two terminators' blocks share a successor whose
/// PHI nodes would receive conflicting (incompatible) incoming values from
/// the two blocks. If \p FailBlocks is provided, every such conflicting
/// successor is collected in it instead of stopping at the first conflict.
static bool
    // NOTE(review): the line carrying this function's name and its two
    // terminator parameters (SI1/SI2) appears to be elided from this view.
    SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
  if (SI1 == SI2)
    return false; // Can't merge with self!

  // It is not safe to merge these two switch instructions if they have a common
  // successor, and if that successor has a PHI node, and if *that* PHI node has
  // conflicting incoming values from the two switch blocks.
  BasicBlock *SI1BB = SI1->getParent();
  BasicBlock *SI2BB = SI2->getParent();

  // NOTE(review): the declaration of SI1Succs (presumably the set of SI1BB's
  // successors) appears to be elided from this view.
  bool Fail = false;
  for (BasicBlock *Succ : successors(SI2BB)) {
    // Only successors common to both blocks can have conflicting PHIs.
    if (!SI1Succs.count(Succ))
      continue;
    // Compatible incoming values for all PHIs means this successor is fine.
    if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
      continue;
    Fail = true;
    if (FailBlocks)
      FailBlocks->insert(Succ);
    else
      break; // Caller only wants a yes/no answer; first conflict suffices.
  }

  return !Fail;
}
406
407/// Update PHI nodes in Succ to indicate that there will now be entries in it
408/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
409/// will be the same as those coming in from ExistPred, an existing predecessor
410/// of Succ.
411static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
412 BasicBlock *ExistPred,
413 MemorySSAUpdater *MSSAU = nullptr) {
414 for (PHINode &PN : Succ->phis())
415 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
416 if (MSSAU)
417 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
418 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
419}
420
421/// Compute an abstract "cost" of speculating the given instruction,
422/// which is assumed to be safe to speculate. TCC_Free means cheap,
423/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
424/// expensive.
426 const TargetTransformInfo &TTI) {
427 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
428}
429
430/// If we have a merge point of an "if condition" as accepted above,
431/// return true if the specified value dominates the block. We don't handle
432/// the true generality of domination here, just a special case which works
433/// well enough for us.
434///
435/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
436/// see if V (which must be an instruction) and its recursive operands
437/// that do not dominate BB have a combined cost lower than Budget and
438/// are non-trapping. If both are true, the instruction is inserted into the
439/// set and true is returned.
440///
441/// The cost for most non-trapping instructions is defined as 1 except for
442/// Select whose cost is 2.
443///
444/// After this function returns, Cost is increased by the cost of
445/// V plus its non-dominating operands. If that cost is greater than
446/// Budget, false is returned and Cost is undefined.
448 Value *V, BasicBlock *BB, Instruction *InsertPt,
449 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
451 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
452 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
453 // so limit the recursion depth.
454 // TODO: While this recursion limit does prevent pathological behavior, it
455 // would be better to track visited instructions to avoid cycles.
457 return false;
458
460 if (!I) {
461 // Non-instructions dominate all instructions and can be executed
462 // unconditionally.
463 return true;
464 }
465 BasicBlock *PBB = I->getParent();
466
467 // We don't want to allow weird loops that might have the "if condition" in
468 // the bottom of this block.
469 if (PBB == BB)
470 return false;
471
472 // If this instruction is defined in a block that contains an unconditional
473 // branch to BB, then it must be in the 'conditional' part of the "if
474 // statement". If not, it definitely dominates the region.
476 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
477 return true;
478
479 // If we have seen this instruction before, don't count it again.
480 if (AggressiveInsts.count(I))
481 return true;
482
483 // Okay, it looks like the instruction IS in the "condition". Check to
484 // see if it's a cheap instruction to unconditionally compute, and if it
485 // only uses stuff defined outside of the condition. If so, hoist it out.
486 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
487 return false;
488
489 // Overflow arithmetic instruction plus extract value are usually generated
490 // when a division is being replaced. But, in this case, the zero check may
491 // still be kept in the code. In that case it would be worth to hoist these
492 // two instruction out of the basic block. Let's treat this pattern as one
493 // single cheap instruction here!
494 WithOverflowInst *OverflowInst;
495 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
496 ZeroCostInstructions.insert(OverflowInst);
497 Cost += 1;
498 } else if (!ZeroCostInstructions.contains(I))
499 Cost += computeSpeculationCost(I, TTI);
500
501 // Allow exactly one instruction to be speculated regardless of its cost
502 // (as long as it is safe to do so).
503 // This is intended to flatten the CFG even if the instruction is a division
504 // or other expensive operation. The speculation of an expensive instruction
505 // is expected to be undone in CodeGenPrepare if the speculation has not
506 // enabled further IR optimizations.
507 if (Cost > Budget &&
508 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
509 !Cost.isValid()))
510 return false;
511
512 // Okay, we can only really hoist these out if their operands do
513 // not take us over the cost threshold.
514 for (Use &Op : I->operands())
515 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
516 TTI, AC, ZeroCostInstructions, Depth + 1))
517 return false;
518 // Okay, it's safe to do this! Remember this instruction.
519 AggressiveInsts.insert(I);
520 return true;
521}
522
523/// Extract ConstantInt from value, looking through IntToPtr
524/// and PointerNullValue. Return NULL if value is not a constant int.
526 // Normal constant int.
528 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
529 return CI;
530
531 // It is not safe to look through inttoptr or ptrtoint when using unstable
532 // pointer types.
533 if (DL.hasUnstableRepresentation(V->getType()))
534 return nullptr;
535
536 // This is some kind of pointer constant. Turn it into a pointer-sized
537 // ConstantInt if possible.
538 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
539
540 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
542 return ConstantInt::get(IntPtrTy, 0);
543
544 // IntToPtr const int, we can look through this if the semantics of
545 // inttoptr for this address space are a simple (truncating) bitcast.
547 if (CE->getOpcode() == Instruction::IntToPtr)
548 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
549 // The constant is very likely to have the right type already.
550 if (CI->getType() == IntPtrTy)
551 return CI;
552 else
553 return cast<ConstantInt>(
554 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
555 }
556 return nullptr;
557}
558
559namespace {
560
561/// Given a chain of or (||) or and (&&) comparison of a value against a
562/// constant, this will try to recover the information required for a switch
563/// structure.
564/// It will depth-first traverse the chain of comparison, seeking for patterns
565/// like %a == 12 or %a < 4 and combine them to produce a set of integer
566/// representing the different cases for the switch.
567/// Note that if the chain is composed of '||' it will build the set of elements
568/// that matches the comparisons (i.e. any of this value validate the chain)
569/// while for a chain of '&&' it will build the set elements that make the test
570/// fail.
571struct ConstantComparesGatherer {
572 const DataLayout &DL;
573
574 /// Value found for the switch comparison
575 Value *CompValue = nullptr;
576
577 /// Extra clause to be checked before the switch
578 Value *Extra = nullptr;
579
580 /// Set of integers to match in switch
582
583 /// Number of comparisons matched in the and/or chain
584 unsigned UsedICmps = 0;
585
586 /// If the elements in Vals matches the comparisons
587 bool IsEq = false;
588
589 // Used to check if the first matched CompValue shall be the Extra check.
590 bool IgnoreFirstMatch = false;
591 bool MultipleMatches = false;
592
593 /// Construct and compute the result for the comparison instruction Cond
594 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
595 gather(Cond);
596 if (CompValue || !MultipleMatches)
597 return;
598 Extra = nullptr;
599 Vals.clear();
600 UsedICmps = 0;
601 IgnoreFirstMatch = true;
602 gather(Cond);
603 }
604
605 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
606 ConstantComparesGatherer &
607 operator=(const ConstantComparesGatherer &) = delete;
608
609private:
610 /// Try to set the current value used for the comparison, it succeeds only if
611 /// it wasn't set before or if the new value is the same as the old one
612 bool setValueOnce(Value *NewVal) {
613 if (IgnoreFirstMatch) {
614 IgnoreFirstMatch = false;
615 return false;
616 }
617 if (CompValue && CompValue != NewVal) {
618 MultipleMatches = true;
619 return false;
620 }
621 CompValue = NewVal;
622 return true;
623 }
624
625 /// Try to match Instruction "I" as a comparison against a constant and
626 /// populates the array Vals with the set of values that match (or do not
627 /// match depending on isEQ).
628 /// Return false on failure. On success, the Value the comparison matched
629 /// against is placed in CompValue.
630 /// If CompValue is already set, the function is expected to fail if a match
631 /// is found but the value compared to is different.
632 bool matchInstruction(Instruction *I, bool isEQ) {
633 if (match(I, m_Not(m_Instruction(I))))
634 isEQ = !isEQ;
635
636 Value *Val;
637 if (match(I, m_NUWTrunc(m_Value(Val)))) {
638 // If we already have a value for the switch, it has to match!
639 if (!setValueOnce(Val))
640 return false;
641 UsedICmps++;
642 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
643 return true;
644 }
645 // If this is an icmp against a constant, handle this as one of the cases.
646 ICmpInst *ICI;
647 ConstantInt *C;
648 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
649 (C = getConstantInt(I->getOperand(1), DL)))) {
650 return false;
651 }
652
653 Value *RHSVal;
654 const APInt *RHSC;
655
656 // Pattern match a special case
657 // (x & ~2^z) == y --> x == y || x == y|2^z
658 // This undoes a transformation done by instcombine to fuse 2 compares.
659 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
660 // It's a little bit hard to see why the following transformations are
661 // correct. Here is a CVC3 program to verify them for 64-bit values:
662
663 /*
664 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
665 x : BITVECTOR(64);
666 y : BITVECTOR(64);
667 z : BITVECTOR(64);
668 mask : BITVECTOR(64) = BVSHL(ONE, z);
669 QUERY( (y & ~mask = y) =>
670 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
671 );
672 QUERY( (y | mask = y) =>
673 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
674 );
675 */
676
677 // Please note that each pattern must be a dual implication (<--> or
678 // iff). One directional implication can create spurious matches. If the
679 // implication is only one-way, an unsatisfiable condition on the left
680 // side can imply a satisfiable condition on the right side. Dual
681 // implication ensures that satisfiable conditions are transformed to
682 // other satisfiable conditions and unsatisfiable conditions are
683 // transformed to other unsatisfiable conditions.
684
685 // Here is a concrete example of a unsatisfiable condition on the left
686 // implying a satisfiable condition on the right:
687 //
688 // mask = (1 << z)
689 // (x & ~mask) == y --> (x == y || x == (y | mask))
690 //
691 // Substituting y = 3, z = 0 yields:
692 // (x & -2) == 3 --> (x == 3 || x == 2)
693
694 // Pattern match a special case:
695 /*
696 QUERY( (y & ~mask = y) =>
697 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
698 );
699 */
700 if (match(ICI->getOperand(0),
701 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
702 APInt Mask = ~*RHSC;
703 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
704 // If we already have a value for the switch, it has to match!
705 if (!setValueOnce(RHSVal))
706 return false;
707
708 Vals.push_back(C);
709 Vals.push_back(
710 ConstantInt::get(C->getContext(),
711 C->getValue() | Mask));
712 UsedICmps++;
713 return true;
714 }
715 }
716
717 // Pattern match a special case:
718 /*
719 QUERY( (y | mask = y) =>
720 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
721 );
722 */
723 if (match(ICI->getOperand(0),
724 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
725 APInt Mask = *RHSC;
726 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
727 // If we already have a value for the switch, it has to match!
728 if (!setValueOnce(RHSVal))
729 return false;
730
731 Vals.push_back(C);
732 Vals.push_back(ConstantInt::get(C->getContext(),
733 C->getValue() & ~Mask));
734 UsedICmps++;
735 return true;
736 }
737 }
738
739 // If we already have a value for the switch, it has to match!
740 if (!setValueOnce(ICI->getOperand(0)))
741 return false;
742
743 UsedICmps++;
744 Vals.push_back(C);
745 return true;
746 }
747
748 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
749 ConstantRange Span =
751
752 // Shift the range if the compare is fed by an add. This is the range
753 // compare idiom as emitted by instcombine.
754 Value *CandidateVal = I->getOperand(0);
755 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
756 Span = Span.subtract(*RHSC);
757 CandidateVal = RHSVal;
758 }
759
760 // If this is an and/!= check, then we are looking to build the set of
761 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
762 // x != 0 && x != 1.
763 if (!isEQ)
764 Span = Span.inverse();
765
766 // If there are a ton of values, we don't want to make a ginormous switch.
767 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
768 return false;
769 }
770
771 // If we already have a value for the switch, it has to match!
772 if (!setValueOnce(CandidateVal))
773 return false;
774
775 // Add all values from the range to the set
776 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
777 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
778
779 UsedICmps++;
780 return true;
781 }
782
783 /// Given a potentially 'or'd or 'and'd together collection of icmp
784 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
785 /// the value being compared, and stick the list constants into the Vals
786 /// vector.
787 /// One "Extra" case is allowed to differ from the other.
788 void gather(Value *V) {
789 Value *Op0, *Op1;
790 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
791 IsEq = true;
792 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
793 IsEq = false;
794 else
795 return;
796 // Keep a stack (SmallVector for efficiency) for depth-first traversal
797 SmallVector<Value *, 8> DFT{Op0, Op1};
798 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
799
800 while (!DFT.empty()) {
801 V = DFT.pop_back_val();
802
803 if (Instruction *I = dyn_cast<Instruction>(V)) {
804 // If it is a || (or && depending on isEQ), process the operands.
805 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
806 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
807 if (Visited.insert(Op1).second)
808 DFT.push_back(Op1);
809 if (Visited.insert(Op0).second)
810 DFT.push_back(Op0);
811
812 continue;
813 }
814
815 // Try to match the current instruction
816 if (matchInstruction(I, IsEq))
817 // Match succeed, continue the loop
818 continue;
819 }
820
821 // One element of the sequence of || (or &&) could not be match as a
822 // comparison against the same value as the others.
823 // We allow only one "Extra" case to be checked before the switch
824 if (!Extra) {
825 Extra = V;
826 continue;
827 }
828 // Failed to parse a proper sequence, abort now
829 CompValue = nullptr;
830 break;
831 }
832 }
833};
834
835} // end anonymous namespace
836
838 MemorySSAUpdater *MSSAU = nullptr) {
839 Instruction *Cond = nullptr;
841 Cond = dyn_cast<Instruction>(SI->getCondition());
842 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
843 if (BI->isConditional())
844 Cond = dyn_cast<Instruction>(BI->getCondition());
845 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
846 Cond = dyn_cast<Instruction>(IBI->getAddress());
847 }
848
849 TI->eraseFromParent();
850 if (Cond)
852}
853
854/// Return true if the specified terminator checks
855/// to see if a value is equal to constant integer value.
856Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
857 Value *CV = nullptr;
858 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
859 // Do not permit merging of large switch instructions into their
860 // predecessors unless there is only one predecessor.
861 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
862 CV = SI->getCondition();
863 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
864 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
865 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
866 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
867 CV = ICI->getOperand(0);
868 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
869 if (Trunc->hasNoUnsignedWrap())
870 CV = Trunc->getOperand(0);
871 }
872 }
873
874 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
875 if (CV) {
876 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
877 Value *Ptr = PTII->getPointerOperand();
878 if (DL.hasUnstableRepresentation(Ptr->getType()))
879 return CV;
880 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
881 CV = Ptr;
882 }
883 }
884 return CV;
885}
886
887/// Given a value comparison instruction,
888/// decode all of the 'cases' that it represents and return the 'default' block.
889BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
890 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
891 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
892 Cases.reserve(SI->getNumCases());
893 for (auto Case : SI->cases())
894 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
895 Case.getCaseSuccessor()));
896 return SI->getDefaultDest();
897 }
898
899 BranchInst *BI = cast<BranchInst>(TI);
900 Value *Cond = BI->getCondition();
901 ICmpInst::Predicate Pred;
902 ConstantInt *C;
903 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
904 Pred = ICI->getPredicate();
905 C = getConstantInt(ICI->getOperand(1), DL);
906 } else {
907 Pred = ICmpInst::ICMP_NE;
908 auto *Trunc = cast<TruncInst>(Cond);
909 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
910 }
911 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
912 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
913 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
914}
915
916/// Given a vector of bb/value pairs, remove any entries
917/// in the list that match the specified block.
918static void
920 std::vector<ValueEqualityComparisonCase> &Cases) {
921 llvm::erase(Cases, BB);
922}
923
924/// Return true if there are any keys in C1 that exist in C2 as well.
925static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
926 std::vector<ValueEqualityComparisonCase> &C2) {
927 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
928
929 // Make V1 be smaller than V2.
930 if (V1->size() > V2->size())
931 std::swap(V1, V2);
932
933 if (V1->empty())
934 return false;
935 if (V1->size() == 1) {
936 // Just scan V2.
937 ConstantInt *TheVal = (*V1)[0].Value;
938 for (const ValueEqualityComparisonCase &VECC : *V2)
939 if (TheVal == VECC.Value)
940 return true;
941 }
942
943 // Otherwise, just sort both lists and compare element by element.
944 array_pod_sort(V1->begin(), V1->end());
945 array_pod_sort(V2->begin(), V2->end());
946 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
947 while (i1 != e1 && i2 != e2) {
948 if ((*V1)[i1].Value == (*V2)[i2].Value)
949 return true;
950 if ((*V1)[i1].Value < (*V2)[i2].Value)
951 ++i1;
952 else
953 ++i2;
954 }
955 return false;
956}
957
958// Set branch weights on SwitchInst. This sets the metadata if there is at
959// least one non-zero weight.
961 bool IsExpected) {
962 // Check that there is at least one non-zero weight. Otherwise, pass
963 // nullptr to setMetadata which will erase the existing metadata.
964 MDNode *N = nullptr;
965 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
966 N = MDBuilder(SI->getParent()->getContext())
967 .createBranchWeights(Weights, IsExpected);
968 SI->setMetadata(LLVMContext::MD_prof, N);
969}
970
971// Similar to the above, but for branch and select instructions that take
972// exactly 2 weights.
973static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
974 uint32_t FalseWeight, bool IsExpected) {
976 // Check that there is at least one non-zero weight. Otherwise, pass
977 // nullptr to setMetadata which will erase the existing metadata.
978 MDNode *N = nullptr;
979 if (TrueWeight || FalseWeight)
980 N = MDBuilder(I->getParent()->getContext())
981 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
982 I->setMetadata(LLVMContext::MD_prof, N);
983}
984
985/// If TI is known to be a terminator instruction and its block is known to
986/// only have a single predecessor block, check to see if that predecessor is
987/// also a value comparison with the same value, and if that comparison
988/// determines the outcome of this comparison. If so, simplify TI. This does a
989/// very limited form of jump threading.
990bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
991 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
992 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
993 if (!PredVal)
994 return false; // Not a value comparison in predecessor.
995
996 Value *ThisVal = isValueEqualityComparison(TI);
997 assert(ThisVal && "This isn't a value comparison!!");
998 if (ThisVal != PredVal)
999 return false; // Different predicates.
1000
1001 // TODO: Preserve branch weight metadata, similarly to how
1002 // foldValueComparisonIntoPredecessors preserves it.
1003
1004 // Find out information about when control will move from Pred to TI's block.
1005 std::vector<ValueEqualityComparisonCase> PredCases;
1006 BasicBlock *PredDef =
1007 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
1008 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
1009
1010 // Find information about how control leaves this block.
1011 std::vector<ValueEqualityComparisonCase> ThisCases;
1012 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
1013 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
1014
1015 // If TI's block is the default block from Pred's comparison, potentially
1016 // simplify TI based on this knowledge.
1017 if (PredDef == TI->getParent()) {
1018 // If we are here, we know that the value is none of those cases listed in
1019 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1020 // can simplify TI.
1021 if (!valuesOverlap(PredCases, ThisCases))
1022 return false;
1023
1024 if (isa<BranchInst>(TI)) {
1025 // Okay, one of the successors of this condbr is dead. Convert it to a
1026 // uncond br.
1027 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1028 // Insert the new branch.
1029 Instruction *NI = Builder.CreateBr(ThisDef);
1030 (void)NI;
1031
1032 // Remove PHI node entries for the dead edge.
1033 ThisCases[0].Dest->removePredecessor(PredDef);
1034
1035 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1036 << "Through successor TI: " << *TI << "Leaving: " << *NI
1037 << "\n");
1038
1040
1041 if (DTU)
1042 DTU->applyUpdates(
1043 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1044
1045 return true;
1046 }
1047
1048 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1049 // Okay, TI has cases that are statically dead, prune them away.
1050 SmallPtrSet<Constant *, 16> DeadCases;
1051 for (const ValueEqualityComparisonCase &Case : PredCases)
1052 DeadCases.insert(Case.Value);
1053
1054 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1055 << "Through successor TI: " << *TI);
1056
1057 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1058 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1059 --i;
1060 auto *Successor = i->getCaseSuccessor();
1061 if (DTU)
1062 ++NumPerSuccessorCases[Successor];
1063 if (DeadCases.count(i->getCaseValue())) {
1064 Successor->removePredecessor(PredDef);
1065 SI.removeCase(i);
1066 if (DTU)
1067 --NumPerSuccessorCases[Successor];
1068 }
1069 }
1070
1071 if (DTU) {
1072 std::vector<DominatorTree::UpdateType> Updates;
1073 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1074 if (I.second == 0)
1075 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1076 DTU->applyUpdates(Updates);
1077 }
1078
1079 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1080 return true;
1081 }
1082
1083 // Otherwise, TI's block must correspond to some matched value. Find out
1084 // which value (or set of values) this is.
1085 ConstantInt *TIV = nullptr;
1086 BasicBlock *TIBB = TI->getParent();
1087 for (const auto &[Value, Dest] : PredCases)
1088 if (Dest == TIBB) {
1089 if (TIV)
1090 return false; // Cannot handle multiple values coming to this block.
1091 TIV = Value;
1092 }
1093 assert(TIV && "No edge from pred to succ?");
1094
1095 // Okay, we found the one constant that our value can be if we get into TI's
1096 // BB. Find out which successor will unconditionally be branched to.
1097 BasicBlock *TheRealDest = nullptr;
1098 for (const auto &[Value, Dest] : ThisCases)
1099 if (Value == TIV) {
1100 TheRealDest = Dest;
1101 break;
1102 }
1103
1104 // If not handled by any explicit cases, it is handled by the default case.
1105 if (!TheRealDest)
1106 TheRealDest = ThisDef;
1107
1108 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1109
1110 // Remove PHI node entries for dead edges.
1111 BasicBlock *CheckEdge = TheRealDest;
1112 for (BasicBlock *Succ : successors(TIBB))
1113 if (Succ != CheckEdge) {
1114 if (Succ != TheRealDest)
1115 RemovedSuccs.insert(Succ);
1116 Succ->removePredecessor(TIBB);
1117 } else
1118 CheckEdge = nullptr;
1119
1120 // Insert the new branch.
1121 Instruction *NI = Builder.CreateBr(TheRealDest);
1122 (void)NI;
1123
1124 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1125 << "Through successor TI: " << *TI << "Leaving: " << *NI
1126 << "\n");
1127
1129 if (DTU) {
1130 SmallVector<DominatorTree::UpdateType, 2> Updates;
1131 Updates.reserve(RemovedSuccs.size());
1132 for (auto *RemovedSucc : RemovedSuccs)
1133 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1134 DTU->applyUpdates(Updates);
1135 }
1136 return true;
1137}
1138
1139namespace {
1140
1141/// This class implements a stable ordering of constant
1142/// integers that does not depend on their address. This is important for
1143/// applications that sort ConstantInt's to ensure uniqueness.
1144struct ConstantIntOrdering {
1145 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1146 return LHS->getValue().ult(RHS->getValue());
1147 }
1148};
1149
1150} // end anonymous namespace
1151
1153 ConstantInt *const *P2) {
1154 const ConstantInt *LHS = *P1;
1155 const ConstantInt *RHS = *P2;
1156 if (LHS == RHS)
1157 return 0;
1158 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1159}
1160
1161/// Get Weights of a given terminator, the default weight is at the front
1162/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1163/// metadata.
1165 SmallVectorImpl<uint64_t> &Weights) {
1166 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1167 assert(MD && "Invalid branch-weight metadata");
1168 extractFromBranchWeightMD64(MD, Weights);
1169
1170 // If TI is a conditional eq, the default case is the false case,
1171 // and the corresponding branch-weight data is at index 2. We swap the
1172 // default weight to be the first entry.
1173 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1174 assert(Weights.size() == 2);
1175 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1176 if (!ICI)
1177 return;
1178
1179 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1180 std::swap(Weights.front(), Weights.back());
1181 }
1182}
1183
1184/// Keep halving the weights until all can fit in uint32_t.
1186 uint64_t Max = *llvm::max_element(Weights);
1187 if (Max > UINT_MAX) {
1188 unsigned Offset = 32 - llvm::countl_zero(Max);
1189 for (uint64_t &I : Weights)
1190 I >>= Offset;
1191 }
1192}
1193
1195 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1196 Instruction *PTI = PredBlock->getTerminator();
1197
1198 // If we have bonus instructions, clone them into the predecessor block.
1199 // Note that there may be multiple predecessor blocks, so we cannot move
1200 // bonus instructions to a predecessor block.
1201 for (Instruction &BonusInst : *BB) {
1202 if (BonusInst.isTerminator())
1203 continue;
1204
1205 Instruction *NewBonusInst = BonusInst.clone();
1206
1207 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1208 // Unless the instruction has the same !dbg location as the original
1209 // branch, drop it. When we fold the bonus instructions we want to make
1210 // sure we reset their debug locations in order to avoid stepping on
1211 // dead code caused by folding dead branches.
1212 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1213 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1214 mapAtomInstance(DL, VMap);
1215 }
1216
1217 RemapInstruction(NewBonusInst, VMap,
1219
1220 // If we speculated an instruction, we need to drop any metadata that may
1221 // result in undefined behavior, as the metadata might have been valid
1222 // only given the branch precondition.
1223 // Similarly strip attributes on call parameters that may cause UB in
1224 // location the call is moved to.
1225 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1226
1227 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1228 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1229 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1231
1232 NewBonusInst->takeName(&BonusInst);
1233 BonusInst.setName(NewBonusInst->getName() + ".old");
1234 VMap[&BonusInst] = NewBonusInst;
1235
1236 // Update (liveout) uses of bonus instructions,
1237 // now that the bonus instruction has been cloned into predecessor.
1238 // Note that we expect to be in a block-closed SSA form for this to work!
1239 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1240 auto *UI = cast<Instruction>(U.getUser());
1241 auto *PN = dyn_cast<PHINode>(UI);
1242 if (!PN) {
1243 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1244 "If the user is not a PHI node, then it should be in the same "
1245 "block as, and come after, the original bonus instruction.");
1246 continue; // Keep using the original bonus instruction.
1247 }
1248 // Is this the block-closed SSA form PHI node?
1249 if (PN->getIncomingBlock(U) == BB)
1250 continue; // Great, keep using the original bonus instruction.
1251 // The only other alternative is an "use" when coming from
1252 // the predecessor block - here we should refer to the cloned bonus instr.
1253 assert(PN->getIncomingBlock(U) == PredBlock &&
1254 "Not in block-closed SSA form?");
1255 U.set(NewBonusInst);
1256 }
1257 }
1258
1259 // Key Instructions: We may have propagated atom info into the pred. If the
1260 // pred's terminator already has atom info do nothing as merging would drop
1261 // one atom group anyway. If it doesn't, propagte the remapped atom group
1262 // from BB's terminator.
1263 if (auto &PredDL = PTI->getDebugLoc()) {
1264 auto &DL = BB->getTerminator()->getDebugLoc();
1265 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1266 PredDL.isSameSourceLocation(DL)) {
1267 PTI->setDebugLoc(DL);
1268 RemapSourceAtom(PTI, VMap);
1269 }
1270 }
1271}
1272
1273bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1274 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1275 BasicBlock *BB = TI->getParent();
1276 BasicBlock *Pred = PTI->getParent();
1277
1279
1280 // Figure out which 'cases' to copy from SI to PSI.
1281 std::vector<ValueEqualityComparisonCase> BBCases;
1282 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1283
1284 std::vector<ValueEqualityComparisonCase> PredCases;
1285 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1286
1287 // Based on whether the default edge from PTI goes to BB or not, fill in
1288 // PredCases and PredDefault with the new switch cases we would like to
1289 // build.
1290 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1291
1292 // Update the branch weight metadata along the way
1293 SmallVector<uint64_t, 8> Weights;
1294 bool PredHasWeights = hasBranchWeightMD(*PTI);
1295 bool SuccHasWeights = hasBranchWeightMD(*TI);
1296
1297 if (PredHasWeights) {
1298 getBranchWeights(PTI, Weights);
1299 // branch-weight metadata is inconsistent here.
1300 if (Weights.size() != 1 + PredCases.size())
1301 PredHasWeights = SuccHasWeights = false;
1302 } else if (SuccHasWeights)
1303 // If there are no predecessor weights but there are successor weights,
1304 // populate Weights with 1, which will later be scaled to the sum of
1305 // successor's weights
1306 Weights.assign(1 + PredCases.size(), 1);
1307
1308 SmallVector<uint64_t, 8> SuccWeights;
1309 if (SuccHasWeights) {
1310 getBranchWeights(TI, SuccWeights);
1311 // branch-weight metadata is inconsistent here.
1312 if (SuccWeights.size() != 1 + BBCases.size())
1313 PredHasWeights = SuccHasWeights = false;
1314 } else if (PredHasWeights)
1315 SuccWeights.assign(1 + BBCases.size(), 1);
1316
1317 if (PredDefault == BB) {
1318 // If this is the default destination from PTI, only the edges in TI
1319 // that don't occur in PTI, or that branch to BB will be activated.
1320 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1321 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1322 if (PredCases[i].Dest != BB)
1323 PTIHandled.insert(PredCases[i].Value);
1324 else {
1325 // The default destination is BB, we don't need explicit targets.
1326 std::swap(PredCases[i], PredCases.back());
1327
1328 if (PredHasWeights || SuccHasWeights) {
1329 // Increase weight for the default case.
1330 Weights[0] += Weights[i + 1];
1331 std::swap(Weights[i + 1], Weights.back());
1332 Weights.pop_back();
1333 }
1334
1335 PredCases.pop_back();
1336 --i;
1337 --e;
1338 }
1339
1340 // Reconstruct the new switch statement we will be building.
1341 if (PredDefault != BBDefault) {
1342 PredDefault->removePredecessor(Pred);
1343 if (DTU && PredDefault != BB)
1344 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1345 PredDefault = BBDefault;
1346 ++NewSuccessors[BBDefault];
1347 }
1348
1349 unsigned CasesFromPred = Weights.size();
1350 uint64_t ValidTotalSuccWeight = 0;
1351 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1352 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1353 PredCases.push_back(BBCases[i]);
1354 ++NewSuccessors[BBCases[i].Dest];
1355 if (SuccHasWeights || PredHasWeights) {
1356 // The default weight is at index 0, so weight for the ith case
1357 // should be at index i+1. Scale the cases from successor by
1358 // PredDefaultWeight (Weights[0]).
1359 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1360 ValidTotalSuccWeight += SuccWeights[i + 1];
1361 }
1362 }
1363
1364 if (SuccHasWeights || PredHasWeights) {
1365 ValidTotalSuccWeight += SuccWeights[0];
1366 // Scale the cases from predecessor by ValidTotalSuccWeight.
1367 for (unsigned i = 1; i < CasesFromPred; ++i)
1368 Weights[i] *= ValidTotalSuccWeight;
1369 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1370 Weights[0] *= SuccWeights[0];
1371 }
1372 } else {
1373 // If this is not the default destination from PSI, only the edges
1374 // in SI that occur in PSI with a destination of BB will be
1375 // activated.
1376 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1377 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1378 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1379 if (PredCases[i].Dest == BB) {
1380 PTIHandled.insert(PredCases[i].Value);
1381
1382 if (PredHasWeights || SuccHasWeights) {
1383 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1384 std::swap(Weights[i + 1], Weights.back());
1385 Weights.pop_back();
1386 }
1387
1388 std::swap(PredCases[i], PredCases.back());
1389 PredCases.pop_back();
1390 --i;
1391 --e;
1392 }
1393
1394 // Okay, now we know which constants were sent to BB from the
1395 // predecessor. Figure out where they will all go now.
1396 for (const ValueEqualityComparisonCase &Case : BBCases)
1397 if (PTIHandled.count(Case.Value)) {
1398 // If this is one we are capable of getting...
1399 if (PredHasWeights || SuccHasWeights)
1400 Weights.push_back(WeightsForHandled[Case.Value]);
1401 PredCases.push_back(Case);
1402 ++NewSuccessors[Case.Dest];
1403 PTIHandled.erase(Case.Value); // This constant is taken care of
1404 }
1405
1406 // If there are any constants vectored to BB that TI doesn't handle,
1407 // they must go to the default destination of TI.
1408 for (ConstantInt *I : PTIHandled) {
1409 if (PredHasWeights || SuccHasWeights)
1410 Weights.push_back(WeightsForHandled[I]);
1411 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1412 ++NewSuccessors[BBDefault];
1413 }
1414 }
1415
1416 // Okay, at this point, we know which new successor Pred will get. Make
1417 // sure we update the number of entries in the PHI nodes for these
1418 // successors.
1419 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1420 if (DTU) {
1421 SuccsOfPred = {llvm::from_range, successors(Pred)};
1422 Updates.reserve(Updates.size() + NewSuccessors.size());
1423 }
1424 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1425 NewSuccessors) {
1426 for (auto I : seq(NewSuccessor.second)) {
1427 (void)I;
1428 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1429 }
1430 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1431 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1432 }
1433
1434 Builder.SetInsertPoint(PTI);
1435 // Convert pointer to int before we switch.
1436 if (CV->getType()->isPointerTy()) {
1437 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1438 "Should not end up here with unstable pointers");
1439 CV =
1440 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1441 }
1442
1443 // Now that the successors are updated, create the new Switch instruction.
1444 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1445 NewSI->setDebugLoc(PTI->getDebugLoc());
1446 for (ValueEqualityComparisonCase &V : PredCases)
1447 NewSI->addCase(V.Value, V.Dest);
1448
1449 if (PredHasWeights || SuccHasWeights) {
1450 // Halve the weights if any of them cannot fit in an uint32_t
1451 fitWeights(Weights);
1452
1453 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1454
1455 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1456 }
1457
1459
1460 // Okay, last check. If BB is still a successor of PSI, then we must
1461 // have an infinite loop case. If so, add an infinitely looping block
1462 // to handle the case to preserve the behavior of the code.
1463 BasicBlock *InfLoopBlock = nullptr;
1464 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1465 if (NewSI->getSuccessor(i) == BB) {
1466 if (!InfLoopBlock) {
1467 // Insert it at the end of the function, because it's either code,
1468 // or it won't matter if it's hot. :)
1469 InfLoopBlock =
1470 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1471 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1472 if (DTU)
1473 Updates.push_back(
1474 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1475 }
1476 NewSI->setSuccessor(i, InfLoopBlock);
1477 }
1478
1479 if (DTU) {
1480 if (InfLoopBlock)
1481 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1482
1483 Updates.push_back({DominatorTree::Delete, Pred, BB});
1484
1485 DTU->applyUpdates(Updates);
1486 }
1487
1488 ++NumFoldValueComparisonIntoPredecessors;
1489 return true;
1490}
1491
1492/// The specified terminator is a value equality comparison instruction
1493/// (either a switch or a branch on "X == c").
1494/// See if any of the predecessors of the terminator block are value comparisons
1495/// on the same value. If so, and if safe to do so, fold them together.
1496bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1497 IRBuilder<> &Builder) {
1498 BasicBlock *BB = TI->getParent();
1499 Value *CV = isValueEqualityComparison(TI); // CondVal
1500 assert(CV && "Not a comparison?");
1501
1502 bool Changed = false;
1503
1504 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1505 while (!Preds.empty()) {
1506 BasicBlock *Pred = Preds.pop_back_val();
1507 Instruction *PTI = Pred->getTerminator();
1508
1509 // Don't try to fold into itself.
1510 if (Pred == BB)
1511 continue;
1512
1513 // See if the predecessor is a comparison with the same value.
1514 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1515 if (PCV != CV)
1516 continue;
1517
1518 SmallSetVector<BasicBlock *, 4> FailBlocks;
1519 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1520 for (auto *Succ : FailBlocks) {
1521 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1522 return false;
1523 }
1524 }
1525
1526 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1527 Changed = true;
1528 }
1529 return Changed;
1530}
1531
1532// If we would need to insert a select that uses the value of this invoke
1533// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1534// need to do this), we can't hoist the invoke, as there is nowhere to put the
1535// select in this case.
1537 Instruction *I1, Instruction *I2) {
1538 for (BasicBlock *Succ : successors(BB1)) {
1539 for (const PHINode &PN : Succ->phis()) {
1540 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1541 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1542 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1543 return false;
1544 }
1545 }
1546 }
1547 return true;
1548}
1549
1550// Get interesting characteristics of instructions that
1551// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1552// instructions can be reordered across.
1558
1560 unsigned Flags = 0;
1561 if (I->mayReadFromMemory())
1562 Flags |= SkipReadMem;
1563 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1564 // inalloca) across stacksave/stackrestore boundaries.
1565 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1566 Flags |= SkipSideEffect;
1568 Flags |= SkipImplicitControlFlow;
1569 return Flags;
1570}
1571
1572// Returns true if it is safe to reorder an instruction across preceding
1573// instructions in a basic block.
1574static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1575 // Don't reorder a store over a load.
1576 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1577 return false;
1578
1579 // If we have seen an instruction with side effects, it's unsafe to reorder an
1580 // instruction which reads memory or itself has side effects.
1581 if ((Flags & SkipSideEffect) &&
1582 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1583 return false;
1584
1585 // Reordering across an instruction which does not necessarily transfer
1586 // control to the next instruction is speculation.
1588 return false;
1589
1590 // Hoisting of llvm.deoptimize is only legal together with the next return
1591 // instruction, which this pass is not always able to do.
1592 if (auto *CB = dyn_cast<CallBase>(I))
1593 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1594 return false;
1595
1596 // It's also unsafe/illegal to hoist an instruction above its instruction
1597 // operands
1598 BasicBlock *BB = I->getParent();
1599 for (Value *Op : I->operands()) {
1600 if (auto *J = dyn_cast<Instruction>(Op))
1601 if (J->getParent() == BB)
1602 return false;
1603 }
1604
1605 return true;
1606}
1607
1608static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1609
1610/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1611/// instructions \p I1 and \p I2 can and should be hoisted.
1613 const TargetTransformInfo &TTI) {
1614 // If we're going to hoist a call, make sure that the two instructions
1615 // we're commoning/hoisting are both marked with musttail, or neither of
1616 // them is marked as such. Otherwise, we might end up in a situation where
1617 // we hoist from a block where the terminator is a `ret` to a block where
1618 // the terminator is a `br`, and `musttail` calls expect to be followed by
1619 // a return.
1620 auto *C1 = dyn_cast<CallInst>(I1);
1621 auto *C2 = dyn_cast<CallInst>(I2);
1622 if (C1 && C2)
1623 if (C1->isMustTailCall() != C2->isMustTailCall())
1624 return false;
1625
1626 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1627 return false;
1628
1629 // If any of the two call sites has nomerge or convergent attribute, stop
1630 // hoisting.
1631 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1632 if (CB1->cannotMerge() || CB1->isConvergent())
1633 return false;
1634 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1635 if (CB2->cannotMerge() || CB2->isConvergent())
1636 return false;
1637
1638 return true;
1639}
1640
1641/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1642/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1643/// hoistCommonCodeFromSuccessors. e.g. The input:
1644/// I1 DVRs: { x, z },
1645/// OtherInsts: { I2 DVRs: { x, y, z } }
1646/// would result in hoisting only DbgVariableRecord x.
1648 Instruction *TI, Instruction *I1,
1649 SmallVectorImpl<Instruction *> &OtherInsts) {
1650 if (!I1->hasDbgRecords())
1651 return;
1652 using CurrentAndEndIt =
1653 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1654 // Vector of {Current, End} iterators.
1656 Itrs.reserve(OtherInsts.size() + 1);
1657 // Helper lambdas for lock-step checks:
1658 // Return true if this Current == End.
1659 auto atEnd = [](const CurrentAndEndIt &Pair) {
1660 return Pair.first == Pair.second;
1661 };
1662 // Return true if all Current are identical.
1663 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1664 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1666 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1667 });
1668 };
1669
1670 // Collect the iterators.
1671 Itrs.push_back(
1672 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1673 for (Instruction *Other : OtherInsts) {
1674 if (!Other->hasDbgRecords())
1675 return;
1676 Itrs.push_back(
1677 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1678 }
1679
1680 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1681 // the lock-step DbgRecord are identical, hoist all of them to TI.
1682 // This replicates the dbg.* intrinsic behaviour in
1683 // hoistCommonCodeFromSuccessors.
1684 while (none_of(Itrs, atEnd)) {
1685 bool HoistDVRs = allIdentical(Itrs);
1686 for (CurrentAndEndIt &Pair : Itrs) {
1687 // Increment Current iterator now as we may be about to move the
1688 // DbgRecord.
1689 DbgRecord &DR = *Pair.first++;
1690 if (HoistDVRs) {
1691 DR.removeFromParent();
1692 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1693 }
1694 }
1695 }
1696}
1697
1699 const Instruction *I2) {
1700 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1701 return true;
1702
1703 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1704 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1705 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1706 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1707 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1708
1709 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1710 return I1->getOperand(0) == I2->getOperand(1) &&
1711 I1->getOperand(1) == I2->getOperand(0) &&
1712 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1713 }
1714
1715 return false;
1716}
1717
1718/// If the target supports conditional faulting,
1719/// we look for the following pattern:
1720/// \code
1721/// BB:
1722/// ...
1723/// %cond = icmp ult %x, %y
1724/// br i1 %cond, label %TrueBB, label %FalseBB
1725/// FalseBB:
1726/// store i32 1, ptr %q, align 4
1727/// ...
1728/// TrueBB:
1729/// %maskedloadstore = load i32, ptr %b, align 4
1730/// store i32 %maskedloadstore, ptr %p, align 4
1731/// ...
1732/// \endcode
1733///
1734/// and transform it into:
1735///
1736/// \code
1737/// BB:
1738/// ...
1739/// %cond = icmp ult %x, %y
1740/// %maskedloadstore = cload i32, ptr %b, %cond
1741/// cstore i32 %maskedloadstore, ptr %p, %cond
1742/// cstore i32 1, ptr %q, ~%cond
1743/// br i1 %cond, label %TrueBB, label %FalseBB
1744/// FalseBB:
1745/// ...
1746/// TrueBB:
1747/// ...
1748/// \endcode
1749///
1750/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1751/// e.g.
1752///
1753/// \code
1754/// %vcond = bitcast i1 %cond to <1 x i1>
1755/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1756/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1757/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1758/// call void @llvm.masked.store.v1i32.p0
1759/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1760/// %cond.not = xor i1 %cond, true
1761/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1762/// call void @llvm.masked.store.v1i32.p0
1763/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1764/// \endcode
1765///
1766/// So we need to turn hoisted load/store into cload/cstore.
1767///
1768/// \param BI The branch instruction.
1769/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1770/// will be speculated.
/// \param Invert indicates whether FalseBB (rather than TrueBB) is the block
/// being speculated. Only used in a triangle CFG.
    BranchInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
  auto *Cond = BI->getOperand(0);
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // Triangle CFG: a single mask suffices. It is negated when we speculate
    // the false successor (*Invert == true).
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
        VCondTy);
  } else {
    // Diamond CFG: build both polarities up front; each hoisted access picks
    // the mask matching its original successor block in the loop below.
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
    MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
  }
  // Strip any bitcast chain so we PHI/pass-through the underlying value.
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(V))
      V = BitCast->getOperand(0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    // Triangle: replace each access in place. Diamond: emit all replacements
    // before the branch.
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          if ((PN = dyn_cast<PHINode>(U))) {
            // The masked load's pass-through value is whatever the PHI would
            // have produced on the non-speculated path.
            PassThru = Builder.CreateBitCast(
                PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                FixedVectorType::get(Ty, 1));
          } else if (auto *Ins = cast<Instruction>(U);
                     Sel && Ins->getParent() == BB) {
            // This happens when store or/and a speculative instruction between
            // load and store were hoisted to the BB. Make sure the masked load
            // inserted before its use.
            // We assume there's one of such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
      // Cast the <1 x Ty> result back to the original scalar type before
      // rewriting the load's users.
      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
      if (PN)
        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
      I->replaceAllUsesWith(NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Not support pointer type, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(*I);
    I->eraseFromParent();
  }
}
1864
                                 const TargetTransformInfo &TTI) {
  // Not handle volatile or atomic.
  // Only simple loads/stores qualify, and only when the corresponding
  // cond-faulting hoist option is enabled.
  bool IsStore = false;
  if (auto *L = dyn_cast<LoadInst>(I)) {
    if (!L->isSimple() || !HoistLoadsWithCondFaulting)
      return false;
  } else if (auto *S = dyn_cast<StoreInst>(I)) {
    if (!S->isSimple() || !HoistStoresWithCondFaulting)
      return false;
    IsStore = true;
  } else
    return false;

  // llvm.masked.load/store use i32 for alignment while load/store use i64.
  // That's why we have the alignment limitation.
  // FIXME: Update the prototype of the intrinsics?
  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
}
1885
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matching
/// instructions only. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  for (auto *Succ : successors(BB))
    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
      return false;

  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : successors(BB)) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // Successor blocks with PHIs are not handled here.
    if (isa<PHINode>(*SuccItr))
      return false;
    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.
    // Check if sizes and terminators of all successors match.
    bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
      Instruction *Term0 = Succs[0]->getTerminator();
      Instruction *Term = Succ->getTerminator();
      return !Term->isSameOperationAs(Term0) ||
             !equal(Term->operands(), Term0->operands()) ||
             Succs[0]->size() != Succ->size();
    });
    if (!AllSame)
      return false;
    // NOTE: AllSame is necessarily true here because of the early return
    // above; now verify the block bodies match instruction-by-instruction
    // (up to commutativity) in lockstep.
    if (AllSame) {
      LockstepReverseIterator<true> LRI(Succs);
      while (LRI.isValid()) {
        Instruction *I0 = (*LRI)[0];
        if (any_of(*LRI, [I0](Instruction *I) {
              return !areIdenticalUpToCommutativity(I0, I);
            })) {
          return false;
        }
        --LRI;
      }
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(SuccIterPairs,
             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  // Walk all successors in lockstep, hoisting identical instructions and
  // skipping (up to the limit) runs of non-identical ones.
  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    // Check whether I1 and every lockstep counterpart are identical (up to
    // commutativity) and carry matching MMRA metadata.
    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(&*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(I1);
        // Merge flags/attributes conservatively so I1 stands in for all copies.
        I1->andIRFlags(I2);
        if (auto *CB = dyn_cast<CallBase>(I1)) {
          bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG Compile.
          (void)Success;
        }

        combineMetadataForCSE(I1, I2, true);
        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
        I2->eraseFromParent();
      }
      // NumHoistCommonCode counts sites (once per changed terminator);
      // NumHoistCommonInstrs counts every hoisted instruction.
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
2071
/// Hoist the identical terminators I1 and OtherSuccTIs from TI's successor
/// blocks into TI's block: clone I1 before TI, redirect uses of the originals
/// to the clone, and (for a conditional branch) insert selects for any PHI
/// entries that disagree between the two successors.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs) {

  auto *BI = dyn_cast<BranchInst>(TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return false;

  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow then converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(TIParent, TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    // Redirect all uses of the originals to the hoisted clone.
    I1->replaceAllUsesWith(NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  Locs.push_back(I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache selects per (BB1V, BB2V) pair so each distinct disagreement gets
    // exactly one select.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
              BI->getCondition(), BB1V, BB2V,
              isa<FPMathOperator>(PN) ? &PN : nullptr,
              BB1V->getName() + "." + BB2V->getName(), BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, SI);
      }
    }
  }

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    addPredecessorToBlock(Succ, TIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU)
    for (BasicBlock *Succ : successors(TI))
      Updates.push_back({DominatorTree::Delete, TIParent, Succ});

  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
2197
// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
// into variables.
                                                int OpIdx) {
  // Divide/Remainder by constant is typically much cheaper than by variable.
  if (I->isIntDivRem())
    return OpIdx != 1;
  // Conservatively reject all intrinsics; see the TODO above (e.g. a constant
  // memcpy size should not become a variable).
  return !isa<IntrinsicInst>(I);
}
2207
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  // All instructions must perform the same operation (with attributes
  // intersected for calls) and carry identical MMRA metadata.
  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(&U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(Insts, It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // Operands that differ across Insts would have to be merged via a PHI in the
  // successor; record them in PHIOperands.
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
        // We can't create a PHI from this GEP.
        return false;
      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
      for (auto *I : Insts)
        Ops.push_back(I->getOperand(OI));
    }
  }
  return true;
}
2313
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
  // The common successor all blocks unconditionally branch to.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  for (auto *BB : Blocks) {
    Instruction *I = BB->getTerminator();
    I = I->getPrevNode();
    Insts.push_back(I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);

  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
      if (auto *CB = dyn_cast<CallBase>(I0)) {
        bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  for (User *U : make_early_inc_range(I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(U);
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }
}
2408
2409/// Check whether BB's predecessors end with unconditional branches. If it is
2410/// true, sink any common code from the predecessors to BB.
2412 DomTreeUpdater *DTU) {
2413 // We support two situations:
2414 // (1) all incoming arcs are unconditional
2415 // (2) there are non-unconditional incoming arcs
2416 //
2417 // (2) is very common in switch defaults and
2418 // else-if patterns;
2419 //
2420 // if (a) f(1);
2421 // else if (b) f(2);
2422 //
2423 // produces:
2424 //
2425 // [if]
2426 // / \
2427 // [f(1)] [if]
2428 // | | \
2429 // | | |
2430 // | [f(2)]|
2431 // \ | /
2432 // [ end ]
2433 //
2434 // [end] has two unconditional predecessor arcs and one conditional. The
2435 // conditional refers to the implicit empty 'else' arc. This conditional
2436 // arc can also be caused by an empty default block in a switch.
2437 //
2438 // In this case, we attempt to sink code from all *unconditional* arcs.
2439 // If we can sink instructions from these arcs (determined during the scan
2440 // phase below) we insert a common successor for all unconditional arcs and
2441 // connect that to [end], to enable sinking:
2442 //
2443 // [if]
2444 // / \
2445 // [x(1)] [if]
2446 // | | \
2447 // | | \
2448 // | [x(2)] |
2449 // \ / |
2450 // [sink.split] |
2451 // \ /
2452 // [ end ]
2453 //
2454 SmallVector<BasicBlock*,4> UnconditionalPreds;
2455 bool HaveNonUnconditionalPredecessors = false;
2456 for (auto *PredBB : predecessors(BB)) {
2457 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2458 if (PredBr && PredBr->isUnconditional())
2459 UnconditionalPreds.push_back(PredBB);
2460 else
2461 HaveNonUnconditionalPredecessors = true;
2462 }
2463 if (UnconditionalPreds.size() < 2)
2464 return false;
2465
2466 // We take a two-step approach to tail sinking. First we scan from the end of
2467 // each block upwards in lockstep. If the n'th instruction from the end of each
2468 // block can be sunk, those instructions are added to ValuesToSink and we
2469 // carry on. If we can sink an instruction but need to PHI-merge some operands
2470 // (because they're not identical in each instruction) we add these to
2471 // PHIOperands.
2472 // We prepopulate PHIOperands with the phis that already exist in BB.
2474 for (PHINode &PN : BB->phis()) {
2476 for (const Use &U : PN.incoming_values())
2477 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2478 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2479 for (BasicBlock *Pred : UnconditionalPreds)
2480 Ops.push_back(*IncomingVals[Pred]);
2481 }
2482
2483 int ScanIdx = 0;
2484 SmallPtrSet<Value*,4> InstructionsToSink;
2485 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2486 while (LRI.isValid() &&
2487 canSinkInstructions(*LRI, PHIOperands)) {
2488 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2489 << "\n");
2490 InstructionsToSink.insert_range(*LRI);
2491 ++ScanIdx;
2492 --LRI;
2493 }
2494
2495 // If no instructions can be sunk, early-return.
2496 if (ScanIdx == 0)
2497 return false;
2498
2499 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2500
2501 if (!followedByDeoptOrUnreachable) {
2502 // Check whether this is the pointer operand of a load/store.
2503 auto IsMemOperand = [](Use &U) {
2504 auto *I = cast<Instruction>(U.getUser());
2505 if (isa<LoadInst>(I))
2506 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2507 if (isa<StoreInst>(I))
2508 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2509 return false;
2510 };
2511
2512 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2513 // actually sink before encountering instruction that is unprofitable to
2514 // sink?
2515 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2516 unsigned NumPHIInsts = 0;
2517 for (Use &U : (*LRI)[0]->operands()) {
2518 auto It = PHIOperands.find(&U);
2519 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2520 return InstructionsToSink.contains(V);
2521 })) {
2522 ++NumPHIInsts;
2523 // Do not separate a load/store from the gep producing the address.
2524 // The gep can likely be folded into the load/store as an addressing
2525 // mode. Additionally, a load of a gep is easier to analyze than a
2526 // load of a phi.
2527 if (IsMemOperand(U) &&
2528 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2529 return false;
2530 // FIXME: this check is overly optimistic. We may end up not sinking
2531 // said instruction, due to the very same profitability check.
2532 // See @creating_too_many_phis in sink-common-code.ll.
2533 }
2534 }
2535 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2536 return NumPHIInsts <= 1;
2537 };
2538
2539 // We've determined that we are going to sink last ScanIdx instructions,
2540 // and recorded them in InstructionsToSink. Now, some instructions may be
2541 // unprofitable to sink. But that determination depends on the instructions
2542 // that we are going to sink.
2543
2544 // First, forward scan: find the first instruction unprofitable to sink,
2545 // recording all the ones that are profitable to sink.
2546 // FIXME: would it be better, after we detect that not all are profitable.
2547 // to either record the profitable ones, or erase the unprofitable ones?
2548 // Maybe we need to choose (at runtime) the one that will touch least
2549 // instrs?
2550 LRI.reset();
2551 int Idx = 0;
2552 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2553 while (Idx < ScanIdx) {
2554 if (!ProfitableToSinkInstruction(LRI)) {
2555 // Too many PHIs would be created.
2556 LLVM_DEBUG(
2557 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2558 break;
2559 }
2560 InstructionsProfitableToSink.insert_range(*LRI);
2561 --LRI;
2562 ++Idx;
2563 }
2564
2565 // If no instructions can be sunk, early-return.
2566 if (Idx == 0)
2567 return false;
2568
2569 // Did we determine that (only) some instructions are unprofitable to sink?
2570 if (Idx < ScanIdx) {
2571 // Okay, some instructions are unprofitable.
2572 ScanIdx = Idx;
2573 InstructionsToSink = InstructionsProfitableToSink;
2574
2575 // But, that may make other instructions unprofitable, too.
2576 // So, do a backward scan, do any earlier instructions become
2577 // unprofitable?
2578 assert(
2579 !ProfitableToSinkInstruction(LRI) &&
2580 "We already know that the last instruction is unprofitable to sink");
2581 ++LRI;
2582 --Idx;
2583 while (Idx >= 0) {
2584 // If we detect that an instruction becomes unprofitable to sink,
2585 // all earlier instructions won't be sunk either,
2586 // so preemptively keep InstructionsProfitableToSink in sync.
2587 // FIXME: is this the most performant approach?
2588 for (auto *I : *LRI)
2589 InstructionsProfitableToSink.erase(I);
2590 if (!ProfitableToSinkInstruction(LRI)) {
2591 // Everything starting with this instruction won't be sunk.
2592 ScanIdx = Idx;
2593 InstructionsToSink = InstructionsProfitableToSink;
2594 }
2595 ++LRI;
2596 --Idx;
2597 }
2598 }
2599
2600 // If no instructions can be sunk, early-return.
2601 if (ScanIdx == 0)
2602 return false;
2603 }
2604
2605 bool Changed = false;
2606
2607 if (HaveNonUnconditionalPredecessors) {
2608 if (!followedByDeoptOrUnreachable) {
2609 // It is always legal to sink common instructions from unconditional
2610 // predecessors. However, if not all predecessors are unconditional,
2611 // this transformation might be pessimizing. So as a rule of thumb,
2612 // don't do it unless we'd sink at least one non-speculatable instruction.
2613 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2614 LRI.reset();
2615 int Idx = 0;
2616 bool Profitable = false;
2617 while (Idx < ScanIdx) {
2618 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2619 Profitable = true;
2620 break;
2621 }
2622 --LRI;
2623 ++Idx;
2624 }
2625 if (!Profitable)
2626 return false;
2627 }
2628
2629 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2630 // We have a conditional edge and we're going to sink some instructions.
2631 // Insert a new block postdominating all blocks we're going to sink from.
2632 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2633 // Edges couldn't be split.
2634 return false;
2635 Changed = true;
2636 }
2637
2638 // Now that we've analyzed all potential sinking candidates, perform the
2639 // actual sink. We iteratively sink the last non-terminator of the source
2640 // blocks into their common successor unless doing so would require too
2641 // many PHI instructions to be generated (currently only one PHI is allowed
2642 // per sunk instruction).
2643 //
2644 // We can use InstructionsToSink to discount values needing PHI-merging that will
2645 // actually be sunk in a later iteration. This allows us to be more
2646 // aggressive in what we sink. This does allow a false positive where we
2647 // sink presuming a later value will also be sunk, but stop half way through
2648 // and never actually sink it which means we produce more PHIs than intended.
2649 // This is unlikely in practice though.
2650 int SinkIdx = 0;
2651 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2652 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2653 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2654 << "\n");
2655
2656 // Because we've sunk every instruction in turn, the current instruction to
2657 // sink is always at index 0.
2658 LRI.reset();
2659
2660 sinkLastInstruction(UnconditionalPreds);
2661 NumSinkCommonInstrs++;
2662 Changed = true;
2663 }
2664 if (SinkIdx != 0)
2665 ++NumSinkCommonCode;
2666 return Changed;
2667}
2668
2669namespace {
2670
2671struct CompatibleSets {
2672 using SetTy = SmallVector<InvokeInst *, 2>;
2673
2675
2676 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2677
2678 SetTy &getCompatibleSet(InvokeInst *II);
2679
2680 void insert(InvokeInst *II);
2681};
2682
2683CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2684 // Perform a linear scan over all the existing sets, see if the new `invoke`
2685 // is compatible with any particular set. Since we know that all the `invokes`
2686 // within a set are compatible, only check the first `invoke` in each set.
2687 // WARNING: at worst, this has quadratic complexity.
2688 for (CompatibleSets::SetTy &Set : Sets) {
2689 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2690 return Set;
2691 }
2692
2693 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2694 return Sets.emplace_back();
2695}
2696
2697void CompatibleSets::insert(InvokeInst *II) {
2698 getCompatibleSet(II).emplace_back(II);
2699}
2700
2701bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2702 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2703
2704 // Can we theoretically merge these `invoke`s?
2705 auto IsIllegalToMerge = [](InvokeInst *II) {
2706 return II->cannotMerge() || II->isInlineAsm();
2707 };
2708 if (any_of(Invokes, IsIllegalToMerge))
2709 return false;
2710
2711 // Either both `invoke`s must be direct,
2712 // or both `invoke`s must be indirect.
2713 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2714 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2715 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2716 if (HaveIndirectCalls) {
2717 if (!AllCallsAreIndirect)
2718 return false;
2719 } else {
2720 // All callees must be identical.
2721 Value *Callee = nullptr;
2722 for (InvokeInst *II : Invokes) {
2723 Value *CurrCallee = II->getCalledOperand();
2724 assert(CurrCallee && "There is always a called operand.");
2725 if (!Callee)
2726 Callee = CurrCallee;
2727 else if (Callee != CurrCallee)
2728 return false;
2729 }
2730 }
2731
2732 // Either both `invoke`s must not have a normal destination,
2733 // or both `invoke`s must have a normal destination,
2734 auto HasNormalDest = [](InvokeInst *II) {
2735 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2736 };
2737 if (any_of(Invokes, HasNormalDest)) {
2738 // Do not merge `invoke` that does not have a normal destination with one
2739 // that does have a normal destination, even though doing so would be legal.
2740 if (!all_of(Invokes, HasNormalDest))
2741 return false;
2742
2743 // All normal destinations must be identical.
2744 BasicBlock *NormalBB = nullptr;
2745 for (InvokeInst *II : Invokes) {
2746 BasicBlock *CurrNormalBB = II->getNormalDest();
2747 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2748 if (!NormalBB)
2749 NormalBB = CurrNormalBB;
2750 else if (NormalBB != CurrNormalBB)
2751 return false;
2752 }
2753
2754 // In the normal destination, the incoming values for these two `invoke`s
2755 // must be compatible.
2756 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2758 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2759 &EquivalenceSet))
2760 return false;
2761 }
2762
2763#ifndef NDEBUG
2764 // All unwind destinations must be identical.
2765 // We know that because we have started from said unwind destination.
2766 BasicBlock *UnwindBB = nullptr;
2767 for (InvokeInst *II : Invokes) {
2768 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2769 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2770 if (!UnwindBB)
2771 UnwindBB = CurrUnwindBB;
2772 else
2773 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2774 }
2775#endif
2776
2777 // In the unwind destination, the incoming values for these two `invoke`s
2778 // must be compatible.
2780 Invokes.front()->getUnwindDest(),
2781 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2782 return false;
2783
2784 // Ignoring arguments, these `invoke`s must be identical,
2785 // including operand bundles.
2786 const InvokeInst *II0 = Invokes.front();
2787 for (auto *II : Invokes.drop_front())
2788 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2789 return false;
2790
2791 // Can we theoretically form the data operands for the merged `invoke`?
2792 auto IsIllegalToMergeArguments = [](auto Ops) {
2793 Use &U0 = std::get<0>(Ops);
2794 Use &U1 = std::get<1>(Ops);
2795 if (U0 == U1)
2796 return false;
2798 U0.getOperandNo());
2799 };
2800 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2801 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2802 IsIllegalToMergeArguments))
2803 return false;
2804
2805 return true;
2806}
2807
2808} // namespace
2809
2810// Merge all invokes in the provided set, all of which are compatible
2811// as per the `CompatibleSets::shouldBelongToSameSet()`.
2813 DomTreeUpdater *DTU) {
2814 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2815
2817 if (DTU)
2818 Updates.reserve(2 + 3 * Invokes.size());
2819
2820 bool HasNormalDest =
2821 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2822
2823 // Clone one of the invokes into a new basic block.
2824 // Since they are all compatible, it doesn't matter which invoke is cloned.
2825 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2826 InvokeInst *II0 = Invokes.front();
2827 BasicBlock *II0BB = II0->getParent();
2828 BasicBlock *InsertBeforeBlock =
2829 II0->getParent()->getIterator()->getNextNode();
2830 Function *Func = II0BB->getParent();
2831 LLVMContext &Ctx = II0->getContext();
2832
2833 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2834 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2835
2836 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2837 // NOTE: all invokes have the same attributes, so no handling needed.
2838 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2839
2840 if (!HasNormalDest) {
2841 // This set does not have a normal destination,
2842 // so just form a new block with unreachable terminator.
2843 BasicBlock *MergedNormalDest = BasicBlock::Create(
2844 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2845 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2846 UI->setDebugLoc(DebugLoc::getTemporary());
2847 MergedInvoke->setNormalDest(MergedNormalDest);
2848 }
2849
2850 // The unwind destination, however, remainds identical for all invokes here.
2851
2852 return MergedInvoke;
2853 }();
2854
2855 if (DTU) {
2856 // Predecessor blocks that contained these invokes will now branch to
2857 // the new block that contains the merged invoke, ...
2858 for (InvokeInst *II : Invokes)
2859 Updates.push_back(
2860 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2861
2862 // ... which has the new `unreachable` block as normal destination,
2863 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2864 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2865 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2866 SuccBBOfMergedInvoke});
2867
2868 // Since predecessor blocks now unconditionally branch to a new block,
2869 // they no longer branch to their original successors.
2870 for (InvokeInst *II : Invokes)
2871 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2872 Updates.push_back(
2873 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2874 }
2875
2876 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2877
2878 // Form the merged operands for the merged invoke.
2879 for (Use &U : MergedInvoke->operands()) {
2880 // Only PHI together the indirect callees and data operands.
2881 if (MergedInvoke->isCallee(&U)) {
2882 if (!IsIndirectCall)
2883 continue;
2884 } else if (!MergedInvoke->isDataOperand(&U))
2885 continue;
2886
2887 // Don't create trivial PHI's with all-identical incoming values.
2888 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2889 return II->getOperand(U.getOperandNo()) != U.get();
2890 });
2891 if (!NeedPHI)
2892 continue;
2893
2894 // Form a PHI out of all the data ops under this index.
2896 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2897 for (InvokeInst *II : Invokes)
2898 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2899
2900 U.set(PN);
2901 }
2902
2903 // We've ensured that each PHI node has compatible (identical) incoming values
2904 // when coming from each of the `invoke`s in the current merge set,
2905 // so update the PHI nodes accordingly.
2906 for (BasicBlock *Succ : successors(MergedInvoke))
2907 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2908 /*ExistPred=*/Invokes.front()->getParent());
2909
2910 // And finally, replace the original `invoke`s with an unconditional branch
2911 // to the block with the merged `invoke`. Also, give that merged `invoke`
2912 // the merged debugloc of all the original `invoke`s.
2913 DILocation *MergedDebugLoc = nullptr;
2914 for (InvokeInst *II : Invokes) {
2915 // Compute the debug location common to all the original `invoke`s.
2916 if (!MergedDebugLoc)
2917 MergedDebugLoc = II->getDebugLoc();
2918 else
2919 MergedDebugLoc =
2920 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2921
2922 // And replace the old `invoke` with an unconditionally branch
2923 // to the block with the merged `invoke`.
2924 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2925 OrigSuccBB->removePredecessor(II->getParent());
2926 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2927 // The unconditional branch is part of the replacement for the original
2928 // invoke, so should use its DebugLoc.
2929 BI->setDebugLoc(II->getDebugLoc());
2930 bool Success = MergedInvoke->tryIntersectAttributes(II);
2931 assert(Success && "Merged invokes with incompatible attributes");
2932 // For NDEBUG Compile
2933 (void)Success;
2934 II->replaceAllUsesWith(MergedInvoke);
2935 II->eraseFromParent();
2936 ++NumInvokesMerged;
2937 }
2938 MergedInvoke->setDebugLoc(MergedDebugLoc);
2939 ++NumInvokeSetsFormed;
2940
2941 if (DTU)
2942 DTU->applyUpdates(Updates);
2943}
2944
2945/// If this block is a `landingpad` exception handling block, categorize all
2946/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2947/// being "mergeable" together, and then merge invokes in each set together.
2948///
2949/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2950/// [...] [...]
2951/// | |
2952/// [invoke0] [invoke1]
2953/// / \ / \
2954/// [cont0] [landingpad] [cont1]
2955/// to:
2956/// [...] [...]
2957/// \ /
2958/// [invoke]
2959/// / \
2960/// [cont] [landingpad]
2961///
2962/// But of course we can only do that if the invokes share the `landingpad`,
2963/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2964/// and the invoked functions are "compatible".
2967 return false;
2968
2969 bool Changed = false;
2970
2971 // FIXME: generalize to all exception handling blocks?
2972 if (!BB->isLandingPad())
2973 return Changed;
2974
2975 CompatibleSets Grouper;
2976
2977 // Record all the predecessors of this `landingpad`. As per verifier,
2978 // the only allowed predecessor is the unwind edge of an `invoke`.
2979 // We want to group "compatible" `invokes` into the same set to be merged.
2980 for (BasicBlock *PredBB : predecessors(BB))
2981 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2982
2983 // And now, merge `invoke`s that were grouped togeter.
2984 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2985 if (Invokes.size() < 2)
2986 continue;
2987 Changed = true;
2988 mergeCompatibleInvokesImpl(Invokes, DTU);
2989 }
2990
2991 return Changed;
2992}
2993
2994namespace {
2995/// Track ephemeral values, which should be ignored for cost-modelling
2996/// purposes. Requires walking instructions in reverse order.
2997class EphemeralValueTracker {
2998 SmallPtrSet<const Instruction *, 32> EphValues;
2999
3000 bool isEphemeral(const Instruction *I) {
3001 if (isa<AssumeInst>(I))
3002 return true;
3003 return !I->mayHaveSideEffects() && !I->isTerminator() &&
3004 all_of(I->users(), [&](const User *U) {
3005 return EphValues.count(cast<Instruction>(U));
3006 });
3007 }
3008
3009public:
3010 bool track(const Instruction *I) {
3011 if (isEphemeral(I)) {
3012 EphValues.insert(I);
3013 return true;
3014 }
3015 return false;
3016 }
3017
3018 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3019};
3020} // namespace
3021
3022/// Determine if we can hoist sink a sole store instruction out of a
3023/// conditional block.
3024///
3025/// We are looking for code like the following:
3026/// BrBB:
3027/// store i32 %add, i32* %arrayidx2
3028/// ... // No other stores or function calls (we could be calling a memory
3029/// ... // function).
3030/// %cmp = icmp ult %x, %y
3031/// br i1 %cmp, label %EndBB, label %ThenBB
3032/// ThenBB:
3033/// store i32 %add5, i32* %arrayidx2
3034/// br label EndBB
3035/// EndBB:
3036/// ...
3037/// We are going to transform this into:
3038/// BrBB:
3039/// store i32 %add, i32* %arrayidx2
3040/// ... //
3041/// %cmp = icmp ult %x, %y
3042/// %add.add5 = select i1 %cmp, i32 %add, %add5
3043/// store i32 %add.add5, i32* %arrayidx2
3044/// ...
3045///
3046/// \return The pointer to the value of the previous store if the store can be
3047/// hoisted into the predecessor block. 0 otherwise.
3049 BasicBlock *StoreBB, BasicBlock *EndBB) {
3050 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3051 if (!StoreToHoist)
3052 return nullptr;
3053
3054 // Volatile or atomic.
3055 if (!StoreToHoist->isSimple())
3056 return nullptr;
3057
3058 Value *StorePtr = StoreToHoist->getPointerOperand();
3059 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3060
3061 // Look for a store to the same pointer in BrBB.
3062 unsigned MaxNumInstToLookAt = 9;
3063 // Skip pseudo probe intrinsic calls which are not really killing any memory
3064 // accesses.
3065 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3066 if (!MaxNumInstToLookAt)
3067 break;
3068 --MaxNumInstToLookAt;
3069
3070 // Could be calling an instruction that affects memory like free().
3071 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3072 return nullptr;
3073
3074 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3075 // Found the previous store to same location and type. Make sure it is
3076 // simple, to avoid introducing a spurious non-atomic write after an
3077 // atomic write.
3078 if (SI->getPointerOperand() == StorePtr &&
3079 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3080 SI->getAlign() >= StoreToHoist->getAlign())
3081 // Found the previous store, return its value operand.
3082 return SI->getValueOperand();
3083 return nullptr; // Unknown store.
3084 }
3085
3086 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3087 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3088 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3089 Value *Obj = getUnderlyingObject(StorePtr);
3090 bool ExplicitlyDereferenceableOnly;
3091 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3093 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3095 (!ExplicitlyDereferenceableOnly ||
3096 isDereferenceablePointer(StorePtr, StoreTy,
3097 LI->getDataLayout()))) {
3098 // Found a previous load, return it.
3099 return LI;
3100 }
3101 }
3102 // The load didn't work out, but we may still find a store.
3103 }
3104 }
3105
3106 return nullptr;
3107}
3108
3109/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3110/// converted to selects.
3112 BasicBlock *EndBB,
3113 unsigned &SpeculatedInstructions,
3114 InstructionCost &Cost,
3115 const TargetTransformInfo &TTI) {
3117 BB->getParent()->hasMinSize()
3120
3121 bool HaveRewritablePHIs = false;
3122 for (PHINode &PN : EndBB->phis()) {
3123 Value *OrigV = PN.getIncomingValueForBlock(BB);
3124 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3125
3126 // FIXME: Try to remove some of the duplication with
3127 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3128 if (ThenV == OrigV)
3129 continue;
3130
3131 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3132 CmpInst::makeCmpResultType(PN.getType()),
3134
3135 // Don't convert to selects if we could remove undefined behavior instead.
3136 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3138 return false;
3139
3140 HaveRewritablePHIs = true;
3141 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3142 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3143 if (!OrigCE && !ThenCE)
3144 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3145
3146 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3147 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3148 InstructionCost MaxCost =
3150 if (OrigCost + ThenCost > MaxCost)
3151 return false;
3152
3153 // Account for the cost of an unfolded ConstantExpr which could end up
3154 // getting expanded into Instructions.
3155 // FIXME: This doesn't account for how many operations are combined in the
3156 // constant expression.
3157 ++SpeculatedInstructions;
3158 if (SpeculatedInstructions > 1)
3159 return false;
3160 }
3161
3162 return HaveRewritablePHIs;
3163}
3164
3166 std::optional<bool> Invert,
3167 const TargetTransformInfo &TTI) {
3168 // If the branch is non-unpredictable, and is predicted to *not* branch to
3169 // the `then` block, then avoid speculating it.
3170 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3171 return true;
3172
3173 uint64_t TWeight, FWeight;
3174 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3175 return true;
3176
3177 if (!Invert.has_value())
3178 return false;
3179
3180 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3181 BranchProbability BIEndProb =
3182 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3183 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3184 return BIEndProb < Likely;
3185}
3186
3187/// Speculate a conditional basic block flattening the CFG.
3188///
3189/// Note that this is a very risky transform currently. Speculating
3190/// instructions like this is most often not desirable. Instead, there is an MI
3191/// pass which can do it with full awareness of the resource constraints.
3192/// However, some cases are "obvious" and we should do directly. An example of
3193/// this is speculating a single, reasonably cheap instruction.
3194///
3195/// There is only one distinct advantage to flattening the CFG at the IR level:
3196/// it makes very common but simplistic optimizations such as are common in
3197/// instcombine and the DAG combiner more powerful by removing CFG edges and
3198/// modeling their effects with easier to reason about SSA value graphs.
3199///
3200///
3201/// An illustration of this transform is turning this IR:
3202/// \code
3203/// BB:
3204/// %cmp = icmp ult %x, %y
3205/// br i1 %cmp, label %EndBB, label %ThenBB
3206/// ThenBB:
3207/// %sub = sub %x, %y
3208/// br label BB2
3209/// EndBB:
3210/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3211/// ...
3212/// \endcode
3213///
3214/// Into this IR:
3215/// \code
3216/// BB:
3217/// %cmp = icmp ult %x, %y
3218/// %sub = sub %x, %y
3219/// %cond = select i1 %cmp, 0, %sub
3220/// ...
3221/// \endcode
3222///
3223/// \returns true if the conditional block is removed.
3224bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3225 BasicBlock *ThenBB) {
3226 if (!Options.SpeculateBlocks)
3227 return false;
3228
3229 // Be conservative for now. FP select instruction can often be expensive.
3230 Value *BrCond = BI->getCondition();
3231 if (isa<FCmpInst>(BrCond))
3232 return false;
3233
3234 BasicBlock *BB = BI->getParent();
3235 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3236 InstructionCost Budget =
3238
3239 // If ThenBB is actually on the false edge of the conditional branch, remember
3240 // to swap the select operands later.
3241 bool Invert = false;
3242 if (ThenBB != BI->getSuccessor(0)) {
3243 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3244 Invert = true;
3245 }
3246 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3247
3248 if (!isProfitableToSpeculate(BI, Invert, TTI))
3249 return false;
3250
3251 // Keep a count of how many times instructions are used within ThenBB when
3252 // they are candidates for sinking into ThenBB. Specifically:
3253 // - They are defined in BB, and
3254 // - They have no side effects, and
3255 // - All of their uses are in ThenBB.
3256 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3257
3258 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3259
3260 unsigned SpeculatedInstructions = 0;
3261 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3262 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3263 Value *SpeculatedStoreValue = nullptr;
3264 StoreInst *SpeculatedStore = nullptr;
3265 EphemeralValueTracker EphTracker;
3266 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3267 // Skip pseudo probes. The consequence is we lose track of the branch
3268 // probability for ThenBB, which is fine since the optimization here takes
3269 // place regardless of the branch probability.
3270 if (isa<PseudoProbeInst>(I)) {
3271 // The probe should be deleted so that it will not be over-counted when
3272 // the samples collected on the non-conditional path are counted towards
3273 // the conditional path. We leave it for the counts inference algorithm to
3274 // figure out a proper count for an unknown probe.
3275 SpeculatedPseudoProbes.push_back(&I);
3276 continue;
3277 }
3278
3279 // Ignore ephemeral values, they will be dropped by the transform.
3280 if (EphTracker.track(&I))
3281 continue;
3282
3283 // Only speculatively execute a single instruction (not counting the
3284 // terminator) for now.
3285 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3287 SpeculatedConditionalLoadsStores.size() <
3289 // Not count load/store into cost if target supports conditional faulting
3290 // b/c it's cheap to speculate it.
3291 if (IsSafeCheapLoadStore)
3292 SpeculatedConditionalLoadsStores.push_back(&I);
3293 else
3294 ++SpeculatedInstructions;
3295
3296 if (SpeculatedInstructions > 1)
3297 return false;
3298
3299 // Don't hoist the instruction if it's unsafe or expensive.
3300 if (!IsSafeCheapLoadStore &&
3302 !(HoistCondStores && !SpeculatedStoreValue &&
3303 (SpeculatedStoreValue =
3304 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3305 return false;
3306 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3309 return false;
3310
3311 // Store the store speculation candidate.
3312 if (!SpeculatedStore && SpeculatedStoreValue)
3313 SpeculatedStore = cast<StoreInst>(&I);
3314
3315 // Do not hoist the instruction if any of its operands are defined but not
3316 // used in BB. The transformation will prevent the operand from
3317 // being sunk into the use block.
3318 for (Use &Op : I.operands()) {
3320 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3321 continue; // Not a candidate for sinking.
3322
3323 ++SinkCandidateUseCounts[OpI];
3324 }
3325 }
3326
3327 // Consider any sink candidates which are only used in ThenBB as costs for
3328 // speculation. Note, while we iterate over a DenseMap here, we are summing
3329 // and so iteration order isn't significant.
3330 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3331 if (Inst->hasNUses(Count)) {
3332 ++SpeculatedInstructions;
3333 if (SpeculatedInstructions > 1)
3334 return false;
3335 }
3336
3337 // Check that we can insert the selects and that it's not too expensive to do
3338 // so.
3339 bool Convert =
3340 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3342 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3343 SpeculatedInstructions, Cost, TTI);
3344 if (!Convert || Cost > Budget)
3345 return false;
3346
3347 // If we get here, we can hoist the instruction and if-convert.
3348 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3349
3350 Instruction *Sel = nullptr;
3351 // Insert a select of the value of the speculated store.
3352 if (SpeculatedStoreValue) {
3353 IRBuilder<NoFolder> Builder(BI);
3354 Value *OrigV = SpeculatedStore->getValueOperand();
3355 Value *TrueV = SpeculatedStore->getValueOperand();
3356 Value *FalseV = SpeculatedStoreValue;
3357 if (Invert)
3358 std::swap(TrueV, FalseV);
3359 Value *S = Builder.CreateSelect(
3360 BrCond, TrueV, FalseV, "spec.store.select", BI);
3361 Sel = cast<Instruction>(S);
3362 SpeculatedStore->setOperand(0, S);
3363 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3364 SpeculatedStore->getDebugLoc());
3365 // The value stored is still conditional, but the store itself is now
3366 // unconditonally executed, so we must be sure that any linked dbg.assign
3367 // intrinsics are tracking the new stored value (the result of the
3368 // select). If we don't, and the store were to be removed by another pass
3369 // (e.g. DSE), then we'd eventually end up emitting a location describing
3370 // the conditional value, unconditionally.
3371 //
3372 // === Before this transformation ===
3373 // pred:
3374 // store %one, %x.dest, !DIAssignID !1
3375 // dbg.assign %one, "x", ..., !1, ...
3376 // br %cond if.then
3377 //
3378 // if.then:
3379 // store %two, %x.dest, !DIAssignID !2
3380 // dbg.assign %two, "x", ..., !2, ...
3381 //
3382 // === After this transformation ===
3383 // pred:
3384 // store %one, %x.dest, !DIAssignID !1
3385 // dbg.assign %one, "x", ..., !1
3386 /// ...
3387 // %merge = select %cond, %two, %one
3388 // store %merge, %x.dest, !DIAssignID !2
3389 // dbg.assign %merge, "x", ..., !2
3390 for (DbgVariableRecord *DbgAssign :
3391 at::getDVRAssignmentMarkers(SpeculatedStore))
3392 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3393 DbgAssign->replaceVariableLocationOp(OrigV, S);
3394 }
3395
3396 // Metadata can be dependent on the condition we are hoisting above.
3397 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3398 // to avoid making it appear as if the condition is a constant, which would
3399 // be misleading while debugging.
3400 // Similarly strip attributes that maybe dependent on condition we are
3401 // hoisting above.
3402 for (auto &I : make_early_inc_range(*ThenBB)) {
3403 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3404 I.dropLocation();
3405 }
3406 I.dropUBImplyingAttrsAndMetadata();
3407
3408 // Drop ephemeral values.
3409 if (EphTracker.contains(&I)) {
3410 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3411 I.eraseFromParent();
3412 }
3413 }
3414
3415 // Hoist the instructions.
3416 // Drop DbgVariableRecords attached to these instructions.
3417 for (auto &It : *ThenBB)
3418 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3419 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3420 // equivalent).
3421 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3422 !DVR || !DVR->isDbgAssign())
3423 It.dropOneDbgRecord(&DR);
3424 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3425 std::prev(ThenBB->end()));
3426
3427 if (!SpeculatedConditionalLoadsStores.empty())
3428 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3429 Sel);
3430
3431 // Insert selects and rewrite the PHI operands.
3432 IRBuilder<NoFolder> Builder(BI);
3433 for (PHINode &PN : EndBB->phis()) {
3434 unsigned OrigI = PN.getBasicBlockIndex(BB);
3435 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3436 Value *OrigV = PN.getIncomingValue(OrigI);
3437 Value *ThenV = PN.getIncomingValue(ThenI);
3438
3439 // Skip PHIs which are trivial.
3440 if (OrigV == ThenV)
3441 continue;
3442
3443 // Create a select whose true value is the speculatively executed value and
3444 // false value is the pre-existing value. Swap them if the branch
3445 // destinations were inverted.
3446 Value *TrueV = ThenV, *FalseV = OrigV;
3447 if (Invert)
3448 std::swap(TrueV, FalseV);
3449 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3450 PN.setIncomingValue(OrigI, V);
3451 PN.setIncomingValue(ThenI, V);
3452 }
3453
3454 // Remove speculated pseudo probes.
3455 for (Instruction *I : SpeculatedPseudoProbes)
3456 I->eraseFromParent();
3457
3458 ++NumSpeculations;
3459 return true;
3460}
3461
3463
3464// Return false if number of blocks searched is too much.
3465static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3466 BlocksSet &ReachesNonLocalUses) {
3467 if (BB == DefBB)
3468 return true;
3469 if (!ReachesNonLocalUses.insert(BB).second)
3470 return true;
3471
3472 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3473 return false;
3474 for (BasicBlock *Pred : predecessors(BB))
3475 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3476 return false;
3477 return true;
3478}
3479
3480/// Return true if we can thread a branch across this block.
3482 BlocksSet &NonLocalUseBlocks) {
3483 int Size = 0;
3484 EphemeralValueTracker EphTracker;
3485
3486 // Walk the loop in reverse so that we can identify ephemeral values properly
3487 // (values only feeding assumes).
3488 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3489 // Can't fold blocks that contain noduplicate or convergent calls.
3490 if (CallInst *CI = dyn_cast<CallInst>(&I))
3491 if (CI->cannotDuplicate() || CI->isConvergent())
3492 return false;
3493
3494 // Ignore ephemeral values which are deleted during codegen.
3495 // We will delete Phis while threading, so Phis should not be accounted in
3496 // block's size.
3497 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3498 if (Size++ > MaxSmallBlockSize)
3499 return false; // Don't clone large BB's.
3500 }
3501
3502 // Record blocks with non-local uses of values defined in the current basic
3503 // block.
3504 for (User *U : I.users()) {
3506 BasicBlock *UsedInBB = UI->getParent();
3507 if (UsedInBB == BB) {
3508 if (isa<PHINode>(UI))
3509 return false;
3510 } else
3511 NonLocalUseBlocks.insert(UsedInBB);
3512 }
3513
3514 // Looks ok, continue checking.
3515 }
3516
3517 return true;
3518}
3519
3521 BasicBlock *To) {
3522 // Don't look past the block defining the value, we might get the value from
3523 // a previous loop iteration.
3524 auto *I = dyn_cast<Instruction>(V);
3525 if (I && I->getParent() == To)
3526 return nullptr;
3527
3528 // We know the value if the From block branches on it.
3529 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3530 if (BI && BI->isConditional() && BI->getCondition() == V &&
3531 BI->getSuccessor(0) != BI->getSuccessor(1))
3532 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3534
3535 return nullptr;
3536}
3537
3538/// If we have a conditional branch on something for which we know the constant
3539/// value in predecessors (e.g. a phi node in the current block), thread edges
3540/// from the predecessor to their ultimate destination.
3541static std::optional<bool>
3543 const DataLayout &DL,
3544 AssumptionCache *AC) {
3546 BasicBlock *BB = BI->getParent();
3547 Value *Cond = BI->getCondition();
3549 if (PN && PN->getParent() == BB) {
3550 // Degenerate case of a single entry PHI.
3551 if (PN->getNumIncomingValues() == 1) {
3553 return true;
3554 }
3555
3556 for (Use &U : PN->incoming_values())
3557 if (auto *CB = dyn_cast<ConstantInt>(U))
3558 KnownValues[CB].insert(PN->getIncomingBlock(U));
3559 } else {
3560 for (BasicBlock *Pred : predecessors(BB)) {
3561 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3562 KnownValues[CB].insert(Pred);
3563 }
3564 }
3565
3566 if (KnownValues.empty())
3567 return false;
3568
3569 // Now we know that this block has multiple preds and two succs.
3570 // Check that the block is small enough and record which non-local blocks use
3571 // values defined in the block.
3572
3573 BlocksSet NonLocalUseBlocks;
3574 BlocksSet ReachesNonLocalUseBlocks;
3575 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3576 return false;
3577
3578 // Jump-threading can only be done to destinations where no values defined
3579 // in BB are live.
3580
3581 // Quickly check if both destinations have uses. If so, jump-threading cannot
3582 // be done.
3583 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3584 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3585 return false;
3586
3587 // Search backward from NonLocalUseBlocks to find which blocks
3588 // reach non-local uses.
3589 for (BasicBlock *UseBB : NonLocalUseBlocks)
3590 // Give up if too many blocks are searched.
3591 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3592 return false;
3593
3594 for (const auto &Pair : KnownValues) {
3595 ConstantInt *CB = Pair.first;
3596 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3597 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3598
3599 // Okay, we now know that all edges from PredBB should be revectored to
3600 // branch to RealDest.
3601 if (RealDest == BB)
3602 continue; // Skip self loops.
3603
3604 // Skip if the predecessor's terminator is an indirect branch.
3605 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3606 return isa<IndirectBrInst>(PredBB->getTerminator());
3607 }))
3608 continue;
3609
3610 // Only revector to RealDest if no values defined in BB are live.
3611 if (ReachesNonLocalUseBlocks.contains(RealDest))
3612 continue;
3613
3614 LLVM_DEBUG({
3615 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3616 << " has value " << *Pair.first << " in predecessors:\n";
3617 for (const BasicBlock *PredBB : Pair.second)
3618 dbgs() << " " << PredBB->getName() << "\n";
3619 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3620 });
3621
3622 // Split the predecessors we are threading into a new edge block. We'll
3623 // clone the instructions into this block, and then redirect it to RealDest.
3624 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3625
3626 // TODO: These just exist to reduce test diff, we can drop them if we like.
3627 EdgeBB->setName(RealDest->getName() + ".critedge");
3628 EdgeBB->moveBefore(RealDest);
3629
3630 // Update PHI nodes.
3631 addPredecessorToBlock(RealDest, EdgeBB, BB);
3632
3633 // BB may have instructions that are being threaded over. Clone these
3634 // instructions into EdgeBB. We know that there will be no uses of the
3635 // cloned instructions outside of EdgeBB.
3636 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3637 ValueToValueMapTy TranslateMap; // Track translated values.
3638 TranslateMap[Cond] = CB;
3639
3640 // RemoveDIs: track instructions that we optimise away while folding, so
3641 // that we can copy DbgVariableRecords from them later.
3642 BasicBlock::iterator SrcDbgCursor = BB->begin();
3643 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3644 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3645 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3646 continue;
3647 }
3648 // Clone the instruction.
3649 Instruction *N = BBI->clone();
3650 // Insert the new instruction into its new home.
3651 N->insertInto(EdgeBB, InsertPt);
3652
3653 if (BBI->hasName())
3654 N->setName(BBI->getName() + ".c");
3655
3656 // Update operands due to translation.
3657 // Key Instructions: Remap all the atom groups.
3658 if (const DebugLoc &DL = BBI->getDebugLoc())
3659 mapAtomInstance(DL, TranslateMap);
3660 RemapInstruction(N, TranslateMap,
3662
3663 // Check for trivial simplification.
3664 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3665 if (!BBI->use_empty())
3666 TranslateMap[&*BBI] = V;
3667 if (!N->mayHaveSideEffects()) {
3668 N->eraseFromParent(); // Instruction folded away, don't need actual
3669 // inst
3670 N = nullptr;
3671 }
3672 } else {
3673 if (!BBI->use_empty())
3674 TranslateMap[&*BBI] = N;
3675 }
3676 if (N) {
3677 // Copy all debug-info attached to instructions from the last we
3678 // successfully clone, up to this instruction (they might have been
3679 // folded away).
3680 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3681 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3682 SrcDbgCursor = std::next(BBI);
3683 // Clone debug-info on this instruction too.
3684 N->cloneDebugInfoFrom(&*BBI);
3685
3686 // Register the new instruction with the assumption cache if necessary.
3687 if (auto *Assume = dyn_cast<AssumeInst>(N))
3688 if (AC)
3689 AC->registerAssumption(Assume);
3690 }
3691 }
3692
3693 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3694 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3695 InsertPt->cloneDebugInfoFrom(BI);
3696
3697 BB->removePredecessor(EdgeBB);
3698 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3699 EdgeBI->setSuccessor(0, RealDest);
3700 EdgeBI->setDebugLoc(BI->getDebugLoc());
3701
3702 if (DTU) {
3704 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3705 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3706 DTU->applyUpdates(Updates);
3707 }
3708
3709 // For simplicity, we created a separate basic block for the edge. Merge
3710 // it back into the predecessor if possible. This not only avoids
3711 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3712 // bypass the check for trivial cycles above.
3713 MergeBlockIntoPredecessor(EdgeBB, DTU);
3714
3715 // Signal repeat, simplifying any other constants.
3716 return std::nullopt;
3717 }
3718
3719 return false;
3720}
3721
3722bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3723 // Note: If BB is a loop header then there is a risk that threading introduces
3724 // a non-canonical loop by moving a back edge. So we avoid this optimization
3725 // for loop headers if NeedCanonicalLoop is set.
3726 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3727 return false;
3728
3729 std::optional<bool> Result;
3730 bool EverChanged = false;
3731 do {
3732 // Note that None means "we changed things, but recurse further."
3733 Result =
3735 EverChanged |= Result == std::nullopt || *Result;
3736 } while (Result == std::nullopt);
3737 return EverChanged;
3738}
3739
3740/// Given a BB that starts with the specified two-entry PHI node,
3741/// see if we can eliminate it.
3744 const DataLayout &DL,
3745 bool SpeculateUnpredictables) {
3746 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3747 // statement", which has a very simple dominance structure. Basically, we
3748 // are trying to find the condition that is being branched on, which
3749 // subsequently causes this merge to happen. We really want control
3750 // dependence information for this check, but simplifycfg can't keep it up
3751 // to date, and this catches most of the cases we care about anyway.
3752 BasicBlock *BB = PN->getParent();
3753
3754 BasicBlock *IfTrue, *IfFalse;
3755 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3756 if (!DomBI)
3757 return false;
3758 Value *IfCond = DomBI->getCondition();
3759 // Don't bother if the branch will be constant folded trivially.
3760 if (isa<ConstantInt>(IfCond))
3761 return false;
3762
3763 BasicBlock *DomBlock = DomBI->getParent();
3766 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3767 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3768 });
3769 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3770 "Will have either one or two blocks to speculate.");
3771
3772 // If the branch is non-unpredictable, see if we either predictably jump to
3773 // the merge bb (if we have only a single 'then' block), or if we predictably
3774 // jump to one specific 'then' block (if we have two of them).
3775 // It isn't beneficial to speculatively execute the code
3776 // from the block that we know is predictably not entered.
3777 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3778 if (!IsUnpredictable) {
3779 uint64_t TWeight, FWeight;
3780 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3781 (TWeight + FWeight) != 0) {
3782 BranchProbability BITrueProb =
3783 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3784 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3785 BranchProbability BIFalseProb = BITrueProb.getCompl();
3786 if (IfBlocks.size() == 1) {
3787 BranchProbability BIBBProb =
3788 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3789 if (BIBBProb >= Likely)
3790 return false;
3791 } else {
3792 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3793 return false;
3794 }
3795 }
3796 }
3797
3798 // Don't try to fold an unreachable block. For example, the phi node itself
3799 // can't be the candidate if-condition for a select that we want to form.
3800 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3801 if (IfCondPhiInst->getParent() == BB)
3802 return false;
3803
3804 // Okay, we found that we can merge this two-entry phi node into a select.
3805 // Doing so would require us to fold *all* two entry phi nodes in this block.
3806 // At some point this becomes non-profitable (particularly if the target
3807 // doesn't support cmov's). Only do this transformation if there are two or
3808 // fewer PHI nodes in this block.
3809 unsigned NumPhis = 0;
3810 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3811 if (NumPhis > 2)
3812 return false;
3813
3814 // Loop over the PHI's seeing if we can promote them all to select
3815 // instructions. While we are at it, keep track of the instructions
3816 // that need to be moved to the dominating block.
3817 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3818 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3819 InstructionCost Cost = 0;
3820 InstructionCost Budget =
3822 if (SpeculateUnpredictables && IsUnpredictable)
3823 Budget += TTI.getBranchMispredictPenalty();
3824
3825 bool Changed = false;
3826 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3827 PHINode *PN = cast<PHINode>(II++);
3828 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3829 PN->replaceAllUsesWith(V);
3830 PN->eraseFromParent();
3831 Changed = true;
3832 continue;
3833 }
3834
3835 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3836 AggressiveInsts, Cost, Budget, TTI, AC,
3837 ZeroCostInstructions) ||
3838 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3839 AggressiveInsts, Cost, Budget, TTI, AC,
3840 ZeroCostInstructions))
3841 return Changed;
3842 }
3843
3844 // If we folded the first phi, PN dangles at this point. Refresh it. If
3845 // we ran out of PHIs then we simplified them all.
3846 PN = dyn_cast<PHINode>(BB->begin());
3847 if (!PN)
3848 return true;
3849
3850 // Return true if at least one of these is a 'not', and another is either
3851 // a 'not' too, or a constant.
3852 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3853 if (!match(V0, m_Not(m_Value())))
3854 std::swap(V0, V1);
3855 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3856 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3857 };
3858
3859 // Don't fold i1 branches on PHIs which contain binary operators or
3860 // (possibly inverted) select form of or/ands, unless one of
3861 // the incoming values is an 'not' and another one is freely invertible.
3862 // These can often be turned into switches and other things.
3863 auto IsBinOpOrAnd = [](Value *V) {
3864 return match(
3866 };
3867 if (PN->getType()->isIntegerTy(1) &&
3868 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3869 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3870 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3871 PN->getIncomingValue(1)))
3872 return Changed;
3873
3874 // If all PHI nodes are promotable, check to make sure that all instructions
3875 // in the predecessor blocks can be promoted as well. If not, we won't be able
3876 // to get rid of the control flow, so it's not worth promoting to select
3877 // instructions.
3878 for (BasicBlock *IfBlock : IfBlocks)
3879 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3880 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3881 // This is not an aggressive instruction that we can promote.
3882 // Because of this, we won't be able to get rid of the control flow, so
3883 // the xform is not worth it.
3884 return Changed;
3885 }
3886
3887 // If either of the blocks has it's address taken, we can't do this fold.
3888 if (any_of(IfBlocks,
3889 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3890 return Changed;
3891
3892 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3893 if (IsUnpredictable) dbgs() << " (unpredictable)";
3894 dbgs() << " T: " << IfTrue->getName()
3895 << " F: " << IfFalse->getName() << "\n");
3896
3897 // If we can still promote the PHI nodes after this gauntlet of tests,
3898 // do all of the PHI's now.
3899
3900 // Move all 'aggressive' instructions, which are defined in the
3901 // conditional parts of the if's up to the dominating block.
3902 for (BasicBlock *IfBlock : IfBlocks)
3903 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3904
3905 IRBuilder<NoFolder> Builder(DomBI);
3906 // Propagate fast-math-flags from phi nodes to replacement selects.
3907 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3908 // Change the PHI node into a select instruction.
3909 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3910 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3911
3912 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3913 isa<FPMathOperator>(PN) ? PN : nullptr,
3914 "", DomBI);
3915 PN->replaceAllUsesWith(Sel);
3916 Sel->takeName(PN);
3917 PN->eraseFromParent();
3918 }
3919
3920 // At this point, all IfBlocks are empty, so our if statement
3921 // has been flattened. Change DomBlock to jump directly to our new block to
3922 // avoid other simplifycfg's kicking in on the diamond.
3923 Builder.CreateBr(BB);
3924
3926 if (DTU) {
3927 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3928 for (auto *Successor : successors(DomBlock))
3929 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3930 }
3931
3932 DomBI->eraseFromParent();
3933 if (DTU)
3934 DTU->applyUpdates(Updates);
3935
3936 return true;
3937}
3938
3941 Value *RHS, const Twine &Name = "") {
3942 // Try to relax logical op to binary op.
3943 if (impliesPoison(RHS, LHS))
3944 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3945 if (Opc == Instruction::And)
3946 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3947 if (Opc == Instruction::Or)
3948 return Builder.CreateLogicalOr(LHS, RHS, Name);
3949 llvm_unreachable("Invalid logical opcode");
3950}
3951
3952/// Return true if either PBI or BI has branch weight available, and store
3953/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3954/// not have branch weight, use 1:1 as its weight.
3956 uint64_t &PredTrueWeight,
3957 uint64_t &PredFalseWeight,
3958 uint64_t &SuccTrueWeight,
3959 uint64_t &SuccFalseWeight) {
3960 bool PredHasWeights =
3961 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3962 bool SuccHasWeights =
3963 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3964 if (PredHasWeights || SuccHasWeights) {
3965 if (!PredHasWeights)
3966 PredTrueWeight = PredFalseWeight = 1;
3967 if (!SuccHasWeights)
3968 SuccTrueWeight = SuccFalseWeight = 1;
3969 return true;
3970 } else {
3971 return false;
3972 }
3973}
3974
3975/// Determine if the two branches share a common destination and deduce a glue
3976/// that joins the branches' conditions to arrive at the common destination if
3977/// that would be profitable.
3978static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3980 const TargetTransformInfo *TTI) {
3981 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3982 "Both blocks must end with a conditional branches.");
3984 "PredBB must be a predecessor of BB.");
3985
3986 // We have the potential to fold the conditions together, but if the
3987 // predecessor branch is predictable, we may not want to merge them.
3988 uint64_t PTWeight, PFWeight;
3989 BranchProbability PBITrueProb, Likely;
3990 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3991 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3992 (PTWeight + PFWeight) != 0) {
3993 PBITrueProb =
3994 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3995 Likely = TTI->getPredictableBranchThreshold();
3996 }
3997
3998 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3999 // Speculate the 2nd condition unless the 1st is probably true.
4000 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4001 return {{BI->getSuccessor(0), Instruction::Or, false}};
4002 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4003 // Speculate the 2nd condition unless the 1st is probably false.
4004 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4005 return {{BI->getSuccessor(1), Instruction::And, false}};
4006 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4007 // Speculate the 2nd condition unless the 1st is probably true.
4008 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4009 return {{BI->getSuccessor(1), Instruction::And, true}};
4010 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4011 // Speculate the 2nd condition unless the 1st is probably false.
4012 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4013 return {{BI->getSuccessor(0), Instruction::Or, true}};
4014 }
4015 return std::nullopt;
4016}
4017
4019 DomTreeUpdater *DTU,
4020 MemorySSAUpdater *MSSAU,
4021 const TargetTransformInfo *TTI) {
4022 BasicBlock *BB = BI->getParent();
4023 BasicBlock *PredBlock = PBI->getParent();
4024
4025 // Determine if the two branches share a common destination.
4026 BasicBlock *CommonSucc;
4028 bool InvertPredCond;
4029 std::tie(CommonSucc, Opc, InvertPredCond) =
4031
4032 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4033
4034 IRBuilder<> Builder(PBI);
4035 // The builder is used to create instructions to eliminate the branch in BB.
4036 // If BB's terminator has !annotation metadata, add it to the new
4037 // instructions.
4038 Builder.CollectMetadataToCopy(BB->getTerminator(),
4039 {LLVMContext::MD_annotation});
4040
4041 // If we need to invert the condition in the pred block to match, do so now.
4042 if (InvertPredCond) {
4043 InvertBranch(PBI, Builder);
4044 }
4045
4046 BasicBlock *UniqueSucc =
4047 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4048
4049 // Before cloning instructions, notify the successor basic block that it
4050 // is about to have a new predecessor. This will update PHI nodes,
4051 // which will allow us to update live-out uses of bonus instructions.
4052 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4053
4054 // Try to update branch weights.
4055 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4056 SmallVector<uint32_t, 2> MDWeights;
4057 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4058 SuccTrueWeight, SuccFalseWeight)) {
4059 SmallVector<uint64_t, 8> NewWeights;
4060
4061 if (PBI->getSuccessor(0) == BB) {
4062 // PBI: br i1 %x, BB, FalseDest
4063 // BI: br i1 %y, UniqueSucc, FalseDest
4064 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4065 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
4066 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4067 // TrueWeight for PBI * FalseWeight for BI.
4068 // We assume that total weights of a BranchInst can fit into 32 bits.
4069 // Therefore, we will not have overflow using 64-bit arithmetic.
4070 NewWeights.push_back(PredFalseWeight *
4071 (SuccFalseWeight + SuccTrueWeight) +
4072 PredTrueWeight * SuccFalseWeight);
4073 } else {
4074 // PBI: br i1 %x, TrueDest, BB
4075 // BI: br i1 %y, TrueDest, UniqueSucc
4076 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4077 // FalseWeight for PBI * TrueWeight for BI.
4078 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4079 PredFalseWeight * SuccTrueWeight);
4080 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4081 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
4082 }
4083
4084 // Halve the weights if any of them cannot fit in an uint32_t
4085 fitWeights(NewWeights);
4086
4087 append_range(MDWeights, NewWeights);
4088 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
4089
4090 // TODO: If BB is reachable from all paths through PredBlock, then we
4091 // could replace PBI's branch probabilities with BI's.
4092 } else
4093 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4094
4095 // Now, update the CFG.
4096 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4097
4098 if (DTU)
4099 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4100 {DominatorTree::Delete, PredBlock, BB}});
4101
4102 // If BI was a loop latch, it may have had associated loop metadata.
4103 // We need to copy it to the new latch, that is, PBI.
4104 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4105 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4106
4107 ValueToValueMapTy VMap; // maps original values to cloned values
4109
4110 Module *M = BB->getModule();
4111
4112 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4113 for (DbgVariableRecord &DVR :
4115 RemapDbgRecord(M, &DVR, VMap,
4117 }
4118
4119 // Now that the Cond was cloned into the predecessor basic block,
4120 // or/and the two conditions together.
4121 Value *BICond = VMap[BI->getCondition()];
4122 PBI->setCondition(
4123 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4125 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4126 if (!MDWeights.empty()) {
4127 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4128 setBranchWeights(SI, MDWeights[0], MDWeights[1],
4129 /*IsExpected=*/false);
4130 }
4131
4132 ++NumFoldBranchToCommonDest;
4133 return true;
4134}
4135
4136/// Return if an instruction's type or any of its operands' types are a vector
4137/// type.
4138static bool isVectorOp(Instruction &I) {
4139 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4140 return U->getType()->isVectorTy();
4141 });
4142}
4143
4144/// If this basic block is simple enough, and if a predecessor branches to us
4145/// and one of our successors, fold the block into the predecessor and use
4146/// logical operations to pick the right destination.
4148 MemorySSAUpdater *MSSAU,
4149 const TargetTransformInfo *TTI,
4150 unsigned BonusInstThreshold) {
4151 // If this block ends with an unconditional branch,
4152 // let speculativelyExecuteBB() deal with it.
4153 if (!BI->isConditional())
4154 return false;
4155
4156 BasicBlock *BB = BI->getParent();
4160
4162
4164 Cond->getParent() != BB || !Cond->hasOneUse())
4165 return false;
4166
4167 // Finally, don't infinitely unroll conditional loops.
4168 if (is_contained(successors(BB), BB))
4169 return false;
4170
4171 // With which predecessors will we want to deal with?
4173 for (BasicBlock *PredBlock : predecessors(BB)) {
4174 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4175
4176 // Check that we have two conditional branches. If there is a PHI node in
4177 // the common successor, verify that the same value flows in from both
4178 // blocks.
4179 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4180 continue;
4181
4182 // Determine if the two branches share a common destination.
4183 BasicBlock *CommonSucc;
4185 bool InvertPredCond;
4186 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4187 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4188 else
4189 continue;
4190
4191 // Check the cost of inserting the necessary logic before performing the
4192 // transformation.
4193 if (TTI) {
4194 Type *Ty = BI->getCondition()->getType();
4195 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4196 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4197 !isa<CmpInst>(PBI->getCondition())))
4198 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4199
4201 continue;
4202 }
4203
4204 // Ok, we do want to deal with this predecessor. Record it.
4205 Preds.emplace_back(PredBlock);
4206 }
4207
4208 // If there aren't any predecessors into which we can fold,
4209 // don't bother checking the cost.
4210 if (Preds.empty())
4211 return false;
4212
4213 // Only allow this transformation if computing the condition doesn't involve
4214 // too many instructions and these involved instructions can be executed
4215 // unconditionally. We denote all involved instructions except the condition
4216 // as "bonus instructions", and only allow this transformation when the
4217 // number of the bonus instructions we'll need to create when cloning into
4218 // each predecessor does not exceed a certain threshold.
4219 unsigned NumBonusInsts = 0;
4220 bool SawVectorOp = false;
4221 const unsigned PredCount = Preds.size();
4222 for (Instruction &I : *BB) {
4223 // Don't check the branch condition comparison itself.
4224 if (&I == Cond)
4225 continue;
4226 // Ignore the terminator.
4227 if (isa<BranchInst>(I))
4228 continue;
4229 // I must be safe to execute unconditionally.
4231 return false;
4232 SawVectorOp |= isVectorOp(I);
4233
4234 // Account for the cost of duplicating this instruction into each
4235 // predecessor. Ignore free instructions.
4236 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4238 NumBonusInsts += PredCount;
4239
4240 // Early exits once we reach the limit.
4241 if (NumBonusInsts >
4242 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4243 return false;
4244 }
4245
4246 auto IsBCSSAUse = [BB, &I](Use &U) {
4247 auto *UI = cast<Instruction>(U.getUser());
4248 if (auto *PN = dyn_cast<PHINode>(UI))
4249 return PN->getIncomingBlock(U) == BB;
4250 return UI->getParent() == BB && I.comesBefore(UI);
4251 };
4252
4253 // Does this instruction require rewriting of uses?
4254 if (!all_of(I.uses(), IsBCSSAUse))
4255 return false;
4256 }
4257 if (NumBonusInsts >
4258 BonusInstThreshold *
4259 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4260 return false;
4261
4262 // Ok, we have the budget. Perform the transformation.
4263 for (BasicBlock *PredBlock : Preds) {
4264 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4265 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4266 }
4267 return false;
4268}
4269
4270// If there is only one store in BB1 and BB2, return it, otherwise return
4271// nullptr.
4273 StoreInst *S = nullptr;
4274 for (auto *BB : {BB1, BB2}) {
4275 if (!BB)
4276 continue;
4277 for (auto &I : *BB)
4278 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4279 if (S)
4280 // Multiple stores seen.
4281 return nullptr;
4282 else
4283 S = SI;
4284 }
4285 }
4286 return S;
4287}
4288
4290 Value *AlternativeV = nullptr) {
4291 // PHI is going to be a PHI node that allows the value V that is defined in
4292 // BB to be referenced in BB's only successor.
4293 //
4294 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4295 // doesn't matter to us what the other operand is (it'll never get used). We
4296 // could just create a new PHI with an undef incoming value, but that could
4297 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4298 // other PHI. So here we directly look for some PHI in BB's successor with V
4299 // as an incoming operand. If we find one, we use it, else we create a new
4300 // one.
4301 //
4302 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4303 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4304 // where OtherBB is the single other predecessor of BB's only successor.
4305 PHINode *PHI = nullptr;
4306 BasicBlock *Succ = BB->getSingleSuccessor();
4307
// First pass: try to reuse an existing PHI in Succ that already routes V
// from BB (and, when AlternativeV is given, AlternativeV from the other
// predecessor). Reuse avoids creating redundant PHIs.
4308 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4309 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4310 PHI = cast<PHINode>(I);
4311 if (!AlternativeV)
4312 break;
4313
4314 assert(Succ->hasNPredecessors(2));
4315 auto PredI = pred_begin(Succ);
4316 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4317 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4318 break;
// This PHI carries V but the wrong alternative value; keep scanning.
4319 PHI = nullptr;
4320 }
4321 if (PHI)
4322 return PHI;
4323
4324 // If V is not an instruction defined in BB, just return it.
4325 if (!AlternativeV &&
4326 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4327 return V;
4328
// No reusable PHI: synthesize one. Predecessor edges other than BB get
// AlternativeV when it was supplied, otherwise poison (the value on those
// edges is never used in that case).
4329 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4330 PHI->insertBefore(Succ->begin());
4331 PHI->addIncoming(V, BB);
4332 for (BasicBlock *PredBB : predecessors(Succ))
4333 if (PredBB != BB)
4334 PHI->addIncoming(
4335 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4336 return PHI;
4337}
4338
// Sink the single store from the P arm and the single store from the Q arm
// (both to Address) into one store in PostBB, executed under the disjunction
// of the two branch conditions (each inverted per InvertPCond/InvertQCond).
// Returns true if the transform was performed.
4340 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4341 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4342 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4343 // For every pointer, there must be exactly two stores, one coming from
4344 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4345 // store (to any address) in PTB,PFB or QTB,QFB.
4346 // FIXME: We could relax this restriction with a bit more work and performance
4347 // testing.
4348 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4349 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4350 if (!PStore || !QStore)
4351 return false;
4352
4353 // Now check the stores are compatible.
// Both must be simple/unordered (no atomics/volatile) and store the same
// value type so the merged store is well-typed.
4354 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4355 PStore->getValueOperand()->getType() !=
4356 QStore->getValueOperand()->getType())
4357 return false;
4358
4359 // Check that sinking the store won't cause program behavior changes. Sinking
4360 // the store out of the Q blocks won't change any behavior as we're sinking
4361 // from a block to its unconditional successor. But we're moving a store from
4362 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4363 // So we need to check that there are no aliasing loads or stores in
4364 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4365 // operations between PStore and the end of its parent block.
4366 //
4367 // The ideal way to do this is to query AliasAnalysis, but we don't
4368 // preserve AA currently so that is dangerous. Be super safe and just
4369 // check there are no other memory operations at all.
4370 for (auto &I : *QFB->getSinglePredecessor())
4371 if (I.mayReadOrWriteMemory())
4372 return false;
4373 for (auto &I : *QFB)
4374 if (&I != QStore && I.mayReadOrWriteMemory())
4375 return false;
4376 if (QTB)
4377 for (auto &I : *QTB)
4378 if (&I != QStore && I.mayReadOrWriteMemory())
4379 return false;
4380 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4381 I != E; ++I)
4382 if (&*I != PStore && I->mayReadOrWriteMemory())
4383 return false;
4384
4385 // If we're not in aggressive mode, we only optimize if we have some
4386 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4387 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4388 if (!BB)
4389 return true;
4390 // Heuristic: if the block can be if-converted/phi-folded and the
4391 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4392 // thread this store.
4393 InstructionCost Cost = 0;
4394 InstructionCost Budget =
4396 for (auto &I : BB->instructionsWithoutDebug(false)) {
4397 // Consider terminator instruction to be free.
4398 if (I.isTerminator())
4399 continue;
4400 // If this is one of the stores that we want to speculate out of this BB,
4401 // then don't count its cost, consider it to be free.
4402 if (auto *S = dyn_cast<StoreInst>(&I))
4403 if (llvm::find(FreeStores, S))
4404 continue;
4405 // Else, we have a white-list of instructions that we are okay speculating.
4407 return false; // Not in white-list - not worthwhile folding.
4408 // And finally, if this is a non-free instruction that we are okay
4409 // speculating, ensure that we consider the speculation budget.
4410 Cost +=
4411 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4412 if (Cost > Budget)
4413 return false; // Eagerly refuse to fold as soon as we're out of budget.
4414 }
4415 assert(Cost <= Budget &&
4416 "When we run out of budget we will eagerly return from within the "
4417 "per-instruction loop.");
4418 return true;
4419 };
4420
4421 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4423 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4424 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4425 return false;
4426
4427 // If PostBB has more than two predecessors, we need to split it so we can
4428 // sink the store.
4429 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4430 // We know that QFB's only successor is PostBB. And QFB has a single
4431 // predecessor. If QTB exists, then its only successor is also PostBB.
4432 // If QTB does not exist, then QFB's only predecessor has a conditional
4433 // branch to QFB and PostBB.
4434 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4435 BasicBlock *NewBB =
4436 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4437 if (!NewBB)
4438 return false;
4439 PostBB = NewBB;
4440 }
4441
4442 // OK, we're going to sink the stores to PostBB. The store has to be
4443 // conditional though, so first create the predicate.
4444 BranchInst *PBranch =
4446 BranchInst *QBranch =
4448 Value *PCond = PBranch->getCondition();
4449 Value *QCond = QBranch->getCondition();
4450
// PPHI/QPHI make each stored value available in PostBB via PHI nodes.
4452 PStore->getParent());
4454 QStore->getParent(), PPHI);
4455
4456 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4457 IRBuilder<> QB(PostBB, PostBBFirst);
4458 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4459
// If a store actually lives in the false arm of its diamond, its branch
// condition must be negated before it guards the merged store.
4460 InvertPCond ^= (PStore->getParent() != PTB);
4461 InvertQCond ^= (QStore->getParent() != QTB);
4462 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4463 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4464
4465 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4466
4467 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4468 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4469 /*Unreachable=*/false,
4470 /*BranchWeights=*/nullptr, DTU);
// Propagate profile metadata onto the new guard branch when both source
// branches carried weights.
4471 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4473 SmallVector<uint32_t, 2> PWeights, QWeights;
4474 extractBranchWeights(*PBranch, PWeights);
4475 extractBranchWeights(*QBranch, QWeights);
4476 if (InvertPCond)
4477 std::swap(PWeights[0], PWeights[1]);
4478 if (InvertQCond)
4479 std::swap(QWeights[0], QWeights[1]);
4480 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4481 setBranchWeights(PostBB->getTerminator(), CombinedWeights[0],
4482 CombinedWeights[1],
4483 /*IsExpected=*/false);
4484 }
4485
4486 QB.SetInsertPoint(T);
4487 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4488 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4489 // Choose the minimum alignment. If we could prove both stores execute, we
4490 // could use biggest one. In this case, though, we only know that one of the
4491 // stores executes. And we don't know it's safe to take the alignment from a
4492 // store that doesn't execute.
4493 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4494
4495 QStore->eraseFromParent();
4496 PStore->eraseFromParent();
4497
4498 return true;
4499}
4500
// Driver: recognize the two-diamond/triangle shape (PBI feeding QBI), find
// addresses stored on both the P side and the Q side, and try to merge each
// pair of conditional stores via mergeConditionalStoreToAddress.
4502 DomTreeUpdater *DTU, const DataLayout &DL,
4503 const TargetTransformInfo &TTI) {
4504 // The intention here is to find diamonds or triangles (see below) where each
4505 // conditional block contains a store to the same address. Both of these
4506 // stores are conditional, so they can't be unconditionally sunk. But it may
4507 // be profitable to speculatively sink the stores into one merged store at the
4508 // end, and predicate the merged store on the union of the two conditions of
4509 // PBI and QBI.
4510 //
4511 // This can reduce the number of stores executed if both of the conditions are
4512 // true, and can allow the blocks to become small enough to be if-converted.
4513 // This optimization will also chain, so that ladders of test-and-set
4514 // sequences can be if-converted away.
4515 //
4516 // We only deal with simple diamonds or triangles:
4517 //
4518 // PBI or PBI or a combination of the two
4519 // / \ | \
4520 // PTB PFB | PFB
4521 // \ / | /
4522 // QBI QBI
4523 // / \ | \
4524 // QTB QFB | QFB
4525 // \ / | /
4526 // PostBB PostBB
4527 //
4528 // We model triangles as a type of diamond with a nullptr "true" block.
4529 // Triangles are canonicalized so that the fallthrough edge is represented by
4530 // a true condition, as in the diagram above.
4531 BasicBlock *PTB = PBI->getSuccessor(0);
4532 BasicBlock *PFB = PBI->getSuccessor(1);
4533 BasicBlock *QTB = QBI->getSuccessor(0);
4534 BasicBlock *QFB = QBI->getSuccessor(1);
4535 BasicBlock *PostBB = QFB->getSingleSuccessor();
4536
4537 // Make sure we have a good guess for PostBB. If QTB's only successor is
4538 // QFB, then QFB is a better PostBB.
4539 if (QTB->getSingleSuccessor() == QFB)
4540 PostBB = QFB;
4541
4542 // If we couldn't find a good PostBB, stop.
4543 if (!PostBB)
4544 return false;
4545
4546 bool InvertPCond = false, InvertQCond = false;
4547 // Canonicalize fallthroughs to the true branches.
4548 if (PFB == QBI->getParent()) {
4549 std::swap(PFB, PTB);
4550 InvertPCond = true;
4551 }
4552 if (QFB == PostBB) {
4553 std::swap(QFB, QTB);
4554 InvertQCond = true;
4555 }
4556
4557 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4558 // and QFB may not. Model fallthroughs as a nullptr block.
4559 if (PTB == QBI->getParent())
4560 PTB = nullptr;
4561 if (QTB == PostBB)
4562 QTB = nullptr;
4563
4564 // Legality bailouts. We must have at least the non-fallthrough blocks and
4565 // the post-dominating block, and the non-fallthroughs must only have one
4566 // predecessor.
4567 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4568 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4569 };
4570 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4571 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4572 return false;
4573 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4574 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4575 return false;
4576 if (!QBI->getParent()->hasNUses(2))
4577 return false;
4578
4579 // OK, this is a sequence of two diamonds or triangles.
4580 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
// Gather the pointer operands of stores in each side; only addresses that
// appear on both sides are merge candidates.
4581 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4582 for (auto *BB : {PTB, PFB}) {
4583 if (!BB)
4584 continue;
4585 for (auto &I : *BB)
4587 PStoreAddresses.insert(SI->getPointerOperand());
4588 }
4589 for (auto *BB : {QTB, QFB}) {
4590 if (!BB)
4591 continue;
4592 for (auto &I : *BB)
4594 QStoreAddresses.insert(SI->getPointerOperand());
4595 }
4596
4597 set_intersect(PStoreAddresses, QStoreAddresses);
4598 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4599 // clear what it contains.
4600 auto &CommonAddresses = PStoreAddresses;
4601
4602 bool Changed = false;
4603 for (auto *Address : CommonAddresses)
4604 Changed |=
4605 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4606 InvertPCond, InvertQCond, DTU, DL, TTI);
4607 return Changed;
4608}
4609
4610/// If the previous block ended with a widenable branch, determine if reusing
4611/// the target block is profitable and legal. This will have the effect of
4612/// "widening" PBI, but doesn't require us to reason about hoisting safety.
4614 DomTreeUpdater *DTU) {
4615 // TODO: This can be generalized in two important ways:
4616 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4617 // values from the PBI edge.
4618 // 2) We can sink side effecting instructions into BI's fallthrough
4619 // successor provided they doesn't contribute to computation of
4620 // BI's condition.
4621 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4622 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4623 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4624 !BI->getParent()->getSinglePredecessor())
4625 return false;
4626 if (!IfFalseBB->phis().empty())
4627 return false; // TODO
4628 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4629 // may undo the transform done here.
4630 // TODO: There might be a more fine-grained solution to this.
4631 if (!llvm::succ_empty(IfFalseBB))
4632 return false;
4633 // Use lambda to lazily compute expensive condition after cheap ones.
4634 auto NoSideEffects = [](BasicBlock &BB) {
4635 return llvm::none_of(BB, [](const Instruction &I) {
4636 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4637 });
4638 };
// The two blocks below are symmetric: each handles one successor position
// of BI that targets a deoptimizing block, redirecting that edge to
// IfFalseBB (the widenable branch's failure path).
4639 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4640 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4641 NoSideEffects(*BI->getParent())) {
4642 auto *OldSuccessor = BI->getSuccessor(1);
4643 OldSuccessor->removePredecessor(BI->getParent());
4644 BI->setSuccessor(1, IfFalseBB);
4645 if (DTU)
4646 DTU->applyUpdates(
4647 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4648 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4649 return true;
4650 }
4651 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4652 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4653 NoSideEffects(*BI->getParent())) {
4654 auto *OldSuccessor = BI->getSuccessor(0);
4655 OldSuccessor->removePredecessor(BI->getParent());
4656 BI->setSuccessor(0, IfFalseBB);
4657 if (DTU)
4658 DTU->applyUpdates(
4659 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4660 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4661 return true;
4662 }
4663 return false;
4664}
4665
4666/// If we have a conditional branch as a predecessor of another block,
4667/// this function tries to simplify it. We know
4668/// that PBI and BI are both conditional branches, and BI is in one of the
4669/// successor blocks of PBI - PBI branches to BI.
4671 DomTreeUpdater *DTU,
4672 const DataLayout &DL,
4673 const TargetTransformInfo &TTI) {
4674 assert(PBI->isConditional() && BI->isConditional());
4675 BasicBlock *BB = BI->getParent();
4676
4677 // If this block ends with a branch instruction, and if there is a
4678 // predecessor that ends on a branch of the same condition, make
4679 // this conditional branch redundant.
4680 if (PBI->getCondition() == BI->getCondition() &&
4681 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4682 // Okay, the outcome of this conditional branch is statically
4683 // knowable. If this block had a single pred, handle specially, otherwise
4684 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4685 if (BB->getSinglePredecessor()) {
4686 // Turn this into a branch on constant.
4687 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4688 BI->setCondition(
4689 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4690 return true; // Nuke the branch on constant.
4691 }
4692 }
4693
4694 // If the previous block ended with a widenable branch, determine if reusing
4695 // the target block is profitable and legal. This will have the effect of
4696 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4697 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4698 return true;
4699
4700 // If both branches are conditional and both contain stores to the same
4701 // address, remove the stores from the conditionals and create a conditional
4702 // merged store at the end.
4703 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4704 return true;
4705
4706 // If this is a conditional branch in an empty block, and if any
4707 // predecessors are a conditional branch to one of our destinations,
4708 // fold the conditions into logical ops and one cond br.
4709
4710 // Ignore dbg intrinsics.
4711 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4712 return false;
4713
// Determine which successor index of each branch points at the shared
// destination; bail if the two branches share no successor.
4714 int PBIOp, BIOp;
4715 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4716 PBIOp = 0;
4717 BIOp = 0;
4718 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4719 PBIOp = 0;
4720 BIOp = 1;
4721 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4722 PBIOp = 1;
4723 BIOp = 0;
4724 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4725 PBIOp = 1;
4726 BIOp = 1;
4727 } else {
4728 return false;
4729 }
4730
4731 // Check to make sure that the other destination of this branch
4732 // isn't BB itself. If so, this is an infinite loop that will
4733 // keep getting unwound.
4734 if (PBI->getSuccessor(PBIOp) == BB)
4735 return false;
4736
4737 // If predecessor's branch probability to BB is too low don't merge branches.
4738 SmallVector<uint32_t, 2> PredWeights;
4739 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4740 extractBranchWeights(*PBI, PredWeights) &&
4741 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4742
4744 PredWeights[PBIOp],
4745 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4746
4747 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4748 if (CommonDestProb >= Likely)
4749 return false;
4750 }
4751
4752 // Do not perform this transformation if it would require
4753 // insertion of a large number of select instructions. For targets
4754 // without predication/cmovs, this is a big pessimization.
4755
4756 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4757 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4758 unsigned NumPhis = 0;
4759 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4760 ++II, ++NumPhis) {
4761 if (NumPhis > 2) // Disable this xform.
4762 return false;
4763 }
4764
4765 // Finally, if everything is ok, fold the branches to logical ops.
4766 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4767
4768 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4769 << "AND: " << *BI->getParent());
4770
4772
4773 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4774 // branch in it, where one edge (OtherDest) goes back to itself but the other
4775 // exits. We don't *know* that the program avoids the infinite loop
4776 // (even though that seems likely). If we do this xform naively, we'll end up
4777 // recursively unpeeling the loop. Since we know that (after the xform is
4778 // done) that the block *is* infinite if reached, we just make it an obviously
4779 // infinite loop with no cond branch.
4780 if (OtherDest == BB) {
4781 // Insert it at the end of the function, because it's either code,
4782 // or it won't matter if it's hot. :)
4783 BasicBlock *InfLoopBlock =
4784 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4785 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4786 if (DTU)
4787 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4788 OtherDest = InfLoopBlock;
4789 }
4790
4791 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4792
4793 // BI may have other predecessors. Because of this, we leave
4794 // it alone, but modify PBI.
4795
4796 // Make sure we get to CommonDest on True&True directions.
4797 Value *PBICond = PBI->getCondition();
4798 IRBuilder<NoFolder> Builder(PBI);
4799 if (PBIOp)
4800 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4801
4802 Value *BICond = BI->getCondition();
4803 if (BIOp)
4804 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4805
4806 // Merge the conditions.
4807 Value *Cond =
4808 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4809
4810 // Modify PBI to branch on the new condition to the new dests.
4811 PBI->setCondition(Cond);
4812 PBI->setSuccessor(0, CommonDest);
4813 PBI->setSuccessor(1, OtherDest);
4814
4815 if (DTU) {
4816 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4817 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4818
4819 DTU->applyUpdates(Updates);
4820 }
4821
4822 // Update branch weight for PBI.
4823 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4824 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4825 bool HasWeights =
4826 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4827 SuccTrueWeight, SuccFalseWeight);
4828 if (HasWeights) {
4829 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4830 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4831 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4832 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4833 // The weight to CommonDest should be PredCommon * SuccTotal +
4834 // PredOther * SuccCommon.
4835 // The weight to OtherDest should be PredOther * SuccOther.
4836 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4837 PredOther * SuccCommon,
4838 PredOther * SuccOther};
4839 // Halve the weights if any of them cannot fit in an uint32_t
4840 fitWeights(NewWeights);
4841
4842 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4843 // Cond may be a select instruction with the first operand set to "true", or
4844 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4846 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4847 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4848 // The select is predicated on PBICond
4849 assert(dyn_cast<SelectInst>(SI)->getCondition() == PBICond);
4850 // The corresponding probabilities are what was referred to above as
4851 // PredCommon and PredOther.
4852 setBranchWeights(SI, PredCommon, PredOther,
4853 /*IsExpected=*/false);
4854 }
4855 }
4856
4857 // OtherDest may have phi nodes. If so, add an entry from PBI's
4858 // block that are identical to the entries for BI's block.
4859 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4860
4861 // We know that the CommonDest already had an edge from PBI to
4862 // it. If it has PHIs though, the PHIs may have different
4863 // entries for BB and PBI's BB. If so, insert a select to make
4864 // them agree.
4865 for (PHINode &PN : CommonDest->phis()) {
4866 Value *BIV = PN.getIncomingValueForBlock(BB);
4867 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4868 Value *PBIV = PN.getIncomingValue(PBBIdx);
4869 if (BIV != PBIV) {
4870 // Insert a select in PBI to pick the right value.
4872 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4873 PN.setIncomingValue(PBBIdx, NV);
4874 // The select has the same condition as PBI, in the same BB. The
4875 // probabilities don't change.
4876 if (HasWeights) {
4877 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4878 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4879 setBranchWeights(NV, TrueWeight, FalseWeight,
4880 /*IsExpected=*/false);
4881 }
4882 }
4883 }
4884
4885 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4886 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4887
4888 // This basic block is probably dead. We know it has at least
4889 // one fewer predecessor.
4890 return true;
4891}
4892
4893// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4894// true or to FalseBB if Cond is false.
4895// Takes care of updating the successors and removing the old terminator.
4896// Also makes sure not to introduce new successors by assuming that edges to
4897// non-successor TrueBBs and FalseBBs aren't reachable.
4898bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4899 Value *Cond, BasicBlock *TrueBB,
4900 BasicBlock *FalseBB,
4901 uint32_t TrueWeight,
4902 uint32_t FalseWeight) {
4903 auto *BB = OldTerm->getParent();
4904 // Remove any superfluous successor edges from the CFG.
4905 // First, figure out which successors to preserve.
4906 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4907 // successor.
4908 BasicBlock *KeepEdge1 = TrueBB;
4909 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4910
4911 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4912
4913 // Then remove the rest.
4914 for (BasicBlock *Succ : successors(OldTerm)) {
4915 // Make sure only to keep exactly one copy of each edge.
4916 if (Succ == KeepEdge1)
4917 KeepEdge1 = nullptr;
4918 else if (Succ == KeepEdge2)
4919 KeepEdge2 = nullptr;
4920 else {
4921 Succ->removePredecessor(BB,
4922 /*KeepOneInputPHIs=*/true);
4923
4924 if (Succ != TrueBB && Succ != FalseBB)
4925 RemovedSuccessors.insert(Succ);
4926 }
4927 }
4928
4929 IRBuilder<> Builder(OldTerm);
4930 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4931
4932 // Insert an appropriate new terminator.
// KeepEdge1/KeepEdge2 are nulled above for every edge OldTerm already had,
// so a non-null value here means that destination was NOT a successor.
4933 if (!KeepEdge1 && !KeepEdge2) {
4934 if (TrueBB == FalseBB) {
4935 // We were only looking for one successor, and it was present.
4936 // Create an unconditional branch to it.
4937 Builder.CreateBr(TrueBB);
4938 } else {
4939 // We found both of the successors we were looking for.
4940 // Create a conditional branch sharing the condition of the select.
4941 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4942 if (TrueWeight != FalseWeight)
4943 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4944 }
4945 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4946 // Neither of the selected blocks were successors, so this
4947 // terminator must be unreachable.
4948 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4949 } else {
4950 // One of the selected values was a successor, but the other wasn't.
4951 // Insert an unconditional branch to the one that was found;
4952 // the edge to the one that wasn't must be unreachable.
4953 if (!KeepEdge1) {
4954 // Only TrueBB was found.
4955 Builder.CreateBr(TrueBB);
4956 } else {
4957 // Only FalseBB was found.
4958 Builder.CreateBr(FalseBB);
4959 }
4960 }
4961
4963
4964 if (DTU) {
4965 SmallVector<DominatorTree::UpdateType, 2> Updates;
4966 Updates.reserve(RemovedSuccessors.size());
4967 for (auto *RemovedSuccessor : RemovedSuccessors)
4968 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4969 DTU->applyUpdates(Updates);
4970 }
4971
4972 return true;
4973}
4974
4975// Replaces
4976// (switch (select cond, X, Y)) on constant X, Y
4977// with a branch - conditional if X and Y lead to distinct BBs,
4978// unconditional otherwise.
4979bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4980 SelectInst *Select) {
4981 // Check for constant integer values in the select.
4982 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4983 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4984 if (!TrueVal || !FalseVal)
4985 return false;
4986
4987 // Find the relevant condition and destinations.
4988 Value *Condition = Select->getCondition();
4989 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4990 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4991
4992 // Get weight for TrueBB and FalseBB.
4993 uint32_t TrueWeight = 0, FalseWeight = 0;
4994 SmallVector<uint64_t, 8> Weights;
4995 bool HasWeights = hasBranchWeightMD(*SI);
4996 if (HasWeights) {
4997 getBranchWeights(SI, Weights);
4998 if (Weights.size() == 1 + SI->getNumCases()) {
4999 TrueWeight =
5000 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
5001 FalseWeight =
5002 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
5003 }
5004 }
5005
5006 // Perform the actual simplification.
5007 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
5008 FalseWeight);
5009}
5010
5011// Replaces
5012// (indirectbr (select cond, blockaddress(@fn, BlockA),
5013// blockaddress(@fn, BlockB)))
5014// with
5015// (br cond, BlockA, BlockB).
5016bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
5017 SelectInst *SI) {
5018 // Check that both operands of the select are block addresses.
5019 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
5020 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
5021 if (!TBA || !FBA)
5022 return false;
5023
5024 // Extract the actual blocks.
5025 BasicBlock *TrueBB = TBA->getBasicBlock();
5026 BasicBlock *FalseBB = FBA->getBasicBlock();
5027
5028 // Perform the actual simplification.
5029 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
5030 0);
5031}
5032
5033/// This is called when we find an icmp instruction
5034/// (a seteq/setne with a constant) as the only instruction in a
5035/// block that ends with an uncond branch. We are looking for a very specific
5036/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5037/// this case, we merge the first two "or's of icmp" into a switch, but then the
5038/// default value goes to an uncond block with a seteq in it, we get something
5039/// like:
5040///
5041///   switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5042/// DEFAULT:
5043///   %tmp = icmp eq i8 %A, 92
5044///   br label %end
5045/// end:
5046///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5047///
5048/// We prefer to split the edge to 'end' so that there is a true/false entry to
5049/// the PHI, merging the third icmp into the switch.
5050bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5051 ICmpInst *ICI, IRBuilder<> &Builder) {
5052 BasicBlock *BB = ICI->getParent();
5053
5054 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5055 // complex.
5056 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5057 return false;
5058
5059 Value *V = ICI->getOperand(0);
5060 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5061
5062 // The pattern we're looking for is where our only predecessor is a switch on
5063 // 'V' and this block is the default case for the switch. In this case we can
5064 // fold the compared value into the switch to simplify things.
5065 BasicBlock *Pred = BB->getSinglePredecessor();
5066 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5067 return false;
5068
5069 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5070 if (SI->getCondition() != V)
5071 return false;
5072
5073 // If BB is reachable on a non-default case, then we simply know the value of
5074 // V in this block. Substitute it and constant fold the icmp instruction
5075 // away.
5076 if (SI->getDefaultDest() != BB) {
5077 ConstantInt *VVal = SI->findCaseDest(BB);
5078 assert(VVal && "Should have a unique destination value");
5079 ICI->setOperand(0, VVal);
5080
5081 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5082 ICI->replaceAllUsesWith(V);
5083 ICI->eraseFromParent();
5084 }
5085 // BB is now empty, so it is likely to simplify away.
5086 return requestResimplify();
5087 }
5088
5089 // Ok, the block is reachable from the default dest. If the constant we're
5090 // comparing exists in one of the other edges, then we can constant fold ICI
5091 // and zap it.
// In that case the compare can never be true (for eq) / is always true (for
// ne) on the default path, so it folds to a constant.
5092 if (SI->findCaseValue(Cst) != SI->case_default()) {
5093 Value *V;
5094 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5096 else
5098
5099 ICI->replaceAllUsesWith(V);
5100 ICI->eraseFromParent();
5101 // BB is now empty, so it is likely to simplify away.
5102 return requestResimplify();
5103 }
5104
5105 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5106 // the block.
5107 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5108 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5109 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5111 return false;
5112
5113 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5114 // true in the PHI.
5115 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5116 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5117
5118 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5119 std::swap(DefaultCst, NewCst);
5120
5121 // Replace ICI (which is used by the PHI for the default value) with true or
5122 // false depending on if it is EQ or NE.
5123 ICI->replaceAllUsesWith(DefaultCst);
5124 ICI->eraseFromParent();
5125
5126 SmallVector<DominatorTree::UpdateType, 2> Updates;
5127
5128 // Okay, the switch goes to this block on a default value. Add an edge from
5129 // the switch to the merge point on the compared value.
5130 BasicBlock *NewBB =
5131 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5132 {
// Split the default weight between the remaining default edge and the new
// case so total profile mass is preserved (roughly halved each).
5133 SwitchInstProfUpdateWrapper SIW(*SI);
5134 auto W0 = SIW.getSuccessorWeight(0);
5136 if (W0) {
5137 NewW = ((uint64_t(*W0) + 1) >> 1);
5138 SIW.setSuccessorWeight(0, *NewW);
5139 }
5140 SIW.addCase(Cst, NewBB, NewW);
5141 if (DTU)
5142 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5143 }
5144
5145 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5146 Builder.SetInsertPoint(NewBB);
5147 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5148 Builder.CreateBr(SuccBlock);
5149 PHIUse->addIncoming(NewCst, NewBB);
5150 if (DTU) {
5151 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5152 DTU->applyUpdates(Updates);
5153 }
5154 return true;
5155}
5156
5157/// The specified branch is a conditional branch.
5158/// Check to see if it is branching on an or/and chain of icmp instructions, and
5159/// fold it into a switch instruction if so.
5160bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5161 IRBuilder<> &Builder,
5162 const DataLayout &DL) {
5164 if (!Cond)
5165 return false;
5166
5167 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5168 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5169 // 'setne's and'ed together, collect them.
5170
5171 // Try to gather values from a chain of and/or to be turned into a switch
5172 ConstantComparesGatherer ConstantCompare(Cond, DL);
5173 // Unpack the result
5174 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5175 Value *CompVal = ConstantCompare.CompValue;
5176 unsigned UsedICmps = ConstantCompare.UsedICmps;
5177 Value *ExtraCase = ConstantCompare.Extra;
5178 bool TrueWhenEqual = ConstantCompare.IsEq;
5179
5180 // If we didn't have a multiply compared value, fail.
5181 if (!CompVal)
5182 return false;
5183
5184 // Avoid turning single icmps into a switch.
5185 if (UsedICmps <= 1)
5186 return false;
5187
5188 // There might be duplicate constants in the list, which the switch
5189 // instruction can't handle, remove them now.
5190 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5191 Values.erase(llvm::unique(Values), Values.end());
5192
5193 // If Extra was used, we require at least two switch values to do the
5194 // transformation. A switch with one value is just a conditional branch.
5195 if (ExtraCase && Values.size() < 2)
5196 return false;
5197
5198 // TODO: Preserve branch weight metadata, similarly to how
5199 // foldValueComparisonIntoPredecessors preserves it.
5200
5201 // Figure out which block is which destination.
5202 BasicBlock *DefaultBB = BI->getSuccessor(1);
5203 BasicBlock *EdgeBB = BI->getSuccessor(0);
5204 if (!TrueWhenEqual)
5205 std::swap(DefaultBB, EdgeBB);
5206
5207 BasicBlock *BB = BI->getParent();
5208
5209 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5210 << " cases into SWITCH. BB is:\n"
5211 << *BB);
5212
5213 SmallVector<DominatorTree::UpdateType, 2> Updates;
5214
5215 // If there are any extra values that couldn't be folded into the switch
5216 // then we evaluate them with an explicit branch first. Split the block
5217 // right before the condbr to handle it.
5218 if (ExtraCase) {
5219 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5220 /*MSSAU=*/nullptr, "switch.early.test");
5221
5222 // Remove the uncond branch added to the old block.
5223 Instruction *OldTI = BB->getTerminator();
5224 Builder.SetInsertPoint(OldTI);
5225
5226 // There can be an unintended UB if extra values are Poison. Before the
5227 // transformation, extra values may not be evaluated according to the
5228 // condition, and it will not raise UB. But after transformation, we are
5229 // evaluating extra values before checking the condition, and it will raise
5230 // UB. It can be solved by adding freeze instruction to extra values.
5231 AssumptionCache *AC = Options.AC;
5232
5233 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5234 ExtraCase = Builder.CreateFreeze(ExtraCase);
5235
5236 if (TrueWhenEqual)
5237 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
5238 else
5239 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5240
5241 OldTI->eraseFromParent();
5242
5243 if (DTU)
5244 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5245
5246 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5247 // for the edge we just added.
5248 addPredecessorToBlock(EdgeBB, BB, NewBB);
5249
5250 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5251 << "\nEXTRABB = " << *BB);
5252 BB = NewBB;
5253 }
5254
5255 Builder.SetInsertPoint(BI);
5256 // Convert pointer to int before we switch.
5257 if (CompVal->getType()->isPointerTy()) {
5258 assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
5259 "Should not end up here with unstable pointers");
5260 CompVal = Builder.CreatePtrToInt(
5261 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5262 }
5263
5264 // Create the new switch instruction now.
5265 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5266
5267 // Add all of the 'cases' to the switch instruction.
5268 for (ConstantInt *Val : Values)
5269 New->addCase(Val, EdgeBB);
5270
5271 // We added edges from PI to the EdgeBB. As such, if there were any
5272 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5273 // the number of edges added.
5274 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5275 PHINode *PN = cast<PHINode>(BBI);
5276 Value *InVal = PN->getIncomingValueForBlock(BB);
5277 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5278 PN->addIncoming(InVal, BB);
5279 }
5280
5281 // Erase the old branch instruction.
5283 if (DTU)
5284 DTU->applyUpdates(Updates);
5285
5286 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5287 return true;
5288}
5289
5290bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5291 if (isa<PHINode>(RI->getValue()))
5292 return simplifyCommonResume(RI);
5293 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5294 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5295 // The resume must unwind the exception that caused control to branch here.
5296 return simplifySingleResume(RI);
5297
5298 return false;
5299}
5300
5301// Check if cleanup block is empty
5303 for (Instruction &I : R) {
5304 auto *II = dyn_cast<IntrinsicInst>(&I);
5305 if (!II)
5306 return false;
5307
5308 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5309 switch (IntrinsicID) {
5310 case Intrinsic::dbg_declare:
5311 case Intrinsic::dbg_value:
5312 case Intrinsic::dbg_label:
5313 case Intrinsic::lifetime_end:
5314 break;
5315 default:
5316 return false;
5317 }
5318 }
5319 return true;
5320}
5321
5322// Simplify resume that is shared by several landing pads (phi of landing pad).
5323bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5324 BasicBlock *BB = RI->getParent();
5325
5326 // Check that there are no other instructions except for debug and lifetime
5327 // intrinsics between the phi's and resume instruction.
5328 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5329 BB->getTerminator()->getIterator())))
5330 return false;
5331
5332 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5333 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5334
5335 // Check incoming blocks to see if any of them are trivial.
5336 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5337 Idx++) {
5338 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5339 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5340
5341 // If the block has other successors, we can not delete it because
5342 // it has other dependents.
5343 if (IncomingBB->getUniqueSuccessor() != BB)
5344 continue;
5345
5346 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5347 // Not the landing pad that caused the control to branch here.
5348 if (IncomingValue != LandingPad)
5349 continue;
5350
5352 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5353 TrivialUnwindBlocks.insert(IncomingBB);
5354 }
5355
5356 // If no trivial unwind blocks, don't do any simplifications.
5357 if (TrivialUnwindBlocks.empty())
5358 return false;
5359
5360 // Turn all invokes that unwind here into calls.
5361 for (auto *TrivialBB : TrivialUnwindBlocks) {
5362 // Blocks that will be simplified should be removed from the phi node.
5363 // Note there could be multiple edges to the resume block, and we need
5364 // to remove them all.
5365 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5366 BB->removePredecessor(TrivialBB, true);
5367
5368 for (BasicBlock *Pred :
5370 removeUnwindEdge(Pred, DTU);
5371 ++NumInvokes;
5372 }
5373
5374 // In each SimplifyCFG run, only the current processed block can be erased.
5375 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5376 // of erasing TrivialBB, we only remove the branch to the common resume
5377 // block so that we can later erase the resume block since it has no
5378 // predecessors.
5379 TrivialBB->getTerminator()->eraseFromParent();
5380 new UnreachableInst(RI->getContext(), TrivialBB);
5381 if (DTU)
5382 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5383 }
5384
5385 // Delete the resume block if all its predecessors have been removed.
5386 if (pred_empty(BB))
5387 DeleteDeadBlock(BB, DTU);
5388
5389 return !TrivialUnwindBlocks.empty();
5390}
5391
5392// Simplify resume that is only used by a single (non-phi) landing pad.
5393bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5394 BasicBlock *BB = RI->getParent();
5395 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5396 assert(RI->getValue() == LPInst &&
5397 "Resume must unwind the exception that caused control to here");
5398
5399 // Check that there are no other instructions except for debug intrinsics.
5401 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5402 return false;
5403
5404 // Turn all invokes that unwind here into calls and delete the basic block.
5405 for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
5406 removeUnwindEdge(Pred, DTU);
5407 ++NumInvokes;
5408 }
5409
5410 // The landingpad is now unreachable. Zap it.
5411 DeleteDeadBlock(BB, DTU);
5412 return true;
5413}
5414
5416 // If this is a trivial cleanup pad that executes no instructions, it can be
5417 // eliminated. If the cleanup pad continues to the caller, any predecessor
5418 // that is an EH pad will be updated to continue to the caller and any
5419 // predecessor that terminates with an invoke instruction will have its invoke
5420 // instruction converted to a call instruction. If the cleanup pad being
5421 // simplified does not continue to the caller, each predecessor will be
5422 // updated to continue to the unwind destination of the cleanup pad being
5423 // simplified.
5424 BasicBlock *BB = RI->getParent();
5425 CleanupPadInst *CPInst = RI->getCleanupPad();
5426 if (CPInst->getParent() != BB)
5427 // This isn't an empty cleanup.
5428 return false;
5429
5430 // We cannot kill the pad if it has multiple uses. This typically arises
5431 // from unreachable basic blocks.
5432 if (!CPInst->hasOneUse())
5433 return false;
5434
5435 // Check that there are no other instructions except for benign intrinsics.
5437 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5438 return false;
5439
5440 // If the cleanup return we are simplifying unwinds to the caller, this will
5441 // set UnwindDest to nullptr.
5442 BasicBlock *UnwindDest = RI->getUnwindDest();
5443
5444 // We're about to remove BB from the control flow. Before we do, sink any
5445 // PHINodes into the unwind destination. Doing this before changing the
5446 // control flow avoids some potentially slow checks, since we can currently
5447 // be certain that UnwindDest and BB have no common predecessors (since they
5448 // are both EH pads).
5449 if (UnwindDest) {
5450 // First, go through the PHI nodes in UnwindDest and update any nodes that
5451 // reference the block we are removing
5452 for (PHINode &DestPN : UnwindDest->phis()) {
5453 int Idx = DestPN.getBasicBlockIndex(BB);
5454 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5455 assert(Idx != -1);
5456 // This PHI node has an incoming value that corresponds to a control
5457 // path through the cleanup pad we are removing. If the incoming
5458 // value is in the cleanup pad, it must be a PHINode (because we
5459 // verified above that the block is otherwise empty). Otherwise, the
5460 // value is either a constant or a value that dominates the cleanup
5461 // pad being removed.
5462 //
5463 // Because BB and UnwindDest are both EH pads, all of their
5464 // predecessors must unwind to these blocks, and since no instruction
5465 // can have multiple unwind destinations, there will be no overlap in
5466 // incoming blocks between SrcPN and DestPN.
5467 Value *SrcVal = DestPN.getIncomingValue(Idx);
5468 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5469
5470 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5471 for (auto *Pred : predecessors(BB)) {
5472 Value *Incoming =
5473 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5474 DestPN.addIncoming(Incoming, Pred);
5475 }
5476 }
5477
5478 // Sink any remaining PHI nodes directly into UnwindDest.
5479 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5480 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5481 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5482 // If the PHI node has no uses or all of its uses are in this basic
5483 // block (meaning they are debug or lifetime intrinsics), just leave
5484 // it. It will be erased when we erase BB below.
5485 continue;
5486
5487 // Otherwise, sink this PHI node into UnwindDest.
5488 // Any predecessors to UnwindDest which are not already represented
5489 // must be back edges which inherit the value from the path through
5490 // BB. In this case, the PHI value must reference itself.
5491 for (auto *pred : predecessors(UnwindDest))
5492 if (pred != BB)
5493 PN.addIncoming(&PN, pred);
5494 PN.moveBefore(InsertPt);
5495 // Also, add a dummy incoming value for the original BB itself,
5496 // so that the PHI is well-formed until we drop said predecessor.
5497 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5498 }
5499 }
5500
5501 std::vector<DominatorTree::UpdateType> Updates;
5502
5503 // We use make_early_inc_range here because we will remove all predecessors.
5505 if (UnwindDest == nullptr) {
5506 if (DTU) {
5507 DTU->applyUpdates(Updates);
5508 Updates.clear();
5509 }
5510 removeUnwindEdge(PredBB, DTU);
5511 ++NumInvokes;
5512 } else {
5513 BB->removePredecessor(PredBB);
5514 Instruction *TI = PredBB->getTerminator();
5515 TI->replaceUsesOfWith(BB, UnwindDest);
5516 if (DTU) {
5517 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5518 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5519 }
5520 }
5521 }
5522
5523 if (DTU)
5524 DTU->applyUpdates(Updates);
5525
5526 DeleteDeadBlock(BB, DTU);
5527
5528 return true;
5529}
5530
5531// Try to merge two cleanuppads together.
5533 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5534 // with.
5535 BasicBlock *UnwindDest = RI->getUnwindDest();
5536 if (!UnwindDest)
5537 return false;
5538
5539 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5540 // be safe to merge without code duplication.
5541 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5542 return false;
5543
5544 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5545 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5546 if (!SuccessorCleanupPad)
5547 return false;
5548
5549 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5550 // Replace any uses of the successor cleanupad with the predecessor pad
5551 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5552 // funclet bundle operands.
5553 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5554 // Remove the old cleanuppad.
5555 SuccessorCleanupPad->eraseFromParent();
5556 // Now, we simply replace the cleanupret with a branch to the unwind
5557 // destination.
5558 BranchInst::Create(UnwindDest, RI->getParent());
5559 RI->eraseFromParent();
5560
5561 return true;
5562}
5563
5564bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5565 // It is possible to transiantly have an undef cleanuppad operand because we
5566 // have deleted some, but not all, dead blocks.
5567 // Eventually, this block will be deleted.
5568 if (isa<UndefValue>(RI->getOperand(0)))
5569 return false;
5570
5571 if (mergeCleanupPad(RI))
5572 return true;
5573
5574 if (removeEmptyCleanup(RI, DTU))
5575 return true;
5576
5577 return false;
5578}
5579
5580// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5581bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5582 BasicBlock *BB = UI->getParent();
5583
5584 bool Changed = false;
5585
5586 // Ensure that any debug-info records that used to occur after the Unreachable
5587 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5588 // the block.
5590
5591 // Debug-info records on the unreachable inst itself should be deleted, as
5592 // below we delete everything past the final executable instruction.
5593 UI->dropDbgRecords();
5594
5595 // If there are any instructions immediately before the unreachable that can
5596 // be removed, do so.
5597 while (UI->getIterator() != BB->begin()) {
5599 --BBI;
5600
5602 break; // Can not drop any more instructions. We're done here.
5603 // Otherwise, this instruction can be freely erased,
5604 // even if it is not side-effect free.
5605
5606 // Note that deleting EH's here is in fact okay, although it involves a bit
5607 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5608 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5609 // and we can therefore guarantee this block will be erased.
5610
5611 // If we're deleting this, we're deleting any subsequent debug info, so
5612 // delete DbgRecords.
5613 BBI->dropDbgRecords();
5614
5615 // Delete this instruction (any uses are guaranteed to be dead)
5616 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5617 BBI->eraseFromParent();
5618 Changed = true;
5619 }
5620
5621 // If the unreachable instruction is the first in the block, take a gander
5622 // at all of the predecessors of this instruction, and simplify them.
5623 if (&BB->front() != UI)
5624 return Changed;
5625
5626 std::vector<DominatorTree::UpdateType> Updates;
5627
5628 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5629 for (BasicBlock *Predecessor : Preds) {
5630 Instruction *TI = Predecessor->getTerminator();
5631 IRBuilder<> Builder(TI);
5632 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5633 // We could either have a proper unconditional branch,
5634 // or a degenerate conditional branch with matching destinations.
5635 if (all_of(BI->successors(),
5636 [BB](auto *Successor) { return Successor == BB; })) {
5637 new UnreachableInst(TI->getContext(), TI->getIterator());
5638 TI->eraseFromParent();
5639 Changed = true;
5640 } else {
5641 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5642 Value* Cond = BI->getCondition();
5643 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5644 "The destinations are guaranteed to be different here.");
5645 CallInst *Assumption;
5646 if (BI->getSuccessor(0) == BB) {
5647 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5648 Builder.CreateBr(BI->getSuccessor(1));
5649 } else {
5650 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5651 Assumption = Builder.CreateAssumption(Cond);
5652 Builder.CreateBr(BI->getSuccessor(0));
5653 }
5654 if (Options.AC)
5655 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5656
5658 Changed = true;
5659 }
5660 if (DTU)
5661 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5662 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
5663 SwitchInstProfUpdateWrapper SU(*SI);
5664 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5665 if (i->getCaseSuccessor() != BB) {
5666 ++i;
5667 continue;
5668 }
5669 BB->removePredecessor(SU->getParent());
5670 i = SU.removeCase(i);
5671 e = SU->case_end();
5672 Changed = true;
5673 }
5674 // Note that the default destination can't be removed!
5675 if (DTU && SI->getDefaultDest() != BB)
5676 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5677 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5678 if (II->getUnwindDest() == BB) {
5679 if (DTU) {
5680 DTU->applyUpdates(Updates);
5681 Updates.clear();
5682 }
5683 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5684 if (!CI->doesNotThrow())
5685 CI->setDoesNotThrow();
5686 Changed = true;
5687 }
5688 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5689 if (CSI->getUnwindDest() == BB) {
5690 if (DTU) {
5691 DTU->applyUpdates(Updates);
5692 Updates.clear();
5693 }
5694 removeUnwindEdge(TI->getParent(), DTU);
5695 Changed = true;
5696 continue;
5697 }
5698
5699 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5700 E = CSI->handler_end();
5701 I != E; ++I) {
5702 if (*I == BB) {
5703 CSI->removeHandler(I);
5704 --I;
5705 --E;
5706 Changed = true;
5707 }
5708 }
5709 if (DTU)
5710 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5711 if (CSI->getNumHandlers() == 0) {
5712 if (CSI->hasUnwindDest()) {
5713 // Redirect all predecessors of the block containing CatchSwitchInst
5714 // to instead branch to the CatchSwitchInst's unwind destination.
5715 if (DTU) {
5716 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5717 Updates.push_back({DominatorTree::Insert,
5718 PredecessorOfPredecessor,
5719 CSI->getUnwindDest()});
5720 Updates.push_back({DominatorTree::Delete,
5721 PredecessorOfPredecessor, Predecessor});
5722 }
5723 }
5724 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5725 } else {
5726 // Rewrite all preds to unwind to caller (or from invoke to call).
5727 if (DTU) {
5728 DTU->applyUpdates(Updates);
5729 Updates.clear();
5730 }
5731 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5732 for (BasicBlock *EHPred : EHPreds)
5733 removeUnwindEdge(EHPred, DTU);
5734 }
5735 // The catchswitch is no longer reachable.
5736 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5737 CSI->eraseFromParent();
5738 Changed = true;
5739 }
5740 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5741 (void)CRI;
5742 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5743 "Expected to always have an unwind to BB.");
5744 if (DTU)
5745 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5746 new UnreachableInst(TI->getContext(), TI->getIterator());
5747 TI->eraseFromParent();
5748 Changed = true;
5749 }
5750 }
5751
5752 if (DTU)
5753 DTU->applyUpdates(Updates);
5754
5755 // If this block is now dead, remove it.
5756 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5757 DeleteDeadBlock(BB, DTU);
5758 return true;
5759 }
5760
5761 return Changed;
5762}
5763
5765 assert(Cases.size() >= 1);
5766
5768 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5769 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5770 return false;
5771 }
5772 return true;
5773}
5774
5776 DomTreeUpdater *DTU,
5777 bool RemoveOrigDefaultBlock = true) {
5778 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5779 auto *BB = Switch->getParent();
5780 auto *OrigDefaultBlock = Switch->getDefaultDest();
5781 if (RemoveOrigDefaultBlock)
5782 OrigDefaultBlock->removePredecessor(BB);
5783 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5784 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5785 OrigDefaultBlock);
5786 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5788 Switch->setDefaultDest(&*NewDefaultBlock);
5789 if (DTU) {
5791 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
5792 if (RemoveOrigDefaultBlock &&
5793 !is_contained(successors(BB), OrigDefaultBlock))
5794 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5795 DTU->applyUpdates(Updates);
5796 }
5797}
5798
5799/// Turn a switch into an integer range comparison and branch.
5800/// Switches with more than 2 destinations are ignored.
5801/// Switches with 1 destination are also ignored.
5802bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5803 IRBuilder<> &Builder) {
5804 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5805
5806 bool HasDefault = !SI->defaultDestUnreachable();
5807
5808 auto *BB = SI->getParent();
5809
5810 // Partition the cases into two sets with different destinations.
5811 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5812 BasicBlock *DestB = nullptr;
5815
5816 for (auto Case : SI->cases()) {
5817 BasicBlock *Dest = Case.getCaseSuccessor();
5818 if (!DestA)
5819 DestA = Dest;
5820 if (Dest == DestA) {
5821 CasesA.push_back(Case.getCaseValue());
5822 continue;
5823 }
5824 if (!DestB)
5825 DestB = Dest;
5826 if (Dest == DestB) {
5827 CasesB.push_back(Case.getCaseValue());
5828 continue;
5829 }
5830 return false; // More than two destinations.
5831 }
5832 if (!DestB)
5833 return false; // All destinations are the same and the default is unreachable
5834
5835 assert(DestA && DestB &&
5836 "Single-destination switch should have been folded.");
5837 assert(DestA != DestB);
5838 assert(DestB != SI->getDefaultDest());
5839 assert(!CasesB.empty() && "There must be non-default cases.");
5840 assert(!CasesA.empty() || HasDefault);
5841
5842 // Figure out if one of the sets of cases form a contiguous range.
5843 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5844 BasicBlock *ContiguousDest = nullptr;
5845 BasicBlock *OtherDest = nullptr;
5846 if (!CasesA.empty() && casesAreContiguous(CasesA)) {
5847 ContiguousCases = &CasesA;
5848 ContiguousDest = DestA;
5849 OtherDest = DestB;
5850 } else if (casesAreContiguous(CasesB)) {
5851 ContiguousCases = &CasesB;
5852 ContiguousDest = DestB;
5853 OtherDest = DestA;
5854 } else
5855 return false;
5856
5857 // Start building the compare and branch.
5858
5859 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5860 Constant *NumCases =
5861 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5862
5863 Value *Sub = SI->getCondition();
5864 if (!Offset->isNullValue())
5865 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5866
5867 Value *Cmp;
5868 // If NumCases overflowed, then all possible values jump to the successor.
5869 if (NumCases->isNullValue() && !ContiguousCases->empty())
5870 Cmp = ConstantInt::getTrue(SI->getContext());
5871 else
5872 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5873 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5874
5875 // Update weight for the newly-created conditional branch.
5876 if (hasBranchWeightMD(*SI)) {
5877 SmallVector<uint64_t, 8> Weights;
5878 getBranchWeights(SI, Weights);
5879 if (Weights.size() == 1 + SI->getNumCases()) {
5880 uint64_t TrueWeight = 0;
5881 uint64_t FalseWeight = 0;
5882 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5883 if (SI->getSuccessor(I) == ContiguousDest)
5884 TrueWeight += Weights[I];
5885 else
5886 FalseWeight += Weights[I];
5887 }
5888 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5889 TrueWeight /= 2;
5890 FalseWeight /= 2;
5891 }
5892 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
5893 }
5894 }
5895
5896 // Prune obsolete incoming values off the successors' PHI nodes.
5897 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5898 unsigned PreviousEdges = ContiguousCases->size();
5899 if (ContiguousDest == SI->getDefaultDest())
5900 ++PreviousEdges;
5901 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5902 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5903 }
5904 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5905 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5906 if (OtherDest == SI->getDefaultDest())
5907 ++PreviousEdges;
5908 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5909 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5910 }
5911
5912 // Clean up the default block - it may have phis or other instructions before
5913 // the unreachable terminator.
5914 if (!HasDefault)
5916
5917 auto *UnreachableDefault = SI->getDefaultDest();
5918
5919 // Drop the switch.
5920 SI->eraseFromParent();
5921
5922 if (!HasDefault && DTU)
5923 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5924
5925 return true;
5926}
5927
5928/// Compute masked bits for the condition of a switch
5929/// and use it to remove dead cases.
5931 AssumptionCache *AC,
5932 const DataLayout &DL) {
5933 Value *Cond = SI->getCondition();
5934 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
5935
5936 // We can also eliminate cases by determining that their values are outside of
5937 // the limited range of the condition based on how many significant (non-sign)
5938 // bits are in the condition value.
5939 unsigned MaxSignificantBitsInCond =
5941
5942 // Gather dead cases.
5944 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5945 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5946 for (const auto &Case : SI->cases()) {
5947 auto *Successor = Case.getCaseSuccessor();
5948 if (DTU) {
5949 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
5950 if (Inserted)
5951 UniqueSuccessors.push_back(Successor);
5952 ++It->second;
5953 }
5954 const APInt &CaseVal = Case.getCaseValue()->getValue();
5955 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5956 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5957 DeadCases.push_back(Case.getCaseValue());
5958 if (DTU)
5959 --NumPerSuccessorCases[Successor];
5960 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5961 << " is dead.\n");
5962 }
5963 }
5964
5965 // If we can prove that the cases must cover all possible values, the
5966 // default destination becomes dead and we can remove it. If we know some
5967 // of the bits in the value, we can use that to more precisely compute the
5968 // number of possible unique case values.
5969 bool HasDefault = !SI->defaultDestUnreachable();
5970 const unsigned NumUnknownBits =
5971 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5972 assert(NumUnknownBits <= Known.getBitWidth());
5973 if (HasDefault && DeadCases.empty() &&
5974 NumUnknownBits < 64 /* avoid overflow */) {
5975 uint64_t AllNumCases = 1ULL << NumUnknownBits;
5976 if (SI->getNumCases() == AllNumCases) {
5978 return true;
5979 }
5980 // When only one case value is missing, replace default with that case.
5981 // Eliminating the default branch will provide more opportunities for
5982 // optimization, such as lookup tables.
5983 if (SI->getNumCases() == AllNumCases - 1) {
5984 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
5985 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
5986 if (CondTy->getIntegerBitWidth() > 64 ||
5987 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
5988 return false;
5989
5990 uint64_t MissingCaseVal = 0;
5991 for (const auto &Case : SI->cases())
5992 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5993 auto *MissingCase =
5994 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
5996 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
5997 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
5998 SIW.setSuccessorWeight(0, 0);
5999 return true;
6000 }
6001 }
6002
6003 if (DeadCases.empty())
6004 return false;
6005
6007 for (ConstantInt *DeadCase : DeadCases) {
6008 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6009 assert(CaseI != SI->case_default() &&
6010 "Case was not found. Probably mistake in DeadCases forming.");
6011 // Prune unused values from PHI nodes.
6012 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6013 SIW.removeCase(CaseI);
6014 }
6015
6016 if (DTU) {
6017 std::vector<DominatorTree::UpdateType> Updates;
6018 for (auto *Successor : UniqueSuccessors)
6019 if (NumPerSuccessorCases[Successor] == 0)
6020 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6021 DTU->applyUpdates(Updates);
6022 }
6023
6024 return true;
6025}
6026
6027/// If BB would be eligible for simplification by
6028/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6029/// by an unconditional branch), look at the phi node for BB in the successor
6030/// block and see if the incoming value is equal to CaseValue. If so, return
6031/// the phi node, and set PhiIndex to BB's index in the phi node.
6033 BasicBlock *BB, int *PhiIndex) {
6034 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6035 return nullptr; // BB must be empty to be a candidate for simplification.
6036 if (!BB->getSinglePredecessor())
6037 return nullptr; // BB must be dominated by the switch.
6038
6040 if (!Branch || !Branch->isUnconditional())
6041 return nullptr; // Terminator must be unconditional branch.
6042
6043 BasicBlock *Succ = Branch->getSuccessor(0);
6044
6045 for (PHINode &PHI : Succ->phis()) {
6046 int Idx = PHI.getBasicBlockIndex(BB);
6047 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6048
6049 Value *InValue = PHI.getIncomingValue(Idx);
6050 if (InValue != CaseValue)
6051 continue;
6052
6053 *PhiIndex = Idx;
6054 return &PHI;
6055 }
6056
6057 return nullptr;
6058}
6059
6060/// Try to forward the condition of a switch instruction to a phi node
6061/// dominated by the switch, if that would mean that some of the destination
6062/// blocks of the switch can be folded away. Return true if a change is made.
6064 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6065
6066 ForwardingNodesMap ForwardingNodes;
6067 BasicBlock *SwitchBlock = SI->getParent();
6068 bool Changed = false;
6069 for (const auto &Case : SI->cases()) {
6070 ConstantInt *CaseValue = Case.getCaseValue();
6071 BasicBlock *CaseDest = Case.getCaseSuccessor();
6072
6073 // Replace phi operands in successor blocks that are using the constant case
6074 // value rather than the switch condition variable:
6075 // switchbb:
6076 // switch i32 %x, label %default [
6077 // i32 17, label %succ
6078 // ...
6079 // succ:
6080 // %r = phi i32 ... [ 17, %switchbb ] ...
6081 // -->
6082 // %r = phi i32 ... [ %x, %switchbb ] ...
6083
6084 for (PHINode &Phi : CaseDest->phis()) {
6085 // This only works if there is exactly 1 incoming edge from the switch to
6086 // a phi. If there is >1, that means multiple cases of the switch map to 1
6087 // value in the phi, and that phi value is not the switch condition. Thus,
6088 // this transform would not make sense (the phi would be invalid because
6089 // a phi can't have different incoming values from the same block).
6090 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6091 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6092 count(Phi.blocks(), SwitchBlock) == 1) {
6093 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6094 Changed = true;
6095 }
6096 }
6097
6098 // Collect phi nodes that are indirectly using this switch's case constants.
6099 int PhiIdx;
6100 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6101 ForwardingNodes[Phi].push_back(PhiIdx);
6102 }
6103
6104 for (auto &ForwardingNode : ForwardingNodes) {
6105 PHINode *Phi = ForwardingNode.first;
6106 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6107 // Check if it helps to fold PHI.
6108 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6109 continue;
6110
6111 for (int Index : Indexes)
6112 Phi->setIncomingValue(Index, SI->getCondition());
6113 Changed = true;
6114 }
6115
6116 return Changed;
6117}
6118
6119/// Return true if the backend will be able to handle
6120/// initializing an array of constants like C.
6122 if (C->isThreadDependent())
6123 return false;
6124 if (C->isDLLImportDependent())
6125 return false;
6126
6127 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6130 return false;
6131
6133 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6134 // materializing the array of constants.
6135 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6136 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6137 return false;
6138 }
6139
6140 if (!TTI.shouldBuildLookupTablesForConstant(C))
6141 return false;
6142
6143 return true;
6144}
6145
6146/// If V is a Constant, return it. Otherwise, try to look up
6147/// its constant value in ConstantPool, returning 0 if it's not there.
6148static Constant *
6151 if (Constant *C = dyn_cast<Constant>(V))
6152 return C;
6153 return ConstantPool.lookup(V);
6154}
6155
6156/// Try to fold instruction I into a constant. This works for
6157/// simple instructions such as binary operations where both operands are
6158/// constant or can be replaced by constants from the ConstantPool. Returns the
6159/// resulting constant on success, 0 otherwise.
6160static Constant *
6164 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6165 if (!A)
6166 return nullptr;
6167 if (A->isAllOnesValue())
6168 return lookupConstant(Select->getTrueValue(), ConstantPool);
6169 if (A->isNullValue())
6170 return lookupConstant(Select->getFalseValue(), ConstantPool);
6171 return nullptr;
6172 }
6173
6175 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6176 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6177 COps.push_back(A);
6178 else
6179 return nullptr;
6180 }
6181
6182 return ConstantFoldInstOperands(I, COps, DL);
6183}
6184
6185/// Try to determine the resulting constant values in phi nodes
6186/// at the common destination basic block, *CommonDest, for one of the case
6187/// destionations CaseDest corresponding to value CaseVal (0 for the default
6188/// case), of a switch instruction SI.
6189static bool
6191 BasicBlock **CommonDest,
6192 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6193 const DataLayout &DL, const TargetTransformInfo &TTI) {
6194 // The block from which we enter the common destination.
6195 BasicBlock *Pred = SI->getParent();
6196
6197 // If CaseDest is empty except for some side-effect free instructions through
6198 // which we can constant-propagate the CaseVal, continue to its successor.
6200 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6201 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6202 if (I.isTerminator()) {
6203 // If the terminator is a simple branch, continue to the next block.
6204 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6205 return false;
6206 Pred = CaseDest;
6207 CaseDest = I.getSuccessor(0);
6208 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6209 // Instruction is side-effect free and constant.
6210
6211 // If the instruction has uses outside this block or a phi node slot for
6212 // the block, it is not safe to bypass the instruction since it would then
6213 // no longer dominate all its uses.
6214 for (auto &Use : I.uses()) {
6215 User *User = Use.getUser();
6217 if (I->getParent() == CaseDest)
6218 continue;
6219 if (PHINode *Phi = dyn_cast<PHINode>(User))
6220 if (Phi->getIncomingBlock(Use) == CaseDest)
6221 continue;
6222 return false;
6223 }
6224
6225 ConstantPool.insert(std::make_pair(&I, C));
6226 } else {
6227 break;
6228 }
6229 }
6230
6231 // If we did not have a CommonDest before, use the current one.
6232 if (!*CommonDest)
6233 *CommonDest = CaseDest;
6234 // If the destination isn't the common one, abort.
6235 if (CaseDest != *CommonDest)
6236 return false;
6237
6238 // Get the values for this case from phi nodes in the destination block.
6239 for (PHINode &PHI : (*CommonDest)->phis()) {
6240 int Idx = PHI.getBasicBlockIndex(Pred);
6241 if (Idx == -1)
6242 continue;
6243
6244 Constant *ConstVal =
6245 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6246 if (!ConstVal)
6247 return false;
6248
6249 // Be conservative about which kinds of constants we support.
6250 if (!validLookupTableConstant(ConstVal, TTI))
6251 return false;
6252
6253 Res.push_back(std::make_pair(&PHI, ConstVal));
6254 }
6255
6256 return Res.size() > 0;
6257}
6258
6259// Helper function used to add CaseVal to the list of cases that generate
6260// Result. Returns the updated number of cases that generate this result.
6261static size_t mapCaseToResult(ConstantInt *CaseVal,
6262 SwitchCaseResultVectorTy &UniqueResults,
6263 Constant *Result) {
6264 for (auto &I : UniqueResults) {
6265 if (I.first == Result) {
6266 I.second.push_back(CaseVal);
6267 return I.second.size();
6268 }
6269 }
6270 UniqueResults.push_back(
6271 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6272 return 1;
6273}
6274
6275// Helper function that initializes a map containing
6276// results for the PHI node of the common destination block for a switch
6277// instruction. Returns false if multiple PHI nodes have been found or if
6278// there is not a common destination block for the switch.
6280 BasicBlock *&CommonDest,
6281 SwitchCaseResultVectorTy &UniqueResults,
6282 Constant *&DefaultResult,
6283 const DataLayout &DL,
6284 const TargetTransformInfo &TTI,
6285 uintptr_t MaxUniqueResults) {
6286 for (const auto &I : SI->cases()) {
6287 ConstantInt *CaseVal = I.getCaseValue();
6288
6289 // Resulting value at phi nodes for this case value.
6290 SwitchCaseResultsTy Results;
6291 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6292 DL, TTI))
6293 return false;
6294
6295 // Only one value per case is permitted.
6296 if (Results.size() > 1)
6297 return false;
6298
6299 // Add the case->result mapping to UniqueResults.
6300 const size_t NumCasesForResult =
6301 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6302
6303 // Early out if there are too many cases for this result.
6304 if (NumCasesForResult > MaxSwitchCasesPerResult)
6305 return false;
6306
6307 // Early out if there are too many unique results.
6308 if (UniqueResults.size() > MaxUniqueResults)
6309 return false;
6310
6311 // Check the PHI consistency.
6312 if (!PHI)
6313 PHI = Results[0].first;
6314 else if (PHI != Results[0].first)
6315 return false;
6316 }
6317 // Find the default result value.
6319 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6320 DL, TTI);
6321 // If the default value is not found abort unless the default destination
6322 // is unreachable.
6323 DefaultResult =
6324 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6325
6326 return DefaultResult || SI->defaultDestUnreachable();
6327}
6328
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      // With a reachable default, the second case needs its own compare; its
      // select becomes the false arm of the outer select built below.
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the degenerate case where multiple cases have the same result
  // value (a single result group plus the default).
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(CaseCount)) {
          Value *And = Builder.CreateAnd(Condition, AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              And, Constant::getIntegerValue(And->getType(), AndMask));
          return Builder.CreateSelect(Cmp, ResultVector[0].first,
                                      DefaultResult);
        }
      }

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle exactly two cases with the same result: compare against both
    // case values and OR the comparisons together.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  // No profitable select form was found; keep the switch.
  return nullptr;
}
6428
6429// Helper function to cleanup a switch instruction that has been converted into
6430// a select, fixing up PHI nodes and basic blocks.
6432 Value *SelectValue,
6433 IRBuilder<> &Builder,
6434 DomTreeUpdater *DTU) {
6435 std::vector<DominatorTree::UpdateType> Updates;
6436
6437 BasicBlock *SelectBB = SI->getParent();
6438 BasicBlock *DestBB = PHI->getParent();
6439
6440 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6441 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6442 Builder.CreateBr(DestBB);
6443
6444 // Remove the switch.
6445
6446 PHI->removeIncomingValueIf(
6447 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6448 PHI->addIncoming(SelectValue, SelectBB);
6449
6450 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6451 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6452 BasicBlock *Succ = SI->getSuccessor(i);
6453
6454 if (Succ == DestBB)
6455 continue;
6456 Succ->removePredecessor(SelectBB);
6457 if (DTU && RemovedSuccessors.insert(Succ).second)
6458 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6459 }
6460 SI->eraseFromParent();
6461 if (DTU)
6462 DTU->applyUpdates(Updates);
6463}
6464
6465/// If a switch is only used to initialize one or more phi nodes in a common
6466/// successor block with only two different constant values, try to replace the
6467/// switch with a select. Returns true if the fold was made.
6469 DomTreeUpdater *DTU, const DataLayout &DL,
6470 const TargetTransformInfo &TTI) {
6471 Value *const Cond = SI->getCondition();
6472 PHINode *PHI = nullptr;
6473 BasicBlock *CommonDest = nullptr;
6474 Constant *DefaultResult;
6475 SwitchCaseResultVectorTy UniqueResults;
6476 // Collect all the cases that will deliver the same value from the switch.
6477 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6478 DL, TTI, /*MaxUniqueResults*/ 2))
6479 return false;
6480
6481 assert(PHI != nullptr && "PHI for value select not found");
6482 Builder.SetInsertPoint(SI);
6483 Value *SelectValue =
6484 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder, DL);
6485 if (!SelectValue)
6486 return false;
6487
6488 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6489 return true;
6490}
6491
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
///
/// The constructor inspects the table contents and picks the cheapest
/// representation (single value, linear map, bitmap, or an in-memory array);
/// replaceSwitch() then emits the IR realizing the chosen strategy.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

private:
  // Depending on the switch, there are different alternatives.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6567
6568SwitchReplacement::SwitchReplacement(
6569 Module &M, uint64_t TableSize, ConstantInt *Offset,
6570 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6571 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
6572 : DefaultValue(DefaultValue) {
6573 assert(Values.size() && "Can't build lookup table without values!");
6574 assert(TableSize >= Values.size() && "Can't fit values in table!");
6575
6576 // If all values in the table are equal, this is that value.
6577 SingleValue = Values.begin()->second;
6578
6579 ValueType = Values.begin()->second->getType();
6580
6581 // Build up the table contents.
6582 SmallVector<Constant *, 64> TableContents(TableSize);
6583 for (const auto &[CaseVal, CaseRes] : Values) {
6584 assert(CaseRes->getType() == ValueType);
6585
6586 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6587 TableContents[Idx] = CaseRes;
6588
6589 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6590 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6591 }
6592
6593 // Fill in any holes in the table with the default result.
6594 if (Values.size() < TableSize) {
6595 assert(DefaultValue &&
6596 "Need a default value to fill the lookup table holes.");
6597 assert(DefaultValue->getType() == ValueType);
6598 for (uint64_t I = 0; I < TableSize; ++I) {
6599 if (!TableContents[I])
6600 TableContents[I] = DefaultValue;
6601 }
6602
6603 // If the default value is poison, all the holes are poison.
6604 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6605
6606 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6607 SingleValue = nullptr;
6608 }
6609
6610 // If each element in the table contains the same value, we only need to store
6611 // that single value.
6612 if (SingleValue) {
6613 Kind = SingleValueKind;
6614 return;
6615 }
6616
6617 // Check if we can derive the value with a linear transformation from the
6618 // table index.
6620 bool LinearMappingPossible = true;
6621 APInt PrevVal;
6622 APInt DistToPrev;
6623 // When linear map is monotonic and signed overflow doesn't happen on
6624 // maximum index, we can attach nsw on Add and Mul.
6625 bool NonMonotonic = false;
6626 assert(TableSize >= 2 && "Should be a SingleValue table.");
6627 // Check if there is the same distance between two consecutive values.
6628 for (uint64_t I = 0; I < TableSize; ++I) {
6629 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6630
6631 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6632 // This is an poison, so it's (probably) a lookup table hole.
6633 // To prevent any regressions from before we switched to using poison as
6634 // the default value, holes will fall back to using the first value.
6635 // This can be removed once we add proper handling for poisons in lookup
6636 // tables.
6637 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6638 }
6639
6640 if (!ConstVal) {
6641 // This is an undef. We could deal with it, but undefs in lookup tables
6642 // are very seldom. It's probably not worth the additional complexity.
6643 LinearMappingPossible = false;
6644 break;
6645 }
6646 const APInt &Val = ConstVal->getValue();
6647 if (I != 0) {
6648 APInt Dist = Val - PrevVal;
6649 if (I == 1) {
6650 DistToPrev = Dist;
6651 } else if (Dist != DistToPrev) {
6652 LinearMappingPossible = false;
6653 break;
6654 }
6655 NonMonotonic |=
6656 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6657 }
6658 PrevVal = Val;
6659 }
6660 if (LinearMappingPossible) {
6661 LinearOffset = cast<ConstantInt>(TableContents[0]);
6662 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6663 APInt M = LinearMultiplier->getValue();
6664 bool MayWrap = true;
6665 if (isIntN(M.getBitWidth(), TableSize - 1))
6666 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6667 LinearMapValWrapped = NonMonotonic || MayWrap;
6668 Kind = LinearMapKind;
6669 return;
6670 }
6671 }
6672
6673 // If the type is integer and the table fits in a register, build a bitmap.
6674 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6676 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6677 for (uint64_t I = TableSize; I > 0; --I) {
6678 TableInt <<= IT->getBitWidth();
6679 // Insert values into the bitmap. Undef values are set to zero.
6680 if (!isa<UndefValue>(TableContents[I - 1])) {
6681 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6682 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6683 }
6684 }
6685 BitMap = ConstantInt::get(M.getContext(), TableInt);
6686 BitMapElementTy = IT;
6687 Kind = BitMapKind;
6688 return;
6689 }
6690
6691 // Store the table in an array.
6692 auto *TableTy = ArrayType::get(ValueType, TableSize);
6693 Initializer = ConstantArray::get(TableTy, TableContents);
6694
6695 Kind = LookupTableKind;
6696}
6697
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // Every slot holds the same constant; no code needs to be emitted.
    return SingleValue;
  case LinearMapKind: {
    ++NumLinearMaps;
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    ++NumBitMaps;
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt", /*HasNUW =*/true, /*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case LookupTableKind: {
    ++NumLookupTables;
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array items. We will be only loading one
    // value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(ValueType));
    Type *IndexTy = DL.getIndexType(Table->getType());
    auto *ArrayTy = cast<ArrayType>(Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
      // The zext is non-negative if the narrower index provably stays within
      // the table bounds.
      if (auto *Zext = dyn_cast<ZExtInst>(Index))
        Zext->setNonNeg(
            isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
    return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
6771
6772bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6773 uint64_t TableSize,
6774 Type *ElementType) {
6775 auto *IT = dyn_cast<IntegerType>(ElementType);
6776 if (!IT)
6777 return false;
6778 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6779 // are <= 15, we could try to narrow the type.
6780
6781 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6782 if (TableSize >= UINT_MAX / IT->getBitWidth())
6783 return false;
6784 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6785}
6786
6788 const DataLayout &DL) {
6789 // Allow any legal type.
6790 if (TTI.isTypeLegal(Ty))
6791 return true;
6792
6793 auto *IT = dyn_cast<IntegerType>(Ty);
6794 if (!IT)
6795 return false;
6796
6797 // Also allow power of 2 integer types that have at least 8 bits and fit in
6798 // a register. These types are common in frontend languages and targets
6799 // usually support loads of these types.
6800 // TODO: We could relax this to any integer that fits in a register and rely
6801 // on ABI alignment and padding in the table to allow the load to be widened.
6802 // Or we could widen the constants and truncate the load.
6803 unsigned BitWidth = IT->getBitWidth();
6804 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6805 DL.fitsInLegalInteger(IT->getBitWidth());
6806}
6807
6808Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
6809
6810bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
6811
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // Use the same 40% default density as jump-table formation does in
  // optsize/minsize mode. See also
  // TargetLoweringBase::isSuitableForJumpTable(), which this function was
  // based on.
  const uint64_t MinDensity = 40;

  // Bail out on huge ranges so neither product below can overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // NumCases / CaseRange >= 40%, cross-multiplied to stay in integers.
  return NumCases * 100 >= CaseRange * MinDensity;
}
6823
6825 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6826 uint64_t Range = Diff + 1;
6827 if (Range < Diff)
6828 return false; // Overflow.
6829
6830 return isSwitchDense(Values.size(), Range);
6831}
6832
6833/// Determine whether a lookup table should be built for this switch, based on
6834/// the number of cases, size of the table, and the types of the results.
6835// TODO: We could support larger than legal types by limiting based on the
6836// number of loads required and/or table size. If the constants are small we
6837// could use smaller table entries and extend after the load.
6839 const TargetTransformInfo &TTI,
6840 const DataLayout &DL,
6841 const SmallVector<Type *> &ResultTypes) {
  // More cases than table slots means TableSize wrapped during computation.
6842 if (SI->getNumCases() > TableSize)
6843 return false; // TableSize overflowed.
6844
6845 bool AllTablesFitInRegister = true;
6846 bool HasIllegalType = false;
6847 for (const auto &Ty : ResultTypes) {
6848 // Saturate this flag to true.
6849 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6850
6851 // Saturate this flag to false.
6852 AllTablesFitInRegister =
6853 AllTablesFitInRegister &&
6854 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
6855
6856 // If both flags saturate, we're done. NOTE: This *only* works with
6857 // saturating flags, and all flags have to saturate first due to the
6858 // non-deterministic behavior of iterating over a dense map.
6859 if (HasIllegalType && !AllTablesFitInRegister)
6860 break;
6861 }
6862
6863 // If each table would fit in a register, we should build it anyway.
6864 if (AllTablesFitInRegister)
6865 return true;
6866
6867 // Don't build a table that doesn't fit in-register if it has illegal types.
6868 if (HasIllegalType)
6869 return false;
6870
  // Otherwise build the table only when the cases are dense enough to be
  // worth the table's size.
6871 return isSwitchDense(SI->getNumCases(), TableSize);
6872}
6873
6875 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6876 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
6877 const DataLayout &DL, const TargetTransformInfo &TTI) {
6878 if (MinCaseVal.isNullValue())
6879 return true;
6880 if (MinCaseVal.isNegative() ||
6881 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6882 !HasDefaultResults)
6883 return false;
6884 return all_of(ResultTypes, [&](const auto &ResultType) {
6885 return SwitchReplacement::wouldFitInRegister(
6886 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
6887 });
6888}
6889
6890/// Try to reuse the switch table index compare. Following pattern:
6891/// \code
6892/// if (idx < tablesize)
6893/// r = table[idx]; // table does not contain default_value
6894/// else
6895/// r = default_value;
6896/// if (r != default_value)
6897/// ...
6898/// \endcode
6899/// Is optimized to:
6900/// \code
6901/// cond = idx < tablesize;
6902/// if (cond)
6903/// r = table[idx];
6904/// else
6905/// r = default_value;
6906/// if (cond)
6907/// ...
6908/// \endcode
6909/// Jump threading will then eliminate the second if(cond).
6911 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6912 Constant *DefaultValue,
6913 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  // Only a compare instruction that uses the phi can be rewritten here.
6915 if (!CmpInst)
6916 return;
6917
6918 // We require that the compare is in the same block as the phi so that jump
6919 // threading can do its work afterwards.
6920 if (CmpInst->getParent() != PhiBlock)
6921 return;
6922
  // The compare's other operand must be a constant we can fold against.
6924 if (!CmpOp1)
6925 return;
6926
  // The range-check condition, plus the boolean constants used to classify
  // the folded comparison results below.
6927 Value *RangeCmp = RangeCheckBranch->getCondition();
6928 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6929 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6930
6931 // Check if the compare with the default value is constant true or false.
6932 const DataLayout &DL = PhiBlock->getDataLayout();
6934 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6935 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6936 return;
6937
6938 // Check if the compare with the case values is distinct from the default
6939 // compare result.
6940 for (auto ValuePair : Values) {
6942 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL)
6943 if (!CaseConst || CaseConst == DefaultConst ||
6944 (CaseConst != TrueConst && CaseConst != FalseConst))
6945 return;
6946 }
6947
6948 // Check if the branch instruction dominates the phi node. It's a simple
6949 // dominance check, but sufficient for our needs.
6950 // Although this check is invariant in the calling loops, it's better to do it
6951 // at this late stage. Practically we do it at most once for a switch.
6952 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6953 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6954 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6955 return;
6956 }
6957
6958 if (DefaultConst == FalseConst) {
6959 // The compare yields the same result. We can replace it.
6960 CmpInst->replaceAllUsesWith(RangeCmp);
6961 ++NumTableCmpReuses;
6962 } else {
6963 // The compare yields the same result, just inverted. We can replace it.
6964 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6965 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6966 RangeCheckBranch->getIterator());
6967 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6968 ++NumTableCmpReuses;
6969 }
6970}
6971
6972/// If the switch is only used to initialize one or more phi nodes in a common
6973/// successor block with different constant values, replace the switch with
6974/// lookup tables.
6976 DomTreeUpdater *DTU, const DataLayout &DL,
6977 const TargetTransformInfo &TTI) {
6978 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6979
6980 BasicBlock *BB = SI->getParent();
6981 Function *Fn = BB->getParent();
6982
6983 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6984 // split off a dense part and build a lookup table for that.
6985
6986 // FIXME: This creates arrays of GEPs to constant strings, which means each
6987 // GEP needs a runtime relocation in PIC code. We should just build one big
6988 // string and lookup indices into that.
6989
6990 // Ignore switches with less than three cases. Lookup tables will not make
6991 // them faster, so we don't analyze them.
6992 if (SI->getNumCases() < 3)
6993 return false;
6994
6995 // Figure out the corresponding result for each case value and phi node in the
6996 // common destination, as well as the min and max case values.
6997 assert(!SI->cases().empty());
6998 SwitchInst::CaseIt CI = SI->case_begin();
6999 ConstantInt *MinCaseVal = CI->getCaseValue();
7000 ConstantInt *MaxCaseVal = CI->getCaseValue();
7001
7002 BasicBlock *CommonDest = nullptr;
7003
7004 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7006
7008 SmallVector<Type *> ResultTypes;
7010
  // Scan all cases: track signed min/max case values and collect, per phi,
  // the constant each case produces.
7011 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7012 ConstantInt *CaseVal = CI->getCaseValue();
7013 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
7014 MinCaseVal = CaseVal;
7015 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
7016 MaxCaseVal = CaseVal;
7017
7018 // Resulting value at phi nodes for this case value.
7020 ResultsTy Results;
7021 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
7022 Results, DL, TTI))
7023 return false;
7024
7025 // Append the result and result types from this case to the list for each
7026 // phi.
7027 for (const auto &I : Results) {
7028 PHINode *PHI = I.first;
7029 Constant *Value = I.second;
7030 auto [It, Inserted] = ResultLists.try_emplace(PHI);
7031 if (Inserted)
7032 PHIs.push_back(PHI);
7033 It->second.push_back(std::make_pair(CaseVal, Value));
7034 ResultTypes.push_back(PHI->getType());
7035 }
7036 }
7037
7038 // If the table has holes, we need a constant result for the default case
7039 // or a bitmask that fits in a register.
7040 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7041 bool HasDefaultResults =
7042 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
7043 DefaultResultsList, DL, TTI);
7044 for (const auto &I : DefaultResultsList) {
7045 PHINode *PHI = I.first;
7046 Constant *Result = I.second;
7047 DefaultResults[PHI] = Result;
7048 }
7049
7050 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7051 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7052 uint64_t TableSize;
7053 ConstantInt *TableIndexOffset;
7054 if (UseSwitchConditionAsTableIndex) {
7055 TableSize = MaxCaseVal->getLimitedValue() + 1;
7056 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7057 } else {
7058 TableSize =
7059 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7060
7061 TableIndexOffset = MinCaseVal;
7062 }
7063
7064 // If the default destination is unreachable, or if the lookup table covers
7065 // all values of the conditional variable, branch directly to the lookup table
7066 // BB. Otherwise, check that the condition is within the case range.
7067 uint64_t NumResults = ResultLists[PHIs[0]].size();
7068 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7069
  // Fewer recorded case results than table slots means some entries are holes.
7070 bool TableHasHoles = (NumResults < TableSize);
7071
7072 // If the table has holes but the default destination doesn't produce any
7073 // constant results, the lookup table entries corresponding to the holes will
7074 // contain poison.
7075 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7076
7077 // If the default destination doesn't produce a constant result but is still
7078 // reachable, and the lookup table has holes, we need to use a mask to
7079 // determine if the current index should load from the lookup table or jump
7080 // to the default case.
7081 // The mask is unnecessary if the table has holes but the default destination
7082 // is unreachable, as in that case the holes must also be unreachable.
7083 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7084 if (NeedMask) {
7085 // As an extra penalty for the validity test we require more cases.
7086 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7087 return false;
7088 if (!DL.fitsInLegalInteger(TableSize))
7089 return false;
7090 }
7091
7092 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7093 return false;
7094
7095 // Compute the table index value.
7096 Value *TableIndex;
7097 if (UseSwitchConditionAsTableIndex) {
7098 TableIndex = SI->getCondition();
7099 if (HasDefaultResults) {
7100 // Grow the table to cover all possible index values to avoid the range
7101 // check. It will use the default result to fill in the table hole later,
7102 // so make sure it exists.
7103 ConstantRange CR =
7104 computeConstantRange(TableIndex, /* ForSigned */ false);
7105 // Growing the table shouldn't have any size impact by checking
7106 // wouldFitInRegister.
7107 // TODO: Consider growing the table also when it doesn't fit in a register
7108 // if no optsize is specified.
7109 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7110 if (!CR.isUpperWrapped() &&
7111 all_of(ResultTypes, [&](const auto &ResultType) {
7112 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7113 ResultType);
7114 })) {
7115 // There may be some case index larger than the UpperBound (unreachable
7116 // case), so make sure the table size does not get smaller.
7117 TableSize = std::max(UpperBound, TableSize);
7118 // The default branch is unreachable after we enlarge the lookup table.
7119 // Adjust DefaultIsReachable to reuse code path.
7120 DefaultIsReachable = false;
7121 }
7122 }
7123 }
7124
7125 // Keep track of the switch replacement for each phi.
7127 for (PHINode *PHI : PHIs) {
7128 const auto &ResultList = ResultLists[PHI];
7129
7130 Type *ResultType = ResultList.begin()->second->getType();
7131 // Use any value to fill the lookup table holes.
7133 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7134 StringRef FuncName = Fn->getName();
7135 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7136 ResultList, DefaultVal, DL, FuncName);
7137 PhiToReplacementMap.insert({PHI, Replacement});
7138 }
7139
7140 bool AnyLookupTables = any_of(
7141 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7142
7143 // A few conditions prevent the generation of lookup tables:
7144 // 1. The target does not support lookup tables.
7145 // 2. The "no-jump-tables" function attribute is set.
7146 // However, these objections do not apply to other switch replacements, like
7147 // the bitmap, so we only stop here if any of these conditions are met and we
7148 // want to create a LUT. Otherwise, continue with the switch replacement.
7149 if (AnyLookupTables &&
7150 (!TTI.shouldBuildLookupTables() ||
7151 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7152 return false;
7153
7154 Builder.SetInsertPoint(SI);
7155 // TableIndex is the switch condition - TableIndexOffset if we don't
7156 // use the condition directly
7157 if (!UseSwitchConditionAsTableIndex) {
7158 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7159 // we can try to attach nsw.
7160 bool MayWrap = true;
7161 if (!DefaultIsReachable) {
7162 APInt Res =
7163 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7164 (void)Res;
7165 }
7166 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7167 "switch.tableidx", /*HasNUW =*/false,
7168 /*HasNSW =*/!MayWrap);
7169 }
7170
7171 std::vector<DominatorTree::UpdateType> Updates;
7172
7173 // Compute the maximum table size representable by the integer type we are
7174 // switching upon.
7175 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7176 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7177 assert(MaxTableSize >= TableSize &&
7178 "It is impossible for a switch to have more entries than the max "
7179 "representable value of its input integer type's size.");
7180
7181 // Create the BB that does the lookups.
7182 Module &Mod = *CommonDest->getParent()->getParent();
7183 BasicBlock *LookupBB = BasicBlock::Create(
7184 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7185
7186 BranchInst *RangeCheckBranch = nullptr;
7187
7188 Builder.SetInsertPoint(SI);
7189 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7190 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7191 Builder.CreateBr(LookupBB);
7192 if (DTU)
7193 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7194 // Note: We call removePredecessor later since we need to be able to get the
7195 // PHI value for the default case in case we're using a bit mask.
7196 } else {
7197 Value *Cmp = Builder.CreateICmpULT(
7198 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7199 RangeCheckBranch =
7200 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7201 if (DTU)
7202 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7203 }
7204
7205 // Populate the BB that does the lookups.
7206 Builder.SetInsertPoint(LookupBB);
7207
7208 if (NeedMask) {
7209 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7210 // re-purposed to do the hole check, and we create a new LookupBB.
7211 BasicBlock *MaskBB = LookupBB;
7212 MaskBB->setName("switch.hole_check");
7213 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7214 CommonDest->getParent(), CommonDest);
7215
7216 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7217 // unnecessary illegal types.
7218 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7219 APInt MaskInt(TableSizePowOf2, 0);
7220 APInt One(TableSizePowOf2, 1);
7221 // Build bitmask; fill in a 1 bit for every case.
7222 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7223 for (const auto &Result : ResultList) {
7224 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7225 .getLimitedValue();
7226 MaskInt |= One << Idx;
7227 }
7228 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7229
7230 // Get the TableIndex'th bit of the bitmask.
7231 // If this bit is 0 (meaning hole) jump to the default destination,
7232 // else continue with table lookup.
7233 IntegerType *MapTy = TableMask->getIntegerType();
7234 Value *MaskIndex =
7235 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7236 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7237 Value *LoBit = Builder.CreateTrunc(
7238 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7239 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7240 if (DTU) {
7241 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7242 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7243 }
7244 Builder.SetInsertPoint(LookupBB);
7245 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7246 }
7247
7248 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7249 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7250 // do not delete PHINodes here.
7251 SI->getDefaultDest()->removePredecessor(BB,
7252 /*KeepOneInputPHIs=*/true);
7253 if (DTU)
7254 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7255 }
7256
  // Materialize each phi's replacement and rewire the phi to the lookup block.
7257 for (PHINode *PHI : PHIs) {
7258 const ResultListTy &ResultList = ResultLists[PHI];
7259 auto Replacement = PhiToReplacementMap.at(PHI);
7260 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7261 // Do a small peephole optimization: re-use the switch table compare if
7262 // possible.
7263 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7264 BasicBlock *PhiBlock = PHI->getParent();
7265 // Search for compare instructions which use the phi.
7266 for (auto *User : PHI->users()) {
7267 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7268 Replacement.getDefaultValue(), ResultList);
7269 }
7270 }
7271
7272 PHI->addIncoming(Result, LookupBB);
7273 }
7274
7275 Builder.CreateBr(CommonDest);
7276 if (DTU)
7277 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7278
7279 // Remove the switch.
7280 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7281 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7282 BasicBlock *Succ = SI->getSuccessor(i);
7283
7284 if (Succ == SI->getDefaultDest())
7285 continue;
7286 Succ->removePredecessor(BB);
7287 if (DTU && RemovedSuccessors.insert(Succ).second)
7288 Updates.push_back({DominatorTree::Delete, BB, Succ});
7289 }
7290 SI->eraseFromParent();
7291
7292 if (DTU)
7293 DTU->applyUpdates(Updates);
7294
7295 if (NeedMask)
7296 ++NumLookupTablesHoles;
7297 return true;
7298}
7299
7300/// Try to transform a switch that has "holes" in it to a contiguous sequence
7301/// of cases.
7302///
7303/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7304/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7305///
7306/// This converts a sparse switch into a dense switch which allows better
7307/// lowering and could also allow transforming into a lookup table.
7309 const DataLayout &DL,
7310 const TargetTransformInfo &TTI) {
7311 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7312 if (CondTy->getIntegerBitWidth() > 64 ||
7313 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7314 return false;
7315 // Only bother with this optimization if there are more than 3 switch cases;
7316 // SDAG will only bother creating jump tables for 4 or more cases.
7317 if (SI->getNumCases() < 4)
7318 return false;
7319
7320 // This transform is agnostic to the signedness of the input or case values. We
7321 // can treat the case values as signed or unsigned. We can optimize more common
7322 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7323 // as signed.
7325 for (const auto &C : SI->cases())
7326 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7327 llvm::sort(Values);
7328
7329 // If the switch is already dense, there's nothing useful to do here.
7330 if (isSwitchDense(Values))
7331 return false;
7332
7333 // First, transform the values such that they start at zero and ascend.
7334 int64_t Base = Values[0];
7335 for (auto &V : Values)
7336 V -= (uint64_t)(Base);
7337
7338 // Now we have signed numbers that have been shifted so that, given enough
7339 // precision, there are no negative values. Since the rest of the transform
7340 // is bitwise only, we switch now to an unsigned representation.
7341
7342 // This transform can be done speculatively because it is so cheap - it
7343 // results in a single rotate operation being inserted.
7344
7345 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7346 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7347 // less than 64.
7348 unsigned Shift = 64;
7349 for (auto &V : Values)
7350 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7351 assert(Shift < 64);
  // Divide every value by 2^Shift; exact, since Shift is the minimum number
  // of trailing zero bits across all values.
7352 if (Shift > 0)
7353 for (auto &V : Values)
7354 V = (int64_t)((uint64_t)V >> Shift);
7355
7356 if (!isSwitchDense(Values))
7357 // Transform didn't create a dense switch.
7358 return false;
7359
7360 // The obvious transform is to shift the switch condition right and emit a
7361 // check that the condition actually cleanly divided by GCD, i.e.
7362 // C & (1 << Shift - 1) == 0
7363 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7364 //
7365 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7366 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7367 // are nonzero then the switch condition will be very large and will hit the
7368 // default case.
7369
7370 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7371 Builder.SetInsertPoint(SI);
7372 Value *Sub =
7373 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7374 Value *Rot = Builder.CreateIntrinsic(
7375 Ty, Intrinsic::fshl,
7376 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7377 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7378
  // Rewrite each case value with the same (v - Base) >> Shift mapping applied
  // to the condition above.
7379 for (auto Case : SI->cases()) {
7380 auto *Orig = Case.getCaseValue();
7381 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7382 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7383 }
7384 return true;
7385}
7386
7387/// Tries to transform switch of powers of two to reduce switch range.
7388/// For example, switch like:
7389/// switch (C) { case 1: case 2: case 64: case 128: }
7390/// will be transformed to:
7391/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7392///
7393/// This transformation allows better lowering and may transform the switch
7394/// instruction into a sequence of bit manipulation and a smaller
7395/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7396/// address of the jump target, and indirectly jump to it).
7398 const DataLayout &DL,
7399 const TargetTransformInfo &TTI) {
7400 Value *Condition = SI->getCondition();
7401 LLVMContext &Context = SI->getContext();
7402 auto *CondTy = cast<IntegerType>(Condition->getType());
7403
7404 if (CondTy->getIntegerBitWidth() > 64 ||
7405 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7406 return false;
7407
7408 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7409 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7410 {Condition, ConstantInt::getTrue(Context)});
7411 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7412 TTI::TCC_Basic * 2)
7413 return false;
7414
7415 // Only bother with this optimization if there are more than 3 switch cases.
7416 // SDAG will start emitting jump tables for 4 or more cases.
7417 if (SI->getNumCases() < 4)
7418 return false;
7419
7420 // We perform this optimization only for switches with
7421 // unreachable default case.
7422 // This assumption will save us from checking if `Condition` is a power of two.
7423 if (!SI->defaultDestUnreachable())
7424 return false;
7425
7426 // Check that switch cases are powers of two.
7428 for (const auto &Case : SI->cases()) {
7429 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7430 if (llvm::has_single_bit(CaseValue))
7431 Values.push_back(CaseValue);
7432 else
7433 return false;
7434 }
7435
7436 // isSwitchDense requires case values to be sorted.
7437 llvm::sort(Values);
7438 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7439 llvm::countr_zero(Values.front()) + 1))
7440 // Transform is unable to generate dense switch.
7441 return false;
7442
7443 Builder.SetInsertPoint(SI);
7444
7445 // Replace each case with its trailing zeros number.
7446 for (auto &Case : SI->cases()) {
7447 auto *OrigValue = Case.getCaseValue();
7448 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7449 OrigValue->getValue().countr_zero()));
7450 }
7451
7452 // Replace condition with its trailing zeros number.
7453 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7454 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7455
7456 SI->setCondition(ConditionTrailingZeros);
7457
7458 return true;
7459}
7460
7461/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7462/// the same destination.
7464 DomTreeUpdater *DTU) {
7465 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7466 if (!Cmp || !Cmp->hasOneUse())
7467 return false;
7468
7470 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7471 if (!HasWeights)
7472 Weights.resize(4); // Avoid checking HasWeights everywhere.
7473
7474 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7475 int64_t Res;
7476 BasicBlock *Succ, *OtherSucc;
7477 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7478 BasicBlock *Unreachable = nullptr;
7479
  // Two-case form: the default destination handles the one missing cmp result.
7480 if (SI->getNumCases() == 2) {
7481 // Find which of 1, 0 or -1 is missing (handled by default dest).
7482 SmallSet<int64_t, 3> Missing;
7483 Missing.insert(1);
7484 Missing.insert(0);
7485 Missing.insert(-1);
7486
7487 Succ = SI->getDefaultDest();
7488 SuccWeight = Weights[0];
7489 OtherSucc = nullptr;
7490 for (auto &Case : SI->cases()) {
7491 std::optional<int64_t> Val =
7492 Case.getCaseValue()->getValue().trySExtValue();
7493 if (!Val)
7494 return false;
7495 if (!Missing.erase(*Val))
7496 return false;
7497 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7498 return false;
7499 OtherSucc = Case.getCaseSuccessor();
7500 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7501 }
7502
7503 assert(Missing.size() == 1 && "Should have one case left");
7504 Res = *Missing.begin();
  // Three-case form with unreachable default: two cases must share a
  // destination.
7505 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7506 // Normalize so that Succ is taken once and OtherSucc twice.
7507 Unreachable = SI->getDefaultDest();
7508 Succ = OtherSucc = nullptr;
7509 for (auto &Case : SI->cases()) {
7510 BasicBlock *NewSucc = Case.getCaseSuccessor();
7511 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7512 if (!OtherSucc || OtherSucc == NewSucc) {
7513 OtherSucc = NewSucc;
7514 OtherSuccWeight += Weight;
7515 } else if (!Succ) {
7516 Succ = NewSucc;
7517 SuccWeight = Weight;
7518 } else if (Succ == NewSucc) {
7519 std::swap(Succ, OtherSucc);
7520 std::swap(SuccWeight, OtherSuccWeight);
7521 } else
7522 return false;
7523 }
7524 for (auto &Case : SI->cases()) {
7525 std::optional<int64_t> Val =
7526 Case.getCaseValue()->getValue().trySExtValue();
7527 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7528 return false;
7529 if (Case.getCaseSuccessor() == Succ) {
7530 Res = *Val;
7531 break;
7532 }
7533 }
7534 } else {
7535 return false;
7536 }
7537
7538 // Determine predicate for the missing case.
7540 switch (Res) {
7541 case 1:
7542 Pred = ICmpInst::ICMP_UGT;
7543 break;
7544 case 0:
7545 Pred = ICmpInst::ICMP_EQ;
7546 break;
7547 case -1:
7548 Pred = ICmpInst::ICMP_ULT;
7549 break;
7550 }
  // scmp needs the signed form of the predicate chosen above.
7551 if (Cmp->isSigned())
7552 Pred = ICmpInst::getSignedPredicate(Pred);
7553
7554 MDNode *NewWeights = nullptr;
7555 if (HasWeights)
7556 NewWeights = MDBuilder(SI->getContext())
7557 .createBranchWeights(SuccWeight, OtherSuccWeight);
7558
  // Replace the switch with one conditional branch on the cmp's operands.
7559 BasicBlock *BB = SI->getParent();
7560 Builder.SetInsertPoint(SI->getIterator());
7561 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7562 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7563 SI->getMetadata(LLVMContext::MD_unpredictable));
7564 OtherSucc->removePredecessor(BB);
7565 if (Unreachable)
7566 Unreachable->removePredecessor(BB);
7567 SI->eraseFromParent();
7568 Cmp->eraseFromParent();
7569 if (DTU && Unreachable)
7570 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7571 return true;
7572}
7573
7574/// Checking whether two cases of SI are equal depends on the contents of the
7575/// BasicBlock and the incoming values of their successor PHINodes.
7576/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7577/// calling this function on each BasicBlock every time isEqual is called,
7578/// especially since the same BasicBlock may be passed as an argument multiple
7579/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7580/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7581/// of the incoming values.
7586
7587namespace llvm {
7588template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
7590 return static_cast<SwitchSuccWrapper *>(
7592 }
7594 return static_cast<SwitchSuccWrapper *>(
7596 }
  // Hash on the unconditional branch's target block plus the incoming values
  // of that successor's PHIs (see the comment below for why).
7597 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7598 BasicBlock *Succ = SSW->Dest;
7600 assert(BI->isUnconditional() &&
7601 "Only supporting unconditional branches for now");
7602 assert(BI->getNumSuccessors() == 1 &&
7603 "Expected unconditional branches to have one successor");
7604 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7605
7606 // Since we assume the BB is just a single BranchInst with a single
7607 // successor, we hash as the BB and the incoming Values of its successor
7608 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7609 // including the incoming PHI values leads to better performance.
7610 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7611 // time and passing it in SwitchSuccWrapper, but this slowed down the
7612 // average compile time without having any impact on the worst case compile
7613 // time.
7614 BasicBlock *BB = BI->getSuccessor(0);
7615 SmallVector<Value *> PhiValsForBB;
7616 for (PHINode &Phi : BB->phis())
7617 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7618
7619 return hash_combine(BB, hash_combine_range(PhiValsForBB));
7620 }
  // Two wrappers are equal when their blocks branch to the same successor and
  // feed it identical incoming PHI values.
7621 static bool isEqual(const SwitchSuccWrapper *LHS,
7622 const SwitchSuccWrapper *RHS) {
7625 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7626 return LHS == RHS;
7627
7628 BasicBlock *A = LHS->Dest;
7629 BasicBlock *B = RHS->Dest;
7630
7631 // FIXME: we checked that the size of A and B are both 1 in
7632 // simplifyDuplicateSwitchArms to make the Case list smaller to
7633 // improve performance. If we decide to support BasicBlocks with more
7634 // than just a single instruction, we need to check that A.size() ==
7635 // B.size() here, and we need to check more than just the BranchInsts
7636 // for equality.
7637
7638 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7639 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7640 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7641 "Only supporting unconditional branches for now");
7642 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7643 return false;
7644
7645 // Need to check that PHIs in successor have matching values.
7646 BasicBlock *Succ = ABI->getSuccessor(0);
7647 for (PHINode &Phi : Succ->phis()) {
7648 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7649 if (PredIVs[A] != PredIVs[B])
7650 return false;
7651 }
7652
7653 return true;
7654 }
7655};
7656} // namespace llvm
7657
7658bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7659 DomTreeUpdater *DTU) {
7660 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7661 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7662 // an entire PHI at once after the loop, opposed to calling
7663 // getIncomingValueForBlock inside this loop, since each call to
7664 // getIncomingValueForBlock is O(|Preds|).
7665 SmallPtrSet<PHINode *, 8> Phis;
7666 SmallPtrSet<BasicBlock *, 8> Seen;
7667 DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
7668 DenseMap<BasicBlock *, SmallVector<unsigned, 32>> BBToSuccessorIndexes;
7670 Cases.reserve(SI->getNumSuccessors());
7671
7672 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7673 BasicBlock *BB = SI->getSuccessor(I);
7674
7675 // FIXME: Support more than just a single BranchInst. One way we could do
7676 // this is by taking a hashing approach of all insts in BB.
7677 if (BB->size() != 1)
7678 continue;
7679
7680 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7681 // on other kinds of terminators. We decide to only support unconditional
7682 // branches for now for compile time reasons.
7683 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7684 if (!BI || BI->isConditional())
7685 continue;
7686
7687 if (!Seen.insert(BB).second) {
7688 auto It = BBToSuccessorIndexes.find(BB);
7689 if (It != BBToSuccessorIndexes.end())
7690 It->second.emplace_back(I);
7691 continue;
7692 }
7693
7694 // FIXME: This case needs some extra care because the terminators other than
7695 // SI need to be updated. For now, consider only backedges to the SI.
7696 if (BB->getUniquePredecessor() != SI->getParent())
7697 continue;
7698
7699 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7700 for (BasicBlock *Succ : BI->successors())
7702
7703 // Add the successor only if not previously visited.
7704 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7705 BBToSuccessorIndexes[BB].emplace_back(I);
7706 }
7707
7708 // Precompute a data structure to improve performance of isEqual for
7709 // SwitchSuccWrapper.
7710 PhiPredIVs.reserve(Phis.size());
7711 for (PHINode *Phi : Phis) {
7712 auto &IVs =
7713 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
7714 for (auto &IV : Phi->incoming_values())
7715 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
7716 }
7717
7718 // Build a set such that if the SwitchSuccWrapper exists in the set and
7719 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7720 // which is not in the set should be replaced with the one in the set. If the
7721 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7722 // other SwitchSuccWrappers can check against it in the same manner. We use
7723 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7724 // around information to isEquality, getHashValue, and when doing the
7725 // replacement with better performance.
7726 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
7727 ReplaceWith.reserve(Cases.size());
7728
7730 Updates.reserve(ReplaceWith.size());
7731 bool MadeChange = false;
7732 for (auto &SSW : Cases) {
7733 // SSW is a candidate for simplification. If we find a duplicate BB,
7734 // replace it.
7735 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7736 if (!Inserted) {
7737 // We know that SI's parent BB no longer dominates the old case successor
7738 // since we are making it dead.
7739 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7740 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7741 for (unsigned Idx : Successors)
7742 SI->setSuccessor(Idx, (*It)->Dest);
7743 MadeChange = true;
7744 }
7745 }
7746
7747 if (DTU)
7748 DTU->applyUpdates(Updates);
7749
7750 return MadeChange;
7751}
7752
/// Simplify a switch terminator. The transformations below form an ordered
/// pipeline; each one that fires returns via requestResimplify() so the
/// caller re-runs simplification over the updated IR.
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
        return requestResimplify();

    // A switch on a select can be split into a branch over two switches.
    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
      if (simplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(false).begin())
      if (foldValueComparisonIntoPredecessors(SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
    return requestResimplify();

  if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
    return requestResimplify();

  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in difficult-to-analyze
  // code and makes pruning branches much harder. This is a problem if the
  // switch expression itself can still be restricted as a result of inlining or
  // CVP. Therefore, only apply this transformation during late stages of the
  // optimisation pipeline.
  if (Options.ConvertSwitchToLookupTable &&
      simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
    return requestResimplify();

  if (reduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  // Hoisting is gated on the HoistCommon cl::opt as well as the pass options.
  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
    return requestResimplify();

  if (simplifyDuplicateSwitchArms(SI, DTU))
    return requestResimplify();

  return false;
}
7818
7819bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7820 BasicBlock *BB = IBI->getParent();
7821 bool Changed = false;
7822
7823 // Eliminate redundant destinations.
7824 SmallPtrSet<Value *, 8> Succs;
7825 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
7826 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7827 BasicBlock *Dest = IBI->getDestination(i);
7828 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7829 if (!Dest->hasAddressTaken())
7830 RemovedSuccs.insert(Dest);
7831 Dest->removePredecessor(BB);
7832 IBI->removeDestination(i);
7833 --i;
7834 --e;
7835 Changed = true;
7836 }
7837 }
7838
7839 if (DTU) {
7840 std::vector<DominatorTree::UpdateType> Updates;
7841 Updates.reserve(RemovedSuccs.size());
7842 for (auto *RemovedSucc : RemovedSuccs)
7843 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7844 DTU->applyUpdates(Updates);
7845 }
7846
7847 if (IBI->getNumDestinations() == 0) {
7848 // If the indirectbr has no successors, change it to unreachable.
7849 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7851 return true;
7852 }
7853
7854 if (IBI->getNumDestinations() == 1) {
7855 // If the indirectbr has one successor, change it to a direct branch.
7858 return true;
7859 }
7860
7861 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7862 if (simplifyIndirectBrOnSelect(IBI, SI))
7863 return requestResimplify();
7864 }
7865 return Changed;
7866}
7867
7868/// Given an block with only a single landing pad and a unconditional branch
7869/// try to find another basic block which this one can be merged with. This
7870/// handles cases where we have multiple invokes with unique landing pads, but
7871/// a shared handler.
7872///
7873/// We specifically choose to not worry about merging non-empty blocks
7874/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7875/// practice, the optimizer produces empty landing pad blocks quite frequently
7876/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7877/// sinking in this file)
7878///
7879/// This is primarily a code size optimization. We need to avoid performing
7880/// any transform which might inhibit optimization (such as our ability to
7881/// specialize a particular handler via tail commoning). We do this by not
7882/// merging any blocks which require us to introduce a phi. Since the same
7883/// values are flowing through both blocks, we don't lose any ability to
7884/// specialize. If anything, we make such specialization more likely.
7885///
7886/// TODO - This transformation could remove entries from a phi in the target
7887/// block when the inputs in the phi are the same for the two blocks being
7888/// merged. In some cases, this could result in removal of the PHI entirely.
7890 BasicBlock *BB, DomTreeUpdater *DTU) {
7891 auto Succ = BB->getUniqueSuccessor();
7892 assert(Succ);
7893 // If there's a phi in the successor block, we'd likely have to introduce
7894 // a phi into the merged landing pad block.
7895 if (isa<PHINode>(*Succ->begin()))
7896 return false;
7897
7898 for (BasicBlock *OtherPred : predecessors(Succ)) {
7899 if (BB == OtherPred)
7900 continue;
7901 BasicBlock::iterator I = OtherPred->begin();
7903 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7904 continue;
7905 ++I;
7907 if (!BI2 || !BI2->isIdenticalTo(BI))
7908 continue;
7909
7910 std::vector<DominatorTree::UpdateType> Updates;
7911
7912 // We've found an identical block. Update our predecessors to take that
7913 // path instead and make ourselves dead.
7915 for (BasicBlock *Pred : UniquePreds) {
7916 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7917 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7918 "unexpected successor");
7919 II->setUnwindDest(OtherPred);
7920 if (DTU) {
7921 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7922 Updates.push_back({DominatorTree::Delete, Pred, BB});
7923 }
7924 }
7925
7927 for (BasicBlock *Succ : UniqueSuccs) {
7928 Succ->removePredecessor(BB);
7929 if (DTU)
7930 Updates.push_back({DominatorTree::Delete, BB, Succ});
7931 }
7932
7933 IRBuilder<> Builder(BI);
7934 Builder.CreateUnreachable();
7935 BI->eraseFromParent();
7936 if (DTU)
7937 DTU->applyUpdates(Updates);
7938 return true;
7939 }
7940 return false;
7941}
7942
7943bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7944 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7945 : simplifyCondBranch(Branch, Builder);
7946}
7947
7948bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7949 IRBuilder<> &Builder) {
7950 BasicBlock *BB = BI->getParent();
7951 BasicBlock *Succ = BI->getSuccessor(0);
7952
7953 // If the Terminator is the only non-phi instruction, simplify the block.
7954 // If LoopHeader is provided, check if the block or its successor is a loop
7955 // header. (This is for early invocations before loop simplify and
7956 // vectorization to keep canonical loop forms for nested loops. These blocks
7957 // can be eliminated when the pass is invoked later in the back-end.)
7958 // Note that if BB has only one predecessor then we do not introduce new
7959 // backedge, so we can eliminate BB.
7960 bool NeedCanonicalLoop =
7961 Options.NeedCanonicalLoop &&
7962 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7963 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7965 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7966 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7967 return true;
7968
7969 // If the only instruction in the block is a seteq/setne comparison against a
7970 // constant, try to simplify the block.
7971 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7972 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7973 ++I;
7974 if (I->isTerminator() &&
7975 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7976 return true;
7977 }
7978
7979 // See if we can merge an empty landing pad block with another which is
7980 // equivalent.
7981 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7982 ++I;
7983 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7984 return true;
7985 }
7986
7987 // If this basic block is ONLY a compare and a branch, and if a predecessor
7988 // branches to us and our successor, fold the comparison into the
7989 // predecessor and use logical operations to update the incoming value
7990 // for PHI nodes in common successor.
7991 if (Options.SpeculateBlocks &&
7992 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7993 Options.BonusInstThreshold))
7994 return requestResimplify();
7995 return false;
7996}
7997
7999 BasicBlock *PredPred = nullptr;
8000 for (auto *P : predecessors(BB)) {
8001 BasicBlock *PPred = P->getSinglePredecessor();
8002 if (!PPred || (PredPred && PredPred != PPred))
8003 return nullptr;
8004 PredPred = PPred;
8005 }
8006 return PredPred;
8007}
8008
8009/// Fold the following pattern:
8010/// bb0:
8011/// br i1 %cond1, label %bb1, label %bb2
8012/// bb1:
8013/// br i1 %cond2, label %bb3, label %bb4
8014/// bb2:
8015/// br i1 %cond2, label %bb4, label %bb3
8016/// bb3:
8017/// ...
8018/// bb4:
8019/// ...
8020/// into
8021/// bb0:
8022/// %cond = xor i1 %cond1, %cond2
8023/// br i1 %cond, label %bb4, label %bb3
8024/// bb3:
8025/// ...
8026/// bb4:
8027/// ...
8028/// NOTE: %cond2 always dominates the terminator of bb0.
8030 BasicBlock *BB = BI->getParent();
8031 BasicBlock *BB1 = BI->getSuccessor(0);
8032 BasicBlock *BB2 = BI->getSuccessor(1);
8033 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8034 if (Succ == BB)
8035 return false;
8036 if (&Succ->front() != Succ->getTerminator())
8037 return false;
8038 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8039 if (!SuccBI || !SuccBI->isConditional())
8040 return false;
8041 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8042 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8043 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8044 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8045 };
8046 BranchInst *BB1BI, *BB2BI;
8047 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8048 return false;
8049
8050 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8051 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8052 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8053 return false;
8054
8055 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8056 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8057 IRBuilder<> Builder(BI);
8058 BI->setCondition(
8059 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8060 BB1->removePredecessor(BB);
8061 BI->setSuccessor(0, BB4);
8062 BB2->removePredecessor(BB);
8063 BI->setSuccessor(1, BB3);
8064 if (DTU) {
8066 Updates.push_back({DominatorTree::Delete, BB, BB1});
8067 Updates.push_back({DominatorTree::Insert, BB, BB4});
8068 Updates.push_back({DominatorTree::Delete, BB, BB2});
8069 Updates.push_back({DominatorTree::Insert, BB, BB3});
8070
8071 DTU->applyUpdates(Updates);
8072 }
8073 bool HasWeight = false;
8074 uint64_t BBTWeight, BBFWeight;
8075 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8076 HasWeight = true;
8077 else
8078 BBTWeight = BBFWeight = 1;
8079 uint64_t BB1TWeight, BB1FWeight;
8080 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8081 HasWeight = true;
8082 else
8083 BB1TWeight = BB1FWeight = 1;
8084 uint64_t BB2TWeight, BB2FWeight;
8085 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8086 HasWeight = true;
8087 else
8088 BB2TWeight = BB2FWeight = 1;
8089 if (HasWeight) {
8090 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8091 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8092 fitWeights(Weights);
8093 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8094 }
8095 return true;
8096}
8097
8098bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8099 assert(
8101 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8102 "Tautological conditional branch should have been eliminated already.");
8103
8104 BasicBlock *BB = BI->getParent();
8105 if (!Options.SimplifyCondBranch ||
8106 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8107 return false;
8108
8109 // Conditional branch
8110 if (isValueEqualityComparison(BI)) {
8111 // If we only have one predecessor, and if it is a branch on this value,
8112 // see if that predecessor totally determines the outcome of this
8113 // switch.
8114 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8115 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8116 return requestResimplify();
8117
8118 // This block must be empty, except for the setcond inst, if it exists.
8119 // Ignore dbg and pseudo intrinsics.
8120 auto I = BB->instructionsWithoutDebug(true).begin();
8121 if (&*I == BI) {
8122 if (foldValueComparisonIntoPredecessors(BI, Builder))
8123 return requestResimplify();
8124 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8125 ++I;
8126 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8127 return requestResimplify();
8128 }
8129 }
8130
8131 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8132 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8133 return true;
8134
8135 // If this basic block has dominating predecessor blocks and the dominating
8136 // blocks' conditions imply BI's condition, we know the direction of BI.
8137 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8138 if (Imp) {
8139 // Turn this into a branch on constant.
8140 auto *OldCond = BI->getCondition();
8141 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8142 : ConstantInt::getFalse(BB->getContext());
8143 BI->setCondition(TorF);
8145 return requestResimplify();
8146 }
8147
8148 // If this basic block is ONLY a compare and a branch, and if a predecessor
8149 // branches to us and one of our successors, fold the comparison into the
8150 // predecessor and use logical operations to pick the right destination.
8151 if (Options.SpeculateBlocks &&
8152 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8153 Options.BonusInstThreshold))
8154 return requestResimplify();
8155
8156 // We have a conditional branch to two blocks that are only reachable
8157 // from BI. We know that the condbr dominates the two blocks, so see if
8158 // there is any identical code in the "then" and "else" blocks. If so, we
8159 // can hoist it up to the branching block.
8160 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8161 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8162 if (HoistCommon &&
8163 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8164 return requestResimplify();
8165
8166 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8167 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8168 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8169 auto CanSpeculateConditionalLoadsStores = [&]() {
8170 for (auto *Succ : successors(BB)) {
8171 for (Instruction &I : *Succ) {
8172 if (I.isTerminator()) {
8173 if (I.getNumSuccessors() > 1)
8174 return false;
8175 continue;
8176 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8177 SpeculatedConditionalLoadsStores.size() ==
8179 return false;
8180 }
8181 SpeculatedConditionalLoadsStores.push_back(&I);
8182 }
8183 }
8184 return !SpeculatedConditionalLoadsStores.empty();
8185 };
8186
8187 if (CanSpeculateConditionalLoadsStores()) {
8188 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8189 std::nullopt, nullptr);
8190 return requestResimplify();
8191 }
8192 }
8193 } else {
8194 // If Successor #1 has multiple preds, we may be able to conditionally
8195 // execute Successor #0 if it branches to Successor #1.
8196 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8197 if (Succ0TI->getNumSuccessors() == 1 &&
8198 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8199 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8200 return requestResimplify();
8201 }
8202 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8203 // If Successor #0 has multiple preds, we may be able to conditionally
8204 // execute Successor #1 if it branches to Successor #0.
8205 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8206 if (Succ1TI->getNumSuccessors() == 1 &&
8207 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8208 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8209 return requestResimplify();
8210 }
8211
8212 // If this is a branch on something for which we know the constant value in
8213 // predecessors (e.g. a phi node in the current block), thread control
8214 // through this block.
8215 if (foldCondBranchOnValueKnownInPredecessor(BI))
8216 return requestResimplify();
8217
8218 // Scan predecessor blocks for conditional branches.
8219 for (BasicBlock *Pred : predecessors(BB))
8220 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8221 if (PBI != BI && PBI->isConditional())
8222 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8223 return requestResimplify();
8224
8225 // Look for diamond patterns.
8226 if (MergeCondStores)
8227 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8228 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8229 if (PBI != BI && PBI->isConditional())
8230 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8231 return requestResimplify();
8232
8233 // Look for nested conditional branches.
8234 if (mergeNestedCondBranch(BI, DTU))
8235 return requestResimplify();
8236
8237 return false;
8238}
8239
8240/// Check if passing a value to an instruction will cause undefined behavior.
8241static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8242 assert(V->getType() == I->getType() && "Mismatched types");
8244 if (!C)
8245 return false;
8246
8247 if (I->use_empty())
8248 return false;
8249
8250 if (C->isNullValue() || isa<UndefValue>(C)) {
8251 // Only look at the first use we can handle, avoid hurting compile time with
8252 // long uselists
8253 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8254 auto *Use = cast<Instruction>(U.getUser());
8255 // Change this list when we want to add new instructions.
8256 switch (Use->getOpcode()) {
8257 default:
8258 return false;
8259 case Instruction::GetElementPtr:
8260 case Instruction::Ret:
8261 case Instruction::BitCast:
8262 case Instruction::Load:
8263 case Instruction::Store:
8264 case Instruction::Call:
8265 case Instruction::CallBr:
8266 case Instruction::Invoke:
8267 case Instruction::UDiv:
8268 case Instruction::URem:
8269 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8270 // implemented to avoid code complexity as it is unclear how useful such
8271 // logic is.
8272 case Instruction::SDiv:
8273 case Instruction::SRem:
8274 return true;
8275 }
8276 });
8277 if (FindUse == I->use_end())
8278 return false;
8279 auto &Use = *FindUse;
8280 auto *User = cast<Instruction>(Use.getUser());
8281 // Bail out if User is not in the same BB as I or User == I or User comes
8282 // before I in the block. The latter two can be the case if User is a
8283 // PHI node.
8284 if (User->getParent() != I->getParent() || User == I ||
8285 User->comesBefore(I))
8286 return false;
8287
8288 // Now make sure that there are no instructions in between that can alter
8289 // control flow (eg. calls)
8290 auto InstrRange =
8291 make_range(std::next(I->getIterator()), User->getIterator());
8292 if (any_of(InstrRange, [](Instruction &I) {
8294 }))
8295 return false;
8296
8297 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8299 if (GEP->getPointerOperand() == I) {
8300 // The type of GEP may differ from the type of base pointer.
8301 // Bail out on vector GEPs, as they are not handled by other checks.
8302 if (GEP->getType()->isVectorTy())
8303 return false;
8304 // The current base address is null, there are four cases to consider:
8305 // getelementptr (TY, null, 0) -> null
8306 // getelementptr (TY, null, not zero) -> may be modified
8307 // getelementptr inbounds (TY, null, 0) -> null
8308 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8309 // undefined?
8310 if (!GEP->hasAllZeroIndices() &&
8311 (!GEP->isInBounds() ||
8312 NullPointerIsDefined(GEP->getFunction(),
8313 GEP->getPointerAddressSpace())))
8314 PtrValueMayBeModified = true;
8315 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8316 }
8317
8318 // Look through return.
8319 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8320 bool HasNoUndefAttr =
8321 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8322 // Return undefined to a noundef return value is undefined.
8323 if (isa<UndefValue>(C) && HasNoUndefAttr)
8324 return true;
8325 // Return null to a nonnull+noundef return value is undefined.
8326 if (C->isNullValue() && HasNoUndefAttr &&
8327 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8328 return !PtrValueMayBeModified;
8329 }
8330 }
8331
8332 // Load from null is undefined.
8333 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8334 if (!LI->isVolatile())
8335 return !NullPointerIsDefined(LI->getFunction(),
8336 LI->getPointerAddressSpace());
8337
8338 // Store to null is undefined.
8340 if (!SI->isVolatile())
8341 return (!NullPointerIsDefined(SI->getFunction(),
8342 SI->getPointerAddressSpace())) &&
8343 SI->getPointerOperand() == I;
8344
8345 // llvm.assume(false/undef) always triggers immediate UB.
8346 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8347 // Ignore assume operand bundles.
8348 if (I == Assume->getArgOperand(0))
8349 return true;
8350 }
8351
8352 if (auto *CB = dyn_cast<CallBase>(User)) {
8353 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8354 return false;
8355 // A call to null is undefined.
8356 if (CB->getCalledOperand() == I)
8357 return true;
8358
8359 if (CB->isArgOperand(&Use)) {
8360 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8361 // Passing null to a nonnnull+noundef argument is undefined.
8363 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8364 return !PtrValueMayBeModified;
8365 // Passing undef to a noundef argument is undefined.
8366 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8367 return true;
8368 }
8369 }
8370 // Div/Rem by zero is immediate UB
8371 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8372 return true;
8373 }
8374 return false;
8375}
8376
8377/// If BB has an incoming value that will always trigger undefined behavior
8378/// (eg. null pointer dereference), remove the branch leading here.
8380 DomTreeUpdater *DTU,
8381 AssumptionCache *AC) {
8382 for (PHINode &PHI : BB->phis())
8383 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8384 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8385 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8386 Instruction *T = Predecessor->getTerminator();
8387 IRBuilder<> Builder(T);
8388 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8389 BB->removePredecessor(Predecessor);
8390 // Turn unconditional branches into unreachables and remove the dead
8391 // destination from conditional branches.
8392 if (BI->isUnconditional())
8393 Builder.CreateUnreachable();
8394 else {
8395 // Preserve guarding condition in assume, because it might not be
8396 // inferrable from any dominating condition.
8397 Value *Cond = BI->getCondition();
8398 CallInst *Assumption;
8399 if (BI->getSuccessor(0) == BB)
8400 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8401 else
8402 Assumption = Builder.CreateAssumption(Cond);
8403 if (AC)
8404 AC->registerAssumption(cast<AssumeInst>(Assumption));
8405 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8406 : BI->getSuccessor(0));
8407 }
8408 BI->eraseFromParent();
8409 if (DTU)
8410 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8411 return true;
8412 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8413 // Redirect all branches leading to UB into
8414 // a newly created unreachable block.
8415 BasicBlock *Unreachable = BasicBlock::Create(
8416 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8417 Builder.SetInsertPoint(Unreachable);
8418 // The new block contains only one instruction: Unreachable
8419 Builder.CreateUnreachable();
8420 for (const auto &Case : SI->cases())
8421 if (Case.getCaseSuccessor() == BB) {
8422 BB->removePredecessor(Predecessor);
8423 Case.setSuccessor(Unreachable);
8424 }
8425 if (SI->getDefaultDest() == BB) {
8426 BB->removePredecessor(Predecessor);
8427 SI->setDefaultDest(Unreachable);
8428 }
8429
8430 if (DTU)
8431 DTU->applyUpdates(
8432 { { DominatorTree::Insert, Predecessor, Unreachable },
8433 { DominatorTree::Delete, Predecessor, BB } });
8434 return true;
8435 }
8436 }
8437
8438 return false;
8439}
8440
8441bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8442 bool Changed = false;
8443
8444 assert(BB && BB->getParent() && "Block not embedded in function!");
8445 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8446
8447 // Remove basic blocks that have no predecessors (except the entry block)...
8448 // or that just have themself as a predecessor. These are unreachable.
8449 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8450 BB->getSinglePredecessor() == BB) {
8451 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8452 DeleteDeadBlock(BB, DTU);
8453 return true;
8454 }
8455
8456 // Check to see if we can constant propagate this terminator instruction
8457 // away...
8458 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8459 /*TLI=*/nullptr, DTU);
8460
8461 // Check for and eliminate duplicate PHI nodes in this block.
8463
8464 // Check for and remove branches that will always cause undefined behavior.
8466 return requestResimplify();
8467
8468 // Merge basic blocks into their predecessor if there is only one distinct
8469 // pred, and if there is only one distinct successor of the predecessor, and
8470 // if there are no PHI nodes.
8471 if (MergeBlockIntoPredecessor(BB, DTU))
8472 return true;
8473
8474 if (SinkCommon && Options.SinkCommonInsts)
8475 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8476 mergeCompatibleInvokes(BB, DTU)) {
8477 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8478 // so we may now how duplicate PHI's.
8479 // Let's rerun EliminateDuplicatePHINodes() first,
8480 // before foldTwoEntryPHINode() potentially converts them into select's,
8481 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8482 return true;
8483 }
8484
8485 IRBuilder<> Builder(BB);
8486
8487 if (Options.SpeculateBlocks &&
8488 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8489 // If there is a trivial two-entry PHI node in this basic block, and we can
8490 // eliminate it, do so now.
8491 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8492 if (PN->getNumIncomingValues() == 2)
8493 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8494 Options.SpeculateUnpredictables))
8495 return true;
8496 }
8497
8499 Builder.SetInsertPoint(Terminator);
8500 switch (Terminator->getOpcode()) {
8501 case Instruction::Br:
8502 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8503 break;
8504 case Instruction::Resume:
8505 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8506 break;
8507 case Instruction::CleanupRet:
8508 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8509 break;
8510 case Instruction::Switch:
8511 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8512 break;
8513 case Instruction::Unreachable:
8514 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8515 break;
8516 case Instruction::IndirectBr:
8517 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8518 break;
8519 }
8520
8521 return Changed;
8522}
8523
8524bool SimplifyCFGOpt::run(BasicBlock *BB) {
8525 bool Changed = false;
8526
8527 // Repeated simplify BB as long as resimplification is requested.
8528 do {
8529 Resimplify = false;
8530
8531 // Perform one round of simplifcation. Resimplify flag will be set if
8532 // another iteration is requested.
8533 Changed |= simplifyOnce(BB);
8534 } while (Resimplify);
8535
8536 return Changed;
8537}
8538
8541 ArrayRef<WeakVH> LoopHeaders) {
8542 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8543 Options)
8544 .run(BB);
8545}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL)
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1574
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition ArrayRef.h:156
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:666
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:984
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:767
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isNegative() const
Definition Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:256
static DebugLoc getTemporary()
Definition DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:183
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:170
static DebugLoc getDropped()
Definition DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
unsigned size() const
Definition DenseMap.h:108
iterator end()
Definition DenseMap.h:81
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:205
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:112
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2345
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2100
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2637
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1931
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1805
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1847
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1860
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2194
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2068
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2277
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1077
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:115
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:104
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:201
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1731
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:256
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2056
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1757
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2108
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< uint32_t, 2 > &B1, const SmallVector< uint32_t, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1719
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3081
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3339
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1934
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3842
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2010
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2100
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1584
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2068
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:249