Thanks for visiting codestin.com.
Credit goes to llvm.org.

LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cstddef>
84#include <cstdint>
85#include <iterator>
86#include <map>
87#include <optional>
88#include <set>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "simplifycfg"
97
99 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
100
101 cl::desc(
102 "Temporary development switch used to gradually uplift SimplifyCFG "
103 "into preserving DomTree,"));
104
105// Chosen as 2 so as to be cheap, but still to have enough power to fold
106// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
107// To catch this, we need to fold a compare and a select, hence '2' being the
108// minimum reasonable default.
110 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
111 cl::desc(
112 "Control the amount of phi node folding to perform (default = 2)"));
113
115 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
116 cl::desc("Control the maximal total instruction cost that we are willing "
117 "to speculatively execute to fold a 2-entry PHI node into a "
118 "select (default = 4)"));
119
// Command-line override: when true (the default), instructions that appear
// identically in all successors may be hoisted up into the parent block.
120static cl::opt<bool>
121 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
122 cl::desc("Hoist common instructions up to the parent block"));
123
125 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
126 cl::desc("Hoist loads if the target supports conditional faulting"));
127
129 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
130 cl::desc("Hoist stores if the target supports conditional faulting"));
131
133 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
134 cl::desc("Control the maximal conditional load/store that we are willing "
135 "to speculatively execute to eliminate conditional branch "
136 "(default = 6)"));
137
139 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
140 cl::init(20),
141 cl::desc("Allow reordering across at most this many "
142 "instructions when hoisting"));
143
// Command-line override: when true (the default), instructions that appear
// identically at the end of multiple predecessors may be sunk into the
// common successor ("end") block.
144static cl::opt<bool>
145 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
146 cl::desc("Sink common instructions down to the end block"));
147
149 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
150 cl::desc("Hoist conditional stores if an unconditional store precedes"));
151
153 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
154 cl::desc("Hoist conditional stores even if an unconditional store does not "
155 "precede - hoist multiple conditional stores into a single "
156 "predicated store"));
157
159 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
160 cl::desc("When merging conditional stores, do so even if the resultant "
161 "basic blocks are unlikely to be if-converted as a result"));
162
164 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
165 cl::desc("Allow exactly one expensive instruction to be speculatively "
166 "executed"));
167
169 "max-speculation-depth", cl::Hidden, cl::init(10),
170 cl::desc("Limit maximum recursion depth when calculating costs of "
171 "speculatively executed instructions"));
172
// Size cap (default 10) below which a block is still considered "small"
// enough to be threaded through.
173static cl::opt<int>
174 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
175 cl::init(10),
176 cl::desc("Max size of a block which is still considered "
177 "small enough to thread through"));
178
179// Two is chosen to allow one negation and a logical combine.
181 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
182 cl::init(2),
183 cl::desc("Maximum cost of combining conditions when "
184 "folding branches"));
185
187 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
188 cl::init(2),
189 cl::desc("Multiplier to apply to threshold when determining whether or not "
190 "to fold branch to common destination when vector operations are "
191 "present"));
192
194 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
195 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
196
198 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
199 cl::desc("Limit cases to analyze when converting a switch to select"));
200
202 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
203 cl::desc("Limit number of blocks a define in a threaded block is allowed "
204 "to be live in"));
205
207
208STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
209STATISTIC(NumLinearMaps,
210 "Number of switch instructions turned into linear mapping");
211STATISTIC(NumLookupTables,
212 "Number of switch instructions turned into lookup tables");
214 NumLookupTablesHoles,
215 "Number of switch instructions turned into lookup tables (holes checked)");
216STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
217STATISTIC(NumFoldValueComparisonIntoPredecessors,
218 "Number of value comparisons folded into predecessor basic blocks");
219STATISTIC(NumFoldBranchToCommonDest,
220 "Number of branches folded into predecessor basic block");
222 NumHoistCommonCode,
223 "Number of common instruction 'blocks' hoisted up to the begin block");
224STATISTIC(NumHoistCommonInstrs,
225 "Number of common instructions hoisted up to the begin block");
226STATISTIC(NumSinkCommonCode,
227 "Number of common instruction 'blocks' sunk down to the end block");
228STATISTIC(NumSinkCommonInstrs,
229 "Number of common instructions sunk down to the end block");
230STATISTIC(NumSpeculations, "Number of speculative executed instructions");
231STATISTIC(NumInvokes,
232 "Number of invokes with empty resume blocks simplified into calls");
233STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
234STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
235
236namespace {
237
238// The first field contains the value that the switch produces when a certain
239// case group is selected, and the second field is a vector containing the
240// cases composing the case group.
241using SwitchCaseResultVectorTy =
243
244// The first field contains the phi node that generates a result of the switch
245// and the second field contains the value generated for a certain case in the
246// switch for that PHI.
247using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
248
249/// ValueEqualityComparisonCase - Represents a case of a switch.
250struct ValueEqualityComparisonCase {
// NOTE(review): the "ConstantInt *Value;" member declaration (scrape line
// 251) is missing from this capture; the constructor and operator< below
// clearly use such a member.
252 BasicBlock *Dest;
253
// Pairs the case's constant with the successor taken when it matches.
254 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
255 : Value(Value), Dest(Dest) {}
256
257 bool operator<(ValueEqualityComparisonCase RHS) const {
258 // Comparing pointers is ok as we only rely on the order for uniquing.
259 return Value < RHS.Value;
260 }
261
// Compare against a destination block, enabling llvm::erase(Cases, BB)
// to drop every case targeting a given block (see eliminateBlockCases).
262 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
263};
264
// Driver class for the peephole CFG simplifications in this file. It is run
// per-basic-block (see run/simplifyOnce) and applies the transforms declared
// below until no further change is requested.
265class SimplifyCFGOpt {
// Cost model used when deciding whether to speculate/hoist instructions.
266 const TargetTransformInfo &TTI;
// Incremental dominator-tree updater; may be null (see ctor assert below).
267 DomTreeUpdater *DTU;
268 const DataLayout &DL;
269 ArrayRef<WeakVH> LoopHeaders;
270 const SimplifyCFGOptions &Options;
// Set by requestResimplify() to ask for another simplification iteration.
271 bool Resimplify;
272
// Helpers for treating equality comparisons (branch-on-icmp-eq, switch) as
// a uniform "value switch" and folding them into predecessors.
273 Value *isValueEqualityComparison(Instruction *TI);
274 BasicBlock *getValueEqualityComparisonCases(
275 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
276 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
277 BasicBlock *Pred,
278 IRBuilder<> &Builder);
279 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
280 Instruction *PTI,
281 IRBuilder<> &Builder);
282 bool foldValueComparisonIntoPredecessors(Instruction *TI,
283 IRBuilder<> &Builder);
284
// Per-terminator-kind simplification entry points.
285 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
286 bool simplifySingleResume(ResumeInst *RI);
287 bool simplifyCommonResume(ResumeInst *RI);
288 bool simplifyCleanupReturn(CleanupReturnInst *RI);
289 bool simplifyUnreachable(UnreachableInst *UI);
290 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
291 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
292 bool simplifyIndirectBr(IndirectBrInst *IBI);
293 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
294 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
295 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
296 bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);
297
298 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
299 IRBuilder<> &Builder);
300
// Hoisting/sinking/speculation transforms.
301 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
302 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
303 Instruction *TI, Instruction *I1,
304 SmallVectorImpl<Instruction *> &OtherSuccTIs);
305 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
306 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
307 BasicBlock *TrueBB, BasicBlock *FalseBB,
308 uint32_t TrueWeight, uint32_t FalseWeight);
309 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
310 const DataLayout &DL);
311 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
312 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
313 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
314
315public:
316 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
317 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
318 const SimplifyCFGOptions &Opts)
319 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
// This pass only knows how to keep a (forward) DomTree valid, never a
// post-dominator tree.
320 assert((!DTU || !DTU->hasPostDomTree()) &&
321 "SimplifyCFG is not yet capable of maintaining validity of a "
322 "PostDomTree, so don't ask for it.");
323 }
324
// Run one pass of simplifications / iterate until Resimplify stays false.
325 bool simplifyOnce(BasicBlock *BB);
326 bool run(BasicBlock *BB);
327
328 // Helper to set Resimplify and return change indication.
329 bool requestResimplify() {
330 Resimplify = true;
331 return true;
332 }
333};
334
335// we synthesize a || b as select a, true, b
336// we synthesize a && b as select a, b, false
337// this function determines if SI is playing one of those roles.
338[[maybe_unused]] bool
339isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
340 return ((isa<ConstantInt>(SI->getTrueValue()) &&
341 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
342 (isa<ConstantInt>(SI->getFalseValue()) &&
343 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
344}
345
346} // end anonymous namespace
347
348/// Return true if all the PHI nodes in the basic block \p BB
349/// receive compatible (identical) incoming values when coming from
350/// all of the predecessor blocks that are specified in \p IncomingBlocks.
351///
352/// Note that if the values aren't exactly identical, but \p EquivalenceSet
353/// is provided, and *both* of the values are present in the set,
354/// then they are considered equal.
// NOTE(review): the function's opening signature line (scrape line 355) is
// missing from this capture; the lines below are its parameter continuation.
356 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
357 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
358 assert(IncomingBlocks.size() == 2 &&
359 "Only for a pair of incoming blocks at the time!");
360
361 // FIXME: it is okay if one of the incoming values is an `undef` value,
362 // iff the other incoming value is guaranteed to be a non-poison value.
363 // FIXME: it is okay if one of the incoming values is a `poison` value.
// A PHI is "compatible" iff both incoming values are literally the same
// Value, or both belong to the caller-supplied equivalence set.
364 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
365 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
366 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
367 if (IV0 == IV1)
368 return true;
369 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
370 EquivalenceSet->contains(IV1))
371 return true;
372 return false;
373 });
374}
375
376/// Return true if it is safe to merge these two
377/// terminator instructions together.
378static bool
// NOTE(review): the line naming the function and its first parameters
// (scrape line 379) is missing from this capture; the body uses SI1/SI2.
380 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
381 if (SI1 == SI2)
382 return false; // Can't merge with self!
383
384 // It is not safe to merge these two switch instructions if they have a common
385 // successor, and if that successor has a PHI node, and if *that* PHI node has
386 // conflicting incoming values from the two switch blocks.
387 BasicBlock *SI1BB = SI1->getParent();
388 BasicBlock *SI2BB = SI2->getParent();
389
// NOTE(review): the declaration of SI1Succs (scrape line 390, presumably a
// set of SI1BB's successors) is missing from this capture.
391 bool Fail = false;
// When FailBlocks is supplied, collect *every* conflicting successor rather
// than stopping at the first, so the caller can try to repair them.
392 for (BasicBlock *Succ : successors(SI2BB)) {
393 if (!SI1Succs.count(Succ))
394 continue;
395 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
396 continue;
397 Fail = true;
398 if (FailBlocks)
399 FailBlocks->insert(Succ);
400 else
401 break;
402 }
403
404 return !Fail;
405}
406
407/// Update PHI nodes in Succ to indicate that there will now be entries in it
408/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
409/// will be the same as those coming in from ExistPred, an existing predecessor
410/// of Succ.
411static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
412 BasicBlock *ExistPred,
413 MemorySSAUpdater *MSSAU = nullptr) {
414 for (PHINode &PN : Succ->phis())
415 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
416 if (MSSAU)
417 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
418 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
419}
420
421/// Compute an abstract "cost" of speculating the given instruction,
422/// which is assumed to be safe to speculate. TCC_Free means cheap,
423/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
424/// expensive.
// NOTE(review): the signature's opening line (scrape line 425, introducing
// parameter I) is missing from this capture.
426 const TargetTransformInfo &TTI) {
// Size-and-latency is the right cost kind here: speculation trades code
// size/latency on the straight-line path for removed control flow.
427 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
428}
429
430/// If we have a merge point of an "if condition" as accepted above,
431/// return true if the specified value dominates the block. We don't handle
432/// the true generality of domination here, just a special case which works
433/// well enough for us.
434///
435/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
436/// see if V (which must be an instruction) and its recursive operands
437/// that do not dominate BB have a combined cost lower than Budget and
438/// are non-trapping. If both are true, the instruction is inserted into the
439/// set and true is returned.
440///
441/// The cost for most non-trapping instructions is defined as 1 except for
442/// Select whose cost is 2.
443///
444/// After this function returns, Cost is increased by the cost of
445/// V plus its non-dominating operands. If that cost is greater than
446/// Budget, false is returned and Cost is undefined.
// NOTE(review): the "static bool dominatesMergePoint(" opener (scrape line
// 447) and a parameter line (scrape line 450, carrying at least Budget/TTI/
// AC judging by the body) are missing from this capture.
448 Value *V, BasicBlock *BB, Instruction *InsertPt,
449 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
451 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
452 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
453 // so limit the recursion depth.
454 // TODO: While this recursion limit does prevent pathological behavior, it
455 // would be better to track visited instructions to avoid cycles.
// NOTE(review): the condition guarding this early-out (scrape line 456,
// presumably the recursion-depth check described above) is missing here.
457 return false;
458
// NOTE(review): the definition of I (scrape line 459, presumably a dyn_cast
// of V to Instruction) is missing from this capture.
460 if (!I) {
461 // Non-instructions dominate all instructions and can be executed
462 // unconditionally.
463 return true;
464 }
465 BasicBlock *PBB = I->getParent();
466
467 // We don't want to allow weird loops that might have the "if condition" in
468 // the bottom of this block.
469 if (PBB == BB)
470 return false;
471
472 // If this instruction is defined in a block that contains an unconditional
473 // branch to BB, then it must be in the 'conditional' part of the "if
474 // statement". If not, it definitely dominates the region.
// NOTE(review): the definition of BI (scrape line 475, presumably PBB's
// terminator as a BranchInst) is missing from this capture.
476 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
477 return true;
478
479 // If we have seen this instruction before, don't count it again.
480 if (AggressiveInsts.count(I))
481 return true;
482
483 // Okay, it looks like the instruction IS in the "condition". Check to
484 // see if it's a cheap instruction to unconditionally compute, and if it
485 // only uses stuff defined outside of the condition. If so, hoist it out.
486 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
487 return false;
488
489 // Overflow arithmetic instruction plus extract value are usually generated
490 // when a division is being replaced. But, in this case, the zero check may
491 // still be kept in the code. In that case it would be worth to hoist these
492 // two instruction out of the basic block. Let's treat this pattern as one
493 // single cheap instruction here!
494 WithOverflowInst *OverflowInst;
495 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
496 ZeroCostInstructions.insert(OverflowInst);
497 Cost += 1;
498 } else if (!ZeroCostInstructions.contains(I))
499 Cost += computeSpeculationCost(I, TTI);
500
501 // Allow exactly one instruction to be speculated regardless of its cost
502 // (as long as it is safe to do so).
503 // This is intended to flatten the CFG even if the instruction is a division
504 // or other expensive operation. The speculation of an expensive instruction
505 // is expected to be undone in CodeGenPrepare if the speculation has not
506 // enabled further IR optimizations.
507 if (Cost > Budget &&
508 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
509 !Cost.isValid()))
510 return false;
511
512 // Okay, we can only really hoist these out if their operands do
513 // not take us over the cost threshold.
514 for (Use &Op : I->operands())
515 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
516 TTI, AC, ZeroCostInstructions, Depth + 1))
517 return false;
518 // Okay, it's safe to do this! Remember this instruction.
519 AggressiveInsts.insert(I);
520 return true;
521}
522
523/// Extract ConstantInt from value, looking through IntToPtr
524/// and PointerNullValue. Return NULL if value is not a constant int.
// NOTE(review): the function's signature line (scrape line 525) is missing
// from this capture; the body reads V and DL.
526 // Normal constant int.
// NOTE(review): the definition of CI (scrape line 527, presumably a
// dyn_cast of V to ConstantInt) is missing from this capture.
528 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
529 return CI;
530
531 // It is not safe to look through inttoptr or ptrtoint when using unstable
532 // pointer types.
533 if (DL.hasUnstableRepresentation(V->getType()))
534 return nullptr;
535
536 // This is some kind of pointer constant. Turn it into a pointer-sized
537 // ConstantInt if possible.
538 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
539
540 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
// NOTE(review): the condition guarding this return (scrape line 541,
// presumably an isa<ConstantPointerNull> check) is missing here.
542 return ConstantInt::get(IntPtrTy, 0);
543
544 // IntToPtr const int, we can look through this if the semantics of
545 // inttoptr for this address space are a simple (truncating) bitcast.
// NOTE(review): the definition of CE (scrape line 546, presumably a
// dyn_cast of V to ConstantExpr) is missing from this capture.
547 if (CE->getOpcode() == Instruction::IntToPtr)
548 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
549 // The constant is very likely to have the right type already.
550 if (CI->getType() == IntPtrTy)
551 return CI;
552 else
553 return cast<ConstantInt>(
554 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
555 }
556 return nullptr;
557}
558
559namespace {
560
561/// Given a chain of or (||) or and (&&) comparison of a value against a
562/// constant, this will try to recover the information required for a switch
563/// structure.
564/// It will depth-first traverse the chain of comparison, seeking for patterns
565/// like %a == 12 or %a < 4 and combine them to produce a set of integer
566/// representing the different cases for the switch.
567/// Note that if the chain is composed of '||' it will build the set of elements
568/// that matches the comparisons (i.e. any of this value validate the chain)
569/// while for a chain of '&&' it will build the set elements that make the test
570/// fail.
571struct ConstantComparesGatherer {
572 const DataLayout &DL;
573
574 /// Value found for the switch comparison
575 Value *CompValue = nullptr;
576
577 /// Extra clause to be checked before the switch
578 Value *Extra = nullptr;
579
580 /// Set of integers to match in switch
// NOTE(review): the declaration of Vals (scrape line 581, presumably a
// SmallVector of ConstantInt*) is missing from this capture; the methods
// below push ConstantInt* into it.
582
583 /// Number of comparisons matched in the and/or chain
584 unsigned UsedICmps = 0;
585
586 /// If the elements in Vals matches the comparisons
587 bool IsEq = false;
588
589 // Used to check if the first matched CompValue shall be the Extra check.
590 bool IgnoreFirstMatch = false;
591 bool MultipleMatches = false;
592
593 /// Construct and compute the result for the comparison instruction Cond
594 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
595 gather(Cond);
// If the first pass saw several distinct compared values and settled on
// none, retry once, letting the first matched value become the Extra
// clause instead (IgnoreFirstMatch).
596 if (CompValue || !MultipleMatches)
597 return;
598 Extra = nullptr;
599 Vals.clear();
600 UsedICmps = 0;
601 IgnoreFirstMatch = true;
602 gather(Cond);
603 }
604
605 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
606 ConstantComparesGatherer &
607 operator=(const ConstantComparesGatherer &) = delete;
608
609private:
610 /// Try to set the current value used for the comparison, it succeeds only if
611 /// it wasn't set before or if the new value is the same as the old one
612 bool setValueOnce(Value *NewVal) {
613 if (IgnoreFirstMatch) {
614 IgnoreFirstMatch = false;
615 return false;
616 }
617 if (CompValue && CompValue != NewVal) {
618 MultipleMatches = true;
619 return false;
620 }
621 CompValue = NewVal;
622 return true;
623 }
624
625 /// Try to match Instruction "I" as a comparison against a constant and
626 /// populates the array Vals with the set of values that match (or do not
627 /// match depending on isEQ).
628 /// Return false on failure. On success, the Value the comparison matched
629 /// against is placed in CompValue.
630 /// If CompValue is already set, the function is expected to fail if a match
631 /// is found but the value compared to is different.
632 bool matchInstruction(Instruction *I, bool isEQ) {
// A 'not' wrapper flips the polarity of whatever comparison is inside.
633 if (match(I, m_Not(m_Instruction(I))))
634 isEQ = !isEQ;
635
636 Value *Val;
637 if (match(I, m_NUWTrunc(m_Value(Val)))) {
638 // If we already have a value for the switch, it has to match!
639 if (!setValueOnce(Val))
640 return false;
641 UsedICmps++;
642 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
643 return true;
644 }
645 // If this is an icmp against a constant, handle this as one of the cases.
646 ICmpInst *ICI;
647 ConstantInt *C;
648 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
649 (C = getConstantInt(I->getOperand(1), DL)))) {
650 return false;
651 }
652
653 Value *RHSVal;
654 const APInt *RHSC;
655
656 // Pattern match a special case
657 // (x & ~2^z) == y --> x == y || x == y|2^z
658 // This undoes a transformation done by instcombine to fuse 2 compares.
659 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
660 // It's a little bit hard to see why the following transformations are
661 // correct. Here is a CVC3 program to verify them for 64-bit values:
662
663 /*
664 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
665 x : BITVECTOR(64);
666 y : BITVECTOR(64);
667 z : BITVECTOR(64);
668 mask : BITVECTOR(64) = BVSHL(ONE, z);
669 QUERY( (y & ~mask = y) =>
670 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
671 );
672 QUERY( (y | mask = y) =>
673 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
674 );
675 */
676
677 // Please note that each pattern must be a dual implication (<--> or
678 // iff). One directional implication can create spurious matches. If the
679 // implication is only one-way, an unsatisfiable condition on the left
680 // side can imply a satisfiable condition on the right side. Dual
681 // implication ensures that satisfiable conditions are transformed to
682 // other satisfiable conditions and unsatisfiable conditions are
683 // transformed to other unsatisfiable conditions.
684
685 // Here is a concrete example of a unsatisfiable condition on the left
686 // implying a satisfiable condition on the right:
687 //
688 // mask = (1 << z)
689 // (x & ~mask) == y --> (x == y || x == (y | mask))
690 //
691 // Substituting y = 3, z = 0 yields:
692 // (x & -2) == 3 --> (x == 3 || x == 2)
693
694 // Pattern match a special case:
695 /*
696 QUERY( (y & ~mask = y) =>
697 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
698 );
699 */
700 if (match(ICI->getOperand(0),
701 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
702 APInt Mask = ~*RHSC;
703 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
704 // If we already have a value for the switch, it has to match!
705 if (!setValueOnce(RHSVal))
706 return false;
707
708 Vals.push_back(C);
709 Vals.push_back(
710 ConstantInt::get(C->getContext(),
711 C->getValue() | Mask));
712 UsedICmps++;
713 return true;
714 }
715 }
716
717 // Pattern match a special case:
718 /*
719 QUERY( (y | mask = y) =>
720 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
721 );
722 */
723 if (match(ICI->getOperand(0),
724 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
725 APInt Mask = *RHSC;
726 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
727 // If we already have a value for the switch, it has to match!
728 if (!setValueOnce(RHSVal))
729 return false;
730
731 Vals.push_back(C);
732 Vals.push_back(ConstantInt::get(C->getContext(),
733 C->getValue() & ~Mask));
734 UsedICmps++;
735 return true;
736 }
737 }
738
739 // If we already have a value for the switch, it has to match!
740 if (!setValueOnce(ICI->getOperand(0)))
741 return false;
742
743 UsedICmps++;
744 Vals.push_back(C);
745 return true;
746 }
747
748 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
749 ConstantRange Span =
// NOTE(review): the right-hand side of this initializer (scrape line 750,
// presumably ConstantRange::makeExactICmpRegion on ICI's predicate and C)
// is missing from this capture.
751
752 // Shift the range if the compare is fed by an add. This is the range
753 // compare idiom as emitted by instcombine.
754 Value *CandidateVal = I->getOperand(0);
755 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
756 Span = Span.subtract(*RHSC);
757 CandidateVal = RHSVal;
758 }
759
760 // If this is an and/!= check, then we are looking to build the set of
761 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
762 // x != 0 && x != 1.
763 if (!isEQ)
764 Span = Span.inverse();
765
766 // If there are a ton of values, we don't want to make a ginormous switch.
767 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
768 return false;
769 }
770
771 // If we already have a value for the switch, it has to match!
772 if (!setValueOnce(CandidateVal))
773 return false;
774
775 // Add all values from the range to the set
776 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
777 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
778
779 UsedICmps++;
780 return true;
781 }
782
783 /// Given a potentially 'or'd or 'and'd together collection of icmp
784 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
785 /// the value being compared, and stick the list constants into the Vals
786 /// vector.
787 /// One "Extra" case is allowed to differ from the other.
788 void gather(Value *V) {
789 Value *Op0, *Op1;
// The root must itself be a logical or/and; that fixes the chain polarity.
790 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
791 IsEq = true;
792 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
793 IsEq = false;
794 else
795 return;
796 // Keep a stack (SmallVector for efficiency) for depth-first traversal
797 SmallVector<Value *, 8> DFT{Op0, Op1};
798 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
799
800 while (!DFT.empty()) {
801 V = DFT.pop_back_val();
802
803 if (Instruction *I = dyn_cast<Instruction>(V)) {
804 // If it is a || (or && depending on isEQ), process the operands.
805 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
806 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
807 if (Visited.insert(Op1).second)
808 DFT.push_back(Op1);
809 if (Visited.insert(Op0).second)
810 DFT.push_back(Op0);
811
812 continue;
813 }
814
815 // Try to match the current instruction
816 if (matchInstruction(I, IsEq))
817 // Match succeed, continue the loop
818 continue;
819 }
820
821 // One element of the sequence of || (or &&) could not be match as a
822 // comparison against the same value as the others.
823 // We allow only one "Extra" case to be checked before the switch
824 if (!Extra) {
825 Extra = V;
826 continue;
827 }
828 // Failed to parse a proper sequence, abort now
829 CompValue = nullptr;
830 break;
831 }
832 }
833};
834
835} // end anonymous namespace
836
// Erase a terminator and clean up its (now possibly dead) condition value.
// NOTE(review): the signature opener (scrape line 837) is missing from this
// capture; judging by the body it takes the terminator TI plus the optional
// MemorySSAUpdater below.
838 MemorySSAUpdater *MSSAU = nullptr) {
839 Instruction *Cond = nullptr;
// NOTE(review): the opening "if" of this cascade (scrape line 840,
// presumably a dyn_cast of TI to SwitchInst) is missing here.
841 Cond = dyn_cast<Instruction>(SI->getCondition());
842 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
843 if (BI->isConditional())
844 Cond = dyn_cast<Instruction>(BI->getCondition());
845 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
846 Cond = dyn_cast<Instruction>(IBI->getAddress());
847 }
848
849 TI->eraseFromParent();
850 if (Cond)
// NOTE(review): the statement DCE'ing Cond (scrape line 851) is missing
// from this capture.
852}
853
854/// Return true if the specified terminator checks
855/// to see if a value is equal to constant integer value.
856Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
857 Value *CV = nullptr;
858 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
859 // Do not permit merging of large switch instructions into their
860 // predecessors unless there is only one predecessor.
861 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
862 CV = SI->getCondition();
863 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
864 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
865 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
866 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
867 CV = ICI->getOperand(0);
868 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
869 if (Trunc->hasNoUnsignedWrap())
870 CV = Trunc->getOperand(0);
871 }
872 }
873
874 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
875 if (CV) {
876 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
877 Value *Ptr = PTII->getPointerOperand();
878 if (DL.hasUnstableRepresentation(Ptr->getType()))
879 return CV;
880 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
881 CV = Ptr;
882 }
883 }
884 return CV;
885}
886
887/// Given a value comparison instruction,
888/// decode all of the 'cases' that it represents and return the 'default' block.
889BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
890 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
891 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
892 Cases.reserve(SI->getNumCases());
893 for (auto Case : SI->cases())
894 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
895 Case.getCaseSuccessor()));
896 return SI->getDefaultDest();
897 }
898
899 BranchInst *BI = cast<BranchInst>(TI);
900 Value *Cond = BI->getCondition();
901 ICmpInst::Predicate Pred;
902 ConstantInt *C;
903 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
904 Pred = ICI->getPredicate();
905 C = getConstantInt(ICI->getOperand(1), DL);
906 } else {
907 Pred = ICmpInst::ICMP_NE;
908 auto *Trunc = cast<TruncInst>(Cond);
909 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
910 }
911 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
912 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
913 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
914}
915
916/// Given a vector of bb/value pairs, remove any entries
917/// in the list that match the specified block.
918static void
920 std::vector<ValueEqualityComparisonCase> &Cases) {
921 llvm::erase(Cases, BB);
922}
923
924/// Return true if there are any keys in C1 that exist in C2 as well.
925static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
926 std::vector<ValueEqualityComparisonCase> &C2) {
927 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
928
929 // Make V1 be smaller than V2.
930 if (V1->size() > V2->size())
931 std::swap(V1, V2);
932
933 if (V1->empty())
934 return false;
935 if (V1->size() == 1) {
936 // Just scan V2.
937 ConstantInt *TheVal = (*V1)[0].Value;
938 for (const ValueEqualityComparisonCase &VECC : *V2)
939 if (TheVal == VECC.Value)
940 return true;
941 }
942
943 // Otherwise, just sort both lists and compare element by element.
944 array_pod_sort(V1->begin(), V1->end());
945 array_pod_sort(V2->begin(), V2->end());
946 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
947 while (i1 != e1 && i2 != e2) {
948 if ((*V1)[i1].Value == (*V2)[i2].Value)
949 return true;
950 if ((*V1)[i1].Value < (*V2)[i2].Value)
951 ++i1;
952 else
953 ++i2;
954 }
955 return false;
956}
957
958/// If TI is known to be a terminator instruction and its block is known to
959/// only have a single predecessor block, check to see if that predecessor is
960/// also a value comparison with the same value, and if that comparison
961/// determines the outcome of this comparison. If so, simplify TI. This does a
962/// very limited form of jump threading.
963bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
964 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
965 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
966 if (!PredVal)
967 return false; // Not a value comparison in predecessor.
968
969 Value *ThisVal = isValueEqualityComparison(TI);
970 assert(ThisVal && "This isn't a value comparison!!");
971 if (ThisVal != PredVal)
972 return false; // Different predicates.
973
974 // TODO: Preserve branch weight metadata, similarly to how
975 // foldValueComparisonIntoPredecessors preserves it.
976
977 // Find out information about when control will move from Pred to TI's block.
978 std::vector<ValueEqualityComparisonCase> PredCases;
979 BasicBlock *PredDef =
980 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
981 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
982
983 // Find information about how control leaves this block.
984 std::vector<ValueEqualityComparisonCase> ThisCases;
985 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
986 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
987
988 // If TI's block is the default block from Pred's comparison, potentially
989 // simplify TI based on this knowledge.
990 if (PredDef == TI->getParent()) {
991 // If we are here, we know that the value is none of those cases listed in
992 // PredCases. If there are any cases in ThisCases that are in PredCases, we
993 // can simplify TI.
994 if (!valuesOverlap(PredCases, ThisCases))
995 return false;
996
997 if (isa<BranchInst>(TI)) {
998 // Okay, one of the successors of this condbr is dead. Convert it to a
999 // uncond br.
1000 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1001 // Insert the new branch.
1002 Instruction *NI = Builder.CreateBr(ThisDef);
1003 (void)NI;
1004
1005 // Remove PHI node entries for the dead edge.
1006 ThisCases[0].Dest->removePredecessor(PredDef);
1007
1008 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1009 << "Through successor TI: " << *TI << "Leaving: " << *NI
1010 << "\n");
1011
1013
1014 if (DTU)
1015 DTU->applyUpdates(
1016 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1017
1018 return true;
1019 }
1020
1021 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1022 // Okay, TI has cases that are statically dead, prune them away.
1023 SmallPtrSet<Constant *, 16> DeadCases;
1024 for (const ValueEqualityComparisonCase &Case : PredCases)
1025 DeadCases.insert(Case.Value);
1026
1027 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1028 << "Through successor TI: " << *TI);
1029
1030 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1031 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1032 --i;
1033 auto *Successor = i->getCaseSuccessor();
1034 if (DTU)
1035 ++NumPerSuccessorCases[Successor];
1036 if (DeadCases.count(i->getCaseValue())) {
1037 Successor->removePredecessor(PredDef);
1038 SI.removeCase(i);
1039 if (DTU)
1040 --NumPerSuccessorCases[Successor];
1041 }
1042 }
1043
1044 if (DTU) {
1045 std::vector<DominatorTree::UpdateType> Updates;
1046 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1047 if (I.second == 0)
1048 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1049 DTU->applyUpdates(Updates);
1050 }
1051
1052 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1053 return true;
1054 }
1055
1056 // Otherwise, TI's block must correspond to some matched value. Find out
1057 // which value (or set of values) this is.
1058 ConstantInt *TIV = nullptr;
1059 BasicBlock *TIBB = TI->getParent();
1060 for (const auto &[Value, Dest] : PredCases)
1061 if (Dest == TIBB) {
1062 if (TIV)
1063 return false; // Cannot handle multiple values coming to this block.
1064 TIV = Value;
1065 }
1066 assert(TIV && "No edge from pred to succ?");
1067
1068 // Okay, we found the one constant that our value can be if we get into TI's
1069 // BB. Find out which successor will unconditionally be branched to.
1070 BasicBlock *TheRealDest = nullptr;
1071 for (const auto &[Value, Dest] : ThisCases)
1072 if (Value == TIV) {
1073 TheRealDest = Dest;
1074 break;
1075 }
1076
1077 // If not handled by any explicit cases, it is handled by the default case.
1078 if (!TheRealDest)
1079 TheRealDest = ThisDef;
1080
1081 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1082
1083 // Remove PHI node entries for dead edges.
1084 BasicBlock *CheckEdge = TheRealDest;
1085 for (BasicBlock *Succ : successors(TIBB))
1086 if (Succ != CheckEdge) {
1087 if (Succ != TheRealDest)
1088 RemovedSuccs.insert(Succ);
1089 Succ->removePredecessor(TIBB);
1090 } else
1091 CheckEdge = nullptr;
1092
1093 // Insert the new branch.
1094 Instruction *NI = Builder.CreateBr(TheRealDest);
1095 (void)NI;
1096
1097 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1098 << "Through successor TI: " << *TI << "Leaving: " << *NI
1099 << "\n");
1100
1102 if (DTU) {
1103 SmallVector<DominatorTree::UpdateType, 2> Updates;
1104 Updates.reserve(RemovedSuccs.size());
1105 for (auto *RemovedSucc : RemovedSuccs)
1106 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1107 DTU->applyUpdates(Updates);
1108 }
1109 return true;
1110}
1111
namespace {

/// This class implements a stable ordering of constant
/// integers that does not depend on their address. This is important for
/// applications that sort ConstantInt's to ensure uniqueness.
struct ConstantIntOrdering {
  // Strict weak ordering: compare by unsigned integer value, not by pointer
  // identity, so container iteration order is deterministic across runs.
  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
    return LHS->getValue().ult(RHS->getValue());
  }
};

} // end anonymous namespace
1124
1126 ConstantInt *const *P2) {
1127 const ConstantInt *LHS = *P1;
1128 const ConstantInt *RHS = *P2;
1129 if (LHS == RHS)
1130 return 0;
1131 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1132}
1133
1134/// Get Weights of a given terminator, the default weight is at the front
1135/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1136/// metadata.
1138 SmallVectorImpl<uint64_t> &Weights) {
1139 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1140 assert(MD && "Invalid branch-weight metadata");
1141 extractFromBranchWeightMD64(MD, Weights);
1142
1143 // If TI is a conditional eq, the default case is the false case,
1144 // and the corresponding branch-weight data is at index 2. We swap the
1145 // default weight to be the first entry.
1146 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1147 assert(Weights.size() == 2);
1148 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1149 if (!ICI)
1150 return;
1151
1152 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1153 std::swap(Weights.front(), Weights.back());
1154 }
1155}
1156
1158 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1159 Instruction *PTI = PredBlock->getTerminator();
1160
1161 // If we have bonus instructions, clone them into the predecessor block.
1162 // Note that there may be multiple predecessor blocks, so we cannot move
1163 // bonus instructions to a predecessor block.
1164 for (Instruction &BonusInst : *BB) {
1165 if (BonusInst.isTerminator())
1166 continue;
1167
1168 Instruction *NewBonusInst = BonusInst.clone();
1169
1170 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1171 // Unless the instruction has the same !dbg location as the original
1172 // branch, drop it. When we fold the bonus instructions we want to make
1173 // sure we reset their debug locations in order to avoid stepping on
1174 // dead code caused by folding dead branches.
1175 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1176 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1177 mapAtomInstance(DL, VMap);
1178 }
1179
1180 RemapInstruction(NewBonusInst, VMap,
1182
1183 // If we speculated an instruction, we need to drop any metadata that may
1184 // result in undefined behavior, as the metadata might have been valid
1185 // only given the branch precondition.
1186 // Similarly strip attributes on call parameters that may cause UB in
1187 // location the call is moved to.
1188 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1189
1190 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1191 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1192 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1194
1195 NewBonusInst->takeName(&BonusInst);
1196 BonusInst.setName(NewBonusInst->getName() + ".old");
1197 VMap[&BonusInst] = NewBonusInst;
1198
1199 // Update (liveout) uses of bonus instructions,
1200 // now that the bonus instruction has been cloned into predecessor.
1201 // Note that we expect to be in a block-closed SSA form for this to work!
1202 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1203 auto *UI = cast<Instruction>(U.getUser());
1204 auto *PN = dyn_cast<PHINode>(UI);
1205 if (!PN) {
1206 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1207 "If the user is not a PHI node, then it should be in the same "
1208 "block as, and come after, the original bonus instruction.");
1209 continue; // Keep using the original bonus instruction.
1210 }
1211 // Is this the block-closed SSA form PHI node?
1212 if (PN->getIncomingBlock(U) == BB)
1213 continue; // Great, keep using the original bonus instruction.
1214 // The only other alternative is an "use" when coming from
1215 // the predecessor block - here we should refer to the cloned bonus instr.
1216 assert(PN->getIncomingBlock(U) == PredBlock &&
1217 "Not in block-closed SSA form?");
1218 U.set(NewBonusInst);
1219 }
1220 }
1221
1222 // Key Instructions: We may have propagated atom info into the pred. If the
1223 // pred's terminator already has atom info do nothing as merging would drop
1224 // one atom group anyway. If it doesn't, propagte the remapped atom group
1225 // from BB's terminator.
1226 if (auto &PredDL = PTI->getDebugLoc()) {
1227 auto &DL = BB->getTerminator()->getDebugLoc();
1228 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1229 PredDL.isSameSourceLocation(DL)) {
1230 PTI->setDebugLoc(DL);
1231 RemapSourceAtom(PTI, VMap);
1232 }
1233 }
1234}
1235
1236bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1237 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1238 BasicBlock *BB = TI->getParent();
1239 BasicBlock *Pred = PTI->getParent();
1240
1242
1243 // Figure out which 'cases' to copy from SI to PSI.
1244 std::vector<ValueEqualityComparisonCase> BBCases;
1245 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1246
1247 std::vector<ValueEqualityComparisonCase> PredCases;
1248 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1249
1250 // Based on whether the default edge from PTI goes to BB or not, fill in
1251 // PredCases and PredDefault with the new switch cases we would like to
1252 // build.
1253 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1254
1255 // Update the branch weight metadata along the way
1256 SmallVector<uint64_t, 8> Weights;
1257 bool PredHasWeights = hasBranchWeightMD(*PTI);
1258 bool SuccHasWeights = hasBranchWeightMD(*TI);
1259
1260 if (PredHasWeights) {
1261 getBranchWeights(PTI, Weights);
1262 // branch-weight metadata is inconsistent here.
1263 if (Weights.size() != 1 + PredCases.size())
1264 PredHasWeights = SuccHasWeights = false;
1265 } else if (SuccHasWeights)
1266 // If there are no predecessor weights but there are successor weights,
1267 // populate Weights with 1, which will later be scaled to the sum of
1268 // successor's weights
1269 Weights.assign(1 + PredCases.size(), 1);
1270
1271 SmallVector<uint64_t, 8> SuccWeights;
1272 if (SuccHasWeights) {
1273 getBranchWeights(TI, SuccWeights);
1274 // branch-weight metadata is inconsistent here.
1275 if (SuccWeights.size() != 1 + BBCases.size())
1276 PredHasWeights = SuccHasWeights = false;
1277 } else if (PredHasWeights)
1278 SuccWeights.assign(1 + BBCases.size(), 1);
1279
1280 if (PredDefault == BB) {
1281 // If this is the default destination from PTI, only the edges in TI
1282 // that don't occur in PTI, or that branch to BB will be activated.
1283 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1284 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1285 if (PredCases[i].Dest != BB)
1286 PTIHandled.insert(PredCases[i].Value);
1287 else {
1288 // The default destination is BB, we don't need explicit targets.
1289 std::swap(PredCases[i], PredCases.back());
1290
1291 if (PredHasWeights || SuccHasWeights) {
1292 // Increase weight for the default case.
1293 Weights[0] += Weights[i + 1];
1294 std::swap(Weights[i + 1], Weights.back());
1295 Weights.pop_back();
1296 }
1297
1298 PredCases.pop_back();
1299 --i;
1300 --e;
1301 }
1302
1303 // Reconstruct the new switch statement we will be building.
1304 if (PredDefault != BBDefault) {
1305 PredDefault->removePredecessor(Pred);
1306 if (DTU && PredDefault != BB)
1307 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1308 PredDefault = BBDefault;
1309 ++NewSuccessors[BBDefault];
1310 }
1311
1312 unsigned CasesFromPred = Weights.size();
1313 uint64_t ValidTotalSuccWeight = 0;
1314 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1315 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1316 PredCases.push_back(BBCases[i]);
1317 ++NewSuccessors[BBCases[i].Dest];
1318 if (SuccHasWeights || PredHasWeights) {
1319 // The default weight is at index 0, so weight for the ith case
1320 // should be at index i+1. Scale the cases from successor by
1321 // PredDefaultWeight (Weights[0]).
1322 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1323 ValidTotalSuccWeight += SuccWeights[i + 1];
1324 }
1325 }
1326
1327 if (SuccHasWeights || PredHasWeights) {
1328 ValidTotalSuccWeight += SuccWeights[0];
1329 // Scale the cases from predecessor by ValidTotalSuccWeight.
1330 for (unsigned i = 1; i < CasesFromPred; ++i)
1331 Weights[i] *= ValidTotalSuccWeight;
1332 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1333 Weights[0] *= SuccWeights[0];
1334 }
1335 } else {
1336 // If this is not the default destination from PSI, only the edges
1337 // in SI that occur in PSI with a destination of BB will be
1338 // activated.
1339 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1340 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1341 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1342 if (PredCases[i].Dest == BB) {
1343 PTIHandled.insert(PredCases[i].Value);
1344
1345 if (PredHasWeights || SuccHasWeights) {
1346 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1347 std::swap(Weights[i + 1], Weights.back());
1348 Weights.pop_back();
1349 }
1350
1351 std::swap(PredCases[i], PredCases.back());
1352 PredCases.pop_back();
1353 --i;
1354 --e;
1355 }
1356
1357 // Okay, now we know which constants were sent to BB from the
1358 // predecessor. Figure out where they will all go now.
1359 for (const ValueEqualityComparisonCase &Case : BBCases)
1360 if (PTIHandled.count(Case.Value)) {
1361 // If this is one we are capable of getting...
1362 if (PredHasWeights || SuccHasWeights)
1363 Weights.push_back(WeightsForHandled[Case.Value]);
1364 PredCases.push_back(Case);
1365 ++NewSuccessors[Case.Dest];
1366 PTIHandled.erase(Case.Value); // This constant is taken care of
1367 }
1368
1369 // If there are any constants vectored to BB that TI doesn't handle,
1370 // they must go to the default destination of TI.
1371 for (ConstantInt *I : PTIHandled) {
1372 if (PredHasWeights || SuccHasWeights)
1373 Weights.push_back(WeightsForHandled[I]);
1374 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1375 ++NewSuccessors[BBDefault];
1376 }
1377 }
1378
1379 // Okay, at this point, we know which new successor Pred will get. Make
1380 // sure we update the number of entries in the PHI nodes for these
1381 // successors.
1382 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1383 if (DTU) {
1384 SuccsOfPred = {llvm::from_range, successors(Pred)};
1385 Updates.reserve(Updates.size() + NewSuccessors.size());
1386 }
1387 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1388 NewSuccessors) {
1389 for (auto I : seq(NewSuccessor.second)) {
1390 (void)I;
1391 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1392 }
1393 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1394 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1395 }
1396
1397 Builder.SetInsertPoint(PTI);
1398 // Convert pointer to int before we switch.
1399 if (CV->getType()->isPointerTy()) {
1400 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1401 "Should not end up here with unstable pointers");
1402 CV =
1403 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1404 }
1405
1406 // Now that the successors are updated, create the new Switch instruction.
1407 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1408 NewSI->setDebugLoc(PTI->getDebugLoc());
1409 for (ValueEqualityComparisonCase &V : PredCases)
1410 NewSI->addCase(V.Value, V.Dest);
1411
1412 if (PredHasWeights || SuccHasWeights)
1413 setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
1414 /*ElideAllZero=*/true);
1415
1417
1418 // Okay, last check. If BB is still a successor of PSI, then we must
1419 // have an infinite loop case. If so, add an infinitely looping block
1420 // to handle the case to preserve the behavior of the code.
1421 BasicBlock *InfLoopBlock = nullptr;
1422 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1423 if (NewSI->getSuccessor(i) == BB) {
1424 if (!InfLoopBlock) {
1425 // Insert it at the end of the function, because it's either code,
1426 // or it won't matter if it's hot. :)
1427 InfLoopBlock =
1428 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1429 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1430 if (DTU)
1431 Updates.push_back(
1432 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1433 }
1434 NewSI->setSuccessor(i, InfLoopBlock);
1435 }
1436
1437 if (DTU) {
1438 if (InfLoopBlock)
1439 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1440
1441 Updates.push_back({DominatorTree::Delete, Pred, BB});
1442
1443 DTU->applyUpdates(Updates);
1444 }
1445
1446 ++NumFoldValueComparisonIntoPredecessors;
1447 return true;
1448}
1449
1450/// The specified terminator is a value equality comparison instruction
1451/// (either a switch or a branch on "X == c").
1452/// See if any of the predecessors of the terminator block are value comparisons
1453/// on the same value. If so, and if safe to do so, fold them together.
1454bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1455 IRBuilder<> &Builder) {
1456 BasicBlock *BB = TI->getParent();
1457 Value *CV = isValueEqualityComparison(TI); // CondVal
1458 assert(CV && "Not a comparison?");
1459
1460 bool Changed = false;
1461
1462 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1463 while (!Preds.empty()) {
1464 BasicBlock *Pred = Preds.pop_back_val();
1465 Instruction *PTI = Pred->getTerminator();
1466
1467 // Don't try to fold into itself.
1468 if (Pred == BB)
1469 continue;
1470
1471 // See if the predecessor is a comparison with the same value.
1472 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1473 if (PCV != CV)
1474 continue;
1475
1476 SmallSetVector<BasicBlock *, 4> FailBlocks;
1477 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1478 for (auto *Succ : FailBlocks) {
1479 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1480 return false;
1481 }
1482 }
1483
1484 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1485 Changed = true;
1486 }
1487 return Changed;
1488}
1489
1490// If we would need to insert a select that uses the value of this invoke
1491// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1492// need to do this), we can't hoist the invoke, as there is nowhere to put the
1493// select in this case.
1495 Instruction *I1, Instruction *I2) {
1496 for (BasicBlock *Succ : successors(BB1)) {
1497 for (const PHINode &PN : Succ->phis()) {
1498 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1499 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1500 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1501 return false;
1502 }
1503 }
1504 }
1505 return true;
1506}
1507
1508// Get interesting characteristics of instructions that
1509// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1510// instructions can be reordered across.
1516
1518 unsigned Flags = 0;
1519 if (I->mayReadFromMemory())
1520 Flags |= SkipReadMem;
1521 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1522 // inalloca) across stacksave/stackrestore boundaries.
1523 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1524 Flags |= SkipSideEffect;
1526 Flags |= SkipImplicitControlFlow;
1527 return Flags;
1528}
1529
1530// Returns true if it is safe to reorder an instruction across preceding
1531// instructions in a basic block.
1532static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1533 // Don't reorder a store over a load.
1534 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1535 return false;
1536
1537 // If we have seen an instruction with side effects, it's unsafe to reorder an
1538 // instruction which reads memory or itself has side effects.
1539 if ((Flags & SkipSideEffect) &&
1540 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1541 return false;
1542
1543 // Reordering across an instruction which does not necessarily transfer
1544 // control to the next instruction is speculation.
1546 return false;
1547
1548 // Hoisting of llvm.deoptimize is only legal together with the next return
1549 // instruction, which this pass is not always able to do.
1550 if (auto *CB = dyn_cast<CallBase>(I))
1551 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1552 return false;
1553
1554 // It's also unsafe/illegal to hoist an instruction above its instruction
1555 // operands
1556 BasicBlock *BB = I->getParent();
1557 for (Value *Op : I->operands()) {
1558 if (auto *J = dyn_cast<Instruction>(Op))
1559 if (J->getParent() == BB)
1560 return false;
1561 }
1562
1563 return true;
1564}
1565
1566static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1567
1568/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1569/// instructions \p I1 and \p I2 can and should be hoisted.
1571 const TargetTransformInfo &TTI) {
1572 // If we're going to hoist a call, make sure that the two instructions
1573 // we're commoning/hoisting are both marked with musttail, or neither of
1574 // them is marked as such. Otherwise, we might end up in a situation where
1575 // we hoist from a block where the terminator is a `ret` to a block where
1576 // the terminator is a `br`, and `musttail` calls expect to be followed by
1577 // a return.
1578 auto *C1 = dyn_cast<CallInst>(I1);
1579 auto *C2 = dyn_cast<CallInst>(I2);
1580 if (C1 && C2)
1581 if (C1->isMustTailCall() != C2->isMustTailCall())
1582 return false;
1583
1584 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1585 return false;
1586
1587 // If any of the two call sites has nomerge or convergent attribute, stop
1588 // hoisting.
1589 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1590 if (CB1->cannotMerge() || CB1->isConvergent())
1591 return false;
1592 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1593 if (CB2->cannotMerge() || CB2->isConvergent())
1594 return false;
1595
1596 return true;
1597}
1598
1599/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1600/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1601/// hoistCommonCodeFromSuccessors. e.g. The input:
1602/// I1 DVRs: { x, z },
1603/// OtherInsts: { I2 DVRs: { x, y, z } }
1604/// would result in hoisting only DbgVariableRecord x.
1606 Instruction *TI, Instruction *I1,
1607 SmallVectorImpl<Instruction *> &OtherInsts) {
1608 if (!I1->hasDbgRecords())
1609 return;
1610 using CurrentAndEndIt =
1611 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1612 // Vector of {Current, End} iterators.
1614 Itrs.reserve(OtherInsts.size() + 1);
1615 // Helper lambdas for lock-step checks:
1616 // Return true if this Current == End.
1617 auto atEnd = [](const CurrentAndEndIt &Pair) {
1618 return Pair.first == Pair.second;
1619 };
1620 // Return true if all Current are identical.
1621 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1622 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1624 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1625 });
1626 };
1627
1628 // Collect the iterators.
1629 Itrs.push_back(
1630 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1631 for (Instruction *Other : OtherInsts) {
1632 if (!Other->hasDbgRecords())
1633 return;
1634 Itrs.push_back(
1635 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1636 }
1637
1638 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1639 // the lock-step DbgRecord are identical, hoist all of them to TI.
1640 // This replicates the dbg.* intrinsic behaviour in
1641 // hoistCommonCodeFromSuccessors.
1642 while (none_of(Itrs, atEnd)) {
1643 bool HoistDVRs = allIdentical(Itrs);
1644 for (CurrentAndEndIt &Pair : Itrs) {
1645 // Increment Current iterator now as we may be about to move the
1646 // DbgRecord.
1647 DbgRecord &DR = *Pair.first++;
1648 if (HoistDVRs) {
1649 DR.removeFromParent();
1650 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1651 }
1652 }
1653 }
1654}
1655
1657 const Instruction *I2) {
1658 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1659 return true;
1660
1661 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1662 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1663 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1664 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1665 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1666
1667 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1668 return I1->getOperand(0) == I2->getOperand(1) &&
1669 I1->getOperand(1) == I2->getOperand(0) &&
1670 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1671 }
1672
1673 return false;
1674}
1675
1676/// If the target supports conditional faulting,
1677/// we look for the following pattern:
1678/// \code
1679/// BB:
1680/// ...
1681/// %cond = icmp ult %x, %y
1682/// br i1 %cond, label %TrueBB, label %FalseBB
1683/// FalseBB:
1684/// store i32 1, ptr %q, align 4
1685/// ...
1686/// TrueBB:
1687/// %maskedloadstore = load i32, ptr %b, align 4
1688/// store i32 %maskedloadstore, ptr %p, align 4
1689/// ...
1690/// \endcode
1691///
1692/// and transform it into:
1693///
1694/// \code
1695/// BB:
1696/// ...
1697/// %cond = icmp ult %x, %y
1698/// %maskedloadstore = cload i32, ptr %b, %cond
1699/// cstore i32 %maskedloadstore, ptr %p, %cond
1700/// cstore i32 1, ptr %q, ~%cond
1701/// br i1 %cond, label %TrueBB, label %FalseBB
1702/// FalseBB:
1703/// ...
1704/// TrueBB:
1705/// ...
1706/// \endcode
1707///
1708/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1709/// e.g.
1710///
1711/// \code
1712/// %vcond = bitcast i1 %cond to <1 x i1>
1713/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1714/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1715/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1716/// call void @llvm.masked.store.v1i32.p0
1717/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1718/// %cond.not = xor i1 %cond, true
1719/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1720/// call void @llvm.masked.store.v1i32.p0
1721/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1722/// \endcode
1723///
1724/// So we need to turn hoisted load/store into cload/cstore.
1725///
1726/// \param BI The branch instruction.
1727/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1728/// will be speculated.
1729/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1731 BranchInst *BI,
1732 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1733 std::optional<bool> Invert, Instruction *Sel) {
1734 auto &Context = BI->getParent()->getContext();
  // Scalar accesses are rewritten as masked intrinsics on <1 x Ty> vectors,
  // guarded by a <1 x i1> mask built from the branch condition.
1735 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1736 auto *Cond = BI->getOperand(0);
1737 // Construct the condition if needed.
1738 BasicBlock *BB = BI->getParent();
1739 Value *Mask = nullptr;
1740 Value *MaskFalse = nullptr;
1741 Value *MaskTrue = nullptr;
  // With Invert (triangle CFG): a single mask, with the condition negated when
  // the false successor is the speculated one. Without Invert (diamond CFG):
  // one mask per successor, selected per instruction in the loop below.
1742 if (Invert.has_value()) {
1743 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1744 Mask = Builder.CreateBitCast(
1745 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1746 VCondTy);
1747 } else {
1748 IRBuilder<> Builder(BI);
1749 MaskFalse = Builder.CreateBitCast(
1750 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1751 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1752 }
1753 auto PeekThroughBitcasts = [](Value *V) {
1754 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1755 V = BitCast->getOperand(0);
1756 return V;
1757 };
1758 for (auto *I : SpeculatedConditionalLoadsStores) {
1759 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1760 if (!Invert.has_value())
1761 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1762 // We currently assume conditional faulting load/store is supported for
1763 // scalar types only when creating new instructions. This can be easily
1764 // extended for vector types in the future.
1765 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1766 auto *Op0 = I->getOperand(0);
1767 CallInst *MaskedLoadStore = nullptr;
1768 if (auto *LI = dyn_cast<LoadInst>(I)) {
1769 // Handle Load.
1770 auto *Ty = I->getType();
1771 PHINode *PN = nullptr;
1772 Value *PassThru = nullptr;
  // In the triangle case, a PHI user at the merge point supplies the
  // pass-through value, i.e. the value the masked load yields when the
  // mask is false.
1773 if (Invert.has_value())
1774 for (User *U : I->users()) {
1775 if ((PN = dyn_cast<PHINode>(U))) {
1776 PassThru = Builder.CreateBitCast(
1777 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1778 FixedVectorType::get(Ty, 1));
1779 } else if (auto *Ins = cast<Instruction>(U);
1780 Sel && Ins->getParent() == BB) {
1781 // This happens when store or/and a speculative instruction between
1782 // load and store were hoisted to the BB. Make sure the masked load
1783 // inserted before its use.
1784 // We assume there's one of such use.
1785 Builder.SetInsertPoint(Ins);
1786 }
1787 }
1788 MaskedLoadStore = Builder.CreateMaskedLoad(
1789 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1790 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1791 if (PN)
1792 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1793 I->replaceAllUsesWith(NewLoadStore);
1794 } else {
1795 // Handle Store.
1796 auto *StoredVal = Builder.CreateBitCast(
1797 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1798 MaskedLoadStore = Builder.CreateMaskedStore(
1799 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1800 }
1801 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1802 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1803 //
1804 // !nonnull, !align : Not support pointer type, no need to keep.
1805 // !range: Load type is changed from scalar to vector, but the metadata on
1806 // vector specifies a per-element range, so the semantics stay the
1807 // same. Keep it.
1808 // !annotation: Not impact semantics. Keep it.
1809 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1810 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1811 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1812 // FIXME: DIAssignID is not supported for masked store yet.
1813 // (Verifier::visitDIAssignIDMetadata)
1815 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1816 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1817 });
1818 MaskedLoadStore->copyMetadata(*I);
1819 I->eraseFromParent();
1820 }
1821}
1822
1824 const TargetTransformInfo &TTI) {
  // Only simple (non-volatile, non-atomic) loads/stores qualify, and only
  // while the corresponding cond-faulting hoisting option is enabled.
1825 // Not handle volatile or atomic.
1826 bool IsStore = false;
1827 if (auto *L = dyn_cast<LoadInst>(I)) {
1828 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1829 return false;
1830 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1831 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1832 return false;
1833 IsStore = true;
1834 } else
1835 return false;
1836
1837 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1838 // That's why we have the alignment limitation.
1839 // FIXME: Update the prototype of the intrinsics?
  // The target must also support conditional load/store for this scalar type.
1840 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1842}
1843
1844/// Hoist any common code in the successor blocks up into the block. This
1845/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1846/// given, only perform hoisting in case all successors blocks contain matching
1847/// instructions only. In that case, all instructions can be hoisted and the
1848/// original branch will be replaced and selects for PHIs are added.
1849bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1850 bool AllInstsEqOnly) {
1851 // This does very trivial matching, with limited scanning, to find identical
1852 // instructions in the two blocks. In particular, we don't want to get into
1853 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1854 // such, we currently just scan for obviously identical instructions in an
1855 // identical order, possibly separated by the same number of non-identical
1856 // instructions.
1857 BasicBlock *BB = TI->getParent();
1858 unsigned int SuccSize = succ_size(BB);
1859 if (SuccSize < 2)
1860 return false;
1861
1862 // If either of the blocks has it's address taken, then we can't do this fold,
1863 // because the code we'd hoist would no longer run when we jump into the block
1864 // by it's address.
1865 for (auto *Succ : successors(BB))
1866 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1867 return false;
1868
1869 // The second of pair is a SkipFlags bitmask.
1870 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1871 SmallVector<SuccIterPair, 8> SuccIterPairs;
1872 for (auto *Succ : successors(BB)) {
1873 BasicBlock::iterator SuccItr = Succ->begin();
1874 if (isa<PHINode>(*SuccItr))
1875 return false;
1876 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1877 }
1878
1879 if (AllInstsEqOnly) {
1880 // Check if all instructions in the successor blocks match. This allows
1881 // hoisting all instructions and removing the blocks we are hoisting from,
1882 // so does not add any new instructions.
1884 // Check if sizes and terminators of all successors match.
1885 bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
1886 Instruction *Term0 = Succs[0]->getTerminator();
1887 Instruction *Term = Succ->getTerminator();
1888 return !Term->isSameOperationAs(Term0) ||
1889 !equal(Term->operands(), Term0->operands()) ||
1890 Succs[0]->size() != Succ->size();
1891 });
1892 if (!AllSame)
1893 return false;
1894 if (AllSame) {
1895 LockstepReverseIterator<true> LRI(Succs);
1896 while (LRI.isValid()) {
1897 Instruction *I0 = (*LRI)[0];
1898 if (any_of(*LRI, [I0](Instruction *I) {
1899 return !areIdenticalUpToCommutativity(I0, I);
1900 })) {
1901 return false;
1902 }
1903 --LRI;
1904 }
1905 }
1906 // Now we know that all instructions in all successors can be hoisted. Let
1907 // the loop below handle the hoisting.
1908 }
1909
1910 // Count how many instructions were not hoisted so far. There's a limit on how
1911 // many instructions we skip, serving as a compilation time control as well as
1912 // preventing excessive increase of life ranges.
1913 unsigned NumSkipped = 0;
1914 // If we find an unreachable instruction at the beginning of a basic block, we
1915 // can still hoist instructions from the rest of the basic blocks.
1916 if (SuccIterPairs.size() > 2) {
1917 erase_if(SuccIterPairs,
1918 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1919 if (SuccIterPairs.size() < 2)
1920 return false;
1921 }
1922
1923 bool Changed = false;
1924
  // Lockstep scan: compare the next instruction of every successor in parallel;
  // hoist when they all match, otherwise skip a bounded number of mismatching
  // positions (bounded by HoistCommonSkipLimit).
1925 for (;;) {
1926 auto *SuccIterPairBegin = SuccIterPairs.begin();
1927 auto &BB1ItrPair = *SuccIterPairBegin++;
1928 auto OtherSuccIterPairRange =
1929 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1930 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1931
1932 Instruction *I1 = &*BB1ItrPair.first;
1933
1934 bool AllInstsAreIdentical = true;
1935 bool HasTerminator = I1->isTerminator();
1936 for (auto &SuccIter : OtherSuccIterRange) {
1937 Instruction *I2 = &*SuccIter;
1938 HasTerminator |= I2->isTerminator();
1939 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1940 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1941 AllInstsAreIdentical = false;
1942 }
1943
1944 SmallVector<Instruction *, 8> OtherInsts;
1945 for (auto &SuccIter : OtherSuccIterRange)
1946 OtherInsts.push_back(&*SuccIter);
1947
1948 // If we are hoisting the terminator instruction, don't move one (making a
1949 // broken BB), instead clone it, and remove BI.
1950 if (HasTerminator) {
1951 // Even if BB, which contains only one unreachable instruction, is ignored
1952 // at the beginning of the loop, we can hoist the terminator instruction.
1953 // If any instructions remain in the block, we cannot hoist terminators.
1954 if (NumSkipped || !AllInstsAreIdentical) {
1955 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1956 return Changed;
1957 }
1958
1959 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1960 Changed;
1961 }
1962
  // Being pairwise identical is necessary but not sufficient: hoisting must
  // also be safe given whatever was skipped over earlier (SkipFlags).
1963 if (AllInstsAreIdentical) {
1964 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1965 AllInstsAreIdentical =
1966 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1967 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1968 Instruction *I2 = &*Pair.first;
1969 unsigned SkipFlagsBB2 = Pair.second;
1970 // Even if the instructions are identical, it may not
1971 // be safe to hoist them if we have skipped over
1972 // instructions with side effects or their operands
1973 // weren't hoisted.
1974 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1976 });
1977 }
1978
1979 if (AllInstsAreIdentical) {
1980 BB1ItrPair.first++;
1981 // For a normal instruction, we just move one to right before the
1982 // branch, then replace all uses of the other with the first. Finally,
1983 // we remove the now redundant second instruction.
1984 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1985 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1986 // and leave any that were not hoisted behind (by calling moveBefore
1987 // rather than moveBeforePreserving).
1988 I1->moveBefore(TI->getIterator());
1989 for (auto &SuccIter : OtherSuccIterRange) {
1990 Instruction *I2 = &*SuccIter++;
1991 assert(I2 != I1);
1992 if (!I2->use_empty())
1993 I2->replaceAllUsesWith(I1);
1994 I1->andIRFlags(I2);
1995 if (auto *CB = dyn_cast<CallBase>(I1)) {
1996 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
1997 assert(Success && "We should not be trying to hoist callbases "
1998 "with non-intersectable attributes");
1999 // For NDEBUG Compile.
2000 (void)Success;
2001 }
2002
2003 combineMetadataForCSE(I1, I2, true);
2004 // I1 and I2 are being combined into a single instruction. Its debug
2005 // location is the merged locations of the original instructions.
2006 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2007 I2->eraseFromParent();
2008 }
2009 if (!Changed)
2010 NumHoistCommonCode += SuccIterPairs.size();
2011 Changed = true;
2012 NumHoistCommonInstrs += SuccIterPairs.size();
2013 } else {
2014 if (NumSkipped >= HoistCommonSkipLimit) {
2015 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2016 return Changed;
2017 }
2018 // We are about to skip over a pair of non-identical instructions. Record
2019 // if any have characteristics that would prevent reordering instructions
2020 // across them.
2021 for (auto &SuccIterPair : SuccIterPairs) {
2022 Instruction *I = &*SuccIterPair.first++;
2023 SuccIterPair.second |= skippedInstrFlags(I);
2024 }
2025 ++NumSkipped;
2026 }
2027 }
2028}
2029
2030bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2031 Instruction *TI, Instruction *I1,
2032 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
2033
  // BI is non-null only when TI is a conditional branch (the two-successor
  // "if" case); that case enables invoke hoisting and the PHI-to-select
  // rewriting below.
2034 auto *BI = dyn_cast<BranchInst>(TI);
2035
2036 bool Changed = false;
2037 BasicBlock *TIParent = TI->getParent();
2038 BasicBlock *BB1 = I1->getParent();
2039
2040 // Use only for an if statement.
2041 auto *I2 = *OtherSuccTIs.begin();
2042 auto *BB2 = I2->getParent();
2043 if (BI) {
2044 assert(OtherSuccTIs.size() == 1);
2045 assert(BI->getSuccessor(0) == I1->getParent());
2046 assert(BI->getSuccessor(1) == I2->getParent());
2047 }
2048
2049 // In the case of an if statement, we try to hoist an invoke.
2050 // FIXME: Can we define a safety predicate for CallBr?
2051 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2052 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2053 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2054 return false;
2055
2056 // TODO: callbr hoisting currently disabled pending further study.
2057 if (isa<CallBrInst>(I1))
2058 return false;
2059
  // Bail out if any PHI in a successor disagrees between the hoisted blocks
  // and we cannot (or should not) reconcile it with a select.
2060 for (BasicBlock *Succ : successors(BB1)) {
2061 for (PHINode &PN : Succ->phis()) {
2062 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2063 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2064 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2065 if (BB1V == BB2V)
2066 continue;
2067
2068 // In the case of an if statement, check for
2069 // passingValueIsAlwaysUndefined here because we would rather eliminate
2070 // undefined control flow then converting it to a select.
2071 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2073 return false;
2074 }
2075 }
2076 }
2077
2078 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2079 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2080 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2081 // Clone the terminator and hoist it into the pred, without any debug info.
2082 Instruction *NT = I1->clone();
2083 NT->insertInto(TIParent, TI->getIterator());
2084 if (!NT->getType()->isVoidTy()) {
2085 I1->replaceAllUsesWith(NT);
2086 for (Instruction *OtherSuccTI : OtherSuccTIs)
2087 OtherSuccTI->replaceAllUsesWith(NT);
2088 NT->takeName(I1);
2089 }
2090 Changed = true;
2091 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2092
2093 // Ensure terminator gets a debug location, even an unknown one, in case
2094 // it involves inlinable calls.
2096 Locs.push_back(I1->getDebugLoc());
2097 for (auto *OtherSuccTI : OtherSuccTIs)
2098 Locs.push_back(OtherSuccTI->getDebugLoc());
2099 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2100
2101 // PHIs created below will adopt NT's merged DebugLoc.
2102 IRBuilder<NoFolder> Builder(NT);
2103
2104 // In the case of an if statement, hoisting one of the terminators from our
2105 // successor is a great thing. Unfortunately, the successors of the if/else
2106 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2107 // must agree for all PHI nodes, so we insert select instruction to compute
2108 // the final result.
2109 if (BI) {
  // One select is created and reused per distinct (BB1V, BB2V) value pair.
2110 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2111 for (BasicBlock *Succ : successors(BB1)) {
2112 for (PHINode &PN : Succ->phis()) {
2113 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2114 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2115 if (BB1V == BB2V)
2116 continue;
2117
2118 // These values do not agree. Insert a select instruction before NT
2119 // that determines the right value.
2120 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2121 if (!SI) {
2122 // Propagate fast-math-flags from phi node to its replacement select.
2124 BI->getCondition(), BB1V, BB2V,
2125 isa<FPMathOperator>(PN) ? &PN : nullptr,
2126 BB1V->getName() + "." + BB2V->getName(), BI));
2127 }
2128
2129 // Make the PHI node use the select for all incoming values for BB1/BB2
2130 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2131 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2132 PN.setIncomingValue(i, SI);
2133 }
2134 }
2135 }
2136
2138
2139 // Update any PHI nodes in our new successors.
2140 for (BasicBlock *Succ : successors(BB1)) {
2141 addPredecessorToBlock(Succ, TIParent, BB1);
2142 if (DTU)
2143 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2144 }
2145
2146 if (DTU)
2147 for (BasicBlock *Succ : successors(TI))
2148 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2149
2151 if (DTU)
2152 DTU->applyUpdates(Updates);
2153 return Changed;
2154}
2155
2156// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2157// into variables.
// Returns true when it is acceptable (cost-wise) to rewrite operand OpIdx of I
// as a variable (e.g. a PHI of differing values): div/rem keep their divisor
// (operand 1) constant, and intrinsic operands are never rewritten.
2159 int OpIdx) {
2160 // Divide/Remainder by constant is typically much cheaper than by variable.
2161 if (I->isIntDivRem())
2162 return OpIdx != 1;
2163 return !isa<IntrinsicInst>(I);
2164}
2165
2166// All instructions in Insts belong to different blocks that all unconditionally
2167// branch to a common successor. Analyze each instruction and return true if it
2168// would be possible to sink them into their successor, creating one common
2169// instruction instead. For every value that would be required to be provided by
2170// PHI node (because an operand varies in each input block), add to PHIOperands.
2173 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2174 // Prune out obviously bad instructions to move. Each instruction must have
2175 // the same number of uses, and we check later that the uses are consistent.
2176 std::optional<unsigned> NumUses;
2177 for (auto *I : Insts) {
2178 // These instructions may change or break semantics if moved.
2179 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2180 I->getType()->isTokenTy())
2181 return false;
2182
2183 // Do not try to sink an instruction in an infinite loop - it can cause
2184 // this algorithm to infinite loop.
2185 if (I->getParent()->getSingleSuccessor() == I->getParent())
2186 return false;
2187
2188 // Conservatively return false if I is an inline-asm instruction. Sinking
2189 // and merging inline-asm instructions can potentially create arguments
2190 // that cannot satisfy the inline-asm constraints.
2191 // If the instruction has nomerge or convergent attribute, return false.
2192 if (const auto *C = dyn_cast<CallBase>(I))
2193 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2194 return false;
2195
2196 if (!NumUses)
2197 NumUses = I->getNumUses();
2198 else if (NumUses != I->getNumUses())
2199 return false;
2200 }
2201
  // All candidates must be the same operation (up to intersectable attributes)
  // and carry identical MMRA metadata.
2202 const Instruction *I0 = Insts.front();
2203 const auto I0MMRA = MMRAMetadata(*I0);
2204 for (auto *I : Insts) {
2205 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2206 return false;
2207
2208 // Treat MMRAs conservatively. This pass can be quite aggressive and
2209 // could drop a lot of MMRAs otherwise.
2210 if (MMRAMetadata(*I) != I0MMRA)
2211 return false;
2212 }
2213
2214 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2215 // then the other phi operands must match the instructions from Insts. This
2216 // also has to hold true for any phi nodes that would be created as a result
2217 // of sinking. Both of these cases are represented by PhiOperands.
2218 for (const Use &U : I0->uses()) {
2219 auto It = PHIOperands.find(&U);
2220 if (It == PHIOperands.end())
2221 // There may be uses in other blocks when sinking into a loop header.
2222 return false;
2223 if (!equal(Insts, It->second))
2224 return false;
2225 }
2226
2227 // For calls to be sinkable, they must all be indirect, or have same callee.
2228 // I.e. if we have two direct calls to different callees, we don't want to
2229 // turn that into an indirect call. Likewise, if we have an indirect call,
2230 // and a direct call, we don't actually want to have a single indirect call.
2231 if (isa<CallBase>(I0)) {
2232 auto IsIndirectCall = [](const Instruction *I) {
2233 return cast<CallBase>(I)->isIndirectCall();
2234 };
2235 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2236 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2237 if (HaveIndirectCalls) {
2238 if (!AllCallsAreIndirect)
2239 return false;
2240 } else {
2241 // All callees must be identical.
2242 Value *Callee = nullptr;
2243 for (const Instruction *I : Insts) {
2244 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2245 if (!Callee)
2246 Callee = CurrCallee;
2247 else if (Callee != CurrCallee)
2248 return false;
2249 }
2250 }
2251 }
2252
  // Operands that differ across Insts will require a PHI in the successor;
  // record the per-block values in PHIOperands for the caller.
2253 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2254 Value *Op = I0->getOperand(OI);
2255 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2256 assert(I->getNumOperands() == I0->getNumOperands());
2257 return I->getOperand(OI) == I0->getOperand(OI);
2258 };
2259 if (!all_of(Insts, SameAsI0)) {
2262 // We can't create a PHI from this GEP.
2263 return false;
2264 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2265 for (auto *I : Insts)
2266 Ops.push_back(I->getOperand(OI));
2267 }
2268 }
2269 return true;
2270}
2271
2272// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2273// instruction of every block in Blocks to their common successor, commoning
2274// into one instruction.
2276 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2277
2278 // canSinkInstructions returning true guarantees that every block has at
2279 // least one non-terminator instruction.
  // Collect the instruction to sink from each block: the one immediately
  // before that block's terminator.
2281 for (auto *BB : Blocks) {
2282 Instruction *I = BB->getTerminator();
2283 I = I->getPrevNode();
2284 Insts.push_back(I);
2285 }
2286
2287 // We don't need to do any more checking here; canSinkInstructions should
2288 // have done it all for us.
2289 SmallVector<Value*, 4> NewOperands;
2290 Instruction *I0 = Insts.front();
2291 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2292 // This check is different to that in canSinkInstructions. There, we
2293 // cared about the global view once simplifycfg (and instcombine) have
2294 // completed - it takes into account PHIs that become trivially
2295 // simplifiable. However here we need a more local view; if an operand
2296 // differs we create a PHI and rely on instcombine to clean up the very
2297 // small mess we may make.
2298 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2299 return I->getOperand(O) != I0->getOperand(O);
2300 });
2301 if (!NeedPHI) {
2302 NewOperands.push_back(I0->getOperand(O));
2303 continue;
2304 }
2305
2306 // Create a new PHI in the successor block and populate it.
2307 auto *Op = I0->getOperand(O);
2308 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2309 auto *PN =
2310 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2311 PN->insertBefore(BBEnd->begin());
2312 for (auto *I : Insts)
2313 PN->addIncoming(I->getOperand(O), I->getParent());
2314 NewOperands.push_back(PN);
2315 }
2316
2317 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2318 // and move it to the start of the successor block.
2319 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2320 I0->getOperandUse(O).set(NewOperands[O]);
2321
2322 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2323
2324 // Update metadata and IR flags, and merge debug locations.
2325 for (auto *I : Insts)
2326 if (I != I0) {
2327 // The debug location for the "common" instruction is the merged locations
2328 // of all the commoned instructions. We start with the original location
2329 // of the "common" instruction and iteratively merge each location in the
2330 // loop below.
2331 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2332 // However, as N-way merge for CallInst is rare, so we use simplified API
2333 // instead of using complex API for N-way merge.
2334 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2335 combineMetadataForCSE(I0, I, true);
2336 I0->andIRFlags(I);
2337 if (auto *CB = dyn_cast<CallBase>(I0)) {
2338 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2339 assert(Success && "We should not be trying to sink callbases "
2340 "with non-intersectable attributes");
2341 // For NDEBUG Compile.
2342 (void)Success;
2343 }
2344 }
2345
2346 for (User *U : make_early_inc_range(I0->users())) {
2347 // canSinkLastInstruction checked that all instructions are only used by
2348 // phi nodes in a way that allows replacing the phi node with the common
2349 // instruction.
2350 auto *PN = cast<PHINode>(U);
2351 PN->replaceAllUsesWith(I0);
2352 PN->eraseFromParent();
2353 }
2354
2355 // Finally nuke all instructions apart from the common instruction.
2356 for (auto *I : Insts) {
2357 if (I == I0)
2358 continue;
2359 // The remaining uses are debug users, replace those with the common inst.
2360 // In most (all?) cases this just introduces a use-before-def.
2361 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2362 I->replaceAllUsesWith(I0);
2363 I->eraseFromParent();
2364 }
2365}
2366
2367/// Check whether BB's predecessors end with unconditional branches. If it is
2368/// true, sink any common code from the predecessors to BB.
2370 DomTreeUpdater *DTU) {
2371 // We support two situations:
2372 // (1) all incoming arcs are unconditional
2373 // (2) there are non-unconditional incoming arcs
2374 //
2375 // (2) is very common in switch defaults and
2376 // else-if patterns;
2377 //
2378 // if (a) f(1);
2379 // else if (b) f(2);
2380 //
2381 // produces:
2382 //
2383 // [if]
2384 // / \
2385 // [f(1)] [if]
2386 // | | \
2387 // | | |
2388 // | [f(2)]|
2389 // \ | /
2390 // [ end ]
2391 //
2392 // [end] has two unconditional predecessor arcs and one conditional. The
2393 // conditional refers to the implicit empty 'else' arc. This conditional
2394 // arc can also be caused by an empty default block in a switch.
2395 //
2396 // In this case, we attempt to sink code from all *unconditional* arcs.
2397 // If we can sink instructions from these arcs (determined during the scan
2398 // phase below) we insert a common successor for all unconditional arcs and
2399 // connect that to [end], to enable sinking:
2400 //
2401 // [if]
2402 // / \
2403 // [x(1)] [if]
2404 // | | \
2405 // | | \
2406 // | [x(2)] |
2407 // \ / |
2408 // [sink.split] |
2409 // \ /
2410 // [ end ]
2411 //
2412 SmallVector<BasicBlock*,4> UnconditionalPreds;
2413 bool HaveNonUnconditionalPredecessors = false;
2414 for (auto *PredBB : predecessors(BB)) {
2415 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2416 if (PredBr && PredBr->isUnconditional())
2417 UnconditionalPreds.push_back(PredBB);
2418 else
2419 HaveNonUnconditionalPredecessors = true;
2420 }
2421 if (UnconditionalPreds.size() < 2)
2422 return false;
2423
2424 // We take a two-step approach to tail sinking. First we scan from the end of
2425 // each block upwards in lockstep. If the n'th instruction from the end of each
2426 // block can be sunk, those instructions are added to ValuesToSink and we
2427 // carry on. If we can sink an instruction but need to PHI-merge some operands
2428 // (because they're not identical in each instruction) we add these to
2429 // PHIOperands.
2430 // We prepopulate PHIOperands with the phis that already exist in BB.
2432 for (PHINode &PN : BB->phis()) {
2434 for (const Use &U : PN.incoming_values())
2435 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2436 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2437 for (BasicBlock *Pred : UnconditionalPreds)
2438 Ops.push_back(*IncomingVals[Pred]);
2439 }
2440
  // Phase 1: walk backwards in lockstep, counting how many trailing
  // instructions (ScanIdx) are mechanically sinkable.
2441 int ScanIdx = 0;
2442 SmallPtrSet<Value*,4> InstructionsToSink;
2443 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2444 while (LRI.isValid() &&
2445 canSinkInstructions(*LRI, PHIOperands)) {
2446 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2447 << "\n");
2448 InstructionsToSink.insert_range(*LRI);
2449 ++ScanIdx;
2450 --LRI;
2451 }
2452
2453 // If no instructions can be sunk, early-return.
2454 if (ScanIdx == 0)
2455 return false;
2456
2457 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2458
  // Phase 2: trim ScanIdx by profitability (PHI creation cost), unless the
  // block is followed by deopt/unreachable, where cost does not matter.
2459 if (!followedByDeoptOrUnreachable) {
2460 // Check whether this is the pointer operand of a load/store.
2461 auto IsMemOperand = [](Use &U) {
2462 auto *I = cast<Instruction>(U.getUser());
2463 if (isa<LoadInst>(I))
2464 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2465 if (isa<StoreInst>(I))
2466 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2467 return false;
2468 };
2469
2470 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2471 // actually sink before encountering instruction that is unprofitable to
2472 // sink?
2473 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2474 unsigned NumPHIInsts = 0;
2475 for (Use &U : (*LRI)[0]->operands()) {
2476 auto It = PHIOperands.find(&U);
2477 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2478 return InstructionsToSink.contains(V);
2479 })) {
2480 ++NumPHIInsts;
2481 // Do not separate a load/store from the gep producing the address.
2482 // The gep can likely be folded into the load/store as an addressing
2483 // mode. Additionally, a load of a gep is easier to analyze than a
2484 // load of a phi.
2485 if (IsMemOperand(U) &&
2486 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2487 return false;
2488 // FIXME: this check is overly optimistic. We may end up not sinking
2489 // said instruction, due to the very same profitability check.
2490 // See @creating_too_many_phis in sink-common-code.ll.
2491 }
2492 }
2493 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2494 return NumPHIInsts <= 1;
2495 };
2496
2497 // We've determined that we are going to sink last ScanIdx instructions,
2498 // and recorded them in InstructionsToSink. Now, some instructions may be
2499 // unprofitable to sink. But that determination depends on the instructions
2500 // that we are going to sink.
2501
2502 // First, forward scan: find the first instruction unprofitable to sink,
2503 // recording all the ones that are profitable to sink.
2504 // FIXME: would it be better, after we detect that not all are profitable.
2505 // to either record the profitable ones, or erase the unprofitable ones?
2506 // Maybe we need to choose (at runtime) the one that will touch least
2507 // instrs?
2508 LRI.reset();
2509 int Idx = 0;
2510 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2511 while (Idx < ScanIdx) {
2512 if (!ProfitableToSinkInstruction(LRI)) {
2513 // Too many PHIs would be created.
2514 LLVM_DEBUG(
2515 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2516 break;
2517 }
2518 InstructionsProfitableToSink.insert_range(*LRI);
2519 --LRI;
2520 ++Idx;
2521 }
2522
2523 // If no instructions can be sunk, early-return.
2524 if (Idx == 0)
2525 return false;
2526
2527 // Did we determine that (only) some instructions are unprofitable to sink?
2528 if (Idx < ScanIdx) {
2529 // Okay, some instructions are unprofitable.
2530 ScanIdx = Idx;
2531 InstructionsToSink = InstructionsProfitableToSink;
2532
2533 // But, that may make other instructions unprofitable, too.
2534 // So, do a backward scan, do any earlier instructions become
2535 // unprofitable?
2536 assert(
2537 !ProfitableToSinkInstruction(LRI) &&
2538 "We already know that the last instruction is unprofitable to sink");
2539 ++LRI;
2540 --Idx;
2541 while (Idx >= 0) {
2542 // If we detect that an instruction becomes unprofitable to sink,
2543 // all earlier instructions won't be sunk either,
2544 // so preemptively keep InstructionsProfitableToSink in sync.
2545 // FIXME: is this the most performant approach?
2546 for (auto *I : *LRI)
2547 InstructionsProfitableToSink.erase(I);
2548 if (!ProfitableToSinkInstruction(LRI)) {
2549 // Everything starting with this instruction won't be sunk.
2550 ScanIdx = Idx;
2551 InstructionsToSink = InstructionsProfitableToSink;
2552 }
2553 ++LRI;
2554 --Idx;
2555 }
2556 }
2557
2558 // If no instructions can be sunk, early-return.
2559 if (ScanIdx == 0)
2560 return false;
2561 }
2562
2563 bool Changed = false;
2564
2565 if (HaveNonUnconditionalPredecessors) {
2566 if (!followedByDeoptOrUnreachable) {
2567 // It is always legal to sink common instructions from unconditional
2568 // predecessors. However, if not all predecessors are unconditional,
2569 // this transformation might be pessimizing. So as a rule of thumb,
2570 // don't do it unless we'd sink at least one non-speculatable instruction.
2571 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2572 LRI.reset();
2573 int Idx = 0;
2574 bool Profitable = false;
2575 while (Idx < ScanIdx) {
2576 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2577 Profitable = true;
2578 break;
2579 }
2580 --LRI;
2581 ++Idx;
2582 }
2583 if (!Profitable)
2584 return false;
2585 }
2586
2587 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2588 // We have a conditional edge and we're going to sink some instructions.
2589 // Insert a new block postdominating all blocks we're going to sink from.
2590 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2591 // Edges couldn't be split.
2592 return false;
2593 Changed = true;
2594 }
2595
2596 // Now that we've analyzed all potential sinking candidates, perform the
2597 // actual sink. We iteratively sink the last non-terminator of the source
2598 // blocks into their common successor unless doing so would require too
2599 // many PHI instructions to be generated (currently only one PHI is allowed
2600 // per sunk instruction).
2601 //
2602 // We can use InstructionsToSink to discount values needing PHI-merging that will
2603 // actually be sunk in a later iteration. This allows us to be more
2604 // aggressive in what we sink. This does allow a false positive where we
2605 // sink presuming a later value will also be sunk, but stop half way through
2606 // and never actually sink it which means we produce more PHIs than intended.
2607 // This is unlikely in practice though.
2608 int SinkIdx = 0;
2609 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2610 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2611 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2612 << "\n");
2613
2614 // Because we've sunk every instruction in turn, the current instruction to
2615 // sink is always at index 0.
2616 LRI.reset();
2617
2618 sinkLastInstruction(UnconditionalPreds);
2619 NumSinkCommonInstrs++;
2620 Changed = true;
2621 }
2622 if (SinkIdx != 0)
2623 ++NumSinkCommonCode;
2624 return Changed;
2625}
2626
2627namespace {
2628
/// Groups the predecessor `invoke`s of a `landingpad` into sets whose members
/// are mutually mergeable, as decided by shouldBelongToSameSet().
2629struct CompatibleSets {
2630 using SetTy = SmallVector<InvokeInst *, 2>;
2631
  // NOTE(review): the declaration of the `Sets` container (a vector of SetTy)
  // appears to have been lost in extraction here — it is referenced by
  // getCompatibleSet() and by mergeCompatibleInvokes(); verify upstream.
2633
  /// Decide whether two `invoke`s could be merged into one (always called
  /// with exactly two candidates).
2634 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2635
  /// Find the existing set the given `invoke` is compatible with, or create
  /// a fresh empty set for it.
2636 SetTy &getCompatibleSet(InvokeInst *II);
2637
  /// Record the `invoke` into its compatible set.
2638 void insert(InvokeInst *II);
2639};
2640
2641CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2642 // Perform a linear scan over all the existing sets, see if the new `invoke`
2643 // is compatible with any particular set. Since we know that all the `invokes`
2644 // within a set are compatible, only check the first `invoke` in each set.
2645 // WARNING: at worst, this has quadratic complexity.
2646 for (CompatibleSets::SetTy &Set : Sets) {
2647 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2648 return Set;
2649 }
2650
2651 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2652 return Sets.emplace_back();
2653}
2654
2655void CompatibleSets::insert(InvokeInst *II) {
2656 getCompatibleSet(II).emplace_back(II);
2657}
2658
/// Check every precondition for merging exactly two `invoke`s: mergeability,
/// matching direct/indirect-ness and callees, matching normal destinations
/// with compatible PHI inputs, matching unwind destinations, identical
/// operations/bundles, and PHI-able data operands.
2659bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2660 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2661
2662 // Can we theoretically merge these `invoke`s?
2663 auto IsIllegalToMerge = [](InvokeInst *II) {
2664 return II->cannotMerge() || II->isInlineAsm();
2665 };
2666 if (any_of(Invokes, IsIllegalToMerge))
2667 return false;
2668
2669 // Either both `invoke`s must be direct,
2670 // or both `invoke`s must be indirect.
2671 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2672 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2673 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2674 if (HaveIndirectCalls) {
2675 if (!AllCallsAreIndirect)
2676 return false;
2677 } else {
2678 // All callees must be identical.
2679 Value *Callee = nullptr;
2680 for (InvokeInst *II : Invokes) {
2681 Value *CurrCallee = II->getCalledOperand();
2682 assert(CurrCallee && "There is always a called operand.");
2683 if (!Callee)
2684 Callee = CurrCallee;
2685 else if (Callee != CurrCallee)
2686 return false;
2687 }
2688 }
2689
2690 // Either both `invoke`s must not have a normal destination,
2691 // or both `invoke`s must have a normal destination,
2692 auto HasNormalDest = [](InvokeInst *II) {
2693 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2694 };
2695 if (any_of(Invokes, HasNormalDest)) {
2696 // Do not merge `invoke` that does not have a normal destination with one
2697 // that does have a normal destination, even though doing so would be legal.
2698 if (!all_of(Invokes, HasNormalDest))
2699 return false;
2700
2701 // All normal destinations must be identical.
2702 BasicBlock *NormalBB = nullptr;
2703 for (InvokeInst *II : Invokes) {
2704 BasicBlock *CurrNormalBB = II->getNormalDest();
2705 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2706 if (!NormalBB)
2707 NormalBB = CurrNormalBB;
2708 else if (NormalBB != CurrNormalBB)
2709 return false;
2710 }
2711
2712 // In the normal destination, the incoming values for these two `invoke`s
2713 // must be compatible.
2714 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
  // NOTE(review): the head of this call (likely
  // `if (!incomingValuesAreCompatible(`) appears lost in extraction — verify.
2716 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2717 &EquivalenceSet))
2718 return false;
2719 }
2720
2721#ifndef NDEBUG
2722 // All unwind destinations must be identical.
2723 // We know that because we have started from said unwind destination.
2724 BasicBlock *UnwindBB = nullptr;
2725 for (InvokeInst *II : Invokes) {
2726 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2727 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2728 if (!UnwindBB)
2729 UnwindBB = CurrUnwindBB;
2730 else
2731 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2732 }
2733#endif
2734
2735 // In the unwind destination, the incoming values for these two `invoke`s
2736 // must be compatible.
  // NOTE(review): the head of this call (likely
  // `if (!incomingValuesAreCompatible(`) appears lost in extraction — verify.
2738 Invokes.front()->getUnwindDest(),
2739 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2740 return false;
2741
2742 // Ignoring arguments, these `invoke`s must be identical,
2743 // including operand bundles.
2744 const InvokeInst *II0 = Invokes.front();
2745 for (auto *II : Invokes.drop_front())
2746 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2747 return false;
2748
2749 // Can we theoretically form the data operands for the merged `invoke`?
2750 auto IsIllegalToMergeArguments = [](auto Ops) {
2751 Use &U0 = std::get<0>(Ops);
2752 Use &U1 = std::get<1>(Ops);
2753 if (U0 == U1)
2754 return false;
  // NOTE(review): the head of this return (likely a negated
  // `canReplaceOperandWithVariable(...)` check) appears lost in extraction.
2756 U0.getOperandNo());
2757 };
2758 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2759 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2760 IsIllegalToMergeArguments))
2761 return false;
2762
2763 return true;
2764}
2765
2766} // namespace
2767
2768// Merge all invokes in the provided set, all of which are compatible
2769// as per the `CompatibleSets::shouldBelongToSameSet()`.
// NOTE(review): the first line of the signature (likely
// `static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,`)
// appears to have been lost in extraction — verify upstream.
2771 DomTreeUpdater *DTU) {
2772 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2773
  // NOTE(review): the declaration of `Updates` (a SmallVector of
  // DominatorTree update records) appears lost in extraction here.
2775 if (DTU)
2776 Updates.reserve(2 + 3 * Invokes.size());
2777
2778 bool HasNormalDest =
2779 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2780
2781 // Clone one of the invokes into a new basic block.
2782 // Since they are all compatible, it doesn't matter which invoke is cloned.
2783 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2784 InvokeInst *II0 = Invokes.front();
2785 BasicBlock *II0BB = II0->getParent();
2786 BasicBlock *InsertBeforeBlock =
2787 II0->getParent()->getIterator()->getNextNode();
2788 Function *Func = II0BB->getParent();
2789 LLVMContext &Ctx = II0->getContext();
2790
2791 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2792 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2793
2794 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2795 // NOTE: all invokes have the same attributes, so no handling needed.
2796 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2797
2798 if (!HasNormalDest) {
2799 // This set does not have a normal destination,
2800 // so just form a new block with unreachable terminator.
2801 BasicBlock *MergedNormalDest = BasicBlock::Create(
2802 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2803 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2804 UI->setDebugLoc(DebugLoc::getTemporary());
2805 MergedInvoke->setNormalDest(MergedNormalDest);
2806 }
2807
2808 // The unwind destination, however, remains identical for all invokes here.
2809
2810 return MergedInvoke;
2811 }();
2812
2813 if (DTU) {
2814 // Predecessor blocks that contained these invokes will now branch to
2815 // the new block that contains the merged invoke, ...
2816 for (InvokeInst *II : Invokes)
2817 Updates.push_back(
2818 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2819
2820 // ... which has the new `unreachable` block as normal destination,
2821 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2822 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2823 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2824 SuccBBOfMergedInvoke});
2825
2826 // Since predecessor blocks now unconditionally branch to a new block,
2827 // they no longer branch to their original successors.
2828 for (InvokeInst *II : Invokes)
2829 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2830 Updates.push_back(
2831 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2832 }
2833
2834 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2835
2836 // Form the merged operands for the merged invoke.
2837 for (Use &U : MergedInvoke->operands()) {
2838 // Only PHI together the indirect callees and data operands.
2839 if (MergedInvoke->isCallee(&U)) {
2840 if (!IsIndirectCall)
2841 continue;
2842 } else if (!MergedInvoke->isDataOperand(&U))
2843 continue;
2844
2845 // Don't create trivial PHI's with all-identical incoming values.
2846 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2847 return II->getOperand(U.getOperandNo()) != U.get();
2848 });
2849 if (!NeedPHI)
2850 continue;
2851
2852 // Form a PHI out of all the data ops under this index.
  // NOTE(review): the head of this statement (likely
  // `auto *PN = PHINode::Create(`) appears lost in extraction — verify.
2854 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2855 for (InvokeInst *II : Invokes)
2856 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2857
2858 U.set(PN);
2859 }
2860
2861 // We've ensured that each PHI node has compatible (identical) incoming values
2862 // when coming from each of the `invoke`s in the current merge set,
2863 // so update the PHI nodes accordingly.
2864 for (BasicBlock *Succ : successors(MergedInvoke))
2865 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2866 /*ExistPred=*/Invokes.front()->getParent());
2867
2868 // And finally, replace the original `invoke`s with an unconditional branch
2869 // to the block with the merged `invoke`. Also, give that merged `invoke`
2870 // the merged debugloc of all the original `invoke`s.
2871 DILocation *MergedDebugLoc = nullptr;
2872 for (InvokeInst *II : Invokes) {
2873 // Compute the debug location common to all the original `invoke`s.
2874 if (!MergedDebugLoc)
2875 MergedDebugLoc = II->getDebugLoc();
2876 else
2877 MergedDebugLoc =
2878 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2879
2880 // And replace the old `invoke` with an unconditional branch
2881 // to the block with the merged `invoke`.
2882 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2883 OrigSuccBB->removePredecessor(II->getParent());
2884 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2885 // The unconditional branch is part of the replacement for the original
2886 // invoke, so should use its DebugLoc.
2887 BI->setDebugLoc(II->getDebugLoc());
2888 bool Success = MergedInvoke->tryIntersectAttributes(II);
2889 assert(Success && "Merged invokes with incompatible attributes");
2890 // For NDEBUG Compile
2891 (void)Success;
2892 II->replaceAllUsesWith(MergedInvoke);
2893 II->eraseFromParent();
2894 ++NumInvokesMerged;
2895 }
2896 MergedInvoke->setDebugLoc(MergedDebugLoc);
2897 ++NumInvokeSetsFormed;
2898
2899 if (DTU)
2900 DTU->applyUpdates(Updates);
2901}
2902
2903/// If this block is a `landingpad` exception handling block, categorize all
2904/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2905/// being "mergeable" together, and then merge invokes in each set together.
2906///
2907/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2908/// [...] [...]
2909/// | |
2910/// [invoke0] [invoke1]
2911/// / \ / \
2912/// [cont0] [landingpad] [cont1]
2913/// to:
2914/// [...] [...]
2915/// \ /
2916/// [invoke]
2917/// / \
2918/// [cont] [landingpad]
2919///
2920/// But of course we can only do that if the invokes share the `landingpad`,
2921/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2922/// and the invoked functions are "compatible".
// NOTE(review): the function signature (likely
// `static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {`)
// and its early-exit guard condition appear lost in extraction — verify.
2925 return false;
2926
2927 bool Changed = false;
2928
2929 // FIXME: generalize to all exception handling blocks?
2930 if (!BB->isLandingPad())
2931 return Changed;
2932
2933 CompatibleSets Grouper;
2934
2935 // Record all the predecessors of this `landingpad`. As per verifier,
2936 // the only allowed predecessor is the unwind edge of an `invoke`.
2937 // We want to group "compatible" `invokes` into the same set to be merged.
2938 for (BasicBlock *PredBB : predecessors(BB))
2939 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2940
2941 // And now, merge `invoke`s that were grouped together.
2942 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2943 if (Invokes.size() < 2)
2944 continue;
2945 Changed = true;
2946 mergeCompatibleInvokesImpl(Invokes, DTU);
2947 }
2948
2949 return Changed;
2950}
2951
2952namespace {
2953/// Track ephemeral values, which should be ignored for cost-modelling
2954/// purposes. Requires walking instructions in reverse order.
2955class EphemeralValueTracker {
2956 SmallPtrSet<const Instruction *, 32> EphValues;
2957
2958 bool isEphemeral(const Instruction *I) {
2959 if (isa<AssumeInst>(I))
2960 return true;
2961 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2962 all_of(I->users(), [&](const User *U) {
2963 return EphValues.count(cast<Instruction>(U));
2964 });
2965 }
2966
2967public:
2968 bool track(const Instruction *I) {
2969 if (isEphemeral(I)) {
2970 EphValues.insert(I);
2971 return true;
2972 }
2973 return false;
2974 }
2975
2976 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2977};
2978} // namespace
2979
2980/// Determine if we can hoist or sink a sole store instruction out of a
2981/// conditional block.
2982///
2983/// We are looking for code like the following:
2984/// BrBB:
2985/// store i32 %add, i32* %arrayidx2
2986/// ... // No other stores or function calls (we could be calling a memory
2987/// ... // function).
2988/// %cmp = icmp ult %x, %y
2989/// br i1 %cmp, label %EndBB, label %ThenBB
2990/// ThenBB:
2991/// store i32 %add5, i32* %arrayidx2
2992/// br label EndBB
2993/// EndBB:
2994/// ...
2995/// We are going to transform this into:
2996/// BrBB:
2997/// store i32 %add, i32* %arrayidx2
2998/// ... //
2999/// %cmp = icmp ult %x, %y
3000/// %add.add5 = select i1 %cmp, i32 %add, %add5
3001/// store i32 %add.add5, i32* %arrayidx2
3002/// ...
3003///
3004/// \return The pointer to the value of the previous store if the store can be
3005/// hoisted into the predecessor block. 0 otherwise.
// NOTE(review): the first line of the signature (likely
// `static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,`)
// appears lost in extraction — verify upstream.
3007 BasicBlock *StoreBB, BasicBlock *EndBB) {
3008 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3009 if (!StoreToHoist)
3010 return nullptr;
3011
3012 // Volatile or atomic.
3013 if (!StoreToHoist->isSimple())
3014 return nullptr;
3015
3016 Value *StorePtr = StoreToHoist->getPointerOperand();
3017 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3018
3019 // Look for a store to the same pointer in BrBB.
3020 unsigned MaxNumInstToLookAt = 9;
3021 // Skip pseudo probe intrinsic calls which are not really killing any memory
3022 // accesses.
3023 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3024 if (!MaxNumInstToLookAt)
3025 break;
3026 --MaxNumInstToLookAt;
3027
3028 // Could be calling an instruction that affects memory like free().
3029 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3030 return nullptr;
3031
3032 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3033 // Found the previous store to same location and type. Make sure it is
3034 // simple, to avoid introducing a spurious non-atomic write after an
3035 // atomic write.
3036 if (SI->getPointerOperand() == StorePtr &&
3037 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3038 SI->getAlign() >= StoreToHoist->getAlign())
3039 // Found the previous store, return its value operand.
3040 return SI->getValueOperand();
3041 return nullptr; // Unknown store.
3042 }
3043
3044 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3045 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3046 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3047 Value *Obj = getUnderlyingObject(StorePtr);
3048 bool ExplicitlyDereferenceableOnly;
3049 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
  // NOTE(review): part of this condition (the negation wrapping the
  // PointerMayBeCaptured check and its trailing arguments) appears lost
  // in extraction — verify against the upstream source.
3051 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3053 (!ExplicitlyDereferenceableOnly ||
3054 isDereferenceablePointer(StorePtr, StoreTy,
3055 LI->getDataLayout()))) {
3056 // Found a previous load, return it.
3057 return LI;
3058 }
3059 }
3060 // The load didn't work out, but we may still find a store.
3061 }
3062 }
3063
3064 return nullptr;
3065}
3066
3067/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3068/// converted to selects.
// NOTE(review): the first line of the signature (likely
// `static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,`)
// appears lost in extraction — verify upstream.
3070 BasicBlock *EndBB,
3071 unsigned &SpeculatedInstructions,
3072 InstructionCost &Cost,
3073 const TargetTransformInfo &TTI) {
  // NOTE(review): the cost-kind selection statement surrounding this
  // `hasMinSize()` query appears partially lost in extraction — verify.
3075 BB->getParent()->hasMinSize()
3078
3079 bool HaveRewritablePHIs = false;
3080 for (PHINode &PN : EndBB->phis()) {
3081 Value *OrigV = PN.getIncomingValueForBlock(BB);
3082 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3083
3084 // FIXME: Try to remove some of the duplication with
3085 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3086 if (ThenV == OrigV)
3087 continue;
3088
3089 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3090 CmpInst::makeCmpResultType(PN.getType()),
  // NOTE(review): the trailing argument line of this call appears lost in
  // extraction — verify.
3092
3093 // Don't convert to selects if we could remove undefined behavior instead.
3094 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
  // NOTE(review): the second disjunct of this condition (likely the same
  // check on ThenV) appears lost in extraction — verify.
3096 return false;
3097
3098 HaveRewritablePHIs = true;
3099 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3100 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3101 if (!OrigCE && !ThenCE)
3102 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3103
3104 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3105 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3106 InstructionCost MaxCost =
  // NOTE(review): the initializer of MaxCost appears lost in extraction.
3108 if (OrigCost + ThenCost > MaxCost)
3109 return false;
3110
3111 // Account for the cost of an unfolded ConstantExpr which could end up
3112 // getting expanded into Instructions.
3113 // FIXME: This doesn't account for how many operations are combined in the
3114 // constant expression.
3115 ++SpeculatedInstructions;
3116 if (SpeculatedInstructions > 1)
3117 return false;
3118 }
3119
3120 return HaveRewritablePHIs;
3121}
3122
// NOTE(review): the first line of the signature (likely
// `static bool isProfitableToSpeculate(const BranchInst *BI,`) appears lost
// in extraction — verify upstream.
// Decides, from branch-weight metadata (or its absence), whether speculating
// the conditional side of BI is presumably worthwhile.
3124 std::optional<bool> Invert,
3125 const TargetTransformInfo &TTI) {
3126 // If the branch is non-unpredictable, and is predicted to *not* branch to
3127 // the `then` block, then avoid speculating it.
3128 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3129 return true;
3130
  // No (usable) profile data: treat speculation as acceptable.
3131 uint64_t TWeight, FWeight;
3132 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3133 return true;
3134
3135 if (!Invert.has_value())
3136 return false;
3137
  // Compare the probability of reaching the end block against the target's
  // predictable-branch threshold.
3138 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3139 BranchProbability BIEndProb =
3140 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3141 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3142 return BIEndProb < Likely;
3143}
3144
3145/// Speculate a conditional basic block flattening the CFG.
3146///
3147/// Note that this is a very risky transform currently. Speculating
3148/// instructions like this is most often not desirable. Instead, there is an MI
3149/// pass which can do it with full awareness of the resource constraints.
3150/// However, some cases are "obvious" and we should do directly. An example of
3151/// this is speculating a single, reasonably cheap instruction.
3152///
3153/// There is only one distinct advantage to flattening the CFG at the IR level:
3154/// it makes very common but simplistic optimizations such as are common in
3155/// instcombine and the DAG combiner more powerful by removing CFG edges and
3156/// modeling their effects with easier to reason about SSA value graphs.
3157///
3158///
3159/// An illustration of this transform is turning this IR:
3160/// \code
3161/// BB:
3162/// %cmp = icmp ult %x, %y
3163/// br i1 %cmp, label %EndBB, label %ThenBB
3164/// ThenBB:
3165/// %sub = sub %x, %y
3166/// br label BB2
3167/// EndBB:
3168/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3169/// ...
3170/// \endcode
3171///
3172/// Into this IR:
3173/// \code
3174/// BB:
3175/// %cmp = icmp ult %x, %y
3176/// %sub = sub %x, %y
3177/// %cond = select i1 %cmp, 0, %sub
3178/// ...
3179/// \endcode
3180///
3181/// \returns true if the conditional block is removed.
3182bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3183 BasicBlock *ThenBB) {
3184 if (!Options.SpeculateBlocks)
3185 return false;
3186
3187 // Be conservative for now. FP select instruction can often be expensive.
3188 Value *BrCond = BI->getCondition();
3189 if (isa<FCmpInst>(BrCond))
3190 return false;
3191
3192 BasicBlock *BB = BI->getParent();
3193 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3194 InstructionCost Budget =
  // NOTE(review): the initializer of Budget appears lost in extraction.
3196
3197 // If ThenBB is actually on the false edge of the conditional branch, remember
3198 // to swap the select operands later.
3199 bool Invert = false;
3200 if (ThenBB != BI->getSuccessor(0)) {
3201 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3202 Invert = true;
3203 }
3204 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3205
3206 if (!isProfitableToSpeculate(BI, Invert, TTI))
3207 return false;
3208
3209 // Keep a count of how many times instructions are used within ThenBB when
3210 // they are candidates for sinking into ThenBB. Specifically:
3211 // - They are defined in BB, and
3212 // - They have no side effects, and
3213 // - All of their uses are in ThenBB.
3214 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3215
3216 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3217
3218 unsigned SpeculatedInstructions = 0;
3219 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3220 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3221 Value *SpeculatedStoreValue = nullptr;
3222 StoreInst *SpeculatedStore = nullptr;
3223 EphemeralValueTracker EphTracker;
3224 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3225 // Skip pseudo probes. The consequence is we lose track of the branch
3226 // probability for ThenBB, which is fine since the optimization here takes
3227 // place regardless of the branch probability.
3228 if (isa<PseudoProbeInst>(I)) {
3229 // The probe should be deleted so that it will not be over-counted when
3230 // the samples collected on the non-conditional path are counted towards
3231 // the conditional path. We leave it for the counts inference algorithm to
3232 // figure out a proper count for an unknown probe.
3233 SpeculatedPseudoProbes.push_back(&I);
3234 continue;
3235 }
3236
3237 // Ignore ephemeral values, they will be dropped by the transform.
3238 if (EphTracker.track(&I))
3239 continue;
3240
3241 // Only speculatively execute a single instruction (not counting the
3242 // terminator) for now.
3243 bool IsSafeCheapLoadStore = HoistLoadsStores &&
  // NOTE(review): parts of this condition (the per-instruction safety check
  // and the threshold constant) appear lost in extraction — verify.
3245 SpeculatedConditionalLoadsStores.size() <
3247 // Not count load/store into cost if target supports conditional faulting
3248 // b/c it's cheap to speculate it.
3249 if (IsSafeCheapLoadStore)
3250 SpeculatedConditionalLoadsStores.push_back(&I);
3251 else
3252 ++SpeculatedInstructions;
3253
3254 if (SpeculatedInstructions > 1)
3255 return false;
3256
3257 // Don't hoist the instruction if it's unsafe or expensive.
3258 if (!IsSafeCheapLoadStore &&
  // NOTE(review): one disjunct of this condition (likely a negated
  // isSafeToSpeculativelyExecute check) appears lost in extraction.
3260 !(HoistCondStores && !SpeculatedStoreValue &&
3261 (SpeculatedStoreValue =
3262 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3263 return false;
3264 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
  // NOTE(review): the speculation-cost comparison lines of this condition
  // appear lost in extraction — verify.
3267 return false;
3268
3269 // Store the store speculation candidate.
3270 if (!SpeculatedStore && SpeculatedStoreValue)
3271 SpeculatedStore = cast<StoreInst>(&I);
3272
3273 // Do not hoist the instruction if any of its operands are defined but not
3274 // used in BB. The transformation will prevent the operand from
3275 // being sunk into the use block.
3276 for (Use &Op : I.operands()) {
  // NOTE(review): the definition of OpI (likely
  // `Instruction *OpI = dyn_cast<Instruction>(Op.get());`) appears lost.
3278 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3279 continue; // Not a candidate for sinking.
3280
3281 ++SinkCandidateUseCounts[OpI];
3282 }
3283 }
3284
3285 // Consider any sink candidates which are only used in ThenBB as costs for
3286 // speculation. Note, while we iterate over a DenseMap here, we are summing
3287 // and so iteration order isn't significant.
3288 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3289 if (Inst->hasNUses(Count)) {
3290 ++SpeculatedInstructions;
3291 if (SpeculatedInstructions > 1)
3292 return false;
3293 }
3294
3295 // Check that we can insert the selects and that it's not too expensive to do
3296 // so.
3297 bool Convert =
3298 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
  // NOTE(review): the declaration of `Cost` (an InstructionCost initialized
  // to zero) appears lost in extraction here.
3300 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3301 SpeculatedInstructions, Cost, TTI);
3302 if (!Convert || Cost > Budget)
3303 return false;
3304
3305 // If we get here, we can hoist the instruction and if-convert.
3306 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3307
3308 Instruction *Sel = nullptr;
3309 // Insert a select of the value of the speculated store.
3310 if (SpeculatedStoreValue) {
3311 IRBuilder<NoFolder> Builder(BI);
3312 Value *OrigV = SpeculatedStore->getValueOperand();
3313 Value *TrueV = SpeculatedStore->getValueOperand();
3314 Value *FalseV = SpeculatedStoreValue;
3315 if (Invert)
3316 std::swap(TrueV, FalseV);
3317 Value *S = Builder.CreateSelect(
3318 BrCond, TrueV, FalseV, "spec.store.select", BI);
3319 Sel = cast<Instruction>(S);
3320 SpeculatedStore->setOperand(0, S);
3321 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3322 SpeculatedStore->getDebugLoc());
3323 // The value stored is still conditional, but the store itself is now
3324 // unconditionally executed, so we must be sure that any linked dbg.assign
3325 // intrinsics are tracking the new stored value (the result of the
3326 // select). If we don't, and the store were to be removed by another pass
3327 // (e.g. DSE), then we'd eventually end up emitting a location describing
3328 // the conditional value, unconditionally.
3329 //
3330 // === Before this transformation ===
3331 // pred:
3332 // store %one, %x.dest, !DIAssignID !1
3333 // dbg.assign %one, "x", ..., !1, ...
3334 // br %cond if.then
3335 //
3336 // if.then:
3337 // store %two, %x.dest, !DIAssignID !2
3338 // dbg.assign %two, "x", ..., !2, ...
3339 //
3340 // === After this transformation ===
3341 // pred:
3342 // store %one, %x.dest, !DIAssignID !1
3343 // dbg.assign %one, "x", ..., !1
3344 // ...
3345 // %merge = select %cond, %two, %one
3346 // store %merge, %x.dest, !DIAssignID !2
3347 // dbg.assign %merge, "x", ..., !2
3348 for (DbgVariableRecord *DbgAssign :
3349 at::getDVRAssignmentMarkers(SpeculatedStore))
3350 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3351 DbgAssign->replaceVariableLocationOp(OrigV, S);
3352 }
3353
3354 // Metadata can be dependent on the condition we are hoisting above.
3355 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3356 // to avoid making it appear as if the condition is a constant, which would
3357 // be misleading while debugging.
3358 // Similarly strip attributes that may be dependent on condition we are
3359 // hoisting above.
3360 for (auto &I : make_early_inc_range(*ThenBB)) {
3361 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3362 I.dropLocation();
3363 }
3364 I.dropUBImplyingAttrsAndMetadata();
3365
3366 // Drop ephemeral values.
3367 if (EphTracker.contains(&I)) {
3368 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3369 I.eraseFromParent();
3370 }
3371 }
3372
3373 // Hoist the instructions.
3374 // Drop DbgVariableRecords attached to these instructions.
3375 for (auto &It : *ThenBB)
3376 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3377 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3378 // equivalent).
3379 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3380 !DVR || !DVR->isDbgAssign())
3381 It.dropOneDbgRecord(&DR);
3382 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3383 std::prev(ThenBB->end()));
3384
3385 if (!SpeculatedConditionalLoadsStores.empty())
3386 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3387 Sel);
3388
3389 // Insert selects and rewrite the PHI operands.
3390 IRBuilder<NoFolder> Builder(BI);
3391 for (PHINode &PN : EndBB->phis()) {
3392 unsigned OrigI = PN.getBasicBlockIndex(BB);
3393 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3394 Value *OrigV = PN.getIncomingValue(OrigI);
3395 Value *ThenV = PN.getIncomingValue(ThenI);
3396
3397 // Skip PHIs which are trivial.
3398 if (OrigV == ThenV)
3399 continue;
3400
3401 // Create a select whose true value is the speculatively executed value and
3402 // false value is the pre-existing value. Swap them if the branch
3403 // destinations were inverted.
3404 Value *TrueV = ThenV, *FalseV = OrigV;
3405 if (Invert)
3406 std::swap(TrueV, FalseV);
3407 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3408 PN.setIncomingValue(OrigI, V);
3409 PN.setIncomingValue(ThenI, V);
3410 }
3411
3412 // Remove speculated pseudo probes.
3413 for (Instruction *I : SpeculatedPseudoProbes)
3414 I->eraseFromParent();
3415
3416 ++NumSpeculations;
3417 return true;
3418}
3419
3421
3422// Return false if number of blocks searched is too much.
3423static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3424 BlocksSet &ReachesNonLocalUses) {
3425 if (BB == DefBB)
3426 return true;
3427 if (!ReachesNonLocalUses.insert(BB).second)
3428 return true;
3429
3430 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3431 return false;
3432 for (BasicBlock *Pred : predecessors(BB))
3433 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3434 return false;
3435 return true;
3436}
3437
3438/// Return true if we can thread a branch across this block.
// NOTE(review): the first line of the signature (likely
// `static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,`) appears
// lost in extraction — verify upstream.
3440 BlocksSet &NonLocalUseBlocks) {
3441 int Size = 0;
3442 EphemeralValueTracker EphTracker;
3443
3444 // Walk the loop in reverse so that we can identify ephemeral values properly
3445 // (values only feeding assumes).
3446 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3447 // Can't fold blocks that contain noduplicate or convergent calls.
3448 if (CallInst *CI = dyn_cast<CallInst>(&I))
3449 if (CI->cannotDuplicate() || CI->isConvergent())
3450 return false;
3451
3452 // Ignore ephemeral values which are deleted during codegen.
3453 // We will delete Phis while threading, so Phis should not be accounted in
3454 // block's size.
3455 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3456 if (Size++ > MaxSmallBlockSize)
3457 return false; // Don't clone large BB's.
3458 }
3459
3460 // Record blocks with non-local uses of values defined in the current basic
3461 // block.
3462 for (User *U : I.users()) {
  // NOTE(review): the definition of UI (likely
  // `Instruction *UI = cast<Instruction>(U);`) appears lost in extraction.
3464 BasicBlock *UsedInBB = UI->getParent();
3465 if (UsedInBB == BB) {
3466 if (isa<PHINode>(UI))
3467 return false;
3468 } else
3469 NonLocalUseBlocks.insert(UsedInBB);
3470 }
3471
3472 // Looks ok, continue checking.
3473 }
3474
3475 return true;
3476}
3477
// NOTE(review): the first line of the signature (likely
// `static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,`)
// appears lost in extraction — verify upstream.
// Returns the constant value V is known to have on the CFG edge From->To,
// or null if nothing can be deduced.
3479 BasicBlock *To) {
3480 // Don't look past the block defining the value, we might get the value from
3481 // a previous loop iteration.
3482 auto *I = dyn_cast<Instruction>(V);
3483 if (I && I->getParent() == To)
3484 return nullptr;
3485
3486 // We know the value if the From block branches on it.
3487 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3488 if (BI && BI->isConditional() && BI->getCondition() == V &&
3489 BI->getSuccessor(0) != BI->getSuccessor(1))
3490 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
  // NOTE(review): the alternative arm of this ternary (likely
  // `ConstantInt::getFalse(BI->getContext())`) appears lost in extraction.
3492
3493 return nullptr;
3494}
3495
3496/// If we have a conditional branch on something for which we know the constant
3497/// value in predecessors (e.g. a phi node in the current block), thread edges
3498/// from the predecessor to their ultimate destination.
3499static std::optional<bool>
3501 const DataLayout &DL,
3502 AssumptionCache *AC) {
3504 BasicBlock *BB = BI->getParent();
3505 Value *Cond = BI->getCondition();
3507 if (PN && PN->getParent() == BB) {
3508 // Degenerate case of a single entry PHI.
3509 if (PN->getNumIncomingValues() == 1) {
3511 return true;
3512 }
3513
3514 for (Use &U : PN->incoming_values())
3515 if (auto *CB = dyn_cast<ConstantInt>(U))
3516 KnownValues[CB].insert(PN->getIncomingBlock(U));
3517 } else {
3518 for (BasicBlock *Pred : predecessors(BB)) {
3519 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3520 KnownValues[CB].insert(Pred);
3521 }
3522 }
3523
3524 if (KnownValues.empty())
3525 return false;
3526
3527 // Now we know that this block has multiple preds and two succs.
3528 // Check that the block is small enough and record which non-local blocks use
3529 // values defined in the block.
3530
3531 BlocksSet NonLocalUseBlocks;
3532 BlocksSet ReachesNonLocalUseBlocks;
3533 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3534 return false;
3535
3536 // Jump-threading can only be done to destinations where no values defined
3537 // in BB are live.
3538
3539 // Quickly check if both destinations have uses. If so, jump-threading cannot
3540 // be done.
3541 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3542 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3543 return false;
3544
3545 // Search backward from NonLocalUseBlocks to find which blocks
3546 // reach non-local uses.
3547 for (BasicBlock *UseBB : NonLocalUseBlocks)
3548 // Give up if too many blocks are searched.
3549 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3550 return false;
3551
3552 for (const auto &Pair : KnownValues) {
3553 ConstantInt *CB = Pair.first;
3554 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3555 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3556
3557 // Okay, we now know that all edges from PredBB should be revectored to
3558 // branch to RealDest.
3559 if (RealDest == BB)
3560 continue; // Skip self loops.
3561
3562 // Skip if the predecessor's terminator is an indirect branch.
3563 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3564 return isa<IndirectBrInst>(PredBB->getTerminator());
3565 }))
3566 continue;
3567
3568 // Only revector to RealDest if no values defined in BB are live.
3569 if (ReachesNonLocalUseBlocks.contains(RealDest))
3570 continue;
3571
3572 LLVM_DEBUG({
3573 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3574 << " has value " << *Pair.first << " in predecessors:\n";
3575 for (const BasicBlock *PredBB : Pair.second)
3576 dbgs() << " " << PredBB->getName() << "\n";
3577 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3578 });
3579
3580 // Split the predecessors we are threading into a new edge block. We'll
3581 // clone the instructions into this block, and then redirect it to RealDest.
3582 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3583
3584 // TODO: These just exist to reduce test diff, we can drop them if we like.
3585 EdgeBB->setName(RealDest->getName() + ".critedge");
3586 EdgeBB->moveBefore(RealDest);
3587
3588 // Update PHI nodes.
3589 addPredecessorToBlock(RealDest, EdgeBB, BB);
3590
3591 // BB may have instructions that are being threaded over. Clone these
3592 // instructions into EdgeBB. We know that there will be no uses of the
3593 // cloned instructions outside of EdgeBB.
3594 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3595 ValueToValueMapTy TranslateMap; // Track translated values.
3596 TranslateMap[Cond] = CB;
3597
3598 // RemoveDIs: track instructions that we optimise away while folding, so
3599 // that we can copy DbgVariableRecords from them later.
3600 BasicBlock::iterator SrcDbgCursor = BB->begin();
3601 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3602 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3603 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3604 continue;
3605 }
3606 // Clone the instruction.
3607 Instruction *N = BBI->clone();
3608 // Insert the new instruction into its new home.
3609 N->insertInto(EdgeBB, InsertPt);
3610
3611 if (BBI->hasName())
3612 N->setName(BBI->getName() + ".c");
3613
3614 // Update operands due to translation.
3615 // Key Instructions: Remap all the atom groups.
3616 if (const DebugLoc &DL = BBI->getDebugLoc())
3617 mapAtomInstance(DL, TranslateMap);
3618 RemapInstruction(N, TranslateMap,
3620
3621 // Check for trivial simplification.
3622 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3623 if (!BBI->use_empty())
3624 TranslateMap[&*BBI] = V;
3625 if (!N->mayHaveSideEffects()) {
3626 N->eraseFromParent(); // Instruction folded away, don't need actual
3627 // inst
3628 N = nullptr;
3629 }
3630 } else {
3631 if (!BBI->use_empty())
3632 TranslateMap[&*BBI] = N;
3633 }
3634 if (N) {
3635 // Copy all debug-info attached to instructions from the last we
3636 // successfully clone, up to this instruction (they might have been
3637 // folded away).
3638 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3639 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3640 SrcDbgCursor = std::next(BBI);
3641 // Clone debug-info on this instruction too.
3642 N->cloneDebugInfoFrom(&*BBI);
3643
3644 // Register the new instruction with the assumption cache if necessary.
3645 if (auto *Assume = dyn_cast<AssumeInst>(N))
3646 if (AC)
3647 AC->registerAssumption(Assume);
3648 }
3649 }
3650
3651 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3652 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3653 InsertPt->cloneDebugInfoFrom(BI);
3654
3655 BB->removePredecessor(EdgeBB);
3656 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3657 EdgeBI->setSuccessor(0, RealDest);
3658 EdgeBI->setDebugLoc(BI->getDebugLoc());
3659
3660 if (DTU) {
3662 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3663 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3664 DTU->applyUpdates(Updates);
3665 }
3666
3667 // For simplicity, we created a separate basic block for the edge. Merge
3668 // it back into the predecessor if possible. This not only avoids
3669 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3670 // bypass the check for trivial cycles above.
3671 MergeBlockIntoPredecessor(EdgeBB, DTU);
3672
3673 // Signal repeat, simplifying any other constants.
3674 return std::nullopt;
3675 }
3676
3677 return false;
3678}
3679
3680bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3681 // Note: If BB is a loop header then there is a risk that threading introduces
3682 // a non-canonical loop by moving a back edge. So we avoid this optimization
3683 // for loop headers if NeedCanonicalLoop is set.
3684 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3685 return false;
3686
3687 std::optional<bool> Result;
3688 bool EverChanged = false;
3689 do {
3690 // Note that None means "we changed things, but recurse further."
3691 Result =
3693 EverChanged |= Result == std::nullopt || *Result;
3694 } while (Result == std::nullopt);
3695 return EverChanged;
3696}
3697
3698/// Given a BB that starts with the specified two-entry PHI node,
3699/// see if we can eliminate it.
3702 const DataLayout &DL,
3703 bool SpeculateUnpredictables) {
3704 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3705 // statement", which has a very simple dominance structure. Basically, we
3706 // are trying to find the condition that is being branched on, which
3707 // subsequently causes this merge to happen. We really want control
3708 // dependence information for this check, but simplifycfg can't keep it up
3709 // to date, and this catches most of the cases we care about anyway.
3710 BasicBlock *BB = PN->getParent();
3711
3712 BasicBlock *IfTrue, *IfFalse;
3713 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3714 if (!DomBI)
3715 return false;
3716 Value *IfCond = DomBI->getCondition();
3717 // Don't bother if the branch will be constant folded trivially.
3718 if (isa<ConstantInt>(IfCond))
3719 return false;
3720
3721 BasicBlock *DomBlock = DomBI->getParent();
3724 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3725 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3726 });
3727 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3728 "Will have either one or two blocks to speculate.");
3729
3730 // If the branch is non-unpredictable, see if we either predictably jump to
3731 // the merge bb (if we have only a single 'then' block), or if we predictably
3732 // jump to one specific 'then' block (if we have two of them).
3733 // It isn't beneficial to speculatively execute the code
3734 // from the block that we know is predictably not entered.
3735 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3736 if (!IsUnpredictable) {
3737 uint64_t TWeight, FWeight;
3738 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3739 (TWeight + FWeight) != 0) {
3740 BranchProbability BITrueProb =
3741 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3742 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3743 BranchProbability BIFalseProb = BITrueProb.getCompl();
3744 if (IfBlocks.size() == 1) {
3745 BranchProbability BIBBProb =
3746 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3747 if (BIBBProb >= Likely)
3748 return false;
3749 } else {
3750 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3751 return false;
3752 }
3753 }
3754 }
3755
3756 // Don't try to fold an unreachable block. For example, the phi node itself
3757 // can't be the candidate if-condition for a select that we want to form.
3758 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3759 if (IfCondPhiInst->getParent() == BB)
3760 return false;
3761
3762 // Okay, we found that we can merge this two-entry phi node into a select.
3763 // Doing so would require us to fold *all* two entry phi nodes in this block.
3764 // At some point this becomes non-profitable (particularly if the target
3765 // doesn't support cmov's). Only do this transformation if there are two or
3766 // fewer PHI nodes in this block.
3767 unsigned NumPhis = 0;
3768 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3769 if (NumPhis > 2)
3770 return false;
3771
3772 // Loop over the PHI's seeing if we can promote them all to select
3773 // instructions. While we are at it, keep track of the instructions
3774 // that need to be moved to the dominating block.
3775 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3776 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3777 InstructionCost Cost = 0;
3778 InstructionCost Budget =
3780 if (SpeculateUnpredictables && IsUnpredictable)
3781 Budget += TTI.getBranchMispredictPenalty();
3782
3783 bool Changed = false;
3784 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3785 PHINode *PN = cast<PHINode>(II++);
3786 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3787 PN->replaceAllUsesWith(V);
3788 PN->eraseFromParent();
3789 Changed = true;
3790 continue;
3791 }
3792
3793 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3794 AggressiveInsts, Cost, Budget, TTI, AC,
3795 ZeroCostInstructions) ||
3796 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3797 AggressiveInsts, Cost, Budget, TTI, AC,
3798 ZeroCostInstructions))
3799 return Changed;
3800 }
3801
3802 // If we folded the first phi, PN dangles at this point. Refresh it. If
3803 // we ran out of PHIs then we simplified them all.
3804 PN = dyn_cast<PHINode>(BB->begin());
3805 if (!PN)
3806 return true;
3807
3808 // Return true if at least one of these is a 'not', and another is either
3809 // a 'not' too, or a constant.
3810 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3811 if (!match(V0, m_Not(m_Value())))
3812 std::swap(V0, V1);
3813 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3814 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3815 };
3816
3817 // Don't fold i1 branches on PHIs which contain binary operators or
3818 // (possibly inverted) select form of or/ands, unless one of
3819 // the incoming values is an 'not' and another one is freely invertible.
3820 // These can often be turned into switches and other things.
3821 auto IsBinOpOrAnd = [](Value *V) {
3822 return match(
3824 };
3825 if (PN->getType()->isIntegerTy(1) &&
3826 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3827 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3828 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3829 PN->getIncomingValue(1)))
3830 return Changed;
3831
3832 // If all PHI nodes are promotable, check to make sure that all instructions
3833 // in the predecessor blocks can be promoted as well. If not, we won't be able
3834 // to get rid of the control flow, so it's not worth promoting to select
3835 // instructions.
3836 for (BasicBlock *IfBlock : IfBlocks)
3837 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3838 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3839 // This is not an aggressive instruction that we can promote.
3840 // Because of this, we won't be able to get rid of the control flow, so
3841 // the xform is not worth it.
3842 return Changed;
3843 }
3844
3845 // If either of the blocks has it's address taken, we can't do this fold.
3846 if (any_of(IfBlocks,
3847 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3848 return Changed;
3849
3850 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3851 if (IsUnpredictable) dbgs() << " (unpredictable)";
3852 dbgs() << " T: " << IfTrue->getName()
3853 << " F: " << IfFalse->getName() << "\n");
3854
3855 // If we can still promote the PHI nodes after this gauntlet of tests,
3856 // do all of the PHI's now.
3857
3858 // Move all 'aggressive' instructions, which are defined in the
3859 // conditional parts of the if's up to the dominating block.
3860 for (BasicBlock *IfBlock : IfBlocks)
3861 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3862
3863 IRBuilder<NoFolder> Builder(DomBI);
3864 // Propagate fast-math-flags from phi nodes to replacement selects.
3865 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3866 // Change the PHI node into a select instruction.
3867 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3868 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3869
3870 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3871 isa<FPMathOperator>(PN) ? PN : nullptr,
3872 "", DomBI);
3873 PN->replaceAllUsesWith(Sel);
3874 Sel->takeName(PN);
3875 PN->eraseFromParent();
3876 }
3877
3878 // At this point, all IfBlocks are empty, so our if statement
3879 // has been flattened. Change DomBlock to jump directly to our new block to
3880 // avoid other simplifycfg's kicking in on the diamond.
3881 Builder.CreateBr(BB);
3882
3884 if (DTU) {
3885 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3886 for (auto *Successor : successors(DomBlock))
3887 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3888 }
3889
3890 DomBI->eraseFromParent();
3891 if (DTU)
3892 DTU->applyUpdates(Updates);
3893
3894 return true;
3895}
3896
3899 Value *RHS, const Twine &Name = "") {
3900 // Try to relax logical op to binary op.
3901 if (impliesPoison(RHS, LHS))
3902 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3903 if (Opc == Instruction::And)
3904 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3905 if (Opc == Instruction::Or)
3906 return Builder.CreateLogicalOr(LHS, RHS, Name);
3907 llvm_unreachable("Invalid logical opcode");
3908}
3909
3910/// Return true if either PBI or BI has branch weight available, and store
3911/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3912/// not have branch weight, use 1:1 as its weight.
3914 uint64_t &PredTrueWeight,
3915 uint64_t &PredFalseWeight,
3916 uint64_t &SuccTrueWeight,
3917 uint64_t &SuccFalseWeight) {
3918 bool PredHasWeights =
3919 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3920 bool SuccHasWeights =
3921 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3922 if (PredHasWeights || SuccHasWeights) {
3923 if (!PredHasWeights)
3924 PredTrueWeight = PredFalseWeight = 1;
3925 if (!SuccHasWeights)
3926 SuccTrueWeight = SuccFalseWeight = 1;
3927 return true;
3928 } else {
3929 return false;
3930 }
3931}
3932
3933/// Determine if the two branches share a common destination and deduce a glue
3934/// that joins the branches' conditions to arrive at the common destination if
3935/// that would be profitable.
3936static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3938 const TargetTransformInfo *TTI) {
3939 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3940 "Both blocks must end with a conditional branches.");
3942 "PredBB must be a predecessor of BB.");
3943
3944 // We have the potential to fold the conditions together, but if the
3945 // predecessor branch is predictable, we may not want to merge them.
3946 uint64_t PTWeight, PFWeight;
3947 BranchProbability PBITrueProb, Likely;
3948 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3949 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3950 (PTWeight + PFWeight) != 0) {
3951 PBITrueProb =
3952 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3953 Likely = TTI->getPredictableBranchThreshold();
3954 }
3955
3956 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3957 // Speculate the 2nd condition unless the 1st is probably true.
3958 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3959 return {{BI->getSuccessor(0), Instruction::Or, false}};
3960 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3961 // Speculate the 2nd condition unless the 1st is probably false.
3962 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3963 return {{BI->getSuccessor(1), Instruction::And, false}};
3964 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3965 // Speculate the 2nd condition unless the 1st is probably true.
3966 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3967 return {{BI->getSuccessor(1), Instruction::And, true}};
3968 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3969 // Speculate the 2nd condition unless the 1st is probably false.
3970 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3971 return {{BI->getSuccessor(0), Instruction::Or, true}};
3972 }
3973 return std::nullopt;
3974}
3975
3977 DomTreeUpdater *DTU,
3978 MemorySSAUpdater *MSSAU,
3979 const TargetTransformInfo *TTI) {
3980 BasicBlock *BB = BI->getParent();
3981 BasicBlock *PredBlock = PBI->getParent();
3982
3983 // Determine if the two branches share a common destination.
3984 BasicBlock *CommonSucc;
3986 bool InvertPredCond;
3987 std::tie(CommonSucc, Opc, InvertPredCond) =
3989
3990 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3991
3992 IRBuilder<> Builder(PBI);
3993 // The builder is used to create instructions to eliminate the branch in BB.
3994 // If BB's terminator has !annotation metadata, add it to the new
3995 // instructions.
3996 Builder.CollectMetadataToCopy(BB->getTerminator(),
3997 {LLVMContext::MD_annotation});
3998
3999 // If we need to invert the condition in the pred block to match, do so now.
4000 if (InvertPredCond) {
4001 InvertBranch(PBI, Builder);
4002 }
4003
4004 BasicBlock *UniqueSucc =
4005 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4006
4007 // Before cloning instructions, notify the successor basic block that it
4008 // is about to have a new predecessor. This will update PHI nodes,
4009 // which will allow us to update live-out uses of bonus instructions.
4010 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4011
4012 // Try to update branch weights.
4013 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4014 SmallVector<uint64_t, 2> MDWeights;
4015 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4016 SuccTrueWeight, SuccFalseWeight)) {
4017
4018 if (PBI->getSuccessor(0) == BB) {
4019 // PBI: br i1 %x, BB, FalseDest
4020 // BI: br i1 %y, UniqueSucc, FalseDest
4021 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4022 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
4023 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4024 // TrueWeight for PBI * FalseWeight for BI.
4025 // We assume that total weights of a BranchInst can fit into 32 bits.
4026 // Therefore, we will not have overflow using 64-bit arithmetic.
4027 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4028 PredTrueWeight * SuccFalseWeight);
4029 } else {
4030 // PBI: br i1 %x, TrueDest, BB
4031 // BI: br i1 %y, TrueDest, UniqueSucc
4032 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4033 // FalseWeight for PBI * TrueWeight for BI.
4034 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4035 PredFalseWeight * SuccTrueWeight);
4036 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4037 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4038 }
4039
4040 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4041 /*ElideAllZero=*/true);
4042
4043 // TODO: If BB is reachable from all paths through PredBlock, then we
4044 // could replace PBI's branch probabilities with BI's.
4045 } else
4046 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4047
4048 // Now, update the CFG.
4049 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4050
4051 if (DTU)
4052 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4053 {DominatorTree::Delete, PredBlock, BB}});
4054
4055 // If BI was a loop latch, it may have had associated loop metadata.
4056 // We need to copy it to the new latch, that is, PBI.
4057 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4058 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4059
4060 ValueToValueMapTy VMap; // maps original values to cloned values
4062
4063 Module *M = BB->getModule();
4064
4065 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4066 for (DbgVariableRecord &DVR :
4068 RemapDbgRecord(M, &DVR, VMap,
4070 }
4071
4072 // Now that the Cond was cloned into the predecessor basic block,
4073 // or/and the two conditions together.
4074 Value *BICond = VMap[BI->getCondition()];
4075 PBI->setCondition(
4076 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4078 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4079 if (!MDWeights.empty()) {
4080 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4081 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4082 /*IsExpected=*/false, /*ElideAllZero=*/true);
4083 }
4084
4085 ++NumFoldBranchToCommonDest;
4086 return true;
4087}
4088
4089/// Return if an instruction's type or any of its operands' types are a vector
4090/// type.
4091static bool isVectorOp(Instruction &I) {
4092 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4093 return U->getType()->isVectorTy();
4094 });
4095}
4096
4097/// If this basic block is simple enough, and if a predecessor branches to us
4098/// and one of our successors, fold the block into the predecessor and use
4099/// logical operations to pick the right destination.
4101 MemorySSAUpdater *MSSAU,
4102 const TargetTransformInfo *TTI,
4103 unsigned BonusInstThreshold) {
4104 // If this block ends with an unconditional branch,
4105 // let speculativelyExecuteBB() deal with it.
4106 if (!BI->isConditional())
4107 return false;
4108
4109 BasicBlock *BB = BI->getParent();
4113
4115
4117 Cond->getParent() != BB || !Cond->hasOneUse())
4118 return false;
4119
4120 // Finally, don't infinitely unroll conditional loops.
4121 if (is_contained(successors(BB), BB))
4122 return false;
4123
4124 // With which predecessors will we want to deal with?
4126 for (BasicBlock *PredBlock : predecessors(BB)) {
4127 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4128
4129 // Check that we have two conditional branches. If there is a PHI node in
4130 // the common successor, verify that the same value flows in from both
4131 // blocks.
4132 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4133 continue;
4134
4135 // Determine if the two branches share a common destination.
4136 BasicBlock *CommonSucc;
4138 bool InvertPredCond;
4139 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4140 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4141 else
4142 continue;
4143
4144 // Check the cost of inserting the necessary logic before performing the
4145 // transformation.
4146 if (TTI) {
4147 Type *Ty = BI->getCondition()->getType();
4148 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4149 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4150 !isa<CmpInst>(PBI->getCondition())))
4151 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4152
4154 continue;
4155 }
4156
4157 // Ok, we do want to deal with this predecessor. Record it.
4158 Preds.emplace_back(PredBlock);
4159 }
4160
4161 // If there aren't any predecessors into which we can fold,
4162 // don't bother checking the cost.
4163 if (Preds.empty())
4164 return false;
4165
4166 // Only allow this transformation if computing the condition doesn't involve
4167 // too many instructions and these involved instructions can be executed
4168 // unconditionally. We denote all involved instructions except the condition
4169 // as "bonus instructions", and only allow this transformation when the
4170 // number of the bonus instructions we'll need to create when cloning into
4171 // each predecessor does not exceed a certain threshold.
4172 unsigned NumBonusInsts = 0;
4173 bool SawVectorOp = false;
4174 const unsigned PredCount = Preds.size();
4175 for (Instruction &I : *BB) {
4176 // Don't check the branch condition comparison itself.
4177 if (&I == Cond)
4178 continue;
4179 // Ignore the terminator.
4180 if (isa<BranchInst>(I))
4181 continue;
4182 // I must be safe to execute unconditionally.
4184 return false;
4185 SawVectorOp |= isVectorOp(I);
4186
4187 // Account for the cost of duplicating this instruction into each
4188 // predecessor. Ignore free instructions.
4189 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4191 NumBonusInsts += PredCount;
4192
4193 // Early exits once we reach the limit.
4194 if (NumBonusInsts >
4195 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4196 return false;
4197 }
4198
4199 auto IsBCSSAUse = [BB, &I](Use &U) {
4200 auto *UI = cast<Instruction>(U.getUser());
4201 if (auto *PN = dyn_cast<PHINode>(UI))
4202 return PN->getIncomingBlock(U) == BB;
4203 return UI->getParent() == BB && I.comesBefore(UI);
4204 };
4205
4206 // Does this instruction require rewriting of uses?
4207 if (!all_of(I.uses(), IsBCSSAUse))
4208 return false;
4209 }
4210 if (NumBonusInsts >
4211 BonusInstThreshold *
4212 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4213 return false;
4214
4215 // Ok, we have the budget. Perform the transformation.
4216 for (BasicBlock *PredBlock : Preds) {
4217 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4218 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4219 }
4220 return false;
4221}
4222
4223// If there is only one store in BB1 and BB2, return it, otherwise return
4224// nullptr.
4226 StoreInst *S = nullptr;
4227 for (auto *BB : {BB1, BB2}) {
4228 if (!BB)
4229 continue;
4230 for (auto &I : *BB)
4231 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4232 if (S)
4233 // Multiple stores seen.
4234 return nullptr;
4235 else
4236 S = SI;
4237 }
4238 }
4239 return S;
4240}
4241
4243 Value *AlternativeV = nullptr) {
4244 // PHI is going to be a PHI node that allows the value V that is defined in
4245 // BB to be referenced in BB's only successor.
4246 //
4247 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4248 // doesn't matter to us what the other operand is (it'll never get used). We
4249 // could just create a new PHI with an undef incoming value, but that could
4250 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4251 // other PHI. So here we directly look for some PHI in BB's successor with V
4252 // as an incoming operand. If we find one, we use it, else we create a new
4253 // one.
4254 //
4255 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4256 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4257 // where OtherBB is the single other predecessor of BB's only successor.
4258 PHINode *PHI = nullptr;
4259 BasicBlock *Succ = BB->getSingleSuccessor();
4260
4261 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4262 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4263 PHI = cast<PHINode>(I);
4264 if (!AlternativeV)
4265 break;
4266
4267 assert(Succ->hasNPredecessors(2));
4268 auto PredI = pred_begin(Succ);
4269 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4270 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4271 break;
4272 PHI = nullptr;
4273 }
4274 if (PHI)
4275 return PHI;
4276
4277 // If V is not an instruction defined in BB, just return it.
4278 if (!AlternativeV &&
4279 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4280 return V;
4281
4282 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4283 PHI->insertBefore(Succ->begin());
4284 PHI->addIncoming(V, BB);
4285 for (BasicBlock *PredBB : predecessors(Succ))
4286 if (PredBB != BB)
4287 PHI->addIncoming(
4288 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4289 return PHI;
4290}
4291
4293 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4294 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4295 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4296 // For every pointer, there must be exactly two stores, one coming from
4297 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4298 // store (to any address) in PTB,PFB or QTB,QFB.
4299 // FIXME: We could relax this restriction with a bit more work and performance
4300 // testing.
4301 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4302 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4303 if (!PStore || !QStore)
4304 return false;
4305
4306 // Now check the stores are compatible.
4307 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4308 PStore->getValueOperand()->getType() !=
4309 QStore->getValueOperand()->getType())
4310 return false;
4311
4312 // Check that sinking the store won't cause program behavior changes. Sinking
4313 // the store out of the Q blocks won't change any behavior as we're sinking
4314 // from a block to its unconditional successor. But we're moving a store from
4315 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4316 // So we need to check that there are no aliasing loads or stores in
4317 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4318 // operations between PStore and the end of its parent block.
4319 //
4320 // The ideal way to do this is to query AliasAnalysis, but we don't
4321 // preserve AA currently so that is dangerous. Be super safe and just
4322 // check there are no other memory operations at all.
4323 for (auto &I : *QFB->getSinglePredecessor())
4324 if (I.mayReadOrWriteMemory())
4325 return false;
4326 for (auto &I : *QFB)
4327 if (&I != QStore && I.mayReadOrWriteMemory())
4328 return false;
4329 if (QTB)
4330 for (auto &I : *QTB)
4331 if (&I != QStore && I.mayReadOrWriteMemory())
4332 return false;
4333 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4334 I != E; ++I)
4335 if (&*I != PStore && I->mayReadOrWriteMemory())
4336 return false;
4337
4338 // If we're not in aggressive mode, we only optimize if we have some
4339 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4340 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4341 if (!BB)
4342 return true;
4343 // Heuristic: if the block can be if-converted/phi-folded and the
4344 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4345 // thread this store.
4346 InstructionCost Cost = 0;
4347 InstructionCost Budget =
4349 for (auto &I : BB->instructionsWithoutDebug(false)) {
4350 // Consider terminator instruction to be free.
4351 if (I.isTerminator())
4352 continue;
4353 // If this is one the stores that we want to speculate out of this BB,
4354 // then don't count it's cost, consider it to be free.
4355 if (auto *S = dyn_cast<StoreInst>(&I))
4356 if (llvm::find(FreeStores, S))
4357 continue;
4358 // Else, we have a white-list of instructions that we are ak speculating.
4360 return false; // Not in white-list - not worthwhile folding.
4361 // And finally, if this is a non-free instruction that we are okay
4362 // speculating, ensure that we consider the speculation budget.
4363 Cost +=
4364 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4365 if (Cost > Budget)
4366 return false; // Eagerly refuse to fold as soon as we're out of budget.
4367 }
4368 assert(Cost <= Budget &&
4369 "When we run out of budget we will eagerly return from within the "
4370 "per-instruction loop.");
4371 return true;
4372 };
4373
4374 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4376 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4377 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4378 return false;
4379
4380 // If PostBB has more than two predecessors, we need to split it so we can
4381 // sink the store.
4382 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4383 // We know that QFB's only successor is PostBB. And QFB has a single
4384 // predecessor. If QTB exists, then its only successor is also PostBB.
4385 // If QTB does not exist, then QFB's only predecessor has a conditional
4386 // branch to QFB and PostBB.
4387 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4388 BasicBlock *NewBB =
4389 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4390 if (!NewBB)
4391 return false;
4392 PostBB = NewBB;
4393 }
4394
4395 // OK, we're going to sink the stores to PostBB. The store has to be
4396 // conditional though, so first create the predicate.
4397 BranchInst *PBranch =
4399 BranchInst *QBranch =
4401 Value *PCond = PBranch->getCondition();
4402 Value *QCond = QBranch->getCondition();
4403
4405 PStore->getParent());
4407 QStore->getParent(), PPHI);
4408
4409 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4410 IRBuilder<> QB(PostBB, PostBBFirst);
4411 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4412
4413 InvertPCond ^= (PStore->getParent() != PTB);
4414 InvertQCond ^= (QStore->getParent() != QTB);
4415 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4416 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4417
4418 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4419
4420 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4421 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4422 /*Unreachable=*/false,
4423 /*BranchWeights=*/nullptr, DTU);
4424 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4426 SmallVector<uint32_t, 2> PWeights, QWeights;
4427 extractBranchWeights(*PBranch, PWeights);
4428 extractBranchWeights(*QBranch, QWeights);
4429 if (InvertPCond)
4430 std::swap(PWeights[0], PWeights[1]);
4431 if (InvertQCond)
4432 std::swap(QWeights[0], QWeights[1]);
4433 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4435 {CombinedWeights[0], CombinedWeights[1]},
4436 /*IsExpected=*/false, /*ElideAllZero=*/true);
4437 }
4438
4439 QB.SetInsertPoint(T);
4440 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4441 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4442 // Choose the minimum alignment. If we could prove both stores execute, we
4443 // could use biggest one. In this case, though, we only know that one of the
4444 // stores executes. And we don't know it's safe to take the alignment from a
4445 // store that doesn't execute.
4446 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4447
4448 QStore->eraseFromParent();
4449 PStore->eraseFromParent();
4450
4451 return true;
4452}
4453
4455 DomTreeUpdater *DTU, const DataLayout &DL,
4456 const TargetTransformInfo &TTI) {
4457 // The intention here is to find diamonds or triangles (see below) where each
4458 // conditional block contains a store to the same address. Both of these
4459 // stores are conditional, so they can't be unconditionally sunk. But it may
4460 // be profitable to speculatively sink the stores into one merged store at the
4461 // end, and predicate the merged store on the union of the two conditions of
4462 // PBI and QBI.
4463 //
4464 // This can reduce the number of stores executed if both of the conditions are
4465 // true, and can allow the blocks to become small enough to be if-converted.
4466 // This optimization will also chain, so that ladders of test-and-set
4467 // sequences can be if-converted away.
4468 //
4469 // We only deal with simple diamonds or triangles:
4470 //
4471 // PBI or PBI or a combination of the two
4472 // / \ | \
4473 // PTB PFB | PFB
4474 // \ / | /
4475 // QBI QBI
4476 // / \ | \
4477 // QTB QFB | QFB
4478 // \ / | /
4479 // PostBB PostBB
4480 //
4481 // We model triangles as a type of diamond with a nullptr "true" block.
4482 // Triangles are canonicalized so that the fallthrough edge is represented by
4483 // a true condition, as in the diagram above.
4484 BasicBlock *PTB = PBI->getSuccessor(0);
4485 BasicBlock *PFB = PBI->getSuccessor(1);
4486 BasicBlock *QTB = QBI->getSuccessor(0);
4487 BasicBlock *QFB = QBI->getSuccessor(1);
4488 BasicBlock *PostBB = QFB->getSingleSuccessor();
4489
4490 // Make sure we have a good guess for PostBB. If QTB's only successor is
4491 // QFB, then QFB is a better PostBB.
4492 if (QTB->getSingleSuccessor() == QFB)
4493 PostBB = QFB;
4494
4495 // If we couldn't find a good PostBB, stop.
4496 if (!PostBB)
4497 return false;
4498
4499 bool InvertPCond = false, InvertQCond = false;
4500 // Canonicalize fallthroughs to the true branches.
4501 if (PFB == QBI->getParent()) {
4502 std::swap(PFB, PTB);
4503 InvertPCond = true;
4504 }
4505 if (QFB == PostBB) {
4506 std::swap(QFB, QTB);
4507 InvertQCond = true;
4508 }
4509
4510 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4511 // and QFB may not. Model fallthroughs as a nullptr block.
4512 if (PTB == QBI->getParent())
4513 PTB = nullptr;
4514 if (QTB == PostBB)
4515 QTB = nullptr;
4516
4517 // Legality bailouts. We must have at least the non-fallthrough blocks and
4518 // the post-dominating block, and the non-fallthroughs must only have one
4519 // predecessor.
4520 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4521 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4522 };
4523 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4524 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4525 return false;
4526 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4527 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4528 return false;
4529 if (!QBI->getParent()->hasNUses(2))
4530 return false;
4531
4532 // OK, this is a sequence of two diamonds or triangles.
4533 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4534 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4535 for (auto *BB : {PTB, PFB}) {
4536 if (!BB)
4537 continue;
4538 for (auto &I : *BB)
4540 PStoreAddresses.insert(SI->getPointerOperand());
4541 }
4542 for (auto *BB : {QTB, QFB}) {
4543 if (!BB)
4544 continue;
4545 for (auto &I : *BB)
4547 QStoreAddresses.insert(SI->getPointerOperand());
4548 }
4549
4550 set_intersect(PStoreAddresses, QStoreAddresses);
4551 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4552 // clear what it contains.
4553 auto &CommonAddresses = PStoreAddresses;
4554
4555 bool Changed = false;
4556 for (auto *Address : CommonAddresses)
4557 Changed |=
4558 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4559 InvertPCond, InvertQCond, DTU, DL, TTI);
4560 return Changed;
4561}
4562
4563/// If the previous block ended with a widenable branch, determine if reusing
4564/// the target block is profitable and legal. This will have the effect of
4565/// "widening" PBI, but doesn't require us to reason about hosting safety.
4567 DomTreeUpdater *DTU) {
4568 // TODO: This can be generalized in two important ways:
4569 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4570 // values from the PBI edge.
4571 // 2) We can sink side effecting instructions into BI's fallthrough
4572 // successor provided they doesn't contribute to computation of
4573 // BI's condition.
4574 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4575 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4576 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4577 !BI->getParent()->getSinglePredecessor())
4578 return false;
4579 if (!IfFalseBB->phis().empty())
4580 return false; // TODO
4581 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4582 // may undo the transform done here.
4583 // TODO: There might be a more fine-grained solution to this.
4584 if (!llvm::succ_empty(IfFalseBB))
4585 return false;
4586 // Use lambda to lazily compute expensive condition after cheap ones.
4587 auto NoSideEffects = [](BasicBlock &BB) {
4588 return llvm::none_of(BB, [](const Instruction &I) {
4589 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4590 });
4591 };
4592 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4593 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4594 NoSideEffects(*BI->getParent())) {
4595 auto *OldSuccessor = BI->getSuccessor(1);
4596 OldSuccessor->removePredecessor(BI->getParent());
4597 BI->setSuccessor(1, IfFalseBB);
4598 if (DTU)
4599 DTU->applyUpdates(
4600 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4601 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4602 return true;
4603 }
4604 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4605 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4606 NoSideEffects(*BI->getParent())) {
4607 auto *OldSuccessor = BI->getSuccessor(0);
4608 OldSuccessor->removePredecessor(BI->getParent());
4609 BI->setSuccessor(0, IfFalseBB);
4610 if (DTU)
4611 DTU->applyUpdates(
4612 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4613 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4614 return true;
4615 }
4616 return false;
4617}
4618
4619/// If we have a conditional branch as a predecessor of another block,
4620/// this function tries to simplify it. We know
4621/// that PBI and BI are both conditional branches, and BI is in one of the
4622/// successor blocks of PBI - PBI branches to BI.
4624 DomTreeUpdater *DTU,
4625 const DataLayout &DL,
4626 const TargetTransformInfo &TTI) {
4627 assert(PBI->isConditional() && BI->isConditional());
4628 BasicBlock *BB = BI->getParent();
4629
4630 // If this block ends with a branch instruction, and if there is a
4631 // predecessor that ends on a branch of the same condition, make
4632 // this conditional branch redundant.
4633 if (PBI->getCondition() == BI->getCondition() &&
4634 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4635 // Okay, the outcome of this conditional branch is statically
4636 // knowable. If this block had a single pred, handle specially, otherwise
4637 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4638 if (BB->getSinglePredecessor()) {
4639 // Turn this into a branch on constant.
4640 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4641 BI->setCondition(
4642 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4643 return true; // Nuke the branch on constant.
4644 }
4645 }
4646
4647 // If the previous block ended with a widenable branch, determine if reusing
4648 // the target block is profitable and legal. This will have the effect of
4649 // "widening" PBI, but doesn't require us to reason about hosting safety.
4650 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4651 return true;
4652
4653 // If both branches are conditional and both contain stores to the same
4654 // address, remove the stores from the conditionals and create a conditional
4655 // merged store at the end.
4656 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4657 return true;
4658
4659 // If this is a conditional branch in an empty block, and if any
4660 // predecessors are a conditional branch to one of our destinations,
4661 // fold the conditions into logical ops and one cond br.
4662
4663 // Ignore dbg intrinsics.
4664 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4665 return false;
4666
4667 int PBIOp, BIOp;
4668 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4669 PBIOp = 0;
4670 BIOp = 0;
4671 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4672 PBIOp = 0;
4673 BIOp = 1;
4674 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4675 PBIOp = 1;
4676 BIOp = 0;
4677 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4678 PBIOp = 1;
4679 BIOp = 1;
4680 } else {
4681 return false;
4682 }
4683
4684 // Check to make sure that the other destination of this branch
4685 // isn't BB itself. If so, this is an infinite loop that will
4686 // keep getting unwound.
4687 if (PBI->getSuccessor(PBIOp) == BB)
4688 return false;
4689
4690 // If predecessor's branch probability to BB is too low don't merge branches.
4691 SmallVector<uint32_t, 2> PredWeights;
4692 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4693 extractBranchWeights(*PBI, PredWeights) &&
4694 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4695
4697 PredWeights[PBIOp],
4698 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4699
4700 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4701 if (CommonDestProb >= Likely)
4702 return false;
4703 }
4704
4705 // Do not perform this transformation if it would require
4706 // insertion of a large number of select instructions. For targets
4707 // without predication/cmovs, this is a big pessimization.
4708
4709 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4710 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4711 unsigned NumPhis = 0;
4712 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4713 ++II, ++NumPhis) {
4714 if (NumPhis > 2) // Disable this xform.
4715 return false;
4716 }
4717
4718 // Finally, if everything is ok, fold the branches to logical ops.
4719 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4720
4721 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4722 << "AND: " << *BI->getParent());
4723
4725
4726 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4727 // branch in it, where one edge (OtherDest) goes back to itself but the other
4728 // exits. We don't *know* that the program avoids the infinite loop
4729 // (even though that seems likely). If we do this xform naively, we'll end up
4730 // recursively unpeeling the loop. Since we know that (after the xform is
4731 // done) that the block *is* infinite if reached, we just make it an obviously
4732 // infinite loop with no cond branch.
4733 if (OtherDest == BB) {
4734 // Insert it at the end of the function, because it's either code,
4735 // or it won't matter if it's hot. :)
4736 BasicBlock *InfLoopBlock =
4737 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4738 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4739 if (DTU)
4740 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4741 OtherDest = InfLoopBlock;
4742 }
4743
4744 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4745
4746 // BI may have other predecessors. Because of this, we leave
4747 // it alone, but modify PBI.
4748
4749 // Make sure we get to CommonDest on True&True directions.
4750 Value *PBICond = PBI->getCondition();
4751 IRBuilder<NoFolder> Builder(PBI);
4752 if (PBIOp)
4753 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4754
4755 Value *BICond = BI->getCondition();
4756 if (BIOp)
4757 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4758
4759 // Merge the conditions.
4760 Value *Cond =
4761 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4762
4763 // Modify PBI to branch on the new condition to the new dests.
4764 PBI->setCondition(Cond);
4765 PBI->setSuccessor(0, CommonDest);
4766 PBI->setSuccessor(1, OtherDest);
4767
4768 if (DTU) {
4769 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4770 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4771
4772 DTU->applyUpdates(Updates);
4773 }
4774
4775 // Update branch weight for PBI.
4776 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4777 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4778 bool HasWeights =
4779 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4780 SuccTrueWeight, SuccFalseWeight);
4781 if (HasWeights) {
4782 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4783 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4784 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4785 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4786 // The weight to CommonDest should be PredCommon * SuccTotal +
4787 // PredOther * SuccCommon.
4788 // The weight to OtherDest should be PredOther * SuccOther.
4789 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4790 PredOther * SuccCommon,
4791 PredOther * SuccOther};
4792
4793 setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
4794 /*ElideAllZero=*/true);
4795 // Cond may be a select instruction with the first operand set to "true", or
4796 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4798 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4799 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4800 // The select is predicated on PBICond
4801 assert(dyn_cast<SelectInst>(SI)->getCondition() == PBICond);
4802 // The corresponding probabilities are what was referred to above as
4803 // PredCommon and PredOther.
4804 setFittedBranchWeights(*SI, {PredCommon, PredOther},
4805 /*IsExpected=*/false, /*ElideAllZero=*/true);
4806 }
4807 }
4808
4809 // OtherDest may have phi nodes. If so, add an entry from PBI's
4810 // block that are identical to the entries for BI's block.
4811 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4812
4813 // We know that the CommonDest already had an edge from PBI to
4814 // it. If it has PHIs though, the PHIs may have different
4815 // entries for BB and PBI's BB. If so, insert a select to make
4816 // them agree.
4817 for (PHINode &PN : CommonDest->phis()) {
4818 Value *BIV = PN.getIncomingValueForBlock(BB);
4819 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4820 Value *PBIV = PN.getIncomingValue(PBBIdx);
4821 if (BIV != PBIV) {
4822 // Insert a select in PBI to pick the right value.
4824 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4825 PN.setIncomingValue(PBBIdx, NV);
4826 // The select has the same condition as PBI, in the same BB. The
4827 // probabilities don't change.
4828 if (HasWeights) {
4829 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4830 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4831 setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
4832 /*IsExpected=*/false, /*ElideAllZero=*/true);
4833 }
4834 }
4835 }
4836
4837 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4838 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4839
4840 // This basic block is probably dead. We know it has at least
4841 // one fewer predecessor.
4842 return true;
4843}
4844
4845// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4846// true or to FalseBB if Cond is false.
4847// Takes care of updating the successors and removing the old terminator.
4848// Also makes sure not to introduce new successors by assuming that edges to
4849// non-successor TrueBBs and FalseBBs aren't reachable.
4850bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4851 Value *Cond, BasicBlock *TrueBB,
4852 BasicBlock *FalseBB,
4853 uint32_t TrueWeight,
4854 uint32_t FalseWeight) {
4855 auto *BB = OldTerm->getParent();
4856 // Remove any superfluous successor edges from the CFG.
4857 // First, figure out which successors to preserve.
4858 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4859 // successor.
4860 BasicBlock *KeepEdge1 = TrueBB;
4861 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4862
4863 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4864
4865 // Then remove the rest.
4866 for (BasicBlock *Succ : successors(OldTerm)) {
4867 // Make sure only to keep exactly one copy of each edge.
4868 if (Succ == KeepEdge1)
4869 KeepEdge1 = nullptr;
4870 else if (Succ == KeepEdge2)
4871 KeepEdge2 = nullptr;
4872 else {
4873 Succ->removePredecessor(BB,
4874 /*KeepOneInputPHIs=*/true);
4875
4876 if (Succ != TrueBB && Succ != FalseBB)
4877 RemovedSuccessors.insert(Succ);
4878 }
4879 }
4880
4881 IRBuilder<> Builder(OldTerm);
4882 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4883
4884 // Insert an appropriate new terminator.
4885 if (!KeepEdge1 && !KeepEdge2) {
4886 if (TrueBB == FalseBB) {
4887 // We were only looking for one successor, and it was present.
4888 // Create an unconditional branch to it.
4889 Builder.CreateBr(TrueBB);
4890 } else {
4891 // We found both of the successors we were looking for.
4892 // Create a conditional branch sharing the condition of the select.
4893 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4894 if (TrueWeight != FalseWeight)
4895 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4896 /*IsExpected=*/false, /*ElideAllZero=*/true);
4897 }
4898 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4899 // Neither of the selected blocks were successors, so this
4900 // terminator must be unreachable.
4901 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4902 } else {
4903 // One of the selected values was a successor, but the other wasn't.
4904 // Insert an unconditional branch to the one that was found;
4905 // the edge to the one that wasn't must be unreachable.
4906 if (!KeepEdge1) {
4907 // Only TrueBB was found.
4908 Builder.CreateBr(TrueBB);
4909 } else {
4910 // Only FalseBB was found.
4911 Builder.CreateBr(FalseBB);
4912 }
4913 }
4914
4916
4917 if (DTU) {
4918 SmallVector<DominatorTree::UpdateType, 2> Updates;
4919 Updates.reserve(RemovedSuccessors.size());
4920 for (auto *RemovedSuccessor : RemovedSuccessors)
4921 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4922 DTU->applyUpdates(Updates);
4923 }
4924
4925 return true;
4926}
4927
4928// Replaces
4929// (switch (select cond, X, Y)) on constant X, Y
4930// with a branch - conditional if X and Y lead to distinct BBs,
4931// unconditional otherwise.
4932bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4933 SelectInst *Select) {
4934 // Check for constant integer values in the select.
4935 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4936 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4937 if (!TrueVal || !FalseVal)
4938 return false;
4939
4940 // Find the relevant condition and destinations.
4941 Value *Condition = Select->getCondition();
4942 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4943 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4944
4945 // Get weight for TrueBB and FalseBB.
4946 uint32_t TrueWeight = 0, FalseWeight = 0;
4947 SmallVector<uint64_t, 8> Weights;
4948 bool HasWeights = hasBranchWeightMD(*SI);
4949 if (HasWeights) {
4950 getBranchWeights(SI, Weights);
4951 if (Weights.size() == 1 + SI->getNumCases()) {
4952 TrueWeight =
4953 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4954 FalseWeight =
4955 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4956 }
4957 }
4958
4959 // Perform the actual simplification.
4960 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4961 FalseWeight);
4962}
4963
4964// Replaces
4965// (indirectbr (select cond, blockaddress(@fn, BlockA),
4966// blockaddress(@fn, BlockB)))
4967// with
4968// (br cond, BlockA, BlockB).
4969bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4970 SelectInst *SI) {
4971 // Check that both operands of the select are block addresses.
4972 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4973 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4974 if (!TBA || !FBA)
4975 return false;
4976
4977 // Extract the actual blocks.
4978 BasicBlock *TrueBB = TBA->getBasicBlock();
4979 BasicBlock *FalseBB = FBA->getBasicBlock();
4980
4981 // Perform the actual simplification.
4982 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4983 0);
4984}
4985
4986/// This is called when we find an icmp instruction
4987/// (a seteq/setne with a constant) as the only instruction in a
4988/// block that ends with an uncond branch. We are looking for a very specific
4989/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4990/// this case, we merge the first two "or's of icmp" into a switch, but then the
4991/// default value goes to an uncond block with a seteq in it, we get something
4992/// like:
4993///
4994/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4995/// DEFAULT:
4996/// %tmp = icmp eq i8 %A, 92
4997/// br label %end
4998/// end:
4999/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5000///
5001/// We prefer to split the edge to 'end' so that there is a true/false entry to
5002/// the PHI, merging the third icmp into the switch.
5003bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5004 ICmpInst *ICI, IRBuilder<> &Builder) {
5005 BasicBlock *BB = ICI->getParent();
5006
5007 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5008 // complex.
5009 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5010 return false;
5011
5012 Value *V = ICI->getOperand(0);
5013 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5014
5015 // The pattern we're looking for is where our only predecessor is a switch on
5016 // 'V' and this block is the default case for the switch. In this case we can
5017 // fold the compared value into the switch to simplify things.
5018 BasicBlock *Pred = BB->getSinglePredecessor();
5019 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5020 return false;
5021
5022 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5023 if (SI->getCondition() != V)
5024 return false;
5025
5026 // If BB is reachable on a non-default case, then we simply know the value of
5027 // V in this block. Substitute it and constant fold the icmp instruction
5028 // away.
5029 if (SI->getDefaultDest() != BB) {
5030 ConstantInt *VVal = SI->findCaseDest(BB);
5031 assert(VVal && "Should have a unique destination value");
5032 ICI->setOperand(0, VVal);
5033
5034 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5035 ICI->replaceAllUsesWith(V);
5036 ICI->eraseFromParent();
5037 }
5038 // BB is now empty, so it is likely to simplify away.
5039 return requestResimplify();
5040 }
5041
5042 // Ok, the block is reachable from the default dest. If the constant we're
5043 // comparing exists in one of the other edges, then we can constant fold ICI
5044 // and zap it.
5045 if (SI->findCaseValue(Cst) != SI->case_default()) {
5046 Value *V;
5047 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5049 else
5051
5052 ICI->replaceAllUsesWith(V);
5053 ICI->eraseFromParent();
5054 // BB is now empty, so it is likely to simplify away.
5055 return requestResimplify();
5056 }
5057
5058 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5059 // the block.
5060 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5061 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5062 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5064 return false;
5065
5066 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5067 // true in the PHI.
5068 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5069 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5070
5071 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5072 std::swap(DefaultCst, NewCst);
5073
5074 // Replace ICI (which is used by the PHI for the default value) with true or
5075 // false depending on if it is EQ or NE.
5076 ICI->replaceAllUsesWith(DefaultCst);
5077 ICI->eraseFromParent();
5078
5079 SmallVector<DominatorTree::UpdateType, 2> Updates;
5080
5081 // Okay, the switch goes to this block on a default value. Add an edge from
5082 // the switch to the merge point on the compared value.
5083 BasicBlock *NewBB =
5084 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5085 {
5086 SwitchInstProfUpdateWrapper SIW(*SI);
5087 auto W0 = SIW.getSuccessorWeight(0);
5089 if (W0) {
5090 NewW = ((uint64_t(*W0) + 1) >> 1);
5091 SIW.setSuccessorWeight(0, *NewW);
5092 }
5093 SIW.addCase(Cst, NewBB, NewW);
5094 if (DTU)
5095 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5096 }
5097
5098 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5099 Builder.SetInsertPoint(NewBB);
5100 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5101 Builder.CreateBr(SuccBlock);
5102 PHIUse->addIncoming(NewCst, NewBB);
5103 if (DTU) {
5104 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5105 DTU->applyUpdates(Updates);
5106 }
5107 return true;
5108}
5109
/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so. Returns true if the branch was
/// replaced (BI is erased and a switch takes its place).
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  // For an and-of-setne chain the equal values fall through to the false
  // successor, so the roles of the two destinations are reversed.
  if (!TrueWhenEqual)
    std::swap(DefaultBB, EdgeBB);

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH.  BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    if (TrueWhenEqual)
      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
    else
      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Create the new switch instruction now.
  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());

  // Add all of the 'cases' to the switch instruction.
  for (ConstantInt *Val : Values)
    New->addCase(Val, EdgeBB);

  // We added edges from PI to the EdgeBB. As such, if there were any
  // PHI nodes in EdgeBB, they need entries to be added corresponding to
  // the number of edges added. Each PHI already has one incoming entry for
  // BB, so Values.size()-1 duplicates bring the count up to one entry per
  // switch case edge.
  for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
    PHINode *PN = cast<PHINode>(BBI);
    Value *InVal = PN->getIncomingValueForBlock(BB);
    for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
      PN->addIncoming(InVal, BB);
  }

  // Erase the old branch instruction.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5242
5243bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5244 if (isa<PHINode>(RI->getValue()))
5245 return simplifyCommonResume(RI);
5246 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5247 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5248 // The resume must unwind the exception that caused control to branch here.
5249 return simplifySingleResume(RI);
5250
5251 return false;
5252}
5253
// Check if cleanup block is empty: returns true iff every instruction in the
// range R is an intrinsic with no EH-relevant semantics (debug-info records
// or lifetime.end markers). Any other instruction disqualifies the range.
  for (Instruction &I : R) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      return false;

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      break;
    default:
      // Anything else (including other intrinsics) makes the block non-empty.
      return false;
    }
  }
  return true;
}
5274
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // TrivialUnwindBlocks was checked non-empty above, so we always report a
  // change at this point.
  return !TrivialUnwindBlocks.empty();
}
5344
// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
      make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  // make_early_inc_range is needed because removeUnwindEdge mutates the
  // predecessor list while we iterate over it.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5367
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
      make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
    if (UnwindDest == nullptr) {
      if (DTU) {
        // Apply queued updates before removeUnwindEdge mutates the CFG with
        // DTU, so the updater never sees inconsistent state.
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      // Retarget this predecessor's terminator straight at the unwind
      // destination, bypassing the empty cleanup block.
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5483
// Try to merge two cleanuppads together: when this cleanupret is the single
// predecessor of another cleanuppad, fold the successor pad into the
// predecessor pad and branch directly to the unwind destination.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanuppad with the predecessor pad.
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5516
5517bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5518 // It is possible to transiantly have an undef cleanuppad operand because we
5519 // have deleted some, but not all, dead blocks.
5520 // Eventually, this block will be deleted.
5521 if (isa<UndefValue>(RI->getOperand(0)))
5522 return false;
5523
5524 if (mergeCleanupPad(RI))
5525 return true;
5526
5527 if (removeEmptyCleanup(RI, DTU))
5528 return true;
5529
5530 return false;
5531}
5532
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Deletes dead code feeding an unreachable terminator and rewrites each
// predecessor's terminator so it no longer branches into the dead block.
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // A SetVector ensures each predecessor is processed once, even when it has
  // multiple edges into BB.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        // The edge into BB is never taken, so the condition value on the
        // surviving path can be recorded as an assumption.
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          // Flush queued updates before removeUnwindEdge mutates the CFG.
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // BB may also appear among the handlers; drop each such handler.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5716
  assert(Cases.size() >= 1);

  // The cases must arrive sorted in descending order: each element has to be
  // exactly one less than its predecessor for the set to form a contiguous
  // integer range.
  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
      return false;
  }
  return true;
}
5727
                                           DomTreeUpdater *DTU,
                                           bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  // Drop BB's incoming entries from the old default's PHIs, unless the caller
  // asked to keep the original default block wired up (e.g. when a case edge
  // to it was just added).
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge when no remaining case still targets the
    // original default block.
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5751
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && casesAreContiguous(CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (casesAreContiguous(CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.

  // casesAreContiguous verified a descending order, so back() is the smallest
  // case value; negating it rebases the contiguous range at zero.
  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Offset->getType(), ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI)) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Halve both weights together until each fits in 32 bits, preserving
      // their ratio.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  // After the rewrite there is exactly one edge from BB to each successor, so
  // drop all but one of the incoming entries the switch contributed.
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
5881
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases. Also removes (or replaces) the default
/// destination when the live cases provably cover the whole value space.
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, AC, SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =

  // Gather dead cases.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      // Track per-successor case counts so that successors whose cases all
      // die can have their dominator-tree edge deleted at the end.
      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
      if (Inserted)
        UniqueSuccessors.push_back(Successor);
      ++It->second;
    }
    // A case is dead if its value contradicts a known-zero/known-one bit, or
    // needs more significant bits than the condition can have.
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault = !SI->defaultDestUnreachable();
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */) {
    uint64_t AllNumCases = 1ULL << NumUnknownBits;
    if (SI->getNumCases() == AllNumCases) {
      return true;
    }
    // When only one case value is missing, replace default with that case.
    // Eliminating the default branch will provide more opportunities for
    // optimization, such as lookup tables.
    if (SI->getNumCases() == AllNumCases - 1) {
      assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
      IntegerType *CondTy = cast<IntegerType>(Cond->getType());
      if (CondTy->getIntegerBitWidth() > 64 ||
          !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
        return false;

      // XOR-folding all present case values recovers the single missing
      // value of the covered space (valid because NumUnknownBits > 1).
      uint64_t MissingCaseVal = 0;
      for (const auto &Case : SI->cases())
        MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
      auto *MissingCase =
          cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
      SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
      createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
      SIW.setSuccessorWeight(0, 0);
      return true;
    }
  }

  if (DeadCases.empty())
    return false;

  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
5980
/// If BB would be eligible for simplification by
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
/// by an unconditional branch), look at the phi node for BB in the successor
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
/// Returns nullptr when BB is not a candidate or no matching phi exists.
                                              BasicBlock *BB, int *PhiIndex) {
  if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
    return nullptr; // BB must be empty to be a candidate for simplification.
  if (!BB->getSinglePredecessor())
    return nullptr; // BB must be dominated by the switch.

  if (!Branch || !Branch->isUnconditional())
    return nullptr; // Terminator must be unconditional branch.

  BasicBlock *Succ = Branch->getSuccessor(0);

  // Scan the successor's phis for one whose incoming value from BB is exactly
  // CaseValue; report the first match.
  for (PHINode &PHI : Succ->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB);
    assert(Idx >= 0 && "PHI has no entry for predecessor?");

    Value *InValue = PHI.getIncomingValue(Idx);
    if (InValue != CaseValue)
      continue;

    *PhiIndex = Idx;
    return &PHI;
  }

  return nullptr;
}
6013
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant case
    // value rather than the switch condition variable:
    // switchbb:
    //   switch i32 %x, label %default [
    //     i32 17, label %succ
    //   ...
    // succ:
    //   %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    //   %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch to
      // a phi. If there is >1, that means multiple cases of the switch map to 1
      // value in the phi, and that phi value is not the switch condition. Thus,
      // this transform would not make sense (the phi would be invalid because
      // a phi can't have different incoming values from the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
          count(Phi.blocks(), SwitchBlock) == 1) {
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case constants.
    int PhiIdx;
    if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
      ForwardingNodes[Phi].push_back(PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Check if it helps to fold PHI. Forwarding a single slot is only
    // profitable if the phi already uses the condition somewhere else.
    if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
      continue;

    // Forward the switch condition into every collected phi slot.
    for (int Index : Indexes)
      Phi->setIncomingValue(Index, SI->getCondition());
    Changed = true;
  }

  return Changed;
}
6072
/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
  // Thread-local or DLL-import-dependent constants cannot be materialized in
  // a plain constant array initializer.
  if (C->isThreadDependent())
    return false;
  if (C->isDLLImportDependent())
    return false;

  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
    return false;

    // Pointer casts and in-bounds GEPs will not prohibit the backend from
    // materializing the array of constants.
    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
      return false;
  }

  // Finally, defer to the target: some constants are legal in IR but
  // expensive for the backend to place in a table.
  if (!TTI.shouldBuildLookupTablesForConstant(C))
    return false;

  return true;
}
6099
/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning 0 if it's not there.
static Constant *
  if (Constant *C = dyn_cast<Constant>(V))
    return C;
  // Fall back to constants previously propagated into the pool; lookup()
  // returns nullptr when V has no entry.
  return ConstantPool.lookup(V);
}
6109
/// Try to fold instruction I into a constant. This works for
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
    Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
    if (!A)
      return nullptr;
    // An all-ones i1 condition selects the true arm, a null condition the
    // false arm; anything else cannot be decided here.
    if (A->isAllOnesValue())
      return lookupConstant(Select->getTrueValue(), ConstantPool);
    if (A->isNullValue())
      return lookupConstant(Select->getFalseValue(), ConstantPool);
    return nullptr;
  }

  // Generic path: fold only if every operand resolves to a constant.
  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
    if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
      COps.push_back(A);
    else
      return nullptr;
  }

  return ConstantFoldInstOperands(I, COps, DL);
}
6138
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(0);
    } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (I->getParent() == CaseDest)
          continue;
        if (PHINode *Phi = dyn_cast<PHINode>(User))
          if (Phi->getIncomingBlock(Use) == CaseDest)
            continue;
        return false;
      }

      // Remember the folded value so later instructions can use it.
      ConstantPool.insert(std::make_pair(&I, C));
    } else {
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!validLookupTableConstant(ConstVal, TTI))
      return false;

    Res.push_back(std::make_pair(&PHI, ConstVal));
  }

  // Succeed only if at least one phi result was determined.
  return Res.size() > 0;
}
6212
6213// Helper function used to add CaseVal to the list of cases that generate
6214// Result. Returns the updated number of cases that generate this result.
6215static size_t mapCaseToResult(ConstantInt *CaseVal,
6216 SwitchCaseResultVectorTy &UniqueResults,
6217 Constant *Result) {
6218 for (auto &I : UniqueResults) {
6219 if (I.first == Result) {
6220 I.second.push_back(CaseVal);
6221 return I.second.size();
6222 }
6223 }
6224 UniqueResults.push_back(
6225 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6226 return 1;
6227}
6228
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency: all cases must feed the same phi node.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value (a null case value denotes the default).
  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;

  return DefaultResult || SI->defaultDestUnreachable();
}
6282
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// TODO: Handle switches with more than 2 cases that map to the same result.
// The branch weights correspond to the provided Condition (i.e. if Condition is
// modified from the original SwitchInst, the caller must adjust the weights)
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL,
                                 ArrayRef<uint32_t> BranchWeights) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }

  // Only propagate branch weights when they exist and profile fixes are
  // enabled.
  const bool HasBranchWeights =
      !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;

  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
      if (auto *SI = dyn_cast<SelectInst>(SelectValue);
          SI && HasBranchWeights) {
        // We start with 3 probabilities, where the numerator is the
        // corresponding BranchWeights[i], and the denominator is the sum over
        // BranchWeights. We want the probability and negative probability of
        // Condition == SecondCase.
        assert(BranchWeights.size() == 3);
            *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
            /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                      SelectValue, "switch.select");
    if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
      // We may have had a DefaultResult. Base the position of the first and
      // second's branch weights accordingly. Also the probability that
      // Condition != FirstCase needs to take that into account.
      // NOTE(review): `Condition` is the switch condition value and looks
      // always non-null at this point; confirm whether `DefaultResult` was the
      // intended operand of the two null checks below.
      assert(BranchWeights.size() >= 2);
      size_t FirstCasePos = (Condition != nullptr);
      size_t SecondCasePos = FirstCasePos + 1;
      uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
          {BranchWeights[FirstCasePos],
           DefaultCase + BranchWeights[SecondCasePos]},
          /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
    return Ret;
  }

  // Handle the degenerate case where two cases have the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(CaseCount)) {
          Value *And = Builder.CreateAnd(Condition, AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              And, Constant::getIntegerValue(And->getType(), AndMask));
          Value *Ret =
              Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
          if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
            // We know there's a Default case. We base the resulting branch
            // weights off its probability.
            assert(BranchWeights.size() >= 2);
              *SI,
              {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
              /*IsExpected=*/false, /*ElideAllZero=*/true);
          }
          return Ret;
        }
      }

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        Value *Ret =
            Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
        if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
          assert(BranchWeights.size() >= 2);
            *SI,
            {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
            /*IsExpected=*/false, /*ElideAllZero=*/true);
        }
        return Ret;
      }
    }

    // Handle the degenerate case where two cases have the same value.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      Value *Ret =
          Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
        assert(BranchWeights.size() >= 2);
          *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
          /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
      return Ret;
    }
  }

  return nullptr;
}
6441
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  // Branch directly to the phi's block; record the new edge only if it did
  // not already exist.
  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(DestBB);

  // Remove the switch.

  // Collapse all of SelectBB's slots in the phi down to a single entry that
  // carries the computed select value.
  PHI->removeIncomingValueIf(
      [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
  PHI->addIncoming(SelectValue, SelectBB);

  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(SelectBB);
    // Record each deleted edge once even if the switch had duplicate
    // successors.
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}
6477
/// If a switch is only used to initialize one or more phi nodes in a common
/// successor block with only two different constant values, try to replace the
/// switch with a select. Returns true if the fold was made.
                            DomTreeUpdater *DTU, const DataLayout &DL,
                            const TargetTransformInfo &TTI) {
  Value *const Cond = SI->getCondition();
  PHINode *PHI = nullptr;
  BasicBlock *CommonDest = nullptr;
  Constant *DefaultResult;
  SwitchCaseResultVectorTy UniqueResults;
  // Collect all the cases that will deliver the same value from the switch.
  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
                             DL, TTI, /*MaxUniqueResults*/ 2))
    return false;

  assert(PHI != nullptr && "PHI for value select not found");
  Builder.SetInsertPoint(SI);
  SmallVector<uint32_t, 4> BranchWeights;
    [[maybe_unused]] auto HasWeights =
    assert(!HasWeights == (BranchWeights.empty()));
  }
  // There must be at least one weight per unique result (plus one for the
  // default when present).
  assert(BranchWeights.empty() ||
         (BranchWeights.size() >=
          UniqueResults.size() + (DefaultResult != nullptr)));

  Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
                                          Builder, DL, BranchWeights);
  if (!SelectValue)
    return false;

  // The fold succeeded: rewrite the CFG and delete the switch.
  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
  return true;
}
6514
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

private:
  // Depending on the switch, there are different alternatives. The kinds are
  // ordered from cheapest to most expensive representation; the constructor
  // picks the first one that applies.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6590
SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    // Table slot is the case value relative to the smallest case (Offset).
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Track whether all (non-poison) results are the same value; poison
    // entries do not disqualify a single-value table.
    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
        // This is a poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      // Probe whether multiplier * max-index overflows in the signed sense.
      if (isIntN(M.getBitWidth(), TableSize - 1))
        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ValueType, TableSize);
  Initializer = ConstantArray::get(TableTy, TableContents);

  Kind = LookupTableKind;
}
6720
6721Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
6722 const DataLayout &DL, Function *Func) {
6723 switch (Kind) {
6724 case SingleValueKind:
6725 return SingleValue;
6726 case LinearMapKind: {
6727 ++NumLinearMaps;
6728 // Derive the result value from the input value.
6729 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6730 false, "switch.idx.cast");
6731 if (!LinearMultiplier->isOne())
6732 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6733 /*HasNUW = */ false,
6734 /*HasNSW = */ !LinearMapValWrapped);
6735
6736 if (!LinearOffset->isZero())
6737 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6738 /*HasNUW = */ false,
6739 /*HasNSW = */ !LinearMapValWrapped);
6740 return Result;
6741 }
6742 case BitMapKind: {
6743 ++NumBitMaps;
6744 // Type of the bitmap (e.g. i59).
6745 IntegerType *MapTy = BitMap->getIntegerType();
6746
6747 // Cast Index to the same type as the bitmap.
6748 // Note: The Index is <= the number of elements in the table, so
6749 // truncating it to the width of the bitmask is safe.
6750 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6751
6752 // Multiply the shift amount by the element width. NUW/NSW can always be
6753 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6754 // BitMap's bit width.
6755 ShiftAmt = Builder.CreateMul(
6756 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6757 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6758
6759 // Shift down.
6760 Value *DownShifted =
6761 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6762 // Mask off.
6763 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6764 }
6765 case LookupTableKind: {
6766 ++NumLookupTables;
6767 auto *Table =
6768 new GlobalVariable(*Func->getParent(), Initializer->getType(),
6769 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
6770 Initializer, "switch.table." + Func->getName());
6771 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6772 // Set the alignment to that of an array items. We will be only loading one
6773 // value out of it.
6774 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
6775 Type *IndexTy = DL.getIndexType(Table->getType());
6776 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
6777
6778 if (Index->getType() != IndexTy) {
6779 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
6780 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
6781 if (auto *Zext = dyn_cast<ZExtInst>(Index))
6782 Zext->setNonNeg(
6783 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
6784 }
6785
6786 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
6787 Value *GEP =
6788 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
6789 return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
6790 }
6791 }
6792 llvm_unreachable("Unknown helper kind!");
6793}
6794
6795bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6796 uint64_t TableSize,
6797 Type *ElementType) {
6798 auto *IT = dyn_cast<IntegerType>(ElementType);
6799 if (!IT)
6800 return false;
6801 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6802 // are <= 15, we could try to narrow the type.
6803
6804 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6805 if (TableSize >= UINT_MAX / IT->getBitWidth())
6806 return false;
6807 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6808}
6809
                                        const DataLayout &DL) {
  // Allow any legal type.
  if (TTI.isTypeLegal(Ty))
    return true;

  // Beyond legal types, only integers are considered below.
  auto *IT = dyn_cast<IntegerType>(Ty);
  if (!IT)
    return false;

  // Also allow power of 2 integer types that have at least 8 bits and fit in
  // a register. These types are common in frontend languages and targets
  // usually support loads of these types.
  // TODO: We could relax this to any integer that fits in a register and rely
  // on ABI alignment and padding in the table to allow the load to be widened.
  // Or we could widen the constants and truncate the load.
  unsigned BitWidth = IT->getBitWidth();
  return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
         DL.fitsInLegalInteger(IT->getBitWidth());
}
6830
6831Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
6832
6833bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
6834
/// Return true if NumCases cases spread over CaseRange values are dense
/// enough (at least 40%) to justify a table-based lowering.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensity = 40;

  // Ranges this large would overflow the products below.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Dense iff NumCases / CaseRange >= MinDensity / 100, checked without
  // division.
  return NumCases * 100 >= CaseRange * MinDensity;
}
6846
  // Case range spanned by the values; assumes Values is sorted ascending so
  // back() - front() is the full span — TODO confirm at the call sites.
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  if (Range < Diff)
    return false; // Overflow.

  return isSwitchDense(Values.size(), Range);
}
6855
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
// TODO: We could support larger than legal types by limiting based on the
// number of loads required and/or table size. If the constants are small we
// could use smaller table entries and extend after the load.
                                const TargetTransformInfo &TTI,
                                const DataLayout &DL,
                                const SmallVector<Type *> &ResultTypes) {
  if (SI->getNumCases() > TableSize)
    return false; // TableSize overflowed.

  bool AllTablesFitInRegister = true;
  bool HasIllegalType = false;
  for (const auto &Ty : ResultTypes) {
    // Saturate this flag to true.
    HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);

    // Saturate this flag to false.
    AllTablesFitInRegister =
        AllTablesFitInRegister &&
        SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);

    // If both flags saturate, we're done. NOTE: This *only* works with
    // saturating flags, and all flags have to saturate first due to the
    // non-deterministic behavior of iterating over a dense map.
    if (HasIllegalType && !AllTablesFitInRegister)
      break;
  }

  // If each table would fit in a register, we should build it anyway.
  if (AllTablesFitInRegister)
    return true;

  // Don't build a table that doesn't fit in-register if it has illegal types.
  if (HasIllegalType)
    return false;

  // Otherwise, build only if the cases are dense enough.
  return isSwitchDense(SI->getNumCases(), TableSize);
}
6896
6898 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6899 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
6900 const DataLayout &DL, const TargetTransformInfo &TTI) {
6901 if (MinCaseVal.isNullValue())
6902 return true;
6903 if (MinCaseVal.isNegative() ||
6904 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6905 !HasDefaultResults)
6906 return false;
6907 return all_of(ResultTypes, [&](const auto &ResultType) {
6908 return SwitchReplacement::wouldFitInRegister(
6909 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
6910 });
6911}
6912
6913/// Try to reuse the switch table index compare. Following pattern:
6914/// \code
6915/// if (idx < tablesize)
6916/// r = table[idx]; // table does not contain default_value
6917/// else
6918/// r = default_value;
6919/// if (r != default_value)
6920/// ...
6921/// \endcode
6922/// Is optimized to:
6923/// \code
6924/// cond = idx < tablesize;
6925/// if (cond)
6926/// r = table[idx];
6927/// else
6928/// r = default_value;
6929/// if (cond)
6930/// ...
6931/// \endcode
6932/// Jump threading will then eliminate the second if(cond).
6934 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6935 Constant *DefaultValue,
6936 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6938 if (!CmpInst)
6939 return;
6940
6941 // We require that the compare is in the same block as the phi so that jump
6942 // threading can do its work afterwards.
6943 if (CmpInst->getParent() != PhiBlock)
6944 return;
6945
6947 if (!CmpOp1)
6948 return;
6949
6950 Value *RangeCmp = RangeCheckBranch->getCondition();
6951 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6952 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6953
6954 // Check if the compare with the default value is constant true or false.
6955 const DataLayout &DL = PhiBlock->getDataLayout();
6957 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6958 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6959 return;
6960
6961 // Check if the compare with the case values is distinct from the default
6962 // compare result.
6963 for (auto ValuePair : Values) {
6965 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6966 if (!CaseConst || CaseConst == DefaultConst ||
6967 (CaseConst != TrueConst && CaseConst != FalseConst))
6968 return;
6969 }
6970
6971 // Check if the branch instruction dominates the phi node. It's a simple
6972 // dominance check, but sufficient for our needs.
6973 // Although this check is invariant in the calling loops, it's better to do it
6974 // at this late stage. Practically we do it at most once for a switch.
6975 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6976 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6977 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6978 return;
6979 }
6980
6981 if (DefaultConst == FalseConst) {
6982 // The compare yields the same result. We can replace it.
6983 CmpInst->replaceAllUsesWith(RangeCmp);
6984 ++NumTableCmpReuses;
6985 } else {
6986 // The compare yields the same result, just inverted. We can replace it.
6987 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6988 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6989 RangeCheckBranch->getIterator());
6990 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6991 ++NumTableCmpReuses;
6992 }
6993}
6994
6995/// If the switch is only used to initialize one or more phi nodes in a common
6996/// successor block with different constant values, replace the switch with
6997/// lookup tables.
6999 DomTreeUpdater *DTU, const DataLayout &DL,
7000 const TargetTransformInfo &TTI) {
7001 assert(SI->getNumCases() > 1 && "Degenerate switch?");
7002
7003 BasicBlock *BB = SI->getParent();
7004 Function *Fn = BB->getParent();
7005
7006 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
7007 // split off a dense part and build a lookup table for that.
7008
7009 // FIXME: This creates arrays of GEPs to constant strings, which means each
7010 // GEP needs a runtime relocation in PIC code. We should just build one big
7011 // string and lookup indices into that.
7012
7013 // Ignore switches with less than three cases. Lookup tables will not make
7014 // them faster, so we don't analyze them.
7015 if (SI->getNumCases() < 3)
7016 return false;
7017
7018 // Figure out the corresponding result for each case value and phi node in the
7019 // common destination, as well as the min and max case values.
7020 assert(!SI->cases().empty());
7021 SwitchInst::CaseIt CI = SI->case_begin();
7022 ConstantInt *MinCaseVal = CI->getCaseValue();
7023 ConstantInt *MaxCaseVal = CI->getCaseValue();
7024
7025 BasicBlock *CommonDest = nullptr;
7026
7027 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7029
7031 SmallVector<Type *> ResultTypes;
7033
7034 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7035 ConstantInt *CaseVal = CI->getCaseValue();
7036 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
7037 MinCaseVal = CaseVal;
7038 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
7039 MaxCaseVal = CaseVal;
7040
7041 // Resulting value at phi nodes for this case value.
7043 ResultsTy Results;
7044 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
7045 Results, DL, TTI))
7046 return false;
7047
7048 // Append the result and result types from this case to the list for each
7049 // phi.
7050 for (const auto &I : Results) {
7051 PHINode *PHI = I.first;
7052 Constant *Value = I.second;
7053 auto [It, Inserted] = ResultLists.try_emplace(PHI);
7054 if (Inserted)
7055 PHIs.push_back(PHI);
7056 It->second.push_back(std::make_pair(CaseVal, Value));
7057 ResultTypes.push_back(PHI->getType());
7058 }
7059 }
7060
7061 // If the table has holes, we need a constant result for the default case
7062 // or a bitmask that fits in a register.
7063 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7064 bool HasDefaultResults =
7065 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
7066 DefaultResultsList, DL, TTI);
7067 for (const auto &I : DefaultResultsList) {
7068 PHINode *PHI = I.first;
7069 Constant *Result = I.second;
7070 DefaultResults[PHI] = Result;
7071 }
7072
7073 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7074 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7075 uint64_t TableSize;
7076 ConstantInt *TableIndexOffset;
7077 if (UseSwitchConditionAsTableIndex) {
7078 TableSize = MaxCaseVal->getLimitedValue() + 1;
7079 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7080 } else {
7081 TableSize =
7082 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7083
7084 TableIndexOffset = MinCaseVal;
7085 }
7086
7087 // If the default destination is unreachable, or if the lookup table covers
7088 // all values of the conditional variable, branch directly to the lookup table
7089 // BB. Otherwise, check that the condition is within the case range.
7090 uint64_t NumResults = ResultLists[PHIs[0]].size();
7091 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7092
7093 bool TableHasHoles = (NumResults < TableSize);
7094
7095 // If the table has holes but the default destination doesn't produce any
7096 // constant results, the lookup table entries corresponding to the holes will
7097 // contain poison.
7098 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7099
7100 // If the default destination doesn't produce a constant result but is still
7101 // reachable, and the lookup table has holes, we need to use a mask to
7102 // determine if the current index should load from the lookup table or jump
7103 // to the default case.
7104 // The mask is unnecessary if the table has holes but the default destination
7105 // is unreachable, as in that case the holes must also be unreachable.
7106 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7107 if (NeedMask) {
7108 // As an extra penalty for the validity test we require more cases.
7109 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7110 return false;
7111 if (!DL.fitsInLegalInteger(TableSize))
7112 return false;
7113 }
7114
7115 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7116 return false;
7117
7118 // Compute the table index value.
7119 Value *TableIndex;
7120 if (UseSwitchConditionAsTableIndex) {
7121 TableIndex = SI->getCondition();
7122 if (HasDefaultResults) {
7123 // Grow the table to cover all possible index values to avoid the range
7124 // check. It will use the default result to fill in the table hole later,
7125 // so make sure it exists.
7126 ConstantRange CR =
7127 computeConstantRange(TableIndex, /* ForSigned */ false);
7128 // Growing the table shouldn't have any size impact, which is checked by
7129 // wouldFitInRegister.
7130 // TODO: Consider growing the table also when it doesn't fit in a register
7131 // if no optsize is specified.
7132 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7133 if (!CR.isUpperWrapped() &&
7134 all_of(ResultTypes, [&](const auto &ResultType) {
7135 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7136 ResultType);
7137 })) {
7138 // There may be some case index larger than the UpperBound (unreachable
7139 // case), so make sure the table size does not get smaller.
7140 TableSize = std::max(UpperBound, TableSize);
7141 // The default branch is unreachable after we enlarge the lookup table.
7142 // Adjust DefaultIsReachable to reuse code path.
7143 DefaultIsReachable = false;
7144 }
7145 }
7146 }
7147
7148 // Keep track of the switch replacement for each phi
7150 for (PHINode *PHI : PHIs) {
7151 const auto &ResultList = ResultLists[PHI];
7152
7153 Type *ResultType = ResultList.begin()->second->getType();
7154 // Use any value to fill the lookup table holes.
7156 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7157 StringRef FuncName = Fn->getName();
7158 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7159 ResultList, DefaultVal, DL, FuncName);
7160 PhiToReplacementMap.insert({PHI, Replacement});
7161 }
7162
7163 bool AnyLookupTables = any_of(
7164 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7165
7166 // A few conditions prevent the generation of lookup tables:
7167 // 1. The target does not support lookup tables.
7168 // 2. The "no-jump-tables" function attribute is set.
7169 // However, these objections do not apply to other switch replacements, like
7170 // the bitmap, so we only stop here if any of these conditions are met and we
7171 // want to create a LUT. Otherwise, continue with the switch replacement.
7172 if (AnyLookupTables &&
7173 (!TTI.shouldBuildLookupTables() ||
7174 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7175 return false;
7176
7177 Builder.SetInsertPoint(SI);
7178 // TableIndex is the switch condition - TableIndexOffset if we don't
7179 // use the condition directly
7180 if (!UseSwitchConditionAsTableIndex) {
7181 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7182 // we can try to attach nsw.
7183 bool MayWrap = true;
7184 if (!DefaultIsReachable) {
7185 APInt Res =
7186 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7187 (void)Res;
7188 }
7189 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7190 "switch.tableidx", /*HasNUW =*/false,
7191 /*HasNSW =*/!MayWrap);
7192 }
7193
7194 std::vector<DominatorTree::UpdateType> Updates;
7195
7196 // Compute the maximum table size representable by the integer type we are
7197 // switching upon.
7198 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7199 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7200 assert(MaxTableSize >= TableSize &&
7201 "It is impossible for a switch to have more entries than the max "
7202 "representable value of its input integer type's size.");
7203
7204 // Create the BB that does the lookups.
7205 Module &Mod = *CommonDest->getParent()->getParent();
7206 BasicBlock *LookupBB = BasicBlock::Create(
7207 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7208
7209 BranchInst *RangeCheckBranch = nullptr;
7210
7211 Builder.SetInsertPoint(SI);
7212 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7213 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7214 Builder.CreateBr(LookupBB);
7215 if (DTU)
7216 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7217 // Note: We call removePredecessor later since we need to be able to get the
7218 // PHI value for the default case in case we're using a bit mask.
7219 } else {
7220 Value *Cmp = Builder.CreateICmpULT(
7221 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7222 RangeCheckBranch =
7223 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7224 if (DTU)
7225 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7226 }
7227
7228 // Populate the BB that does the lookups.
7229 Builder.SetInsertPoint(LookupBB);
7230
7231 if (NeedMask) {
7232 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7233 // re-purposed to do the hole check, and we create a new LookupBB.
7234 BasicBlock *MaskBB = LookupBB;
7235 MaskBB->setName("switch.hole_check");
7236 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7237 CommonDest->getParent(), CommonDest);
7238
7239 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7240 // unnecessary illegal types.
7241 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7242 APInt MaskInt(TableSizePowOf2, 0);
7243 APInt One(TableSizePowOf2, 1);
7244 // Build bitmask; fill in a 1 bit for every case.
7245 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7246 for (const auto &Result : ResultList) {
7247 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7248 .getLimitedValue();
7249 MaskInt |= One << Idx;
7250 }
7251 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7252
7253 // Get the TableIndex'th bit of the bitmask.
7254 // If this bit is 0 (meaning hole) jump to the default destination,
7255 // else continue with table lookup.
7256 IntegerType *MapTy = TableMask->getIntegerType();
7257 Value *MaskIndex =
7258 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7259 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7260 Value *LoBit = Builder.CreateTrunc(
7261 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7262 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7263 if (DTU) {
7264 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7265 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7266 }
7267 Builder.SetInsertPoint(LookupBB);
7268 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7269 }
7270
7271 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7272 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7273 // do not delete PHINodes here.
7274 SI->getDefaultDest()->removePredecessor(BB,
7275 /*KeepOneInputPHIs=*/true);
7276 if (DTU)
7277 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7278 }
7279
7280 for (PHINode *PHI : PHIs) {
7281 const ResultListTy &ResultList = ResultLists[PHI];
7282 auto Replacement = PhiToReplacementMap.at(PHI);
7283 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7284 // Do a small peephole optimization: re-use the switch table compare if
7285 // possible.
7286 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7287 BasicBlock *PhiBlock = PHI->getParent();
7288 // Search for compare instructions which use the phi.
7289 for (auto *User : PHI->users()) {
7290 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7291 Replacement.getDefaultValue(), ResultList);
7292 }
7293 }
7294
7295 PHI->addIncoming(Result, LookupBB);
7296 }
7297
7298 Builder.CreateBr(CommonDest);
7299 if (DTU)
7300 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7301
7302 // Remove the switch.
7303 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7304 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7305 BasicBlock *Succ = SI->getSuccessor(i);
7306
7307 if (Succ == SI->getDefaultDest())
7308 continue;
7309 Succ->removePredecessor(BB);
7310 if (DTU && RemovedSuccessors.insert(Succ).second)
7311 Updates.push_back({DominatorTree::Delete, BB, Succ});
7312 }
7313 SI->eraseFromParent();
7314
7315 if (DTU)
7316 DTU->applyUpdates(Updates);
7317
7318 if (NeedMask)
7319 ++NumLookupTablesHoles;
7320 return true;
7321}
7322
7323/// Try to transform a switch that has "holes" in it to a contiguous sequence
7324/// of cases.
7325///
7326/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7327/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7328///
7329/// This converts a sparse switch into a dense switch which allows better
7330/// lowering and could also allow transforming into a lookup table.
7332 const DataLayout &DL,
7333 const TargetTransformInfo &TTI) {
7334 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7335 if (CondTy->getIntegerBitWidth() > 64 ||
7336 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7337 return false;
7338 // Only bother with this optimization if there are more than 3 switch cases;
7339 // SDAG will only bother creating jump tables for 4 or more cases.
7340 if (SI->getNumCases() < 4)
7341 return false;
7342
7343 // This transform is agnostic to the signedness of the input or case values. We
7344 // can treat the case values as signed or unsigned. We can optimize more common
7345 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7346 // as signed.
7348 for (const auto &C : SI->cases())
7349 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7350 llvm::sort(Values);
7351
7352 // If the switch is already dense, there's nothing useful to do here.
7353 if (isSwitchDense(Values))
7354 return false;
7355
7356 // First, transform the values such that they start at zero and ascend.
7357 int64_t Base = Values[0];
7358 for (auto &V : Values)
7359 V -= (uint64_t)(Base);
7360
7361 // Now we have signed numbers that have been shifted so that, given enough
7362 // precision, there are no negative values. Since the rest of the transform
7363 // is bitwise only, we switch now to an unsigned representation.
7364
7365 // This transform can be done speculatively because it is so cheap - it
7366 // results in a single rotate operation being inserted.
7367
7368 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7369 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7370 // less than 64.
7371 unsigned Shift = 64;
7372 for (auto &V : Values)
7373 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7374 assert(Shift < 64);
7375 if (Shift > 0)
7376 for (auto &V : Values)
7377 V = (int64_t)((uint64_t)V >> Shift);
7378
7379 if (!isSwitchDense(Values))
7380 // Transform didn't create a dense switch.
7381 return false;
7382
7383 // The obvious transform is to shift the switch condition right and emit a
7384 // check that the condition actually cleanly divided by GCD, i.e.
7385 // C & (1 << Shift - 1) == 0
7386 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7387 //
7388 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7389 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7390 // are nonzero then the switch condition will be very large and will hit the
7391 // default case.
7392
7393 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7394 Builder.SetInsertPoint(SI);
7395 Value *Sub =
7396 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7397 Value *Rot = Builder.CreateIntrinsic(
7398 Ty, Intrinsic::fshl,
7399 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7400 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7401
7402 for (auto Case : SI->cases()) {
7403 auto *Orig = Case.getCaseValue();
7404 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7405 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7406 }
7407 return true;
7408}
7409
7410/// Tries to transform switch of powers of two to reduce switch range.
7411/// For example, switch like:
7412/// switch (C) { case 1: case 2: case 64: case 128: }
7413/// will be transformed to:
7414/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7415///
7416/// This transformation allows better lowering and may transform the switch
7417/// instruction into a sequence of bit manipulation and a smaller
7418/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7419/// address of the jump target, and indirectly jump to it).
7421 const DataLayout &DL,
7422 const TargetTransformInfo &TTI) {
7423 Value *Condition = SI->getCondition();
7424 LLVMContext &Context = SI->getContext();
7425 auto *CondTy = cast<IntegerType>(Condition->getType());
7426
7427 if (CondTy->getIntegerBitWidth() > 64 ||
7428 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7429 return false;
7430
7431 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7432 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7433 {Condition, ConstantInt::getTrue(Context)});
7434 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7435 TTI::TCC_Basic * 2)
7436 return false;
7437
7438 // Only bother with this optimization if there are more than 3 switch cases.
7439 // SDAG will start emitting jump tables for 4 or more cases.
7440 if (SI->getNumCases() < 4)
7441 return false;
7442
7443 // We perform this optimization only for switches with
7444 // unreachable default case.
7445 // This assumption will save us from checking if `Condition` is a power of two.
7446 if (!SI->defaultDestUnreachable())
7447 return false;
7448
7449 // Check that switch cases are powers of two.
7451 for (const auto &Case : SI->cases()) {
7452 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7453 if (llvm::has_single_bit(CaseValue))
7454 Values.push_back(CaseValue);
7455 else
7456 return false;
7457 }
7458
7459 // isSwitchDense requires case values to be sorted.
7460 llvm::sort(Values);
7461 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7462 llvm::countr_zero(Values.front()) + 1))
7463 // Transform is unable to generate dense switch.
7464 return false;
7465
7466 Builder.SetInsertPoint(SI);
7467
7468 // Replace each case with its trailing zeros number.
7469 for (auto &Case : SI->cases()) {
7470 auto *OrigValue = Case.getCaseValue();
7471 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7472 OrigValue->getValue().countr_zero()));
7473 }
7474
7475 // Replace condition with its trailing zeros number.
7476 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7477 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7478
7479 SI->setCondition(ConditionTrailingZeros);
7480
7481 return true;
7482}
7483
7484/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7485/// the same destination.
7487 DomTreeUpdater *DTU) {
7488 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7489 if (!Cmp || !Cmp->hasOneUse())
7490 return false;
7491
7493 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7494 if (!HasWeights)
7495 Weights.resize(4); // Avoid checking HasWeights everywhere.
7496
7497 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7498 int64_t Res;
7499 BasicBlock *Succ, *OtherSucc;
7500 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7501 BasicBlock *Unreachable = nullptr;
7502
7503 if (SI->getNumCases() == 2) {
7504 // Find which of 1, 0 or -1 is missing (handled by default dest).
7505 SmallSet<int64_t, 3> Missing;
7506 Missing.insert(1);
7507 Missing.insert(0);
7508 Missing.insert(-1);
7509
7510 Succ = SI->getDefaultDest();
7511 SuccWeight = Weights[0];
7512 OtherSucc = nullptr;
7513 for (auto &Case : SI->cases()) {
7514 std::optional<int64_t> Val =
7515 Case.getCaseValue()->getValue().trySExtValue();
7516 if (!Val)
7517 return false;
7518 if (!Missing.erase(*Val))
7519 return false;
7520 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7521 return false;
7522 OtherSucc = Case.getCaseSuccessor();
7523 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7524 }
7525
7526 assert(Missing.size() == 1 && "Should have one case left");
7527 Res = *Missing.begin();
7528 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7529 // Normalize so that Succ is taken once and OtherSucc twice.
7530 Unreachable = SI->getDefaultDest();
7531 Succ = OtherSucc = nullptr;
7532 for (auto &Case : SI->cases()) {
7533 BasicBlock *NewSucc = Case.getCaseSuccessor();
7534 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7535 if (!OtherSucc || OtherSucc == NewSucc) {
7536 OtherSucc = NewSucc;
7537 OtherSuccWeight += Weight;
7538 } else if (!Succ) {
7539 Succ = NewSucc;
7540 SuccWeight = Weight;
7541 } else if (Succ == NewSucc) {
7542 std::swap(Succ, OtherSucc);
7543 std::swap(SuccWeight, OtherSuccWeight);
7544 } else
7545 return false;
7546 }
7547 for (auto &Case : SI->cases()) {
7548 std::optional<int64_t> Val =
7549 Case.getCaseValue()->getValue().trySExtValue();
7550 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7551 return false;
7552 if (Case.getCaseSuccessor() == Succ) {
7553 Res = *Val;
7554 break;
7555 }
7556 }
7557 } else {
7558 return false;
7559 }
7560
7561 // Determine predicate for the missing case.
7563 switch (Res) {
7564 case 1:
7565 Pred = ICmpInst::ICMP_UGT;
7566 break;
7567 case 0:
7568 Pred = ICmpInst::ICMP_EQ;
7569 break;
7570 case -1:
7571 Pred = ICmpInst::ICMP_ULT;
7572 break;
7573 }
7574 if (Cmp->isSigned())
7575 Pred = ICmpInst::getSignedPredicate(Pred);
7576
7577 MDNode *NewWeights = nullptr;
7578 if (HasWeights)
7579 NewWeights = MDBuilder(SI->getContext())
7580 .createBranchWeights(SuccWeight, OtherSuccWeight);
7581
7582 BasicBlock *BB = SI->getParent();
7583 Builder.SetInsertPoint(SI->getIterator());
7584 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7585 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7586 SI->getMetadata(LLVMContext::MD_unpredictable));
7587 OtherSucc->removePredecessor(BB);
7588 if (Unreachable)
7589 Unreachable->removePredecessor(BB);
7590 SI->eraseFromParent();
7591 Cmp->eraseFromParent();
7592 if (DTU && Unreachable)
7593 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7594 return true;
7595}
7596
7597/// Checking whether two cases of SI are equal depends on the contents of the
7598/// BasicBlock and the incoming values of their successor PHINodes.
7599/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7600/// calling this function on each BasicBlock every time isEqual is called,
7601/// especially since the same BasicBlock may be passed as an argument multiple
7602/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7603/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7604/// of the incoming values.
7609
7610namespace llvm {
7611template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
7613 return static_cast<SwitchSuccWrapper *>(
7615 }
7617 return static_cast<SwitchSuccWrapper *>(
7619 }
7620 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7621 BasicBlock *Succ = SSW->Dest;
7623 assert(BI->isUnconditional() &&
7624 "Only supporting unconditional branches for now");
7625 assert(BI->getNumSuccessors() == 1 &&
7626 "Expected unconditional branches to have one successor");
7627 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7628
7629 // Since we assume the BB is just a single BranchInst with a single
7630 // successor, we hash as the BB and the incoming Values of its successor
7631 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7632 // including the incoming PHI values leads to better performance.
7633 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7634 // time and passing it in SwitchSuccWrapper, but this slowed down the
7635 // average compile time without having any impact on the worst case compile
7636 // time.
7637 BasicBlock *BB = BI->getSuccessor(0);
7638 SmallVector<Value *> PhiValsForBB;
7639 for (PHINode &Phi : BB->phis())
7640 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7641
7642 return hash_combine(BB, hash_combine_range(PhiValsForBB));
7643 }
7644 static bool isEqual(const SwitchSuccWrapper *LHS,
7645 const SwitchSuccWrapper *RHS) {
7648 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7649 return LHS == RHS;
7650
7651 BasicBlock *A = LHS->Dest;
7652 BasicBlock *B = RHS->Dest;
7653
7654 // FIXME: we checked that the size of A and B are both 1 in
7655 // simplifyDuplicateSwitchArms to make the Case list smaller to
7656 // improve performance. If we decide to support BasicBlocks with more
7657 // than just a single instruction, we need to check that A.size() ==
7658 // B.size() here, and we need to check more than just the BranchInsts
7659 // for equality.
7660
7661 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7662 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7663 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7664 "Only supporting unconditional branches for now");
7665 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7666 return false;
7667
7668 // Need to check that PHIs in successor have matching values
7669 BasicBlock *Succ = ABI->getSuccessor(0);
7670 for (PHINode &Phi : Succ->phis()) {
7671 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7672 if (PredIVs[A] != PredIVs[B])
7673 return false;
7674 }
7675
7676 return true;
7677 }
7678};
7679} // namespace llvm
7680
// Deduplicate switch arms: when several case destinations are single-branch
// blocks that are behaviorally identical (same unconditional successor, same
// PHI contributions), redirect the duplicate cases to one canonical block and
// record the corresponding dominator-tree edge deletions. Returns true if any
// successor of SI was rewritten.
7681 bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7682 DomTreeUpdater *DTU) {
7683 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7684 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7685 // an entire PHI at once after the loop, opposed to calling
7686 // getIncomingValueForBlock inside this loop, since each call to
7687 // getIncomingValueForBlock is O(|Preds|).
7688 SmallPtrSet<PHINode *, 8> Phis;
7689 SmallPtrSet<BasicBlock *, 8> Seen;
7690 DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
7691 DenseMap<BasicBlock *, SmallVector<unsigned, 32>> BBToSuccessorIndexes;
// NOTE(review): original line 7692 is missing from this extraction — it
// presumably declares the Cases container (a SmallVector of SwitchSuccWrapper);
// confirm against upstream SimplifyCFG.cpp.
7693 Cases.reserve(SI->getNumSuccessors());
7694
7695 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7696 BasicBlock *BB = SI->getSuccessor(I);
7697
7698 // FIXME: Support more than just a single BranchInst. One way we could do
7699 // this is by taking a hashing approach of all insts in BB.
7700 if (BB->size() != 1)
7701 continue;
7702
7703 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7704 // on other kinds of terminators. We decide to only support unconditional
7705 // branches for now for compile time reasons.
7706 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7707 if (!BI || BI->isConditional())
7708 continue;
7709
// A block already seen only needs its extra case index recorded; it was
// either added to Cases or rejected on its first visit.
7710 if (!Seen.insert(BB).second) {
7711 auto It = BBToSuccessorIndexes.find(BB);
7712 if (It != BBToSuccessorIndexes.end())
7713 It->second.emplace_back(I);
7714 continue;
7715 }
7716
7717 // FIXME: This case needs some extra care because the terminators other than
7718 // SI need to be updated. For now, consider only backedges to the SI.
7719 if (BB->getUniquePredecessor() != SI->getParent())
7720 continue;
7721
7722 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7723 for (BasicBlock *Succ : BI->successors())
// NOTE(review): original line 7724 is missing from this extraction — it
// presumably inserts each PHI of Succ into the Phis set; confirm against
// upstream SimplifyCFG.cpp.
7725
7726 // Add the successor only if not previously visited.
7727 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7728 BBToSuccessorIndexes[BB].emplace_back(I);
7729 }
7730
7731 // Precompute a data structure to improve performance of isEqual for
7732 // SwitchSuccWrapper.
7733 PhiPredIVs.reserve(Phis.size());
7734 for (PHINode *Phi : Phis) {
7735 auto &IVs =
7736 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
7737 for (auto &IV : Phi->incoming_values())
7738 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
7739 }
7740
7741 // Build a set such that if the SwitchSuccWrapper exists in the set and
7742 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7743 // which is not in the set should be replaced with the one in the set. If the
7744 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7745 // other SwitchSuccWrappers can check against it in the same manner. We use
7746 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7747 // around information to isEquality, getHashValue, and when doing the
7748 // replacement with better performance.
7749 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
7750 ReplaceWith.reserve(Cases.size());
7751
// NOTE(review): original line 7752 is missing from this extraction — it
// presumably declares the Updates vector of DominatorTree::UpdateType used
// below; confirm against upstream SimplifyCFG.cpp.
7753 Updates.reserve(ReplaceWith.size());
7754 bool MadeChange = false;
7755 for (auto &SSW : Cases) {
7756 // SSW is a candidate for simplification. If we find a duplicate BB,
7757 // replace it.
7758 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7759 if (!Inserted) {
7760 // We know that SI's parent BB no longer dominates the old case successor
7761 // since we are making it dead.
7762 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7763 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7764 for (unsigned Idx : Successors)
7765 SI->setSuccessor(Idx, (*It)->Dest);
7766 MadeChange = true;
7767 }
7768 }
7769
7770 if (DTU)
7771 DTU->applyUpdates(Updates);
7772
7773 return MadeChange;
7774 }
7775
7776bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7777 BasicBlock *BB = SI->getParent();
7778
7779 if (isValueEqualityComparison(SI)) {
7780 // If we only have one predecessor, and if it is a branch on this value,
7781 // see if that predecessor totally determines the outcome of this switch.
7782 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7783 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7784 return requestResimplify();
7785
7786 Value *Cond = SI->getCondition();
7787 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7788 if (simplifySwitchOnSelect(SI, Select))
7789 return requestResimplify();
7790
7791 // If the block only contains the switch, see if we can fold the block
7792 // away into any preds.
7793 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7794 if (foldValueComparisonIntoPredecessors(SI, Builder))
7795 return requestResimplify();
7796 }
7797
7798 // Try to transform the switch into an icmp and a branch.
7799 // The conversion from switch to comparison may lose information on
7800 // impossible switch values, so disable it early in the pipeline.
7801 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7802 return requestResimplify();
7803
7804 // Remove unreachable cases.
7805 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7806 return requestResimplify();
7807
7808 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7809 return requestResimplify();
7810
7811 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7812 return requestResimplify();
7813
7814 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7815 return requestResimplify();
7816
7817 // The conversion from switch to lookup tables results in difficult-to-analyze
7818 // code and makes pruning branches much harder. This is a problem if the
7819 // switch expression itself can still be restricted as a result of inlining or
7820 // CVP. Therefore, only apply this transformation during late stages of the
7821 // optimisation pipeline.
7822 if (Options.ConvertSwitchToLookupTable &&
7823 simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
7824 return requestResimplify();
7825
7826 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7827 return requestResimplify();
7828
7829 if (reduceSwitchRange(SI, Builder, DL, TTI))
7830 return requestResimplify();
7831
7832 if (HoistCommon &&
7833 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
7834 return requestResimplify();
7835
7836 if (simplifyDuplicateSwitchArms(SI, DTU))
7837 return requestResimplify();
7838
7839 return false;
7840}
7841
// Simplify an indirectbr: drop duplicate or no-longer-address-taken
// destinations, then degenerate to unreachable (zero destinations) or a
// direct branch (one destination), and finally try the select-address case.
7842 bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7843 BasicBlock *BB = IBI->getParent();
7844 bool Changed = false;
7845
7846 // Eliminate redundant destinations.
7847 SmallPtrSet<Value *, 8> Succs;
7848 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
7849 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7850 BasicBlock *Dest = IBI->getDestination(i);
// A destination is removable if its address is no longer taken, or if it
// already appears earlier in the destination list.
7851 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7852 if (!Dest->hasAddressTaken())
7853 RemovedSuccs.insert(Dest);
7854 Dest->removePredecessor(BB);
7855 IBI->removeDestination(i);
// removeDestination compacts the list, so re-examine the current index.
7856 --i;
7857 --e;
7858 Changed = true;
7859 }
7860 }
7861
7862 if (DTU) {
7863 std::vector<DominatorTree::UpdateType> Updates;
7864 Updates.reserve(RemovedSuccs.size());
7865 for (auto *RemovedSucc : RemovedSuccs)
7866 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7867 DTU->applyUpdates(Updates);
7868 }
7869
7870 if (IBI->getNumDestinations() == 0) {
7871 // If the indirectbr has no successors, change it to unreachable.
7872 new UnreachableInst(IBI->getContext(), IBI->getIterator());
// NOTE(review): original line 7873 is missing from this extraction — it
// presumably erases IBI from its parent; confirm against upstream.
7874 return true;
7875 }
7876
7877 if (IBI->getNumDestinations() == 1) {
7878 // If the indirectbr has one successor, change it to a direct branch.
// NOTE(review): original lines 7879-7880 are missing from this extraction —
// they presumably create the unconditional BranchInst and erase IBI;
// confirm against upstream.
7881 return true;
7882 }
7883
7884 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7885 if (simplifyIndirectBrOnSelect(IBI, SI))
7886 return requestResimplify();
7887 }
7888 return Changed;
7889 }
7890
7891 /// Given an block with only a single landing pad and a unconditional branch
7892 /// try to find another basic block which this one can be merged with. This
7893 /// handles cases where we have multiple invokes with unique landing pads, but
7894 /// a shared handler.
7895 ///
7896 /// We specifically choose to not worry about merging non-empty blocks
7897 /// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7898 /// practice, the optimizer produces empty landing pad blocks quite frequently
7899 /// when dealing with exception dense code. (see: instcombine, gvn, if-else
7900 /// sinking in this file)
7901 ///
7902 /// This is primarily a code size optimization. We need to avoid performing
7903 /// any transform which might inhibit optimization (such as our ability to
7904 /// specialize a particular handler via tail commoning). We do this by not
7905 /// merging any blocks which require us to introduce a phi. Since the same
7906 /// values are flowing through both blocks, we don't lose any ability to
7907 /// specialize. If anything, we make such specialization more likely.
7908 ///
7909 /// TODO - This transformation could remove entries from a phi in the target
7910 /// block when the inputs in the phi are the same for the two blocks being
7911 /// merged. In some cases, this could result in removal of the PHI entirely.
// NOTE(review): original line 7912 (the first line of the signature, which
// presumably names the function and takes the LandingPadInst and BranchInst
// parameters) is missing from this extraction; confirm against upstream.
7913 BasicBlock *BB, DomTreeUpdater *DTU) {
7914 auto Succ = BB->getUniqueSuccessor();
7915 assert(Succ);
7916 // If there's a phi in the successor block, we'd likely have to introduce
7917 // a phi into the merged landing pad block.
7918 if (isa<PHINode>(*Succ->begin()))
7919 return false;
7920
// Scan sibling predecessors of Succ for a block that is instruction-for-
// instruction identical to BB (same landing pad, same branch).
7921 for (BasicBlock *OtherPred : predecessors(Succ)) {
7922 if (BB == OtherPred)
7923 continue;
7924 BasicBlock::iterator I = OtherPred->begin();
// NOTE(review): original line 7925 is missing — it presumably dyn_casts *I
// to a LandingPadInst (LPad2); confirm against upstream.
7926 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7927 continue;
7928 ++I;
// NOTE(review): original line 7929 is missing — it presumably dyn_casts *I
// to a BranchInst (BI2); confirm against upstream.
7930 if (!BI2 || !BI2->isIdenticalTo(BI))
7931 continue;
7932
7933 std::vector<DominatorTree::UpdateType> Updates;
7934
7935 // We've found an identical block. Update our predecessors to take that
7936 // path instead and make ourselves dead.
// NOTE(review): original line 7937 is missing — it presumably collects the
// unique predecessors of BB (UniquePreds); confirm against upstream.
7938 for (BasicBlock *Pred : UniquePreds) {
7939 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7940 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7941 "unexpected successor");
7942 II->setUnwindDest(OtherPred);
7943 if (DTU) {
7944 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7945 Updates.push_back({DominatorTree::Delete, Pred, BB});
7946 }
7947 }
7948
// NOTE(review): original line 7949 is missing — it presumably collects the
// unique successors of BB (UniqueSuccs); confirm against upstream.
7950 for (BasicBlock *Succ : UniqueSuccs) {
7951 Succ->removePredecessor(BB);
7952 if (DTU)
7953 Updates.push_back({DominatorTree::Delete, BB, Succ});
7954 }
7955
// BB is now dead: replace its branch with unreachable so it can be cleaned
// up by later passes.
7956 IRBuilder<> Builder(BI);
7957 Builder.CreateUnreachable();
7958 BI->eraseFromParent();
7959 if (DTU)
7960 DTU->applyUpdates(Updates);
7961 return true;
7962 }
7963 return false;
7964 }
7965
7966bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7967 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7968 : simplifyCondBranch(Branch, Builder);
7969}
7970
// Simplify a block ending in an unconditional branch: remove the block when
// it is effectively empty, fold trailing icmp-then-branch patterns, merge
// duplicate landing pads, and fold the branch into a common destination.
7971 bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7972 IRBuilder<> &Builder) {
7973 BasicBlock *BB = BI->getParent();
7974 BasicBlock *Succ = BI->getSuccessor(0);
7975
7976 // If the Terminator is the only non-phi instruction, simplify the block.
7977 // If LoopHeader is provided, check if the block or its successor is a loop
7978 // header. (This is for early invocations before loop simplify and
7979 // vectorization to keep canonical loop forms for nested loops. These blocks
7980 // can be eliminated when the pass is invoked later in the back-end.)
7981 // Note that if BB has only one predecessor then we do not introduce new
7982 // backedge, so we can eliminate BB.
7983 bool NeedCanonicalLoop =
7984 Options.NeedCanonicalLoop &&
7985 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7986 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
// NOTE(review): original line 7987 is missing from this extraction — it
// presumably initializes the iterator I to the first non-PHI/non-debug
// instruction of BB; confirm against upstream.
7988 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7989 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7990 return true;
7991
7992 // If the only instruction in the block is a seteq/setne comparison against a
7993 // constant, try to simplify the block.
7994 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7995 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7996 ++I;
7997 if (I->isTerminator() &&
7998 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7999 return true;
8000 }
8001
8002 // See if we can merge an empty landing pad block with another which is
8003 // equivalent.
8004 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8005 ++I;
8006 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8007 return true;
8008 }
8009
8010 // If this basic block is ONLY a compare and a branch, and if a predecessor
8011 // branches to us and our successor, fold the comparison into the
8012 // predecessor and use logical operations to update the incoming value
8013 // for PHI nodes in common successor.
8014 if (Options.SpeculateBlocks &&
8015 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8016 Options.BonusInstThreshold))
8017 return requestResimplify();
8018 return false;
8019 }
8020
// NOTE(review): original line 8021 (the function signature — presumably
// `static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB)`) is
// missing from this extraction; confirm against upstream.
// Returns the single block that is the sole predecessor of every predecessor
// of BB, or nullptr if no such common "grandparent" block exists.
8022 BasicBlock *PredPred = nullptr;
8023 for (auto *P : predecessors(BB)) {
8024 BasicBlock *PPred = P->getSinglePredecessor();
// Fail if any predecessor has multiple predecessors, or if two predecessors
// disagree on their single predecessor.
8025 if (!PPred || (PredPred && PredPred != PPred))
8026 return nullptr;
8027 PredPred = PPred;
8028 }
8029 return PredPred;
8030 }
8031
8032 /// Fold the following pattern:
8033 /// bb0:
8034 /// br i1 %cond1, label %bb1, label %bb2
8035 /// bb1:
8036 /// br i1 %cond2, label %bb3, label %bb4
8037 /// bb2:
8038 /// br i1 %cond2, label %bb4, label %bb3
8039 /// bb3:
8040 /// ...
8041 /// bb4:
8042 /// ...
8043 /// into
8044 /// bb0:
8045 /// %cond = xor i1 %cond1, %cond2
8046 /// br i1 %cond, label %bb4, label %bb3
8047 /// bb3:
8048 /// ...
8049 /// bb4:
8050 /// ...
8051 /// NOTE: %cond2 always dominates the terminator of bb0.
// NOTE(review): original line 8052 (the function signature — presumably
// `static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)`)
// is missing from this extraction; confirm against upstream.
8053 BasicBlock *BB = BI->getParent();
8054 BasicBlock *BB1 = BI->getSuccessor(0);
8055 BasicBlock *BB2 = BI->getSuccessor(1);
// A "simple" successor is a block containing nothing but a conditional
// branch, with no self-loops back into the diamond and no PHIs in its
// targets (PHIs would need incoming-value surgery).
8056 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8057 if (Succ == BB)
8058 return false;
8059 if (&Succ->front() != Succ->getTerminator())
8060 return false;
8061 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8062 if (!SuccBI || !SuccBI->isConditional())
8063 return false;
8064 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8065 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8066 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8067 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8068 };
8069 BranchInst *BB1BI, *BB2BI;
8070 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8071 return false;
8072
// The pattern requires both inner branches to test the same condition with
// swapped destinations.
8073 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8074 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8075 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8076 return false;
8077
8078 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8079 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8080 IRBuilder<> Builder(BI);
8081 BI->setCondition(
8082 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8083 BB1->removePredecessor(BB);
8084 BI->setSuccessor(0, BB4);
8085 BB2->removePredecessor(BB);
8086 BI->setSuccessor(1, BB3);
8087 if (DTU) {
// NOTE(review): original line 8088 is missing from this extraction — it
// presumably declares the Updates container for DominatorTree updates;
// confirm against upstream.
8089 Updates.push_back({DominatorTree::Delete, BB, BB1});
8090 Updates.push_back({DominatorTree::Insert, BB, BB4});
8091 Updates.push_back({DominatorTree::Delete, BB, BB2});
8092 Updates.push_back({DominatorTree::Insert, BB, BB3});
8093
8094 DTU->applyUpdates(Updates);
8095 }
// Recompute branch weights for the merged branch. Any branch missing
// metadata contributes a neutral 1:1 weight; if none of the three branches
// carried weights, no metadata is emitted.
8096 bool HasWeight = false;
8097 uint64_t BBTWeight, BBFWeight;
8098 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8099 HasWeight = true;
8100 else
8101 BBTWeight = BBFWeight = 1;
8102 uint64_t BB1TWeight, BB1FWeight;
8103 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8104 HasWeight = true;
8105 else
8106 BB1TWeight = BB1FWeight = 1;
8107 uint64_t BB2TWeight, BB2FWeight;
8108 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8109 HasWeight = true;
8110 else
8111 BB2TWeight = BB2FWeight = 1;
8112 if (HasWeight) {
8113 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8114 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8115 setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8116 /*ElideAllZero=*/true);
8117 }
8118 return true;
8119 }
8120
// Run the pipeline of conditional-branch simplifications on BI, in a fixed
// order; the first transform that fires requests another simplification
// round. Returns false when nothing applies.
8121 bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8122 assert(
// NOTE(review): original line 8123 is missing from this extraction — it is
// presumably the first conjunct of this assert (likely that BI is
// conditional); confirm against upstream.
8124 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8125 "Tautological conditional branch should have been eliminated already.");
8126
8127 BasicBlock *BB = BI->getParent();
8128 if (!Options.SimplifyCondBranch ||
8129 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8130 return false;
8131
8132 // Conditional branch
8133 if (isValueEqualityComparison(BI)) {
8134 // If we only have one predecessor, and if it is a branch on this value,
8135 // see if that predecessor totally determines the outcome of this
8136 // switch.
8137 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8138 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8139 return requestResimplify();
8140
8141 // This block must be empty, except for the setcond inst, if it exists.
8142 // Ignore dbg and pseudo intrinsics.
8143 auto I = BB->instructionsWithoutDebug(true).begin();
8144 if (&*I == BI) {
8145 if (foldValueComparisonIntoPredecessors(BI, Builder))
8146 return requestResimplify();
8147 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8148 ++I;
8149 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8150 return requestResimplify();
8151 }
8152 }
8153
8154 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8155 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8156 return true;
8157
8158 // If this basic block has dominating predecessor blocks and the dominating
8159 // blocks' conditions imply BI's condition, we know the direction of BI.
8160 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8161 if (Imp) {
8162 // Turn this into a branch on constant.
8163 auto *OldCond = BI->getCondition();
8164 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8165 : ConstantInt::getFalse(BB->getContext());
8166 BI->setCondition(TorF);
// NOTE(review): original line 8167 is missing from this extraction — it
// presumably cleans up the now-dead OldCond (e.g. recursively deleting
// trivially dead instructions); confirm against upstream.
8168 return requestResimplify();
8169 }
8170
8171 // If this basic block is ONLY a compare and a branch, and if a predecessor
8172 // branches to us and one of our successors, fold the comparison into the
8173 // predecessor and use logical operations to pick the right destination.
8174 if (Options.SpeculateBlocks &&
8175 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8176 Options.BonusInstThreshold))
8177 return requestResimplify();
8178
8179 // We have a conditional branch to two blocks that are only reachable
8180 // from BI. We know that the condbr dominates the two blocks, so see if
8181 // there is any identical code in the "then" and "else" blocks. If so, we
8182 // can hoist it up to the branching block.
8183 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8184 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8185 if (HoistCommon &&
8186 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8187 return requestResimplify();
8188
8189 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8190 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8191 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
// Both successors may be speculatable if they contain nothing but cheap,
// safe loads/stores and a single-successor terminator.
8192 auto CanSpeculateConditionalLoadsStores = [&]() {
8193 for (auto *Succ : successors(BB)) {
8194 for (Instruction &I : *Succ) {
8195 if (I.isTerminator()) {
8196 if (I.getNumSuccessors() > 1)
8197 return false;
8198 continue;
8199 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8200 SpeculatedConditionalLoadsStores.size() ==
// NOTE(review): original line 8201 is missing from this extraction — it is
// presumably the hoisting threshold constant this size is compared against;
// confirm against upstream.
8202 return false;
8203 }
8204 SpeculatedConditionalLoadsStores.push_back(&I);
8205 }
8206 }
8207 return !SpeculatedConditionalLoadsStores.empty();
8208 };
8209
8210 if (CanSpeculateConditionalLoadsStores()) {
8211 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8212 std::nullopt, nullptr);
8213 return requestResimplify();
8214 }
8215 }
8216 } else {
8217 // If Successor #1 has multiple preds, we may be able to conditionally
8218 // execute Successor #0 if it branches to Successor #1.
8219 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8220 if (Succ0TI->getNumSuccessors() == 1 &&
8221 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8222 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8223 return requestResimplify();
8224 }
8225 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8226 // If Successor #0 has multiple preds, we may be able to conditionally
8227 // execute Successor #1 if it branches to Successor #0.
8228 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8229 if (Succ1TI->getNumSuccessors() == 1 &&
8230 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8231 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8232 return requestResimplify();
8233 }
8234
8235 // If this is a branch on something for which we know the constant value in
8236 // predecessors (e.g. a phi node in the current block), thread control
8237 // through this block.
8238 if (foldCondBranchOnValueKnownInPredecessor(BI))
8239 return requestResimplify();
8240
8241 // Scan predecessor blocks for conditional branches.
8242 for (BasicBlock *Pred : predecessors(BB))
8243 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8244 if (PBI != BI && PBI->isConditional())
8245 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8246 return requestResimplify();
8247
8248 // Look for diamond patterns.
8249 if (MergeCondStores)
8250 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8251 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8252 if (PBI != BI && PBI->isConditional())
8253 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8254 return requestResimplify();
8255
8256 // Look for nested conditional branches.
8257 if (mergeNestedCondBranch(BI, DTU))
8258 return requestResimplify();
8259
8260 return false;
8261 }
8262
8263 /// Check if passing a value to an instruction will cause undefined behavior.
// Returns true if substituting the constant V for I's value provably makes
// the first interesting use of I immediate UB (e.g. load/store through null,
// call through null, div/rem by zero, undef to a noundef slot).
// PtrValueMayBeModified tracks whether a GEP along the way may have moved
// the pointer off null, weakening the null-based conclusions.
8264 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8265 assert(V->getType() == I->getType() && "Mismatched types");
// NOTE(review): original line 8266 is missing from this extraction — it
// presumably dyn_casts V to a Constant (C); confirm against upstream.
8267 if (!C)
8268 return false;
8269
8270 if (I->use_empty())
8271 return false;
8272
8273 if (C->isNullValue() || isa<UndefValue>(C)) {
8274 // Only look at the first use we can handle, avoid hurting compile time with
8275 // long uselists
8276 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8277 auto *Use = cast<Instruction>(U.getUser());
8278 // Change this list when we want to add new instructions.
8279 switch (Use->getOpcode()) {
8280 default:
8281 return false;
8282 case Instruction::GetElementPtr:
8283 case Instruction::Ret:
8284 case Instruction::BitCast:
8285 case Instruction::Load:
8286 case Instruction::Store:
8287 case Instruction::Call:
8288 case Instruction::CallBr:
8289 case Instruction::Invoke:
8290 case Instruction::UDiv:
8291 case Instruction::URem:
8292 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8293 // implemented to avoid code complexity as it is unclear how useful such
8294 // logic is.
8295 case Instruction::SDiv:
8296 case Instruction::SRem:
8297 return true;
8298 }
8299 });
8300 if (FindUse == I->use_end())
8301 return false;
8302 auto &Use = *FindUse;
8303 auto *User = cast<Instruction>(Use.getUser());
8304 // Bail out if User is not in the same BB as I or User == I or User comes
8305 // before I in the block. The latter two can be the case if User is a
8306 // PHI node.
8307 if (User->getParent() != I->getParent() || User == I ||
8308 User->comesBefore(I))
8309 return false;
8310
8311 // Now make sure that there are no instructions in between that can alter
8312 // control flow (eg. calls)
8313 auto InstrRange =
8314 make_range(std::next(I->getIterator()), User->getIterator());
8315 if (any_of(InstrRange, [](Instruction &I) {
// NOTE(review): original line 8316 is missing from this extraction — it is
// presumably the lambda body testing whether I is guaranteed to transfer
// execution to its successor; confirm against upstream.
8317 }))
8318 return false;
8319
8320 // Look through GEPs. A load from a GEP derived from NULL is still undefined
// NOTE(review): original line 8321 is missing from this extraction — it
// presumably dyn_casts User to a GetElementPtrInst (GEP); confirm against
// upstream.
8322 if (GEP->getPointerOperand() == I) {
8323 // The type of GEP may differ from the type of base pointer.
8324 // Bail out on vector GEPs, as they are not handled by other checks.
8325 if (GEP->getType()->isVectorTy())
8326 return false;
8327 // The current base address is null, there are four cases to consider:
8328 // getelementptr (TY, null, 0) -> null
8329 // getelementptr (TY, null, not zero) -> may be modified
8330 // getelementptr inbounds (TY, null, 0) -> null
8331 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8332 // undefined?
8333 if (!GEP->hasAllZeroIndices() &&
8334 (!GEP->isInBounds() ||
8335 NullPointerIsDefined(GEP->getFunction(),
8336 GEP->getPointerAddressSpace())))
8337 PtrValueMayBeModified = true;
8338 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8339 }
8340
8341 // Look through return.
8342 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8343 bool HasNoUndefAttr =
8344 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8345 // Return undefined to a noundef return value is undefined.
8346 if (isa<UndefValue>(C) && HasNoUndefAttr)
8347 return true;
8348 // Return null to a nonnull+noundef return value is undefined.
8349 if (C->isNullValue() && HasNoUndefAttr &&
8350 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8351 return !PtrValueMayBeModified;
8352 }
8353 }
8354
8355 // Load from null is undefined.
8356 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8357 if (!LI->isVolatile())
8358 return !NullPointerIsDefined(LI->getFunction(),
8359 LI->getPointerAddressSpace());
8360
8361 // Store to null is undefined.
// NOTE(review): original line 8362 is missing from this extraction — it
// presumably dyn_casts User to a StoreInst (SI); confirm against upstream.
8363 if (!SI->isVolatile())
8364 return (!NullPointerIsDefined(SI->getFunction(),
8365 SI->getPointerAddressSpace())) &&
8366 SI->getPointerOperand() == I;
8367
8368 // llvm.assume(false/undef) always triggers immediate UB.
8369 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8370 // Ignore assume operand bundles.
8371 if (I == Assume->getArgOperand(0))
8372 return true;
8373 }
8374
8375 if (auto *CB = dyn_cast<CallBase>(User)) {
8376 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8377 return false;
8378 // A call to null is undefined.
8379 if (CB->getCalledOperand() == I)
8380 return true;
8381
8382 if (CB->isArgOperand(&Use)) {
8383 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8384 // Passing null to a nonnnull+noundef argument is undefined.
// NOTE(review): original line 8385 is missing from this extraction — it is
// presumably the first conjunct of this condition (likely C->isNullValue());
// confirm against upstream.
8386 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8387 return !PtrValueMayBeModified;
8388 // Passing undef to a noundef argument is undefined.
8389 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8390 return true;
8391 }
8392 }
8393 // Div/Rem by zero is immediate UB
8394 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8395 return true;
8396 }
8397 return false;
8398 }
8399
8400 /// If BB has an incoming value that will always trigger undefined behavior
8401 /// (eg. null pointer dereference), remove the branch leading here.
// NOTE(review): original line 8402 (the first line of the signature —
// presumably `static bool removeUndefIntroducingPredecessor(BasicBlock *BB,`)
// is missing from this extraction; confirm against upstream.
8403 DomTreeUpdater *DTU,
8404 AssumptionCache *AC) {
// Scan each PHI incoming value: if feeding that value into the PHI's users
// is provably immediate UB, the edge from that predecessor is dead.
8405 for (PHINode &PHI : BB->phis())
8406 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8407 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8408 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8409 Instruction *T = Predecessor->getTerminator();
8410 IRBuilder<> Builder(T);
8411 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8412 BB->removePredecessor(Predecessor);
8413 // Turn unconditional branches into unreachables and remove the dead
8414 // destination from conditional branches.
8415 if (BI->isUnconditional())
8416 Builder.CreateUnreachable();
8417 else {
8418 // Preserve guarding condition in assume, because it might not be
8419 // inferrable from any dominating condition.
8420 Value *Cond = BI->getCondition();
8421 CallInst *Assumption;
8422 if (BI->getSuccessor(0) == BB)
8423 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8424 else
8425 Assumption = Builder.CreateAssumption(Cond);
8426 if (AC)
8427 AC->registerAssumption(cast<AssumeInst>(Assumption));
8428 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8429 : BI->getSuccessor(0));
8430 }
8431 BI->eraseFromParent();
8432 if (DTU)
8433 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8434 return true;
8435 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8436 // Redirect all branches leading to UB into
8437 // a newly created unreachable block.
8438 BasicBlock *Unreachable = BasicBlock::Create(
8439 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8440 Builder.SetInsertPoint(Unreachable);
8441 // The new block contains only one instruction: Unreachable
8442 Builder.CreateUnreachable();
8443 for (const auto &Case : SI->cases())
8444 if (Case.getCaseSuccessor() == BB) {
8445 BB->removePredecessor(Predecessor);
8446 Case.setSuccessor(Unreachable);
8447 }
8448 if (SI->getDefaultDest() == BB) {
8449 BB->removePredecessor(Predecessor);
8450 SI->setDefaultDest(Unreachable);
8451 }
8452
8453 if (DTU)
8454 DTU->applyUpdates(
8455 { { DominatorTree::Insert, Predecessor, Unreachable },
8456 { DominatorTree::Delete, Predecessor, BB } });
8457 return true;
8458 }
8459 }
8460
8461 return false;
8462 }
8463
// One round of simplification for BB: remove it if dead, constant-fold its
// terminator, run block-level cleanups, and finally dispatch on the
// terminator kind. Returns true if anything changed.
8464 bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8465 bool Changed = false;
8466
8467 assert(BB && BB->getParent() && "Block not embedded in function!");
8468 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8469
8470 // Remove basic blocks that have no predecessors (except the entry block)...
8471 // or that just have themself as a predecessor. These are unreachable.
8472 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8473 BB->getSinglePredecessor() == BB) {
8474 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8475 DeleteDeadBlock(BB, DTU);
8476 return true;
8477 }
8478
8479 // Check to see if we can constant propagate this terminator instruction
8480 // away...
8481 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8482 /*TLI=*/nullptr, DTU);
8483
8484 // Check for and eliminate duplicate PHI nodes in this block.
// NOTE(review): original line 8485 is missing from this extraction — it
// presumably folds EliminateDuplicatePHINodes(BB) into Changed; confirm
// against upstream.
8486
8487 // Check for and remove branches that will always cause undefined behavior.
// NOTE(review): original line 8488 is missing from this extraction — it is
// presumably the call to removeUndefIntroducingPredecessor guarding this
// early return; confirm against upstream.
8489 return requestResimplify();
8490
8491 // Merge basic blocks into their predecessor if there is only one distinct
8492 // pred, and if there is only one distinct successor of the predecessor, and
8493 // if there are no PHI nodes.
8494 if (MergeBlockIntoPredecessor(BB, DTU))
8495 return true;
8496
8497 if (SinkCommon && Options.SinkCommonInsts)
8498 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8499 mergeCompatibleInvokes(BB, DTU)) {
8500 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8501 // so we may now how duplicate PHI's.
8502 // Let's rerun EliminateDuplicatePHINodes() first,
8503 // before foldTwoEntryPHINode() potentially converts them into select's,
8504 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8505 return true;
8506 }
8507
8508 IRBuilder<> Builder(BB);
8509
8510 if (Options.SpeculateBlocks &&
8511 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8512 // If there is a trivial two-entry PHI node in this basic block, and we can
8513 // eliminate it, do so now.
8514 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8515 if (PN->getNumIncomingValues() == 2)
8516 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8517 Options.SpeculateUnpredictables))
8518 return true;
8519 }
8520
// NOTE(review): original line 8521 is missing from this extraction — it
// presumably fetches BB's terminator into Terminator; confirm against
// upstream.
8522 Builder.SetInsertPoint(Terminator);
// Dispatch to the terminator-specific simplification routine.
8523 switch (Terminator->getOpcode()) {
8524 case Instruction::Br:
8525 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8526 break;
8527 case Instruction::Resume:
8528 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8529 break;
8530 case Instruction::CleanupRet:
8531 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8532 break;
8533 case Instruction::Switch:
8534 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8535 break;
8536 case Instruction::Unreachable:
8537 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8538 break;
8539 case Instruction::IndirectBr:
8540 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8541 break;
8542 }
8543
8544 return Changed;
8545 }
8546
8547bool SimplifyCFGOpt::run(BasicBlock *BB) {
8548 bool Changed = false;
8549
8550 // Repeated simplify BB as long as resimplification is requested.
8551 do {
8552 Resimplify = false;
8553
8554 // Perform one round of simplifcation. Resimplify flag will be set if
8555 // another iteration is requested.
8556 Changed |= simplifyOnce(BB);
8557 } while (Resimplify);
8558
8559 return Changed;
8560}
8561
// NOTE(review): original lines 8562-8563 (the first lines of this signature —
// presumably `bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo
// &TTI, DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,`) are missing
// from this extraction; confirm against upstream.
// Public entry point: constructs a SimplifyCFGOpt with the block's data
// layout and runs it on BB.
8564 ArrayRef<WeakVH> LoopHeaders) {
8565 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8566 Options)
8567 .run(BB);
8568 }
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1574
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition ArrayRef.h:156
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:666
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:984
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:767
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isNegative() const
Definition Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:256
static DebugLoc getTemporary()
Definition DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:183
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:170
static DebugLoc getDropped()
Definition DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:237
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:213
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2345
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2100
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2637
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1931
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1805
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1847
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1860
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2194
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2068
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2277
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:119
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:201
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1731
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:256
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1690
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2056
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1757
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2108
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< uint32_t, 2 > &B1, const SmallVector< uint32_t, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1719
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3087
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3345
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1934
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3848
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2100
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1584
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2068
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:257