Thanks for visiting codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cstddef>
84#include <cstdint>
85#include <iterator>
86#include <map>
87#include <numeric>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
100 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
101
102 cl::desc(
103 "Temporary development switch used to gradually uplift SimplifyCFG "
104 "into preserving DomTree,"));
105
106// Chosen as 2 so as to be cheap, but still to have enough power to fold
107// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
108// To catch this, we need to fold a compare and a select, hence '2' being the
109// minimum reasonable default.
111 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
112 cl::desc(
113 "Control the amount of phi node folding to perform (default = 2)"));
114
116 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
117 cl::desc("Control the maximal total instruction cost that we are willing "
118 "to speculatively execute to fold a 2-entry PHI node into a "
119 "select (default = 4)"));
120
121static cl::opt<bool>
122 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
123 cl::desc("Hoist common instructions up to the parent block"));
124
126 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
127 cl::desc("Hoist loads if the target supports conditional faulting"));
128
130 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
131 cl::desc("Hoist stores if the target supports conditional faulting"));
132
134 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
135 cl::desc("Control the maximal conditional load/store that we are willing "
136 "to speculatively execute to eliminate conditional branch "
137 "(default = 6)"));
138
140 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
141 cl::init(20),
142 cl::desc("Allow reordering across at most this many "
143 "instructions when hoisting"));
144
145static cl::opt<bool>
146 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
147 cl::desc("Sink common instructions down to the end block"));
148
150 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
151 cl::desc("Hoist conditional stores if an unconditional store precedes"));
152
154 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
155 cl::desc("Hoist conditional stores even if an unconditional store does not "
156 "precede - hoist multiple conditional stores into a single "
157 "predicated store"));
158
160 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
161 cl::desc("When merging conditional stores, do so even if the resultant "
162 "basic blocks are unlikely to be if-converted as a result"));
163
165 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
166 cl::desc("Allow exactly one expensive instruction to be speculatively "
167 "executed"));
168
170 "max-speculation-depth", cl::Hidden, cl::init(10),
171 cl::desc("Limit maximum recursion depth when calculating costs of "
172 "speculatively executed instructions"));
173
174static cl::opt<int>
175 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
176 cl::init(10),
177 cl::desc("Max size of a block which is still considered "
178 "small enough to thread through"));
179
180// Two is chosen to allow one negation and a logical combine.
182 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
183 cl::init(2),
184 cl::desc("Maximum cost of combining conditions when "
185 "folding branches"));
186
188 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
189 cl::init(2),
190 cl::desc("Multiplier to apply to threshold when determining whether or not "
191 "to fold branch to common destination when vector operations are "
192 "present"));
193
195 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
196 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
197
199 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
200 cl::desc("Limit cases to analyze when converting a switch to select"));
201
203 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
204 cl::desc("Limit number of blocks a define in a threaded block is allowed "
205 "to be live in"));
206
208
209STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
210STATISTIC(NumLinearMaps,
211 "Number of switch instructions turned into linear mapping");
212STATISTIC(NumLookupTables,
213 "Number of switch instructions turned into lookup tables");
215 NumLookupTablesHoles,
216 "Number of switch instructions turned into lookup tables (holes checked)");
217STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
218STATISTIC(NumFoldValueComparisonIntoPredecessors,
219 "Number of value comparisons folded into predecessor basic blocks");
220STATISTIC(NumFoldBranchToCommonDest,
221 "Number of branches folded into predecessor basic block");
223 NumHoistCommonCode,
224 "Number of common instruction 'blocks' hoisted up to the begin block");
225STATISTIC(NumHoistCommonInstrs,
226 "Number of common instructions hoisted up to the begin block");
227STATISTIC(NumSinkCommonCode,
228 "Number of common instruction 'blocks' sunk down to the end block");
229STATISTIC(NumSinkCommonInstrs,
230 "Number of common instructions sunk down to the end block");
231STATISTIC(NumSpeculations, "Number of speculative executed instructions");
232STATISTIC(NumInvokes,
233 "Number of invokes with empty resume blocks simplified into calls");
234STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
235STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
236
237namespace {
238
239// The first field contains the value that the switch produces when a certain
240// case group is selected, and the second field is a vector containing the
241// cases composing the case group.
242using SwitchCaseResultVectorTy =
244
245// The first field contains the phi node that generates a result of the switch
246// and the second field contains the value generated for a certain case in the
247// switch for that PHI.
248using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
249
250/// ValueEqualityComparisonCase - Represents a case of a switch.
251struct ValueEqualityComparisonCase {
253 BasicBlock *Dest;
254
255 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
256 : Value(Value), Dest(Dest) {}
257
258 bool operator<(ValueEqualityComparisonCase RHS) const {
259 // Comparing pointers is ok as we only rely on the order for uniquing.
260 return Value < RHS.Value;
261 }
262
263 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
264};
265
/// Driver class for the SimplifyCFG peephole transformations. One instance is
/// created per function; `run` is invoked per basic block and dispatches to
/// the terminator-specific simplifications declared below.
class SimplifyCFGOpt {
  const TargetTransformInfo &TTI;      // Cost model for hoist/sink/speculation.
  DomTreeUpdater *DTU;                 // May be null; never a post-dom updater.
  const DataLayout &DL;
  ArrayRef<WeakVH> LoopHeaders;        // Blocks to avoid destroying loop form.
  const SimplifyCFGOptions &Options;
  bool Resimplify;                     // Set when a pass over BB should rerun.

  // Value-equality (switch-like) comparison folding helpers.
  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *getValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool foldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  // Terminator-specific simplifications, one per terminator kind.
  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);

  // Code-motion transforms: hoisting/sinking common code and speculation.
  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs);
  bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
  bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    // SimplifyCFG only maintains a (forward) DomTree; reject post-dom updaters.
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  /// One simplification pass over \p BB; \c run iterates until fixpoint.
  bool simplifyOnce(BasicBlock *BB);
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};
335
336// we synthesize a || b as select a, true, b
337// we synthesize a && b as select a, b, false
338// this function determines if SI is playing one of those roles.
339[[maybe_unused]] bool
340isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
341 return ((isa<ConstantInt>(SI->getTrueValue()) &&
342 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
343 (isa<ConstantInt>(SI->getFalseValue()) &&
344 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
345}
346
347} // end anonymous namespace
348
349/// Return true if all the PHI nodes in the basic block \p BB
350/// receive compatible (identical) incoming values when coming from
351/// all of the predecessor blocks that are specified in \p IncomingBlocks.
352///
353/// Note that if the values aren't exactly identical, but \p EquivalenceSet
354/// is provided, and *both* of the values are present in the set,
355/// then they are considered equal.
357 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
358 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
359 assert(IncomingBlocks.size() == 2 &&
360 "Only for a pair of incoming blocks at the time!");
361
362 // FIXME: it is okay if one of the incoming values is an `undef` value,
363 // iff the other incoming value is guaranteed to be a non-poison value.
364 // FIXME: it is okay if one of the incoming values is a `poison` value.
365 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
366 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
367 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
368 if (IV0 == IV1)
369 return true;
370 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
371 EquivalenceSet->contains(IV1))
372 return true;
373 return false;
374 });
375}
376
377/// Return true if it is safe to merge these two
378/// terminator instructions together.
379static bool
381 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
382 if (SI1 == SI2)
383 return false; // Can't merge with self!
384
385 // It is not safe to merge these two switch instructions if they have a common
386 // successor, and if that successor has a PHI node, and if *that* PHI node has
387 // conflicting incoming values from the two switch blocks.
388 BasicBlock *SI1BB = SI1->getParent();
389 BasicBlock *SI2BB = SI2->getParent();
390
392 bool Fail = false;
393 for (BasicBlock *Succ : successors(SI2BB)) {
394 if (!SI1Succs.count(Succ))
395 continue;
396 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
397 continue;
398 Fail = true;
399 if (FailBlocks)
400 FailBlocks->insert(Succ);
401 else
402 break;
403 }
404
405 return !Fail;
406}
407
408/// Update PHI nodes in Succ to indicate that there will now be entries in it
409/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
410/// will be the same as those coming in from ExistPred, an existing predecessor
411/// of Succ.
412static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
413 BasicBlock *ExistPred,
414 MemorySSAUpdater *MSSAU = nullptr) {
415 for (PHINode &PN : Succ->phis())
416 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
417 if (MSSAU)
418 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
419 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
420}
421
422/// Compute an abstract "cost" of speculating the given instruction,
423/// which is assumed to be safe to speculate. TCC_Free means cheap,
424/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
425/// expensive.
427 const TargetTransformInfo &TTI) {
428 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
429}
430
431/// If we have a merge point of an "if condition" as accepted above,
432/// return true if the specified value dominates the block. We don't handle
433/// the true generality of domination here, just a special case which works
434/// well enough for us.
435///
436/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
437/// see if V (which must be an instruction) and its recursive operands
438/// that do not dominate BB have a combined cost lower than Budget and
439/// are non-trapping. If both are true, the instruction is inserted into the
440/// set and true is returned.
441///
442/// The cost for most non-trapping instructions is defined as 1 except for
443/// Select whose cost is 2.
444///
445/// After this function returns, Cost is increased by the cost of
446/// V plus its non-dominating operands. If that cost is greater than
447/// Budget, false is returned and Cost is undefined.
449 Value *V, BasicBlock *BB, Instruction *InsertPt,
450 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
452 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
453 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
454 // so limit the recursion depth.
455 // TODO: While this recursion limit does prevent pathological behavior, it
456 // would be better to track visited instructions to avoid cycles.
458 return false;
459
461 if (!I) {
462 // Non-instructions dominate all instructions and can be executed
463 // unconditionally.
464 return true;
465 }
466 BasicBlock *PBB = I->getParent();
467
468 // We don't want to allow weird loops that might have the "if condition" in
469 // the bottom of this block.
470 if (PBB == BB)
471 return false;
472
473 // If this instruction is defined in a block that contains an unconditional
474 // branch to BB, then it must be in the 'conditional' part of the "if
475 // statement". If not, it definitely dominates the region.
477 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
478 return true;
479
480 // If we have seen this instruction before, don't count it again.
481 if (AggressiveInsts.count(I))
482 return true;
483
484 // Okay, it looks like the instruction IS in the "condition". Check to
485 // see if it's a cheap instruction to unconditionally compute, and if it
486 // only uses stuff defined outside of the condition. If so, hoist it out.
487 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
488 return false;
489
490 // Overflow arithmetic instruction plus extract value are usually generated
491 // when a division is being replaced. But, in this case, the zero check may
492 // still be kept in the code. In that case it would be worth to hoist these
493 // two instruction out of the basic block. Let's treat this pattern as one
494 // single cheap instruction here!
495 WithOverflowInst *OverflowInst;
496 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
497 ZeroCostInstructions.insert(OverflowInst);
498 Cost += 1;
499 } else if (!ZeroCostInstructions.contains(I))
500 Cost += computeSpeculationCost(I, TTI);
501
502 // Allow exactly one instruction to be speculated regardless of its cost
503 // (as long as it is safe to do so).
504 // This is intended to flatten the CFG even if the instruction is a division
505 // or other expensive operation. The speculation of an expensive instruction
506 // is expected to be undone in CodeGenPrepare if the speculation has not
507 // enabled further IR optimizations.
508 if (Cost > Budget &&
509 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
510 !Cost.isValid()))
511 return false;
512
513 // Okay, we can only really hoist these out if their operands do
514 // not take us over the cost threshold.
515 for (Use &Op : I->operands())
516 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
517 TTI, AC, ZeroCostInstructions, Depth + 1))
518 return false;
519 // Okay, it's safe to do this! Remember this instruction.
520 AggressiveInsts.insert(I);
521 return true;
522}
523
524/// Extract ConstantInt from value, looking through IntToPtr
525/// and PointerNullValue. Return NULL if value is not a constant int.
527 // Normal constant int.
529 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
530 return CI;
531
532 // It is not safe to look through inttoptr or ptrtoint when using unstable
533 // pointer types.
534 if (DL.hasUnstableRepresentation(V->getType()))
535 return nullptr;
536
537 // This is some kind of pointer constant. Turn it into a pointer-sized
538 // ConstantInt if possible.
539 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
540
541 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
543 return ConstantInt::get(IntPtrTy, 0);
544
545 // IntToPtr const int, we can look through this if the semantics of
546 // inttoptr for this address space are a simple (truncating) bitcast.
548 if (CE->getOpcode() == Instruction::IntToPtr)
549 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
550 // The constant is very likely to have the right type already.
551 if (CI->getType() == IntPtrTy)
552 return CI;
553 else
554 return cast<ConstantInt>(
555 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
556 }
557 return nullptr;
558}
559
560namespace {
561
562/// Given a chain of or (||) or and (&&) comparison of a value against a
563/// constant, this will try to recover the information required for a switch
564/// structure.
565/// It will depth-first traverse the chain of comparison, seeking for patterns
566/// like %a == 12 or %a < 4 and combine them to produce a set of integer
567/// representing the different cases for the switch.
568/// Note that if the chain is composed of '||' it will build the set of elements
569/// that matches the comparisons (i.e. any of this value validate the chain)
570/// while for a chain of '&&' it will build the set elements that make the test
571/// fail.
572struct ConstantComparesGatherer {
573 const DataLayout &DL;
574
575 /// Value found for the switch comparison
576 Value *CompValue = nullptr;
577
578 /// Extra clause to be checked before the switch
579 Value *Extra = nullptr;
580
581 /// Set of integers to match in switch
583
584 /// Number of comparisons matched in the and/or chain
585 unsigned UsedICmps = 0;
586
587 /// If the elements in Vals matches the comparisons
588 bool IsEq = false;
589
590 // Used to check if the first matched CompValue shall be the Extra check.
591 bool IgnoreFirstMatch = false;
592 bool MultipleMatches = false;
593
594 /// Construct and compute the result for the comparison instruction Cond
595 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
596 gather(Cond);
597 if (CompValue || !MultipleMatches)
598 return;
599 Extra = nullptr;
600 Vals.clear();
601 UsedICmps = 0;
602 IgnoreFirstMatch = true;
603 gather(Cond);
604 }
605
606 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
607 ConstantComparesGatherer &
608 operator=(const ConstantComparesGatherer &) = delete;
609
610private:
611 /// Try to set the current value used for the comparison, it succeeds only if
612 /// it wasn't set before or if the new value is the same as the old one
613 bool setValueOnce(Value *NewVal) {
614 if (IgnoreFirstMatch) {
615 IgnoreFirstMatch = false;
616 return false;
617 }
618 if (CompValue && CompValue != NewVal) {
619 MultipleMatches = true;
620 return false;
621 }
622 CompValue = NewVal;
623 return true;
624 }
625
626 /// Try to match Instruction "I" as a comparison against a constant and
627 /// populates the array Vals with the set of values that match (or do not
628 /// match depending on isEQ).
629 /// Return false on failure. On success, the Value the comparison matched
630 /// against is placed in CompValue.
631 /// If CompValue is already set, the function is expected to fail if a match
632 /// is found but the value compared to is different.
633 bool matchInstruction(Instruction *I, bool isEQ) {
634 if (match(I, m_Not(m_Instruction(I))))
635 isEQ = !isEQ;
636
637 Value *Val;
638 if (match(I, m_NUWTrunc(m_Value(Val)))) {
639 // If we already have a value for the switch, it has to match!
640 if (!setValueOnce(Val))
641 return false;
642 UsedICmps++;
643 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
644 return true;
645 }
646 // If this is an icmp against a constant, handle this as one of the cases.
647 ICmpInst *ICI;
648 ConstantInt *C;
649 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
650 (C = getConstantInt(I->getOperand(1), DL)))) {
651 return false;
652 }
653
654 Value *RHSVal;
655 const APInt *RHSC;
656
657 // Pattern match a special case
658 // (x & ~2^z) == y --> x == y || x == y|2^z
659 // This undoes a transformation done by instcombine to fuse 2 compares.
660 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
661 // It's a little bit hard to see why the following transformations are
662 // correct. Here is a CVC3 program to verify them for 64-bit values:
663
664 /*
665 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
666 x : BITVECTOR(64);
667 y : BITVECTOR(64);
668 z : BITVECTOR(64);
669 mask : BITVECTOR(64) = BVSHL(ONE, z);
670 QUERY( (y & ~mask = y) =>
671 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
672 );
673 QUERY( (y | mask = y) =>
674 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
675 );
676 */
677
678 // Please note that each pattern must be a dual implication (<--> or
679 // iff). One directional implication can create spurious matches. If the
680 // implication is only one-way, an unsatisfiable condition on the left
681 // side can imply a satisfiable condition on the right side. Dual
682 // implication ensures that satisfiable conditions are transformed to
683 // other satisfiable conditions and unsatisfiable conditions are
684 // transformed to other unsatisfiable conditions.
685
686 // Here is a concrete example of a unsatisfiable condition on the left
687 // implying a satisfiable condition on the right:
688 //
689 // mask = (1 << z)
690 // (x & ~mask) == y --> (x == y || x == (y | mask))
691 //
692 // Substituting y = 3, z = 0 yields:
693 // (x & -2) == 3 --> (x == 3 || x == 2)
694
695 // Pattern match a special case:
696 /*
697 QUERY( (y & ~mask = y) =>
698 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
699 );
700 */
701 if (match(ICI->getOperand(0),
702 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
703 APInt Mask = ~*RHSC;
704 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
705 // If we already have a value for the switch, it has to match!
706 if (!setValueOnce(RHSVal))
707 return false;
708
709 Vals.push_back(C);
710 Vals.push_back(
711 ConstantInt::get(C->getContext(),
712 C->getValue() | Mask));
713 UsedICmps++;
714 return true;
715 }
716 }
717
718 // Pattern match a special case:
719 /*
720 QUERY( (y | mask = y) =>
721 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
722 );
723 */
724 if (match(ICI->getOperand(0),
725 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
726 APInt Mask = *RHSC;
727 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
728 // If we already have a value for the switch, it has to match!
729 if (!setValueOnce(RHSVal))
730 return false;
731
732 Vals.push_back(C);
733 Vals.push_back(ConstantInt::get(C->getContext(),
734 C->getValue() & ~Mask));
735 UsedICmps++;
736 return true;
737 }
738 }
739
740 // If we already have a value for the switch, it has to match!
741 if (!setValueOnce(ICI->getOperand(0)))
742 return false;
743
744 UsedICmps++;
745 Vals.push_back(C);
746 return true;
747 }
748
749 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
750 ConstantRange Span =
752
753 // Shift the range if the compare is fed by an add. This is the range
754 // compare idiom as emitted by instcombine.
755 Value *CandidateVal = I->getOperand(0);
756 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
757 Span = Span.subtract(*RHSC);
758 CandidateVal = RHSVal;
759 }
760
761 // If this is an and/!= check, then we are looking to build the set of
762 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
763 // x != 0 && x != 1.
764 if (!isEQ)
765 Span = Span.inverse();
766
767 // If there are a ton of values, we don't want to make a ginormous switch.
768 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
769 return false;
770 }
771
772 // If we already have a value for the switch, it has to match!
773 if (!setValueOnce(CandidateVal))
774 return false;
775
776 // Add all values from the range to the set
777 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
778 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
779
780 UsedICmps++;
781 return true;
782 }
783
784 /// Given a potentially 'or'd or 'and'd together collection of icmp
785 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
786 /// the value being compared, and stick the list constants into the Vals
787 /// vector.
788 /// One "Extra" case is allowed to differ from the other.
789 void gather(Value *V) {
790 Value *Op0, *Op1;
791 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
792 IsEq = true;
793 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
794 IsEq = false;
795 else
796 return;
797 // Keep a stack (SmallVector for efficiency) for depth-first traversal
798 SmallVector<Value *, 8> DFT{Op0, Op1};
799 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
800
801 while (!DFT.empty()) {
802 V = DFT.pop_back_val();
803
804 if (Instruction *I = dyn_cast<Instruction>(V)) {
805 // If it is a || (or && depending on isEQ), process the operands.
806 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
807 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
808 if (Visited.insert(Op1).second)
809 DFT.push_back(Op1);
810 if (Visited.insert(Op0).second)
811 DFT.push_back(Op0);
812
813 continue;
814 }
815
816 // Try to match the current instruction
817 if (matchInstruction(I, IsEq))
818 // Match succeed, continue the loop
819 continue;
820 }
821
822 // One element of the sequence of || (or &&) could not be match as a
823 // comparison against the same value as the others.
824 // We allow only one "Extra" case to be checked before the switch
825 if (!Extra) {
826 Extra = V;
827 continue;
828 }
829 // Failed to parse a proper sequence, abort now
830 CompValue = nullptr;
831 break;
832 }
833 }
834};
835
836} // end anonymous namespace
837
839 MemorySSAUpdater *MSSAU = nullptr) {
840 Instruction *Cond = nullptr;
842 Cond = dyn_cast<Instruction>(SI->getCondition());
843 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
844 if (BI->isConditional())
845 Cond = dyn_cast<Instruction>(BI->getCondition());
846 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
847 Cond = dyn_cast<Instruction>(IBI->getAddress());
848 }
849
850 TI->eraseFromParent();
851 if (Cond)
853}
854
855/// Return true if the specified terminator checks
856/// to see if a value is equal to constant integer value.
857Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
858 Value *CV = nullptr;
859 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
860 // Do not permit merging of large switch instructions into their
861 // predecessors unless there is only one predecessor.
862 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
863 CV = SI->getCondition();
864 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
865 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
866 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
867 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
868 CV = ICI->getOperand(0);
869 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
870 if (Trunc->hasNoUnsignedWrap())
871 CV = Trunc->getOperand(0);
872 }
873 }
874
875 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
876 if (CV) {
877 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
878 Value *Ptr = PTII->getPointerOperand();
879 if (DL.hasUnstableRepresentation(Ptr->getType()))
880 return CV;
881 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
882 CV = Ptr;
883 }
884 }
885 return CV;
886}
887
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    Cases.reserve(SI->getNumCases());
    for (auto Case : SI->cases())
      Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
                                                  Case.getCaseSuccessor()));
    return SI->getDefaultDest();
  }

  // Otherwise this must be a conditional branch on either an equality icmp
  // or a `trunc nuw` (see isValueEqualityComparison).
  BranchInst *BI = cast<BranchInst>(TI);
  Value *Cond = BI->getCondition();
  ICmpInst::Predicate Pred;
  ConstantInt *C;
  if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
    Pred = ICI->getPredicate();
    C = getConstantInt(ICI->getOperand(1), DL);
  } else {
    // Model `br (trunc nuw X)` as the comparison `X != 0`.
    Pred = ICmpInst::ICMP_NE;
    auto *Trunc = cast<TruncInst>(Cond);
    C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
  }
  // For an NE comparison the single explicit "case" (value == C) is taken on
  // the *false* edge (successor index 1); for EQ it is the true edge (0).
  BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
  Cases.push_back(ValueEqualityComparisonCase(C, Succ));
  // The remaining successor acts as the "default" destination.
  return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
}
916
917/// Given a vector of bb/value pairs, remove any entries
918/// in the list that match the specified block.
919static void
921 std::vector<ValueEqualityComparisonCase> &Cases) {
922 llvm::erase(Cases, BB);
923}
924
925/// Return true if there are any keys in C1 that exist in C2 as well.
926static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
927 std::vector<ValueEqualityComparisonCase> &C2) {
928 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
929
930 // Make V1 be smaller than V2.
931 if (V1->size() > V2->size())
932 std::swap(V1, V2);
933
934 if (V1->empty())
935 return false;
936 if (V1->size() == 1) {
937 // Just scan V2.
938 ConstantInt *TheVal = (*V1)[0].Value;
939 for (const ValueEqualityComparisonCase &VECC : *V2)
940 if (TheVal == VECC.Value)
941 return true;
942 }
943
944 // Otherwise, just sort both lists and compare element by element.
945 array_pod_sort(V1->begin(), V1->end());
946 array_pod_sort(V2->begin(), V2->end());
947 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
948 while (i1 != e1 && i2 != e2) {
949 if ((*V1)[i1].Value == (*V2)[i2].Value)
950 return true;
951 if ((*V1)[i1].Value < (*V2)[i2].Value)
952 ++i1;
953 else
954 ++i2;
955 }
956 return false;
957}
958
959// Set branch weights on SwitchInst. This sets the metadata if there is at
960// least one non-zero weight.
962 bool IsExpected) {
963 // Check that there is at least one non-zero weight. Otherwise, pass
964 // nullptr to setMetadata which will erase the existing metadata.
965 MDNode *N = nullptr;
966 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
967 N = MDBuilder(SI->getParent()->getContext())
968 .createBranchWeights(Weights, IsExpected);
969 SI->setMetadata(LLVMContext::MD_prof, N);
970}
971
972// Similar to the above, but for branch and select instructions that take
973// exactly 2 weights.
974static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
975 uint32_t FalseWeight, bool IsExpected) {
977 // Check that there is at least one non-zero weight. Otherwise, pass
978 // nullptr to setMetadata which will erase the existing metadata.
979 MDNode *N = nullptr;
980 if (TrueWeight || FalseWeight)
981 N = MDBuilder(I->getParent()->getContext())
982 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
983 I->setMetadata(LLVMContext::MD_prof, N);
984}
985
986/// If TI is known to be a terminator instruction and its block is known to
987/// only have a single predecessor block, check to see if that predecessor is
988/// also a value comparison with the same value, and if that comparison
989/// determines the outcome of this comparison. If so, simplify TI. This does a
990/// very limited form of jump threading.
991bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
992 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
993 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
994 if (!PredVal)
995 return false; // Not a value comparison in predecessor.
996
997 Value *ThisVal = isValueEqualityComparison(TI);
998 assert(ThisVal && "This isn't a value comparison!!");
999 if (ThisVal != PredVal)
1000 return false; // Different predicates.
1001
1002 // TODO: Preserve branch weight metadata, similarly to how
1003 // foldValueComparisonIntoPredecessors preserves it.
1004
1005 // Find out information about when control will move from Pred to TI's block.
1006 std::vector<ValueEqualityComparisonCase> PredCases;
1007 BasicBlock *PredDef =
1008 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
1009 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
1010
1011 // Find information about how control leaves this block.
1012 std::vector<ValueEqualityComparisonCase> ThisCases;
1013 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
1014 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
1015
1016 // If TI's block is the default block from Pred's comparison, potentially
1017 // simplify TI based on this knowledge.
1018 if (PredDef == TI->getParent()) {
1019 // If we are here, we know that the value is none of those cases listed in
1020 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1021 // can simplify TI.
1022 if (!valuesOverlap(PredCases, ThisCases))
1023 return false;
1024
1025 if (isa<BranchInst>(TI)) {
1026 // Okay, one of the successors of this condbr is dead. Convert it to a
1027 // uncond br.
1028 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1029 // Insert the new branch.
1030 Instruction *NI = Builder.CreateBr(ThisDef);
1031 (void)NI;
1032
1033 // Remove PHI node entries for the dead edge.
1034 ThisCases[0].Dest->removePredecessor(PredDef);
1035
1036 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1037 << "Through successor TI: " << *TI << "Leaving: " << *NI
1038 << "\n");
1039
1041
1042 if (DTU)
1043 DTU->applyUpdates(
1044 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1045
1046 return true;
1047 }
1048
1049 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1050 // Okay, TI has cases that are statically dead, prune them away.
1051 SmallPtrSet<Constant *, 16> DeadCases;
1052 for (const ValueEqualityComparisonCase &Case : PredCases)
1053 DeadCases.insert(Case.Value);
1054
1055 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1056 << "Through successor TI: " << *TI);
1057
1058 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1059 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1060 --i;
1061 auto *Successor = i->getCaseSuccessor();
1062 if (DTU)
1063 ++NumPerSuccessorCases[Successor];
1064 if (DeadCases.count(i->getCaseValue())) {
1065 Successor->removePredecessor(PredDef);
1066 SI.removeCase(i);
1067 if (DTU)
1068 --NumPerSuccessorCases[Successor];
1069 }
1070 }
1071
1072 if (DTU) {
1073 std::vector<DominatorTree::UpdateType> Updates;
1074 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1075 if (I.second == 0)
1076 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1077 DTU->applyUpdates(Updates);
1078 }
1079
1080 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1081 return true;
1082 }
1083
1084 // Otherwise, TI's block must correspond to some matched value. Find out
1085 // which value (or set of values) this is.
1086 ConstantInt *TIV = nullptr;
1087 BasicBlock *TIBB = TI->getParent();
1088 for (const auto &[Value, Dest] : PredCases)
1089 if (Dest == TIBB) {
1090 if (TIV)
1091 return false; // Cannot handle multiple values coming to this block.
1092 TIV = Value;
1093 }
1094 assert(TIV && "No edge from pred to succ?");
1095
1096 // Okay, we found the one constant that our value can be if we get into TI's
1097 // BB. Find out which successor will unconditionally be branched to.
1098 BasicBlock *TheRealDest = nullptr;
1099 for (const auto &[Value, Dest] : ThisCases)
1100 if (Value == TIV) {
1101 TheRealDest = Dest;
1102 break;
1103 }
1104
1105 // If not handled by any explicit cases, it is handled by the default case.
1106 if (!TheRealDest)
1107 TheRealDest = ThisDef;
1108
1109 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1110
1111 // Remove PHI node entries for dead edges.
1112 BasicBlock *CheckEdge = TheRealDest;
1113 for (BasicBlock *Succ : successors(TIBB))
1114 if (Succ != CheckEdge) {
1115 if (Succ != TheRealDest)
1116 RemovedSuccs.insert(Succ);
1117 Succ->removePredecessor(TIBB);
1118 } else
1119 CheckEdge = nullptr;
1120
1121 // Insert the new branch.
1122 Instruction *NI = Builder.CreateBr(TheRealDest);
1123 (void)NI;
1124
1125 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1126 << "Through successor TI: " << *TI << "Leaving: " << *NI
1127 << "\n");
1128
1130 if (DTU) {
1131 SmallVector<DominatorTree::UpdateType, 2> Updates;
1132 Updates.reserve(RemovedSuccs.size());
1133 for (auto *RemovedSucc : RemovedSuccs)
1134 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1135 DTU->applyUpdates(Updates);
1136 }
1137 return true;
1138}
1139
namespace {

/// This class implements a stable ordering of constant
/// integers that does not depend on their address. This is important for
/// applications that sort ConstantInt's to ensure uniqueness.
struct ConstantIntOrdering {
  // Strict weak ordering: compare by unsigned numeric value of the APInt,
  // never by pointer identity, so results are deterministic across runs.
  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
    return LHS->getValue().ult(RHS->getValue());
  }
};

} // end anonymous namespace
1152
1154 ConstantInt *const *P2) {
1155 const ConstantInt *LHS = *P1;
1156 const ConstantInt *RHS = *P2;
1157 if (LHS == RHS)
1158 return 0;
1159 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1160}
1161
1162/// Get Weights of a given terminator, the default weight is at the front
1163/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1164/// metadata.
1166 SmallVectorImpl<uint64_t> &Weights) {
1167 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1168 assert(MD && "Invalid branch-weight metadata");
1169 extractFromBranchWeightMD64(MD, Weights);
1170
1171 // If TI is a conditional eq, the default case is the false case,
1172 // and the corresponding branch-weight data is at index 2. We swap the
1173 // default weight to be the first entry.
1174 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1175 assert(Weights.size() == 2);
1176 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1177 if (!ICI)
1178 return;
1179
1180 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1181 std::swap(Weights.front(), Weights.back());
1182 }
1183}
1184
1185/// Keep halving the weights until all can fit in uint32_t.
1187 uint64_t Max = *llvm::max_element(Weights);
1188 if (Max > UINT_MAX) {
1189 unsigned Offset = 32 - llvm::countl_zero(Max);
1190 for (uint64_t &I : Weights)
1191 I >>= Offset;
1192 }
1193}
1194
1196 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1197 Instruction *PTI = PredBlock->getTerminator();
1198
1199 // If we have bonus instructions, clone them into the predecessor block.
1200 // Note that there may be multiple predecessor blocks, so we cannot move
1201 // bonus instructions to a predecessor block.
1202 for (Instruction &BonusInst : *BB) {
1203 if (BonusInst.isTerminator())
1204 continue;
1205
1206 Instruction *NewBonusInst = BonusInst.clone();
1207
1208 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1209 // Unless the instruction has the same !dbg location as the original
1210 // branch, drop it. When we fold the bonus instructions we want to make
1211 // sure we reset their debug locations in order to avoid stepping on
1212 // dead code caused by folding dead branches.
1213 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1214 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1215 mapAtomInstance(DL, VMap);
1216 }
1217
1218 RemapInstruction(NewBonusInst, VMap,
1220
1221 // If we speculated an instruction, we need to drop any metadata that may
1222 // result in undefined behavior, as the metadata might have been valid
1223 // only given the branch precondition.
1224 // Similarly strip attributes on call parameters that may cause UB in
1225 // location the call is moved to.
1226 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1227
1228 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1229 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1230 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1232
1233 NewBonusInst->takeName(&BonusInst);
1234 BonusInst.setName(NewBonusInst->getName() + ".old");
1235 VMap[&BonusInst] = NewBonusInst;
1236
1237 // Update (liveout) uses of bonus instructions,
1238 // now that the bonus instruction has been cloned into predecessor.
1239 // Note that we expect to be in a block-closed SSA form for this to work!
1240 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1241 auto *UI = cast<Instruction>(U.getUser());
1242 auto *PN = dyn_cast<PHINode>(UI);
1243 if (!PN) {
1244 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1245 "If the user is not a PHI node, then it should be in the same "
1246 "block as, and come after, the original bonus instruction.");
1247 continue; // Keep using the original bonus instruction.
1248 }
1249 // Is this the block-closed SSA form PHI node?
1250 if (PN->getIncomingBlock(U) == BB)
1251 continue; // Great, keep using the original bonus instruction.
1252 // The only other alternative is an "use" when coming from
1253 // the predecessor block - here we should refer to the cloned bonus instr.
1254 assert(PN->getIncomingBlock(U) == PredBlock &&
1255 "Not in block-closed SSA form?");
1256 U.set(NewBonusInst);
1257 }
1258 }
1259
1260 // Key Instructions: We may have propagated atom info into the pred. If the
1261 // pred's terminator already has atom info do nothing as merging would drop
1262 // one atom group anyway. If it doesn't, propagte the remapped atom group
1263 // from BB's terminator.
1264 if (auto &PredDL = PTI->getDebugLoc()) {
1265 auto &DL = BB->getTerminator()->getDebugLoc();
1266 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1267 PredDL.isSameSourceLocation(DL)) {
1268 PTI->setDebugLoc(DL);
1269 RemapSourceAtom(PTI, VMap);
1270 }
1271 }
1272}
1273
1274bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1275 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1276 BasicBlock *BB = TI->getParent();
1277 BasicBlock *Pred = PTI->getParent();
1278
1280
1281 // Figure out which 'cases' to copy from SI to PSI.
1282 std::vector<ValueEqualityComparisonCase> BBCases;
1283 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1284
1285 std::vector<ValueEqualityComparisonCase> PredCases;
1286 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1287
1288 // Based on whether the default edge from PTI goes to BB or not, fill in
1289 // PredCases and PredDefault with the new switch cases we would like to
1290 // build.
1291 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1292
1293 // Update the branch weight metadata along the way
1294 SmallVector<uint64_t, 8> Weights;
1295 bool PredHasWeights = hasBranchWeightMD(*PTI);
1296 bool SuccHasWeights = hasBranchWeightMD(*TI);
1297
1298 if (PredHasWeights) {
1299 getBranchWeights(PTI, Weights);
1300 // branch-weight metadata is inconsistent here.
1301 if (Weights.size() != 1 + PredCases.size())
1302 PredHasWeights = SuccHasWeights = false;
1303 } else if (SuccHasWeights)
1304 // If there are no predecessor weights but there are successor weights,
1305 // populate Weights with 1, which will later be scaled to the sum of
1306 // successor's weights
1307 Weights.assign(1 + PredCases.size(), 1);
1308
1309 SmallVector<uint64_t, 8> SuccWeights;
1310 if (SuccHasWeights) {
1311 getBranchWeights(TI, SuccWeights);
1312 // branch-weight metadata is inconsistent here.
1313 if (SuccWeights.size() != 1 + BBCases.size())
1314 PredHasWeights = SuccHasWeights = false;
1315 } else if (PredHasWeights)
1316 SuccWeights.assign(1 + BBCases.size(), 1);
1317
1318 if (PredDefault == BB) {
1319 // If this is the default destination from PTI, only the edges in TI
1320 // that don't occur in PTI, or that branch to BB will be activated.
1321 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1322 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1323 if (PredCases[i].Dest != BB)
1324 PTIHandled.insert(PredCases[i].Value);
1325 else {
1326 // The default destination is BB, we don't need explicit targets.
1327 std::swap(PredCases[i], PredCases.back());
1328
1329 if (PredHasWeights || SuccHasWeights) {
1330 // Increase weight for the default case.
1331 Weights[0] += Weights[i + 1];
1332 std::swap(Weights[i + 1], Weights.back());
1333 Weights.pop_back();
1334 }
1335
1336 PredCases.pop_back();
1337 --i;
1338 --e;
1339 }
1340
1341 // Reconstruct the new switch statement we will be building.
1342 if (PredDefault != BBDefault) {
1343 PredDefault->removePredecessor(Pred);
1344 if (DTU && PredDefault != BB)
1345 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1346 PredDefault = BBDefault;
1347 ++NewSuccessors[BBDefault];
1348 }
1349
1350 unsigned CasesFromPred = Weights.size();
1351 uint64_t ValidTotalSuccWeight = 0;
1352 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1353 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1354 PredCases.push_back(BBCases[i]);
1355 ++NewSuccessors[BBCases[i].Dest];
1356 if (SuccHasWeights || PredHasWeights) {
1357 // The default weight is at index 0, so weight for the ith case
1358 // should be at index i+1. Scale the cases from successor by
1359 // PredDefaultWeight (Weights[0]).
1360 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1361 ValidTotalSuccWeight += SuccWeights[i + 1];
1362 }
1363 }
1364
1365 if (SuccHasWeights || PredHasWeights) {
1366 ValidTotalSuccWeight += SuccWeights[0];
1367 // Scale the cases from predecessor by ValidTotalSuccWeight.
1368 for (unsigned i = 1; i < CasesFromPred; ++i)
1369 Weights[i] *= ValidTotalSuccWeight;
1370 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1371 Weights[0] *= SuccWeights[0];
1372 }
1373 } else {
1374 // If this is not the default destination from PSI, only the edges
1375 // in SI that occur in PSI with a destination of BB will be
1376 // activated.
1377 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1378 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1379 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1380 if (PredCases[i].Dest == BB) {
1381 PTIHandled.insert(PredCases[i].Value);
1382
1383 if (PredHasWeights || SuccHasWeights) {
1384 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1385 std::swap(Weights[i + 1], Weights.back());
1386 Weights.pop_back();
1387 }
1388
1389 std::swap(PredCases[i], PredCases.back());
1390 PredCases.pop_back();
1391 --i;
1392 --e;
1393 }
1394
1395 // Okay, now we know which constants were sent to BB from the
1396 // predecessor. Figure out where they will all go now.
1397 for (const ValueEqualityComparisonCase &Case : BBCases)
1398 if (PTIHandled.count(Case.Value)) {
1399 // If this is one we are capable of getting...
1400 if (PredHasWeights || SuccHasWeights)
1401 Weights.push_back(WeightsForHandled[Case.Value]);
1402 PredCases.push_back(Case);
1403 ++NewSuccessors[Case.Dest];
1404 PTIHandled.erase(Case.Value); // This constant is taken care of
1405 }
1406
1407 // If there are any constants vectored to BB that TI doesn't handle,
1408 // they must go to the default destination of TI.
1409 for (ConstantInt *I : PTIHandled) {
1410 if (PredHasWeights || SuccHasWeights)
1411 Weights.push_back(WeightsForHandled[I]);
1412 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1413 ++NewSuccessors[BBDefault];
1414 }
1415 }
1416
1417 // Okay, at this point, we know which new successor Pred will get. Make
1418 // sure we update the number of entries in the PHI nodes for these
1419 // successors.
1420 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1421 if (DTU) {
1422 SuccsOfPred = {llvm::from_range, successors(Pred)};
1423 Updates.reserve(Updates.size() + NewSuccessors.size());
1424 }
1425 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1426 NewSuccessors) {
1427 for (auto I : seq(NewSuccessor.second)) {
1428 (void)I;
1429 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1430 }
1431 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1432 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1433 }
1434
1435 Builder.SetInsertPoint(PTI);
1436 // Convert pointer to int before we switch.
1437 if (CV->getType()->isPointerTy()) {
1438 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1439 "Should not end up here with unstable pointers");
1440 CV =
1441 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1442 }
1443
1444 // Now that the successors are updated, create the new Switch instruction.
1445 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1446 NewSI->setDebugLoc(PTI->getDebugLoc());
1447 for (ValueEqualityComparisonCase &V : PredCases)
1448 NewSI->addCase(V.Value, V.Dest);
1449
1450 if (PredHasWeights || SuccHasWeights) {
1451 // Halve the weights if any of them cannot fit in an uint32_t
1452 fitWeights(Weights);
1453
1454 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1455
1456 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1457 }
1458
1460
1461 // Okay, last check. If BB is still a successor of PSI, then we must
1462 // have an infinite loop case. If so, add an infinitely looping block
1463 // to handle the case to preserve the behavior of the code.
1464 BasicBlock *InfLoopBlock = nullptr;
1465 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1466 if (NewSI->getSuccessor(i) == BB) {
1467 if (!InfLoopBlock) {
1468 // Insert it at the end of the function, because it's either code,
1469 // or it won't matter if it's hot. :)
1470 InfLoopBlock =
1471 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1472 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1473 if (DTU)
1474 Updates.push_back(
1475 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1476 }
1477 NewSI->setSuccessor(i, InfLoopBlock);
1478 }
1479
1480 if (DTU) {
1481 if (InfLoopBlock)
1482 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1483
1484 Updates.push_back({DominatorTree::Delete, Pred, BB});
1485
1486 DTU->applyUpdates(Updates);
1487 }
1488
1489 ++NumFoldValueComparisonIntoPredecessors;
1490 return true;
1491}
1492
1493/// The specified terminator is a value equality comparison instruction
1494/// (either a switch or a branch on "X == c").
1495/// See if any of the predecessors of the terminator block are value comparisons
1496/// on the same value. If so, and if safe to do so, fold them together.
1497bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1498 IRBuilder<> &Builder) {
1499 BasicBlock *BB = TI->getParent();
1500 Value *CV = isValueEqualityComparison(TI); // CondVal
1501 assert(CV && "Not a comparison?");
1502
1503 bool Changed = false;
1504
1505 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1506 while (!Preds.empty()) {
1507 BasicBlock *Pred = Preds.pop_back_val();
1508 Instruction *PTI = Pred->getTerminator();
1509
1510 // Don't try to fold into itself.
1511 if (Pred == BB)
1512 continue;
1513
1514 // See if the predecessor is a comparison with the same value.
1515 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1516 if (PCV != CV)
1517 continue;
1518
1519 SmallSetVector<BasicBlock *, 4> FailBlocks;
1520 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1521 for (auto *Succ : FailBlocks) {
1522 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1523 return false;
1524 }
1525 }
1526
1527 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1528 Changed = true;
1529 }
1530 return Changed;
1531}
1532
1533// If we would need to insert a select that uses the value of this invoke
1534// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1535// need to do this), we can't hoist the invoke, as there is nowhere to put the
1536// select in this case.
1538 Instruction *I1, Instruction *I2) {
1539 for (BasicBlock *Succ : successors(BB1)) {
1540 for (const PHINode &PN : Succ->phis()) {
1541 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1542 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1543 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1544 return false;
1545 }
1546 }
1547 }
1548 return true;
1549}
1550
1551// Get interesting characteristics of instructions that
1552// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1553// instructions can be reordered across.
1559
1561 unsigned Flags = 0;
1562 if (I->mayReadFromMemory())
1563 Flags |= SkipReadMem;
1564 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1565 // inalloca) across stacksave/stackrestore boundaries.
1566 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1567 Flags |= SkipSideEffect;
1569 Flags |= SkipImplicitControlFlow;
1570 return Flags;
1571}
1572
1573// Returns true if it is safe to reorder an instruction across preceding
1574// instructions in a basic block.
1575static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1576 // Don't reorder a store over a load.
1577 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1578 return false;
1579
1580 // If we have seen an instruction with side effects, it's unsafe to reorder an
1581 // instruction which reads memory or itself has side effects.
1582 if ((Flags & SkipSideEffect) &&
1583 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1584 return false;
1585
1586 // Reordering across an instruction which does not necessarily transfer
1587 // control to the next instruction is speculation.
1589 return false;
1590
1591 // Hoisting of llvm.deoptimize is only legal together with the next return
1592 // instruction, which this pass is not always able to do.
1593 if (auto *CB = dyn_cast<CallBase>(I))
1594 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1595 return false;
1596
1597 // It's also unsafe/illegal to hoist an instruction above its instruction
1598 // operands
1599 BasicBlock *BB = I->getParent();
1600 for (Value *Op : I->operands()) {
1601 if (auto *J = dyn_cast<Instruction>(Op))
1602 if (J->getParent() == BB)
1603 return false;
1604 }
1605
1606 return true;
1607}
1608
1609static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1610
1611/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1612/// instructions \p I1 and \p I2 can and should be hoisted.
1614 const TargetTransformInfo &TTI) {
1615 // If we're going to hoist a call, make sure that the two instructions
1616 // we're commoning/hoisting are both marked with musttail, or neither of
1617 // them is marked as such. Otherwise, we might end up in a situation where
1618 // we hoist from a block where the terminator is a `ret` to a block where
1619 // the terminator is a `br`, and `musttail` calls expect to be followed by
1620 // a return.
1621 auto *C1 = dyn_cast<CallInst>(I1);
1622 auto *C2 = dyn_cast<CallInst>(I2);
1623 if (C1 && C2)
1624 if (C1->isMustTailCall() != C2->isMustTailCall())
1625 return false;
1626
1627 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1628 return false;
1629
1630 // If any of the two call sites has nomerge or convergent attribute, stop
1631 // hoisting.
1632 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1633 if (CB1->cannotMerge() || CB1->isConvergent())
1634 return false;
1635 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1636 if (CB2->cannotMerge() || CB2->isConvergent())
1637 return false;
1638
1639 return true;
1640}
1641
1642/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1643/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1644/// hoistCommonCodeFromSuccessors. e.g. The input:
1645/// I1 DVRs: { x, z },
1646/// OtherInsts: { I2 DVRs: { x, y, z } }
1647/// would result in hoisting only DbgVariableRecord x.
1649 Instruction *TI, Instruction *I1,
1650 SmallVectorImpl<Instruction *> &OtherInsts) {
1651 if (!I1->hasDbgRecords())
1652 return;
1653 using CurrentAndEndIt =
1654 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1655 // Vector of {Current, End} iterators.
1657 Itrs.reserve(OtherInsts.size() + 1);
1658 // Helper lambdas for lock-step checks:
1659 // Return true if this Current == End.
1660 auto atEnd = [](const CurrentAndEndIt &Pair) {
1661 return Pair.first == Pair.second;
1662 };
1663 // Return true if all Current are identical.
1664 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1665 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1667 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1668 });
1669 };
1670
1671 // Collect the iterators.
1672 Itrs.push_back(
1673 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1674 for (Instruction *Other : OtherInsts) {
1675 if (!Other->hasDbgRecords())
1676 return;
1677 Itrs.push_back(
1678 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1679 }
1680
1681 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1682 // the lock-step DbgRecord are identical, hoist all of them to TI.
1683 // This replicates the dbg.* intrinsic behaviour in
1684 // hoistCommonCodeFromSuccessors.
1685 while (none_of(Itrs, atEnd)) {
1686 bool HoistDVRs = allIdentical(Itrs);
1687 for (CurrentAndEndIt &Pair : Itrs) {
1688 // Increment Current iterator now as we may be about to move the
1689 // DbgRecord.
1690 DbgRecord &DR = *Pair.first++;
1691 if (HoistDVRs) {
1692 DR.removeFromParent();
1693 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1694 }
1695 }
1696 }
1697}
1698
1700 const Instruction *I2) {
1701 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1702 return true;
1703
1704 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1705 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1706 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1707 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1708 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1709
1710 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1711 return I1->getOperand(0) == I2->getOperand(1) &&
1712 I1->getOperand(1) == I2->getOperand(0) &&
1713 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1714 }
1715
1716 return false;
1717}
1718
1719/// If the target supports conditional faulting,
1720/// we look for the following pattern:
1721/// \code
1722/// BB:
1723/// ...
1724/// %cond = icmp ult %x, %y
1725/// br i1 %cond, label %TrueBB, label %FalseBB
1726/// FalseBB:
1727/// store i32 1, ptr %q, align 4
1728/// ...
1729/// TrueBB:
1730/// %maskedloadstore = load i32, ptr %b, align 4
1731/// store i32 %maskedloadstore, ptr %p, align 4
1732/// ...
1733/// \endcode
1734///
1735/// and transform it into:
1736///
1737/// \code
1738/// BB:
1739/// ...
1740/// %cond = icmp ult %x, %y
1741/// %maskedloadstore = cload i32, ptr %b, %cond
1742/// cstore i32 %maskedloadstore, ptr %p, %cond
1743/// cstore i32 1, ptr %q, ~%cond
1744/// br i1 %cond, label %TrueBB, label %FalseBB
1745/// FalseBB:
1746/// ...
1747/// TrueBB:
1748/// ...
1749/// \endcode
1750///
1751/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1752/// e.g.
1753///
1754/// \code
1755/// %vcond = bitcast i1 %cond to <1 x i1>
1756/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1757/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1758/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1759/// call void @llvm.masked.store.v1i32.p0
1760/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1761/// %cond.not = xor i1 %cond, true
1762/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1763/// call void @llvm.masked.store.v1i32.p0
1764/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1765/// \endcode
1766///
1767/// So we need to turn hoisted load/store into cload/cstore.
1768///
1769/// \param BI The branch instruction.
1770/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1771/// will be speculated.
/// \param Invert indicates whether FalseBB is the block being speculated.
///        Only used in a triangle CFG.
1774 BranchInst *BI,
1775 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1776 std::optional<bool> Invert, Instruction *Sel) {
1777 auto &Context = BI->getParent()->getContext();
1778 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1779 auto *Cond = BI->getOperand(0);
1780 // Construct the condition if needed.
1781 BasicBlock *BB = BI->getParent();
1782 Value *Mask = nullptr;
1783 Value *MaskFalse = nullptr;
1784 Value *MaskTrue = nullptr;
1785 if (Invert.has_value()) {
1786 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1787 Mask = Builder.CreateBitCast(
1788 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1789 VCondTy);
1790 } else {
1791 IRBuilder<> Builder(BI);
1792 MaskFalse = Builder.CreateBitCast(
1793 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1794 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1795 }
1796 auto PeekThroughBitcasts = [](Value *V) {
1797 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1798 V = BitCast->getOperand(0);
1799 return V;
1800 };
1801 for (auto *I : SpeculatedConditionalLoadsStores) {
1802 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1803 if (!Invert.has_value())
1804 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1805 // We currently assume conditional faulting load/store is supported for
1806 // scalar types only when creating new instructions. This can be easily
1807 // extended for vector types in the future.
1808 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1809 auto *Op0 = I->getOperand(0);
1810 CallInst *MaskedLoadStore = nullptr;
1811 if (auto *LI = dyn_cast<LoadInst>(I)) {
1812 // Handle Load.
1813 auto *Ty = I->getType();
1814 PHINode *PN = nullptr;
1815 Value *PassThru = nullptr;
1816 if (Invert.has_value())
1817 for (User *U : I->users()) {
1818 if ((PN = dyn_cast<PHINode>(U))) {
1819 PassThru = Builder.CreateBitCast(
1820 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1821 FixedVectorType::get(Ty, 1));
1822 } else if (auto *Ins = cast<Instruction>(U);
1823 Sel && Ins->getParent() == BB) {
1824 // This happens when store or/and a speculative instruction between
1825 // load and store were hoisted to the BB. Make sure the masked load
1826 // inserted before its use.
1827 // We assume there's one of such use.
1828 Builder.SetInsertPoint(Ins);
1829 }
1830 }
1831 MaskedLoadStore = Builder.CreateMaskedLoad(
1832 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1833 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1834 if (PN)
1835 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1836 I->replaceAllUsesWith(NewLoadStore);
1837 } else {
1838 // Handle Store.
1839 auto *StoredVal = Builder.CreateBitCast(
1840 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1841 MaskedLoadStore = Builder.CreateMaskedStore(
1842 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1843 }
1844 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1845 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1846 //
1847 // !nonnull, !align : Not support pointer type, no need to keep.
1848 // !range: Load type is changed from scalar to vector, but the metadata on
1849 // vector specifies a per-element range, so the semantics stay the
1850 // same. Keep it.
1851 // !annotation: Not impact semantics. Keep it.
1852 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1853 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1854 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1855 // FIXME: DIAssignID is not supported for masked store yet.
1856 // (Verifier::visitDIAssignIDMetadata)
1858 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1859 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1860 });
1861 MaskedLoadStore->copyMetadata(*I);
1862 I->eraseFromParent();
1863 }
1864}
1865
1867 const TargetTransformInfo &TTI) {
1868 // Not handle volatile or atomic.
1869 bool IsStore = false;
1870 if (auto *L = dyn_cast<LoadInst>(I)) {
1871 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1872 return false;
1873 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1874 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1875 return false;
1876 IsStore = true;
1877 } else
1878 return false;
1879
1880 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1881 // That's why we have the alignment limitation.
1882 // FIXME: Update the prototype of the intrinsics?
1883 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1885}
1886
1887/// Hoist any common code in the successor blocks up into the block. This
1888/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1889/// given, only perform hoisting in case all successors blocks contain matching
1890/// instructions only. In that case, all instructions can be hoisted and the
1891/// original branch will be replaced and selects for PHIs are added.
1892bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1893 bool AllInstsEqOnly) {
1894 // This does very trivial matching, with limited scanning, to find identical
1895 // instructions in the two blocks. In particular, we don't want to get into
1896 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1897 // such, we currently just scan for obviously identical instructions in an
1898 // identical order, possibly separated by the same number of non-identical
1899 // instructions.
1900 BasicBlock *BB = TI->getParent();
1901 unsigned int SuccSize = succ_size(BB);
1902 if (SuccSize < 2)
1903 return false;
1904
1905 // If either of the blocks has it's address taken, then we can't do this fold,
1906 // because the code we'd hoist would no longer run when we jump into the block
1907 // by it's address.
1908 for (auto *Succ : successors(BB))
1909 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1910 return false;
1911
1912 // The second of pair is a SkipFlags bitmask.
1913 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1914 SmallVector<SuccIterPair, 8> SuccIterPairs;
1915 for (auto *Succ : successors(BB)) {
1916 BasicBlock::iterator SuccItr = Succ->begin();
1917 if (isa<PHINode>(*SuccItr))
1918 return false;
1919 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1920 }
1921
1922 if (AllInstsEqOnly) {
1923 // Check if all instructions in the successor blocks match. This allows
1924 // hoisting all instructions and removing the blocks we are hoisting from,
1925 // so does not add any new instructions.
1927 // Check if sizes and terminators of all successors match.
1928 bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
1929 Instruction *Term0 = Succs[0]->getTerminator();
1930 Instruction *Term = Succ->getTerminator();
1931 return !Term->isSameOperationAs(Term0) ||
1932 !equal(Term->operands(), Term0->operands()) ||
1933 Succs[0]->size() != Succ->size();
1934 });
1935 if (!AllSame)
1936 return false;
1937 if (AllSame) {
1938 LockstepReverseIterator<true> LRI(Succs);
1939 while (LRI.isValid()) {
1940 Instruction *I0 = (*LRI)[0];
1941 if (any_of(*LRI, [I0](Instruction *I) {
1942 return !areIdenticalUpToCommutativity(I0, I);
1943 })) {
1944 return false;
1945 }
1946 --LRI;
1947 }
1948 }
1949 // Now we know that all instructions in all successors can be hoisted. Let
1950 // the loop below handle the hoisting.
1951 }
1952
1953 // Count how many instructions were not hoisted so far. There's a limit on how
1954 // many instructions we skip, serving as a compilation time control as well as
1955 // preventing excessive increase of life ranges.
1956 unsigned NumSkipped = 0;
1957 // If we find an unreachable instruction at the beginning of a basic block, we
1958 // can still hoist instructions from the rest of the basic blocks.
1959 if (SuccIterPairs.size() > 2) {
1960 erase_if(SuccIterPairs,
1961 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1962 if (SuccIterPairs.size() < 2)
1963 return false;
1964 }
1965
1966 bool Changed = false;
1967
1968 for (;;) {
1969 auto *SuccIterPairBegin = SuccIterPairs.begin();
1970 auto &BB1ItrPair = *SuccIterPairBegin++;
1971 auto OtherSuccIterPairRange =
1972 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1973 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1974
1975 Instruction *I1 = &*BB1ItrPair.first;
1976
1977 bool AllInstsAreIdentical = true;
1978 bool HasTerminator = I1->isTerminator();
1979 for (auto &SuccIter : OtherSuccIterRange) {
1980 Instruction *I2 = &*SuccIter;
1981 HasTerminator |= I2->isTerminator();
1982 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1983 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1984 AllInstsAreIdentical = false;
1985 }
1986
1987 SmallVector<Instruction *, 8> OtherInsts;
1988 for (auto &SuccIter : OtherSuccIterRange)
1989 OtherInsts.push_back(&*SuccIter);
1990
1991 // If we are hoisting the terminator instruction, don't move one (making a
1992 // broken BB), instead clone it, and remove BI.
1993 if (HasTerminator) {
1994 // Even if BB, which contains only one unreachable instruction, is ignored
1995 // at the beginning of the loop, we can hoist the terminator instruction.
1996 // If any instructions remain in the block, we cannot hoist terminators.
1997 if (NumSkipped || !AllInstsAreIdentical) {
1998 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1999 return Changed;
2000 }
2001
2002 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
2003 Changed;
2004 }
2005
2006 if (AllInstsAreIdentical) {
2007 unsigned SkipFlagsBB1 = BB1ItrPair.second;
2008 AllInstsAreIdentical =
2009 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
2010 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
2011 Instruction *I2 = &*Pair.first;
2012 unsigned SkipFlagsBB2 = Pair.second;
2013 // Even if the instructions are identical, it may not
2014 // be safe to hoist them if we have skipped over
2015 // instructions with side effects or their operands
2016 // weren't hoisted.
2017 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
2019 });
2020 }
2021
2022 if (AllInstsAreIdentical) {
2023 BB1ItrPair.first++;
2024 // For a normal instruction, we just move one to right before the
2025 // branch, then replace all uses of the other with the first. Finally,
2026 // we remove the now redundant second instruction.
2027 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2028 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2029 // and leave any that were not hoisted behind (by calling moveBefore
2030 // rather than moveBeforePreserving).
2031 I1->moveBefore(TI->getIterator());
2032 for (auto &SuccIter : OtherSuccIterRange) {
2033 Instruction *I2 = &*SuccIter++;
2034 assert(I2 != I1);
2035 if (!I2->use_empty())
2036 I2->replaceAllUsesWith(I1);
2037 I1->andIRFlags(I2);
2038 if (auto *CB = dyn_cast<CallBase>(I1)) {
2039 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2040 assert(Success && "We should not be trying to hoist callbases "
2041 "with non-intersectable attributes");
2042 // For NDEBUG Compile.
2043 (void)Success;
2044 }
2045
2046 combineMetadataForCSE(I1, I2, true);
2047 // I1 and I2 are being combined into a single instruction. Its debug
2048 // location is the merged locations of the original instructions.
2049 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2050 I2->eraseFromParent();
2051 }
2052 if (!Changed)
2053 NumHoistCommonCode += SuccIterPairs.size();
2054 Changed = true;
2055 NumHoistCommonInstrs += SuccIterPairs.size();
2056 } else {
2057 if (NumSkipped >= HoistCommonSkipLimit) {
2058 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2059 return Changed;
2060 }
2061 // We are about to skip over a pair of non-identical instructions. Record
2062 // if any have characteristics that would prevent reordering instructions
2063 // across them.
2064 for (auto &SuccIterPair : SuccIterPairs) {
2065 Instruction *I = &*SuccIterPair.first++;
2066 SuccIterPair.second |= skippedInstrFlags(I);
2067 }
2068 ++NumSkipped;
2069 }
2070 }
2071}
2072
2073bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2074 Instruction *TI, Instruction *I1,
2075 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
2076
2077 auto *BI = dyn_cast<BranchInst>(TI);
2078
2079 bool Changed = false;
2080 BasicBlock *TIParent = TI->getParent();
2081 BasicBlock *BB1 = I1->getParent();
2082
2083 // Use only for an if statement.
2084 auto *I2 = *OtherSuccTIs.begin();
2085 auto *BB2 = I2->getParent();
2086 if (BI) {
2087 assert(OtherSuccTIs.size() == 1);
2088 assert(BI->getSuccessor(0) == I1->getParent());
2089 assert(BI->getSuccessor(1) == I2->getParent());
2090 }
2091
2092 // In the case of an if statement, we try to hoist an invoke.
2093 // FIXME: Can we define a safety predicate for CallBr?
2094 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2095 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2096 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2097 return false;
2098
2099 // TODO: callbr hoisting currently disabled pending further study.
2100 if (isa<CallBrInst>(I1))
2101 return false;
2102
2103 for (BasicBlock *Succ : successors(BB1)) {
2104 for (PHINode &PN : Succ->phis()) {
2105 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2106 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2107 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2108 if (BB1V == BB2V)
2109 continue;
2110
2111 // In the case of an if statement, check for
2112 // passingValueIsAlwaysUndefined here because we would rather eliminate
2113 // undefined control flow then converting it to a select.
2114 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2116 return false;
2117 }
2118 }
2119 }
2120
2121 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2122 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2123 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2124 // Clone the terminator and hoist it into the pred, without any debug info.
2125 Instruction *NT = I1->clone();
2126 NT->insertInto(TIParent, TI->getIterator());
2127 if (!NT->getType()->isVoidTy()) {
2128 I1->replaceAllUsesWith(NT);
2129 for (Instruction *OtherSuccTI : OtherSuccTIs)
2130 OtherSuccTI->replaceAllUsesWith(NT);
2131 NT->takeName(I1);
2132 }
2133 Changed = true;
2134 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2135
2136 // Ensure terminator gets a debug location, even an unknown one, in case
2137 // it involves inlinable calls.
2139 Locs.push_back(I1->getDebugLoc());
2140 for (auto *OtherSuccTI : OtherSuccTIs)
2141 Locs.push_back(OtherSuccTI->getDebugLoc());
2142 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2143
2144 // PHIs created below will adopt NT's merged DebugLoc.
2145 IRBuilder<NoFolder> Builder(NT);
2146
2147 // In the case of an if statement, hoisting one of the terminators from our
2148 // successor is a great thing. Unfortunately, the successors of the if/else
2149 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2150 // must agree for all PHI nodes, so we insert select instruction to compute
2151 // the final result.
2152 if (BI) {
2153 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2154 for (BasicBlock *Succ : successors(BB1)) {
2155 for (PHINode &PN : Succ->phis()) {
2156 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2157 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2158 if (BB1V == BB2V)
2159 continue;
2160
2161 // These values do not agree. Insert a select instruction before NT
2162 // that determines the right value.
2163 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2164 if (!SI) {
2165 // Propagate fast-math-flags from phi node to its replacement select.
2167 BI->getCondition(), BB1V, BB2V,
2168 isa<FPMathOperator>(PN) ? &PN : nullptr,
2169 BB1V->getName() + "." + BB2V->getName(), BI));
2170 }
2171
2172 // Make the PHI node use the select for all incoming values for BB1/BB2
2173 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2174 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2175 PN.setIncomingValue(i, SI);
2176 }
2177 }
2178 }
2179
2181
2182 // Update any PHI nodes in our new successors.
2183 for (BasicBlock *Succ : successors(BB1)) {
2184 addPredecessorToBlock(Succ, TIParent, BB1);
2185 if (DTU)
2186 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2187 }
2188
2189 if (DTU)
2190 for (BasicBlock *Succ : successors(TI))
2191 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2192
2194 if (DTU)
2195 DTU->applyUpdates(Updates);
2196 return Changed;
2197}
2198
2199// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2200// into variables.
2202 int OpIdx) {
2203 // Divide/Remainder by constant is typically much cheaper than by variable.
2204 if (I->isIntDivRem())
2205 return OpIdx != 1;
2206 return !isa<IntrinsicInst>(I);
2207}
2208
2209// All instructions in Insts belong to different blocks that all unconditionally
2210// branch to a common successor. Analyze each instruction and return true if it
2211// would be possible to sink them into their successor, creating one common
2212// instruction instead. For every value that would be required to be provided by
2213// PHI node (because an operand varies in each input block), add to PHIOperands.
2216 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2217 // Prune out obviously bad instructions to move. Each instruction must have
2218 // the same number of uses, and we check later that the uses are consistent.
2219 std::optional<unsigned> NumUses;
2220 for (auto *I : Insts) {
2221 // These instructions may change or break semantics if moved.
2222 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2223 I->getType()->isTokenTy())
2224 return false;
2225
2226 // Do not try to sink an instruction in an infinite loop - it can cause
2227 // this algorithm to infinite loop.
2228 if (I->getParent()->getSingleSuccessor() == I->getParent())
2229 return false;
2230
2231 // Conservatively return false if I is an inline-asm instruction. Sinking
2232 // and merging inline-asm instructions can potentially create arguments
2233 // that cannot satisfy the inline-asm constraints.
2234 // If the instruction has nomerge or convergent attribute, return false.
2235 if (const auto *C = dyn_cast<CallBase>(I))
2236 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2237 return false;
2238
2239 if (!NumUses)
2240 NumUses = I->getNumUses();
2241 else if (NumUses != I->getNumUses())
2242 return false;
2243 }
2244
2245 const Instruction *I0 = Insts.front();
2246 const auto I0MMRA = MMRAMetadata(*I0);
2247 for (auto *I : Insts) {
2248 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2249 return false;
2250
2251 // Treat MMRAs conservatively. This pass can be quite aggressive and
2252 // could drop a lot of MMRAs otherwise.
2253 if (MMRAMetadata(*I) != I0MMRA)
2254 return false;
2255 }
2256
2257 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2258 // then the other phi operands must match the instructions from Insts. This
2259 // also has to hold true for any phi nodes that would be created as a result
2260 // of sinking. Both of these cases are represented by PhiOperands.
2261 for (const Use &U : I0->uses()) {
2262 auto It = PHIOperands.find(&U);
2263 if (It == PHIOperands.end())
2264 // There may be uses in other blocks when sinking into a loop header.
2265 return false;
2266 if (!equal(Insts, It->second))
2267 return false;
2268 }
2269
2270 // For calls to be sinkable, they must all be indirect, or have same callee.
2271 // I.e. if we have two direct calls to different callees, we don't want to
2272 // turn that into an indirect call. Likewise, if we have an indirect call,
2273 // and a direct call, we don't actually want to have a single indirect call.
2274 if (isa<CallBase>(I0)) {
2275 auto IsIndirectCall = [](const Instruction *I) {
2276 return cast<CallBase>(I)->isIndirectCall();
2277 };
2278 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2279 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2280 if (HaveIndirectCalls) {
2281 if (!AllCallsAreIndirect)
2282 return false;
2283 } else {
2284 // All callees must be identical.
2285 Value *Callee = nullptr;
2286 for (const Instruction *I : Insts) {
2287 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2288 if (!Callee)
2289 Callee = CurrCallee;
2290 else if (Callee != CurrCallee)
2291 return false;
2292 }
2293 }
2294 }
2295
2296 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2297 Value *Op = I0->getOperand(OI);
2298 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2299 assert(I->getNumOperands() == I0->getNumOperands());
2300 return I->getOperand(OI) == I0->getOperand(OI);
2301 };
2302 if (!all_of(Insts, SameAsI0)) {
2305 // We can't create a PHI from this GEP.
2306 return false;
2307 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2308 for (auto *I : Insts)
2309 Ops.push_back(I->getOperand(OI));
2310 }
2311 }
2312 return true;
2313}
2314
2315// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2316// instruction of every block in Blocks to their common successor, commoning
2317// into one instruction.
2319 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2320
2321 // canSinkInstructions returning true guarantees that every block has at
2322 // least one non-terminator instruction.
2324 for (auto *BB : Blocks) {
2325 Instruction *I = BB->getTerminator();
2326 I = I->getPrevNode();
2327 Insts.push_back(I);
2328 }
2329
2330 // We don't need to do any more checking here; canSinkInstructions should
2331 // have done it all for us.
2332 SmallVector<Value*, 4> NewOperands;
2333 Instruction *I0 = Insts.front();
2334 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2335 // This check is different to that in canSinkInstructions. There, we
2336 // cared about the global view once simplifycfg (and instcombine) have
2337 // completed - it takes into account PHIs that become trivially
2338 // simplifiable. However here we need a more local view; if an operand
2339 // differs we create a PHI and rely on instcombine to clean up the very
2340 // small mess we may make.
2341 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2342 return I->getOperand(O) != I0->getOperand(O);
2343 });
2344 if (!NeedPHI) {
2345 NewOperands.push_back(I0->getOperand(O));
2346 continue;
2347 }
2348
2349 // Create a new PHI in the successor block and populate it.
2350 auto *Op = I0->getOperand(O);
2351 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2352 auto *PN =
2353 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2354 PN->insertBefore(BBEnd->begin());
2355 for (auto *I : Insts)
2356 PN->addIncoming(I->getOperand(O), I->getParent());
2357 NewOperands.push_back(PN);
2358 }
2359
2360 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2361 // and move it to the start of the successor block.
2362 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2363 I0->getOperandUse(O).set(NewOperands[O]);
2364
2365 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2366
2367 // Update metadata and IR flags, and merge debug locations.
2368 for (auto *I : Insts)
2369 if (I != I0) {
2370 // The debug location for the "common" instruction is the merged locations
2371 // of all the commoned instructions. We start with the original location
2372 // of the "common" instruction and iteratively merge each location in the
2373 // loop below.
2374 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2375 // However, as N-way merge for CallInst is rare, so we use simplified API
2376 // instead of using complex API for N-way merge.
2377 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2378 combineMetadataForCSE(I0, I, true);
2379 I0->andIRFlags(I);
2380 if (auto *CB = dyn_cast<CallBase>(I0)) {
2381 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2382 assert(Success && "We should not be trying to sink callbases "
2383 "with non-intersectable attributes");
2384 // For NDEBUG Compile.
2385 (void)Success;
2386 }
2387 }
2388
2389 for (User *U : make_early_inc_range(I0->users())) {
2390 // canSinkLastInstruction checked that all instructions are only used by
2391 // phi nodes in a way that allows replacing the phi node with the common
2392 // instruction.
2393 auto *PN = cast<PHINode>(U);
2394 PN->replaceAllUsesWith(I0);
2395 PN->eraseFromParent();
2396 }
2397
2398 // Finally nuke all instructions apart from the common instruction.
2399 for (auto *I : Insts) {
2400 if (I == I0)
2401 continue;
2402 // The remaining uses are debug users, replace those with the common inst.
2403 // In most (all?) cases this just introduces a use-before-def.
2404 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2405 I->replaceAllUsesWith(I0);
2406 I->eraseFromParent();
2407 }
2408}
2409
2410/// Check whether BB's predecessors end with unconditional branches. If it is
2411/// true, sink any common code from the predecessors to BB.
2413 DomTreeUpdater *DTU) {
2414 // We support two situations:
2415 // (1) all incoming arcs are unconditional
2416 // (2) there are non-unconditional incoming arcs
2417 //
2418 // (2) is very common in switch defaults and
2419 // else-if patterns;
2420 //
2421 // if (a) f(1);
2422 // else if (b) f(2);
2423 //
2424 // produces:
2425 //
2426 // [if]
2427 // / \
2428 // [f(1)] [if]
2429 // | | \
2430 // | | |
2431 // | [f(2)]|
2432 // \ | /
2433 // [ end ]
2434 //
2435 // [end] has two unconditional predecessor arcs and one conditional. The
2436 // conditional refers to the implicit empty 'else' arc. This conditional
2437 // arc can also be caused by an empty default block in a switch.
2438 //
2439 // In this case, we attempt to sink code from all *unconditional* arcs.
2440 // If we can sink instructions from these arcs (determined during the scan
2441 // phase below) we insert a common successor for all unconditional arcs and
2442 // connect that to [end], to enable sinking:
2443 //
2444 // [if]
2445 // / \
2446 // [x(1)] [if]
2447 // | | \
2448 // | | \
2449 // | [x(2)] |
2450 // \ / |
2451 // [sink.split] |
2452 // \ /
2453 // [ end ]
2454 //
2455 SmallVector<BasicBlock*,4> UnconditionalPreds;
2456 bool HaveNonUnconditionalPredecessors = false;
2457 for (auto *PredBB : predecessors(BB)) {
2458 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2459 if (PredBr && PredBr->isUnconditional())
2460 UnconditionalPreds.push_back(PredBB);
2461 else
2462 HaveNonUnconditionalPredecessors = true;
2463 }
2464 if (UnconditionalPreds.size() < 2)
2465 return false;
2466
2467 // We take a two-step approach to tail sinking. First we scan from the end of
2468 // each block upwards in lockstep. If the n'th instruction from the end of each
2469 // block can be sunk, those instructions are added to ValuesToSink and we
2470 // carry on. If we can sink an instruction but need to PHI-merge some operands
2471 // (because they're not identical in each instruction) we add these to
2472 // PHIOperands.
2473 // We prepopulate PHIOperands with the phis that already exist in BB.
2475 for (PHINode &PN : BB->phis()) {
2477 for (const Use &U : PN.incoming_values())
2478 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2479 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2480 for (BasicBlock *Pred : UnconditionalPreds)
2481 Ops.push_back(*IncomingVals[Pred]);
2482 }
2483
2484 int ScanIdx = 0;
2485 SmallPtrSet<Value*,4> InstructionsToSink;
2486 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2487 while (LRI.isValid() &&
2488 canSinkInstructions(*LRI, PHIOperands)) {
2489 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2490 << "\n");
2491 InstructionsToSink.insert_range(*LRI);
2492 ++ScanIdx;
2493 --LRI;
2494 }
2495
2496 // If no instructions can be sunk, early-return.
2497 if (ScanIdx == 0)
2498 return false;
2499
2500 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2501
2502 if (!followedByDeoptOrUnreachable) {
2503 // Check whether this is the pointer operand of a load/store.
2504 auto IsMemOperand = [](Use &U) {
2505 auto *I = cast<Instruction>(U.getUser());
2506 if (isa<LoadInst>(I))
2507 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2508 if (isa<StoreInst>(I))
2509 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2510 return false;
2511 };
2512
2513 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2514 // actually sink before encountering instruction that is unprofitable to
2515 // sink?
2516 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2517 unsigned NumPHIInsts = 0;
2518 for (Use &U : (*LRI)[0]->operands()) {
2519 auto It = PHIOperands.find(&U);
2520 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2521 return InstructionsToSink.contains(V);
2522 })) {
2523 ++NumPHIInsts;
2524 // Do not separate a load/store from the gep producing the address.
2525 // The gep can likely be folded into the load/store as an addressing
2526 // mode. Additionally, a load of a gep is easier to analyze than a
2527 // load of a phi.
2528 if (IsMemOperand(U) &&
2529 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2530 return false;
2531 // FIXME: this check is overly optimistic. We may end up not sinking
2532 // said instruction, due to the very same profitability check.
2533 // See @creating_too_many_phis in sink-common-code.ll.
2534 }
2535 }
2536 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2537 return NumPHIInsts <= 1;
2538 };
2539
2540 // We've determined that we are going to sink last ScanIdx instructions,
2541 // and recorded them in InstructionsToSink. Now, some instructions may be
2542 // unprofitable to sink. But that determination depends on the instructions
2543 // that we are going to sink.
2544
2545 // First, forward scan: find the first instruction unprofitable to sink,
2546 // recording all the ones that are profitable to sink.
2547 // FIXME: would it be better, after we detect that not all are profitable.
2548 // to either record the profitable ones, or erase the unprofitable ones?
2549 // Maybe we need to choose (at runtime) the one that will touch least
2550 // instrs?
2551 LRI.reset();
2552 int Idx = 0;
2553 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2554 while (Idx < ScanIdx) {
2555 if (!ProfitableToSinkInstruction(LRI)) {
2556 // Too many PHIs would be created.
2557 LLVM_DEBUG(
2558 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2559 break;
2560 }
2561 InstructionsProfitableToSink.insert_range(*LRI);
2562 --LRI;
2563 ++Idx;
2564 }
2565
2566 // If no instructions can be sunk, early-return.
2567 if (Idx == 0)
2568 return false;
2569
2570 // Did we determine that (only) some instructions are unprofitable to sink?
2571 if (Idx < ScanIdx) {
2572 // Okay, some instructions are unprofitable.
2573 ScanIdx = Idx;
2574 InstructionsToSink = InstructionsProfitableToSink;
2575
2576 // But, that may make other instructions unprofitable, too.
2577 // So, do a backward scan, do any earlier instructions become
2578 // unprofitable?
2579 assert(
2580 !ProfitableToSinkInstruction(LRI) &&
2581 "We already know that the last instruction is unprofitable to sink");
2582 ++LRI;
2583 --Idx;
2584 while (Idx >= 0) {
2585 // If we detect that an instruction becomes unprofitable to sink,
2586 // all earlier instructions won't be sunk either,
2587 // so preemptively keep InstructionsProfitableToSink in sync.
2588 // FIXME: is this the most performant approach?
2589 for (auto *I : *LRI)
2590 InstructionsProfitableToSink.erase(I);
2591 if (!ProfitableToSinkInstruction(LRI)) {
2592 // Everything starting with this instruction won't be sunk.
2593 ScanIdx = Idx;
2594 InstructionsToSink = InstructionsProfitableToSink;
2595 }
2596 ++LRI;
2597 --Idx;
2598 }
2599 }
2600
2601 // If no instructions can be sunk, early-return.
2602 if (ScanIdx == 0)
2603 return false;
2604 }
2605
2606 bool Changed = false;
2607
2608 if (HaveNonUnconditionalPredecessors) {
2609 if (!followedByDeoptOrUnreachable) {
2610 // It is always legal to sink common instructions from unconditional
2611 // predecessors. However, if not all predecessors are unconditional,
2612 // this transformation might be pessimizing. So as a rule of thumb,
2613 // don't do it unless we'd sink at least one non-speculatable instruction.
2614 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2615 LRI.reset();
2616 int Idx = 0;
2617 bool Profitable = false;
2618 while (Idx < ScanIdx) {
2619 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2620 Profitable = true;
2621 break;
2622 }
2623 --LRI;
2624 ++Idx;
2625 }
2626 if (!Profitable)
2627 return false;
2628 }
2629
2630 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2631 // We have a conditional edge and we're going to sink some instructions.
2632 // Insert a new block postdominating all blocks we're going to sink from.
2633 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2634 // Edges couldn't be split.
2635 return false;
2636 Changed = true;
2637 }
2638
2639 // Now that we've analyzed all potential sinking candidates, perform the
2640 // actual sink. We iteratively sink the last non-terminator of the source
2641 // blocks into their common successor unless doing so would require too
2642 // many PHI instructions to be generated (currently only one PHI is allowed
2643 // per sunk instruction).
2644 //
2645 // We can use InstructionsToSink to discount values needing PHI-merging that will
2646 // actually be sunk in a later iteration. This allows us to be more
2647 // aggressive in what we sink. This does allow a false positive where we
2648 // sink presuming a later value will also be sunk, but stop half way through
2649 // and never actually sink it which means we produce more PHIs than intended.
2650 // This is unlikely in practice though.
2651 int SinkIdx = 0;
2652 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2653 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2654 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2655 << "\n");
2656
2657 // Because we've sunk every instruction in turn, the current instruction to
2658 // sink is always at index 0.
2659 LRI.reset();
2660
2661 sinkLastInstruction(UnconditionalPreds);
2662 NumSinkCommonInstrs++;
2663 Changed = true;
2664 }
2665 if (SinkIdx != 0)
2666 ++NumSinkCommonCode;
2667 return Changed;
2668}
2669
2670namespace {
2671
2672struct CompatibleSets {
2673 using SetTy = SmallVector<InvokeInst *, 2>;
2674
2676
2677 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2678
2679 SetTy &getCompatibleSet(InvokeInst *II);
2680
2681 void insert(InvokeInst *II);
2682};
2683
2684CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2685 // Perform a linear scan over all the existing sets, see if the new `invoke`
2686 // is compatible with any particular set. Since we know that all the `invokes`
2687 // within a set are compatible, only check the first `invoke` in each set.
2688 // WARNING: at worst, this has quadratic complexity.
2689 for (CompatibleSets::SetTy &Set : Sets) {
2690 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2691 return Set;
2692 }
2693
2694 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2695 return Sets.emplace_back();
2696}
2697
2698void CompatibleSets::insert(InvokeInst *II) {
2699 getCompatibleSet(II).emplace_back(II);
2700}
2701
2702bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2703 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2704
2705 // Can we theoretically merge these `invoke`s?
2706 auto IsIllegalToMerge = [](InvokeInst *II) {
2707 return II->cannotMerge() || II->isInlineAsm();
2708 };
2709 if (any_of(Invokes, IsIllegalToMerge))
2710 return false;
2711
2712 // Either both `invoke`s must be direct,
2713 // or both `invoke`s must be indirect.
2714 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2715 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2716 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2717 if (HaveIndirectCalls) {
2718 if (!AllCallsAreIndirect)
2719 return false;
2720 } else {
2721 // All callees must be identical.
2722 Value *Callee = nullptr;
2723 for (InvokeInst *II : Invokes) {
2724 Value *CurrCallee = II->getCalledOperand();
2725 assert(CurrCallee && "There is always a called operand.");
2726 if (!Callee)
2727 Callee = CurrCallee;
2728 else if (Callee != CurrCallee)
2729 return false;
2730 }
2731 }
2732
2733 // Either both `invoke`s must not have a normal destination,
2734 // or both `invoke`s must have a normal destination,
2735 auto HasNormalDest = [](InvokeInst *II) {
2736 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2737 };
2738 if (any_of(Invokes, HasNormalDest)) {
2739 // Do not merge `invoke` that does not have a normal destination with one
2740 // that does have a normal destination, even though doing so would be legal.
2741 if (!all_of(Invokes, HasNormalDest))
2742 return false;
2743
2744 // All normal destinations must be identical.
2745 BasicBlock *NormalBB = nullptr;
2746 for (InvokeInst *II : Invokes) {
2747 BasicBlock *CurrNormalBB = II->getNormalDest();
2748 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2749 if (!NormalBB)
2750 NormalBB = CurrNormalBB;
2751 else if (NormalBB != CurrNormalBB)
2752 return false;
2753 }
2754
2755 // In the normal destination, the incoming values for these two `invoke`s
2756 // must be compatible.
2757 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2759 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2760 &EquivalenceSet))
2761 return false;
2762 }
2763
2764#ifndef NDEBUG
2765 // All unwind destinations must be identical.
2766 // We know that because we have started from said unwind destination.
2767 BasicBlock *UnwindBB = nullptr;
2768 for (InvokeInst *II : Invokes) {
2769 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2770 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2771 if (!UnwindBB)
2772 UnwindBB = CurrUnwindBB;
2773 else
2774 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2775 }
2776#endif
2777
2778 // In the unwind destination, the incoming values for these two `invoke`s
2779 // must be compatible.
2781 Invokes.front()->getUnwindDest(),
2782 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2783 return false;
2784
2785 // Ignoring arguments, these `invoke`s must be identical,
2786 // including operand bundles.
2787 const InvokeInst *II0 = Invokes.front();
2788 for (auto *II : Invokes.drop_front())
2789 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2790 return false;
2791
2792 // Can we theoretically form the data operands for the merged `invoke`?
2793 auto IsIllegalToMergeArguments = [](auto Ops) {
2794 Use &U0 = std::get<0>(Ops);
2795 Use &U1 = std::get<1>(Ops);
2796 if (U0 == U1)
2797 return false;
2799 U0.getOperandNo());
2800 };
2801 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2802 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2803 IsIllegalToMergeArguments))
2804 return false;
2805
2806 return true;
2807}
2808
2809} // namespace
2810
2811// Merge all invokes in the provided set, all of which are compatible
2812// as per the `CompatibleSets::shouldBelongToSameSet()`.
2814 DomTreeUpdater *DTU) {
2815 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2816
2818 if (DTU)
2819 Updates.reserve(2 + 3 * Invokes.size());
2820
2821 bool HasNormalDest =
2822 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2823
2824 // Clone one of the invokes into a new basic block.
2825 // Since they are all compatible, it doesn't matter which invoke is cloned.
2826 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2827 InvokeInst *II0 = Invokes.front();
2828 BasicBlock *II0BB = II0->getParent();
2829 BasicBlock *InsertBeforeBlock =
2830 II0->getParent()->getIterator()->getNextNode();
2831 Function *Func = II0BB->getParent();
2832 LLVMContext &Ctx = II0->getContext();
2833
2834 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2835 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2836
2837 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2838 // NOTE: all invokes have the same attributes, so no handling needed.
2839 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2840
2841 if (!HasNormalDest) {
2842 // This set does not have a normal destination,
2843 // so just form a new block with unreachable terminator.
2844 BasicBlock *MergedNormalDest = BasicBlock::Create(
2845 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2846 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2847 UI->setDebugLoc(DebugLoc::getTemporary());
2848 MergedInvoke->setNormalDest(MergedNormalDest);
2849 }
2850
2851 // The unwind destination, however, remainds identical for all invokes here.
2852
2853 return MergedInvoke;
2854 }();
2855
2856 if (DTU) {
2857 // Predecessor blocks that contained these invokes will now branch to
2858 // the new block that contains the merged invoke, ...
2859 for (InvokeInst *II : Invokes)
2860 Updates.push_back(
2861 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2862
2863 // ... which has the new `unreachable` block as normal destination,
2864 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2865 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2866 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2867 SuccBBOfMergedInvoke});
2868
2869 // Since predecessor blocks now unconditionally branch to a new block,
2870 // they no longer branch to their original successors.
2871 for (InvokeInst *II : Invokes)
2872 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2873 Updates.push_back(
2874 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2875 }
2876
2877 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2878
2879 // Form the merged operands for the merged invoke.
2880 for (Use &U : MergedInvoke->operands()) {
2881 // Only PHI together the indirect callees and data operands.
2882 if (MergedInvoke->isCallee(&U)) {
2883 if (!IsIndirectCall)
2884 continue;
2885 } else if (!MergedInvoke->isDataOperand(&U))
2886 continue;
2887
2888 // Don't create trivial PHI's with all-identical incoming values.
2889 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2890 return II->getOperand(U.getOperandNo()) != U.get();
2891 });
2892 if (!NeedPHI)
2893 continue;
2894
2895 // Form a PHI out of all the data ops under this index.
2897 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2898 for (InvokeInst *II : Invokes)
2899 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2900
2901 U.set(PN);
2902 }
2903
2904 // We've ensured that each PHI node has compatible (identical) incoming values
2905 // when coming from each of the `invoke`s in the current merge set,
2906 // so update the PHI nodes accordingly.
2907 for (BasicBlock *Succ : successors(MergedInvoke))
2908 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2909 /*ExistPred=*/Invokes.front()->getParent());
2910
2911 // And finally, replace the original `invoke`s with an unconditional branch
2912 // to the block with the merged `invoke`. Also, give that merged `invoke`
2913 // the merged debugloc of all the original `invoke`s.
2914 DILocation *MergedDebugLoc = nullptr;
2915 for (InvokeInst *II : Invokes) {
2916 // Compute the debug location common to all the original `invoke`s.
2917 if (!MergedDebugLoc)
2918 MergedDebugLoc = II->getDebugLoc();
2919 else
2920 MergedDebugLoc =
2921 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2922
2923 // And replace the old `invoke` with an unconditionally branch
2924 // to the block with the merged `invoke`.
2925 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2926 OrigSuccBB->removePredecessor(II->getParent());
2927 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2928 // The unconditional branch is part of the replacement for the original
2929 // invoke, so should use its DebugLoc.
2930 BI->setDebugLoc(II->getDebugLoc());
2931 bool Success = MergedInvoke->tryIntersectAttributes(II);
2932 assert(Success && "Merged invokes with incompatible attributes");
2933 // For NDEBUG Compile
2934 (void)Success;
2935 II->replaceAllUsesWith(MergedInvoke);
2936 II->eraseFromParent();
2937 ++NumInvokesMerged;
2938 }
2939 MergedInvoke->setDebugLoc(MergedDebugLoc);
2940 ++NumInvokeSetsFormed;
2941
2942 if (DTU)
2943 DTU->applyUpdates(Updates);
2944}
2945
2946/// If this block is a `landingpad` exception handling block, categorize all
2947/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2948/// being "mergeable" together, and then merge invokes in each set together.
2949///
2950/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2951/// [...] [...]
2952/// | |
2953/// [invoke0] [invoke1]
2954/// / \ / \
2955/// [cont0] [landingpad] [cont1]
2956/// to:
2957/// [...] [...]
2958/// \ /
2959/// [invoke]
2960/// / \
2961/// [cont] [landingpad]
2962///
2963/// But of course we can only do that if the invokes share the `landingpad`,
2964/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2965/// and the invoked functions are "compatible".
2968 return false;
2969
2970 bool Changed = false;
2971
2972 // FIXME: generalize to all exception handling blocks?
2973 if (!BB->isLandingPad())
2974 return Changed;
2975
2976 CompatibleSets Grouper;
2977
2978 // Record all the predecessors of this `landingpad`. As per verifier,
2979 // the only allowed predecessor is the unwind edge of an `invoke`.
2980 // We want to group "compatible" `invokes` into the same set to be merged.
2981 for (BasicBlock *PredBB : predecessors(BB))
2982 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2983
2984 // And now, merge `invoke`s that were grouped togeter.
2985 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2986 if (Invokes.size() < 2)
2987 continue;
2988 Changed = true;
2989 mergeCompatibleInvokesImpl(Invokes, DTU);
2990 }
2991
2992 return Changed;
2993}
2994
2995namespace {
2996/// Track ephemeral values, which should be ignored for cost-modelling
2997/// purposes. Requires walking instructions in reverse order.
2998class EphemeralValueTracker {
2999 SmallPtrSet<const Instruction *, 32> EphValues;
3000
3001 bool isEphemeral(const Instruction *I) {
3002 if (isa<AssumeInst>(I))
3003 return true;
3004 return !I->mayHaveSideEffects() && !I->isTerminator() &&
3005 all_of(I->users(), [&](const User *U) {
3006 return EphValues.count(cast<Instruction>(U));
3007 });
3008 }
3009
3010public:
3011 bool track(const Instruction *I) {
3012 if (isEphemeral(I)) {
3013 EphValues.insert(I);
3014 return true;
3015 }
3016 return false;
3017 }
3018
3019 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3020};
3021} // namespace
3022
3023/// Determine if we can hoist sink a sole store instruction out of a
3024/// conditional block.
3025///
3026/// We are looking for code like the following:
3027/// BrBB:
3028/// store i32 %add, i32* %arrayidx2
3029/// ... // No other stores or function calls (we could be calling a memory
3030/// ... // function).
3031/// %cmp = icmp ult %x, %y
3032/// br i1 %cmp, label %EndBB, label %ThenBB
3033/// ThenBB:
3034/// store i32 %add5, i32* %arrayidx2
3035/// br label EndBB
3036/// EndBB:
3037/// ...
3038/// We are going to transform this into:
3039/// BrBB:
3040/// store i32 %add, i32* %arrayidx2
3041/// ... //
3042/// %cmp = icmp ult %x, %y
3043/// %add.add5 = select i1 %cmp, i32 %add, %add5
3044/// store i32 %add.add5, i32* %arrayidx2
3045/// ...
3046///
3047/// \return The pointer to the value of the previous store if the store can be
3048/// hoisted into the predecessor block. 0 otherwise.
3050 BasicBlock *StoreBB, BasicBlock *EndBB) {
3051 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3052 if (!StoreToHoist)
3053 return nullptr;
3054
3055 // Volatile or atomic.
3056 if (!StoreToHoist->isSimple())
3057 return nullptr;
3058
3059 Value *StorePtr = StoreToHoist->getPointerOperand();
3060 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3061
3062 // Look for a store to the same pointer in BrBB.
3063 unsigned MaxNumInstToLookAt = 9;
3064 // Skip pseudo probe intrinsic calls which are not really killing any memory
3065 // accesses.
3066 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3067 if (!MaxNumInstToLookAt)
3068 break;
3069 --MaxNumInstToLookAt;
3070
3071 // Could be calling an instruction that affects memory like free().
3072 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3073 return nullptr;
3074
3075 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3076 // Found the previous store to same location and type. Make sure it is
3077 // simple, to avoid introducing a spurious non-atomic write after an
3078 // atomic write.
3079 if (SI->getPointerOperand() == StorePtr &&
3080 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3081 SI->getAlign() >= StoreToHoist->getAlign())
3082 // Found the previous store, return its value operand.
3083 return SI->getValueOperand();
3084 return nullptr; // Unknown store.
3085 }
3086
3087 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3088 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3089 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3090 Value *Obj = getUnderlyingObject(StorePtr);
3091 bool ExplicitlyDereferenceableOnly;
3092 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3094 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3096 (!ExplicitlyDereferenceableOnly ||
3097 isDereferenceablePointer(StorePtr, StoreTy,
3098 LI->getDataLayout()))) {
3099 // Found a previous load, return it.
3100 return LI;
3101 }
3102 }
3103 // The load didn't work out, but we may still find a store.
3104 }
3105 }
3106
3107 return nullptr;
3108}
3109
3110/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3111/// converted to selects.
3113 BasicBlock *EndBB,
3114 unsigned &SpeculatedInstructions,
3115 InstructionCost &Cost,
3116 const TargetTransformInfo &TTI) {
3118 BB->getParent()->hasMinSize()
3121
3122 bool HaveRewritablePHIs = false;
3123 for (PHINode &PN : EndBB->phis()) {
3124 Value *OrigV = PN.getIncomingValueForBlock(BB);
3125 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3126
3127 // FIXME: Try to remove some of the duplication with
3128 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3129 if (ThenV == OrigV)
3130 continue;
3131
3132 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3133 CmpInst::makeCmpResultType(PN.getType()),
3135
3136 // Don't convert to selects if we could remove undefined behavior instead.
3137 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3139 return false;
3140
3141 HaveRewritablePHIs = true;
3142 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3143 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3144 if (!OrigCE && !ThenCE)
3145 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3146
3147 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3148 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3149 InstructionCost MaxCost =
3151 if (OrigCost + ThenCost > MaxCost)
3152 return false;
3153
3154 // Account for the cost of an unfolded ConstantExpr which could end up
3155 // getting expanded into Instructions.
3156 // FIXME: This doesn't account for how many operations are combined in the
3157 // constant expression.
3158 ++SpeculatedInstructions;
3159 if (SpeculatedInstructions > 1)
3160 return false;
3161 }
3162
3163 return HaveRewritablePHIs;
3164}
3165
3167 std::optional<bool> Invert,
3168 const TargetTransformInfo &TTI) {
3169 // If the branch is non-unpredictable, and is predicted to *not* branch to
3170 // the `then` block, then avoid speculating it.
3171 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3172 return true;
3173
3174 uint64_t TWeight, FWeight;
3175 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3176 return true;
3177
3178 if (!Invert.has_value())
3179 return false;
3180
3181 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3182 BranchProbability BIEndProb =
3183 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3184 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3185 return BIEndProb < Likely;
3186}
3187
3188/// Speculate a conditional basic block flattening the CFG.
3189///
3190/// Note that this is a very risky transform currently. Speculating
3191/// instructions like this is most often not desirable. Instead, there is an MI
3192/// pass which can do it with full awareness of the resource constraints.
3193/// However, some cases are "obvious" and we should do directly. An example of
3194/// this is speculating a single, reasonably cheap instruction.
3195///
3196/// There is only one distinct advantage to flattening the CFG at the IR level:
3197/// it makes very common but simplistic optimizations such as are common in
3198/// instcombine and the DAG combiner more powerful by removing CFG edges and
3199/// modeling their effects with easier to reason about SSA value graphs.
3200///
3201///
3202/// An illustration of this transform is turning this IR:
3203/// \code
3204/// BB:
3205/// %cmp = icmp ult %x, %y
3206/// br i1 %cmp, label %EndBB, label %ThenBB
3207/// ThenBB:
3208/// %sub = sub %x, %y
3209/// br label BB2
3210/// EndBB:
3211/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3212/// ...
3213/// \endcode
3214///
3215/// Into this IR:
3216/// \code
3217/// BB:
3218/// %cmp = icmp ult %x, %y
3219/// %sub = sub %x, %y
3220/// %cond = select i1 %cmp, 0, %sub
3221/// ...
3222/// \endcode
3223///
3224/// \returns true if the conditional block is removed.
3225bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3226 BasicBlock *ThenBB) {
3227 if (!Options.SpeculateBlocks)
3228 return false;
3229
3230 // Be conservative for now. FP select instruction can often be expensive.
3231 Value *BrCond = BI->getCondition();
3232 if (isa<FCmpInst>(BrCond))
3233 return false;
3234
3235 BasicBlock *BB = BI->getParent();
3236 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3237 InstructionCost Budget =
3239
3240 // If ThenBB is actually on the false edge of the conditional branch, remember
3241 // to swap the select operands later.
3242 bool Invert = false;
3243 if (ThenBB != BI->getSuccessor(0)) {
3244 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3245 Invert = true;
3246 }
3247 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3248
3249 if (!isProfitableToSpeculate(BI, Invert, TTI))
3250 return false;
3251
3252 // Keep a count of how many times instructions are used within ThenBB when
3253 // they are candidates for sinking into ThenBB. Specifically:
3254 // - They are defined in BB, and
3255 // - They have no side effects, and
3256 // - All of their uses are in ThenBB.
3257 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3258
3259 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3260
3261 unsigned SpeculatedInstructions = 0;
3262 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3263 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3264 Value *SpeculatedStoreValue = nullptr;
3265 StoreInst *SpeculatedStore = nullptr;
3266 EphemeralValueTracker EphTracker;
3267 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3268 // Skip pseudo probes. The consequence is we lose track of the branch
3269 // probability for ThenBB, which is fine since the optimization here takes
3270 // place regardless of the branch probability.
3271 if (isa<PseudoProbeInst>(I)) {
3272 // The probe should be deleted so that it will not be over-counted when
3273 // the samples collected on the non-conditional path are counted towards
3274 // the conditional path. We leave it for the counts inference algorithm to
3275 // figure out a proper count for an unknown probe.
3276 SpeculatedPseudoProbes.push_back(&I);
3277 continue;
3278 }
3279
3280 // Ignore ephemeral values, they will be dropped by the transform.
3281 if (EphTracker.track(&I))
3282 continue;
3283
3284 // Only speculatively execute a single instruction (not counting the
3285 // terminator) for now.
3286 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3288 SpeculatedConditionalLoadsStores.size() <
3290 // Not count load/store into cost if target supports conditional faulting
3291 // b/c it's cheap to speculate it.
3292 if (IsSafeCheapLoadStore)
3293 SpeculatedConditionalLoadsStores.push_back(&I);
3294 else
3295 ++SpeculatedInstructions;
3296
3297 if (SpeculatedInstructions > 1)
3298 return false;
3299
3300 // Don't hoist the instruction if it's unsafe or expensive.
3301 if (!IsSafeCheapLoadStore &&
3303 !(HoistCondStores && !SpeculatedStoreValue &&
3304 (SpeculatedStoreValue =
3305 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3306 return false;
3307 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3310 return false;
3311
3312 // Store the store speculation candidate.
3313 if (!SpeculatedStore && SpeculatedStoreValue)
3314 SpeculatedStore = cast<StoreInst>(&I);
3315
3316 // Do not hoist the instruction if any of its operands are defined but not
3317 // used in BB. The transformation will prevent the operand from
3318 // being sunk into the use block.
3319 for (Use &Op : I.operands()) {
3321 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3322 continue; // Not a candidate for sinking.
3323
3324 ++SinkCandidateUseCounts[OpI];
3325 }
3326 }
3327
3328 // Consider any sink candidates which are only used in ThenBB as costs for
3329 // speculation. Note, while we iterate over a DenseMap here, we are summing
3330 // and so iteration order isn't significant.
3331 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3332 if (Inst->hasNUses(Count)) {
3333 ++SpeculatedInstructions;
3334 if (SpeculatedInstructions > 1)
3335 return false;
3336 }
3337
3338 // Check that we can insert the selects and that it's not too expensive to do
3339 // so.
3340 bool Convert =
3341 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3343 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3344 SpeculatedInstructions, Cost, TTI);
3345 if (!Convert || Cost > Budget)
3346 return false;
3347
3348 // If we get here, we can hoist the instruction and if-convert.
3349 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3350
3351 Instruction *Sel = nullptr;
3352 // Insert a select of the value of the speculated store.
3353 if (SpeculatedStoreValue) {
3354 IRBuilder<NoFolder> Builder(BI);
3355 Value *OrigV = SpeculatedStore->getValueOperand();
3356 Value *TrueV = SpeculatedStore->getValueOperand();
3357 Value *FalseV = SpeculatedStoreValue;
3358 if (Invert)
3359 std::swap(TrueV, FalseV);
3360 Value *S = Builder.CreateSelect(
3361 BrCond, TrueV, FalseV, "spec.store.select", BI);
3362 Sel = cast<Instruction>(S);
3363 SpeculatedStore->setOperand(0, S);
3364 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3365 SpeculatedStore->getDebugLoc());
3366 // The value stored is still conditional, but the store itself is now
3367 // unconditonally executed, so we must be sure that any linked dbg.assign
3368 // intrinsics are tracking the new stored value (the result of the
3369 // select). If we don't, and the store were to be removed by another pass
3370 // (e.g. DSE), then we'd eventually end up emitting a location describing
3371 // the conditional value, unconditionally.
3372 //
3373 // === Before this transformation ===
3374 // pred:
3375 // store %one, %x.dest, !DIAssignID !1
3376 // dbg.assign %one, "x", ..., !1, ...
3377 // br %cond if.then
3378 //
3379 // if.then:
3380 // store %two, %x.dest, !DIAssignID !2
3381 // dbg.assign %two, "x", ..., !2, ...
3382 //
3383 // === After this transformation ===
3384 // pred:
3385 // store %one, %x.dest, !DIAssignID !1
3386 // dbg.assign %one, "x", ..., !1
3387 /// ...
3388 // %merge = select %cond, %two, %one
3389 // store %merge, %x.dest, !DIAssignID !2
3390 // dbg.assign %merge, "x", ..., !2
3391 for (DbgVariableRecord *DbgAssign :
3392 at::getDVRAssignmentMarkers(SpeculatedStore))
3393 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3394 DbgAssign->replaceVariableLocationOp(OrigV, S);
3395 }
3396
3397 // Metadata can be dependent on the condition we are hoisting above.
3398 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3399 // to avoid making it appear as if the condition is a constant, which would
3400 // be misleading while debugging.
3401 // Similarly strip attributes that maybe dependent on condition we are
3402 // hoisting above.
3403 for (auto &I : make_early_inc_range(*ThenBB)) {
3404 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3405 I.dropLocation();
3406 }
3407 I.dropUBImplyingAttrsAndMetadata();
3408
3409 // Drop ephemeral values.
3410 if (EphTracker.contains(&I)) {
3411 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3412 I.eraseFromParent();
3413 }
3414 }
3415
3416 // Hoist the instructions.
3417 // Drop DbgVariableRecords attached to these instructions.
3418 for (auto &It : *ThenBB)
3419 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3420 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3421 // equivalent).
3422 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3423 !DVR || !DVR->isDbgAssign())
3424 It.dropOneDbgRecord(&DR);
3425 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3426 std::prev(ThenBB->end()));
3427
3428 if (!SpeculatedConditionalLoadsStores.empty())
3429 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3430 Sel);
3431
3432 // Insert selects and rewrite the PHI operands.
3433 IRBuilder<NoFolder> Builder(BI);
3434 for (PHINode &PN : EndBB->phis()) {
3435 unsigned OrigI = PN.getBasicBlockIndex(BB);
3436 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3437 Value *OrigV = PN.getIncomingValue(OrigI);
3438 Value *ThenV = PN.getIncomingValue(ThenI);
3439
3440 // Skip PHIs which are trivial.
3441 if (OrigV == ThenV)
3442 continue;
3443
3444 // Create a select whose true value is the speculatively executed value and
3445 // false value is the pre-existing value. Swap them if the branch
3446 // destinations were inverted.
3447 Value *TrueV = ThenV, *FalseV = OrigV;
3448 if (Invert)
3449 std::swap(TrueV, FalseV);
3450 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3451 PN.setIncomingValue(OrigI, V);
3452 PN.setIncomingValue(ThenI, V);
3453 }
3454
3455 // Remove speculated pseudo probes.
3456 for (Instruction *I : SpeculatedPseudoProbes)
3457 I->eraseFromParent();
3458
3459 ++NumSpeculations;
3460 return true;
3461}
3462
3464
3465// Return false if number of blocks searched is too much.
3466static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3467 BlocksSet &ReachesNonLocalUses) {
3468 if (BB == DefBB)
3469 return true;
3470 if (!ReachesNonLocalUses.insert(BB).second)
3471 return true;
3472
3473 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3474 return false;
3475 for (BasicBlock *Pred : predecessors(BB))
3476 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3477 return false;
3478 return true;
3479}
3480
3481/// Return true if we can thread a branch across this block.
3483 BlocksSet &NonLocalUseBlocks) {
3484 int Size = 0;
3485 EphemeralValueTracker EphTracker;
3486
3487 // Walk the loop in reverse so that we can identify ephemeral values properly
3488 // (values only feeding assumes).
3489 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3490 // Can't fold blocks that contain noduplicate or convergent calls.
3491 if (CallInst *CI = dyn_cast<CallInst>(&I))
3492 if (CI->cannotDuplicate() || CI->isConvergent())
3493 return false;
3494
3495 // Ignore ephemeral values which are deleted during codegen.
3496 // We will delete Phis while threading, so Phis should not be accounted in
3497 // block's size.
3498 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3499 if (Size++ > MaxSmallBlockSize)
3500 return false; // Don't clone large BB's.
3501 }
3502
3503 // Record blocks with non-local uses of values defined in the current basic
3504 // block.
3505 for (User *U : I.users()) {
3507 BasicBlock *UsedInBB = UI->getParent();
3508 if (UsedInBB == BB) {
3509 if (isa<PHINode>(UI))
3510 return false;
3511 } else
3512 NonLocalUseBlocks.insert(UsedInBB);
3513 }
3514
3515 // Looks ok, continue checking.
3516 }
3517
3518 return true;
3519}
3520
3522 BasicBlock *To) {
3523 // Don't look past the block defining the value, we might get the value from
3524 // a previous loop iteration.
3525 auto *I = dyn_cast<Instruction>(V);
3526 if (I && I->getParent() == To)
3527 return nullptr;
3528
3529 // We know the value if the From block branches on it.
3530 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3531 if (BI && BI->isConditional() && BI->getCondition() == V &&
3532 BI->getSuccessor(0) != BI->getSuccessor(1))
3533 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3535
3536 return nullptr;
3537}
3538
3539/// If we have a conditional branch on something for which we know the constant
3540/// value in predecessors (e.g. a phi node in the current block), thread edges
3541/// from the predecessor to their ultimate destination.
3542static std::optional<bool>
3544 const DataLayout &DL,
3545 AssumptionCache *AC) {
3547 BasicBlock *BB = BI->getParent();
3548 Value *Cond = BI->getCondition();
3550 if (PN && PN->getParent() == BB) {
3551 // Degenerate case of a single entry PHI.
3552 if (PN->getNumIncomingValues() == 1) {
3554 return true;
3555 }
3556
3557 for (Use &U : PN->incoming_values())
3558 if (auto *CB = dyn_cast<ConstantInt>(U))
3559 KnownValues[CB].insert(PN->getIncomingBlock(U));
3560 } else {
3561 for (BasicBlock *Pred : predecessors(BB)) {
3562 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3563 KnownValues[CB].insert(Pred);
3564 }
3565 }
3566
3567 if (KnownValues.empty())
3568 return false;
3569
3570 // Now we know that this block has multiple preds and two succs.
3571 // Check that the block is small enough and record which non-local blocks use
3572 // values defined in the block.
3573
3574 BlocksSet NonLocalUseBlocks;
3575 BlocksSet ReachesNonLocalUseBlocks;
3576 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3577 return false;
3578
3579 // Jump-threading can only be done to destinations where no values defined
3580 // in BB are live.
3581
3582 // Quickly check if both destinations have uses. If so, jump-threading cannot
3583 // be done.
3584 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3585 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3586 return false;
3587
3588 // Search backward from NonLocalUseBlocks to find which blocks
3589 // reach non-local uses.
3590 for (BasicBlock *UseBB : NonLocalUseBlocks)
3591 // Give up if too many blocks are searched.
3592 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3593 return false;
3594
3595 for (const auto &Pair : KnownValues) {
3596 ConstantInt *CB = Pair.first;
3597 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3598 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3599
3600 // Okay, we now know that all edges from PredBB should be revectored to
3601 // branch to RealDest.
3602 if (RealDest == BB)
3603 continue; // Skip self loops.
3604
3605 // Skip if the predecessor's terminator is an indirect branch.
3606 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3607 return isa<IndirectBrInst>(PredBB->getTerminator());
3608 }))
3609 continue;
3610
3611 // Only revector to RealDest if no values defined in BB are live.
3612 if (ReachesNonLocalUseBlocks.contains(RealDest))
3613 continue;
3614
3615 LLVM_DEBUG({
3616 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3617 << " has value " << *Pair.first << " in predecessors:\n";
3618 for (const BasicBlock *PredBB : Pair.second)
3619 dbgs() << " " << PredBB->getName() << "\n";
3620 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3621 });
3622
3623 // Split the predecessors we are threading into a new edge block. We'll
3624 // clone the instructions into this block, and then redirect it to RealDest.
3625 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3626
3627 // TODO: These just exist to reduce test diff, we can drop them if we like.
3628 EdgeBB->setName(RealDest->getName() + ".critedge");
3629 EdgeBB->moveBefore(RealDest);
3630
3631 // Update PHI nodes.
3632 addPredecessorToBlock(RealDest, EdgeBB, BB);
3633
3634 // BB may have instructions that are being threaded over. Clone these
3635 // instructions into EdgeBB. We know that there will be no uses of the
3636 // cloned instructions outside of EdgeBB.
3637 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3638 ValueToValueMapTy TranslateMap; // Track translated values.
3639 TranslateMap[Cond] = CB;
3640
3641 // RemoveDIs: track instructions that we optimise away while folding, so
3642 // that we can copy DbgVariableRecords from them later.
3643 BasicBlock::iterator SrcDbgCursor = BB->begin();
3644 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3645 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3646 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3647 continue;
3648 }
3649 // Clone the instruction.
3650 Instruction *N = BBI->clone();
3651 // Insert the new instruction into its new home.
3652 N->insertInto(EdgeBB, InsertPt);
3653
3654 if (BBI->hasName())
3655 N->setName(BBI->getName() + ".c");
3656
3657 // Update operands due to translation.
3658 // Key Instructions: Remap all the atom groups.
3659 if (const DebugLoc &DL = BBI->getDebugLoc())
3660 mapAtomInstance(DL, TranslateMap);
3661 RemapInstruction(N, TranslateMap,
3663
3664 // Check for trivial simplification.
3665 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3666 if (!BBI->use_empty())
3667 TranslateMap[&*BBI] = V;
3668 if (!N->mayHaveSideEffects()) {
3669 N->eraseFromParent(); // Instruction folded away, don't need actual
3670 // inst
3671 N = nullptr;
3672 }
3673 } else {
3674 if (!BBI->use_empty())
3675 TranslateMap[&*BBI] = N;
3676 }
3677 if (N) {
3678 // Copy all debug-info attached to instructions from the last we
3679 // successfully clone, up to this instruction (they might have been
3680 // folded away).
3681 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3682 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3683 SrcDbgCursor = std::next(BBI);
3684 // Clone debug-info on this instruction too.
3685 N->cloneDebugInfoFrom(&*BBI);
3686
3687 // Register the new instruction with the assumption cache if necessary.
3688 if (auto *Assume = dyn_cast<AssumeInst>(N))
3689 if (AC)
3690 AC->registerAssumption(Assume);
3691 }
3692 }
3693
3694 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3695 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3696 InsertPt->cloneDebugInfoFrom(BI);
3697
3698 BB->removePredecessor(EdgeBB);
3699 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3700 EdgeBI->setSuccessor(0, RealDest);
3701 EdgeBI->setDebugLoc(BI->getDebugLoc());
3702
3703 if (DTU) {
3705 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3706 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3707 DTU->applyUpdates(Updates);
3708 }
3709
3710 // For simplicity, we created a separate basic block for the edge. Merge
3711 // it back into the predecessor if possible. This not only avoids
3712 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3713 // bypass the check for trivial cycles above.
3714 MergeBlockIntoPredecessor(EdgeBB, DTU);
3715
3716 // Signal repeat, simplifying any other constants.
3717 return std::nullopt;
3718 }
3719
3720 return false;
3721}
3722
3723bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3724 // Note: If BB is a loop header then there is a risk that threading introduces
3725 // a non-canonical loop by moving a back edge. So we avoid this optimization
3726 // for loop headers if NeedCanonicalLoop is set.
3727 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3728 return false;
3729
3730 std::optional<bool> Result;
3731 bool EverChanged = false;
3732 do {
3733 // Note that None means "we changed things, but recurse further."
3734 Result =
3736 EverChanged |= Result == std::nullopt || *Result;
3737 } while (Result == std::nullopt);
3738 return EverChanged;
3739}
3740
3741/// Given a BB that starts with the specified two-entry PHI node,
3742/// see if we can eliminate it.
3745 const DataLayout &DL,
3746 bool SpeculateUnpredictables) {
3747 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3748 // statement", which has a very simple dominance structure. Basically, we
3749 // are trying to find the condition that is being branched on, which
3750 // subsequently causes this merge to happen. We really want control
3751 // dependence information for this check, but simplifycfg can't keep it up
3752 // to date, and this catches most of the cases we care about anyway.
3753 BasicBlock *BB = PN->getParent();
3754
3755 BasicBlock *IfTrue, *IfFalse;
3756 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3757 if (!DomBI)
3758 return false;
3759 Value *IfCond = DomBI->getCondition();
3760 // Don't bother if the branch will be constant folded trivially.
3761 if (isa<ConstantInt>(IfCond))
3762 return false;
3763
3764 BasicBlock *DomBlock = DomBI->getParent();
3767 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3768 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3769 });
3770 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3771 "Will have either one or two blocks to speculate.");
3772
3773 // If the branch is non-unpredictable, see if we either predictably jump to
3774 // the merge bb (if we have only a single 'then' block), or if we predictably
3775 // jump to one specific 'then' block (if we have two of them).
3776 // It isn't beneficial to speculatively execute the code
3777 // from the block that we know is predictably not entered.
3778 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3779 if (!IsUnpredictable) {
3780 uint64_t TWeight, FWeight;
3781 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3782 (TWeight + FWeight) != 0) {
3783 BranchProbability BITrueProb =
3784 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3785 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3786 BranchProbability BIFalseProb = BITrueProb.getCompl();
3787 if (IfBlocks.size() == 1) {
3788 BranchProbability BIBBProb =
3789 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3790 if (BIBBProb >= Likely)
3791 return false;
3792 } else {
3793 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3794 return false;
3795 }
3796 }
3797 }
3798
3799 // Don't try to fold an unreachable block. For example, the phi node itself
3800 // can't be the candidate if-condition for a select that we want to form.
3801 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3802 if (IfCondPhiInst->getParent() == BB)
3803 return false;
3804
3805 // Okay, we found that we can merge this two-entry phi node into a select.
3806 // Doing so would require us to fold *all* two entry phi nodes in this block.
3807 // At some point this becomes non-profitable (particularly if the target
3808 // doesn't support cmov's). Only do this transformation if there are two or
3809 // fewer PHI nodes in this block.
3810 unsigned NumPhis = 0;
3811 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3812 if (NumPhis > 2)
3813 return false;
3814
3815 // Loop over the PHI's seeing if we can promote them all to select
3816 // instructions. While we are at it, keep track of the instructions
3817 // that need to be moved to the dominating block.
3818 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3819 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3820 InstructionCost Cost = 0;
3821 InstructionCost Budget =
3823 if (SpeculateUnpredictables && IsUnpredictable)
3824 Budget += TTI.getBranchMispredictPenalty();
3825
3826 bool Changed = false;
3827 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3828 PHINode *PN = cast<PHINode>(II++);
3829 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3830 PN->replaceAllUsesWith(V);
3831 PN->eraseFromParent();
3832 Changed = true;
3833 continue;
3834 }
3835
3836 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3837 AggressiveInsts, Cost, Budget, TTI, AC,
3838 ZeroCostInstructions) ||
3839 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3840 AggressiveInsts, Cost, Budget, TTI, AC,
3841 ZeroCostInstructions))
3842 return Changed;
3843 }
3844
3845 // If we folded the first phi, PN dangles at this point. Refresh it. If
3846 // we ran out of PHIs then we simplified them all.
3847 PN = dyn_cast<PHINode>(BB->begin());
3848 if (!PN)
3849 return true;
3850
3851 // Return true if at least one of these is a 'not', and another is either
3852 // a 'not' too, or a constant.
3853 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3854 if (!match(V0, m_Not(m_Value())))
3855 std::swap(V0, V1);
3856 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3857 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3858 };
3859
3860 // Don't fold i1 branches on PHIs which contain binary operators or
3861 // (possibly inverted) select form of or/ands, unless one of
3862 // the incoming values is an 'not' and another one is freely invertible.
3863 // These can often be turned into switches and other things.
3864 auto IsBinOpOrAnd = [](Value *V) {
3865 return match(
3867 };
3868 if (PN->getType()->isIntegerTy(1) &&
3869 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3870 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3871 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3872 PN->getIncomingValue(1)))
3873 return Changed;
3874
3875 // If all PHI nodes are promotable, check to make sure that all instructions
3876 // in the predecessor blocks can be promoted as well. If not, we won't be able
3877 // to get rid of the control flow, so it's not worth promoting to select
3878 // instructions.
3879 for (BasicBlock *IfBlock : IfBlocks)
3880 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3881 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3882 // This is not an aggressive instruction that we can promote.
3883 // Because of this, we won't be able to get rid of the control flow, so
3884 // the xform is not worth it.
3885 return Changed;
3886 }
3887
3888 // If either of the blocks has it's address taken, we can't do this fold.
3889 if (any_of(IfBlocks,
3890 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3891 return Changed;
3892
3893 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3894 if (IsUnpredictable) dbgs() << " (unpredictable)";
3895 dbgs() << " T: " << IfTrue->getName()
3896 << " F: " << IfFalse->getName() << "\n");
3897
3898 // If we can still promote the PHI nodes after this gauntlet of tests,
3899 // do all of the PHI's now.
3900
3901 // Move all 'aggressive' instructions, which are defined in the
3902 // conditional parts of the if's up to the dominating block.
3903 for (BasicBlock *IfBlock : IfBlocks)
3904 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3905
3906 IRBuilder<NoFolder> Builder(DomBI);
3907 // Propagate fast-math-flags from phi nodes to replacement selects.
3908 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3909 // Change the PHI node into a select instruction.
3910 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3911 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3912
3913 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3914 isa<FPMathOperator>(PN) ? PN : nullptr,
3915 "", DomBI);
3916 PN->replaceAllUsesWith(Sel);
3917 Sel->takeName(PN);
3918 PN->eraseFromParent();
3919 }
3920
3921 // At this point, all IfBlocks are empty, so our if statement
3922 // has been flattened. Change DomBlock to jump directly to our new block to
3923 // avoid other simplifycfg's kicking in on the diamond.
3924 Builder.CreateBr(BB);
3925
3927 if (DTU) {
3928 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3929 for (auto *Successor : successors(DomBlock))
3930 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3931 }
3932
3933 DomBI->eraseFromParent();
3934 if (DTU)
3935 DTU->applyUpdates(Updates);
3936
3937 return true;
3938}
3939
3942 Value *RHS, const Twine &Name = "") {
3943 // Try to relax logical op to binary op.
3944 if (impliesPoison(RHS, LHS))
3945 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3946 if (Opc == Instruction::And)
3947 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3948 if (Opc == Instruction::Or)
3949 return Builder.CreateLogicalOr(LHS, RHS, Name);
3950 llvm_unreachable("Invalid logical opcode");
3951}
3952
3953/// Return true if either PBI or BI has branch weight available, and store
3954/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3955/// not have branch weight, use 1:1 as its weight.
3957 uint64_t &PredTrueWeight,
3958 uint64_t &PredFalseWeight,
3959 uint64_t &SuccTrueWeight,
3960 uint64_t &SuccFalseWeight) {
3961 bool PredHasWeights =
3962 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3963 bool SuccHasWeights =
3964 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3965 if (PredHasWeights || SuccHasWeights) {
3966 if (!PredHasWeights)
3967 PredTrueWeight = PredFalseWeight = 1;
3968 if (!SuccHasWeights)
3969 SuccTrueWeight = SuccFalseWeight = 1;
3970 return true;
3971 } else {
3972 return false;
3973 }
3974}
3975
3976/// Determine if the two branches share a common destination and deduce a glue
3977/// that joins the branches' conditions to arrive at the common destination if
3978/// that would be profitable.
3979static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3981 const TargetTransformInfo *TTI) {
3982 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3983 "Both blocks must end with a conditional branches.");
3985 "PredBB must be a predecessor of BB.");
3986
3987 // We have the potential to fold the conditions together, but if the
3988 // predecessor branch is predictable, we may not want to merge them.
3989 uint64_t PTWeight, PFWeight;
3990 BranchProbability PBITrueProb, Likely;
3991 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3992 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3993 (PTWeight + PFWeight) != 0) {
3994 PBITrueProb =
3995 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3996 Likely = TTI->getPredictableBranchThreshold();
3997 }
3998
3999 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4000 // Speculate the 2nd condition unless the 1st is probably true.
4001 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4002 return {{BI->getSuccessor(0), Instruction::Or, false}};
4003 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4004 // Speculate the 2nd condition unless the 1st is probably false.
4005 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4006 return {{BI->getSuccessor(1), Instruction::And, false}};
4007 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4008 // Speculate the 2nd condition unless the 1st is probably true.
4009 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4010 return {{BI->getSuccessor(1), Instruction::And, true}};
4011 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4012 // Speculate the 2nd condition unless the 1st is probably false.
4013 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4014 return {{BI->getSuccessor(0), Instruction::Or, true}};
4015 }
4016 return std::nullopt;
4017}
4018
4020 DomTreeUpdater *DTU,
4021 MemorySSAUpdater *MSSAU,
4022 const TargetTransformInfo *TTI) {
4023 BasicBlock *BB = BI->getParent();
4024 BasicBlock *PredBlock = PBI->getParent();
4025
4026 // Determine if the two branches share a common destination.
4027 BasicBlock *CommonSucc;
4029 bool InvertPredCond;
4030 std::tie(CommonSucc, Opc, InvertPredCond) =
4032
4033 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4034
4035 IRBuilder<> Builder(PBI);
4036 // The builder is used to create instructions to eliminate the branch in BB.
4037 // If BB's terminator has !annotation metadata, add it to the new
4038 // instructions.
4039 Builder.CollectMetadataToCopy(BB->getTerminator(),
4040 {LLVMContext::MD_annotation});
4041
4042 // If we need to invert the condition in the pred block to match, do so now.
4043 if (InvertPredCond) {
4044 InvertBranch(PBI, Builder);
4045 }
4046
4047 BasicBlock *UniqueSucc =
4048 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4049
4050 // Before cloning instructions, notify the successor basic block that it
4051 // is about to have a new predecessor. This will update PHI nodes,
4052 // which will allow us to update live-out uses of bonus instructions.
4053 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4054
4055 // Try to update branch weights.
4056 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4057 SmallVector<uint32_t, 2> MDWeights;
4058 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4059 SuccTrueWeight, SuccFalseWeight)) {
4060 SmallVector<uint64_t, 8> NewWeights;
4061
4062 if (PBI->getSuccessor(0) == BB) {
4063 // PBI: br i1 %x, BB, FalseDest
4064 // BI: br i1 %y, UniqueSucc, FalseDest
4065 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4066 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
4067 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4068 // TrueWeight for PBI * FalseWeight for BI.
4069 // We assume that total weights of a BranchInst can fit into 32 bits.
4070 // Therefore, we will not have overflow using 64-bit arithmetic.
4071 NewWeights.push_back(PredFalseWeight *
4072 (SuccFalseWeight + SuccTrueWeight) +
4073 PredTrueWeight * SuccFalseWeight);
4074 } else {
4075 // PBI: br i1 %x, TrueDest, BB
4076 // BI: br i1 %y, TrueDest, UniqueSucc
4077 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4078 // FalseWeight for PBI * TrueWeight for BI.
4079 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4080 PredFalseWeight * SuccTrueWeight);
4081 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4082 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
4083 }
4084
4085 // Halve the weights if any of them cannot fit in an uint32_t
4086 fitWeights(NewWeights);
4087
4088 append_range(MDWeights, NewWeights);
4089 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
4090
4091 // TODO: If BB is reachable from all paths through PredBlock, then we
4092 // could replace PBI's branch probabilities with BI's.
4093 } else
4094 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4095
4096 // Now, update the CFG.
4097 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4098
4099 if (DTU)
4100 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4101 {DominatorTree::Delete, PredBlock, BB}});
4102
4103 // If BI was a loop latch, it may have had associated loop metadata.
4104 // We need to copy it to the new latch, that is, PBI.
4105 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4106 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4107
4108 ValueToValueMapTy VMap; // maps original values to cloned values
4110
4111 Module *M = BB->getModule();
4112
4113 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4114 for (DbgVariableRecord &DVR :
4116 RemapDbgRecord(M, &DVR, VMap,
4118 }
4119
4120 // Now that the Cond was cloned into the predecessor basic block,
4121 // or/and the two conditions together.
4122 Value *BICond = VMap[BI->getCondition()];
4123 PBI->setCondition(
4124 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4126 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4127 if (!MDWeights.empty()) {
4128 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4129 setBranchWeights(SI, MDWeights[0], MDWeights[1],
4130 /*IsExpected=*/false);
4131 }
4132
4133 ++NumFoldBranchToCommonDest;
4134 return true;
4135}
4136
4137/// Return if an instruction's type or any of its operands' types are a vector
4138/// type.
4139static bool isVectorOp(Instruction &I) {
4140 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4141 return U->getType()->isVectorTy();
4142 });
4143}
4144
4145/// If this basic block is simple enough, and if a predecessor branches to us
4146/// and one of our successors, fold the block into the predecessor and use
4147/// logical operations to pick the right destination.
4149 MemorySSAUpdater *MSSAU,
4150 const TargetTransformInfo *TTI,
4151 unsigned BonusInstThreshold) {
4152 // If this block ends with an unconditional branch,
4153 // let speculativelyExecuteBB() deal with it.
4154 if (!BI->isConditional())
4155 return false;
4156
4157 BasicBlock *BB = BI->getParent();
4161
4163
4165 Cond->getParent() != BB || !Cond->hasOneUse())
4166 return false;
4167
4168 // Finally, don't infinitely unroll conditional loops.
4169 if (is_contained(successors(BB), BB))
4170 return false;
4171
4172 // With which predecessors will we want to deal with?
4174 for (BasicBlock *PredBlock : predecessors(BB)) {
4175 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4176
4177 // Check that we have two conditional branches. If there is a PHI node in
4178 // the common successor, verify that the same value flows in from both
4179 // blocks.
4180 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4181 continue;
4182
4183 // Determine if the two branches share a common destination.
4184 BasicBlock *CommonSucc;
4186 bool InvertPredCond;
4187 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4188 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4189 else
4190 continue;
4191
4192 // Check the cost of inserting the necessary logic before performing the
4193 // transformation.
4194 if (TTI) {
4195 Type *Ty = BI->getCondition()->getType();
4196 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4197 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4198 !isa<CmpInst>(PBI->getCondition())))
4199 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4200
4202 continue;
4203 }
4204
4205 // Ok, we do want to deal with this predecessor. Record it.
4206 Preds.emplace_back(PredBlock);
4207 }
4208
4209 // If there aren't any predecessors into which we can fold,
4210 // don't bother checking the cost.
4211 if (Preds.empty())
4212 return false;
4213
4214 // Only allow this transformation if computing the condition doesn't involve
4215 // too many instructions and these involved instructions can be executed
4216 // unconditionally. We denote all involved instructions except the condition
4217 // as "bonus instructions", and only allow this transformation when the
4218 // number of the bonus instructions we'll need to create when cloning into
4219 // each predecessor does not exceed a certain threshold.
4220 unsigned NumBonusInsts = 0;
4221 bool SawVectorOp = false;
4222 const unsigned PredCount = Preds.size();
4223 for (Instruction &I : *BB) {
4224 // Don't check the branch condition comparison itself.
4225 if (&I == Cond)
4226 continue;
4227 // Ignore the terminator.
4228 if (isa<BranchInst>(I))
4229 continue;
4230 // I must be safe to execute unconditionally.
4232 return false;
4233 SawVectorOp |= isVectorOp(I);
4234
4235 // Account for the cost of duplicating this instruction into each
4236 // predecessor. Ignore free instructions.
4237 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4239 NumBonusInsts += PredCount;
4240
4241 // Early exits once we reach the limit.
4242 if (NumBonusInsts >
4243 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4244 return false;
4245 }
4246
4247 auto IsBCSSAUse = [BB, &I](Use &U) {
4248 auto *UI = cast<Instruction>(U.getUser());
4249 if (auto *PN = dyn_cast<PHINode>(UI))
4250 return PN->getIncomingBlock(U) == BB;
4251 return UI->getParent() == BB && I.comesBefore(UI);
4252 };
4253
4254 // Does this instruction require rewriting of uses?
4255 if (!all_of(I.uses(), IsBCSSAUse))
4256 return false;
4257 }
4258 if (NumBonusInsts >
4259 BonusInstThreshold *
4260 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4261 return false;
4262
4263 // Ok, we have the budget. Perform the transformation.
4264 for (BasicBlock *PredBlock : Preds) {
4265 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4266 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4267 }
4268 return false;
4269}
4270
4271// If there is only one store in BB1 and BB2, return it, otherwise return
4272// nullptr.
4274 StoreInst *S = nullptr;
4275 for (auto *BB : {BB1, BB2}) {
4276 if (!BB)
4277 continue;
4278 for (auto &I : *BB)
4279 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4280 if (S)
4281 // Multiple stores seen.
4282 return nullptr;
4283 else
4284 S = SI;
4285 }
4286 }
4287 return S;
4288}
4289
                                              Value *AlternativeV = nullptr) {
  // PHI is going to be a PHI node that allows the value V that is defined in
  // BB to be referenced in BB's only successor.
  //
  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
  // doesn't matter to us what the other operand is (it'll never get used). We
  // could just create a new PHI with an undef incoming value, but that could
  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
  // other PHI. So here we directly look for some PHI in BB's successor with V
  // as an incoming operand. If we find one, we use it, else we create a new
  // one.
  //
  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
  // where OtherBB is the single other predecessor of BB's only successor.
  PHINode *PHI = nullptr;
  BasicBlock *Succ = BB->getSingleSuccessor();

  // First try to reuse an existing PHI in Succ that already has the required
  // incoming value(s); this avoids creating a redundant PHI node.
  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
      PHI = cast<PHINode>(I);
      if (!AlternativeV)
        break;

      // With AlternativeV the other incoming value must match too.
      assert(Succ->hasNPredecessors(2));
      auto PredI = pred_begin(Succ);
      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
        break;
      PHI = nullptr;
    }
  if (PHI)
    return PHI;

  // If V is not an instruction defined in BB, just return it.
  if (!AlternativeV &&
      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
    return V;

  // No reusable PHI: create one. The non-BB incoming value is poison when no
  // AlternativeV was supplied, since it is never consumed in that case.
  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
  PHI->insertBefore(Succ->begin());
  PHI->addIncoming(V, BB);
  for (BasicBlock *PredBB : predecessors(Succ))
    if (PredBB != BB)
      PHI->addIncoming(
          AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
  return PHI;
}
4339
    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
  // For every pointer, there must be exactly two stores, one coming from
  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
  // store (to any address) in PTB,PFB or QTB,QFB.
  // FIXME: We could relax this restriction with a bit more work and performance
  // testing.
  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
  if (!PStore || !QStore)
    return false;

  // Now check the stores are compatible: both must be unordered (non-atomic
  // or monotonic-free) and store values of the same type.
  if (!QStore->isUnordered() || !PStore->isUnordered() ||
      PStore->getValueOperand()->getType() !=
          QStore->getValueOperand()->getType())
    return false;

  // Check that sinking the store won't cause program behavior changes. Sinking
  // the store out of the Q blocks won't change any behavior as we're sinking
  // from a block to its unconditional successor. But we're moving a store from
  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
  // So we need to check that there are no aliasing loads or stores in
  // QBI, QTB and QFB. We also need to check there are no conflicting memory
  // operations between PStore and the end of its parent block.
  //
  // The ideal way to do this is to query AliasAnalysis, but we don't
  // preserve AA currently so that is dangerous. Be super safe and just
  // check there are no other memory operations at all.
  for (auto &I : *QFB->getSinglePredecessor())
    if (I.mayReadOrWriteMemory())
      return false;
  for (auto &I : *QFB)
    if (&I != QStore && I.mayReadOrWriteMemory())
      return false;
  if (QTB)
    for (auto &I : *QTB)
      if (&I != QStore && I.mayReadOrWriteMemory())
        return false;
  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
       I != E; ++I)
    if (&*I != PStore && I->mayReadOrWriteMemory())
      return false;

  // If we're not in aggressive mode, we only optimize if we have some
  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
    if (!BB)
      return true;
    // Heuristic: if the block can be if-converted/phi-folded and the
    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
    // thread this store.
    InstructionCost Cost = 0;
    InstructionCost Budget =
    for (auto &I : BB->instructionsWithoutDebug(false)) {
      // Consider terminator instruction to be free.
      if (I.isTerminator())
        continue;
      // If this is one of the stores that we want to speculate out of this BB,
      // then don't count its cost, consider it to be free.
      // NOTE(review): llvm::find returns an iterator (a raw pointer for
      // std::array), which is always non-null here, so this condition appears
      // to treat *every* store as free; compare against FreeStores end if only
      // the two candidate stores should be free — TODO confirm intent.
      if (auto *S = dyn_cast<StoreInst>(&I))
        if (llvm::find(FreeStores, S))
          continue;
      // Else, we have a white-list of instructions that we are okay
      // speculating.
        return false; // Not in white-list - not worthwhile folding.
      // And finally, if this is a non-free instruction that we are okay
      // speculating, ensure that we consider the speculation budget.
      Cost +=
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
      if (Cost > Budget)
        return false; // Eagerly refuse to fold as soon as we're out of budget.
    }
    assert(Cost <= Budget &&
           "When we run out of budget we will eagerly return from within the "
           "per-instruction loop.");
    return true;
  };

  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
    return false;

  // If PostBB has more than two predecessors, we need to split it so we can
  // sink the store.
  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
    // We know that QFB's only successor is PostBB. And QFB has a single
    // predecessor. If QTB exists, then its only successor is also PostBB.
    // If QTB does not exist, then QFB's only predecessor has a conditional
    // branch to QFB and PostBB.
    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
    BasicBlock *NewBB =
        SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
    if (!NewBB)
      return false;
    PostBB = NewBB;
  }

  // OK, we're going to sink the stores to PostBB. The store has to be
  // conditional though, so first create the predicate.
  BranchInst *PBranch =
  BranchInst *QBranch =
  Value *PCond = PBranch->getCondition();
  Value *QCond = QBranch->getCondition();

      PStore->getParent());
      QStore->getParent(), PPHI);

  BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
  IRBuilder<> QB(PostBB, PostBBFirst);
  QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());

  // If a store lives in the false block of its triangle/diamond, its guard
  // condition must be inverted before use.
  InvertPCond ^= (PStore->getParent() != PTB);
  InvertQCond ^= (QStore->getParent() != QTB);
  Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
  Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;

  // The merged store must execute if either original condition held.
  Value *CombinedPred = QB.CreateOr(PPred, QPred);

  BasicBlock::iterator InsertPt = QB.GetInsertPoint();
  auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
                                      /*Unreachable=*/false,
                                      /*BranchWeights=*/nullptr, DTU);
  if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
    SmallVector<uint32_t, 2> PWeights, QWeights;
    extractBranchWeights(*PBranch, PWeights);
    extractBranchWeights(*QBranch, QWeights);
    // Normalize so index 0 is always the "store executes" direction before
    // combining the two branches' weights disjunctively.
    if (InvertPCond)
      std::swap(PWeights[0], PWeights[1]);
    if (InvertQCond)
      std::swap(QWeights[0], QWeights[1]);
    auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
    setBranchWeights(PostBB->getTerminator(), CombinedWeights[0],
                     CombinedWeights[1],
                     /*IsExpected=*/false);
  }

  QB.SetInsertPoint(T);
  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
  SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
  // Choose the minimum alignment. If we could prove both stores execute, we
  // could use biggest one. In this case, though, we only know that one of the
  // stores executes. And we don't know it's safe to take the alignment from a
  // store that doesn't execute.
  SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));

  QStore->eraseFromParent();
  PStore->eraseFromParent();

  return true;
}
4501
                                     DomTreeUpdater *DTU, const DataLayout &DL,
                                     const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //     PBI       or      PBI        or a combination of the two
  //    /   \               | \
  //   PTB  PFB             |  PFB
  //    \   /               | /
  //     QBI                QBI
  //    /  \                | \
  //   QTB  QFB             |  QFB
  //    \  /                | /
  //    PostBB            PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(0);
  BasicBlock *PFB = PBI->getSuccessor(1);
  BasicBlock *QTB = QBI->getSuccessor(0);
  BasicBlock *QFB = QBI->getSuccessor(1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(PFB, PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(QFB, QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  if (!QBI->getParent()->hasNUses(2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
        PStoreAddresses.insert(SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
        QStoreAddresses.insert(SI->getPointerOperand());
  }

  set_intersect(PStoreAddresses, QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  // Attempt the merge once per address stored on both sides of the ladder.
  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}
4610
/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hoisting safety.
                                       DomTreeUpdater *DTU) {
  // TODO: This can be generalized in two important ways:
  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
  //    values from the PBI edge.
  // 2) We can sink side effecting instructions into BI's fallthrough
  //    successor provided they don't contribute to computation of
  //    BI's condition.
  BasicBlock *IfTrueBB = PBI->getSuccessor(0);
  BasicBlock *IfFalseBB = PBI->getSuccessor(1);
  if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
      !BI->getParent()->getSinglePredecessor())
    return false;
  if (!IfFalseBB->phis().empty())
    return false; // TODO
  // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
  // may undo the transform done here.
  // TODO: There might be a more fine-grained solution to this.
  if (!llvm::succ_empty(IfFalseBB))
    return false;
  // Use lambda to lazily compute expensive condition after cheap ones.
  auto NoSideEffects = [](BasicBlock &BB) {
    return llvm::none_of(BB, [](const Instruction &I) {
      return I.mayWriteToMemory() || I.mayHaveSideEffects();
    });
  };
  // The next two ifs are symmetric: redirect whichever successor of BI is a
  // deopt block (and isn't already IfFalseBB) to IfFalseBB instead.
  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(1);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(1, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(0);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(0, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  return false;
}
4666
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  assert(PBI->isConditional() && BI->isConditional());
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // foldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(0) == BB;
      BI->setCondition(
          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hoisting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.

  // Ignore dbg intrinsics.
  if (&*BB->instructionsWithoutDebug(false).begin() != BI)
    return false;

  // Figure out which successor the two branches share, remembering the
  // operand index of the common destination within each branch.
  int PBIOp, BIOp;
  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(PBIOp) == BB)
    return false;

  // If predecessor's branch probability to BB is too low don't merge branches.
  SmallVector<uint32_t, 2> PredWeights;
  if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
      extractBranchWeights(*PBI, PredWeights) &&
      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {

        PredWeights[PBIOp],
        static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);

    BranchProbability Likely = TTI.getPredictableBranchThreshold();
    if (CommonDestProb >= Likely)
      return false;
  }

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.

  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
  unsigned NumPhis = 0;
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());


  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the other
  // exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end up
  // recursively unpeeling the loop. Since we know that (after the xform is
  // done) that the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
    BranchInst::Create(InfLoopBlock, InfLoopBlock);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(0, CommonDest);
  PBI->setSuccessor(1, OtherDest);

  if (DTU) {
    Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};
    // Halve the weights if any of them cannot fit in an uint32_t
    fitWeights(NewWeights);

    setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
    // Cond may be a select instruction with the first operand set to "true", or
    // the second to "false" (see how createLogicalOp works for `and` and `or`)
    if (auto *SI = dyn_cast<SelectInst>(Cond)) {
      assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
      // The select is predicated on PBICond
      assert(dyn_cast<SelectInst>(SI)->getCondition() == PBICond);
      // The corresponding probabilities are what was referred to above as
      // PredCommon and PredOther.
      setBranchWeights(SI, PredCommon, PredOther,
                       /*IsExpected=*/false);
    }
  }

  // OtherDest may have phi nodes. If so, add an entry from PBI's
  // block that is identical to the entry for BI's block.
  addPredecessorToBlock(OtherDest, PBI->getParent(), BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
    Value *PBIV = PN.getIncomingValue(PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
      PN.setIncomingValue(PBBIdx, NV);
      // The select has the same condition as PBI, in the same BB. The
      // probabilities don't change.
      if (HasWeights) {
        uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
        setBranchWeights(NV, TrueWeight, FalseWeight,
                         /*IsExpected=*/false);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}
4893
// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
// true or to FalseBB if Cond is false.
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest. A KeepEdge pointer is nulled out once its edge is
  // seen, so a remaining non-null KeepEdge means that edge was never a
  // successor of OldTerm.
  for (BasicBlock *Succ : successors(OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(BB,
                              /*KeepOneInputPHIs=*/true);

      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
      if (TrueWeight != FalseWeight)
        setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(FalseBB);
    }
  }


  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
4975
4976// Replaces
4977// (switch (select cond, X, Y)) on constant X, Y
4978// with a branch - conditional if X and Y lead to distinct BBs,
4979// unconditional otherwise.
4980bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4981 SelectInst *Select) {
4982 // Check for constant integer values in the select.
4983 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4984 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4985 if (!TrueVal || !FalseVal)
4986 return false;
4987
4988 // Find the relevant condition and destinations.
4989 Value *Condition = Select->getCondition();
4990 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4991 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4992
4993 // Get weight for TrueBB and FalseBB.
4994 uint32_t TrueWeight = 0, FalseWeight = 0;
4995 SmallVector<uint64_t, 8> Weights;
4996 bool HasWeights = hasBranchWeightMD(*SI);
4997 if (HasWeights) {
4998 getBranchWeights(SI, Weights);
4999 if (Weights.size() == 1 + SI->getNumCases()) {
5000 TrueWeight =
5001 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
5002 FalseWeight =
5003 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
5004 }
5005 }
5006
5007 // Perform the actual simplification.
5008 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
5009 FalseWeight);
5010}
5011
5012// Replaces
5013// (indirectbr (select cond, blockaddress(@fn, BlockA),
5014// blockaddress(@fn, BlockB)))
5015// with
5016// (br cond, BlockA, BlockB).
5017bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
5018 SelectInst *SI) {
5019 // Check that both operands of the select are block addresses.
5020 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
5021 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
5022 if (!TBA || !FBA)
5023 return false;
5024
5025 // Extract the actual blocks.
5026 BasicBlock *TrueBB = TBA->getBasicBlock();
5027 BasicBlock *FalseBB = FBA->getBasicBlock();
5028
5029 // Perform the actual simplification.
5030 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
5031 0);
5032}
5033
/// This is called when we find an icmp instruction
/// (a seteq/setne with a constant) as the only instruction in a
/// block that ends with an uncond branch. We are looking for a very specific
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
/// this case, we merge the first two "or's of icmp" into a switch, but then the
/// default value goes to an uncond block with a seteq in it, we get something
/// like:
///
///   switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 92
///   br label %end
/// end:
///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
    ICmpInst *ICI, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp has multiple uses, it is too
  // complex.
  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
    return false;

  Value *V = ICI->getOperand(0);
  ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
    return false;

  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
  if (SI->getCondition() != V)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(0, VVal);

    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it: the default edge can never carry that constant, so the
  // compare's result is known here.
  if (SI->findCaseValue(Cst) != SI->case_default()) {
    Value *V;
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
    else

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the icmp has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
  PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
    return false;

  // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
  // true in the PHI.
  Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
  Constant *NewCst = ConstantInt::getFalse(BB->getContext());

  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
    std::swap(DefaultCst, NewCst);

  // Replace ICI (which is used by the PHI for the default value) with true or
  // false depending on if it is EQ or NE.
  ICI->replaceAllUsesWith(DefaultCst);
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  {
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(0);
    // Split the default edge's weight between the remaining default and the
    // newly added case (rounding up).
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(0, *NewW);
    }
    SIW.addCase(Cst, NewBB, NewW);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(SuccBlock);
  PHIUse->addIncoming(NewCst, NewBB);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
5157
/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions,
/// and fold it into a switch instruction if so.
///
/// \param BI       The conditional branch to examine.
/// \param Builder  IRBuilder used to create the replacement instructions.
/// \param DL       DataLayout, used for pointer-to-int conversion and
///                 poison/undef analysis of the leftover condition.
/// \returns true if the branch was converted into a switch.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  // ExtraCase: a leftover sub-condition that could not be expressed as an
  // equality against CompVal; it is handled with an explicit branch below.
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Figure out which block is which destination. For an equality chain the
  // "equal" successor is the case destination; for a disequality chain the
  // roles are swapped.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual)
    std::swap(DefaultBB, EdgeBB);

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    if (TrueWhenEqual)
      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
    else
      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Create the new switch instruction now.
  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());

  // Add all of the 'cases' to the switch instruction.
  for (ConstantInt *Val : Values)
    New->addCase(Val, EdgeBB);

  // We added edges from PI to the EdgeBB. As such, if there were any
  // PHI nodes in EdgeBB, they need entries to be added corresponding to
  // the number of edges added. One incoming entry already existed for the
  // original branch edge, so only Values.size() - 1 copies are appended.
  for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
    PHINode *PN = cast<PHINode>(BBI);
    Value *InVal = PN->getIncomingValueForBlock(BB);
    for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
      PN->addIncoming(InVal, BB);
  }

  // Erase the old branch instruction.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5290
5291bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5292 if (isa<PHINode>(RI->getValue()))
5293 return simplifyCommonResume(RI);
5294 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5295 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5296 // The resume must unwind the exception that caused control to branch here.
5297 return simplifySingleResume(RI);
5298
5299 return false;
5300}
5301
// Check if a cleanup block is "empty": the given instruction range may
// contain only intrinsic calls that are safe to discard along with the
// cleanup — debug-info intrinsics and lifetime.end markers. Any other
// instruction (including any non-intrinsic call) makes the block non-empty.
  for (Instruction &I : R) {
    // Anything that is not an intrinsic call disqualifies the range.
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      return false;

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      break; // Benign bookkeeping; keep scanning.
    default:
      return false;
    }
  }
  return true;
}
5322
// Simplify resume that is shared by several landing pads (phi of landing pad).
// Each incoming block whose landing pad trivially branches to the shared
// resume is rewritten so its invoke predecessors become plain calls; the
// shared resume block itself is deleted once it loses all predecessors.
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // Non-empty TrivialUnwindBlocks implies we changed the IR above.
  return !TrivialUnwindBlocks.empty();
}
5392
// Simplify resume that is only used by a single (non-phi) landing pad.
// All invokes unwinding to this block are converted to calls, after which
// the now-unreachable landing pad block is deleted. Always returns true.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
    make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  // make_early_inc_range is required: removeUnwindEdge mutates the pred list.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5415
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
    make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // "PHI translation": map each predecessor of BB through BB's own phi.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
    if (UnwindDest == nullptr) {
      // Flush pending updates first: removeUnwindEdge may delete blocks that
      // the queued updates still reference.
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      // Retarget the predecessor's terminator straight at UnwindDest,
      // bypassing the empty cleanup block.
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5531
// Try to merge two cleanuppads together.
// The predecessor pad absorbs its unwind-destination pad when the latter has
// no other predecessors; the cleanupret becomes a plain branch.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanupad with the predecessor pad.
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5564
5565bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5566 // It is possible to transiantly have an undef cleanuppad operand because we
5567 // have deleted some, but not all, dead blocks.
5568 // Eventually, this block will be deleted.
5569 if (isa<UndefValue>(RI->getOperand(0)))
5570 return false;
5571
5572 if (mergeCleanupPad(RI))
5573 return true;
5574
5575 if (removeEmptyCleanup(RI, DTU))
5576 return true;
5577
5578 return false;
5579}
5580
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Erase dead instructions feeding an unreachable terminator and, if the
// unreachable heads its block, rewrite every predecessor terminator so the
// edge into this block disappears (branch -> assume+br, switch case removal,
// invoke/catchswitch/cleanupret unwind-edge removal).
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // Snapshot the predecessors first: the loop below mutates the CFG.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        // Reaching BB is impossible, so the condition value that selects it
        // can be assumed away; record that fact for later passes.
        CallInst *Assumption;
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        // removeCase invalidates iterators; re-fetch the end iterator.
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        // Flush queued updates before removeUnwindEdge mutates the CFG.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // Drop every handler that points at the unreachable block.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5764
  assert(Cases.size() >= 1);

  // After sorting, neighbours must each differ by exactly one, i.e.
  // Cases[I-1] == Cases[I] + 1, so the values form one contiguous range.
  // NOTE(review): relies on Cases being sorted (descending) beforehand —
  // confirm the sort that precedes this loop.
  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
      return false;
  }
  return true;
}
5775
                                          DomTreeUpdater *DTU,
                                          bool RemoveOrigDefaultBlock = true) {
  // Replace a switch's (dead) default destination with a fresh block that
  // contains only 'unreachable'. When RemoveOrigDefaultBlock is true the old
  // default block also loses this switch as a predecessor.
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  // Place the new block right before the old default so the layout stays
  // readable.
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge if no remaining case still targets the old
    // default block.
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5799
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
///
/// Requires one of the two destination case-sets to form a contiguous value
/// range; the switch is then replaced by `(cond - lowest) ult numCases`.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && casesAreContiguous(CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (casesAreContiguous(CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.

  // Offset = -(lowest case value); back() holds the range minimum here.
  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Offset->getType(), ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI)) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Scale both weights down together so each fits in 32 bits.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  // Each successor previously had one incoming edge per case (plus one for
  // the default, if applicable); now it has exactly one edge from BB.
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
5928
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
///
/// Also, when the known bits prove the cases cover every reachable value,
/// the default destination is made unreachable (or, if exactly one value is
/// missing, the default is turned into that case).
                                   AssumptionCache *AC,
                                   const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, AC, SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =

  // Gather dead cases.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    // Track per-successor case counts so DT edges can be deleted once a
    // successor loses its last case.
    if (DTU) {
      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
      if (Inserted)
        UniqueSuccessors.push_back(Successor);
      ++It->second;
    }
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    // Dead if the case contradicts a known-zero/known-one bit, or needs more
    // significant bits than the condition can carry.
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault = !SI->defaultDestUnreachable();
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */) {
    uint64_t AllNumCases = 1ULL << NumUnknownBits;
    if (SI->getNumCases() == AllNumCases) {
      return true;
    }
    // When only one case value is missing, replace default with that case.
    // Eliminating the default branch will provide more opportunities for
    // optimization, such as lookup tables.
    if (SI->getNumCases() == AllNumCases - 1) {
      assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
      IntegerType *CondTy = cast<IntegerType>(Cond->getType());
      if (CondTy->getIntegerBitWidth() > 64 ||
          !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
        return false;

      // XOR of all present case values recovers the single missing one:
      // with NumUnknownBits > 1 the XOR of the full value set is zero.
      uint64_t MissingCaseVal = 0;
      for (const auto &Case : SI->cases())
        MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
      auto *MissingCase =
          cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
      SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
      createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
      SIW.setSuccessorWeight(0, 0);
      return true;
    }
  }

  if (DeadCases.empty())
    return false;

  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
6027
6028/// If BB would be eligible for simplification by
6029/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6030/// by an unconditional branch), look at the phi node for BB in the successor
6031/// block and see if the incoming value is equal to CaseValue. If so, return
6032/// the phi node, and set PhiIndex to BB's index in the phi node.
6034 BasicBlock *BB, int *PhiIndex) {
6035 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6036 return nullptr; // BB must be empty to be a candidate for simplification.
6037 if (!BB->getSinglePredecessor())
6038 return nullptr; // BB must be dominated by the switch.
6039
6041 if (!Branch || !Branch->isUnconditional())
6042 return nullptr; // Terminator must be unconditional branch.
6043
6044 BasicBlock *Succ = Branch->getSuccessor(0);
6045
6046 for (PHINode &PHI : Succ->phis()) {
6047 int Idx = PHI.getBasicBlockIndex(BB);
6048 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6049
6050 Value *InValue = PHI.getIncomingValue(Idx);
6051 if (InValue != CaseValue)
6052 continue;
6053
6054 *PhiIndex = Idx;
6055 return &PHI;
6056 }
6057
6058 return nullptr;
6059}
6060
6061/// Try to forward the condition of a switch instruction to a phi node
6062/// dominated by the switch, if that would mean that some of the destination
6063/// blocks of the switch can be folded away. Return true if a change is made.
6065 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6066
6067 ForwardingNodesMap ForwardingNodes;
6068 BasicBlock *SwitchBlock = SI->getParent();
6069 bool Changed = false;
6070 for (const auto &Case : SI->cases()) {
6071 ConstantInt *CaseValue = Case.getCaseValue();
6072 BasicBlock *CaseDest = Case.getCaseSuccessor();
6073
6074 // Replace phi operands in successor blocks that are using the constant case
6075 // value rather than the switch condition variable:
6076 // switchbb:
6077 // switch i32 %x, label %default [
6078 // i32 17, label %succ
6079 // ...
6080 // succ:
6081 // %r = phi i32 ... [ 17, %switchbb ] ...
6082 // -->
6083 // %r = phi i32 ... [ %x, %switchbb ] ...
6084
6085 for (PHINode &Phi : CaseDest->phis()) {
6086 // This only works if there is exactly 1 incoming edge from the switch to
6087 // a phi. If there is >1, that means multiple cases of the switch map to 1
6088 // value in the phi, and that phi value is not the switch condition. Thus,
6089 // this transform would not make sense (the phi would be invalid because
6090 // a phi can't have different incoming values from the same block).
6091 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6092 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6093 count(Phi.blocks(), SwitchBlock) == 1) {
6094 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6095 Changed = true;
6096 }
6097 }
6098
6099 // Collect phi nodes that are indirectly using this switch's case constants.
6100 int PhiIdx;
6101 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6102 ForwardingNodes[Phi].push_back(PhiIdx);
6103 }
6104
6105 for (auto &ForwardingNode : ForwardingNodes) {
6106 PHINode *Phi = ForwardingNode.first;
6107 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6108 // Check if it helps to fold PHI.
6109 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6110 continue;
6111
6112 for (int Index : Indexes)
6113 Phi->setIncomingValue(Index, SI->getCondition());
6114 Changed = true;
6115 }
6116
6117 return Changed;
6118}
6119
6120/// Return true if the backend will be able to handle
6121/// initializing an array of constants like C.
6123 if (C->isThreadDependent())
6124 return false;
6125 if (C->isDLLImportDependent())
6126 return false;
6127
6128 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6131 return false;
6132
6134 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6135 // materializing the array of constants.
6136 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6137 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6138 return false;
6139 }
6140
6141 if (!TTI.shouldBuildLookupTablesForConstant(C))
6142 return false;
6143
6144 return true;
6145}
6146
6147/// If V is a Constant, return it. Otherwise, try to look up
6148/// its constant value in ConstantPool, returning 0 if it's not there.
6149static Constant *
6152 if (Constant *C = dyn_cast<Constant>(V))
6153 return C;
6154 return ConstantPool.lookup(V);
6155}
6156
6157/// Try to fold instruction I into a constant. This works for
6158/// simple instructions such as binary operations where both operands are
6159/// constant or can be replaced by constants from the ConstantPool. Returns the
6160/// resulting constant on success, 0 otherwise.
6161static Constant *
6165 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6166 if (!A)
6167 return nullptr;
6168 if (A->isAllOnesValue())
6169 return lookupConstant(Select->getTrueValue(), ConstantPool);
6170 if (A->isNullValue())
6171 return lookupConstant(Select->getFalseValue(), ConstantPool);
6172 return nullptr;
6173 }
6174
6176 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6177 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6178 COps.push_back(A);
6179 else
6180 return nullptr;
6181 }
6182
6183 return ConstantFoldInstOperands(I, COps, DL);
6184}
6185
6186/// Try to determine the resulting constant values in phi nodes
6187/// at the common destination basic block, *CommonDest, for one of the case
6188/// destionations CaseDest corresponding to value CaseVal (0 for the default
6189/// case), of a switch instruction SI.
6190static bool
6192 BasicBlock **CommonDest,
6193 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6194 const DataLayout &DL, const TargetTransformInfo &TTI) {
6195 // The block from which we enter the common destination.
6196 BasicBlock *Pred = SI->getParent();
6197
6198 // If CaseDest is empty except for some side-effect free instructions through
6199 // which we can constant-propagate the CaseVal, continue to its successor.
6201 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6202 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6203 if (I.isTerminator()) {
6204 // If the terminator is a simple branch, continue to the next block.
6205 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6206 return false;
6207 Pred = CaseDest;
6208 CaseDest = I.getSuccessor(0);
6209 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6210 // Instruction is side-effect free and constant.
6211
6212 // If the instruction has uses outside this block or a phi node slot for
6213 // the block, it is not safe to bypass the instruction since it would then
6214 // no longer dominate all its uses.
6215 for (auto &Use : I.uses()) {
6216 User *User = Use.getUser();
6218 if (I->getParent() == CaseDest)
6219 continue;
6220 if (PHINode *Phi = dyn_cast<PHINode>(User))
6221 if (Phi->getIncomingBlock(Use) == CaseDest)
6222 continue;
6223 return false;
6224 }
6225
6226 ConstantPool.insert(std::make_pair(&I, C));
6227 } else {
6228 break;
6229 }
6230 }
6231
6232 // If we did not have a CommonDest before, use the current one.
6233 if (!*CommonDest)
6234 *CommonDest = CaseDest;
6235 // If the destination isn't the common one, abort.
6236 if (CaseDest != *CommonDest)
6237 return false;
6238
6239 // Get the values for this case from phi nodes in the destination block.
6240 for (PHINode &PHI : (*CommonDest)->phis()) {
6241 int Idx = PHI.getBasicBlockIndex(Pred);
6242 if (Idx == -1)
6243 continue;
6244
6245 Constant *ConstVal =
6246 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6247 if (!ConstVal)
6248 return false;
6249
6250 // Be conservative about which kinds of constants we support.
6251 if (!validLookupTableConstant(ConstVal, TTI))
6252 return false;
6253
6254 Res.push_back(std::make_pair(&PHI, ConstVal));
6255 }
6256
6257 return Res.size() > 0;
6258}
6259
6260// Helper function used to add CaseVal to the list of cases that generate
6261// Result. Returns the updated number of cases that generate this result.
6262static size_t mapCaseToResult(ConstantInt *CaseVal,
6263 SwitchCaseResultVectorTy &UniqueResults,
6264 Constant *Result) {
6265 for (auto &I : UniqueResults) {
6266 if (I.first == Result) {
6267 I.second.push_back(CaseVal);
6268 return I.second.size();
6269 }
6270 }
6271 UniqueResults.push_back(
6272 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6273 return 1;
6274}
6275
6276// Helper function that initializes a map containing
6277// results for the PHI node of the common destination block for a switch
6278// instruction. Returns false if multiple PHI nodes have been found or if
6279// there is not a common destination block for the switch.
6281 BasicBlock *&CommonDest,
6282 SwitchCaseResultVectorTy &UniqueResults,
6283 Constant *&DefaultResult,
6284 const DataLayout &DL,
6285 const TargetTransformInfo &TTI,
6286 uintptr_t MaxUniqueResults) {
6287 for (const auto &I : SI->cases()) {
6288 ConstantInt *CaseVal = I.getCaseValue();
6289
6290 // Resulting value at phi nodes for this case value.
6291 SwitchCaseResultsTy Results;
6292 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6293 DL, TTI))
6294 return false;
6295
6296 // Only one value per case is permitted.
6297 if (Results.size() > 1)
6298 return false;
6299
6300 // Add the case->result mapping to UniqueResults.
6301 const size_t NumCasesForResult =
6302 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6303
6304 // Early out if there are too many cases for this result.
6305 if (NumCasesForResult > MaxSwitchCasesPerResult)
6306 return false;
6307
6308 // Early out if there are too many unique results.
6309 if (UniqueResults.size() > MaxUniqueResults)
6310 return false;
6311
6312 // Check the PHI consistency.
6313 if (!PHI)
6314 PHI = Results[0].first;
6315 else if (PHI != Results[0].first)
6316 return false;
6317 }
6318 // Find the default result value.
6320 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6321 DL, TTI);
6322 // If the default value is not found abort unless the default destination
6323 // is unreachable.
6324 DefaultResult =
6325 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6326
6327 return DefaultResult || SI->defaultDestUnreachable();
6328}
6329
6330// Helper function that checks if it is possible to transform a switch with only
6331// two cases (or two cases + default) that produces a result into a select.
6332// TODO: Handle switches with more than 2 cases that map to the same result.
6333// The branch weights correspond to the provided Condition (i.e. if Condition is
6334// modified from the original SwitchInst, the caller must adjust the weights)
6335static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6336 Constant *DefaultResult, Value *Condition,
6337 IRBuilder<> &Builder, const DataLayout &DL,
6338 ArrayRef<uint32_t> BranchWeights) {
6339 // If we are selecting between only two cases transform into a simple
6340 // select or a two-way select if default is possible.
6341 // Example:
6342 // switch (a) { %0 = icmp eq i32 %a, 10
6343 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6344 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6345 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6346 // }
6347
6348 const bool HasBranchWeights =
6349 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6350
6351 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6352 ResultVector[1].second.size() == 1) {
6353 ConstantInt *FirstCase = ResultVector[0].second[0];
6354 ConstantInt *SecondCase = ResultVector[1].second[0];
6355 Value *SelectValue = ResultVector[1].first;
6356 if (DefaultResult) {
6357 Value *ValueCompare =
6358 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6359 SelectInst *SelectValueInst = cast<SelectInst>(Builder.CreateSelect(
6360 ValueCompare, ResultVector[1].first, DefaultResult, "switch.select"));
6361 SelectValue = SelectValueInst;
6362 if (HasBranchWeights) {
6363 // We start with 3 probabilities, where the numerator is the
6364 // corresponding BranchWeights[i], and the denominator is the sum over
6365 // BranchWeights. We want the probability and negative probability of
6366 // Condition == SecondCase.
6367 assert(BranchWeights.size() == 3);
6368 setBranchWeights(SelectValueInst, BranchWeights[2],
6369 BranchWeights[0] + BranchWeights[1],
6370 /*IsExpected=*/false);
6371 }
6372 }
6373 Value *ValueCompare =
6374 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6375 SelectInst *Ret = cast<SelectInst>(Builder.CreateSelect(
6376 ValueCompare, ResultVector[0].first, SelectValue, "switch.select"));
6377 if (HasBranchWeights) {
6378 // We may have had a DefaultResult. Base the position of the first and
6379 // second's branch weights accordingly. Also the proability that Condition
6380 // != FirstCase needs to take that into account.
6381 assert(BranchWeights.size() >= 2);
6382 size_t FirstCasePos = (Condition != nullptr);
6383 size_t SecondCasePos = FirstCasePos + 1;
6384 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
6385 setBranchWeights(Ret, BranchWeights[FirstCasePos],
6386 DefaultCase + BranchWeights[SecondCasePos],
6387 /*IsExpected=*/false);
6388 }
6389 return Ret;
6390 }
6391
6392 // Handle the degenerate case where two cases have the same result value.
6393 if (ResultVector.size() == 1 && DefaultResult) {
6394 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6395 unsigned CaseCount = CaseValues.size();
6396 // n bits group cases map to the same result:
6397 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6398 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6399 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6400 if (isPowerOf2_32(CaseCount)) {
6401 ConstantInt *MinCaseVal = CaseValues[0];
6402 // If there are bits that are set exclusively by CaseValues, we
6403 // can transform the switch into a select if the conjunction of
6404 // all the values uniquely identify CaseValues.
6405 APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6406
6407 // Find the minimum value and compute the and of all the case values.
6408 for (auto *Case : CaseValues) {
6409 if (Case->getValue().slt(MinCaseVal->getValue()))
6410 MinCaseVal = Case;
6411 AndMask &= Case->getValue();
6412 }
6413 KnownBits Known = computeKnownBits(Condition, DL);
6414
6415 if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6416 // Compute the number of bits that are free to vary.
6417 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6418
6419 // Check if the number of values covered by the mask is equal
6420 // to the number of cases.
6421 if (FreeBits == Log2_32(CaseCount)) {
6422 Value *And = Builder.CreateAnd(Condition, AndMask);
6423 Value *Cmp = Builder.CreateICmpEQ(
6424 And, Constant::getIntegerValue(And->getType(), AndMask));
6426 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult));
6427 if (HasBranchWeights) {
6428 // We know there's a Default case. We base the resulting branch
6429 // weights off its probability.
6430 assert(BranchWeights.size() >= 2);
6431 setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0),
6432 BranchWeights[0], /*IsExpected=*/false);
6433 }
6434 return Ret;
6435 }
6436 }
6437
6438 // Mark the bits case number touched.
6439 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6440 for (auto *Case : CaseValues)
6441 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6442
6443 // Check if cases with the same result can cover all number
6444 // in touched bits.
6445 if (BitMask.popcount() == Log2_32(CaseCount)) {
6446 if (!MinCaseVal->isNullValue())
6447 Condition = Builder.CreateSub(Condition, MinCaseVal);
6448 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6449 Value *Cmp = Builder.CreateICmpEQ(
6450 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6452 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult));
6453 if (HasBranchWeights) {
6454 assert(BranchWeights.size() >= 2);
6455 setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0),
6456 BranchWeights[0], /*IsExpected=*/false);
6457 }
6458 return Ret;
6459 }
6460 }
6461
6462 // Handle the degenerate case where two cases have the same value.
6463 if (CaseValues.size() == 2) {
6464 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6465 "switch.selectcmp.case1");
6466 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6467 "switch.selectcmp.case2");
6468 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6470 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult));
6471 if (HasBranchWeights) {
6472 assert(BranchWeights.size() >= 2);
6473 setBranchWeights(Ret, accumulate(drop_begin(BranchWeights), 0),
6474 BranchWeights[0], /*IsExpected=*/false);
6475 }
6476 return Ret;
6477 }
6478 }
6479
6480 return nullptr;
6481}
6482
6483// Helper function to cleanup a switch instruction that has been converted into
6484// a select, fixing up PHI nodes and basic blocks.
6486 Value *SelectValue,
6487 IRBuilder<> &Builder,
6488 DomTreeUpdater *DTU) {
6489 std::vector<DominatorTree::UpdateType> Updates;
6490
6491 BasicBlock *SelectBB = SI->getParent();
6492 BasicBlock *DestBB = PHI->getParent();
6493
6494 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6495 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6496 Builder.CreateBr(DestBB);
6497
6498 // Remove the switch.
6499
6500 PHI->removeIncomingValueIf(
6501 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6502 PHI->addIncoming(SelectValue, SelectBB);
6503
6504 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6505 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6506 BasicBlock *Succ = SI->getSuccessor(i);
6507
6508 if (Succ == DestBB)
6509 continue;
6510 Succ->removePredecessor(SelectBB);
6511 if (DTU && RemovedSuccessors.insert(Succ).second)
6512 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6513 }
6514 SI->eraseFromParent();
6515 if (DTU)
6516 DTU->applyUpdates(Updates);
6517}
6518
6519/// If a switch is only used to initialize one or more phi nodes in a common
6520/// successor block with only two different constant values, try to replace the
6521/// switch with a select. Returns true if the fold was made.
6523 DomTreeUpdater *DTU, const DataLayout &DL,
6524 const TargetTransformInfo &TTI) {
6525 Value *const Cond = SI->getCondition();
6526 PHINode *PHI = nullptr;
6527 BasicBlock *CommonDest = nullptr;
6528 Constant *DefaultResult;
6529 SwitchCaseResultVectorTy UniqueResults;
6530 // Collect all the cases that will deliver the same value from the switch.
6531 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6532 DL, TTI, /*MaxUniqueResults*/ 2))
6533 return false;
6534
6535 assert(PHI != nullptr && "PHI for value select not found");
6536 Builder.SetInsertPoint(SI);
6537 SmallVector<uint32_t, 4> BranchWeights;
6539 [[maybe_unused]] auto HasWeights =
6541 assert(!HasWeights == (BranchWeights.empty()));
6542 }
6543 assert(BranchWeights.empty() ||
6544 (BranchWeights.size() >=
6545 UniqueResults.size() + (DefaultResult != nullptr)));
6546
6547 Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6548 Builder, DL, BranchWeights);
6549 if (!SelectValue)
6550 return false;
6551
6552 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6553 return true;
6554}
6555
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
///
/// The constructor inspects the case values/results and picks the cheapest
/// representation (see the Kind enum below); replaceSwitch() then emits the
/// IR that computes the result from the index.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

private:
  // Depending on the switch, there are different alternatives.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6631
6632SwitchReplacement::SwitchReplacement(
6633 Module &M, uint64_t TableSize, ConstantInt *Offset,
6634 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6635 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
6636 : DefaultValue(DefaultValue) {
6637 assert(Values.size() && "Can't build lookup table without values!");
6638 assert(TableSize >= Values.size() && "Can't fit values in table!");
6639
6640 // If all values in the table are equal, this is that value.
6641 SingleValue = Values.begin()->second;
6642
6643 ValueType = Values.begin()->second->getType();
6644
6645 // Build up the table contents.
6646 SmallVector<Constant *, 64> TableContents(TableSize);
6647 for (const auto &[CaseVal, CaseRes] : Values) {
6648 assert(CaseRes->getType() == ValueType);
6649
6650 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6651 TableContents[Idx] = CaseRes;
6652
6653 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6654 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6655 }
6656
6657 // Fill in any holes in the table with the default result.
6658 if (Values.size() < TableSize) {
6659 assert(DefaultValue &&
6660 "Need a default value to fill the lookup table holes.");
6661 assert(DefaultValue->getType() == ValueType);
6662 for (uint64_t I = 0; I < TableSize; ++I) {
6663 if (!TableContents[I])
6664 TableContents[I] = DefaultValue;
6665 }
6666
6667 // If the default value is poison, all the holes are poison.
6668 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6669
6670 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6671 SingleValue = nullptr;
6672 }
6673
6674 // If each element in the table contains the same value, we only need to store
6675 // that single value.
6676 if (SingleValue) {
6677 Kind = SingleValueKind;
6678 return;
6679 }
6680
6681 // Check if we can derive the value with a linear transformation from the
6682 // table index.
6684 bool LinearMappingPossible = true;
6685 APInt PrevVal;
6686 APInt DistToPrev;
6687 // When linear map is monotonic and signed overflow doesn't happen on
6688 // maximum index, we can attach nsw on Add and Mul.
6689 bool NonMonotonic = false;
6690 assert(TableSize >= 2 && "Should be a SingleValue table.");
6691 // Check if there is the same distance between two consecutive values.
6692 for (uint64_t I = 0; I < TableSize; ++I) {
6693 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6694
6695 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6696 // This is an poison, so it's (probably) a lookup table hole.
6697 // To prevent any regressions from before we switched to using poison as
6698 // the default value, holes will fall back to using the first value.
6699 // This can be removed once we add proper handling for poisons in lookup
6700 // tables.
6701 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6702 }
6703
6704 if (!ConstVal) {
6705 // This is an undef. We could deal with it, but undefs in lookup tables
6706 // are very seldom. It's probably not worth the additional complexity.
6707 LinearMappingPossible = false;
6708 break;
6709 }
6710 const APInt &Val = ConstVal->getValue();
6711 if (I != 0) {
6712 APInt Dist = Val - PrevVal;
6713 if (I == 1) {
6714 DistToPrev = Dist;
6715 } else if (Dist != DistToPrev) {
6716 LinearMappingPossible = false;
6717 break;
6718 }
6719 NonMonotonic |=
6720 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6721 }
6722 PrevVal = Val;
6723 }
6724 if (LinearMappingPossible) {
6725 LinearOffset = cast<ConstantInt>(TableContents[0]);
6726 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6727 APInt M = LinearMultiplier->getValue();
6728 bool MayWrap = true;
6729 if (isIntN(M.getBitWidth(), TableSize - 1))
6730 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6731 LinearMapValWrapped = NonMonotonic || MayWrap;
6732 Kind = LinearMapKind;
6733 return;
6734 }
6735 }
6736
6737 // If the type is integer and the table fits in a register, build a bitmap.
6738 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6740 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6741 for (uint64_t I = TableSize; I > 0; --I) {
6742 TableInt <<= IT->getBitWidth();
6743 // Insert values into the bitmap. Undef values are set to zero.
6744 if (!isa<UndefValue>(TableContents[I - 1])) {
6745 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6746 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6747 }
6748 }
6749 BitMap = ConstantInt::get(M.getContext(), TableInt);
6750 BitMapElementTy = IT;
6751 Kind = BitMapKind;
6752 return;
6753 }
6754
6755 // Store the table in an array.
6756 auto *TableTy = ArrayType::get(ValueType, TableSize);
6757 Initializer = ConstantArray::get(TableTy, TableContents);
6758
6759 Kind = LookupTableKind;
6760}
6761
6762Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
6763 const DataLayout &DL, Function *Func) {
6764 switch (Kind) {
6765 case SingleValueKind:
6766 return SingleValue;
6767 case LinearMapKind: {
6768 ++NumLinearMaps;
6769 // Derive the result value from the input value.
6770 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6771 false, "switch.idx.cast");
6772 if (!LinearMultiplier->isOne())
6773 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6774 /*HasNUW = */ false,
6775 /*HasNSW = */ !LinearMapValWrapped);
6776
6777 if (!LinearOffset->isZero())
6778 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6779 /*HasNUW = */ false,
6780 /*HasNSW = */ !LinearMapValWrapped);
6781 return Result;
6782 }
6783 case BitMapKind: {
6784 ++NumBitMaps;
6785 // Type of the bitmap (e.g. i59).
6786 IntegerType *MapTy = BitMap->getIntegerType();
6787
6788 // Cast Index to the same type as the bitmap.
6789 // Note: The Index is <= the number of elements in the table, so
6790 // truncating it to the width of the bitmask is safe.
6791 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6792
6793 // Multiply the shift amount by the element width. NUW/NSW can always be
6794 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6795 // BitMap's bit width.
6796 ShiftAmt = Builder.CreateMul(
6797 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6798 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6799
6800 // Shift down.
6801 Value *DownShifted =
6802 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6803 // Mask off.
6804 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6805 }
6806 case LookupTableKind: {
6807 ++NumLookupTables;
6808 auto *Table =
6809 new GlobalVariable(*Func->getParent(), Initializer->getType(),
6810 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
6811 Initializer, "switch.table." + Func->getName());
6812 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6813 // Set the alignment to that of an array items. We will be only loading one
6814 // value out of it.
6815 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
6816 Type *IndexTy = DL.getIndexType(Table->getType());
6817 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
6818
6819 if (Index->getType() != IndexTy) {
6820 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
6821 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
6822 if (auto *Zext = dyn_cast<ZExtInst>(Index))
6823 Zext->setNonNeg(
6824 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
6825 }
6826
6827 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
6828 Value *GEP =
6829 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
6830 return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
6831 }
6832 }
6833 llvm_unreachable("Unknown helper kind!");
6834}
6835
6836bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6837 uint64_t TableSize,
6838 Type *ElementType) {
6839 auto *IT = dyn_cast<IntegerType>(ElementType);
6840 if (!IT)
6841 return false;
6842 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6843 // are <= 15, we could try to narrow the type.
6844
6845 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6846 if (TableSize >= UINT_MAX / IT->getBitWidth())
6847 return false;
6848 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6849}
6850
6852 const DataLayout &DL) {
6853 // Allow any legal type.
6854 if (TTI.isTypeLegal(Ty))
6855 return true;
6856
6857 auto *IT = dyn_cast<IntegerType>(Ty);
6858 if (!IT)
6859 return false;
6860
6861 // Also allow power of 2 integer types that have at least 8 bits and fit in
6862 // a register. These types are common in frontend languages and targets
6863 // usually support loads of these types.
6864 // TODO: We could relax this to any integer that fits in a register and rely
6865 // on ABI alignment and padding in the table to allow the load to be widened.
6866 // Or we could widen the constants and truncate the load.
6867 unsigned BitWidth = IT->getBitWidth();
6868 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6869 DL.fitsInLegalInteger(IT->getBitWidth());
6870}
6871
/// Return the constant this replacement produces for the default case (for a
/// lookup table this is also the filler for holes; it may be poison when the
/// holes are known to be unreachable -- see how DefaultVal is chosen by the
/// caller that constructs the SwitchReplacement).
Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
6873
/// Whether this replacement is lowered as a lookup table (Kind ==
/// LookupTableKind) rather than one of the other replacement strategies.
bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
6875
/// Return true if a switch with \p NumCases populated cases spread over a
/// range of \p CaseRange values is dense enough to be worth lowering as a
/// table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Bail out when either multiplication below could wrap around.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Density check without division: NumCases / CaseRange >= MinDensity%.
  uint64_t ScaledCases = NumCases * 100;
  uint64_t RequiredCases = CaseRange * MinDensity;
  return ScaledCases >= RequiredCases;
}
6887
6889 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6890 uint64_t Range = Diff + 1;
6891 if (Range < Diff)
6892 return false; // Overflow.
6893
6894 return isSwitchDense(Values.size(), Range);
6895}
6896
6897/// Determine whether a lookup table should be built for this switch, based on
6898/// the number of cases, size of the table, and the types of the results.
6899// TODO: We could support larger than legal types by limiting based on the
6900// number of loads required and/or table size. If the constants are small we
6901// could use smaller table entries and extend after the load.
6903 const TargetTransformInfo &TTI,
6904 const DataLayout &DL,
6905 const SmallVector<Type *> &ResultTypes) {
6906 if (SI->getNumCases() > TableSize)
6907 return false; // TableSize overflowed.
6908
6909 bool AllTablesFitInRegister = true;
6910 bool HasIllegalType = false;
6911 for (const auto &Ty : ResultTypes) {
6912 // Saturate this flag to true.
6913 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6914
6915 // Saturate this flag to false.
6916 AllTablesFitInRegister =
6917 AllTablesFitInRegister &&
6918 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
6919
6920 // If both flags saturate, we're done. NOTE: This *only* works with
6921 // saturating flags, and all flags have to saturate first due to the
6922 // non-deterministic behavior of iterating over a dense map.
6923 if (HasIllegalType && !AllTablesFitInRegister)
6924 break;
6925 }
6926
6927 // If each table would fit in a register, we should build it anyway.
6928 if (AllTablesFitInRegister)
6929 return true;
6930
6931 // Don't build a table that doesn't fit in-register if it has illegal types.
6932 if (HasIllegalType)
6933 return false;
6934
6935 return isSwitchDense(SI->getNumCases(), TableSize);
6936}
6937
6939 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6940 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
6941 const DataLayout &DL, const TargetTransformInfo &TTI) {
6942 if (MinCaseVal.isNullValue())
6943 return true;
6944 if (MinCaseVal.isNegative() ||
6945 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6946 !HasDefaultResults)
6947 return false;
6948 return all_of(ResultTypes, [&](const auto &ResultType) {
6949 return SwitchReplacement::wouldFitInRegister(
6950 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
6951 });
6952}
6953
6954/// Try to reuse the switch table index compare. Following pattern:
6955/// \code
6956/// if (idx < tablesize)
6957/// r = table[idx]; // table does not contain default_value
6958/// else
6959/// r = default_value;
6960/// if (r != default_value)
6961/// ...
6962/// \endcode
6963/// Is optimized to:
6964/// \code
6965/// cond = idx < tablesize;
6966/// if (cond)
6967/// r = table[idx];
6968/// else
6969/// r = default_value;
6970/// if (cond)
6971/// ...
6972/// \endcode
6973/// Jump threading will then eliminate the second if(cond).
6975 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6976 Constant *DefaultValue,
6977 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6979 if (!CmpInst)
6980 return;
6981
6982 // We require that the compare is in the same block as the phi so that jump
6983 // threading can do its work afterwards.
6984 if (CmpInst->getParent() != PhiBlock)
6985 return;
6986
6988 if (!CmpOp1)
6989 return;
6990
6991 Value *RangeCmp = RangeCheckBranch->getCondition();
6992 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6993 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6994
6995 // Check if the compare with the default value is constant true or false.
6996 const DataLayout &DL = PhiBlock->getDataLayout();
6998 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6999 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7000 return;
7001
7002 // Check if the compare with the case values is distinct from the default
7003 // compare result.
7004 for (auto ValuePair : Values) {
7006 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
7007 if (!CaseConst || CaseConst == DefaultConst ||
7008 (CaseConst != TrueConst && CaseConst != FalseConst))
7009 return;
7010 }
7011
7012 // Check if the branch instruction dominates the phi node. It's a simple
7013 // dominance check, but sufficient for our needs.
7014 // Although this check is invariant in the calling loops, it's better to do it
7015 // at this late stage. Practically we do it at most once for a switch.
7016 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7017 for (BasicBlock *Pred : predecessors(PhiBlock)) {
7018 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7019 return;
7020 }
7021
7022 if (DefaultConst == FalseConst) {
7023 // The compare yields the same result. We can replace it.
7024 CmpInst->replaceAllUsesWith(RangeCmp);
7025 ++NumTableCmpReuses;
7026 } else {
7027 // The compare yields the same result, just inverted. We can replace it.
7028 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7029 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
7030 RangeCheckBranch->getIterator());
7031 CmpInst->replaceAllUsesWith(InvertedTableCmp);
7032 ++NumTableCmpReuses;
7033 }
7034}
7035
7036/// If the switch is only used to initialize one or more phi nodes in a common
7037/// successor block with different constant values, replace the switch with
7038/// lookup tables.
7040 DomTreeUpdater *DTU, const DataLayout &DL,
7041 const TargetTransformInfo &TTI) {
7042 assert(SI->getNumCases() > 1 && "Degenerate switch?");
7043
7044 BasicBlock *BB = SI->getParent();
7045 Function *Fn = BB->getParent();
7046
7047 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
7048 // split off a dense part and build a lookup table for that.
7049
7050 // FIXME: This creates arrays of GEPs to constant strings, which means each
7051 // GEP needs a runtime relocation in PIC code. We should just build one big
7052 // string and lookup indices into that.
7053
7054 // Ignore switches with less than three cases. Lookup tables will not make
7055 // them faster, so we don't analyze them.
7056 if (SI->getNumCases() < 3)
7057 return false;
7058
7059 // Figure out the corresponding result for each case value and phi node in the
7060 // common destination, as well as the min and max case values.
7061 assert(!SI->cases().empty());
7062 SwitchInst::CaseIt CI = SI->case_begin();
7063 ConstantInt *MinCaseVal = CI->getCaseValue();
7064 ConstantInt *MaxCaseVal = CI->getCaseValue();
7065
7066 BasicBlock *CommonDest = nullptr;
7067
7068 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7070
7072 SmallVector<Type *> ResultTypes;
7074
7075 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7076 ConstantInt *CaseVal = CI->getCaseValue();
7077 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
7078 MinCaseVal = CaseVal;
7079 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
7080 MaxCaseVal = CaseVal;
7081
7082 // Resulting value at phi nodes for this case value.
7084 ResultsTy Results;
7085 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
7086 Results, DL, TTI))
7087 return false;
7088
7089 // Append the result and result types from this case to the list for each
7090 // phi.
7091 for (const auto &I : Results) {
7092 PHINode *PHI = I.first;
7093 Constant *Value = I.second;
7094 auto [It, Inserted] = ResultLists.try_emplace(PHI);
7095 if (Inserted)
7096 PHIs.push_back(PHI);
7097 It->second.push_back(std::make_pair(CaseVal, Value));
7098 ResultTypes.push_back(PHI->getType());
7099 }
7100 }
7101
7102 // If the table has holes, we need a constant result for the default case
7103 // or a bitmask that fits in a register.
7104 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7105 bool HasDefaultResults =
7106 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
7107 DefaultResultsList, DL, TTI);
7108 for (const auto &I : DefaultResultsList) {
7109 PHINode *PHI = I.first;
7110 Constant *Result = I.second;
7111 DefaultResults[PHI] = Result;
7112 }
7113
7114 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7115 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7116 uint64_t TableSize;
7117 ConstantInt *TableIndexOffset;
7118 if (UseSwitchConditionAsTableIndex) {
7119 TableSize = MaxCaseVal->getLimitedValue() + 1;
7120 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7121 } else {
7122 TableSize =
7123 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7124
7125 TableIndexOffset = MinCaseVal;
7126 }
7127
7128 // If the default destination is unreachable, or if the lookup table covers
7129 // all values of the conditional variable, branch directly to the lookup table
7130 // BB. Otherwise, check that the condition is within the case range.
7131 uint64_t NumResults = ResultLists[PHIs[0]].size();
7132 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7133
7134 bool TableHasHoles = (NumResults < TableSize);
7135
7136 // If the table has holes but the default destination doesn't produce any
7137 // constant results, the lookup table entries corresponding to the holes will
7138 // contain poison.
7139 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7140
7141 // If the default destination doesn't produce a constant result but is still
7142 // reachable, and the lookup table has holes, we need to use a mask to
7143 // determine if the current index should load from the lookup table or jump
7144 // to the default case.
7145 // The mask is unnecessary if the table has holes but the default destination
7146 // is unreachable, as in that case the holes must also be unreachable.
7147 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7148 if (NeedMask) {
7149 // As an extra penalty for the validity test we require more cases.
7150 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7151 return false;
7152 if (!DL.fitsInLegalInteger(TableSize))
7153 return false;
7154 }
7155
7156 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7157 return false;
7158
7159 // Compute the table index value.
7160 Value *TableIndex;
7161 if (UseSwitchConditionAsTableIndex) {
7162 TableIndex = SI->getCondition();
7163 if (HasDefaultResults) {
7164 // Grow the table to cover all possible index values to avoid the range
7165 // check. It will use the default result to fill in the table hole later,
7166      // so make sure it exists.
7167 ConstantRange CR =
7168 computeConstantRange(TableIndex, /* ForSigned */ false);
7169      // Growing the table shouldn't have any size impact, as we check
7170 // wouldFitInRegister.
7171 // TODO: Consider growing the table also when it doesn't fit in a register
7172 // if no optsize is specified.
7173 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7174 if (!CR.isUpperWrapped() &&
7175 all_of(ResultTypes, [&](const auto &ResultType) {
7176 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7177 ResultType);
7178 })) {
7179 // There may be some case index larger than the UpperBound (unreachable
7180 // case), so make sure the table size does not get smaller.
7181 TableSize = std::max(UpperBound, TableSize);
7182 // The default branch is unreachable after we enlarge the lookup table.
7183 // Adjust DefaultIsReachable to reuse code path.
7184 DefaultIsReachable = false;
7185 }
7186 }
7187 }
7188
7189 // Keep track of the switch replacement for each phi
7191 for (PHINode *PHI : PHIs) {
7192 const auto &ResultList = ResultLists[PHI];
7193
7194 Type *ResultType = ResultList.begin()->second->getType();
7195 // Use any value to fill the lookup table holes.
7197 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7198 StringRef FuncName = Fn->getName();
7199 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7200 ResultList, DefaultVal, DL, FuncName);
7201 PhiToReplacementMap.insert({PHI, Replacement});
7202 }
7203
7204 bool AnyLookupTables = any_of(
7205 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7206
7207 // A few conditions prevent the generation of lookup tables:
7208 // 1. The target does not support lookup tables.
7209 // 2. The "no-jump-tables" function attribute is set.
7210 // However, these objections do not apply to other switch replacements, like
7211 // the bitmap, so we only stop here if any of these conditions are met and we
7212 // want to create a LUT. Otherwise, continue with the switch replacement.
7213 if (AnyLookupTables &&
7214 (!TTI.shouldBuildLookupTables() ||
7215 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7216 return false;
7217
7218 Builder.SetInsertPoint(SI);
7219 // TableIndex is the switch condition - TableIndexOffset if we don't
7220 // use the condition directly
7221 if (!UseSwitchConditionAsTableIndex) {
7222 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7223 // we can try to attach nsw.
7224 bool MayWrap = true;
7225 if (!DefaultIsReachable) {
7226 APInt Res =
7227 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7228 (void)Res;
7229 }
7230 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7231 "switch.tableidx", /*HasNUW =*/false,
7232 /*HasNSW =*/!MayWrap);
7233 }
7234
7235 std::vector<DominatorTree::UpdateType> Updates;
7236
7237 // Compute the maximum table size representable by the integer type we are
7238 // switching upon.
7239 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7240 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7241 assert(MaxTableSize >= TableSize &&
7242 "It is impossible for a switch to have more entries than the max "
7243 "representable value of its input integer type's size.");
7244
7245 // Create the BB that does the lookups.
7246 Module &Mod = *CommonDest->getParent()->getParent();
7247 BasicBlock *LookupBB = BasicBlock::Create(
7248 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7249
7250 BranchInst *RangeCheckBranch = nullptr;
7251
7252 Builder.SetInsertPoint(SI);
7253 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7254 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7255 Builder.CreateBr(LookupBB);
7256 if (DTU)
7257 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7258    // Note: We call removePredecessor later since we need to be able to get the
7259 // PHI value for the default case in case we're using a bit mask.
7260 } else {
7261 Value *Cmp = Builder.CreateICmpULT(
7262 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7263 RangeCheckBranch =
7264 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7265 if (DTU)
7266 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7267 }
7268
7269 // Populate the BB that does the lookups.
7270 Builder.SetInsertPoint(LookupBB);
7271
7272 if (NeedMask) {
7273 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7274 // re-purposed to do the hole check, and we create a new LookupBB.
7275 BasicBlock *MaskBB = LookupBB;
7276 MaskBB->setName("switch.hole_check");
7277 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7278 CommonDest->getParent(), CommonDest);
7279
7280 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7281 // unnecessary illegal types.
7282 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7283 APInt MaskInt(TableSizePowOf2, 0);
7284 APInt One(TableSizePowOf2, 1);
7285 // Build bitmask; fill in a 1 bit for every case.
7286 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7287 for (const auto &Result : ResultList) {
7288 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7289 .getLimitedValue();
7290 MaskInt |= One << Idx;
7291 }
7292 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7293
7294 // Get the TableIndex'th bit of the bitmask.
7295 // If this bit is 0 (meaning hole) jump to the default destination,
7296 // else continue with table lookup.
7297 IntegerType *MapTy = TableMask->getIntegerType();
7298 Value *MaskIndex =
7299 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7300 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7301 Value *LoBit = Builder.CreateTrunc(
7302 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7303 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7304 if (DTU) {
7305 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7306 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7307 }
7308 Builder.SetInsertPoint(LookupBB);
7309 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7310 }
7311
7312 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7313 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7314 // do not delete PHINodes here.
7315 SI->getDefaultDest()->removePredecessor(BB,
7316 /*KeepOneInputPHIs=*/true);
7317 if (DTU)
7318 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7319 }
7320
7321 for (PHINode *PHI : PHIs) {
7322 const ResultListTy &ResultList = ResultLists[PHI];
7323 auto Replacement = PhiToReplacementMap.at(PHI);
7324 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7325 // Do a small peephole optimization: re-use the switch table compare if
7326 // possible.
7327 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7328 BasicBlock *PhiBlock = PHI->getParent();
7329 // Search for compare instructions which use the phi.
7330 for (auto *User : PHI->users()) {
7331 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7332 Replacement.getDefaultValue(), ResultList);
7333 }
7334 }
7335
7336 PHI->addIncoming(Result, LookupBB);
7337 }
7338
7339 Builder.CreateBr(CommonDest);
7340 if (DTU)
7341 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7342
7343 // Remove the switch.
7344 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7345 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7346 BasicBlock *Succ = SI->getSuccessor(i);
7347
7348 if (Succ == SI->getDefaultDest())
7349 continue;
7350 Succ->removePredecessor(BB);
7351 if (DTU && RemovedSuccessors.insert(Succ).second)
7352 Updates.push_back({DominatorTree::Delete, BB, Succ});
7353 }
7354 SI->eraseFromParent();
7355
7356 if (DTU)
7357 DTU->applyUpdates(Updates);
7358
7359 if (NeedMask)
7360 ++NumLookupTablesHoles;
7361 return true;
7362}
7363
7364/// Try to transform a switch that has "holes" in it to a contiguous sequence
7365/// of cases.
7366///
7367/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7368/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7369///
7370/// This converts a sparse switch into a dense switch which allows better
7371/// lowering and could also allow transforming into a lookup table.
7373 const DataLayout &DL,
7374 const TargetTransformInfo &TTI) {
7375 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7376 if (CondTy->getIntegerBitWidth() > 64 ||
7377 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7378 return false;
7379 // Only bother with this optimization if there are more than 3 switch cases;
7380 // SDAG will only bother creating jump tables for 4 or more cases.
7381 if (SI->getNumCases() < 4)
7382 return false;
7383
7384 // This transform is agnostic to the signedness of the input or case values. We
7385 // can treat the case values as signed or unsigned. We can optimize more common
7386 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7387 // as signed.
7389 for (const auto &C : SI->cases())
7390 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7391 llvm::sort(Values);
7392
7393 // If the switch is already dense, there's nothing useful to do here.
7394 if (isSwitchDense(Values))
7395 return false;
7396
7397 // First, transform the values such that they start at zero and ascend.
7398 int64_t Base = Values[0];
7399 for (auto &V : Values)
7400 V -= (uint64_t)(Base);
7401
7402 // Now we have signed numbers that have been shifted so that, given enough
7403 // precision, there are no negative values. Since the rest of the transform
7404 // is bitwise only, we switch now to an unsigned representation.
7405
7406 // This transform can be done speculatively because it is so cheap - it
7407 // results in a single rotate operation being inserted.
7408
7409 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7410 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7411 // less than 64.
7412 unsigned Shift = 64;
7413 for (auto &V : Values)
7414 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7415 assert(Shift < 64);
7416 if (Shift > 0)
7417 for (auto &V : Values)
7418 V = (int64_t)((uint64_t)V >> Shift);
7419
7420 if (!isSwitchDense(Values))
7421 // Transform didn't create a dense switch.
7422 return false;
7423
7424 // The obvious transform is to shift the switch condition right and emit a
7425 // check that the condition actually cleanly divided by GCD, i.e.
7426 // C & (1 << Shift - 1) == 0
7427 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7428 //
7429 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7430 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7431 // are nonzero then the switch condition will be very large and will hit the
7432 // default case.
7433
7434 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7435 Builder.SetInsertPoint(SI);
7436 Value *Sub =
7437 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7438 Value *Rot = Builder.CreateIntrinsic(
7439 Ty, Intrinsic::fshl,
7440 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7441 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7442
7443 for (auto Case : SI->cases()) {
7444 auto *Orig = Case.getCaseValue();
7445 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7446 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7447 }
7448 return true;
7449}
7450
7451/// Tries to transform switch of powers of two to reduce switch range.
7452/// For example, switch like:
7453/// switch (C) { case 1: case 2: case 64: case 128: }
7454/// will be transformed to:
7455/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7456///
7457/// This transformation allows better lowering and may transform the switch
7458/// instruction into a sequence of bit manipulation and a smaller
7459/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7460/// address of the jump target, and indirectly jump to it).
7462 const DataLayout &DL,
7463 const TargetTransformInfo &TTI) {
7464 Value *Condition = SI->getCondition();
7465 LLVMContext &Context = SI->getContext();
7466 auto *CondTy = cast<IntegerType>(Condition->getType());
7467
7468 if (CondTy->getIntegerBitWidth() > 64 ||
7469 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7470 return false;
7471
7472 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7473 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7474 {Condition, ConstantInt::getTrue(Context)});
7475 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7476 TTI::TCC_Basic * 2)
7477 return false;
7478
7479 // Only bother with this optimization if there are more than 3 switch cases.
7480 // SDAG will start emitting jump tables for 4 or more cases.
7481 if (SI->getNumCases() < 4)
7482 return false;
7483
7484 // We perform this optimization only for switches with
7485 // unreachable default case.
7486  // This assumption will save us from checking if `Condition` is a power of two.
7487 if (!SI->defaultDestUnreachable())
7488 return false;
7489
7490 // Check that switch cases are powers of two.
7492 for (const auto &Case : SI->cases()) {
7493 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7494 if (llvm::has_single_bit(CaseValue))
7495 Values.push_back(CaseValue);
7496 else
7497 return false;
7498 }
7499
7500  // isSwitchDense requires case values to be sorted.
7501 llvm::sort(Values);
7502 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7503 llvm::countr_zero(Values.front()) + 1))
7504 // Transform is unable to generate dense switch.
7505 return false;
7506
7507 Builder.SetInsertPoint(SI);
7508
7509 // Replace each case with its trailing zeros number.
7510 for (auto &Case : SI->cases()) {
7511 auto *OrigValue = Case.getCaseValue();
7512 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7513 OrigValue->getValue().countr_zero()));
7514 }
7515
7516 // Replace condition with its trailing zeros number.
7517 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7518 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7519
7520 SI->setCondition(ConditionTrailingZeros);
7521
7522 return true;
7523}
7524
7525/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7526/// the same destination.
7528 DomTreeUpdater *DTU) {
7529 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7530 if (!Cmp || !Cmp->hasOneUse())
7531 return false;
7532
7534 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7535 if (!HasWeights)
7536 Weights.resize(4); // Avoid checking HasWeights everywhere.
7537
7538 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7539 int64_t Res;
7540 BasicBlock *Succ, *OtherSucc;
7541 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7542 BasicBlock *Unreachable = nullptr;
7543
7544 if (SI->getNumCases() == 2) {
7545 // Find which of 1, 0 or -1 is missing (handled by default dest).
7546 SmallSet<int64_t, 3> Missing;
7547 Missing.insert(1);
7548 Missing.insert(0);
7549 Missing.insert(-1);
7550
7551 Succ = SI->getDefaultDest();
7552 SuccWeight = Weights[0];
7553 OtherSucc = nullptr;
7554 for (auto &Case : SI->cases()) {
7555 std::optional<int64_t> Val =
7556 Case.getCaseValue()->getValue().trySExtValue();
7557 if (!Val)
7558 return false;
7559 if (!Missing.erase(*Val))
7560 return false;
7561 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7562 return false;
7563 OtherSucc = Case.getCaseSuccessor();
7564 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7565 }
7566
7567 assert(Missing.size() == 1 && "Should have one case left");
7568 Res = *Missing.begin();
7569 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7570 // Normalize so that Succ is taken once and OtherSucc twice.
7571 Unreachable = SI->getDefaultDest();
7572 Succ = OtherSucc = nullptr;
7573 for (auto &Case : SI->cases()) {
7574 BasicBlock *NewSucc = Case.getCaseSuccessor();
7575 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7576 if (!OtherSucc || OtherSucc == NewSucc) {
7577 OtherSucc = NewSucc;
7578 OtherSuccWeight += Weight;
7579 } else if (!Succ) {
7580 Succ = NewSucc;
7581 SuccWeight = Weight;
7582 } else if (Succ == NewSucc) {
7583 std::swap(Succ, OtherSucc);
7584 std::swap(SuccWeight, OtherSuccWeight);
7585 } else
7586 return false;
7587 }
7588 for (auto &Case : SI->cases()) {
7589 std::optional<int64_t> Val =
7590 Case.getCaseValue()->getValue().trySExtValue();
7591 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7592 return false;
7593 if (Case.getCaseSuccessor() == Succ) {
7594 Res = *Val;
7595 break;
7596 }
7597 }
7598 } else {
7599 return false;
7600 }
7601
7602 // Determine predicate for the missing case.
7604 switch (Res) {
7605 case 1:
7606 Pred = ICmpInst::ICMP_UGT;
7607 break;
7608 case 0:
7609 Pred = ICmpInst::ICMP_EQ;
7610 break;
7611 case -1:
7612 Pred = ICmpInst::ICMP_ULT;
7613 break;
7614 }
7615 if (Cmp->isSigned())
7616 Pred = ICmpInst::getSignedPredicate(Pred);
7617
7618 MDNode *NewWeights = nullptr;
7619 if (HasWeights)
7620 NewWeights = MDBuilder(SI->getContext())
7621 .createBranchWeights(SuccWeight, OtherSuccWeight);
7622
7623 BasicBlock *BB = SI->getParent();
7624 Builder.SetInsertPoint(SI->getIterator());
7625 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7626 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7627 SI->getMetadata(LLVMContext::MD_unpredictable));
7628 OtherSucc->removePredecessor(BB);
7629 if (Unreachable)
7630 Unreachable->removePredecessor(BB);
7631 SI->eraseFromParent();
7632 Cmp->eraseFromParent();
7633 if (DTU && Unreachable)
7634 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7635 return true;
7636}
7637
7638/// Checking whether two cases of SI are equal depends on the contents of the
7639/// BasicBlock and the incoming values of their successor PHINodes.
7640/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7641/// calling this function on each BasicBlock every time isEqual is called,
7642/// especially since the same BasicBlock may be passed as an argument multiple
7643/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7644/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7645/// of the incoming values.
7650
// DenseMapInfo specialization that lets SwitchSuccWrapper pointers be used as
// DenseSet keys while hashing and comparing them by the *contents* they wrap
// (destination block + incoming PHI values) rather than by pointer identity.
// Its only user is simplifyDuplicateSwitchArms below.
7651namespace llvm {
7652template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
// NOTE(review): the getEmptyKey()/getTombstoneKey() signatures are elided in
// this view; the visible bodies appear to cast the corresponding
// DenseMapInfo<SwitchSuccWrapper *> sentinel keys — confirm against upstream.
7654 return static_cast<SwitchSuccWrapper *>(
7656 }
7658 return static_cast<SwitchSuccWrapper *>(
7660 }
// Hash a wrapper by the block its single unconditional branch targets plus
// the PHI values that target receives, looked up in the precomputed
// PhiPredIVs map carried inside the wrapper.
7661 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7662 BasicBlock *Succ = SSW->Dest;
// NOTE(review): the declaration of BI is elided in this view; presumably
// BranchInst *BI = cast<BranchInst>(Succ->getTerminator()); — confirm.
7664 assert(BI->isUnconditional() &&
7665 "Only supporting unconditional branches for now");
7666 assert(BI->getNumSuccessors() == 1 &&
7667 "Expected unconditional branches to have one successor");
7668 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7669
7670 // Since we assume the BB is just a single BranchInst with a single
7671 // successor, we hash as the BB and the incoming Values of its successor
7672 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7673 // including the incoming PHI values leads to better performance.
7674 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7675 // time and passing it in SwitchSuccWrapper, but this slowed down the
7676 // average compile time without having any impact on the worst case compile
7677 // time.
7678 BasicBlock *BB = BI->getSuccessor(0);
7679 SmallVector<Value *> PhiValsForBB;
7680 for (PHINode &Phi : BB->phis())
7681 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7682
7683 return hash_combine(BB, hash_combine_range(PhiValsForBB));
7684 }
// Two wrappers are equal when their blocks branch unconditionally to the same
// successor and feed identical values into every PHI of that successor.
7685 static bool isEqual(const SwitchSuccWrapper *LHS,
7686 const SwitchSuccWrapper *RHS) {
// NOTE(review): the declarations of EKey/TKey (empty/tombstone sentinels) are
// elided in this view; presumably initialized from getEmptyKey() and
// getTombstoneKey() above — confirm.
7689 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7690 return LHS == RHS;
7691
7692 BasicBlock *A = LHS->Dest;
7693 BasicBlock *B = RHS->Dest;
7694
7695 // FIXME: we checked that the size of A and B are both 1 in
7696 // simplifyDuplicateSwitchArms to make the Case list smaller to
7697 // improve performance. If we decide to support BasicBlocks with more
7698 // than just a single instruction, we need to check that A.size() ==
7699 // B.size() here, and we need to check more than just the BranchInsts
7700 // for equality.
7701
7702 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7703 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7704 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7705 "Only supporting unconditional branches for now");
7706 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7707 return false;
7708
7709 // Need to check that PHIs in successor have matching values
7710 BasicBlock *Succ = ABI->getSuccessor(0);
7711 for (PHINode &Phi : Succ->phis()) {
7712 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7713 if (PredIVs[A] != PredIVs[B])
7714 return false;
7715 }
7716
7717 return true;
7718 }
7719};
7720} // namespace llvm
7721
/// Merge switch arms whose single-instruction destination blocks branch
/// unconditionally to the same successor with identical incoming PHI values.
/// Duplicate case destinations are redirected to one representative block.
/// Returns true if any case successor of SI was rewritten.
7722bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7723 DomTreeUpdater *DTU) {
7724 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7725 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7726 // an entire PHI at once after the loop, opposed to calling
7727 // getIncomingValueForBlock inside this loop, since each call to
7728 // getIncomingValueForBlock is O(|Preds|).
7729 SmallPtrSet<PHINode *, 8> Phis;
7730 SmallPtrSet<BasicBlock *, 8> Seen;
7731 DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
7732 DenseMap<BasicBlock *, SmallVector<unsigned, 32>> BBToSuccessorIndexes;
// NOTE(review): the declaration of Cases is elided in this view; presumably a
// SmallVector<SwitchSuccWrapper> — confirm against upstream.
7734 Cases.reserve(SI->getNumSuccessors());
7735
7736 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7737 BasicBlock *BB = SI->getSuccessor(I);
7738
7739 // FIXME: Support more than just a single BranchInst. One way we could do
7740 // this is by taking a hashing approach of all insts in BB.
7741 if (BB->size() != 1)
7742 continue;
7743
7744 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7745 // on other kinds of terminators. We decide to only support unconditional
7746 // branches for now for compile time reasons.
7747 auto *BI = dyn_cast<BranchInst>(BB->getTerminator())
7748 if (!BI || BI->isConditional())
7749 continue;
7750
7751 if (!Seen.insert(BB).second) {
7752 auto It = BBToSuccessorIndexes.find(BB);
7753 if (It != BBToSuccessorIndexes.end())
7754 It->second.emplace_back(I);
7755 continue;
7756 }
7757
7758 // FIXME: This case needs some extra care because the terminators other than
7759 // SI need to be updated. For now, consider only backedges to the SI.
7760 if (BB->getUniquePredecessor() != SI->getParent())
7761 continue;
7762
7763 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7764 for (BasicBlock *Succ : BI->successors())
// NOTE(review): the loop body is elided in this view; presumably it inserts
// each PHI of Succ into Phis (Phis is populated somewhere before the
// precompute loop below) — confirm against upstream.
7766
7767 // Add the successor only if not previously visited.
7768 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7769 BBToSuccessorIndexes[BB].emplace_back(I);
7770 }
7771
7772 // Precompute a data structure to improve performance of isEqual for
7773 // SwitchSuccWrapper.
7774 PhiPredIVs.reserve(Phis.size());
7775 for (PHINode *Phi : Phis) {
7776 auto &IVs =
7777 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
7778 for (auto &IV : Phi->incoming_values())
7779 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
7780 }
7781
7782 // Build a set such that if the SwitchSuccWrapper exists in the set and
7783 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7784 // which is not in the set should be replaced with the one in the set. If the
7785 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7786 // other SwitchSuccWrappers can check against it in the same manner. We use
7787 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7788 // around information to isEquality, getHashValue, and when doing the
7789 // replacement with better performance.
7790 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
7791 ReplaceWith.reserve(Cases.size());
7792
// NOTE(review): the declaration of Updates is elided in this view; presumably
// SmallVector<DominatorTree::UpdateType>. Also, ReplaceWith is still empty at
// this point, so reserve(ReplaceWith.size()) reserves nothing — Cases.size()
// was likely intended; confirm against upstream.
7794 Updates.reserve(ReplaceWith.size());
7795 bool MadeChange = false;
7796 for (auto &SSW : Cases) {
7797 // SSW is a candidate for simplification. If we find a duplicate BB,
7798 // replace it.
7799 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7800 if (!Inserted) {
7801 // We know that SI's parent BB no longer dominates the old case successor
7802 // since we are making it dead.
7803 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7804 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7805 for (unsigned Idx : Successors)
7806 SI->setSuccessor(Idx, (*It)->Dest);
7807 MadeChange = true;
7808 }
7809 }
7810
7811 if (DTU)
7812 DTU->applyUpdates(Updates);
7813
7814 return MadeChange;
7815}
7816
/// Simplify a switch terminator by attempting a fixed sequence of
/// transformations; each successful transform requests a resimplification of
/// the block via requestResimplify(). The order of attempts matters (e.g.
/// lookup-table conversion is deliberately gated to the late pipeline).
/// Returns true if any transform fired.
7817bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7818 BasicBlock *BB = SI->getParent();
7819
7820 if (isValueEqualityComparison(SI)) {
7821 // If we only have one predecessor, and if it is a branch on this value,
7822 // see if that predecessor totally determines the outcome of this switch.
7823 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7824 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7825 return requestResimplify();
7826
// A switch whose condition is a select can be simplified against the
// select's two arms.
7827 Value *Cond = SI->getCondition();
7828 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7829 if (simplifySwitchOnSelect(SI, Select))
7830 return requestResimplify();
7831
7832 // If the block only contains the switch, see if we can fold the block
7833 // away into any preds.
7834 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7835 if (foldValueComparisonIntoPredecessors(SI, Builder))
7836 return requestResimplify();
7837 }
7838
7839 // Try to transform the switch into an icmp and a branch.
7840 // The conversion from switch to comparison may lose information on
7841 // impossible switch values, so disable it early in the pipeline.
7842 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7843 return requestResimplify();
7844
7845 // Remove unreachable cases.
7846 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7847 return requestResimplify();
7848
7849 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7850 return requestResimplify();
7851
7852 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7853 return requestResimplify();
7854
7855 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7856 return requestResimplify();
7857
7858 // The conversion from switch to lookup tables results in difficult-to-analyze
7859 // code and makes pruning branches much harder. This is a problem if the
7860 // switch expression itself can still be restricted as a result of inlining or
7861 // CVP. Therefore, only apply this transformation during late stages of the
7862 // optimisation pipeline.
7863 if (Options.ConvertSwitchToLookupTable &&
7864 simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
7865 return requestResimplify();
7866
7867 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7868 return requestResimplify();
7869
7870 if (reduceSwitchRange(SI, Builder, DL, TTI))
7871 return requestResimplify();
7872
// Hoist code common to all case destinations into the switch block.
7873 if (HoistCommon &&
7874 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
7875 return requestResimplify();
7876
7877 if (simplifyDuplicateSwitchArms(SI, DTU))
7878 return requestResimplify();
7879
7880 return false;
7881}
7882
/// Simplify an indirectbr terminator: drop duplicate and non-address-taken
/// destinations, degenerate 0/1-destination indirectbrs into unreachable or a
/// direct branch, and simplify an indirectbr on a select. Returns true on any
/// change.
7883bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7884 BasicBlock *BB = IBI->getParent();
7885 bool Changed = false;
7886
7887 // Eliminate redundant destinations.
7888 SmallPtrSet<Value *, 8> Succs;
7889 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
7890 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7891 BasicBlock *Dest = IBI->getDestination(i);
// Remove Dest when its address is never taken (it can't be a real target)
// or when it already appears earlier in the destination list.
7892 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7893 if (!Dest->hasAddressTaken())
7894 RemovedSuccs.insert(Dest);
7895 Dest->removePredecessor(BB);
7896 IBI->removeDestination(i);
// Compensate for the removed slot so the next destination isn't skipped.
7897 --i;
7898 --e;
7899 Changed = true;
7900 }
7901 }
7902
7903 if (DTU) {
7904 std::vector<DominatorTree::UpdateType> Updates;
7905 Updates.reserve(RemovedSuccs.size());
7906 for (auto *RemovedSucc : RemovedSuccs)
7907 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7908 DTU->applyUpdates(Updates);
7909 }
7910
7911 if (IBI->getNumDestinations() == 0) {
7912 // If the indirectbr has no successors, change it to unreachable.
7913 new UnreachableInst(IBI->getContext(), IBI->getIterator());
// NOTE(review): a line is elided here; presumably it erases the now-dead IBI
// (and DCEs its address operand) — confirm against upstream.
7915 return true;
7916 }
7917
7918 if (IBI->getNumDestinations() == 1) {
7919 // If the indirectbr has one successor, change it to a direct branch.
// NOTE(review): the lines creating the replacement unconditional branch and
// erasing IBI are elided in this view — confirm against upstream.
7922 return true;
7923 }
7924
7925 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7926 if (simplifyIndirectBrOnSelect(IBI, SI))
7927 return requestResimplify();
7928 }
7929 return Changed;
7930}
7931
7932/// Given an block with only a single landing pad and a unconditional branch
7933/// try to find another basic block which this one can be merged with. This
7934/// handles cases where we have multiple invokes with unique landing pads, but
7935/// a shared handler.
7936///
7937/// We specifically choose to not worry about merging non-empty blocks
7938/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7939/// practice, the optimizer produces empty landing pad blocks quite frequently
7940/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7941/// sinking in this file)
7942///
7943/// This is primarily a code size optimization. We need to avoid performing
7944/// any transform which might inhibit optimization (such as our ability to
7945/// specialize a particular handler via tail commoning). We do this by not
7946/// merging any blocks which require us to introduce a phi. Since the same
7947/// values are flowing through both blocks, we don't lose any ability to
7948/// specialize. If anything, we make such specialization more likely.
7949///
7950/// TODO - This transformation could remove entries from a phi in the target
7951/// block when the inputs in the phi are the same for the two blocks being
7952/// merged. In some cases, this could result in removal of the PHI entirely.
// NOTE(review): the first line of the signature is elided in this view; from
// the call site in simplifyUncondBranch it is presumably
//   static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
// — confirm against upstream.
7954 BasicBlock *BB, DomTreeUpdater *DTU) {
7955 auto Succ = BB->getUniqueSuccessor();
7956 assert(Succ);
7957 // If there's a phi in the successor block, we'd likely have to introduce
7958 // a phi into the merged landing pad block.
7959 if (isa<PHINode>(*Succ->begin()))
7960 return false;
7961
// Scan the other predecessors of our successor for a block that is an
// identical landingpad + unconditional branch; if found, retarget our invoke
// predecessors at it and kill this block.
7962 for (BasicBlock *OtherPred : predecessors(Succ)) {
7963 if (BB == OtherPred)
7964 continue;
7965 BasicBlock::iterator I = OtherPred->begin();
// NOTE(review): the declaration of LPad2 is elided in this view; presumably
// LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I); — confirm.
7967 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7968 continue;
7969 ++I;
// NOTE(review): the declaration of BI2 is elided in this view; presumably
// BranchInst *BI2 = dyn_cast<BranchInst>(I); — confirm.
7971 if (!BI2 || !BI2->isIdenticalTo(BI))
7972 continue;
7973
7974 std::vector<DominatorTree::UpdateType> Updates;
7975
7976 // We've found an identical block. Update our predecessors to take that
7977 // path instead and make ourselves dead.
// NOTE(review): the declaration of UniquePreds is elided in this view;
// presumably a SmallSetVector collecting predecessors(BB) — confirm.
7979 for (BasicBlock *Pred : UniquePreds) {
// Landing pads are only reached as the unwind edge of an invoke.
7980 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7981 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7982 "unexpected successor");
7983 II->setUnwindDest(OtherPred);
7984 if (DTU) {
7985 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7986 Updates.push_back({DominatorTree::Delete, Pred, BB});
7987 }
7988 }
7989
// NOTE(review): the declaration of UniqueSuccs is elided in this view;
// presumably a SmallSetVector collecting successors(BB) — confirm.
7991 for (BasicBlock *Succ : UniqueSuccs) {
7992 Succ->removePredecessor(BB);
7993 if (DTU)
7994 Updates.push_back({DominatorTree::Delete, BB, Succ});
7995 }
7996
// BB is now unreachable: replace its branch with unreachable.
7997 IRBuilder<> Builder(BI);
7998 Builder.CreateUnreachable();
7999 BI->eraseFromParent();
8000 if (DTU)
8001 DTU->applyUpdates(Updates);
8002 return true;
8003 }
8004 return false;
8005}
8006
8007bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
8008 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
8009 : simplifyCondBranch(Branch, Builder);
8010}
8011
/// Simplify a block ending in an unconditional branch: fold away empty
/// blocks, blocks holding only an equality icmp, mergeable landing pads, or
/// fold a compare-and-branch block into a predecessor. Returns true on any
/// change (or a resimplify request).
8012bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
8013 IRBuilder<> &Builder) {
8014 BasicBlock *BB = BI->getParent();
8015 BasicBlock *Succ = BI->getSuccessor(0);
8016
8017 // If the Terminator is the only non-phi instruction, simplify the block.
8018 // If LoopHeader is provided, check if the block or its successor is a loop
8019 // header. (This is for early invocations before loop simplify and
8020 // vectorization to keep canonical loop forms for nested loops. These blocks
8021 // can be eliminated when the pass is invoked later in the back-end.)
8022 // Note that if BB has only one predecessor then we do not introduce new
8023 // backedge, so we can eliminate BB.
8024 bool NeedCanonicalLoop =
8025 Options.NeedCanonicalLoop &&
8026 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8027 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
// NOTE(review): the declaration of iterator I is elided in this view;
// presumably it points at BB's first non-PHI, non-debug instruction (it is
// advanced with ++I below) — confirm against upstream.
8029 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8030 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8031 return true;
8032
8033 // If the only instruction in the block is a seteq/setne comparison against a
8034 // constant, try to simplify the block.
8035 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
8036 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
8037 ++I;
8038 if (I->isTerminator() &&
8039 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8040 return true;
8041 }
8042
8043 // See if we can merge an empty landing pad block with another which is
8044 // equivalent.
8045 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8046 ++I;
8047 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8048 return true;
8049 }
8050
8051 // If this basic block is ONLY a compare and a branch, and if a predecessor
8052 // branches to us and our successor, fold the comparison into the
8053 // predecessor and use logical operations to update the incoming value
8054 // for PHI nodes in common successor.
8055 if (Options.SpeculateBlocks &&
8056 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8057 Options.BonusInstThreshold))
8058 return requestResimplify();
8059 return false;
8060}
8061
// Return the unique block that is the single predecessor of every
// predecessor of BB, or nullptr if no such common "grandparent" exists. Used
// below to detect diamond shapes for mergeConditionalStores.
// NOTE(review): the function signature line is elided in this view; from the
// call site it is presumably
//   static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
// — confirm against upstream.
8063 BasicBlock *PredPred = nullptr;
8064 for (auto *P : predecessors(BB)) {
8065 BasicBlock *PPred = P->getSinglePredecessor();
// Bail out if some predecessor has several predecessors of its own, or two
// predecessors disagree on their single predecessor.
8066 if (!PPred || (PredPred && PredPred != PPred))
8067 return nullptr;
8068 PredPred = PPred;
8069 }
// Note: if BB has no predecessors this returns nullptr (PredPred unset).
8070 return PredPred;
8071}
8072
8073/// Fold the following pattern:
8074/// bb0:
8075/// br i1 %cond1, label %bb1, label %bb2
8076/// bb1:
8077/// br i1 %cond2, label %bb3, label %bb4
8078/// bb2:
8079/// br i1 %cond2, label %bb4, label %bb3
8080/// bb3:
8081/// ...
8082/// bb4:
8083/// ...
8084/// into
8085/// bb0:
8086/// %cond = xor i1 %cond1, %cond2
8087/// br i1 %cond, label %bb4, label %bb3
8088/// bb3:
8089/// ...
8090/// bb4:
8091/// ...
8092/// NOTE: %cond2 always dominates the terminator of bb0.
// NOTE(review): the signature line is elided in this view; from the call site
// in simplifyCondBranch it is presumably
//   static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) {
// — confirm against upstream. See the pattern documented in the comment
// above: both successors branch on the same second condition with swapped
// targets, so BB can branch directly on cond1 ^ cond2.
8094 BasicBlock *BB = BI->getParent();
8095 BasicBlock *BB1 = BI->getSuccessor(0);
8096 BasicBlock *BB2 = BI->getSuccessor(1);
// A successor qualifies when it is a single-instruction block ending in a
// conditional branch whose two targets are distinct from itself and from BB
// and start with no PHIs (so no incoming values need rewriting).
8097 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8098 if (Succ == BB)
8099 return false;
8100 if (&Succ->front() != Succ->getTerminator())
8101 return false;
8102 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8103 if (!SuccBI || !SuccBI->isConditional())
8104 return false;
8105 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8106 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8107 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8108 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8109 };
8110 BranchInst *BB1BI, *BB2BI;
8111 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8112 return false;
8113
// Require the same second condition with mirrored targets.
8114 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8115 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8116 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8117 return false;
8118
8119 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8120 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8121 IRBuilder<> Builder(BI);
// Rewrite BB's branch to branch on cond1 ^ cond2: true -> BB4, false -> BB3
// (matches the truth table of the pattern above).
8122 BI->setCondition(
8123 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8124 BB1->removePredecessor(BB);
8125 BI->setSuccessor(0, BB4);
8126 BB2->removePredecessor(BB);
8127 BI->setSuccessor(1, BB3);
8128 if (DTU) {
// NOTE(review): the declaration of Updates is elided in this view; presumably
// a SmallVector<DominatorTree::UpdateType, 4> — confirm.
8130 Updates.push_back({DominatorTree::Delete, BB, BB1});
8131 Updates.push_back({DominatorTree::Insert, BB, BB4});
8132 Updates.push_back({DominatorTree::Delete, BB, BB2});
8133 Updates.push_back({DominatorTree::Insert, BB, BB3});
8134
8135 DTU->applyUpdates(Updates);
8136 }
// Recombine branch weights for the merged branch if any of the three original
// branches carried profile metadata; missing weights default to 1.
8137 bool HasWeight = false;
8138 uint64_t BBTWeight, BBFWeight;
8139 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8140 HasWeight = true;
8141 else
8142 BBTWeight = BBFWeight = 1;
8143 uint64_t BB1TWeight, BB1FWeight;
8144 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8145 HasWeight = true;
8146 else
8147 BB1TWeight = BB1FWeight = 1;
8148 uint64_t BB2TWeight, BB2FWeight;
8149 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8150 HasWeight = true;
8151 else
8152 BB2TWeight = BB2FWeight = 1;
8153 if (HasWeight) {
8154 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8155 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8156 fitWeights(Weights);
8157 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8158 }
8159 return true;
8160}
8161
/// Simplify a conditional branch: fold value-equality comparisons into
/// predecessors, turn icmp chains into switches, exploit dominating implied
/// conditions, hoist/sink or speculate code from the successors, thread
/// known-constant conditions, and merge condbr-to-condbr and nested-diamond
/// patterns. Returns true on any change (or a resimplify request).
8162bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8163 assert(
// NOTE(review): the first conjunct of this assert is elided in this view;
// presumably !isa<ConstantInt>(BI->getCondition()) && — confirm.
8165 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8166 "Tautological conditional branch should have been eliminated already.");
8167
8168 BasicBlock *BB = BI->getParent();
8169 if (!Options.SimplifyCondBranch ||
8170 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8171 return false;
8172
8173 // Conditional branch
8174 if (isValueEqualityComparison(BI)) {
8175 // If we only have one predecessor, and if it is a branch on this value,
8176 // see if that predecessor totally determines the outcome of this
8177 // switch.
8178 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8179 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8180 return requestResimplify();
8181
8182 // This block must be empty, except for the setcond inst, if it exists.
8183 // Ignore dbg and pseudo intrinsics.
8184 auto I = BB->instructionsWithoutDebug(true).begin();
8185 if (&*I == BI) {
8186 if (foldValueComparisonIntoPredecessors(BI, Builder))
8187 return requestResimplify();
8188 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8189 ++I;
8190 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8191 return requestResimplify();
8192 }
8193 }
8194
8195 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8196 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8197 return true;
8198
8199 // If this basic block has dominating predecessor blocks and the dominating
8200 // blocks' conditions imply BI's condition, we know the direction of BI.
8201 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8202 if (Imp) {
8203 // Turn this into a branch on constant.
8204 auto *OldCond = BI->getCondition();
8205 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8206 : ConstantInt::getFalse(BB->getContext());
8207 BI->setCondition(TorF);
// NOTE(review): a line is elided here; presumably it deletes the now-dead
// OldCond (OldCond is otherwise unused) — confirm against upstream.
8209 return requestResimplify();
8210 }
8211
8212 // If this basic block is ONLY a compare and a branch, and if a predecessor
8213 // branches to us and one of our successors, fold the comparison into the
8214 // predecessor and use logical operations to pick the right destination.
8215 if (Options.SpeculateBlocks &&
8216 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8217 Options.BonusInstThreshold))
8218 return requestResimplify();
8219
8220 // We have a conditional branch to two blocks that are only reachable
8221 // from BI. We know that the condbr dominates the two blocks, so see if
8222 // there is any identical code in the "then" and "else" blocks. If so, we
8223 // can hoist it up to the branching block.
8224 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8225 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8226 if (HoistCommon &&
8227 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8228 return requestResimplify();
8229
// Optionally hoist cheap loads/stores out of both successors when the
// target supports conditionally-faulting memory operations.
8230 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8231 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8232 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8233 auto CanSpeculateConditionalLoadsStores = [&]() {
8234 for (auto *Succ : successors(BB)) {
8235 for (Instruction &I : *Succ) {
8236 if (I.isTerminator()) {
8237 if (I.getNumSuccessors() > 1)
8238 return false;
8239 continue;
8240 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8241 SpeculatedConditionalLoadsStores.size() ==
// NOTE(review): the size-limit operand is elided in this view; presumably a
// HoistLoadsStoresWithCondFaulting threshold constant/option — confirm.
8243 return false;
8244 }
8245 SpeculatedConditionalLoadsStores.push_back(&I);
8246 }
8247 }
8248 return !SpeculatedConditionalLoadsStores.empty();
8249 };
8250
8251 if (CanSpeculateConditionalLoadsStores()) {
8252 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8253 std::nullopt, nullptr);
8254 return requestResimplify();
8255 }
8256 }
8257 } else {
8258 // If Successor #1 has multiple preds, we may be able to conditionally
8259 // execute Successor #0 if it branches to Successor #1.
8260 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8261 if (Succ0TI->getNumSuccessors() == 1 &&
8262 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8263 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8264 return requestResimplify();
8265 }
8266 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8267 // If Successor #0 has multiple preds, we may be able to conditionally
8268 // execute Successor #1 if it branches to Successor #0.
8269 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8270 if (Succ1TI->getNumSuccessors() == 1 &&
8271 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8272 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8273 return requestResimplify();
8274 }
8275
8276 // If this is a branch on something for which we know the constant value in
8277 // predecessors (e.g. a phi node in the current block), thread control
8278 // through this block.
8279 if (foldCondBranchOnValueKnownInPredecessor(BI))
8280 return requestResimplify();
8281
8282 // Scan predecessor blocks for conditional branches.
8283 for (BasicBlock *Pred : predecessors(BB))
8284 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8285 if (PBI != BI && PBI->isConditional())
8286 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8287 return requestResimplify();
8288
8289 // Look for diamond patterns.
8290 if (MergeCondStores)
8291 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8292 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8293 if (PBI != BI && PBI->isConditional())
8294 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8295 return requestResimplify();
8296
8297 // Look for nested conditional branches.
8298 if (mergeNestedCondBranch(BI, DTU))
8299 return requestResimplify();
8300
8301 return false;
8302}
8303
8304/// Check if passing a value to an instruction will cause undefined behavior.
/// Looks at the first interesting use of I (a null/undef constant flowing
/// into it) and decides whether executing that use is immediate UB — e.g.
/// load/store through null, call of null, noundef/nonnull violations, or
/// division by zero. Recurses through GEPs and tracks whether the pointer
/// value may have been modified along the way (PtrValueMayBeModified).
8305static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8306 assert(V->getType() == I->getType() && "Mismatched types");
// NOTE(review): the declaration of C is elided in this view; presumably
// Constant *C = dyn_cast<Constant>(V); — confirm against upstream.
8308 if (!C)
8309 return false;
8310
8311 if (I->use_empty())
8312 return false;
8313
8314 if (C->isNullValue() || isa<UndefValue>(C)) {
8315 // Only look at the first use we can handle, avoid hurting compile time with
8316 // long uselists
8317 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8318 auto *Use = cast<Instruction>(U.getUser());
8319 // Change this list when we want to add new instructions.
8320 switch (Use->getOpcode()) {
8321 default:
8322 return false;
8323 case Instruction::GetElementPtr:
8324 case Instruction::Ret:
8325 case Instruction::BitCast:
8326 case Instruction::Load:
8327 case Instruction::Store:
8328 case Instruction::Call:
8329 case Instruction::CallBr:
8330 case Instruction::Invoke:
8331 case Instruction::UDiv:
8332 case Instruction::URem:
8333 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8334 // implemented to avoid code complexity as it is unclear how useful such
8335 // logic is.
8336 case Instruction::SDiv:
8337 case Instruction::SRem:
8338 return true;
8339 }
8340 });
8341 if (FindUse == I->use_end())
8342 return false;
8343 auto &Use = *FindUse;
8344 auto *User = cast<Instruction>(Use.getUser());
8345 // Bail out if User is not in the same BB as I or User == I or User comes
8346 // before I in the block. The latter two can be the case if User is a
8347 // PHI node.
8348 if (User->getParent() != I->getParent() || User == I ||
8349 User->comesBefore(I))
8350 return false;
8351
8352 // Now make sure that there are no instructions in between that can alter
8353 // control flow (eg. calls)
8354 auto InstrRange =
8355 make_range(std::next(I->getIterator()), User->getIterator());
8356 if (any_of(InstrRange, [](Instruction &I) {
// NOTE(review): the predicate body is elided in this view; presumably it
// returns !isGuaranteedToTransferExecutionToSuccessor(&I) — confirm.
8358 }))
8359 return false;
8360
8361 // Look through GEPs. A load from a GEP derived from NULL is still undefined
// NOTE(review): the declaration of GEP is elided in this view; presumably
// GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User); — confirm.
8363 if (GEP->getPointerOperand() == I) {
8364 // The type of GEP may differ from the type of base pointer.
8365 // Bail out on vector GEPs, as they are not handled by other checks.
8366 if (GEP->getType()->isVectorTy())
8367 return false;
8368 // The current base address is null, there are four cases to consider:
8369 // getelementptr (TY, null, 0) -> null
8370 // getelementptr (TY, null, not zero) -> may be modified
8371 // getelementptr inbounds (TY, null, 0) -> null
8372 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8373 // undefined?
8374 if (!GEP->hasAllZeroIndices() &&
8375 (!GEP->isInBounds() ||
8376 NullPointerIsDefined(GEP->getFunction(),
8377 GEP->getPointerAddressSpace())))
8378 PtrValueMayBeModified = true;
8379 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8380 }
8381
8382 // Look through return.
8383 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8384 bool HasNoUndefAttr =
8385 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8386 // Return undefined to a noundef return value is undefined.
8387 if (isa<UndefValue>(C) && HasNoUndefAttr)
8388 return true;
8389 // Return null to a nonnull+noundef return value is undefined.
8390 if (C->isNullValue() && HasNoUndefAttr &&
8391 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8392 return !PtrValueMayBeModified;
8393 }
8394 }
8395
8396 // Load from null is undefined.
8397 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8398 if (!LI->isVolatile())
8399 return !NullPointerIsDefined(LI->getFunction(),
8400 LI->getPointerAddressSpace());
8401
8402 // Store to null is undefined.
// NOTE(review): the declaration of SI is elided in this view; presumably
// StoreInst *SI = dyn_cast<StoreInst>(User); — confirm.
8404 if (!SI->isVolatile())
8405 return (!NullPointerIsDefined(SI->getFunction(),
8406 SI->getPointerAddressSpace())) &&
8407 SI->getPointerOperand() == I;
8408
8409 // llvm.assume(false/undef) always triggers immediate UB.
8410 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8411 // Ignore assume operand bundles.
8412 if (I == Assume->getArgOperand(0))
8413 return true;
8414 }
8415
8416 if (auto *CB = dyn_cast<CallBase>(User)) {
8417 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8418 return false;
8419 // A call to null is undefined.
8420 if (CB->getCalledOperand() == I)
8421 return true;
8422
8423 if (CB->isArgOperand(&Use)) {
8424 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8425 // Passing null to a nonnnull+noundef argument is undefined.
// NOTE(review): the first half of this condition is elided in this view;
// presumably it checks C->isNullValue() (and a noundef-param query) before
// the nonnull-attribute test — confirm against upstream.
8427 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8428 return !PtrValueMayBeModified;
8429 // Passing undef to a noundef argument is undefined.
8430 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8431 return true;
8432 }
8433 }
8434 // Div/Rem by zero is immediate UB
8435 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8436 return true;
8437 }
8438 return false;
8439}
8440
8441/// If BB has an incoming value that will always trigger undefined behavior
8442/// (eg. null pointer dereference), remove the branch leading here.
/// Returns true if an edge into BB was removed. For a conditional branch the
/// guarding condition is preserved as an llvm.assume; for a switch the UB
/// edges are redirected to a fresh unreachable block.
// NOTE(review): the first line of the signature is elided in this view;
// presumably
//   static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
// — confirm against upstream. Also note passingValueIsAlwaysUndefined is
// called here with two arguments, so its PtrValueMayBeModified parameter
// presumably defaults to false on its declaration.
8444 DomTreeUpdater *DTU,
8445 AssumptionCache *AC) {
8446 for (PHINode &PHI : BB->phis())
8447 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8448 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8449 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8450 Instruction *T = Predecessor->getTerminator();
8451 IRBuilder<> Builder(T);
8452 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8453 BB->removePredecessor(Predecessor);
8454 // Turn unconditional branches into unreachables and remove the dead
8455 // destination from conditional branches.
8456 if (BI->isUnconditional())
8457 Builder.CreateUnreachable();
8458 else {
8459 // Preserve guarding condition in assume, because it might not be
8460 // inferrable from any dominating condition.
8461 Value *Cond = BI->getCondition();
8462 CallInst *Assumption;
8463 if (BI->getSuccessor(0) == BB)
8464 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8465 else
8466 Assumption = Builder.CreateAssumption(Cond);
8467 if (AC)
8468 AC->registerAssumption(cast<AssumeInst>(Assumption));
8469 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8470 : BI->getSuccessor(0));
8471 }
8472 BI->eraseFromParent();
8473 if (DTU)
8474 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8475 return true;
8476 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8477 // Redirect all branches leading to UB into
8478 // a newly created unreachable block.
8479 BasicBlock *Unreachable = BasicBlock::Create(
8480 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8481 Builder.SetInsertPoint(Unreachable);
8482 // The new block contains only one instruction: Unreachable
8483 Builder.CreateUnreachable();
8484 for (const auto &Case : SI->cases())
8485 if (Case.getCaseSuccessor() == BB) {
8486 BB->removePredecessor(Predecessor);
8487 Case.setSuccessor(Unreachable);
8488 }
8489 if (SI->getDefaultDest() == BB) {
8490 BB->removePredecessor(Predecessor);
8491 SI->setDefaultDest(Unreachable);
8492 }
8493
8494 if (DTU)
8495 DTU->applyUpdates(
8496 { { DominatorTree::Insert, Predecessor, Unreachable },
8497 { DominatorTree::Delete, Predecessor, BB } });
8498 return true;
8499 }
8500 }
8501
8502 return false;
8503}
8504
/// Perform one round of CFG simplification on BB: delete dead/self-looping
/// blocks, constant-fold the terminator, remove UB-introducing predecessors,
/// merge/sink/speculate across block boundaries, then dispatch to the
/// terminator-specific simplifier. Returns true if the IR changed.
8505bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8506 bool Changed = false;
8507
8508 assert(BB && BB->getParent() && "Block not embedded in function!");
8509 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8510
8511 // Remove basic blocks that have no predecessors (except the entry block)...
8512 // or that just have themself as a predecessor. These are unreachable.
8513 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8514 BB->getSinglePredecessor() == BB) {
8515 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8516 DeleteDeadBlock(BB, DTU);
8517 return true;
8518 }
8519
8520 // Check to see if we can constant propagate this terminator instruction
8521 // away...
8522 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8523 /*TLI=*/nullptr, DTU);
8524
8525 // Check for and eliminate duplicate PHI nodes in this block.
// NOTE(review): a line is elided here; presumably
// Changed |= EliminateDuplicatePHINodes(BB); — confirm against upstream.
8527
8528 // Check for and remove branches that will always cause undefined behavior.
// NOTE(review): a line is elided here; presumably the guard
// if (removeUndefIntroducingPredecessor(BB, DTU, Options.AC)) — confirm.
8530 return requestResimplify();
8531
8532 // Merge basic blocks into their predecessor if there is only one distinct
8533 // pred, and if there is only one distinct successor of the predecessor, and
8534 // if there are no PHI nodes.
8535 if (MergeBlockIntoPredecessor(BB, DTU))
8536 return true;
8537
8538 if (SinkCommon && Options.SinkCommonInsts)
8539 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8540 mergeCompatibleInvokes(BB, DTU)) {
8541 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8542 // so we may now have duplicate PHI's.
8543 // Let's rerun EliminateDuplicatePHINodes() first,
8544 // before foldTwoEntryPHINode() potentially converts them into select's,
8545 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8546 return true;
8547 }
8548
8549 IRBuilder<> Builder(BB);
8550
8551 if (Options.SpeculateBlocks &&
8552 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8553 // If there is a trivial two-entry PHI node in this basic block, and we can
8554 // eliminate it, do so now.
8555 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8556 if (PN->getNumIncomingValues() == 2)
8557 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8558 Options.SpeculateUnpredictables))
8559 return true;
8560 }
8561
// NOTE(review): a line is elided here; presumably
// Instruction *Terminator = BB->getTerminator(); — confirm.
8563 Builder.SetInsertPoint(Terminator);
// Dispatch on the terminator opcode to its dedicated simplifier.
8564 switch (Terminator->getOpcode()) {
8565 case Instruction::Br:
8566 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8567 break;
8568 case Instruction::Resume:
8569 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8570 break;
8571 case Instruction::CleanupRet:
8572 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8573 break;
8574 case Instruction::Switch:
8575 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8576 break;
8577 case Instruction::Unreachable:
8578 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8579 break;
8580 case Instruction::IndirectBr:
8581 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8582 break;
8583 }
8584
8585 return Changed;
8586}
8587
8588bool SimplifyCFGOpt::run(BasicBlock *BB) {
8589 bool Changed = false;
8590
8591 // Repeated simplify BB as long as resimplification is requested.
8592 do {
8593 Resimplify = false;
8594
8595 // Perform one round of simplifcation. Resimplify flag will be set if
8596 // another iteration is requested.
8597 Changed |= simplifyOnce(BB);
8598 } while (Resimplify);
8599
8600 return Changed;
8601}
8602
// Public entry point: construct a SimplifyCFGOpt for this block's function
// context and run it to a fixed point on BB.
// NOTE(review): the first lines of the signature (return type and the BB,
// TTI, DTU, Options parameters) are elided in this view — confirm against
// upstream; presumably bool llvm::simplifyCFG(BasicBlock *, ...).
8605 ArrayRef<WeakVH> LoopHeaders) {
8606 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8607 Options)
8608 .run(BB);
8609}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1574
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition ArrayRef.h:156
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:666
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:984
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:767
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isNegative() const
Definition Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:256
static DebugLoc getTemporary()
Definition DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:183
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:170
static DebugLoc getDropped()
Definition DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:237
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:213
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2345
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2100
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2637
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1931
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1805
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1847
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1860
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2194
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2068
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2277
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1077
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:119
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:201
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1731
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:256
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1690
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2056
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1757
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2108
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< uint32_t, 2 > &B1, const SmallVector< uint32_t, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1719
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3081
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3339
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1934
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3842
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2010
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2100
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1584
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2068
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:257