-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[VPlan] Handle early exit before forming regions. (NFC) #138393
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
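@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) Changes: Move early-exit handling up front to original VPlan construction, before introducing early exits. This builds on #137709, which adds exiting edges to the original VPlan, instead of adding exit blocks later. This retains the exit conditions early, and means we can handle early exits before forming regions, without the reliance on VPRecipeBuilder. Once we retain all exits initially, handling early exits before region construction ensures the regions are valid; otherwise we would leave edges exiting the region from elsewhere than the latch. Removing the reliance on VPRecipeBuilder removes the dependence on mapping IR BBs to VPBBs and unblocks predication as VPlan transform: #128420. Depends on #137709 (included in PR). Patch is 20.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138393.diff 7 Files Affected: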
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4a809badb0ee7..0aa6a627c2d5f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9431,7 +9431,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop,
- getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
+ getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
+ Legal->hasUncountableEarlyExit(), Range);
VPlanTransforms::createLoopRegions(*Plan);
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9629,12 +9630,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
R->setOperand(1, WideIV->getStepValue());
}
- if (auto *UncountableExitingBlock =
- Legal->getUncountableEarlyExitingBlock()) {
- VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
- OrigLoop, UncountableExitingBlock, RecipeBuilder,
- Range);
- }
DenseMap<VPValue *, VPValue *> IVEndValues;
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9732,7 +9727,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
- getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
+ getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
+ Range);
VPlanTransforms::createLoopRegions(*Plan);
for (ElementCount VF : Range)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 89c77f2189b1f..3b3d00c6a8b0a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -117,6 +117,7 @@ class VPBlockBase {
Predecessors.erase(Pos);
}
+public:
/// Remove \p Successor from the successors of this block.
void removeSuccessor(VPBlockBase *Successor) {
auto Pos = find(Successors, Successor);
@@ -129,8 +130,6 @@ class VPBlockBase {
void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
auto I = find(Predecessors, Old);
assert(I != Predecessors.end());
- assert(Old->getParent() == New->getParent() &&
- "replaced predecessor must have the same parent");
*I = New;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index c7132e84f689c..86c1ee0d7cdfa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -113,6 +113,9 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
return VPBB;
}
+ if (!TheLoop->contains(BB))
+ return Plan->getExitBlock(BB);
+
// Create new VPBB.
StringRef Name = BB->getName();
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
@@ -146,14 +149,6 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) {
// Instruction definition is in outermost loop PH.
return false;
- // Check whether Instruction definition is in a loop exit.
- SmallVector<BasicBlock *> ExitBlocks;
- TheLoop->getExitBlocks(ExitBlocks);
- if (is_contained(ExitBlocks, InstParent)) {
- // Instruction definition is in outermost loop exit.
- return false;
- }
-
// Check whether Instruction definition is in loop body.
return !TheLoop->contains(Inst);
}
@@ -202,11 +197,6 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
"Instruction shouldn't have been visited.");
if (auto *Br = dyn_cast<BranchInst>(Inst)) {
- if (TheLoop->getLoopLatch() == BB ||
- any_of(successors(BB),
- [this](BasicBlock *Succ) { return !TheLoop->contains(Succ); }))
- continue;
-
// Conditional branch instruction are represented using BranchOnCond
// recipes.
if (Br->isConditional()) {
@@ -296,7 +286,6 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
- Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
setVPBBPredsFromBB(VPBB, BB);
@@ -327,24 +316,12 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
-
- // Don't connect any blocks outside the current loop except the latches for
- // inner loops.
- // TODO: Also connect exit blocks during initial VPlan construction.
- if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) {
- if (!LoopForBB->contains(IRSucc0)) {
- VPBB->setOneSuccessor(Successor1);
- continue;
- }
- if (!LoopForBB->contains(IRSucc1)) {
- VPBB->setOneSuccessor(Successor0);
- continue;
- }
- }
-
VPBB->setTwoSuccessors(Successor0, Successor1);
}
+ for (auto *EB : Plan->getExitBlocks())
+ setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
+
// 2. The whole CFG has been built at this point so all the input Values must
// have a VPlan counterpart. Fix VPlan header phi by adding their
// corresponding VPlan operands.
@@ -447,19 +424,21 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
assert(LatchVPBB->getNumSuccessors() <= 1 &&
"Latch has more than one successor");
- if (Succ)
- VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+ LatchVPBB->removeSuccessor(Succ);
auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
false /*isReplicator*/);
// All VPBB's reachable shallowly from HeaderVPB belong to top level loop,
// because VPlan is expected to end at top level latch disconnected above.
+ SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan.getExitBlocks().begin(),
+ Plan.getExitBlocks().end());
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
- VPBB->setParent(R);
+ if (!ExitBlocks.contains(VPBB))
+ VPBB->setParent(R);
VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
- if (Succ)
- VPBlockUtils::connectBlocks(R, Succ);
+ R->setOneSuccessor(Succ);
+ Succ->replacePredecessor(LatchVPBB, R);
}
// Add the necessary canonical IV and branch recipes required to control the
@@ -495,11 +474,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
}
-void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
- PredicatedScalarEvolution &PSE,
- bool RequiresScalarEpilogueCheck,
- bool TailFolded, Loop *TheLoop,
- DebugLoc IVDL) {
+void VPlanTransforms::prepareForVectorization(
+ VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
+ bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
+ DebugLoc IVDL, bool HandleUncountableExit, VFRange &Range) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -511,11 +489,35 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
- VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
- LatchVPB->swapSuccessors();
+ VPBlockBase *LatchExitVPB = LatchVPB->getNumSuccessors() == 2
+ ? LatchVPB->getSuccessors()[0]
+ : nullptr;
+ if (LatchExitVPB) {
+ LatchVPB->getSuccessors()[0] = MiddleVPBB;
+ MiddleVPBB->setPredecessors({LatchVPB});
+ MiddleVPBB->setSuccessors({LatchExitVPB});
+ LatchExitVPB->replacePredecessor(LatchVPB, MiddleVPBB);
+ } else {
+ VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
+ LatchVPB->swapSuccessors();
+ }
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
+ if (HandleUncountableExit) {
+ handleUncountableEarlyExit(Plan, cast<VPBasicBlock>(HeaderVPB),
+ cast<VPBasicBlock>(LatchVPB), Range);
+ } else {
+ // Disconnect all edges between exit blocks other than from the latch.
+ for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
+ for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
+ if (Pred == MiddleVPBB)
+ continue;
+ cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
+ VPBlockUtils::disconnectBlocks(Pred, EB);
+ }
+ }
+ }
// Create SCEV and VPValue for the trip count.
// We use the symbolic max backedge-taken-count, which works also when
@@ -541,8 +543,9 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
// Thus if tail is to be folded, we know we don't need to run the
// remainder and we can set the condition to true.
// 3) Otherwise, construct a runtime check.
-
if (!RequiresScalarEpilogueCheck) {
+ if (LatchExitVPB)
+ VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
// The exit blocks are unreachable, remove their recipes to make sure no
// users remain that may pessimize transforms.
@@ -554,9 +557,6 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
}
// The connection order corresponds to the operands of the conditional branch.
- BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
- auto *VPExitBlock = Plan.getExitBlock(IRExitBlock);
- VPBlockUtils::connectBlocks(MiddleVPBB, VPExitBlock);
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7093d378d8c3e..419a60d027314 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2488,64 +2488,75 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
R->eraseFromParent();
}
-void VPlanTransforms::handleUncountableEarlyExit(
- VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock,
- VPRecipeBuilder &RecipeBuilder, VFRange &Range) {
- VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
- auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
+void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan,
+ VPBasicBlock *HeaderVPBB,
+ VPBasicBlock *LatchVPBB,
+ VFRange &Range) {
+ // First find the uncountable early exiting block by looking at the
+ // predecessors of the exit blocks.
+ VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
+ VPBasicBlock *EarlyExitingVPBB = nullptr;
+ VPIRBasicBlock *EarlyExitVPBB = nullptr;
+ for (auto *EB : Plan.getExitBlocks()) {
+ for (VPBlockBase *Pred : EB->getPredecessors()) {
+ if (Pred != MiddleVPBB) {
+ EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
+ EarlyExitVPBB = EB;
+ break;
+ }
+ }
+ }
+
VPBuilder Builder(LatchVPBB->getTerminator());
- auto *MiddleVPBB = Plan.getMiddleBlock();
- VPValue *IsEarlyExitTaken = nullptr;
-
- // Process the uncountable exiting block. Update IsEarlyExitTaken, which
- // tracks if the uncountable early exit has been taken. Also split the middle
- // block and have it conditionally branch to the early exit block if
- // EarlyExitTaken.
- auto *EarlyExitingBranch =
- cast<BranchInst>(UncountableExitingBlock->getTerminator());
- BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
- BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
- BasicBlock *EarlyExitIRBB =
- !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
- VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
-
- VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
- OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
- auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
- IsEarlyExitTaken =
- Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
+ VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
+ VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0);
+ auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB
+ ? EarlyExitCond
+ : Builder.createNot(EarlyExitCond);
+
+ if (!EarlyExitVPBB->getSinglePredecessor() &&
+ EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
+ for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
+ // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
+ // a single predecessor and 1 if it has two.
+ // If EarlyExitVPBB has two predecessors, they are already ordered such
+ // that early exit is second (and latch exit is first), by construction.
+ // But its underlying IRBB (EarlyExitIRBB) may have its predecessors
+ // ordered the other way around, and it is the order of the latter which
+ // corresponds to the order of operands of EarlyExitVPBB's phi recipes.
+ // Therefore, if early exit (UncountableExitingBlock) is the first
+ // predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
+ // thereby bringing them to match EarlyExitVPBB's predecessor order,
+ // with early exit being last (second). Otherwise they already match.
+ cast<VPIRPhi>(&R)->swapOperands();
+ }
+ }
+ EarlyExitingVPBB->getTerminator()->eraseFromParent();
+ VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
+
+ // Split the middle block and have it conditionally branch to the early exit
+ // block if EarlyExitTaken.
+ VPValue *IsEarlyExitTaken =
+ Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
VPBasicBlock *VectorEarlyExitVPBB =
Plan.createVPBasicBlock("vector.early.exit");
- VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
+ VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
NewMiddle->swapSuccessors();
- VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
+ VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
// Update the exit phis in the early exit block.
VPBuilder MiddleBuilder(NewMiddle);
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
- for (VPRecipeBase &R : VPEarlyExitBlock->phis()) {
+ for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
- // Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has
+ // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
// a single predecessor and 1 if it has two.
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
- if (!VPEarlyExitBlock->getSinglePredecessor()) {
- // If VPEarlyExitBlock has two predecessors, they are already ordered such
- // that early exit is second (and latch exit is first), by construction.
- // But its underlying IRBB (EarlyExitIRBB) may have its predecessors
- // ordered the other way around, and it is the order of the latter which
- // corresponds to the order of operands of VPEarlyExitBlock's phi recipes.
- // Therefore, if early exit (UncountableExitingBlock) is the first
- // predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
- // thereby bringing them to match VPEarlyExitBlock's predecessor order,
- // with early exit being last (second). Otherwise they already match.
- if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) ==
- UncountableExitingBlock)
- ExitIRI->swapOperands();
-
+ if (!EarlyExitVPBB->getSinglePredecessor()) {
// The first of two operands corresponds to the latch exit, via MiddleVPBB
// predecessor. Extract its last lane.
ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 9e8b518a0c7eb..701e533fee5c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -69,7 +69,8 @@ struct VPlanTransforms {
PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop,
- DebugLoc IVDL);
+ DebugLoc IVDL, bool HandleUncountableExit,
+ VFRange &Range);
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
/// flat CFG into a hierarchical CFG.
@@ -179,9 +180,8 @@ struct VPlanTransforms {
/// exit conditions
/// * splitting the original middle block to branch to the early exit block
/// if taken.
- static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop,
- BasicBlock *UncountableExitingBlock,
- VPRecipeBuilder &RecipeBuilder,
+ static void handleUncountableEarlyExit(VPlan &Plan, VPBasicBlock *HeaderVPBB,
+ VPBasicBlock *LatchVPBB,
VFRange &Range);
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 91a5ea6b7fe36..fe845ae74cbee 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -31,7 +31,11 @@ define void @foo(i64 %n) {
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
-; CHECK-NEXT: Successor(s): outer.header
+; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, outer.header
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
entry:
br label %outer.header
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index bf67a5596b270..15e21972840f6 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -13,6 +13,7 @@
#define LLVM_UNITTESTS_TRANSFORMS_VECTORIZE_VPLANTESTBASE_H
#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanHelpers.h"
#include "../lib/Transforms/Vectorize/VPlanTransforms.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -72,8 +73,9 @@ class VPlanTestIRBase : public testing::Test {
PredicatedScalarEvolution PSE(*SE, *L);
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
+ VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2));
VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
- ...
[truncated]
|
8c682a2
to
4f96480
Compare
@@ -9383,7 +9383,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |||
VPlanTransforms::prepareForVectorization( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Independent) Should `prepareForVectorization` be renamed to a more informative name, perhaps `canonicalizeTopLoop`, as it takes care of canonicalizing header and latch blocks, introducing and connecting preheader, middle-block, scalar preheader, canonical IV recipes and trip-count value?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will do separately.
|
||
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB), | ||
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL); | ||
|
||
if (HandleUncountableExit) { | ||
// Convert VPlans with early exits to a form only exiting via the latch |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Convert VPlans with early exits to a form only exiting via the latch | |
// Convert VPlans with early exits to a form exiting only via the latch |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
|
||
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB), | ||
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL); | ||
|
||
if (HandleUncountableExit) { | ||
// Convert VPlans with early exits to a form only exiting via the latch | ||
// here, including adjusting the exit condition. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// here, including adjusting the exit condition. | |
// here, including adjusting the condition of the non-early exit. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to the exit condition of the latch.
return Pred == EarlyExitingVPBB || Pred == MiddleVPBB; | ||
}); | ||
}) && | ||
"All exit blocks must only have EarlyExitingVPBB or MiddleVPBB as " |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"All exit blocks must only have EarlyExitingVPBB or MiddleVPBB as " | |
"Every exit block must have EarlyExitingVPBB and/or MiddleVPBB as " |
better fits in verifier?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Assert is gone now, as this is now passed from the caller, which checks there that we handle exactly one uncountable early exit if needed, thanks
void VPlanTransforms::prepareForVectorization( | ||
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, | ||
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, | ||
DebugLoc IVDL, bool HandleUncountableExit, VFRange &Range) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
DebugLoc IVDL, bool HandleUncountableExit, VFRange &Range) { | |
DebugLoc IVDL, bool hasUncountableEarlyExit, VFRange &Range) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!
IsEarlyExitTaken = | ||
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond}); | ||
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0]; | ||
VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0); | |
Assert terminator is a conditional branch? | |
VPValue *CondOfEarlyExitingVPBB = EarlyExitingVPBB->getTerminator()->getOperand(0); |
as it may not be the condition to (early) exit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond}); | ||
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0]; | ||
VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0); | ||
auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB | |
auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB |
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
VPBuilder Builder(LatchVPBB->getTerminator()); | ||
auto *MiddleVPBB = Plan.getMiddleBlock(); | ||
VPValue *IsEarlyExitTaken = nullptr; | ||
|
||
// Process the uncountable exiting block. Update IsEarlyExitTaken, which | ||
// tracks if the uncountable early exit has been taken. Also split the middle | ||
// block and have it conditionally branch to the early exit block if | ||
// EarlyExitTaken. | ||
auto *EarlyExitingBranch = | ||
cast<BranchInst>(UncountableExitingBlock->getTerminator()); | ||
BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0); | ||
BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1); | ||
BasicBlock *EarlyExitIRBB = | ||
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc; | ||
VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB); | ||
|
||
VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask( | ||
OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); | ||
auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond); | ||
IsEarlyExitTaken = | ||
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond}); | ||
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0]; | ||
VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0); | ||
auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Define these below, closer to where used?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, moved thanks.
// Therefore, if early exit (UncountableExitingBlock) is the first | ||
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This corresponds to checking `if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) == UncountableExitingBlock)`, which is now gone.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Simplified the comment and moved outside the loop, thanks
for (auto *EB : Plan.getExitBlocks()) { | ||
for (VPBlockBase *Pred : EB->getPredecessors()) { | ||
if (Pred != MiddleVPBB) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Similar to the loop looking for early exit edges in the caller, common to dealing with them by sinking to scalar epilog?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, updated to pass the early exiting and exit blocks as they are known in the caller, thanks
Move early-exit handling up front to original VPlan construction, before introducing early exits. This builds on llvm#137709, which adds exiting edges to the original VPlan, instead of adding exit blocks later. This retains the exit conditions early, and means we can handle early exits before forming regions, without the reliance on VPRecipeBuilder. Once we retain all exits initially, handling early exits before region construction ensures the regions are valid; otherwise we would leave edges exiting the region from elsewhere than the latch. Removing the reliance on VPRecipeBuilder removes the dependence on mapping IR BBs to VPBBs and unblocks predication as VPlan transform: llvm#128420. Depends on llvm#137709.
4f96480
to
76c470a
Compare
// TODO: VPlans with early exits should be explicitly converted to a form | ||
// exiting only via the latch here, including adjusting the exit condition, | ||
// instead of simply disconnecting the edges and adjusting the VPlan later. | ||
for (VPBlockBase *EB : Plan.getExitBlocks()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps worth replacing the old comment with a new one explaining what the loop is doing at a high level?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0]; | ||
if (!EarlyExitVPBB->getSinglePredecessor() && | ||
EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) { | ||
// Early exit operand should always be last phi operand. If EarlyExitVPBB |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it worth adding an assert that MiddleVPBB is actually the second predecessor?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done, thanks
// Early exit operand should always be last phi operand. If EarlyExitVPBB | ||
// has two predecessors and MiddleVPBB isn't the first, swap the operands of | ||
// the phis. | ||
for (VPRecipeBase &R : EarlyExitVPBB->phis()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is making sure the operand corresponding to the middle block is always first, right? I guess in future if we do want to support multiple early exits this will get a bit more complicated, but swapping operands works for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep
UncountableExitingBlock) | ||
ExitIRI->swapOperands(); | ||
|
||
if (ExitIRI->getNumOperands() != 1) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not relevant for this patch, but the name `extractLastLaneOfFirstOperand` confused me at first because I was expecting it to return a value. It feels like it should be something like `updatedFirstOperandToExtractLastLane`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be good to improve the naming, but it may be even better to create all required extracts up-front separately, which is something I am looking into
auto *ExitIRI = cast<VPIRPhi>(&R); | ||
// Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has | ||
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this comment still valid given you've already swapped operands above?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, this just restates the expected order, as explanation for setting the early exit index below.
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond}); | ||
void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, | ||
VPBasicBlock *EarlyExitVPBB, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed thanks
VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB); | ||
|
||
// Split the middle block and have it conditionally branch to the early exit | ||
// block if EarlyExitTaken. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// block if EarlyExitTaken. | |
// block if CondToEarlyExit. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
@@ -69,7 +69,8 @@ struct VPlanTransforms { | |||
PredicatedScalarEvolution &PSE, | |||
bool RequiresScalarEpilogueCheck, | |||
bool TailFolded, Loop *TheLoop, | |||
DebugLoc IVDL); | |||
DebugLoc IVDL, bool HandleUncountableExit, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
DebugLoc IVDL, bool HandleUncountableExit, | |
DebugLoc IVDL, bool HasUncountableEarlyExit, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
cast<VPBasicBlock>(HeaderVPB), | ||
cast<VPBasicBlock>(LatchVPB), Range); | ||
HandledUncountableEarlyExit = true; | ||
continue; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
continue; |
All early exits are disconnected.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could exit the loop here, but then the verification would be more work. For all supported loops, we should leave the loop after the continue.
// Handle the remaining early exits, either by converting the plan to one only | ||
// exiting via the latch or by disconnecting all early exiting edges and | ||
// requiring a scalar epilogue. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Handle the remaining early exits, either by converting the plan to one only | |
// exiting via the latch or by disconnecting all early exiting edges and | |
// requiring a scalar epilogue. | |
// Disconnect all early exits from the loop leaving it with a single exit from the latch. Early exits that are countable are left for a scalar epilog. The condition of uncountable early exits (currently at most one is supported) is fused into the latch exit, and used to branch from middle block to the early exit destination. |
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, updated!
@@ -9383,7 +9383,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |||
VPlanTransforms::prepareForVectorization( | |||
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck, | |||
CM.foldTailByMasking(), OrigLoop, | |||
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction())); | |||
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), | |||
Legal->hasUncountableEarlyExit(), Range); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, thanks for clarifying!
"If it is false, it means there are no uncountable early exits, i.e., all early exits are countable which requires a scalar epilogue."
// Split the middle block and have it conditionally branch to the early exit | ||
// block if EarlyExitTaken. | ||
VPValue *IsEarlyExitTaken = | ||
Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit}); | ||
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Independent:
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split"); | |
VPBasicBlock *MiddleSplit = Plan.createVPBasicBlock("middle.split"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can do separately, thanks
/// Update \p Plan to account for the uncountable early exit block in \p | ||
/// UncountableExitingBlock by | ||
/// Update \p Plan to account for the uncountable early exit from \p | ||
/// EarlyExitingVPBB to \p EarlyExitVPBB by | ||
/// * updating the condition exiting the vector loop to include the early | ||
/// exit conditions |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// exit conditions | |
/// exit condition. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated
/// Update \p Plan to account for the uncountable early exit block in \p | ||
/// UncountableExitingBlock by | ||
/// Update \p Plan to account for the uncountable early exit from \p | ||
/// EarlyExitingVPBB to \p EarlyExitVPBB by | ||
/// * updating the condition exiting the vector loop to include the early | ||
/// exit conditions | ||
/// * splitting the original middle block to branch to the early exit block | ||
/// if taken. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// if taken. | |
/// conditionally - according to the early exit condition. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated thanks
EarlyExitingVPBB->getTerminator()->eraseFromParent(); | ||
VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Waiting for a commit?
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This LGTM, thanks! Hopefully it also looks good to @david-arm?
/// Update \p Plan to account for the uncountable early exit block in \p | ||
/// UncountableExitingBlock by | ||
/// Update \p Plan to account for the uncountable early exit from \p | ||
/// EarlyExitingVPBB to \p EarlyExitVPBB by | ||
/// * updating the condition exiting the vector loop to include the early |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// * updating the condition exiting the vector loop to include the early | |
/// * updating the condition exiting the loop via the latch to include the early |
Move early-exit handling up front to original VPlan construction, before
introducing early exits.
This builds on #137709, which
adds exiting edges to the original VPlan, instead of adding exit blocks
later.
This retains the exit conditions early, and means we can handle early
exits before forming regions, without the reliance on VPRecipeBuilder.
Once we retain all exits initially, handling early exits before region
construction ensures the regions are valid; otherwise we would leave
edges exiting the region from elsewhere than the latch.
Removing the reliance on VPRecipeBuilder removes the dependence on
mapping IR BBs to VPBBs and unblocks predication as VPlan transform:
#128420.
Depends on #137709 (included in PR).