[VPlan] Introduce VPlan-level constant folder #125365
base: main
Conversation
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes: Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan, and use it in VPBuilder to constant-fold when all the underlying IR values passed into the API are constants.

-- 8< --

Patch is 28.78 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/125365.diff

10 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index bc44ec11edb7b0..cfbc075e6dcd7d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,7 +25,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
#include "VPlan.h"
-#include "llvm/ADT/SmallSet.h"
+#include "VPlanConstantFolder.h"
#include "llvm/Support/InstructionCost.h"
namespace llvm {
@@ -45,6 +45,8 @@ class VPRecipeBuilder;
class VPBuilder {
VPBasicBlock *BB = nullptr;
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
+ VPlan &Plan;
+ VPConstantFolder Folder;
/// Insert \p VPI in BB at InsertPt if BB is set.
template <typename T> T *tryInsertInstruction(T *R) {
@@ -66,10 +68,15 @@ class VPBuilder {
}
public:
- VPBuilder() = default;
- VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
- VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
- VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
+ VPBuilder(VPlan &Plan) : Plan(Plan) {}
+ VPBuilder(VPlan &Plan, VPBasicBlock *InsertBB) : Plan(Plan) {
+ setInsertPoint(InsertBB);
+ }
+ VPBuilder(VPlan &Plan, VPRecipeBase *InsertPt) : Plan(Plan) {
+ setInsertPoint(InsertPt);
+ }
+ VPBuilder(VPlan &Plan, VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
+ : Plan(Plan) {
setInsertPoint(TheBB, IP);
}
@@ -83,13 +90,6 @@ class VPBuilder {
VPBasicBlock *getInsertBlock() const { return BB; }
VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
- /// Create a VPBuilder to insert after \p R.
- static VPBuilder getToInsertAfter(VPRecipeBase *R) {
- VPBuilder B;
- B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
- return B;
- }
-
/// InsertPoint - A saved insertion point.
class VPInsertPoint {
VPBasicBlock *Block = nullptr;
@@ -176,17 +176,22 @@ class VPBuilder {
VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldNot(Operand))
+ return Plan.getOrAddLiveIn(V);
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
}
VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldAnd(LHS, RHS))
+ return Plan.getOrAddLiveIn(V);
return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
}
VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
-
+ if (auto *V = Folder.foldOr(LHS, RHS))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(new VPInstruction(
Instruction::BinaryOps::Or, {LHS, RHS},
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
@@ -194,6 +199,8 @@ class VPBuilder {
VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldLogicalAnd(LHS, RHS))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
}
@@ -201,6 +208,8 @@ class VPBuilder {
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
DebugLoc DL = {}, const Twine &Name = "",
std::optional<FastMathFlags> FMFs = std::nullopt) {
+ if (auto *V = Folder.foldSelect(Cond, TrueVal, FalseVal))
+ return Plan.getOrAddLiveIn(V);
auto *Select =
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
*FMFs, DL, Name)
@@ -216,17 +225,23 @@ class VPBuilder {
DebugLoc DL = {}, const Twine &Name = "") {
assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
+ if (auto *V = Folder.foldCmp(Pred, A, B))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
}
- VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
- const Twine &Name = "") {
+ VPValue *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
+ const Twine &Name = "") {
+ if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::none()))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
}
VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::inBounds()))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
}
@@ -242,14 +257,18 @@ class VPBuilder {
new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
}
- VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
- Type *ResultTy, DebugLoc DL) {
+ VPValue *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *ResultTy, DebugLoc DL) {
+ if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
}
- VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
- Type *ResultTy) {
+ VPValue *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *ResultTy) {
+ if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
}
@@ -390,9 +409,6 @@ class LoopVectorizationPlanner {
/// Profitable vector factors.
SmallVector<VectorizationFactor, 8> ProfitableVFs;
- /// A builder used to construct the current plan.
- VPBuilder Builder;
-
/// Computes the cost of \p Plan for vectorization factor \p VF.
///
/// The current implementation requires access to the
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 493ce848171211..858eddae56943b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8947,7 +8947,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
Header->insert(CanonicalIVPHI, Header->begin());
- VPBuilder Builder(TopRegion->getExitingBasicBlock());
+ VPBuilder Builder(Plan, TopRegion->getExitingBasicBlock());
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
auto *CanonicalIVIncrement = Builder.createOverflowingOp(
Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL,
@@ -9007,9 +9007,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBuilder VectorPHBuilder(
- cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
- VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
- VPBuilder ScalarPHBuilder(ScalarPH);
+ Plan, cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
+ VPBuilder MiddleBuilder(Plan, MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+ VPBuilder ScalarPHBuilder(Plan, ScalarPH);
VPValue *OneVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) {
@@ -9101,7 +9101,7 @@ addUsersInExitBlocks(VPlan &Plan,
return;
auto *MiddleVPBB = Plan.getMiddleBlock();
- VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+ VPBuilder B(Plan, MiddleVPBB, MiddleVPBB->getFirstNonPhi());
// Introduce extract for exiting values and update the VPIRInstructions
// modeling the corresponding LCSSA phis.
@@ -9123,8 +9123,8 @@ static void addExitUsersForFirstOrderRecurrences(
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
auto *ScalarPHVPBB = Plan.getScalarPreheader();
auto *MiddleVPBB = Plan.getMiddleBlock();
- VPBuilder ScalarPHBuilder(ScalarPHVPBB);
- VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+ VPBuilder ScalarPHBuilder(Plan, ScalarPHVPBB);
+ VPBuilder MiddleBuilder(Plan, MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPValue *TwoVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));
@@ -9261,8 +9261,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
- VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
- Builder);
+ VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE);
// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
@@ -9318,7 +9317,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// ingredients and fill a new VPBasicBlock.
if (VPBB != HeaderVPBB)
VPBB->setName(BB->getName());
- Builder.setInsertPoint(VPBB);
+ RecipeBuilder.setInsertPoint(VPBB);
if (VPBB == HeaderVPBB)
RecipeBuilder.createHeaderMask();
@@ -9482,7 +9481,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// Sink users of fixed-order recurrence past the recipe defining the previous
// value and introduce FirstOrderRecurrenceSplice VPInstructions.
if (!VPlanTransforms::runPass(VPlanTransforms::adjustFixedOrderRecurrences,
- *Plan, Builder))
+ *Plan, RecipeBuilder.getIRBuilder()))
return nullptr;
if (useActiveLaneMask(Style)) {
@@ -9532,8 +9531,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// Collect mapping of IR header phis to header phi recipes, to be used in
// addScalarResumePhis.
- VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
- Builder);
+ VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE);
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
@@ -9698,6 +9696,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
}
}
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
+ VPBuilder Builder(*Plan);
Builder.setInsertPoint(&*LatchVPBB->begin());
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
for (VPRecipeBase &R :
@@ -10205,7 +10204,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
m_Specific(VectorTC), m_SpecificInt(0)));
}))
return;
- VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
+ VPBuilder ScalarPHBuilder(MainPlan, MainScalarPH, MainScalarPH->begin());
ScalarPHBuilder.createNaryOp(
VPInstruction::ResumePhi,
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 87c97d1edd7b6a..c9c3a1abec5283 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -12,7 +12,6 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerUnion.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/IRBuilder.h"
@@ -65,7 +64,7 @@ class VPRecipeBuilder {
PredicatedScalarEvolution &PSE;
- VPBuilder &Builder;
+ VPBuilder Builder;
/// When we if-convert we need to create edge masks. We have to cache values
/// so that we don't end up with exponential recursion/IR. Note that
@@ -155,9 +154,13 @@ class VPRecipeBuilder {
const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM,
- PredicatedScalarEvolution &PSE, VPBuilder &Builder)
+ PredicatedScalarEvolution &PSE)
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
- CM(CM), PSE(PSE), Builder(Builder) {}
+ CM(CM), PSE(PSE), Builder(Plan) {}
+
+ void setInsertPoint(VPBasicBlock *VPBB) { Builder.setInsertPoint(VPBB); }
+
+ VPBuilder &getIRBuilder() { return Builder; }
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
auto It = ScaledReductionMap.find(ExitInst);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4a1512abe4e48c..5f7a69fd35a088 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -915,7 +915,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
// of the corresponding compare because they may have ended up with
// different line numbers and we want to avoid awkward line stepping while
// debugging. Eg. if the compare has got a line number inside the loop.
- VPBuilder Builder(MiddleVPBB);
+ VPBuilder Builder(*Plan, MiddleVPBB);
VPValue *Cmp =
TailFolded
? Plan->getOrAddLiveIn(ConstantInt::getTrue(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
new file mode 100644
index 00000000000000..01e47e92afa5cf
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
@@ -0,0 +1,96 @@
+//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VPlanValue.h"
+#include "llvm/IR/ConstantFold.h"
+#include "llvm/IR/Constants.h"
+
+namespace llvm {
+class VPConstantFolder {
+private:
+ Constant *getIRConstant(VPValue *V) const {
+ return dyn_cast_or_null<Constant>(V->getUnderlyingValue());
+ }
+
+ Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
+ VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC) {
+ if (ConstantExpr::isDesirableBinOp(Opcode))
+ return ConstantExpr::get(Opcode, LC, RC);
+ return ConstantFoldBinaryInstruction(Opcode, LC, RC);
+ }
+ return nullptr;
+ }
+
+public:
+ Value *foldAnd(VPValue *LHS, VPValue *RHS) const {
+ return foldBinOp(Instruction::BinaryOps::And, LHS, RHS);
+ }
+
+ Value *foldOr(VPValue *LHS, VPValue *RHS) const {
+ return foldBinOp(Instruction::BinaryOps::Or, LHS, RHS);
+ }
+
+ Value *foldNot(VPValue *Op) const {
+ auto *C = getIRConstant(Op);
+ if (C)
+ return ConstantExpr::get(Instruction::BinaryOps::Xor, C,
+ Constant::getAllOnesValue(C->getType()));
+ return nullptr;
+ }
+
+ Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC)
+ return ConstantFoldSelectInstruction(
+ LC, RC, ConstantInt::getNullValue(RC->getType()));
+ return nullptr;
+ }
+
+ Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
+ auto *CC = getIRConstant(Cond);
+ auto *TV = getIRConstant(TrueVal);
+ auto *FV = getIRConstant(FalseVal);
+ if (CC && TV && FV)
+ return ConstantFoldSelectInstruction(CC, TV, FV);
+ return nullptr;
+ }
+
+ Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC)
+ return ConstantFoldCompareInstruction(Pred, LC, RC);
+ return nullptr;
+ }
+
+ Value *foldPtrAdd(VPValue *Base, VPValue *Offset, GEPNoWrapFlags NW) const {
+ auto *BC = getIRConstant(Base);
+ auto *OC = getIRConstant(Offset);
+ if (BC && OC) {
+ auto &Ctx = BC->getType()->getContext();
+ return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), BC, OC, NW);
+ }
+ return nullptr;
+ }
+
+ Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *DestTy) const {
+ auto *C = getIRConstant(Op);
+ if (C) {
+ if (ConstantExpr::isDesirableCastOp(Opcode))
+ return ConstantExpr::getCast(Opcode, C, DestTy);
+ return ConstantFoldCastInstruction(Opcode, C, DestTy);
+ }
+ return nullptr;
+ }
+};
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 5a2e5d7cfee48d..a0505e0d1bb8d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -72,7 +72,7 @@ class PlainCFGBuilder {
public:
PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
- : TheLoop(Lp), LI(LI), Plan(P) {}
+ : TheLoop(Lp), LI(LI), Plan(P), VPIRBuilder(Plan) {}
/// Build plain CFG for TheLoop and connects it to Plan's entry.
void buildPlainCFG();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a1a2cf211abf88..5e0d1efb959030 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -532,8 +532,8 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
VPBuilder &Builder) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
- VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
- Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
+ VPValue *BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV,
+ Step, "offset.idx");
// Truncate base induction if needed.
Type *CanonicalIVType = CanonicalIV->getScalarType();
@@ -591,7 +591,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
- VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+ VPBuilder Builder(Plan, HeaderVPBB, HeaderVPBB->getFirstNonPhi());
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *PhiR = dyn_cast<VPWidenInductionRecipe>(&Phi);
if (!PhiR)
@@ -744,7 +744,7 @@ void VPlanTransforms::optimizeInductionExitUsers(
"predecessor must be the middle block");
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
- VPBuilder B(Plan.getMiddleBlock()->getTerminator());
+ VPBuilder B(Plan, Plan.getMiddleBlock()->getTerminator());
for (VPRecipeBase &R : *ExitVPBB) {
auto *ExitIRI = cast<VPIRInstruction>(&R);
if (!isa<PHINode>(ExitIRI->getInstruction()))
@@ -1505,7 +1505,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// we have to take unrolling into account. Each part needs to start at
// Part * VF
auto *VecPreheader = Plan.getVectorPreheader();
- VPBuilder Builder(VecPreheader);
+ VPBuilder Builder(Plan, VecPreheader);
// Create the ActiveLaneMask instruction using the correct start values.
VPValue *TC = Plan.getTripCount();
@@ -1624,7 +1624,8 @@ void VPlanTransforms::addActiveLaneMask(
LaneMask = addVPLaneMaskPhiA...
[truncated]
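To make the intended effect of the patch concrete, here is a small usage illustration. It is not taken from the patch; Ctx, Plan, and Builder are assumed to be an existing LLVMContext, VPlan, and VPBuilder set up as in the diff above. When both operands of a builder call wrap IR constants, the builder returns a folded live-in instead of inserting a recipe.

  // Illustration only: both operands are live-ins wrapping IR constants, so
  // the folding VPBuilder returns a constant live-in and inserts no recipe.
  Type *Int32Ty = Type::getInt32Ty(Ctx);
  VPValue *A = Plan.getOrAddLiveIn(ConstantInt::get(Int32Ty, 6));
  VPValue *B = Plan.getOrAddLiveIn(ConstantInt::get(Int32Ty, 3));
  VPValue *R = Builder.createAnd(A, B);
  // R is the live-in for the folded constant 2 (6 & 3), not a new VPInstruction.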
Force-pushed c06b65b to 2df7409.
Force-pushed 2df7409 to cc98b9e.
Okay, this is now fixed to get the Plan on-demand.
@nikic I'm not sure how the ConstantExpr creation ties into your effort to remove constant expressions. Could you kindly have a look?
Please only use APIs defined in https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Analysis/ConstantFolding.h. I'd consider only extracting the IR values and delegating the actual folding to https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Analysis/TargetFolder.h.
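For illustration, a minimal sketch of that delegation could look like the following. This is not the patch itself, and the constructor and the extent of the Fold* forwarding are assumptions; the VPlan-specific work is limited to unwrapping VPValues to their underlying IR constants, with the folding itself left to TargetFolder, which returns nullptr when it cannot fold.

  #include "VPlanValue.h"
  #include "llvm/Analysis/TargetFolder.h"
  #include "llvm/IR/Constants.h"

  namespace llvm {
  // Sketch only: delegate the actual folding to TargetFolder instead of
  // calling ConstantExpr/ConstantFold* helpers directly.
  class VPConstantFolder {
    TargetFolder Folder;

    Constant *getIRConstant(VPValue *V) const {
      return dyn_cast_or_null<Constant>(V->getUnderlyingValue());
    }

  public:
    VPConstantFolder(const DataLayout &DL) : Folder(DL) {}

    Value *foldAnd(VPValue *LHS, VPValue *RHS) const {
      auto *LC = getIRConstant(LHS);
      auto *RC = getIRConstant(RHS);
      if (!LC || !RC)
        return nullptr;
      // Matches the "return nullptr if no fold" contract of the builder hooks.
      return Folder.FoldBinOp(Instruction::And, LC, RC);
    }
  };
  } // namespace llvm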
Hmm, I am a bit surprised there's not more to fold in the existing test cases. Any chance we could add at least one test covering each fold?
I was actually very surprised when I authored the patch and saw so few test changes. Yes, will write tests to cover each fold, along with the other pending items in this review.
Force-pushed 237087d to 2f96400.
Fixed issues, and added test coverage. I'm not really happy with the …
Gentle ping.
Force-pushed 2f96400 to 3447419.
Force-pushed 3447419 to 0b7d703.
Force-pushed 0b7d703 to 241c8a6.
Thanks for the review: I've now moved the logic to simplifyRecipe, leaving a couple of TODOs. If this is the desired approach, we can improve upon it in follow-ups.
Force-pushed 241c8a6 to a1b8383.
Thanks for the review. The new design is looking good.
I suspect this will fix #125278 by folding away the redundant casts?
Force-pushed a1b8383 to d2ddfec.
It doesn't, unfortunately.
Gentle ping.
Thanks for the latest updates.
It looks like some checks related to pointers may be missing; I am seeing a crash with the following IR:
; opt -p loop-vectorize
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx15.0.0"
@postscale = external constant [64 x float]
define void @test(ptr %data) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv78 = phi i64 [ %indvars.iv.next79, %for.body ], [ 0, %entry ]
%0 = or disjoint i64 %indvars.iv78, 1
%arrayidx142 = getelementptr [64 x float], ptr @postscale, i64 0, i64 %0
%1 = load float, ptr %arrayidx142, align 4, !tbaa !0
%2 = tail call i64 @llvm.lrint.i64.f32(float %1)
%conv145 = trunc i64 %2 to i16
store i16 %conv145, ptr %data, align 2, !tbaa !4
%indvars.iv.next79 = add i64 %indvars.iv78, 1
%exitcond.not = icmp eq i64 %indvars.iv.next79, 8
br i1 %exitcond.not, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.lrint.i64.f32(float) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
!0 = !{!1, !1, i64 0}
!1 = !{!"float", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}
!4 = !{!5, !5, i64 0}
!5 = !{!"short", !2, i64 0}
Thanks, this was actually a bug in GEP type inference. I've now added your test.
-  Instruction *ScalarPredInst =
-      cast<Instruction>(State.get(getOperand(0), *State.Lane));
+  Value *ScalarPred = State.get(getOperand(0), *State.Lane);
+  Instruction *ScalarPredInst = dyn_cast<Instruction>(ScalarPred);
+  if (!ScalarPredInst) {
+    if (State.hasScalarValue(this, *State.Lane))
+      State.reset(this, ScalarPred, *State.Lane);
+    else
+      State.set(this, ScalarPred, *State.Lane);
+    return;
+  }
Unfortunately, it is impossible to cook up a test that will facilitate sending this change independently: the short-sighted handling is only exposed by the constant-folding patch.
Thanks for the latest updates!
   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
       Plan.getEntry());
   VPTypeAnalysis TypeInfo(&CanonicalIVTy);
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      simplifyRecipe(R, TypeInfo);
+      simplifyRecipe(R, TypeInfo, DL);
Would it be possible to retrieve DL from Plan, by using the scalar header IR BB, which is always available, something like Plan.getScalarHeader()->getIRBasicBlock()->getDataLayout()?
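For concreteness, the suggested retrieval might look like this at the call site (illustrative only; it reuses R and TypeInfo from the hunk above instead of threading DL through as an extra parameter):

  // Sketch of the reviewer's suggestion, not the patch: derive DL from the
  // always-available scalar header IR block of the plan.
  const DataLayout &DL =
      Plan.getScalarHeader()->getIRBasicBlock()->getDataLayout();
  simplifyRecipe(R, TypeInfo, DL);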
Introduce a VPlan-level constant folder in simplifyRecipes that tries to fold a recipe to a constant using TargetFolder.
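Since the final form of the patch is not reproduced in full above, the following is only a rough sketch of the shape such a fold inside a VPlan simplification could take. Apart from documented VPlan APIs (operands(), getUnderlyingValue(), getOrAddLiveIn(), replaceAllUsesWith(), eraseFromParent()), all names, the opcode dispatch, and the omitted includes are assumptions.

  // Rough sketch, not the actual patch: try to fold a VPInstruction whose
  // operands all wrap IR constants, using TargetFolder, and replace it with a
  // live-in constant on success.
  static void tryToConstantFold(VPRecipeBase &R, VPlan &Plan,
                                const DataLayout &DL) {
    auto *VPI = dyn_cast<VPInstruction>(&R);
    if (!VPI)
      return;

    // Collect the underlying IR constants of all operands; bail out if any
    // operand is not a constant live-in.
    SmallVector<Value *, 4> Ops;
    for (VPValue *Op : VPI->operands()) {
      auto *C = dyn_cast_or_null<Constant>(Op->getUnderlyingValue());
      if (!C)
        return;
      Ops.push_back(C);
    }

    TargetFolder Folder(DL);
    Value *Folded = nullptr;
    if (Instruction::isBinaryOp(VPI->getOpcode()) && Ops.size() == 2)
      Folded = Folder.FoldBinOp(
          static_cast<Instruction::BinaryOps>(VPI->getOpcode()), Ops[0], Ops[1]);
    // Other opcodes (casts, compares, selects, ptradd) would be handled
    // similarly via the corresponding TargetFolder entry points.

    if (Folded) {
      VPI->replaceAllUsesWith(Plan.getOrAddLiveIn(Folded));
      VPI->eraseFromParent();
    }
  }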