Thanks to visit codestin.com
Credit goes to github.com

Skip to content

[VPlan] Introduce VPlan-level constant folder #125365

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from

Conversation

artagnon
Copy link
Contributor

@artagnon artagnon commented Feb 1, 2025

Introduce a VPlan-level constant folder in simplifyRecipes that tries to fold a recipe to a constant using TargetFolder.

@llvmbot
Copy link
Member

llvmbot commented Feb 1, 2025

@llvm/pr-subscribers-backend-powerpc
@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes

Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan, and use it in VPBuilder to constant-fold when all the underlying IR values passed into the API are constants.

-- 8< --
Based on #125364.


Patch is 28.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125365.diff

10 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (+38-22)
  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+13-14)
  • (modified) llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h (+7-4)
  • (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+1-1)
  • (added) llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h (+96)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp (+1-1)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+13-12)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+1-1)
  • (modified) llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll (+2-48)
  • (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+2-3)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index bc44ec11edb7b0..cfbc075e6dcd7d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,7 +25,7 @@
 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
 
 #include "VPlan.h"
-#include "llvm/ADT/SmallSet.h"
+#include "VPlanConstantFolder.h"
 #include "llvm/Support/InstructionCost.h"
 
 namespace llvm {
@@ -45,6 +45,8 @@ class VPRecipeBuilder;
 class VPBuilder {
   VPBasicBlock *BB = nullptr;
   VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
+  VPlan &Plan;
+  VPConstantFolder Folder;
 
   /// Insert \p VPI in BB at InsertPt if BB is set.
   template <typename T> T *tryInsertInstruction(T *R) {
@@ -66,10 +68,15 @@ class VPBuilder {
   }
 
 public:
-  VPBuilder() = default;
-  VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
-  VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
-  VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
+  VPBuilder(VPlan &Plan) : Plan(Plan) {}
+  VPBuilder(VPlan &Plan, VPBasicBlock *InsertBB) : Plan(Plan) {
+    setInsertPoint(InsertBB);
+  }
+  VPBuilder(VPlan &Plan, VPRecipeBase *InsertPt) : Plan(Plan) {
+    setInsertPoint(InsertPt);
+  }
+  VPBuilder(VPlan &Plan, VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
+      : Plan(Plan) {
     setInsertPoint(TheBB, IP);
   }
 
@@ -83,13 +90,6 @@ class VPBuilder {
   VPBasicBlock *getInsertBlock() const { return BB; }
   VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
 
-  /// Create a VPBuilder to insert after \p R.
-  static VPBuilder getToInsertAfter(VPRecipeBase *R) {
-    VPBuilder B;
-    B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
-    return B;
-  }
-
   /// InsertPoint - A saved insertion point.
   class VPInsertPoint {
     VPBasicBlock *Block = nullptr;
@@ -176,17 +176,22 @@ class VPBuilder {
 
   VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
                      const Twine &Name = "") {
+    if (auto *V = Folder.foldNot(Operand))
+      return Plan.getOrAddLiveIn(V);
     return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
   }
 
   VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                      const Twine &Name = "") {
+    if (auto *V = Folder.foldAnd(LHS, RHS))
+      return Plan.getOrAddLiveIn(V);
     return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
   }
 
   VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                     const Twine &Name = "") {
-
+    if (auto *V = Folder.foldOr(LHS, RHS))
+      return Plan.getOrAddLiveIn(V);
     return tryInsertInstruction(new VPInstruction(
         Instruction::BinaryOps::Or, {LHS, RHS},
         VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
@@ -194,6 +199,8 @@ class VPBuilder {
 
   VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                             const Twine &Name = "") {
+    if (auto *V = Folder.foldLogicalAnd(LHS, RHS))
+      return Plan.getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
   }
@@ -201,6 +208,8 @@ class VPBuilder {
   VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
                         DebugLoc DL = {}, const Twine &Name = "",
                         std::optional<FastMathFlags> FMFs = std::nullopt) {
+    if (auto *V = Folder.foldSelect(Cond, TrueVal, FalseVal))
+      return Plan.getOrAddLiveIn(V);
     auto *Select =
         FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
                                  *FMFs, DL, Name)
@@ -216,17 +225,23 @@ class VPBuilder {
                       DebugLoc DL = {}, const Twine &Name = "") {
     assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
            Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
+    if (auto *V = Folder.foldCmp(Pred, A, B))
+      return Plan.getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
   }
 
-  VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
-                              const Twine &Name = "") {
+  VPValue *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
+                        const Twine &Name = "") {
+    if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::none()))
+      return Plan.getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
   }
   VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
                                 const Twine &Name = "") {
+    if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::inBounds()))
+      return Plan.getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
   }
@@ -242,14 +257,18 @@ class VPBuilder {
         new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
   }
 
-  VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
-                                       Type *ResultTy, DebugLoc DL) {
+  VPValue *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
+                            Type *ResultTy, DebugLoc DL) {
+    if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+      return Plan.getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
   }
 
-  VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
-                                     Type *ResultTy) {
+  VPValue *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
+                           Type *ResultTy) {
+    if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+      return Plan.getOrAddLiveIn(V);
     return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
   }
 
@@ -390,9 +409,6 @@ class LoopVectorizationPlanner {
   /// Profitable vector factors.
   SmallVector<VectorizationFactor, 8> ProfitableVFs;
 
-  /// A builder used to construct the current plan.
-  VPBuilder Builder;
-
   /// Computes the cost of \p Plan for vectorization factor \p VF.
   ///
   /// The current implementation requires access to the
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 493ce848171211..858eddae56943b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8947,7 +8947,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
   VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
   Header->insert(CanonicalIVPHI, Header->begin());
 
-  VPBuilder Builder(TopRegion->getExitingBasicBlock());
+  VPBuilder Builder(Plan, TopRegion->getExitingBasicBlock());
   // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
   auto *CanonicalIVIncrement = Builder.createOverflowingOp(
       Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL,
@@ -9007,9 +9007,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
   auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
   VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
   VPBuilder VectorPHBuilder(
-      cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
-  VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
-  VPBuilder ScalarPHBuilder(ScalarPH);
+      Plan, cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
+  VPBuilder MiddleBuilder(Plan, MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+  VPBuilder ScalarPHBuilder(Plan, ScalarPH);
   VPValue *OneVPV = Plan.getOrAddLiveIn(
       ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
   for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) {
@@ -9101,7 +9101,7 @@ addUsersInExitBlocks(VPlan &Plan,
     return;
 
   auto *MiddleVPBB = Plan.getMiddleBlock();
-  VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+  VPBuilder B(Plan, MiddleVPBB, MiddleVPBB->getFirstNonPhi());
 
   // Introduce extract for exiting values and update the VPIRInstructions
   // modeling the corresponding LCSSA phis.
@@ -9123,8 +9123,8 @@ static void addExitUsersForFirstOrderRecurrences(
   VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
   auto *ScalarPHVPBB = Plan.getScalarPreheader();
   auto *MiddleVPBB = Plan.getMiddleBlock();
-  VPBuilder ScalarPHBuilder(ScalarPHVPBB);
-  VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+  VPBuilder ScalarPHBuilder(Plan, ScalarPHVPBB);
+  VPBuilder MiddleBuilder(Plan, MiddleVPBB, MiddleVPBB->getFirstNonPhi());
   VPValue *TwoVPV = Plan.getOrAddLiveIn(
       ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));
 
@@ -9261,8 +9261,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
   addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
 
-  VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
-                                Builder);
+  VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE);
 
   // ---------------------------------------------------------------------------
   // Pre-construction: record ingredients whose recipes we'll need to further
@@ -9318,7 +9317,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
     // ingredients and fill a new VPBasicBlock.
     if (VPBB != HeaderVPBB)
       VPBB->setName(BB->getName());
-    Builder.setInsertPoint(VPBB);
+    RecipeBuilder.setInsertPoint(VPBB);
 
     if (VPBB == HeaderVPBB)
       RecipeBuilder.createHeaderMask();
@@ -9482,7 +9481,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   // Sink users of fixed-order recurrence past the recipe defining the previous
   // value and introduce FirstOrderRecurrenceSplice VPInstructions.
   if (!VPlanTransforms::runPass(VPlanTransforms::adjustFixedOrderRecurrences,
-                                *Plan, Builder))
+                                *Plan, RecipeBuilder.getIRBuilder()))
     return nullptr;
 
   if (useActiveLaneMask(Style)) {
@@ -9532,8 +9531,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
 
   // Collect mapping of IR header phis to header phi recipes, to be used in
   // addScalarResumePhis.
-  VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
-                                Builder);
+  VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE);
   for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
     if (isa<VPCanonicalIVPHIRecipe>(&R))
       continue;
@@ -9698,6 +9696,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     }
   }
   VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
+  VPBuilder Builder(*Plan);
   Builder.setInsertPoint(&*LatchVPBB->begin());
   VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
   for (VPRecipeBase &R :
@@ -10205,7 +10204,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
                              m_Specific(VectorTC), m_SpecificInt(0)));
       }))
     return;
-  VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
+  VPBuilder ScalarPHBuilder(MainPlan, MainScalarPH, MainScalarPH->begin());
   ScalarPHBuilder.createNaryOp(
       VPInstruction::ResumePhi,
       {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 87c97d1edd7b6a..c9c3a1abec5283 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -12,7 +12,6 @@
 #include "LoopVectorizationPlanner.h"
 #include "VPlan.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerUnion.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/IR/IRBuilder.h"
 
@@ -65,7 +64,7 @@ class VPRecipeBuilder {
 
   PredicatedScalarEvolution &PSE;
 
-  VPBuilder &Builder;
+  VPBuilder Builder;
 
   /// When we if-convert we need to create edge masks. We have to cache values
   /// so that we don't end up with exponential recursion/IR. Note that
@@ -155,9 +154,13 @@ class VPRecipeBuilder {
                   const TargetTransformInfo *TTI,
                   LoopVectorizationLegality *Legal,
                   LoopVectorizationCostModel &CM,
-                  PredicatedScalarEvolution &PSE, VPBuilder &Builder)
+                  PredicatedScalarEvolution &PSE)
       : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
-        CM(CM), PSE(PSE), Builder(Builder) {}
+        CM(CM), PSE(PSE), Builder(Plan) {}
+
+  void setInsertPoint(VPBasicBlock *VPBB) { Builder.setInsertPoint(VPBB); }
+
+  VPBuilder &getIRBuilder() { return Builder; }
 
   std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
     auto It = ScaledReductionMap.find(ExitInst);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4a1512abe4e48c..5f7a69fd35a088 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -915,7 +915,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
   // of the corresponding compare because they may have ended up with
   // different line numbers and we want to avoid awkward line stepping while
   // debugging. Eg. if the compare has got a line number inside the loop.
-  VPBuilder Builder(MiddleVPBB);
+  VPBuilder Builder(*Plan, MiddleVPBB);
   VPValue *Cmp =
       TailFolded
           ? Plan->getOrAddLiveIn(ConstantInt::getTrue(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
new file mode 100644
index 00000000000000..01e47e92afa5cf
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
@@ -0,0 +1,96 @@
+//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VPlanValue.h"
+#include "llvm/IR/ConstantFold.h"
+#include "llvm/IR/Constants.h"
+
+namespace llvm {
+class VPConstantFolder {
+private:
+  Constant *getIRConstant(VPValue *V) const {
+    return dyn_cast_or_null<Constant>(V->getUnderlyingValue());
+  }
+
+  Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
+                   VPValue *RHS) const {
+    auto *LC = getIRConstant(LHS);
+    auto *RC = getIRConstant(RHS);
+    if (LC && RC) {
+      if (ConstantExpr::isDesirableBinOp(Opcode))
+        return ConstantExpr::get(Opcode, LC, RC);
+      return ConstantFoldBinaryInstruction(Opcode, LC, RC);
+    }
+    return nullptr;
+  }
+
+public:
+  Value *foldAnd(VPValue *LHS, VPValue *RHS) const {
+    return foldBinOp(Instruction::BinaryOps::And, LHS, RHS);
+  }
+
+  Value *foldOr(VPValue *LHS, VPValue *RHS) const {
+    return foldBinOp(Instruction::BinaryOps::Or, LHS, RHS);
+  }
+
+  Value *foldNot(VPValue *Op) const {
+    auto *C = getIRConstant(Op);
+    if (C)
+      return ConstantExpr::get(Instruction::BinaryOps::Xor, C,
+                               Constant::getAllOnesValue(C->getType()));
+    return nullptr;
+  }
+
+  Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
+    auto *LC = getIRConstant(LHS);
+    auto *RC = getIRConstant(RHS);
+    if (LC && RC)
+      return ConstantFoldSelectInstruction(
+          LC, RC, ConstantInt::getNullValue(RC->getType()));
+    return nullptr;
+  }
+
+  Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
+    auto *CC = getIRConstant(Cond);
+    auto *TV = getIRConstant(TrueVal);
+    auto *FV = getIRConstant(FalseVal);
+    if (CC && TV && FV)
+      return ConstantFoldSelectInstruction(CC, TV, FV);
+    return nullptr;
+  }
+
+  Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
+    auto *LC = getIRConstant(LHS);
+    auto *RC = getIRConstant(RHS);
+    if (LC && RC)
+      return ConstantFoldCompareInstruction(Pred, LC, RC);
+    return nullptr;
+  }
+
+  Value *foldPtrAdd(VPValue *Base, VPValue *Offset, GEPNoWrapFlags NW) const {
+    auto *BC = getIRConstant(Base);
+    auto *OC = getIRConstant(Offset);
+    if (BC && OC) {
+      auto &Ctx = BC->getType()->getContext();
+      return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), BC, OC, NW);
+    }
+    return nullptr;
+  }
+
+  Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
+                  Type *DestTy) const {
+    auto *C = getIRConstant(Op);
+    if (C) {
+      if (ConstantExpr::isDesirableCastOp(Opcode))
+        return ConstantExpr::getCast(Opcode, C, DestTy);
+      return ConstantFoldCastInstruction(Opcode, C, DestTy);
+    }
+    return nullptr;
+  }
+};
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 5a2e5d7cfee48d..a0505e0d1bb8d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -72,7 +72,7 @@ class PlainCFGBuilder {
 
 public:
   PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
-      : TheLoop(Lp), LI(LI), Plan(P) {}
+      : TheLoop(Lp), LI(LI), Plan(P), VPIRBuilder(Plan) {}
 
   /// Build plain CFG for TheLoop  and connects it to Plan's entry.
   void buildPlainCFG();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a1a2cf211abf88..5e0d1efb959030 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -532,8 +532,8 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
                     VPBuilder &Builder) {
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
-  VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
-      Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
+  VPValue *BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV,
+                                            Step, "offset.idx");
 
   // Truncate base induction if needed.
   Type *CanonicalIVType = CanonicalIV->getScalarType();
@@ -591,7 +591,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
   using namespace llvm::VPlanPatternMatch;
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
-  VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+  VPBuilder Builder(Plan, HeaderVPBB, HeaderVPBB->getFirstNonPhi());
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
     auto *PhiR = dyn_cast<VPWidenInductionRecipe>(&Phi);
     if (!PhiR)
@@ -744,7 +744,7 @@ void VPlanTransforms::optimizeInductionExitUsers(
          "predecessor must be the middle block");
 
   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
-  VPBuilder B(Plan.getMiddleBlock()->getTerminator());
+  VPBuilder B(Plan, Plan.getMiddleBlock()->getTerminator());
   for (VPRecipeBase &R : *ExitVPBB) {
     auto *ExitIRI = cast<VPIRInstruction>(&R);
     if (!isa<PHINode>(ExitIRI->getInstruction()))
@@ -1505,7 +1505,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
   // we have to take unrolling into account. Each part needs to start at
   //   Part * VF
   auto *VecPreheader = Plan.getVectorPreheader();
-  VPBuilder Builder(VecPreheader);
+  VPBuilder Builder(Plan, VecPreheader);
 
   // Create the ActiveLaneMask instruction using the correct start values.
   VPValue *TC = Plan.getTripCount();
@@ -1624,7 +1624,8 @@ void VPlanTransforms::addActiveLaneMask(
     LaneMask = addVPLaneMaskPhiA...
[truncated]

@artagnon artagnon requested a review from ayalz February 18, 2025 10:26
@artagnon artagnon changed the title VPlan: implement VPlan-level constant-folding [VPlan] implement VPlan-level constant-folding Feb 21, 2025
@artagnon artagnon changed the title [VPlan] implement VPlan-level constant-folding [VPlan] Implement VPlan-level constant-folding Feb 21, 2025
@artagnon artagnon changed the title [VPlan] Implement VPlan-level constant-folding [VPlan] Implement constant-folding Feb 22, 2025
@artagnon artagnon changed the title [VPlan] Implement constant-folding [VPlan] Introduce VPlanConstantFolder Feb 22, 2025
@artagnon
Copy link
Contributor Author

Okay, this is now fixed to get the Plan on-demand.

@artagnon artagnon requested a review from nikic February 24, 2025 12:06
@artagnon
Copy link
Contributor Author

@nikic I'm not sure how the ConstantExpr creation ties into your effort to remove constant-expressions. Could you kindly have a look?

@nikic
Copy link
Contributor

nikic commented Feb 24, 2025

@nikic I'm not sure how the ConstantExpr creation ties into your effort to remove constant-expressions. Could you kindly have a look?

Please only use APIs defined in https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Analysis/ConstantFolding.h.

I'd consider only extracting the IR values and delegating the actual folding to https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Analysis/TargetFolder.h.

Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I am a bit surprised there's not more to fold in the existing test cases. Any chance we could at least one test covering each fold?

Copy link
Contributor Author

@artagnon artagnon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I am a bit surprised there's not more to fold in the existing test cases. Any chance we could at least one test covering each fold?

I was actually very surprised when I authored the patch and saw so few test changes. Yes, will write tests to cover each fold, along with the other pending items in this review.

@artagnon
Copy link
Contributor Author

Fixed issues, and added test coverage. I'm not really happy with the if (BB) all over the place, but I don't see what alternative we have if new VPValue will introduce a leak.

@artagnon
Copy link
Contributor Author

Gentle ping.

@artagnon
Copy link
Contributor Author

artagnon commented Apr 2, 2025

Thanks for the review: I've now moved the logic to simplifyRecipe, leaving a couple of TODOs. If this is the desired approach, we can improve upon it in follow-ups.

@artagnon
Copy link
Contributor Author

Thanks for the review. The new design is looking good.

@artagnon artagnon requested a review from huntergr-arm April 22, 2025 16:40
Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this will fix #125278 by folding away the redundant casts?

@artagnon
Copy link
Contributor Author

I suspect this will fix #125278 by folding away the redundant casts?

It doesn't, unfortunately.

@artagnon
Copy link
Contributor Author

artagnon commented May 3, 2025

Gentle ping.

@artagnon artagnon force-pushed the vplan-constfold branch from e13db19 to d3a5271 Compare May 3, 2025 14:29
Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the latest updates.

It looks like some checks related to pointers may be missing, I am seeing a crash with the following IR

; opt -p loop-vectorize

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx15.0.0"

@postscale = external constant [64 x float]

define void @test(ptr %data) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv78 = phi i64 [ %indvars.iv.next79, %for.body ], [ 0, %entry ]
  %0 = or disjoint i64 %indvars.iv78, 1
  %arrayidx142 = getelementptr [64 x float], ptr @postscale, i64 0, i64 %0
  %1 = load float, ptr %arrayidx142, align 4, !tbaa !0
  %2 = tail call i64 @llvm.lrint.i64.f32(float %1)
  %conv145 = trunc i64 %2 to i16
  store i16 %conv145, ptr %data, align 2, !tbaa !4
  %indvars.iv.next79 = add i64 %indvars.iv78, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next79, 8
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.lrint.i64.f32(float) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!0 = !{!1, !1, i64 0}
!1 = !{!"float", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}
!4 = !{!5, !5, i64 0}
!5 = !{!"short", !2, i64 0}

artagnon added 6 commits May 6, 2025 14:47
Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan,
and use it in VPBuilder to constant-fold when all the underlying IR
values passed into the API are constants.
@artagnon artagnon force-pushed the vplan-constfold branch from d3a5271 to aedd1bb Compare May 6, 2025 13:51
@artagnon
Copy link
Contributor Author

artagnon commented May 6, 2025

It looks like some checks related to pointers may be missing, I am seeing a crash with the following IR

Thanks, this was actually a bug in GEP type inference. I've now added your test.

Comment on lines -2731 to +2739
Instruction *ScalarPredInst =
cast<Instruction>(State.get(getOperand(0), *State.Lane));
Value *ScalarPred = State.get(getOperand(0), *State.Lane);
Instruction *ScalarPredInst = dyn_cast<Instruction>(ScalarPred);
if (!ScalarPredInst) {
if (State.hasScalarValue(this, *State.Lane))
State.reset(this, ScalarPred, *State.Lane);
else
State.set(this, ScalarPred, *State.Lane);
return;
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, it is impossible to cook up a test that will facilitate sending this change independently: the short-sighted handling is only exposed by the constant-folding patch.

@artagnon artagnon changed the title [VPlan] Introduce VPlanConstantFolder [VPlan] Introduce VPlan-level constant folder May 7, 2025
Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the latest updates!

ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan.getEntry());
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
simplifyRecipe(R, TypeInfo);
simplifyRecipe(R, TypeInfo, DL);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to retrieve DL from Plan, by using the scalar header IR BB, which is always available, something like Plan.getScalarHeader()->getIRBasicBlock()->getDataLayout()?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants