Thanks to visit codestin.com
Credit goes to github.com

Skip to content

[SLP][NFC]Extract values state/operands analysis into separate class #138724

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

alexey-bataev
Copy link
Member

Extract values state and operands analysis/building into a separate
class. This class allows to localize instrutions state and operands
building for future support of copyable elements vectorization.

Created using spr 1.3.5
@llvmbot
Copy link
Member

llvmbot commented May 6, 2025

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Extract values state and operands analysis/building into a separate
class. This class allows to localize instrutions state and operands
building for future support of copyable elements vectorization.


Patch is 26.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138724.diff

1 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+273-146)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a6ae26f2f0e1a..d0602762afc2f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2855,9 +2855,13 @@ class BoUpSLP {
     }
 
     /// Go through the instructions in VL and append their operands.
-    void appendOperandsOfVL(ArrayRef<Value *> VL, const InstructionsState &S) {
-      assert(!VL.empty() && "Bad VL");
-      assert((empty() || VL.size() == getNumLanes()) &&
+    void appendOperands(ArrayRef<Value *> VL, ArrayRef<ValueList> Operands,
+                        const InstructionsState &S) {
+      assert(!Operands.empty() && !VL.empty() && "Bad list of operands");
+      assert((empty() || all_of(Operands,
+                                [this](const ValueList &VL) {
+                                  return VL.size() == getNumLanes();
+                                })) &&
              "Expected same number of lanes");
       assert(S.valid() && "InstructionsState is invalid.");
       // IntrinsicInst::isCommutative returns true if swapping the first "two"
@@ -2866,26 +2870,11 @@ class BoUpSLP {
       Instruction *MainOp = S.getMainOp();
       unsigned NumOperands = MainOp->getNumOperands();
       ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
-      OpsVec.resize(NumOperands);
+      OpsVec.resize(ArgSize);
       unsigned NumLanes = VL.size();
       for (OperandDataVec &Ops : OpsVec)
         Ops.resize(NumLanes);
       for (unsigned Lane : seq<unsigned>(NumLanes)) {
-        Value *V = VL[Lane];
-        assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
-               "Expected instruction or poison value");
-        if (isa<PoisonValue>(V)) {
-          for (unsigned OpIdx : seq<unsigned>(NumOperands))
-            OpsVec[OpIdx][Lane] = {
-                PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true,
-                false};
-          if (auto *EI = dyn_cast<ExtractElementInst>(MainOp)) {
-            OpsVec[0][Lane] = {EI->getVectorOperand(), true, false};
-          } else if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
-            OpsVec[0][Lane] = {EV->getAggregateOperand(), true, false};
-          }
-          continue;
-        }
         // Our tree has just 3 nodes: the root and two operands.
         // It is therefore trivial to get the APO. We only need to check the
         // opcode of V and whether the operand at OpIdx is the LHS or RHS
@@ -2896,11 +2885,17 @@ class BoUpSLP {
         // Since operand reordering is performed on groups of commutative
         // operations or alternating sequences (e.g., +, -), we can safely tell
         // the inverse operations by checking commutativity.
-        auto [SelectedOp, Ops] = convertTo(cast<Instruction>(VL[Lane]), S);
+        auto *I = dyn_cast<Instruction>(VL[Lane]);
+        if (!I && isa<PoisonValue>(VL[Lane])) {
+          for (unsigned OpIdx : seq<unsigned>(NumOperands))
+            OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], true, false};
+          continue;
+        }
+        auto [SelectedOp, Ops] = convertTo(I, S);
         bool IsInverseOperation = !isCommutative(SelectedOp);
-        for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+        for (unsigned OpIdx : seq<unsigned>(ArgSize)) {
           bool APO = (OpIdx == 0) ? false : IsInverseOperation;
-          OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
+          OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], APO, false};
         }
       }
     }
@@ -3006,12 +3001,12 @@ class BoUpSLP {
 
   public:
     /// Initialize with all the operands of the instruction vector \p RootVL.
-    VLOperands(ArrayRef<Value *> RootVL, const InstructionsState &S,
-               const BoUpSLP &R)
+    VLOperands(ArrayRef<Value *> RootVL, ArrayRef<ValueList> Operands,
+               const InstructionsState &S, const BoUpSLP &R)
         : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
           L(R.LI->getLoopFor(S.getMainOp()->getParent())) {
       // Append all the operands of RootVL.
-      appendOperandsOfVL(RootVL, S);
+      appendOperands(RootVL, Operands, S);
     }
 
     /// \Returns a value vector with the operands across all lanes for the
@@ -3821,12 +3816,6 @@ class BoUpSLP {
     /// Interleaving factor for interleaved loads Vectorize nodes.
     unsigned InterleaveFactor = 0;
 
-  public:
-    /// Returns interleave factor for interleave nodes.
-    unsigned getInterleaveFactor() const { return InterleaveFactor; }
-    /// Sets interleaving factor for the interleaving nodes.
-    void setInterleave(unsigned Factor) { InterleaveFactor = Factor; }
-
     /// Set this bundle's \p OpIdx'th operand to \p OpVL.
     void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
       if (Operands.size() < OpIdx + 1)
@@ -3838,13 +3827,16 @@ class BoUpSLP {
       copy(OpVL, Operands[OpIdx].begin());
     }
 
-    /// Set this bundle's operand from Scalars.
-    void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
-      VLOperands Ops(Scalars, S, R);
-      if (RequireReorder)
-        Ops.reorder();
-      for (unsigned I : seq<unsigned>(S.getMainOp()->getNumOperands()))
-        setOperand(I, Ops.getVL(I));
+  public:
+    /// Returns interleave factor for interleave nodes.
+    unsigned getInterleaveFactor() const { return InterleaveFactor; }
+    /// Sets interleaving factor for the interleaving nodes.
+    void setInterleave(unsigned Factor) { InterleaveFactor = Factor; }
+
+    /// Set this bundle's operands from \p Operands.
+    void setOperands(ArrayRef<ValueList> Operands) {
+      for (unsigned I : seq<unsigned>(Operands.size()))
+        setOperand(I, Operands[I]);
     }
 
     /// Reorders operands of the node to the given mask \p Mask.
@@ -4840,12 +4832,11 @@ class BoUpSLP {
           // where their second (immediate) operand is not added. Since
           // immediates do not affect scheduler behavior this is considered
           // okay.
-          assert(
-              In &&
-              (isa<ExtractValueInst, ExtractElementInst, IntrinsicInst>(In) ||
-               In->getNumOperands() ==
-                   Bundle->getTreeEntry()->getNumOperands()) &&
-              "Missed TreeEntry operands?");
+          assert(In &&
+                 (isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
+                  In->getNumOperands() ==
+                      Bundle->getTreeEntry()->getNumOperands()) &&
+                 "Missed TreeEntry operands?");
 
           for (unsigned OpIdx :
                seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
@@ -9734,6 +9725,190 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
   return true;
 }
 
+namespace {
+/// Class accepts incoming list of values and generates the list of values
+/// for scheduling and list of operands for the new nodes.
+class InstructionsCompatibilityAnalysis {
+  DominatorTree &DT;
+  const DataLayout &DL;
+  const TargetTransformInfo &TTI;
+  const TargetLibraryInfo &TLI;
+
+  /// Builds operands for the original instructions.
+  void
+  buildOriginalOperands(const InstructionsState &S, ArrayRef<Value *> VL,
+                        SmallVectorImpl<BoUpSLP::ValueList> &Operands) const {
+
+    unsigned ShuffleOrOp =
+        S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
+    Instruction *VL0 = S.getMainOp();
+
+    switch (ShuffleOrOp) {
+    case Instruction::PHI: {
+      auto *PH = cast<PHINode>(VL0);
+
+      // Keeps the reordered operands to avoid code duplication.
+      PHIHandler Handler(DT, PH, VL);
+      Handler.buildOperands();
+      Operands.assign(PH->getNumOperands(), {});
+      for (unsigned I : seq<unsigned>(PH->getNumOperands()))
+        Operands[I].assign(Handler.getOperands(I).begin(),
+                           Handler.getOperands(I).end());
+      return;
+    }
+    case Instruction::ExtractValue:
+    case Instruction::ExtractElement:
+      // This is a special case, as it does not gather, but at the same time
+      // we are not extending buildTree_rec() towards the operands.
+      Operands.assign(1, {VL.size(), VL0->getOperand(0)});
+      return;
+    case Instruction::InsertElement:
+      Operands.assign(2, {VL.size(), nullptr});
+      for (auto [Idx, V] : enumerate(VL)) {
+        auto *IE = cast<InsertElementInst>(V);
+        for (auto [OpIdx, Ops] : enumerate(Operands))
+          Ops[Idx] = IE->getOperand(OpIdx);
+      }
+      return;
+    case Instruction::Load:
+      Operands.assign(
+          1, {VL.size(),
+              PoisonValue::get(cast<LoadInst>(VL0)->getPointerOperandType())});
+      for (auto [V, Op] : zip(VL, Operands.back())) {
+        auto *LI = dyn_cast<LoadInst>(V);
+        if (!LI)
+          continue;
+        Op = LI->getPointerOperand();
+      }
+      return;
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::FPExt:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::SIToFP:
+    case Instruction::UIToFP:
+    case Instruction::Trunc:
+    case Instruction::FPTrunc:
+    case Instruction::BitCast:
+    case Instruction::ICmp:
+    case Instruction::FCmp:
+    case Instruction::Select:
+    case Instruction::FNeg:
+    case Instruction::Add:
+    case Instruction::FAdd:
+    case Instruction::Sub:
+    case Instruction::FSub:
+    case Instruction::Mul:
+    case Instruction::FMul:
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::FDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+    case Instruction::FRem:
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor:
+    case Instruction::Freeze:
+    case Instruction::Store:
+    case Instruction::ShuffleVector:
+      Operands.assign(VL0->getNumOperands(), {VL.size(), nullptr});
+      for (auto [Idx, V] : enumerate(VL)) {
+        auto *I = dyn_cast<Instruction>(V);
+        if (!I) {
+          for (auto [OpIdx, Ops] : enumerate(Operands))
+            Ops[Idx] = PoisonValue::get(VL0->getOperand(OpIdx)->getType());
+          continue;
+        }
+        auto [Op, ConvertedOps] = convertTo(I, S);
+        for (auto [OpIdx, Ops] : enumerate(Operands))
+          Ops[Idx] = ConvertedOps[OpIdx];
+      }
+      return;
+    case Instruction::GetElementPtr: {
+      Operands.assign(2, {VL.size(), nullptr});
+      // Need to cast all indices to the same type before vectorization to
+      // avoid crash.
+      // Required to be able to find correct matches between different gather
+      // nodes and reuse the vectorized values rather than trying to gather them
+      // again.
+      const unsigned IndexIdx = 1;
+      Type *VL0Ty = VL0->getOperand(IndexIdx)->getType();
+      Type *Ty = all_of(VL,
+                        [VL0Ty](Value *V) {
+                          auto *GEP = dyn_cast<GetElementPtrInst>(V);
+                          if (!GEP)
+                            return true;
+                          return VL0Ty == GEP->getOperand(IndexIdx)->getType();
+                        })
+                     ? VL0Ty
+                     : DL.getIndexType(cast<GetElementPtrInst>(VL0)
+                                           ->getPointerOperandType()
+                                           ->getScalarType());
+      for (auto [Idx, V] : enumerate(VL)) {
+        auto *GEP = dyn_cast<GetElementPtrInst>(V);
+        if (!GEP) {
+          Operands[0][Idx] = V;
+          Operands[1][Idx] = ConstantInt::getNullValue(Ty);
+          continue;
+        }
+        Operands[0][Idx] = GEP->getPointerOperand();
+        auto *Op = GEP->getOperand(IndexIdx);
+        auto *CI = dyn_cast<ConstantInt>(Op);
+        Operands[1][Idx] = CI ? ConstantFoldIntegerCast(
+                                    CI, Ty, CI->getValue().isSignBitSet(), DL)
+                              : Op;
+      }
+      return;
+    }
+    case Instruction::Call: {
+      auto *CI = cast<CallInst>(VL0);
+      Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, &TLI);
+      for (unsigned Idx : seq<unsigned>(CI->arg_size())) {
+        if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx, &TTI))
+          continue;
+        auto &Ops = Operands.emplace_back();
+        for (Value *V : VL) {
+          auto *I = dyn_cast<Instruction>(V);
+          Ops.push_back(I ? I->getOperand(Idx)
+                          : PoisonValue::get(VL0->getOperand(Idx)->getType()));
+        }
+      }
+      return;
+    }
+    default:
+      break;
+    }
+    llvm_unreachable("Unexpected vectorization of the instructions.");
+  }
+
+public:
+  InstructionsCompatibilityAnalysis(DominatorTree &DT, const DataLayout &DL,
+                                    const TargetTransformInfo &TTI,
+                                    const TargetLibraryInfo &TLI)
+      : DT(DT), DL(DL), TTI(TTI), TLI(TLI) {}
+
+  InstructionsState buildInstructionsState(ArrayRef<Value *> VL) {
+    InstructionsState S = getSameOpcode(VL, TLI);
+    return S;
+  }
+
+  SmallVector<BoUpSLP::ValueList> buildOperands(const InstructionsState &S,
+                                                ArrayRef<Value *> VL) {
+    assert(S && "Invalid state!");
+    SmallVector<BoUpSLP::ValueList> Operands;
+    buildOriginalOperands(S, VL, Operands);
+    return Operands;
+  }
+};
+} // namespace
+
 bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
                                         const EdgeInfo &UserTreeIdx,
                                         InstructionsState &S,
@@ -9741,9 +9916,10 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
                                         bool &TrySplitVectorize) const {
   assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
 
-  S = getSameOpcode(VL, *TLI);
   TryToFindDuplicates = true;
   TrySplitVectorize = false;
+  InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+  S = Analysis.buildInstructionsState(VL);
 
   // Don't go into catchswitch blocks, which can happen with PHIs.
   // Such blocks can only have PHIs and the catchswitch.  There is no
@@ -10118,6 +10294,8 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
       registerNonVectorizableLoads(ArrayRef(VL));
     return;
   }
+  InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+  SmallVector<ValueList> Operands = Analysis.buildOperands(S, VL);
   ScheduleBundle Empty;
   ScheduleBundle &Bundle = BundlePtr.value() ? *BundlePtr.value() : Empty;
   LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
@@ -10142,21 +10320,12 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
   };
   switch (ShuffleOrOp) {
     case Instruction::PHI: {
-      auto *PH = cast<PHINode>(VL0);
-
       TreeEntry *TE =
           newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndices);
       LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (PHINode).\n";
                  TE->dump());
 
-      // Keeps the reordered operands to avoid code duplication.
-      PHIHandler Handler(*DT, PH, VL);
-      Handler.buildOperands();
-      for (unsigned I : seq<unsigned>(PH->getNumOperands()))
-        TE->setOperand(I, Handler.getOperands(I));
-      SmallVector<ArrayRef<Value *>> Operands(PH->getNumOperands());
-      for (unsigned I : seq<unsigned>(PH->getNumOperands()))
-        Operands[I] = Handler.getOperands(I);
+      TE->setOperands(Operands);
       CreateOperandNodes(TE, Operands);
       return;
     }
@@ -10183,7 +10352,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
                  TE->dump());
       // This is a special case, as it does not gather, but at the same time
       // we are not extending buildTreeRec() towards the operands.
-      TE->setOperand(*this);
+      TE->setOperands(Operands);
       return;
     }
     case Instruction::InsertElement: {
@@ -10214,7 +10383,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
       LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (InsertElementInst).\n";
                  TE->dump());
 
-      TE->setOperand(*this);
+      TE->setOperands(Operands);
       buildTreeRec(TE->getOperand(1), Depth + 1, {TE, 1});
       return;
     }
@@ -10269,7 +10438,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
       case TreeEntry::NeedToGather:
         llvm_unreachable("Unexpected loads state.");
       }
-      TE->setOperand(*this);
+      TE->setOperands(Operands);
       if (State == TreeEntry::ScatterVectorize)
         buildTreeRec(PointerOps, Depth + 1, {TE, 0});
       return;
@@ -10310,7 +10479,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
       LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (CastInst).\n";
                  TE->dump());
 
-      TE->setOperand(*this);
+      TE->setOperands(Operands);
       for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
         buildTreeRec(TE->getOperand(I), Depth + 1, {TE, I});
       if (ShuffleOrOp == Instruction::Trunc) {
@@ -10338,37 +10507,28 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
       LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (CmpInst).\n";
                  TE->dump());
 
-      ValueList Left, Right;
-      VLOperands Ops(VL, S, *this);
+      VLOperands Ops(VL, Operands, S, *this);
       if (cast<CmpInst>(VL0)->isCommutative()) {
         // Commutative predicate - collect + sort operands of the instructions
         // so that each side is more likely to have the same opcode.
         assert(P0 == CmpInst::getSwappedPredicate(P0) &&
                "Commutative Predicate mismatch");
         Ops.reorder();
-        Left = Ops.getVL(0);
-        Right = Ops.getVL(1);
+        Operands.front() = Ops.getVL(0);
+        Operands.back() = Ops.getVL(1);
       } else {
         // Collect operands - commute if it uses the swapped predicate.
-        for (Value *V : VL) {
-          if (isa<PoisonValue>(V)) {
-            Left.push_back(PoisonValue::get(VL0->getOperand(0)->getType()));
-            Right.push_back(PoisonValue::get(VL0->getOperand(1)->getType()));
+        for (auto [Idx, V] : enumerate(VL)) {
+          if (isa<PoisonValue>(V))
             continue;
-          }
           auto *Cmp = cast<CmpInst>(V);
-          Value *LHS = Cmp->getOperand(0);
-          Value *RHS = Cmp->getOperand(1);
           if (Cmp->getPredicate() != P0)
-            std::swap(LHS, RHS);
-          Left.push_back(LHS);
-          Right.push_back(RHS);
+            std::swap(Operands.front()[Idx], Operands.back()[Idx]);
         }
       }
-      TE->setOperand(0, Left);
-      TE->setOperand(1, Right);
-      buildTreeRec(Left, Depth + 1, {TE, 0});
-      buildTreeRec(Right, Depth + 1, {TE, 1});
+      TE->setOperands(Operands);
+      buildTreeRec(Operands.front(), Depth + 1, {TE, 0});
+      buildTreeRec(Operands.back(), Depth + 1, {TE, 1});
       if (ShuffleOrOp == Instruction::ICmp) {
         unsigned NumSignBits0 =
             ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
@@ -10411,7 +10571,13 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
                     "(SelectInst/UnaryOperator/BinaryOperator/FreezeInst).\n";
           TE->dump());
 
-      TE->setOperand(*this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
+      if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
+        VLOperands Ops(VL, Operands, S, *this);
+        Ops.reorder();
+        Operands[0] = Ops.getVL(0);
+        Operands[1] = Ops.getVL(1);
+      }
+      TE->setOperands(Operands);
       for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
         buildTreeRec(TE->getOperand(I), Depth + 1, {TE, I});
       return;
@@ -10421,52 +10587,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
                                    ReuseShuffleIndices);
       LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (GetElementPtrInst).\n";
                  TE->dump());
-      SmallVector<ValueList, 2> Operands(2);
-      // Prepare the operand vector for pointer operands.
-      for (Value *V : ...
[truncated]

Copy link

github-actions bot commented May 6, 2025

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ef12adf4b..4044ed7ea 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10318,16 +10318,15 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
       buildTreeRec(Operands[I], Depth + 1, {TE, I});
   };
   switch (ShuffleOrOp) {
-    case Instruction::PHI: {
-      TreeEntry *TE =
-          newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndices);
-      LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (PHINode).\n";
-                 TE->dump());
+  case Instruction::PHI: {
+    TreeEntry *TE =
+        newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndices);
+    LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (PHINode).\n"; TE->dump());
 
-      TE->setOperands(Operands);
-      CreateOperandNodes(TE, Operands);
-      return;
-    }
+    TE->setOperands(Operands);
+    CreateOperandNodes(TE, Operands);
+    return;
+  }
     case Instruction::ExtractValue:
     case Instruction::ExtractElement: {
       if (CurrentOrder.empty()) {

@alexey-bataev
Copy link
Member Author

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
View the diff from clang-format here.

Clang-format is not related to the change itself, but to the original formatting, need to reformat the whole switch statement

@alexey-bataev alexey-bataev requested a review from HanKuanChen May 7, 2025 14:14
Comment on lines 2888 to 2889
auto *I = dyn_cast<Instruction>(VL[Lane]);
if (!I && isa<PoisonValue>(VL[Lane])) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can VL[Lane] have a value other than Instruction or PoisonValue?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently no, in future patches - yes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we keep

        assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
               "Expected instruction or poison value");

and use if (isa<PoisonValue>(VL[Lane])) { here? Also auto [SelectedOp, Ops] = convertTo(cast<Instruction>(VL[Lane]), S);.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why? The logic I introduced here does not brake any assumptions. Added the assertion, that the instruction is expected.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just feel it is weird because the following code is removed; however, it could have been kept.

        assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
               "Expected instruction or poison value");

If we don't introduce any new concept, why do we remove the assert?
And if we keep the original assert, we can use

        if (isa<PoisonValue>(VL[Lane])) {
          for (unsigned OpIdx : seq<unsigned>(NumOperands))
            OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], true, false};
          continue;
        }
        auto [SelectedOp, Ops] = convertTo(cast<Instruction>(VL[Lane]), S);

Created using spr 1.3.5
Created using spr 1.3.5
Created using spr 1.3.5
Created using spr 1.3.5
@alexey-bataev alexey-bataev merged commit 3954e9d into main May 9, 2025
5 of 9 checks passed
@alexey-bataev alexey-bataev deleted the users/alexey-bataev/spr/slpnfcextract-values-stateoperands-analysis-into-separate-class branch May 9, 2025 13:38
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request May 9, 2025
…rate class

Extract values state and operands analysis/building into a separate
class. This class allows to localize instrutions state and operands
building for future support of copyable elements vectorization.

Reviewers: HanKuanChen, RKSimon

Reviewed By: HanKuanChen

Pull Request: llvm/llvm-project#138724
alexey-bataev added a commit that referenced this pull request May 9, 2025
Extract values state and operands analysis/building into a separate
class. This class allows to localize instrutions state and operands
building for future support of copyable elements vectorization.

Recommit after revert 10f5120

Reviewers: HanKuanChen, RKSimon

Reviewed By: HanKuanChen

Pull Request: #138724
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request May 9, 2025
…rate class

Extract values state and operands analysis/building into a separate
class. This class allows to localize instrutions state and operands
building for future support of copyable elements vectorization.

Recommit after revert 10f5120

Reviewers: HanKuanChen, RKSimon

Reviewed By: HanKuanChen

Pull Request: llvm/llvm-project#138724
@asb
Copy link
Contributor

asb commented May 10, 2025

I've just reverted this commit in 6a2a8eb (EDIT: and here's the build going green again after this revert https://lab.llvm.org/buildbot/#/builders/132/builds/1052)

It's causing the RVV CI to fail with a crash compiling oggenc.c from llvm-test-suite. I've reduced the C input to come up with this test case:

reduced oggenc.c:

float *mdct_forward_x;
int mdct_forward_j;
void mdct_butterfly_16(float *x) {
  float r0 = x[6] + 2, r1 = x[6] - x[2], r2 = 4 + x[0], r3 = x[4] - x[0];
  x[4] = r0 - r2;
  r0 = 5 - x[1];
  r2 = 7 - x[3];
  x[0] = r0;
  x[2] = r1 - r0;
  x[3] = r2 + r3;
  x[5] = r1 - r0;
}
void mdct_butterfly_32(float *x) { mdct_butterfly_16(x + 6); }
void mdct_forward() {
  for (;;)
    mdct_butterfly_32(mdct_forward_x + mdct_forward_j);
}

Then to reproduce the crash:

~/llvm-project/build/release/bin/clang --target=riscv64-linux-gnu -march=rv64gcv -O3 -c oggenc.c -fno-strict-aliasing
clang: ../../llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:19512: auto llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *>, BoUpSLP *, const InstructionsState &)::(anonymous class)::operator()(bool, ScheduleBundle &) const: Assertion `Picked->isReady() && "must be ready to schedule"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: /home/asb/llvm-project/build/release/bin/clang --target=riscv64-linux-gnu -march=rv64gcv -O3 -c oggenc.c -fno-strict-aliasing
1.	<eof> parser at end of file
2.	Optimizer
3.	Running pass "function<eager-inv>(float2int,lower-constant-intrinsics,chr,loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,slp-vectorizer,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>)" on module "oggenc.c"
4.	Running pass "slp-vectorizer" on function "mdct_forward"
 #0 0x000060a794223306 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/asb/llvm-project/build/release/bin/clang+0x8789306)
 #1 0x000060a794220c1e llvm::sys::RunSignalHandlers() (/home/asb/llvm-project/build/release/bin/clang+0x8786c1e)
 #2 0x000060a79418239d CrashRecoverySignalHandler(int) CrashRecoveryContext.cpp:0:0
 #3 0x00007c6e1424c1d0 (/usr/lib/libc.so.6+0x3d1d0)
 #4 0x00007c6e142a5334 (/usr/lib/libc.so.6+0x96334)
 #5 0x00007c6e1424c120 raise (/usr/lib/libc.so.6+0x3d120)
 #6 0x00007c6e142334c3 abort (/usr/lib/libc.so.6+0x244c3)
 #7 0x00007c6e142333df (/usr/lib/libc.so.6+0x243df)
 #8 0x00007c6e14244177 (/usr/lib/libc.so.6+0x35177)
 #9 0x000060a795db0d31 llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP*, (anonymous namespace)::InstructionsState const&)::$_0::operator()(bool, llvm::slpvectorizer::BoUpSLP::ScheduleBundle&) const SLPVectorizer.cpp:0:0
#10 0x000060a795d1b82a llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (/home/asb/llvm-project/build/release/bin/clang+0xa28182a)
#11 0x000060a795d1ad91 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (/home/asb/llvm-project/build/release/bin/clang+0xa280d91)
........
........

Looking at the diff of a log with -mllvm -debug -mllvm -print-after-all I see it starts to diverge in the ordering of some operands listed in printouts (i.e. under Operand 0:).

alexey-bataev added a commit that referenced this pull request May 11, 2025
Extract values state and operands analysis/building into a separate
class. This class allows to localize instrutions state and operands
building for future support of copyable elements vectorization.

Recommit after revert 10f5120

Recommit after revert 6a2a8eb

Reviewers: HanKuanChen, RKSimon

Reviewed By: HanKuanChen

Pull Request: #138724
@alexey-bataev
Copy link
Member Author

I've just reverted this commit in 6a2a8eb (EDIT: and here's the build going green again after this revert https://lab.llvm.org/buildbot/#/builders/132/builds/1052)

It's causing the RVV CI to fail with a crash compiling oggenc.c from llvm-test-suite. I've reduced the C input to come up with this test case:

reduced oggenc.c:

float *mdct_forward_x;
int mdct_forward_j;
void mdct_butterfly_16(float *x) {
  float r0 = x[6] + 2, r1 = x[6] - x[2], r2 = 4 + x[0], r3 = x[4] - x[0];
  x[4] = r0 - r2;
  r0 = 5 - x[1];
  r2 = 7 - x[3];
  x[0] = r0;
  x[2] = r1 - r0;
  x[3] = r2 + r3;
  x[5] = r1 - r0;
}
void mdct_butterfly_32(float *x) { mdct_butterfly_16(x + 6); }
void mdct_forward() {
  for (;;)
    mdct_butterfly_32(mdct_forward_x + mdct_forward_j);
}

Then to reproduce the crash:

~/llvm-project/build/release/bin/clang --target=riscv64-linux-gnu -march=rv64gcv -O3 -c oggenc.c -fno-strict-aliasing
clang: ../../llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:19512: auto llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *>, BoUpSLP *, const InstructionsState &)::(anonymous class)::operator()(bool, ScheduleBundle &) const: Assertion `Picked->isReady() && "must be ready to schedule"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: /home/asb/llvm-project/build/release/bin/clang --target=riscv64-linux-gnu -march=rv64gcv -O3 -c oggenc.c -fno-strict-aliasing
1.	<eof> parser at end of file
2.	Optimizer
3.	Running pass "function<eager-inv>(float2int,lower-constant-intrinsics,chr,loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,slp-vectorizer,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>)" on module "oggenc.c"
4.	Running pass "slp-vectorizer" on function "mdct_forward"
 #0 0x000060a794223306 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/asb/llvm-project/build/release/bin/clang+0x8789306)
 #1 0x000060a794220c1e llvm::sys::RunSignalHandlers() (/home/asb/llvm-project/build/release/bin/clang+0x8786c1e)
 #2 0x000060a79418239d CrashRecoverySignalHandler(int) CrashRecoveryContext.cpp:0:0
 #3 0x00007c6e1424c1d0 (/usr/lib/libc.so.6+0x3d1d0)
 #4 0x00007c6e142a5334 (/usr/lib/libc.so.6+0x96334)
 #5 0x00007c6e1424c120 raise (/usr/lib/libc.so.6+0x3d120)
 #6 0x00007c6e142334c3 abort (/usr/lib/libc.so.6+0x244c3)
 #7 0x00007c6e142333df (/usr/lib/libc.so.6+0x243df)
 #8 0x00007c6e14244177 (/usr/lib/libc.so.6+0x35177)
 #9 0x000060a795db0d31 llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP*, (anonymous namespace)::InstructionsState const&)::$_0::operator()(bool, llvm::slpvectorizer::BoUpSLP::ScheduleBundle&) const SLPVectorizer.cpp:0:0
#10 0x000060a795d1b82a llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (/home/asb/llvm-project/build/release/bin/clang+0xa28182a)
#11 0x000060a795d1ad91 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (/home/asb/llvm-project/build/release/bin/clang+0xa280d91)
........
........

Looking at the diff of a log with -mllvm -debug -mllvm -print-after-all I see it starts to diverge in the ordering of some operands listed in printouts (i.e. under Operand 0:).

Thanks, fixed

llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request May 11, 2025
…rate class

Extract values state and operands analysis/building into a separate
class. This class allows to localize instrutions state and operands
building for future support of copyable elements vectorization.

Recommit after revert 10f5120

Recommit after revert 6a2a8eb

Reviewers: HanKuanChen, RKSimon

Reviewed By: HanKuanChen

Pull Request: llvm/llvm-project#138724
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants