Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Conversation

topperc
Copy link
Collaborator

@topperc topperc commented Sep 13, 2025

The PowerPC changes are caused by shifts created by different IR operations being CSEd now. This allows consecutive loads to be turned into vectors earlier. This has effects on the ordering of other combines and legalizations. This leads to some improvements and some regressions.

The PowerPC changes are caused by shifts created by different IR
operations being CSEd now. This allows consecutive loads to be
turned into vectors earlier. This has effects on the ordering of
other combines and legalizations. This leads to some improvements
and some regressions.
@llvmbot
Copy link
Member

llvmbot commented Sep 13, 2025

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-powerpc

Author: Craig Topper (topperc)

Changes

The PowerPC changes are caused by shifts created by different IR operations being CSEd now. This allows consecutive loads to be turned into vectors earlier. This has effects on the ordering of other combines and legalizations. This leads to some improvements and some regressions.


Full diff: https://github.com/llvm/llvm-project/pull/158400.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+11-16)
  • (modified) llvm/test/CodeGen/PowerPC/build-vector-tests.ll (+16-22)
  • (modified) llvm/test/CodeGen/PowerPC/mma-intrinsics.ll (+16-16)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 430e47451fd49..299acf6b1c080 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -223,10 +223,9 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
           std::swap(Lo, Hi);
         EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
         Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
-        Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
-                         DAG.getConstant(Lo.getValueSizeInBits(), DL,
-                                         TLI.getShiftAmountTy(
-                                             TotalVT, DAG.getDataLayout())));
+        Hi = DAG.getNode(
+            ISD::SHL, DL, TotalVT, Hi,
+            DAG.getShiftAmountConstant(Lo.getValueSizeInBits(), TotalVT, DL));
         Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
         Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
       }
@@ -4469,9 +4468,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
         if (ElementMul != 1) {
           if (ElementMul.isPowerOf2()) {
             unsigned Amt = ElementMul.logBase2();
-            IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN,
-                               DAG.getConstant(Amt, dl, IdxN.getValueType()),
-                               ScaleFlags);
+            IdxN = DAG.getNode(
+                ISD::SHL, dl, N.getValueType(), IdxN,
+                DAG.getShiftAmountConstant(Amt, N.getValueType(), dl),
+                ScaleFlags);
           } else {
             SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
                                             IdxN.getValueType());
@@ -5460,10 +5460,8 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
                            const TargetLowering &TLI, const SDLoc &dl) {
   SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                            DAG.getConstant(0x7f800000, dl, MVT::i32));
-  SDValue t1 = DAG.getNode(
-      ISD::SRL, dl, MVT::i32, t0,
-      DAG.getConstant(23, dl,
-                      TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
+  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+                           DAG.getShiftAmountConstant(23, MVT::i32, dl));
   SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
                            DAG.getConstant(127, dl, MVT::i32));
   return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
@@ -5488,11 +5486,8 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
   SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
 
   //   IntegerPartOfX <<= 23;
-  IntegerPartOfX =
-      DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
-                  DAG.getConstant(23, dl,
-                                  DAG.getTargetLoweringInfo().getShiftAmountTy(
-                                      MVT::i32, DAG.getDataLayout())));
+  IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+                               DAG.getShiftAmountConstant(23, MVT::i32, dl));
 
   SDValue TwoToFractionalPartOfX;
   if (LimitFloatPrecision <= 6) {
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 9dd0fbe4474b1..fb55511162a7e 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1036,12 +1036,8 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    sldi r4, r4, 2
 ; P9LE-NEXT:    add r3, r3, r4
-; P9LE-NEXT:    li r4, -12
-; P9LE-NEXT:    lxvx v2, r3, r4
-; P9LE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
-; P9LE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
-; P9LE-NEXT:    lxv vs0, 0(r3)
-; P9LE-NEXT:    xxperm v2, v2, vs0
+; P9LE-NEXT:    addi r3, r3, -12
+; P9LE-NEXT:    lxvw4x v2, 0, r3
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromDiffMemVarDi:
@@ -1058,15 +1054,16 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
 ;
 ; P8LE-LABEL: fromDiffMemVarDi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
 ; P8LE-NEXT:    sldi r4, r4, 2
-; P8LE-NEXT:    addi r5, r5, .LCPI9_0@toc@l
 ; P8LE-NEXT:    add r3, r3, r4
-; P8LE-NEXT:    lxvd2x vs0, 0, r5
 ; P8LE-NEXT:    addi r3, r3, -12
-; P8LE-NEXT:    lxvd2x v3, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
 ; P8LE-NEXT:    xxswapd v2, vs0
-; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
@@ -2524,12 +2521,8 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    sldi r4, r4, 2
 ; P9LE-NEXT:    add r3, r3, r4
-; P9LE-NEXT:    li r4, -12
-; P9LE-NEXT:    lxvx v2, r3, r4
-; P9LE-NEXT:    addis r3, r2, .LCPI41_0@toc@ha
-; P9LE-NEXT:    addi r3, r3, .LCPI41_0@toc@l
-; P9LE-NEXT:    lxv vs0, 0(r3)
-; P9LE-NEXT:    xxperm v2, v2, vs0
+; P9LE-NEXT:    addi r3, r3, -12
+; P9LE-NEXT:    lxvw4x v2, 0, r3
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromDiffMemVarDui:
@@ -2546,15 +2539,16 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
 ;
 ; P8LE-LABEL: fromDiffMemVarDui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r5, r2, .LCPI41_0@toc@ha
 ; P8LE-NEXT:    sldi r4, r4, 2
-; P8LE-NEXT:    addi r5, r5, .LCPI41_0@toc@l
 ; P8LE-NEXT:    add r3, r3, r4
-; P8LE-NEXT:    lxvd2x vs0, 0, r5
 ; P8LE-NEXT:    addi r3, r3, -12
-; P8LE-NEXT:    lxvd2x v3, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addis r3, r2, .LCPI41_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI41_0@toc@l
 ; P8LE-NEXT:    xxswapd v2, vs0
-; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 9d5e5b2210c07..8fbc9d785796d 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -394,18 +394,18 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
 ; CHECK-NEXT:    xxsetaccz acc2
 ; CHECK-NEXT:    xxsetaccz acc1
 ; CHECK-NEXT:    addi r6, r6, 6
+; CHECK-NEXT:    add r8, r5, r7
 ; CHECK-NEXT:    lxvx vs0, r5, r7
-; CHECK-NEXT:    add r7, r5, r7
-; CHECK-NEXT:    lxv vs1, 16(r7)
+; CHECK-NEXT:    rldic r7, r4, 6, 26
+; CHECK-NEXT:    addi r4, r4, 3
+; CHECK-NEXT:    lxv vs1, 16(r8)
 ; CHECK-NEXT:    xvf32gerpp acc2, vs0, vs1
-; CHECK-NEXT:    lxv vs0, 32(r7)
-; CHECK-NEXT:    lxv vs1, 48(r7)
+; CHECK-NEXT:    lxv vs0, 32(r8)
+; CHECK-NEXT:    lxv vs1, 48(r8)
 ; CHECK-NEXT:    xvf32gerpn acc1, vs0, vs1
-; CHECK-NEXT:    lxv vs12, 64(r7)
-; CHECK-NEXT:    lxv vs13, 80(r7)
+; CHECK-NEXT:    lxv vs12, 64(r8)
+; CHECK-NEXT:    lxv vs13, 80(r8)
 ; CHECK-NEXT:    xxsetaccz acc0
-; CHECK-NEXT:    rldic r7, r4, 6, 26
-; CHECK-NEXT:    addi r4, r4, 3
 ; CHECK-NEXT:    add r8, r3, r7
 ; CHECK-NEXT:    xxmfacc acc2
 ; CHECK-NEXT:    xvf32gernp acc0, vs12, vs13
@@ -443,18 +443,18 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
 ; CHECK-BE-NEXT:    xxsetaccz acc2
 ; CHECK-BE-NEXT:    xxsetaccz acc1
 ; CHECK-BE-NEXT:    addi r6, r6, 6
+; CHECK-BE-NEXT:    add r8, r5, r7
 ; CHECK-BE-NEXT:    lxvx vs0, r5, r7
-; CHECK-BE-NEXT:    add r7, r5, r7
-; CHECK-BE-NEXT:    lxv vs1, 16(r7)
+; CHECK-BE-NEXT:    rldic r7, r4, 6, 26
+; CHECK-BE-NEXT:    addi r4, r4, 3
+; CHECK-BE-NEXT:    lxv vs1, 16(r8)
 ; CHECK-BE-NEXT:    xvf32gerpp acc2, vs0, vs1
-; CHECK-BE-NEXT:    lxv vs0, 32(r7)
-; CHECK-BE-NEXT:    lxv vs1, 48(r7)
+; CHECK-BE-NEXT:    lxv vs0, 32(r8)
+; CHECK-BE-NEXT:    lxv vs1, 48(r8)
 ; CHECK-BE-NEXT:    xvf32gerpn acc1, vs0, vs1
-; CHECK-BE-NEXT:    lxv vs12, 64(r7)
-; CHECK-BE-NEXT:    lxv vs13, 80(r7)
+; CHECK-BE-NEXT:    lxv vs12, 64(r8)
+; CHECK-BE-NEXT:    lxv vs13, 80(r8)
 ; CHECK-BE-NEXT:    xxsetaccz acc0
-; CHECK-BE-NEXT:    rldic r7, r4, 6, 26
-; CHECK-BE-NEXT:    addi r4, r4, 3
 ; CHECK-BE-NEXT:    add r8, r3, r7
 ; CHECK-BE-NEXT:    xxmfacc acc2
 ; CHECK-BE-NEXT:    xvf32gernp acc0, vs12, vs13

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:PowerPC llvm:SelectionDAG SelectionDAGISel as well
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants