-
Notifications
You must be signed in to change notification settings - Fork 15k
[SelectionDAGBuilder][PPC] Use getShiftAmountConstant. #158400
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
The PowerPC test changes occur because shifts created by different IR operations are now CSE'd (merged by common-subexpression elimination). This allows consecutive loads to be turned into vectors earlier, which in turn affects the ordering of other combines and legalizations. The result is some improvements and some regressions.
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-powerpc Author: Craig Topper (topperc) Changes: The PowerPC changes are caused by shifts created by different IR operations being CSEd now. This allows consecutive loads to be turned into vectors earlier. This has effects on the ordering of other combines and legalizations. This leads to some improvements and some regressions. Full diff: https://github.com/llvm/llvm-project/pull/158400.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 430e47451fd49..299acf6b1c080 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -223,10 +223,9 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
- Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
- DAG.getConstant(Lo.getValueSizeInBits(), DL,
- TLI.getShiftAmountTy(
- TotalVT, DAG.getDataLayout())));
+ Hi = DAG.getNode(
+ ISD::SHL, DL, TotalVT, Hi,
+ DAG.getShiftAmountConstant(Lo.getValueSizeInBits(), TotalVT, DL));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
@@ -4469,9 +4468,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (ElementMul != 1) {
if (ElementMul.isPowerOf2()) {
unsigned Amt = ElementMul.logBase2();
- IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN,
- DAG.getConstant(Amt, dl, IdxN.getValueType()),
- ScaleFlags);
+ IdxN = DAG.getNode(
+ ISD::SHL, dl, N.getValueType(), IdxN,
+ DAG.getShiftAmountConstant(Amt, N.getValueType(), dl),
+ ScaleFlags);
} else {
SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
IdxN.getValueType());
@@ -5460,10 +5460,8 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
const TargetLowering &TLI, const SDLoc &dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x7f800000, dl, MVT::i32));
- SDValue t1 = DAG.getNode(
- ISD::SRL, dl, MVT::i32, t0,
- DAG.getConstant(23, dl,
- TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
+ SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+ DAG.getShiftAmountConstant(23, MVT::i32, dl));
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
DAG.getConstant(127, dl, MVT::i32));
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
@@ -5488,11 +5486,8 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
// IntegerPartOfX <<= 23;
- IntegerPartOfX =
- DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, dl,
- DAG.getTargetLoweringInfo().getShiftAmountTy(
- MVT::i32, DAG.getDataLayout())));
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getShiftAmountConstant(23, MVT::i32, dl));
SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 9dd0fbe4474b1..fb55511162a7e 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1036,12 +1036,8 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 2
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -12
-; P9LE-NEXT: lxvx v2, r3, r4
-; P9LE-NEXT: addis r3, r2, .LCPI9_0@toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI9_0@toc@l
-; P9LE-NEXT: lxv vs0, 0(r3)
-; P9LE-NEXT: xxperm v2, v2, vs0
+; P9LE-NEXT: addi r3, r3, -12
+; P9LE-NEXT: lxvw4x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemVarDi:
@@ -1058,15 +1054,16 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
;
; P8LE-LABEL: fromDiffMemVarDi:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha
; P8LE-NEXT: sldi r4, r4, 2
-; P8LE-NEXT: addi r5, r5, .LCPI9_0@toc@l
; P8LE-NEXT: add r3, r3, r4
-; P8LE-NEXT: lxvd2x vs0, 0, r5
; P8LE-NEXT: addi r3, r3, -12
-; P8LE-NEXT: lxvd2x v3, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: addis r3, r2, .LCPI9_0@toc@ha
+; P8LE-NEXT: addi r3, r3, .LCPI9_0@toc@l
; P8LE-NEXT: xxswapd v2, vs0
-; P8LE-NEXT: vperm v2, v3, v3, v2
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v3, vs0
+; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
%idxprom = sext i32 %elem to i64
@@ -2524,12 +2521,8 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 2
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -12
-; P9LE-NEXT: lxvx v2, r3, r4
-; P9LE-NEXT: addis r3, r2, .LCPI41_0@toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI41_0@toc@l
-; P9LE-NEXT: lxv vs0, 0(r3)
-; P9LE-NEXT: xxperm v2, v2, vs0
+; P9LE-NEXT: addi r3, r3, -12
+; P9LE-NEXT: lxvw4x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemVarDui:
@@ -2546,15 +2539,16 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
;
; P8LE-LABEL: fromDiffMemVarDui:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha
; P8LE-NEXT: sldi r4, r4, 2
-; P8LE-NEXT: addi r5, r5, .LCPI41_0@toc@l
; P8LE-NEXT: add r3, r3, r4
-; P8LE-NEXT: lxvd2x vs0, 0, r5
; P8LE-NEXT: addi r3, r3, -12
-; P8LE-NEXT: lxvd2x v3, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: addis r3, r2, .LCPI41_0@toc@ha
+; P8LE-NEXT: addi r3, r3, .LCPI41_0@toc@l
; P8LE-NEXT: xxswapd v2, vs0
-; P8LE-NEXT: vperm v2, v3, v3, v2
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v3, vs0
+; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
%idxprom = sext i32 %elem to i64
diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 9d5e5b2210c07..8fbc9d785796d 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -394,18 +394,18 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
; CHECK-NEXT: xxsetaccz acc2
; CHECK-NEXT: xxsetaccz acc1
; CHECK-NEXT: addi r6, r6, 6
+; CHECK-NEXT: add r8, r5, r7
; CHECK-NEXT: lxvx vs0, r5, r7
-; CHECK-NEXT: add r7, r5, r7
-; CHECK-NEXT: lxv vs1, 16(r7)
+; CHECK-NEXT: rldic r7, r4, 6, 26
+; CHECK-NEXT: addi r4, r4, 3
+; CHECK-NEXT: lxv vs1, 16(r8)
; CHECK-NEXT: xvf32gerpp acc2, vs0, vs1
-; CHECK-NEXT: lxv vs0, 32(r7)
-; CHECK-NEXT: lxv vs1, 48(r7)
+; CHECK-NEXT: lxv vs0, 32(r8)
+; CHECK-NEXT: lxv vs1, 48(r8)
; CHECK-NEXT: xvf32gerpn acc1, vs0, vs1
-; CHECK-NEXT: lxv vs12, 64(r7)
-; CHECK-NEXT: lxv vs13, 80(r7)
+; CHECK-NEXT: lxv vs12, 64(r8)
+; CHECK-NEXT: lxv vs13, 80(r8)
; CHECK-NEXT: xxsetaccz acc0
-; CHECK-NEXT: rldic r7, r4, 6, 26
-; CHECK-NEXT: addi r4, r4, 3
; CHECK-NEXT: add r8, r3, r7
; CHECK-NEXT: xxmfacc acc2
; CHECK-NEXT: xvf32gernp acc0, vs12, vs13
@@ -443,18 +443,18 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
; CHECK-BE-NEXT: xxsetaccz acc2
; CHECK-BE-NEXT: xxsetaccz acc1
; CHECK-BE-NEXT: addi r6, r6, 6
+; CHECK-BE-NEXT: add r8, r5, r7
; CHECK-BE-NEXT: lxvx vs0, r5, r7
-; CHECK-BE-NEXT: add r7, r5, r7
-; CHECK-BE-NEXT: lxv vs1, 16(r7)
+; CHECK-BE-NEXT: rldic r7, r4, 6, 26
+; CHECK-BE-NEXT: addi r4, r4, 3
+; CHECK-BE-NEXT: lxv vs1, 16(r8)
; CHECK-BE-NEXT: xvf32gerpp acc2, vs0, vs1
-; CHECK-BE-NEXT: lxv vs0, 32(r7)
-; CHECK-BE-NEXT: lxv vs1, 48(r7)
+; CHECK-BE-NEXT: lxv vs0, 32(r8)
+; CHECK-BE-NEXT: lxv vs1, 48(r8)
; CHECK-BE-NEXT: xvf32gerpn acc1, vs0, vs1
-; CHECK-BE-NEXT: lxv vs12, 64(r7)
-; CHECK-BE-NEXT: lxv vs13, 80(r7)
+; CHECK-BE-NEXT: lxv vs12, 64(r8)
+; CHECK-BE-NEXT: lxv vs13, 80(r8)
; CHECK-BE-NEXT: xxsetaccz acc0
-; CHECK-BE-NEXT: rldic r7, r4, 6, 26
-; CHECK-BE-NEXT: addi r4, r4, 3
; CHECK-BE-NEXT: add r8, r3, r7
; CHECK-BE-NEXT: xxmfacc acc2
; CHECK-BE-NEXT: xvf32gernp acc0, vs12, vs13
|
The PowerPC test changes occur because shifts created by different IR operations are now CSE'd (merged by common-subexpression elimination). This allows consecutive loads to be turned into vectors earlier, which in turn affects the ordering of other combines and legalizations. The result is some improvements and some regressions.