Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

[PowerPC] catch v2i64 shift left by 1 is add case #138772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

RolandF77
Copy link
Collaborator

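For vector element sizes i8 through i32, the PowerPC back end already catches the case of x << 1 at instruction selection and generates x + x. This patch handles the v2i64 case as well, using a DAG combine: there is no 64-bit splat immediate, so the shift by 1 has to be caught before the BUILD_VECTOR is replaced by a load.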

@llvmbot
Copy link
Member

llvmbot commented May 6, 2025

@llvm/pr-subscribers-backend-powerpc

Author: None (RolandF77)

Changes

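For vector element sizes i8 through i32, the PowerPC back end already catches the case of x << 1 at instruction selection and generates x + x. This patch handles the v2i64 case as well, using a DAG combine: there is no 64-bit splat immediate, so the shift by 1 has to be caught before the BUILD_VECTOR is replaced by a load.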


Full diff: https://github.com/llvm/llvm-project/pull/138772.diff

4 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+35)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+1)
  • (modified) llvm/test/CodeGen/PowerPC/optimize-vector.ll (+1-4)
  • (modified) llvm/test/CodeGen/PowerPC/pr47891.ll (+10-13)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f9e791e4b34f8..9e562d7ca498e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18456,10 +18456,45 @@ static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
   return SDValue();
 }
 
+SDValue PPCTargetLowering::combineVectorSHL(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  assert(N->getValueType(0).isVector() && "Vector type expected.");
+
+  SDValue N1 = N->getOperand(1);
+  if (!Subtarget.hasP8Altivec() || N1.getOpcode() != ISD::BUILD_VECTOR ||
+      !isOperationLegal(ISD::ADD, N->getValueType(0)))
+    return SDValue();
+
+  // For 64-bit there is no splat immediate so we want to catch shift by 1 here
+  // before the BUILD_VECTOR is replaced by a load.
+  EVT EltTy = N->getValueType(0).getScalarType();
+  if (EltTy != MVT::i64)
+    return SDValue();
+
+  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
+  APInt APSplatBits, APSplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  bool BVNIsConstantSplat =
+      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());
+  if (!BVNIsConstantSplat || SplatBitSize != EltTy.getSizeInBits())
+    return SDValue();
+  uint64_t SplatBits = APSplatBits.getZExtValue();
+  if (SplatBits != 1)
+    return SDValue();
+
+  return DCI.DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0),
+                         N->getOperand(0), N->getOperand(0));
+}
+
 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
     return Value;
 
+  if (N->getValueType(0).isVector())
+    return combineVectorSHL(N, DCI);
+
   SDValue N0 = N->getOperand(0);
   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7365f3103276c..e7e7c21b50395 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1441,6 +1441,7 @@ namespace llvm {
     SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineVectorSHL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/test/CodeGen/PowerPC/optimize-vector.ll b/llvm/test/CodeGen/PowerPC/optimize-vector.ll
index df5ef43e8ee0c..4d5481b630c98 100644
--- a/llvm/test/CodeGen/PowerPC/optimize-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/optimize-vector.ll
@@ -36,10 +36,7 @@ entry:
 define dso_local <2 x i64> @x2d(<2 x i64> noundef %x) {
 ; CHECK-LABEL: x2d:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-NEXT:    lxvd2x v3, 0, r3
-; CHECK-NEXT:    vsld v2, v2, v3
+; CHECK-NEXT:    vaddudm v2, v2, v2
 ; CHECK-NEXT:    blr
 entry:
   %add = shl <2 x i64> %x, <i64 1, i64 1>
diff --git a/llvm/test/CodeGen/PowerPC/pr47891.ll b/llvm/test/CodeGen/PowerPC/pr47891.ll
index 4e41b3ee12155..46ff074fae647 100644
--- a/llvm/test/CodeGen/PowerPC/pr47891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47891.ll
@@ -7,26 +7,17 @@
 define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
 ; CHECK-LABEL: poly2_lshift1:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    li r4, 72
-; CHECK-NEXT:    addis r6, r2, .LCPI0_1@toc@ha
 ; CHECK-NEXT:    ld r5, 64(r3)
-; CHECK-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-NEXT:    addi r6, r6, .LCPI0_1@toc@l
-; CHECK-NEXT:    lxvd2x v4, 0, r6
-; CHECK-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    addi r6, r6, .LCPI0_0@toc@l
-; CHECK-NEXT:    xxswapd v2, vs0
-; CHECK-NEXT:    mtfprd f0, r5
-; CHECK-NEXT:    xxpermdi v3, v2, vs0, 2
-; CHECK-NEXT:    vsld v2, v2, v4
+; CHECK-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-NEXT:    lxvd2x v4, 0, r6
 ; CHECK-NEXT:    ld r6, 0(r3)
 ; CHECK-NEXT:    sldi r7, r6, 1
 ; CHECK-NEXT:    rotldi r6, r6, 1
 ; CHECK-NEXT:    std r7, 0(r3)
 ; CHECK-NEXT:    ld r7, 8(r3)
-; CHECK-NEXT:    vsrd v3, v3, v4
-; CHECK-NEXT:    xxlor vs0, v2, v3
 ; CHECK-NEXT:    rldimi r6, r7, 1, 0
 ; CHECK-NEXT:    rotldi r7, r7, 1
 ; CHECK-NEXT:    std r6, 8(r3)
@@ -44,6 +35,8 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
 ; CHECK-NEXT:    std r7, 32(r3)
 ; CHECK-NEXT:    ld r7, 40(r3)
 ; CHECK-NEXT:    rldimi r6, r7, 1, 0
+; CHECK-NEXT:    xxswapd v2, vs0
+; CHECK-NEXT:    mtfprd f0, r5
 ; CHECK-NEXT:    rotldi r7, r7, 1
 ; CHECK-NEXT:    std r6, 40(r3)
 ; CHECK-NEXT:    ld r6, 48(r3)
@@ -54,10 +47,14 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
 ; CHECK-NEXT:    rldimi r6, r7, 1, 0
 ; CHECK-NEXT:    std r6, 56(r3)
 ; CHECK-NEXT:    rotldi r6, r7, 1
-; CHECK-NEXT:    xxswapd vs0, vs0
-; CHECK-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-NEXT:    rldimi r6, r5, 1, 0
 ; CHECK-NEXT:    std r6, 64(r3)
+; CHECK-NEXT:    xxpermdi v3, v2, vs0, 2
+; CHECK-NEXT:    vsrd v3, v3, v4
+; CHECK-NEXT:    vaddudm v2, v2, v2
+; CHECK-NEXT:    xxlor vs0, v2, v3
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i64, ptr %p, align 8

@RolandF77 RolandF77 self-assigned this May 7, 2025
@RolandF77 RolandF77 requested review from diggerlin, maryammo and lei137 May 7, 2025 15:31
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants