diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f9e791e4b34f8..9e562d7ca498e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18456,10 +18456,51 @@ static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
   return SDValue();
 }
 
+// Combine a vector SHL whose shift amount is a constant splat of 1 into
+// (add X, X). Only vectors of i64 elements are handled, and only when the
+// subtarget has P8 Altivec and ISD::ADD is legal for the result type.
+// Returns an empty SDValue when the combine does not apply.
+SDValue PPCTargetLowering::combineVectorSHL(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  assert(N->getValueType(0).isVector() && "Vector type expected.");
+
+  SDValue N1 = N->getOperand(1);
+  if (!Subtarget.hasP8Altivec() || N1.getOpcode() != ISD::BUILD_VECTOR ||
+      !isOperationLegal(ISD::ADD, N->getValueType(0)))
+    return SDValue();
+
+  // For 64-bit there is no splat immediate so we want to catch shift by 1 here
+  // before the BUILD_VECTOR is replaced by a load.
+  EVT EltTy = N->getValueType(0).getScalarType();
+  if (EltTy != MVT::i64)
+    return SDValue();
+
+  // N1 is known to be a BUILD_VECTOR here. Accept it only if it is a constant
+  // splat exactly one element wide with value 1, since X << 1 == X + X.
+  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
+  APInt APSplatBits, APSplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  bool BVNIsConstantSplat =
+      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());
+  if (!BVNIsConstantSplat || SplatBitSize != EltTy.getSizeInBits())
+    return SDValue();
+  uint64_t SplatBits = APSplatBits.getZExtValue();
+  if (SplatBits != 1)
+    return SDValue();
+
+  return DCI.DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0),
+                         N->getOperand(0), N->getOperand(0));
+}
+
 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
     return Value;
 
+  if (N->getValueType(0).isVector())
+    return combineVectorSHL(N, DCI);
+
   SDValue N0 = N->getOperand(0);
   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7365f3103276c..e7e7c21b50395 100644
--- 
a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1441,6 +1441,7 @@ namespace llvm { SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVectorSHL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/llvm/test/CodeGen/PowerPC/optimize-vector.ll b/llvm/test/CodeGen/PowerPC/optimize-vector.ll index df5ef43e8ee0c..4d5481b630c98 100644 --- a/llvm/test/CodeGen/PowerPC/optimize-vector.ll +++ b/llvm/test/CodeGen/PowerPC/optimize-vector.ll @@ -36,10 +36,7 @@ entry: define dso_local <2 x i64> @x2d(<2 x i64> noundef %x) { ; CHECK-LABEL: x2d: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-NEXT: lxvd2x v3, 0, r3 -; CHECK-NEXT: vsld v2, v2, v3 +; CHECK-NEXT: vaddudm v2, v2, v2 ; CHECK-NEXT: blr entry: %add = shl <2 x i64> %x, <i64 1, i64 1> diff --git a/llvm/test/CodeGen/PowerPC/pr47891.ll b/llvm/test/CodeGen/PowerPC/pr47891.ll index 4e41b3ee12155..46ff074fae647 100644 --- a/llvm/test/CodeGen/PowerPC/pr47891.ll +++ b/llvm/test/CodeGen/PowerPC/pr47891.ll @@ -7,26 +7,17 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 { ; CHECK-LABEL: poly2_lshift1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r6, r2, .LCPI0_0@toc@ha ; CHECK-NEXT: li r4, 72 -; CHECK-NEXT: addis r6, r2, .LCPI0_1@toc@ha ; CHECK-NEXT: ld r5, 64(r3) -; CHECK-NEXT: lxvd2x vs0, r3, r4 -; CHECK-NEXT: addi r6, r6, .LCPI0_1@toc@l -; CHECK-NEXT: lxvd2x v4, 0, r6 -; CHECK-NEXT: addis r6, r2, .LCPI0_0@toc@ha ; CHECK-NEXT: addi r6, r6, .LCPI0_0@toc@l -; CHECK-NEXT: xxswapd v2, vs0 -; CHECK-NEXT: mtfprd f0, r5 -; CHECK-NEXT: xxpermdi v3, v2, vs0, 2 -; 
CHECK-NEXT: vsld v2, v2, v4 +; CHECK-NEXT: lxvd2x vs0, r3, r4 ; CHECK-NEXT: lxvd2x v4, 0, r6 ; CHECK-NEXT: ld r6, 0(r3) ; CHECK-NEXT: sldi r7, r6, 1 ; CHECK-NEXT: rotldi r6, r6, 1 ; CHECK-NEXT: std r7, 0(r3) ; CHECK-NEXT: ld r7, 8(r3) -; CHECK-NEXT: vsrd v3, v3, v4 -; CHECK-NEXT: xxlor vs0, v2, v3 ; CHECK-NEXT: rldimi r6, r7, 1, 0 ; CHECK-NEXT: rotldi r7, r7, 1 ; CHECK-NEXT: std r6, 8(r3) @@ -44,6 +35,8 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 { ; CHECK-NEXT: std r7, 32(r3) ; CHECK-NEXT: ld r7, 40(r3) ; CHECK-NEXT: rldimi r6, r7, 1, 0 +; CHECK-NEXT: xxswapd v2, vs0 +; CHECK-NEXT: mtfprd f0, r5 ; CHECK-NEXT: rotldi r7, r7, 1 ; CHECK-NEXT: std r6, 40(r3) ; CHECK-NEXT: ld r6, 48(r3) @@ -54,10 +47,14 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 { ; CHECK-NEXT: rldimi r6, r7, 1, 0 ; CHECK-NEXT: std r6, 56(r3) ; CHECK-NEXT: rotldi r6, r7, 1 -; CHECK-NEXT: xxswapd vs0, vs0 -; CHECK-NEXT: stxvd2x vs0, r3, r4 ; CHECK-NEXT: rldimi r6, r5, 1, 0 ; CHECK-NEXT: std r6, 64(r3) +; CHECK-NEXT: xxpermdi v3, v2, vs0, 2 +; CHECK-NEXT: vsrd v3, v3, v4 +; CHECK-NEXT: vaddudm v2, v2, v2 +; CHECK-NEXT: xxlor vs0, v2, v3 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: stxvd2x vs0, r3, r4 ; CHECK-NEXT: blr entry: %0 = load i64, ptr %p, align 8