-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[PowerPC] catch v2i64 shift left by 1 is add case #138772
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
RolandF77
wants to merge
1
commit into
llvm:main
Choose a base branch
from
RolandF77:v2i64shl1
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
+47
−17
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-powerpc Author: None (RolandF77) Changes: For vector element sizes i8 through i32, the PPC backend catches the case of x << 1 at instruction selection and generates x + x instead. This patch handles the v2i64 case as well. Full diff: https://github.com/llvm/llvm-project/pull/138772.diff 4 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f9e791e4b34f8..9e562d7ca498e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18456,10 +18456,45 @@ static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
return SDValue();
}
+SDValue PPCTargetLowering::combineVectorSHL(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(N->getValueType(0).isVector() && "Vector type expected.");
+
+ SDValue N1 = N->getOperand(1);
+ if (!Subtarget.hasP8Altivec() || N1.getOpcode() != ISD::BUILD_VECTOR ||
+ !isOperationLegal(ISD::ADD, N->getValueType(0)))
+ return SDValue();
+
+ // For 64-bit there is no splat immediate so we want to catch shift by 1 here
+ // before the BUILD_VECTOR is replaced by a load.
+ EVT EltTy = N->getValueType(0).getScalarType();
+ if (EltTy != MVT::i64)
+ return SDValue();
+
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool BVNIsConstantSplat =
+ BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian());
+ if (!BVNIsConstantSplat || SplatBitSize != EltTy.getSizeInBits())
+ return SDValue();
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ if (SplatBits != 1)
+ return SDValue();
+
+ return DCI.DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(0));
+}
+
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
return Value;
+ if (N->getValueType(0).isVector())
+ return combineVectorSHL(N, DCI);
+
SDValue N0 = N->getOperand(0);
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7365f3103276c..e7e7c21b50395 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1441,6 +1441,7 @@ namespace llvm {
SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVectorSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/test/CodeGen/PowerPC/optimize-vector.ll b/llvm/test/CodeGen/PowerPC/optimize-vector.ll
index df5ef43e8ee0c..4d5481b630c98 100644
--- a/llvm/test/CodeGen/PowerPC/optimize-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/optimize-vector.ll
@@ -36,10 +36,7 @@ entry:
define dso_local <2 x i64> @x2d(<2 x i64> noundef %x) {
; CHECK-LABEL: x2d:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-NEXT: lxvd2x v3, 0, r3
-; CHECK-NEXT: vsld v2, v2, v3
+; CHECK-NEXT: vaddudm v2, v2, v2
; CHECK-NEXT: blr
entry:
%add = shl <2 x i64> %x, <i64 1, i64 1>
diff --git a/llvm/test/CodeGen/PowerPC/pr47891.ll b/llvm/test/CodeGen/PowerPC/pr47891.ll
index 4e41b3ee12155..46ff074fae647 100644
--- a/llvm/test/CodeGen/PowerPC/pr47891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47891.ll
@@ -7,26 +7,17 @@
define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
; CHECK-LABEL: poly2_lshift1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis r6, r2, .LCPI0_0@toc@ha
; CHECK-NEXT: li r4, 72
-; CHECK-NEXT: addis r6, r2, .LCPI0_1@toc@ha
; CHECK-NEXT: ld r5, 64(r3)
-; CHECK-NEXT: lxvd2x vs0, r3, r4
-; CHECK-NEXT: addi r6, r6, .LCPI0_1@toc@l
-; CHECK-NEXT: lxvd2x v4, 0, r6
-; CHECK-NEXT: addis r6, r2, .LCPI0_0@toc@ha
; CHECK-NEXT: addi r6, r6, .LCPI0_0@toc@l
-; CHECK-NEXT: xxswapd v2, vs0
-; CHECK-NEXT: mtfprd f0, r5
-; CHECK-NEXT: xxpermdi v3, v2, vs0, 2
-; CHECK-NEXT: vsld v2, v2, v4
+; CHECK-NEXT: lxvd2x vs0, r3, r4
; CHECK-NEXT: lxvd2x v4, 0, r6
; CHECK-NEXT: ld r6, 0(r3)
; CHECK-NEXT: sldi r7, r6, 1
; CHECK-NEXT: rotldi r6, r6, 1
; CHECK-NEXT: std r7, 0(r3)
; CHECK-NEXT: ld r7, 8(r3)
-; CHECK-NEXT: vsrd v3, v3, v4
-; CHECK-NEXT: xxlor vs0, v2, v3
; CHECK-NEXT: rldimi r6, r7, 1, 0
; CHECK-NEXT: rotldi r7, r7, 1
; CHECK-NEXT: std r6, 8(r3)
@@ -44,6 +35,8 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
; CHECK-NEXT: std r7, 32(r3)
; CHECK-NEXT: ld r7, 40(r3)
; CHECK-NEXT: rldimi r6, r7, 1, 0
+; CHECK-NEXT: xxswapd v2, vs0
+; CHECK-NEXT: mtfprd f0, r5
; CHECK-NEXT: rotldi r7, r7, 1
; CHECK-NEXT: std r6, 40(r3)
; CHECK-NEXT: ld r6, 48(r3)
@@ -54,10 +47,14 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
; CHECK-NEXT: rldimi r6, r7, 1, 0
; CHECK-NEXT: std r6, 56(r3)
; CHECK-NEXT: rotldi r6, r7, 1
-; CHECK-NEXT: xxswapd vs0, vs0
-; CHECK-NEXT: stxvd2x vs0, r3, r4
; CHECK-NEXT: rldimi r6, r5, 1, 0
; CHECK-NEXT: std r6, 64(r3)
+; CHECK-NEXT: xxpermdi v3, v2, vs0, 2
+; CHECK-NEXT: vsrd v3, v3, v4
+; CHECK-NEXT: vaddudm v2, v2, v2
+; CHECK-NEXT: xxlor vs0, v2, v3
+; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: stxvd2x vs0, r3, r4
; CHECK-NEXT: blr
entry:
%0 = load i64, ptr %p, align 8
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
For vector element sizes i8 through i32, the PPC backend catches the case of x << 1 at instruction selection and generates x + x instead. This patch handles the v2i64 case as well.