-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AArch64] Combine ADDS and SUBS nodes with the non-flag setting versions #157563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: AZero13 (AZero13) ChangesWe do that with the other flag setting nodes; why not with adds and subs? Full diff: https://github.com/llvm/llvm-project/pull/157563.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b7011e0ea1669..ff0be2d3fb81f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27179,6 +27179,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
+ case AArch64ISD::ADDS:
+ return performFlagSettingCombine(N, DCI, ISD::ADD);
+ case AArch64ISD::SUBS:
+ return performFlagSettingCombine(N, DCI, ISD::SUB);
case AArch64ISD::BICi: {
APInt DemandedBits =
APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 02c76ba7343a0..37319642f5b34 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w1, w8
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: cneg w0, w8, gt
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, gt
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w8, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1, sxth
+; CHECK-NEXT: cneg w0, w8, gt
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, gt
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, gt
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, gt
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index bf52e71ec21fe..30ac22cfb6b1f 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -112,8 +112,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w0, w8
+; CHECK-NEXT: subs w8, w0, w1, sxth
; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 400031b64cb84..79fc12ea76f63 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w1, w8
-; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: cneg w0, w8, hi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, hi
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: subs w8, w8, w0
-; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: subs w8, w0, w1, uxth
+; CHECK-NEXT: cneg w0, w8, hi
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i16 %b to i64
@@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, hi
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, hs
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, hi
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, hs
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, hi
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 8d2b0b0742d7d..af4ce92b16342 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -112,8 +112,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: subs w8, w0, w8
+; CHECK-NEXT: subs w8, w0, w1, uxth
; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/adds_cmn.ll b/llvm/test/CodeGen/AArch64/adds_cmn.ll
index 7f1cb0df049b1..aa070b7886ba5 100644
--- a/llvm/test/CodeGen/AArch64/adds_cmn.ll
+++ b/llvm/test/CodeGen/AArch64/adds_cmn.ll
@@ -4,10 +4,8 @@
define { i32, i32 } @adds_cmn(i32 noundef %x, i32 noundef %y) {
; CHECK-LABEL: adds_cmn:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmn w0, w1
-; CHECK-NEXT: add w1, w0, w1
-; CHECK-NEXT: cset w8, lo
-; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: adds w1, w0, w1
+; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
entry:
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
|
So, if we merge this and merge #157552, then we can get rid of the CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) transform entirely! @davemgreen @rj-jesus |
@davemgreen Thoughts on this? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My memory is that we didn't do this just because we didn't have a meaningful test case for it. It sounds OK to me.
Can you update the commit message to be a better description?
Done! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, Good enough.
LGTM
Thank you: can you please merge @davemgreen |
We do that with the other flag setting nodes. We should do this with all flag setting and non-flag setting nodes.