[AArch64] Combine ADDS and SUBS nodes with the non-flag setting versions #157563

AZero13 · 2025-09-08T21:50:42Z

We do that with the other flag setting nodes. We should do this with all flag setting and non-flag setting nodes.

llvmbot · 2025-09-08T21:51:17Z

@llvm/pr-subscribers-backend-aarch64

Author: AZero13 (AZero13)

Changes

We do that with the other flag setting nodes; why not with adds and subs?

Full diff: https://github.com/llvm/llvm-project/pull/157563.diff

6 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+4)
(modified) llvm/test/CodeGen/AArch64/abds-neg.ll (+12-13)
(modified) llvm/test/CodeGen/AArch64/abds.ll (+1-2)
(modified) llvm/test/CodeGen/AArch64/abdu-neg.ll (+12-13)
(modified) llvm/test/CodeGen/AArch64/abdu.ll (+1-2)
(modified) llvm/test/CodeGen/AArch64/adds_cmn.ll (+2-4)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b7011e0ea1669..ff0be2d3fb81f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27179,6 +27179,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
       return R;
     return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
+  case AArch64ISD::ADDS:
+    return performFlagSettingCombine(N, DCI, ISD::ADD);
+  case AArch64ISD::SUBS:
+    return performFlagSettingCombine(N, DCI, ISD::SUB);
   case AArch64ISD::BICi: {
     APInt DemandedBits =
         APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 02c76ba7343a0..37319642f5b34 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sxth w8, w0
-; CHECK-NEXT:    subs w8, w1, w8
-; CHECK-NEXT:    cneg w0, w8, ge
+; CHECK-NEXT:    subs w8, w8, w1
+; CHECK-NEXT:    cneg w0, w8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i16 %a to i64
   %bext = sext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w1, w0
-; CHECK-NEXT:    cneg w0, w8, ge
+; CHECK-NEXT:    subs w8, w0, w1
+; CHECK-NEXT:    cneg w0, w8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
   %bext = sext i32 %b to i64
@@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    subs w8, w8, w0
-; CHECK-NEXT:    cneg w0, w8, ge
+; CHECK-NEXT:    subs w8, w0, w1, sxth
+; CHECK-NEXT:    cneg w0, w8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
   %bext = sext i16 %b to i64
@@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w1, w0
-; CHECK-NEXT:    cneg w0, w8, ge
+; CHECK-NEXT:    subs w8, w0, w1
+; CHECK-NEXT:    cneg w0, w8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
   %bext = sext i32 %b to i64
@@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x1, x0
-; CHECK-NEXT:    cneg x0, x8, ge
+; CHECK-NEXT:    subs x8, x0, x1
+; CHECK-NEXT:    cneg x0, x8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i64 %a to i128
   %bext = sext i64 %b to i128
@@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x1, x0
-; CHECK-NEXT:    cneg x0, x8, ge
+; CHECK-NEXT:    subs x8, x0, x1
+; CHECK-NEXT:    cneg x0, x8, gt
 ; CHECK-NEXT:    ret
   %aext = sext i64 %a to i128
   %bext = sext i64 %b to i128
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index bf52e71ec21fe..30ac22cfb6b1f 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -112,8 +112,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    subs w8, w0, w8
+; CHECK-NEXT:    subs w8, w0, w1, sxth
 ; CHECK-NEXT:    cneg w0, w8, le
 ; CHECK-NEXT:    ret
   %aext = sext i32 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 400031b64cb84..79fc12ea76f63 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i16_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    subs w8, w1, w8
-; CHECK-NEXT:    cneg w0, w8, hs
+; CHECK-NEXT:    subs w8, w8, w1
+; CHECK-NEXT:    cneg w0, w8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i16 %a to i64
   %bext = zext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w1, w0
-; CHECK-NEXT:    cneg w0, w8, hs
+; CHECK-NEXT:    subs w8, w0, w1
+; CHECK-NEXT:    cneg w0, w8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
   %bext = zext i32 %b to i64
@@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    subs w8, w8, w0
-; CHECK-NEXT:    cneg w0, w8, hs
+; CHECK-NEXT:    subs w8, w0, w1, uxth
+; CHECK-NEXT:    cneg w0, w8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
   %bext = zext i16 %b to i64
@@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w1, w0
-; CHECK-NEXT:    cneg w0, w8, hs
+; CHECK-NEXT:    subs w8, w0, w1
+; CHECK-NEXT:    cneg w0, w8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
   %bext = zext i32 %b to i64
@@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x1, x0
-; CHECK-NEXT:    cneg x0, x8, hs
+; CHECK-NEXT:    subs x8, x0, x1
+; CHECK-NEXT:    cneg x0, x8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i64 %a to i128
   %bext = zext i64 %b to i128
@@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
 define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i64_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs x8, x1, x0
-; CHECK-NEXT:    cneg x0, x8, hs
+; CHECK-NEXT:    subs x8, x0, x1
+; CHECK-NEXT:    cneg x0, x8, hi
 ; CHECK-NEXT:    ret
   %aext = zext i64 %a to i128
   %bext = zext i64 %b to i128
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 8d2b0b0742d7d..af4ce92b16342 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -112,8 +112,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 ; CHECK-LABEL: abd_ext_i32_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    subs w8, w0, w8
+; CHECK-NEXT:    subs w8, w0, w1, uxth
 ; CHECK-NEXT:    cneg w0, w8, ls
 ; CHECK-NEXT:    ret
   %aext = zext i32 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/adds_cmn.ll b/llvm/test/CodeGen/AArch64/adds_cmn.ll
index 7f1cb0df049b1..aa070b7886ba5 100644
--- a/llvm/test/CodeGen/AArch64/adds_cmn.ll
+++ b/llvm/test/CodeGen/AArch64/adds_cmn.ll
@@ -4,10 +4,8 @@
 define { i32, i32 } @adds_cmn(i32 noundef %x, i32 noundef %y) {
 ; CHECK-LABEL: adds_cmn:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmn w0, w1
-; CHECK-NEXT:    add w1, w0, w1
-; CHECK-NEXT:    cset w8, lo
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    adds w1, w0, w1
+; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)

AZero13 · 2025-09-08T22:02:42Z

So, if we merge this and merge #157552, then we can get rid of the CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) transform entirely! @davemgreen @rj-jesus

AZero13 · 2025-09-10T15:00:46Z

@davemgreen Thoughts on this?

davemgreen

My memory is that we didn't do this just because we didn't have a meaningful test case for it. It sounds OK to me.

Can you update the commit message to be a better description?

AZero13 · 2025-09-11T11:24:55Z

My memory is that we didn't do this just because we didn't have a meaningful test case for it. It sounds OK to me.

Can you update the commit message to be a better description?

Done!

davemgreen

Thanks, Good enough.

LGTM

AZero13 · 2025-09-12T21:05:12Z

Thank you: can you please merge @davemgreen

combine subs and subs

dffb4bc

llvmbot added the backend:AArch64 label Sep 8, 2025

AZero13 changed the title ~~Combine ADDS and SUBS nodes~~ [AArch64] Combine ADDS with the non-flag setting versions and SUBS too. Sep 8, 2025

davemgreen reviewed Sep 11, 2025

View reviewed changes

AZero13 requested a review from davemgreen September 11, 2025 13:29

AZero13 changed the title ~~[AArch64] Combine ADDS with the non-flag setting versions and SUBS too.~~ [AArch64] Combine ADDS and SUBS nodes with the non-flag setting versions Sep 11, 2025

davemgreen approved these changes Sep 12, 2025

View reviewed changes

AZero13 mentioned this pull request Sep 12, 2025

[AArch64] Improve lowering of scalar abs(sub(a, b)). #151180

Merged

davemgreen merged commit a4993a2 into llvm:main Sep 13, 2025
11 checks passed

AZero13 deleted the combine branch September 13, 2025 11:17

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AArch64] Combine ADDS and SUBS nodes with the non-flag setting versions #157563

[AArch64] Combine ADDS and SUBS nodes with the non-flag setting versions #157563

Uh oh!

AZero13 commented Sep 8, 2025 •

edited

Loading

Uh oh!

llvmbot commented Sep 8, 2025

Uh oh!

AZero13 commented Sep 8, 2025

Uh oh!

AZero13 commented Sep 10, 2025

Uh oh!

davemgreen left a comment

Uh oh!

AZero13 commented Sep 11, 2025

Uh oh!

davemgreen left a comment

Uh oh!

AZero13 commented Sep 12, 2025

Uh oh!

Uh oh!

Uh oh!

[AArch64] Combine ADDS and SUBS nodes with the non-flag setting versions #157563

[AArch64] Combine ADDS and SUBS nodes with the non-flag setting versions #157563

Uh oh!

Conversation

AZero13 commented Sep 8, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Sep 8, 2025

Uh oh!

AZero13 commented Sep 8, 2025

Uh oh!

AZero13 commented Sep 10, 2025

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

AZero13 commented Sep 11, 2025

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

AZero13 commented Sep 12, 2025

Uh oh!

Uh oh!

Uh oh!

AZero13 commented Sep 8, 2025 •

edited

Loading