Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

AMDGPU: Form min3/max3 from minimumnum/maximumnum #139137

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented May 8, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented May 8, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 198.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139137.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+8)
  • (modified) llvm/test/CodeGen/AMDGPU/fmax3-maximumnum.ll (+290-884)
  • (modified) llvm/test/CodeGen/AMDGPU/fmin3-minimumnum.ll (+290-884)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 76c4546f1207e..2c1480101590d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -930,6 +930,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                        ISD::FMAXNUM_IEEE,
                        ISD::FMINIMUM,
                        ISD::FMAXIMUM,
+                       ISD::FMINIMUMNUM,
+                       ISD::FMAXIMUMNUM,
                        ISD::FMA,
                        ISD::SMIN,
                        ISD::SMAX,
@@ -13479,6 +13481,7 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
   switch (Opc) {
   case ISD::FMAXNUM:
   case ISD::FMAXNUM_IEEE:
+  case ISD::FMAXIMUMNUM:
     return AMDGPUISD::FMAX3;
   case ISD::FMAXIMUM:
     return AMDGPUISD::FMAXIMUM3;
@@ -13488,6 +13491,7 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
     return AMDGPUISD::UMAX3;
   case ISD::FMINNUM:
   case ISD::FMINNUM_IEEE:
+  case ISD::FMINIMUMNUM:
     return AMDGPUISD::FMIN3;
   case ISD::FMINIMUM:
     return AMDGPUISD::FMINIMUM3;
@@ -13609,6 +13613,8 @@ static bool supportsMin3Max3(const GCNSubtarget &Subtarget, unsigned Opc,
   case ISD::FMAXNUM:
   case ISD::FMINNUM_IEEE:
   case ISD::FMAXNUM_IEEE:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM:
   case AMDGPUISD::FMIN_LEGACY:
   case AMDGPUISD::FMAX_LEGACY:
     return (VT == MVT::f32) || (VT == MVT::f16 && Subtarget.hasMin3Max3_16());
@@ -15314,6 +15320,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FMINNUM_IEEE:
   case ISD::FMAXIMUM:
   case ISD::FMINIMUM:
+  case ISD::FMAXIMUMNUM:
+  case ISD::FMINIMUMNUM:
   case ISD::SMAX:
   case ISD::SMIN:
   case ISD::UMAX:
diff --git a/llvm/test/CodeGen/AMDGPU/fmax3-maximumnum.ll b/llvm/test/CodeGen/AMDGPU/fmax3-maximumnum.ll
index 396fdf93eaaa3..d458bb2492f23 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3-maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3-maximumnum.ll
@@ -21,60 +21,37 @@ define float @v_max3_f32_maximumnum_maximumnum__v_v_v_0(float %a, float %b, floa
 ; GFX6-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_0:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX6-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX6-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX6-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; GFX6-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX6-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_0:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_0:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_dual_max_f32 v0, v0, v1 :: v_dual_max_f32 v1, v2, v2
-; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    v_max3_f32 v0, v0, v1, v2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_0:
@@ -84,10 +61,7 @@ define float @v_max3_f32_maximumnum_maximumnum__v_v_v_0(float %a, float %b, floa
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v2
-; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
+; GFX12-NEXT:    v_max3_num_f32 v0, v0, v1, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %tmp0 = call float @llvm.maximumnum.f32(float %a, float %b)
   %max3 = call float @llvm.maximumnum.f32(float %tmp0, float %c)
@@ -98,60 +72,37 @@ define float @v_max3_f32_maximumnum_maximumnum__v_v_v_1(float %a, float %b, floa
 ; GFX6-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_1:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX6-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX6-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX6-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; GFX6-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX6-NEXT:    v_max3_f32 v0, v2, v0, v1
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_1:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; GFX7-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX7-NEXT:    v_max3_f32 v0, v2, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_1:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v2
-; GFX8-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX8-NEXT:    v_max3_f32 v0, v2, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX9-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX9-NEXT:    v_max3_f32 v0, v2, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX10-NEXT:    v_max_f32_e32 v1, v2, v2
-; GFX10-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX10-NEXT:    v_max3_f32 v0, v2, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_1:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_dual_max_f32 v0, v0, v1 :: v_dual_max_f32 v1, v2, v2
-; GFX11-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX11-NEXT:    v_max3_f32 v0, v2, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_v_1:
@@ -161,10 +112,7 @@ define float @v_max3_f32_maximumnum_maximumnum__v_v_v_1(float %a, float %b, floa
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v2
-; GFX12-NEXT:    v_max_num_f32_e32 v0, v1, v0
+; GFX12-NEXT:    v_max3_num_f32 v0, v2, v0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %tmp0 = call float @llvm.maximumnum.f32(float %a, float %b)
   %max3 = call float @llvm.maximumnum.f32(float %c, float %tmp0)
@@ -175,48 +123,36 @@ define float @v_max3_f32_maximumnum_maximumnum__i_v_v_0(float %b, float %c) {
 ; GFX6-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_0:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX6-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_0:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX7-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX8-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX9-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_0:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -227,8 +163,6 @@ define float @v_max3_f32_maximumnum_maximumnum__i_v_v_0(float %b, float %c) {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    v_max3_num_f32 v0, v0, 1.0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %tmp0 = call float @llvm.maximumnum.f32(float 1.0, float %b)
@@ -240,48 +174,36 @@ define float @v_max3_f32_maximumnum_maximumnum__i_v_v_1(float %b, float %c) {
 ; GFX6-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_1:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX6-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_1:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX7-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_1:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX8-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX9-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_max3_f32_maximumnum_maximumnum__i_v_v_1:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -292,8 +214,6 @@ define float @v_max3_f32_maximumnum_maximumnum__i_v_v_1(float %b, float %c) {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    v_max3_num_f32 v0, v1, v0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %tmp0 = call float @llvm.maximumnum.f32(float 1.0, float %b)
@@ -305,48 +225,36 @@ define float @v_max3_f32_maximumnum_maximumnum__v_i_v_0(float %a, float %c) {
 ; GFX6-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_0:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX6-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_0:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX7-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX8-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX9-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_0:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_max3_f32 v0, v0, 1.0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -357,8 +265,6 @@ define float @v_max3_f32_maximumnum_maximumnum__v_i_v_0(float %a, float %c) {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    v_max3_num_f32 v0, v0, 1.0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %tmp0 = call float @llvm.maximumnum.f32(float %a, float 1.0)
@@ -370,48 +276,36 @@ define float @v_max3_f32_maximumnum_maximumnum__v_i_v_1(float %a, float %c) {
 ; GFX6-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_1:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX6-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_1:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX7-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_1:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX8-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX9-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_max3_f32_maximumnum_maximumnum__v_i_v_1:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_max3_f32 v0, v1, v0, 1.0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -422,8 +316,6 @@ define float @v_max3_f32_maximumnum_maximumnum__v_i_v_1(float %a, float %c) {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    v_max3_num_f32 v0, v1, v0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %tmp0 = call float @llvm.maximumnum.f32(float %a, float 1.0)
@@ -435,55 +327,37 @@ define float @v_max3_f32_maximumnum_maximumnum__v_v_i(float %a, float %b) {
 ; GFX6-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_i:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX6-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX6-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX6-NEXT:    v_max_f32_e32 v0, 1.0, v0
+; GFX6-NEXT:    v_max3_f32 v0, v0, v1, 1.0
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_i:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX7-NEXT:    v_max_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    v_max3_f32 v0, v0, v1, 1.0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_max3_f32_maximumnum_maximumnum__v_v_i:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX8-NEXT:    v_max_f32_e32 v0, 1.0, v0
+; GFX8-NEXT:    v_max3_f32 v0, v0, v1, 1.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_max3_f32_maximum...
[truncated]

Copy link
Collaborator

@rampitec rampitec left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Contributor Author

arsenm commented May 9, 2025

Merge activity

  • May 9, 2:00 AM EDT: A user started a stack merge that includes this pull request via Graphite.
  • May 9, 2:10 AM EDT: Graphite rebased this pull request as part of a merge.
  • May 9, 2:12 AM EDT: @arsenm merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/amdgpu/baseline-tests-min3-minimumnum-maximumnum branch from 147d87a to 5270733 Compare May 9, 2025 06:07
Base automatically changed from users/arsenm/amdgpu/baseline-tests-min3-minimumnum-maximumnum to main May 9, 2025 06:10
@arsenm arsenm force-pushed the users/arsenm/form-min3-max3-from-minimumnum-maximumnum branch from cc9da20 to e0a42b3 Compare May 9, 2025 06:10
@arsenm arsenm merged commit f13ff77 into main May 9, 2025
6 of 10 checks passed
@arsenm arsenm deleted the users/arsenm/form-min3-max3-from-minimumnum-maximumnum branch May 9, 2025 06:12
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants