Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

AMDGPU: Test more subtargets in minimumnum/maximumnum tests #139144

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented May 8, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented May 8, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

The patch is 137.50 KiB and has been truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/139144.diff

2 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/maximumnum.ll (+1043-204)
  • (modified) llvm/test/CodeGen/AMDGPU/minimumnum.ll (+1022-199)
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index 718a266f49f5d..df79534a0844e 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
@@ -8,6 +10,16 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
 
 define half @v_maximumnum_f16(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -80,6 +92,16 @@ define half @v_maximumnum_f16(half %x, half %y) {
 }
 
 define half @v_maximumnum_f16_nnan(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_nnan:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f16_nnan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -134,6 +156,14 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) {
 }
 
 define half @v_maximumnum_f16_1.0(half %x) {
+; GFX7-LABEL: v_maximumnum_f16_1.0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f16_1.0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -199,6 +229,17 @@ define half @v_maximumnum_f16_1.0(half %x) {
 }
 
 define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
+; GFX7-LABEL: v_maximumnum_bf16:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_bf16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -231,36 +272,67 @@ define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximumnum_bf16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
-; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT:    v_bfe_u32 v3, v2, 16, 1
-; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX9-NEXT:    v_add3_u32 v3, v3, v2, s4
-; GFX9-NEXT:    v_or_b32_e32 v4, 0x400000, v2
-; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
-; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_bf16:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
+; GFX900-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v3
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX900-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX900-NEXT:    v_bfe_u32 v3, v2, 16, 1
+; GFX900-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX900-NEXT:    v_add3_u32 v3, v3, v2, s4
+; GFX900-NEXT:    v_or_b32_e32 v4, 0x400000, v2
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX900-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
+; GFX900-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_bf16:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX950-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
+; GFX950-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX950-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX950-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
+; GFX950-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v3
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX950-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX950-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX950-NEXT:    v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX950-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; GFX950-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximumnum_bf16:
 ; GFX10:       ; %bb.0:
@@ -474,6 +546,17 @@ define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
 }
 
 define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
+; GFX7-LABEL: v_maximumnum_bf16_nnan:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_bf16_nnan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -490,21 +573,41 @@ define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximumnum_bf16_nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
-; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
-; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
-; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
-; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_bf16_nnan:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX900-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
+; GFX900-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
+; GFX900-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX900-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; GFX900-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_bf16_nnan:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX950-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
+; GFX950-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX950-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; GFX950-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX950-NEXT:    s_nop 1
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximumnum_bf16_nnan:
 ; GFX10:       ; %bb.0:
@@ -618,6 +721,14 @@ define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
 }
 
 define float @v_maximumnum_f32(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -666,6 +777,12 @@ define float @v_maximumnum_f32(float %x, float %y) {
 }
 
 define float @v_maximumnum_f32_nnan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_nnan:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f32_nnan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -704,6 +821,14 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) {
 }
 
 define double @v_maximumnum_f64(double %x, double %y) {
+; GFX7-LABEL: v_maximumnum_f64:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -754,6 +879,12 @@ define double @v_maximumnum_f64(double %x, double %y) {
 }
 
 define double @v_maximumnum_f64_nnan(double %x, double %y) {
+; GFX7-LABEL: v_maximumnum_f64_nnan:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f64_nnan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -792,6 +923,13 @@ define double @v_maximumnum_f64_nnan(double %x, double %y) {
 }
 
 define float @v_maximumnum_f32_1.0(float %x) {
+; GFX7-LABEL: v_maximumnum_f32_1.0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f32_1.0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -837,6 +975,14 @@ define float @v_maximumnum_f32_1.0(float %x) {
 }
 
 define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_rhs_not_snan:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f32_rhs_not_snan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -886,6 +1032,14 @@ define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
 }
 
 define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_lhs_not_snan:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f32_lhs_not_snan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -935,6 +1089,14 @@ define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
 }
 
 define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_both_operands_not_snan:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f32_both_operands_not_snan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -985,6 +1147,13 @@ define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
 }
 
 define double @v_maximumnum_f64_1.0(double %x) {
+; GFX7-LABEL: v_maximumnum_f64_1.0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], 1.0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f64_1.0:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1030,6 +1199,16 @@ define double @v_maximumnum_f64_1.0(double %x) {
 }
 
 define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_s_v:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, s16
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_max_f32_e32 v0, v1, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f16_s_v:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1038,13 +1217,21 @@ define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
 ; GFX8-NEXT:    v_max_f16_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximumnum_f16_s_v:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT:    v_max_f16_e64 v1, s16, s16
-; GFX9-NEXT:    v_max_f16_e32 v0, v1, v0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_s_v:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT:    v_max_f16_e64 v1, s16, s16
+; GFX900-NEXT:    v_max_f16_e32 v0, v1, v0
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_s_v:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT:    v_max_f16_e64 v1, s0, s0
+; GFX950-NEXT:    v_max_f16_e32 v0, v1, v0
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximumnum_f16_s_v:
 ; GFX10:       ; %bb.0:
@@ -1102,6 +1289,16 @@ define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
 }
 
 define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
+; GFX7-LABEL: v_maximumnum_f16_v_s:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, s16
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f16_v_s:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1110,13 +1307,21 @@ define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
 ; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximumnum_f16_v_s:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f16_e64 v1, s16, s16
-; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_v_s:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_max_f16_e64 v1, s16, s16
+; GFX900-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_v_s:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_max_f16_e64 v1, s0, s0
+; GFX950-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximumnum_f16_v_s:
 ; GFX10:       ; %bb.0:
@@ -1174,6 +1379,16 @@ define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
 }
 
 define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
+; GFX7-LABEL: v_maximumnum_f16_s_s:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, s16
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, s17
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX8-LABEL: v_maximumnum_f16_s_s:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1182,13 +1397,21 @@ define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
 ; GFX8-NEXT:    v_max_f16_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximumnum_f16_s_s:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_max_f16_e64 v0, s17, s17
-; GFX9-NEXT:    v_max_f16_e64 v1...
[truncated]

Copy link
Contributor Author

arsenm commented May 9, 2025

Merge activity

  • May 9, 2:00 AM EDT: A user started a stack merge that includes this pull request via Graphite.
  • May 9, 2:13 AM EDT: Graphite rebased this pull request as part of a merge.
  • May 9, 2:15 AM EDT: @arsenm merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/form-min3-max3-from-minimumnum-maximumnum branch from cc9da20 to e0a42b3 Compare May 9, 2025 06:10
Base automatically changed from users/arsenm/form-min3-max3-from-minimumnum-maximumnum to main May 9, 2025 06:12
@arsenm arsenm force-pushed the users/arsenm/add-more-subtargets-minimumnum-maximumnum-tests branch from 7babb7b to b897ebf Compare May 9, 2025 06:13
@arsenm arsenm merged commit 6cf84e0 into main May 9, 2025
5 of 10 checks passed
@arsenm arsenm deleted the users/arsenm/add-more-subtargets-minimumnum-maximumnum-tests branch May 9, 2025 06:15
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants