Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c91ea57

Browse files
committed
R600: Remove softPromoteHalfType
Also includes a somewhat hacky, minimal change that fixes kernel arguments lowered as f16, which avoids assertions once softPromoteHalfType is removed. Half support was never really implemented for R600; there just happened to be a few incidental tests that included a half argument (and these were not even meaningful, since the function body simply folded to nothing due to the lack of callable function support).
1 parent 34cfc39 commit c91ea57

3 files changed

Lines changed: 167 additions & 2 deletions

File tree

llvm/lib/Target/AMDGPU/R600ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,6 +1480,9 @@ SDValue R600TargetLowering::LowerFormalArguments(
14801480
MemVT = MemVT.getVectorElementType();
14811481
}
14821482

1483+
if (VT.isInteger() && !MemVT.isInteger())
1484+
MemVT = MemVT.changeTypeToInteger();
1485+
14831486
if (AMDGPU::isShader(CallConv)) {
14841487
Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
14851488
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);

llvm/lib/Target/AMDGPU/R600ISelLowering.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,6 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
117117

118118
TargetLowering::AtomicExpansionKind
119119
shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override;
120-
121-
bool softPromoteHalfType() const override { return false; }
122120
};
123121

124122
} // End namespace llvm;

llvm/test/CodeGen/AMDGPU/kernel-args.ll

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6187,3 +6187,167 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace
61876187
store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
61886188
ret void
61896189
}
6190+
6191+
define amdgpu_kernel void @f16_arg(half %arg, ptr addrspace(1) %ptr) {
6192+
; SI-LABEL: f16_arg:
6193+
; SI: ; %bb.0:
6194+
; SI-NEXT: s_load_dword s6, s[4:5], 0x9
6195+
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
6196+
; SI-NEXT: s_mov_b32 s3, 0xf000
6197+
; SI-NEXT: s_mov_b32 s2, -1
6198+
; SI-NEXT: s_waitcnt lgkmcnt(0)
6199+
; SI-NEXT: v_mov_b32_e32 v0, s6
6200+
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
6201+
; SI-NEXT: s_endpgm
6202+
;
6203+
; VI-LABEL: f16_arg:
6204+
; VI: ; %bb.0:
6205+
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
6206+
; VI-NEXT: s_load_dword s2, s[4:5], 0x24
6207+
; VI-NEXT: s_waitcnt lgkmcnt(0)
6208+
; VI-NEXT: v_mov_b32_e32 v0, s0
6209+
; VI-NEXT: v_mov_b32_e32 v1, s1
6210+
; VI-NEXT: v_mov_b32_e32 v2, s2
6211+
; VI-NEXT: flat_store_short v[0:1], v2
6212+
; VI-NEXT: s_endpgm
6213+
;
6214+
; GFX9-LABEL: f16_arg:
6215+
; GFX9: ; %bb.0:
6216+
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x0
6217+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8
6218+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
6219+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6220+
; GFX9-NEXT: v_mov_b32_e32 v1, s2
6221+
; GFX9-NEXT: global_store_short v0, v1, s[0:1]
6222+
; GFX9-NEXT: s_endpgm
6223+
;
6224+
; EG-LABEL: f16_arg:
6225+
; EG: ; %bb.0:
6226+
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
6227+
; EG-NEXT: TEX 0 @6
6228+
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
6229+
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
6230+
; EG-NEXT: CF_END
6231+
; EG-NEXT: PAD
6232+
; EG-NEXT: Fetch clause starting at 6:
6233+
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
6234+
; EG-NEXT: ALU clause starting at 8:
6235+
; EG-NEXT: MOV * T0.X, 0.0,
6236+
; EG-NEXT: ALU clause starting at 9:
6237+
; EG-NEXT: AND_INT T0.W, KC0[2].Z, literal.x,
6238+
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
6239+
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
6240+
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
6241+
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
6242+
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
6243+
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
6244+
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
6245+
; EG-NEXT: MOV T0.Y, 0.0,
6246+
; EG-NEXT: MOV * T0.Z, 0.0,
6247+
; EG-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
6248+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6249+
;
6250+
; CM-LABEL: f16_arg:
6251+
; CM: ; %bb.0:
6252+
; CM-NEXT: ALU 0, @8, KC0[], KC1[]
6253+
; CM-NEXT: TEX 0 @6
6254+
; CM-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
6255+
; CM-NEXT: MEM_RAT MSKOR T0.XW, T1.X
6256+
; CM-NEXT: CF_END
6257+
; CM-NEXT: PAD
6258+
; CM-NEXT: Fetch clause starting at 6:
6259+
; CM-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
6260+
; CM-NEXT: ALU clause starting at 8:
6261+
; CM-NEXT: MOV * T0.X, 0.0,
6262+
; CM-NEXT: ALU clause starting at 9:
6263+
; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
6264+
; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
6265+
; CM-NEXT: AND_INT T0.Z, T0.X, literal.x,
6266+
; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
6267+
; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
6268+
; CM-NEXT: LSHL T0.X, PV.Z, PV.W,
6269+
; CM-NEXT: LSHL * T0.W, literal.x, PV.W,
6270+
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
6271+
; CM-NEXT: MOV T0.Y, 0.0,
6272+
; CM-NEXT: MOV * T0.Z, 0.0,
6273+
; CM-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
6274+
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6275+
store half %arg, ptr addrspace(1) %ptr
6276+
ret void
6277+
}
6278+
6279+
define amdgpu_kernel void @v2f16_arg(<2 x half> %arg, ptr addrspace(1) %ptr) {
6280+
; SI-LABEL: v2f16_arg:
6281+
; SI: ; %bb.0:
6282+
; SI-NEXT: s_load_dword s6, s[4:5], 0x9
6283+
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
6284+
; SI-NEXT: s_mov_b32 s3, 0xf000
6285+
; SI-NEXT: s_mov_b32 s2, -1
6286+
; SI-NEXT: s_waitcnt lgkmcnt(0)
6287+
; SI-NEXT: v_mov_b32_e32 v0, s6
6288+
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
6289+
; SI-NEXT: s_endpgm
6290+
;
6291+
; VI-LABEL: v2f16_arg:
6292+
; VI: ; %bb.0:
6293+
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
6294+
; VI-NEXT: s_load_dword s2, s[4:5], 0x24
6295+
; VI-NEXT: s_waitcnt lgkmcnt(0)
6296+
; VI-NEXT: v_mov_b32_e32 v0, s0
6297+
; VI-NEXT: v_mov_b32_e32 v1, s1
6298+
; VI-NEXT: v_mov_b32_e32 v2, s2
6299+
; VI-NEXT: flat_store_dword v[0:1], v2
6300+
; VI-NEXT: s_endpgm
6301+
;
6302+
; GFX9-LABEL: v2f16_arg:
6303+
; GFX9: ; %bb.0:
6304+
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x0
6305+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8
6306+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
6307+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6308+
; GFX9-NEXT: v_mov_b32_e32 v1, s2
6309+
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
6310+
; GFX9-NEXT: s_endpgm
6311+
;
6312+
; EG-LABEL: v2f16_arg:
6313+
; EG: ; %bb.0:
6314+
; EG-NEXT: ALU 0, @10, KC0[], KC1[]
6315+
; EG-NEXT: TEX 1 @6
6316+
; EG-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[]
6317+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
6318+
; EG-NEXT: CF_END
6319+
; EG-NEXT: PAD
6320+
; EG-NEXT: Fetch clause starting at 6:
6321+
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 38, #3
6322+
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
6323+
; EG-NEXT: ALU clause starting at 10:
6324+
; EG-NEXT: MOV * T0.X, 0.0,
6325+
; EG-NEXT: ALU clause starting at 11:
6326+
; EG-NEXT: LSHL * T0.W, T1.X, literal.x,
6327+
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6328+
; EG-NEXT: OR_INT T0.X, T0.X, PV.W,
6329+
; EG-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
6330+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6331+
;
6332+
; CM-LABEL: v2f16_arg:
6333+
; CM: ; %bb.0:
6334+
; CM-NEXT: ALU 0, @10, KC0[], KC1[]
6335+
; CM-NEXT: TEX 1 @6
6336+
; CM-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[]
6337+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
6338+
; CM-NEXT: CF_END
6339+
; CM-NEXT: PAD
6340+
; CM-NEXT: Fetch clause starting at 6:
6341+
; CM-NEXT: VTX_READ_16 T1.X, T0.X, 38, #3
6342+
; CM-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
6343+
; CM-NEXT: ALU clause starting at 10:
6344+
; CM-NEXT: MOV * T0.X, 0.0,
6345+
; CM-NEXT: ALU clause starting at 11:
6346+
; CM-NEXT: LSHL * T0.W, T1.X, literal.x,
6347+
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6348+
; CM-NEXT: OR_INT * T0.X, T0.X, PV.W,
6349+
; CM-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
6350+
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6351+
store <2 x half> %arg, ptr addrspace(1) %ptr
6352+
ret void
6353+
}

0 commit comments

Comments
 (0)