Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a751e30

Browse files
[AMDLIBM] - Add new vector call from amdlibm library (#194793)
Add vector calls from AMDLIBM: erfinv (vector variants vrd2, vrd4, vrd8), erfcinv (vector variants vrd2, vrd4, vrd8), and cdfnorminv (vector variants vrd2, vrd4, vrd8), as per the latest externally supported calls listed in [amdlibm_vec.h](https://github.com/amd/aocl-libm-ose/blob/master/include/external/amdlibm_vec.h).
1 parent 346fdd5 commit a751e30

4 files changed

Lines changed: 151 additions & 1 deletion

File tree

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,6 +1912,14 @@ TLI_DEFINE_VECFUNC("erfc", "amd_vrd2_erfc", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
19121912
TLI_DEFINE_VECFUNC("erfc", "amd_vrd4_erfc", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
19131913
TLI_DEFINE_VECFUNC("erfc", "amd_vrd8_erfc", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
19141914

1915+
TLI_DEFINE_VECFUNC("erfinv", "amd_vrd2_erfinv", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1916+
TLI_DEFINE_VECFUNC("erfinv", "amd_vrd4_erfinv", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1917+
TLI_DEFINE_VECFUNC("erfinv", "amd_vrd8_erfinv", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1918+
1919+
TLI_DEFINE_VECFUNC("erfcinv", "amd_vrd2_erfcinv", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1920+
TLI_DEFINE_VECFUNC("erfcinv", "amd_vrd4_erfcinv", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1921+
TLI_DEFINE_VECFUNC("erfcinv", "amd_vrd8_erfcinv", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1922+
19151923
TLI_DEFINE_VECFUNC("erfcf", "amd_vrs4_erfcf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
19161924
TLI_DEFINE_VECFUNC("erfcf", "amd_vrs8_erfcf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
19171925
TLI_DEFINE_VECFUNC("erfcf", "amd_vrs16_erfcf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
@@ -1920,6 +1928,10 @@ TLI_DEFINE_VECFUNC("cdfnorm", "amd_vrd2_cdfnorm", FIXED(2), NOMASK, "_ZGV_LLVM_N
19201928
TLI_DEFINE_VECFUNC("cdfnorm", "amd_vrd4_cdfnorm", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
19211929
TLI_DEFINE_VECFUNC("cdfnorm", "amd_vrd8_cdfnorm", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
19221930

1931+
TLI_DEFINE_VECFUNC("cdfnorminv", "amd_vrd2_cdfnorminv", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1932+
TLI_DEFINE_VECFUNC("cdfnorminv", "amd_vrd4_cdfnorminv", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1933+
TLI_DEFINE_VECFUNC("cdfnorminv", "amd_vrd8_cdfnorminv", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1934+
19231935
TLI_DEFINE_VECFUNC("roundf", "amd_vrs16_roundf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
19241936
TLI_DEFINE_VECFUNC("roundf", "amd_vrs8_roundf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
19251937
TLI_DEFINE_VECFUNC("roundf", "amd_vrs4_roundf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")

llvm/include/llvm/IR/RuntimeLibcalls.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,9 @@ foreach S = !listconcat(F32VectorSuffixes, F64VectorSuffixes) in {
218218
def COSH_#S : RuntimeLibcall;
219219
def COSPI_#S : RuntimeLibcall;
220220
def ERFC_#S : RuntimeLibcall;
221+
def ERFCINV_#S : RuntimeLibcall;
221222
def ERF_#S : RuntimeLibcall;
223+
def ERFINV_#S : RuntimeLibcall;
222224
def EXP_#S : RuntimeLibcall;
223225
def EXP_FINITE_#S : RuntimeLibcall;
224226
def EXP10_#S : RuntimeLibcall;
@@ -253,6 +255,7 @@ foreach S = !listconcat(F32VectorSuffixes, F64VectorSuffixes) in {
253255
def TANH_#S : RuntimeLibcall;
254256
def TGAMMA_#S : RuntimeLibcall;
255257
def CDFNORM_#S : RuntimeLibcall;
258+
def CDFNORMINV_#S : RuntimeLibcall;
256259
def ROUND_#S : RuntimeLibcall;
257260
}
258261

@@ -4456,6 +4459,12 @@ defset list<RuntimeLibcallImpl> AMDLIBM_VECFUNCS = {
44564459
def amd_vrd8_sin : RuntimeLibcallImpl<SIN_V8F64>;
44574460
def amd_vrd8_sincos : RuntimeLibcallImpl<SINCOS_V8F64>;
44584461
def amd_vrd8_tan : RuntimeLibcallImpl<TAN_V8F64>;
4462+
def amd_vrd2_erfinv : RuntimeLibcallImpl<ERFINV_V2F64>;
4463+
def amd_vrd4_erfinv : RuntimeLibcallImpl<ERFINV_V4F64>;
4464+
def amd_vrd8_erfinv : RuntimeLibcallImpl<ERFINV_V8F64>;
4465+
def amd_vrd2_erfcinv : RuntimeLibcallImpl<ERFCINV_V2F64>;
4466+
def amd_vrd4_erfcinv : RuntimeLibcallImpl<ERFCINV_V4F64>;
4467+
def amd_vrd8_erfcinv : RuntimeLibcallImpl<ERFCINV_V8F64>;
44594468
def amd_vrs16_acosf : RuntimeLibcallImpl<ACOS_V16F32>;
44604469
def amd_vrs16_asinf : RuntimeLibcallImpl<ASIN_V16F32>;
44614470
def amd_vrs16_atanf : RuntimeLibcallImpl<ATAN_V16F32>;
@@ -4510,6 +4519,9 @@ defset list<RuntimeLibcallImpl> AMDLIBM_VECFUNCS = {
45104519
def amd_vrd2_cdfnorm : RuntimeLibcallImpl<CDFNORM_V2F64>;
45114520
def amd_vrd4_cdfnorm : RuntimeLibcallImpl<CDFNORM_V4F64>;
45124521
def amd_vrd8_cdfnorm : RuntimeLibcallImpl<CDFNORM_V8F64>;
4522+
def amd_vrd2_cdfnorminv : RuntimeLibcallImpl<CDFNORMINV_V2F64>;
4523+
def amd_vrd4_cdfnorminv : RuntimeLibcallImpl<CDFNORMINV_V4F64>;
4524+
def amd_vrd8_cdfnorminv : RuntimeLibcallImpl<CDFNORMINV_V8F64>;
45134525
def amd_vrs4_roundf : RuntimeLibcallImpl<ROUND_V4F32>;
45144526
def amd_vrs8_roundf : RuntimeLibcallImpl<ROUND_V8F32>;
45154527
def amd_vrs16_roundf : RuntimeLibcallImpl<ROUND_V16F32>;

llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1779,6 +1779,54 @@ for.cond.cleanup:
17791779
ret void
17801780
}
17811781

1782+
; ======================= erfinv ============================
1783+
define void @erfinv_f64(ptr nocapture %varray) {
1784+
; CHECK-LABEL: @erfinv_f64(
1785+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_erfinv(<2 x double> [[TMP4:%.*]])
1786+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_erfinv(<4 x double> [[TMP4:%.*]])
1787+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_erfinv(<8 x double> [[TMP4:%.*]])
1788+
entry:
1789+
br label %for.body
1790+
1791+
for.body:
1792+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1793+
%tmp = trunc i64 %iv to i32
1794+
%conv = sitofp i32 %tmp to double
1795+
%call = tail call double @erfinv(double %conv)
1796+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
1797+
store double %call, ptr %arrayidx, align 4
1798+
%iv.next = add nuw nsw i64 %iv, 1
1799+
%exitcond = icmp eq i64 %iv.next, 1000
1800+
br i1 %exitcond, label %for.end, label %for.body
1801+
1802+
for.end:
1803+
ret void
1804+
}
1805+
1806+
; ======================= erfcinv ============================
1807+
define void @erfcinv_f64(ptr nocapture %varray) {
1808+
; CHECK-LABEL: @erfcinv_f64(
1809+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_erfcinv(<2 x double> [[TMP4:%.*]])
1810+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_erfcinv(<4 x double> [[TMP4:%.*]])
1811+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_erfcinv(<8 x double> [[TMP4:%.*]])
1812+
entry:
1813+
br label %for.body
1814+
1815+
for.body:
1816+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1817+
%tmp = trunc i64 %iv to i32
1818+
%conv = sitofp i32 %tmp to double
1819+
%call = tail call double @erfcinv(double %conv)
1820+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
1821+
store double %call, ptr %arrayidx, align 4
1822+
%iv.next = add nuw nsw i64 %iv, 1
1823+
%exitcond = icmp eq i64 %iv.next, 1000
1824+
br i1 %exitcond, label %for.end, label %for.body
1825+
1826+
for.end:
1827+
ret void
1828+
}
1829+
17821830

17831831
; ======================= erfc ============================
17841832
define void @erfc_f64(ptr nocapture %varray) {
@@ -1805,6 +1853,33 @@ for.end:
18051853
ret void
18061854
}
18071855

1856+
; ======================= cdfnorminv ============================
1857+
define void @cdfnorminv_f64(ptr nocapture %varray) {
1858+
; CHECK-LABEL: @cdfnorminv_f64(
1859+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cdfnorminv(<2 x double> [[TMP4:%.*]])
1860+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cdfnorminv(<4 x double> [[TMP4:%.*]])
1861+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cdfnorminv(<8 x double> [[TMP4:%.*]])
1862+
; CHECK-VF16: {{.*}} = tail call double @cdfnorminv(double {{.*}})
1863+
; CHECK: ret void
1864+
;
1865+
entry:
1866+
br label %for.body
1867+
1868+
for.body:
1869+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
1870+
%tmp = trunc i64 %iv to i32
1871+
%conv = sitofp i32 %tmp to double
1872+
%call = tail call double @cdfnorminv(double %conv)
1873+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
1874+
store double %call, ptr %arrayidx, align 4
1875+
%iv.next = add nuw nsw i64 %iv, 1
1876+
%exitcond = icmp eq i64 %iv.next, 1000
1877+
br i1 %exitcond, label %for.end, label %for.body
1878+
1879+
for.end:
1880+
ret void
1881+
}
1882+
18081883
; ======================= erfcf ============================
18091884
define void @erfcf_f32(ptr nocapture %varray) {
18101885
; CHECK-LABEL: @erfcf_f32(
@@ -2024,9 +2099,12 @@ declare double @exp10(double) #0
20242099
declare float @exp10f(float) #0
20252100
declare void @sincos(double, ptr, ptr)
20262101
declare void @sincosf(float, ptr, ptr)
2102+
declare double @erfinv(double) #0
2103+
declare double @erfcinv(double) #0
20272104
declare double @erfc(double) #0
20282105
declare float @erfcf(float) #0
20292106
declare double @cdfnorm(double) #0
2107+
declare double @cdfnorminv(double) #0
20302108
declare double @round(double) #0
20312109
declare float @roundf(float) #0
20322110
declare double @expm1(double) #0

llvm/test/Transforms/Util/add-TLI-mappings.ll

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
; SVML-SAME: ptr @__svml_log10f4,
1717
; SVML-SAME: ptr @__svml_log10f8,
1818
; SVML-SAME: ptr @__svml_log10f16
19-
; AMDLIBM-SAME: [12 x ptr] [
19+
; AMDLIBM-SAME: [21 x ptr] [
2020
; AMDLIBM-SAME: ptr @amd_vrd2_sin,
2121
; AMDLIBM-SAME: ptr @amd_vrd4_sin,
2222
; AMDLIBM-SAME: ptr @amd_vrd8_sin,
@@ -26,6 +26,15 @@
2626
; AMDLIBM-SAME: ptr @amd_vrs4_sincosf,
2727
; AMDLIBM-SAME: ptr @amd_vrs8_sincosf,
2828
; AMDLIBM-SAME: ptr @amd_vrs16_sincosf,
29+
; AMDLIBM-SAME: ptr @amd_vrd2_erfinv,
30+
; AMDLIBM-SAME: ptr @amd_vrd4_erfinv,
31+
; AMDLIBM-SAME: ptr @amd_vrd8_erfinv,
32+
; AMDLIBM-SAME: ptr @amd_vrd2_erfcinv,
33+
; AMDLIBM-SAME: ptr @amd_vrd4_erfcinv,
34+
; AMDLIBM-SAME: ptr @amd_vrd8_erfcinv,
35+
; AMDLIBM-SAME: ptr @amd_vrd2_cdfnorminv,
36+
; AMDLIBM-SAME: ptr @amd_vrd4_cdfnorminv,
37+
; AMDLIBM-SAME: ptr @amd_vrd8_cdfnorminv,
2938
; AMDLIBM-SAME: ptr @amd_vrs4_log10f,
3039
; AMDLIBM-SAME: ptr @amd_vrs8_log10f,
3140
; AMDLIBM-SAME: ptr @amd_vrs16_log10f
@@ -164,6 +173,33 @@ define void @sincospi_f32(float %in, ptr %sin, ptr %cos) {
164173

165174
declare void @sincospif(float, ptr, ptr) #0
166175

176+
define double @erfinv_f64(double %in) {
177+
; COMMON-LABEL: @erfinv_f64(
178+
; AMDLIBM: call double @erfinv(double %{{.*}}) #[[ERFINV:[0-9]+]]
179+
%call = tail call double @erfinv(double %in)
180+
ret double %call
181+
}
182+
183+
declare double @erfinv(double) #0
184+
185+
define double @erfcinv_f64(double %in) {
186+
; COMMON-LABEL: @erfcinv_f64(
187+
; AMDLIBM: call double @erfcinv(double %{{.*}}) #[[ERFCINV:[0-9]+]]
188+
%call = tail call double @erfcinv(double %in)
189+
ret double %call
190+
}
191+
192+
declare double @erfcinv(double) #0
193+
194+
define double @cdfnorminv_f64(double %in) {
195+
; COMMON-LABEL: @cdfnorminv_f64(
196+
; AMDLIBM: call double @cdfnorminv(double %{{.*}}) #[[CDFNORMINV:[0-9]+]]
197+
%call = tail call double @cdfnorminv(double %in)
198+
ret double %call
199+
}
200+
201+
declare double @cdfnorminv(double) #0
202+
167203
define float @call_llvm.log10.f32(float %in) {
168204
; COMMON-LABEL: @call_llvm.log10.f32(
169205
; SVML: call float @llvm.log10.f32(float %{{.*}})
@@ -287,6 +323,18 @@ attributes #0 = { nounwind readnone }
287323
; AMDLIBM-SAME: "_ZGV_LLVM_N4vl4l4_sincosf(amd_vrs4_sincosf),
288324
; AMDLIBM-SAME: _ZGV_LLVM_N8vl4l4_sincosf(amd_vrs8_sincosf),
289325
; AMDLIBM-SAME: _ZGV_LLVM_N16vl4l4_sincosf(amd_vrs16_sincosf)" }
326+
; AMDLIBM: attributes #[[ERFINV]] = { "vector-function-abi-variant"=
327+
; AMDLIBM-SAME: "_ZGV_LLVM_N2v_erfinv(amd_vrd2_erfinv),
328+
; AMDLIBM-SAME: _ZGV_LLVM_N4v_erfinv(amd_vrd4_erfinv),
329+
; AMDLIBM-SAME: _ZGV_LLVM_N8v_erfinv(amd_vrd8_erfinv)" }
330+
; AMDLIBM: attributes #[[ERFCINV]] = { "vector-function-abi-variant"=
331+
; AMDLIBM-SAME: "_ZGV_LLVM_N2v_erfcinv(amd_vrd2_erfcinv),
332+
; AMDLIBM-SAME: _ZGV_LLVM_N4v_erfcinv(amd_vrd4_erfcinv),
333+
; AMDLIBM-SAME: _ZGV_LLVM_N8v_erfcinv(amd_vrd8_erfcinv)" }
334+
; AMDLIBM: attributes #[[CDFNORMINV]] = { "vector-function-abi-variant"=
335+
; AMDLIBM-SAME: "_ZGV_LLVM_N2v_cdfnorminv(amd_vrd2_cdfnorminv),
336+
; AMDLIBM-SAME: _ZGV_LLVM_N4v_cdfnorminv(amd_vrd4_cdfnorminv),
337+
; AMDLIBM-SAME: _ZGV_LLVM_N8v_cdfnorminv(amd_vrd8_cdfnorminv)" }
290338
; AMDLIBM: attributes #[[LOG10]] = { "vector-function-abi-variant"=
291339
; AMDLIBM-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(amd_vrs4_log10f),
292340
; AMDLIBM-SAME: _ZGV_LLVM_N8v_llvm.log10.f32(amd_vrs8_log10f),

0 commit comments

Comments
 (0)