Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ int main()

|CPU|ISA|
|:---:|---|
|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxvnniint16` `avxifma` `avxneconvert` `amxfp16` `amxbf16` `amxint8` `amxtile` `bmi1` `bmi2` `gfni` `aesni` `vaes` `sha1` `sha256` `sha512` `sm3` `sm4` `rdrand` `rdseed` `tsx`|
|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512bitalg` `avx512vpopcntdq` `avx512vp2intersect` `vpclmulqdq` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxvnniint16` `avxifma` `avxneconvert` `amxfp16` `amxbf16` `amxint8` `amxtile` `bmi1` `bmi2` `adx` `lzcnt` `tbm` `gfni` `aesni` `vaes` `sha1` `sha256` `sha512` `sm3` `sm4` `clzero` `rdpru` `rdrand` `rdseed` `tsx`|
|arm|`half` `edsp` `neon` `vfpv4` `idiv`|
|aarch64|`neon` `vfpv4` `lse` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `frint` `jscvt` `fcma` `mte` `mte2` `sve` `sve2` `sve2p1` `svebf16` `svei8mm` `svef32mm` `svef64mm` `sme` `smef16f16` `smef64f64` `smei64i64` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `svepmull` `svebitperm` `sveaes` `svesha3` `svesm4` `amx` `paca` `pacg`|
|mips|`msa` `mmi` `sx` `asx` `msa2` `crypto`|
Expand Down
1 change: 1 addition & 0 deletions _codeql_detected_source_root
9 changes: 9 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ int main()
PRINT_ISA_SUPPORT(avx512vbmi)
PRINT_ISA_SUPPORT(avx512vbmi2)
PRINT_ISA_SUPPORT(avx512fp16)
PRINT_ISA_SUPPORT(avx512bitalg)
PRINT_ISA_SUPPORT(avx512vpopcntdq)
PRINT_ISA_SUPPORT(avx512vp2intersect)
PRINT_ISA_SUPPORT(vpclmulqdq)
PRINT_ISA_SUPPORT(avx512er)
PRINT_ISA_SUPPORT(avx5124fmaps)
PRINT_ISA_SUPPORT(avx5124vnniw)
Expand All @@ -66,6 +70,11 @@ int main()
PRINT_ISA_SUPPORT(rdrand)
PRINT_ISA_SUPPORT(rdseed)
PRINT_ISA_SUPPORT(tsx)
PRINT_ISA_SUPPORT(adx)
PRINT_ISA_SUPPORT(lzcnt)
PRINT_ISA_SUPPORT(tbm)
PRINT_ISA_SUPPORT(clzero)
PRINT_ISA_SUPPORT(rdpru)

#elif __aarch64__ || defined(_M_ARM64)
PRINT_ISA_SUPPORT(neon)
Expand Down
18 changes: 18 additions & 0 deletions ruapu.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@ RUAPU_INSTCODE(avx512ifma, 0x62, 0xf2, 0xfd, 0x48, 0xb4, 0xc0) // vpmadd52luq zm
RUAPU_INSTCODE(avx512vbmi, 0x62, 0xf2, 0x7d, 0x48, 0x75, 0xc0) // vpermi2b zmm0,zmm0,zmm0
RUAPU_INSTCODE(avx512vbmi2, 0x62, 0xf2, 0x7d, 0x48, 0x71, 0xc0) // vpshldvd zmm0,zmm0,zmm0
RUAPU_INSTCODE(avx512fp16, 0x62, 0xf6, 0x7d, 0x48, 0x98, 0xc0) // vfmadd132ph zmm0,zmm0,zmm0
// Probe instructions for the newer AVX-512 sub-extensions. Each entry is the
// raw machine-code encoding of one representative instruction of that ISA.
RUAPU_INSTCODE(avx512bitalg, 0x62, 0xf2, 0x7d, 0x48, 0x54, 0xc0) // vpopcntb zmm0,zmm0
RUAPU_INSTCODE(avx512vpopcntdq, 0x62, 0xf2, 0x7d, 0x48, 0x55, 0xc0) // vpopcntd zmm0,zmm0
RUAPU_INSTCODE(avx512vp2intersect, 0x62, 0xf2, 0x7f, 0x48, 0x68, 0xc8) // vp2intersectd k1,zmm0,zmm0
// VPCLMULQDQ has its own CPUID feature bit and does not depend on AVX-512.
// Probe it with the VEX.256 (ymm) form so that CPUs offering VPCLMULQDQ
// without AVX512F are not misreported; the EVEX.512 (zmm) form would
// additionally require AVX512F.
RUAPU_INSTCODE(vpclmulqdq, 0xc4, 0xe3, 0x7d, 0x44, 0xc0, 0x00) // vpclmullqlqdq ymm0,ymm0,ymm0
// TODO:avx512pf, vgatherpf1dps DWORD PTR [esp+zmm0*1]{k1}
RUAPU_INSTCODE(avx512er, 0x62, 0xf2, 0xfd, 0x48, 0xc8, 0xc0) //vexp2pd zmm0,zmm0
RUAPU_INSTCODE(avx5124fmaps, 0x67, 0x62, 0xf2, 0x7f, 0x48, 0x9a, 0x04, 0x24) //v4fmaddps zmm0,zmm0,XMMWORD PTR [esp]
Expand All @@ -273,6 +277,11 @@ RUAPU_INSTCODE(sm4, 0xc4, 0xe2, 0x7e, 0xda, 0xc0) // vsm4key4 ymm0,ymm0,ymm0
RUAPU_INSTCODE(rdrand, 0x0f, 0xc7, 0xf0) // rdrand eax
RUAPU_INSTCODE(rdseed, 0x0f, 0xc7, 0xf8) // rdseed eax
RUAPU_INSTCODE(tsx, 0x0f, 0x01, 0xd6) // xtest
// Probe instructions for scalar / AMD-specific extensions. Each entry is the
// raw machine-code encoding of one representative instruction of that ISA.
RUAPU_INSTCODE(adx, 0x66, 0x0f, 0x38, 0xf6, 0xc0) // adcx eax,eax
// NOTE(review): on CPUs without LZCNT the bytes F3 0F BD are executed as BSR
// (the F3 prefix is ignored) instead of raising #UD, so a probe that relies on
// an invalid-opcode trap cannot report "unsupported" here -- confirm against
// the detection mechanism; a reliable check needs CPUID.
RUAPU_INSTCODE(lzcnt, 0xf3, 0x0f, 0xbd, 0xc0) // lzcnt eax,eax
RUAPU_INSTCODE(tbm, 0x8f, 0xe9, 0x78, 0x01, 0xc8) // blcfill eax,eax
// CLZERO zeroes the entire cache line that contains the address in rAX.
// Pointing rAX at [rsp] would wipe live stack contents at and above the stack
// pointer, so aim 128 bytes below it instead: for cache lines of up to 128
// bytes the zeroed line then lies wholly below rsp, in unused stack space.
// In 32-bit mode the leading 0x48 decodes as `dec eax`, which is harmless
// because the following lea overwrites eax.
RUAPU_INSTCODE(clzero, 0x48, 0x8d, 0x44, 0x24, 0x80, 0x0f, 0x01, 0xfc) // lea rax,[rsp-128] + clzero
RUAPU_INSTCODE(rdpru, 0x0f, 0x01, 0xfd) // rdpru

#elif __aarch64__ || defined(_M_ARM64)
RUAPU_INSTCODE(neon, 0x4e20d400) // fadd v0.4s,v0.4s,v0.4s
Expand Down Expand Up @@ -494,6 +503,10 @@ RUAPU_ISAENTRY(avx512ifma)
RUAPU_ISAENTRY(avx512vbmi)
RUAPU_ISAENTRY(avx512vbmi2)
RUAPU_ISAENTRY(avx512fp16)
RUAPU_ISAENTRY(avx512bitalg)
RUAPU_ISAENTRY(avx512vpopcntdq)
RUAPU_ISAENTRY(avx512vp2intersect)
RUAPU_ISAENTRY(vpclmulqdq)
// TODO:avx512pf
RUAPU_ISAENTRY(avx512er)
RUAPU_ISAENTRY(avx5124fmaps)
Expand All @@ -520,6 +533,11 @@ RUAPU_ISAENTRY(sm4)
RUAPU_ISAENTRY(rdrand)
RUAPU_ISAENTRY(rdseed)
RUAPU_ISAENTRY(tsx)
RUAPU_ISAENTRY(adx)
RUAPU_ISAENTRY(lzcnt)
RUAPU_ISAENTRY(tbm)
RUAPU_ISAENTRY(clzero)
RUAPU_ISAENTRY(rdpru)

#elif __aarch64__ || defined(_M_ARM64)
RUAPU_ISAENTRY(neon)
Expand Down