From 3dca2afb76fbfe88edb43cab01556cc4b380485d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Dec 2025 09:06:15 +0000 Subject: [PATCH 1/3] Initial plan From 00a52de9669aec7211ab859452c456dd20893b61 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Dec 2025 09:15:29 +0000 Subject: [PATCH 2/3] feat: extend x86 isa detection Co-authored-by: nihui <171016+nihui@users.noreply.github.com> --- README.md | 2 +- main.c | 9 +++++++++ ruapu.h | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7e4a756..6ff0511 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ int main() |CPU|ISA| |:---:|---| -|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxvnniint16` `avxifma` `avxneconvert` `amxfp16` `amxbf16` `amxint8` `amxtile` `bmi1` `bmi2` `gfni` `aesni` `vaes` `sha1` `sha256` `sha512` `sm3` `sm4` `rdrand` `rdseed` `tsx`| +|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512bitalg` `avx512vpopcntdq` `avx512vp2intersect` `vpclmulqdq` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxvnniint16` `avxifma` `avxneconvert` `amxfp16` `amxbf16` `amxint8` `amxtile` `bmi1` `bmi2` `adx` `lzcnt` `tbm` `gfni` `aesni` `vaes` `sha1` `sha256` `sha512` `sm3` `sm4` `clzero` `rdpru` `rdrand` `rdseed` `tsx`| |arm|`half` `edsp` `neon` `vfpv4` `idiv`| |aarch64|`neon` `vfpv4` `lse` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `frint` 
`jscvt` `fcma` `mte` `mte2` `sve` `sve2` `sve2p1` `svebf16` `svei8mm` `svef32mm` `svef64mm` `sme` `smef16f16` `smef64f64` `smei64i64` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `svepmull` `svebitperm` `sveaes` `svesha3` `svesm4` `amx` `paca` `pacg`| |mips|`msa` `mmi` `sx` `asx` `msa2` `crypto`| diff --git a/main.c b/main.c index ad81bdc..6d28473 100644 --- a/main.c +++ b/main.c @@ -41,6 +41,10 @@ int main() PRINT_ISA_SUPPORT(avx512vbmi) PRINT_ISA_SUPPORT(avx512vbmi2) PRINT_ISA_SUPPORT(avx512fp16) + PRINT_ISA_SUPPORT(avx512bitalg) + PRINT_ISA_SUPPORT(avx512vpopcntdq) + PRINT_ISA_SUPPORT(avx512vp2intersect) + PRINT_ISA_SUPPORT(vpclmulqdq) PRINT_ISA_SUPPORT(avx512er) PRINT_ISA_SUPPORT(avx5124fmaps) PRINT_ISA_SUPPORT(avx5124vnniw) @@ -66,6 +70,11 @@ int main() PRINT_ISA_SUPPORT(rdrand) PRINT_ISA_SUPPORT(rdseed) PRINT_ISA_SUPPORT(tsx) + PRINT_ISA_SUPPORT(adx) + PRINT_ISA_SUPPORT(lzcnt) + PRINT_ISA_SUPPORT(tbm) + PRINT_ISA_SUPPORT(clzero) + PRINT_ISA_SUPPORT(rdpru) #elif __aarch64__ || defined(_M_ARM64) PRINT_ISA_SUPPORT(neon) diff --git a/ruapu.h b/ruapu.h index fa3a71f..d6a678f 100644 --- a/ruapu.h +++ b/ruapu.h @@ -247,6 +247,10 @@ RUAPU_INSTCODE(avx512ifma, 0x62, 0xf2, 0xfd, 0x48, 0xb4, 0xc0) // vpmadd52luq zm RUAPU_INSTCODE(avx512vbmi, 0x62, 0xf2, 0x7d, 0x48, 0x75, 0xc0) // vpermi2b zmm0,zmm0,zmm0 RUAPU_INSTCODE(avx512vbmi2, 0x62, 0xf2, 0x7d, 0x48, 0x71, 0xc0) // vpshldvd zmm0,zmm0,zmm0 RUAPU_INSTCODE(avx512fp16, 0x62, 0xf6, 0x7d, 0x48, 0x98, 0xc0) // vfmadd132ph zmm0,zmm0,zmm0 +RUAPU_INSTCODE(avx512bitalg, 0x62, 0xf2, 0x7d, 0x48, 0x54, 0xc0) // vpopcntb zmm0,zmm0 +RUAPU_INSTCODE(avx512vpopcntdq, 0x62, 0xf2, 0x7d, 0x48, 0x55, 0xc0) // vpopcntd zmm0,zmm0 +RUAPU_INSTCODE(avx512vp2intersect, 0x62, 0xf2, 0x7f, 0x48, 0x68, 0xc8) // vp2intersectd k1,zmm0,zmm0 +RUAPU_INSTCODE(vpclmulqdq, 0xc4, 0xe3, 0x7d, 0x44, 0xc0, 0x00) // vpclmulqdq ymm0,ymm0,ymm0,0x0 (VEX.256 form, so the probe does not also require AVX512F) // TODO:avx512pf, vgatherpf1dps DWORD PTR [esp+zmm0*1]{k1} RUAPU_INSTCODE(avx512er, 0x62, 
0xf2, 0xfd, 0x48, 0xc8, 0xc0) //vexp2pd zmm0,zmm0 RUAPU_INSTCODE(avx5124fmaps, 0x67, 0x62, 0xf2, 0x7f, 0x48, 0x9a, 0x04, 0x24) //v4fmaddps zmm0,zmm0,XMMWORD PTR [esp] @@ -273,6 +277,11 @@ RUAPU_INSTCODE(sm4, 0xc4, 0xe2, 0x7e, 0xda, 0xc0) // vsm4key4 ymm0,ymm0,ymm0 RUAPU_INSTCODE(rdrand, 0x0f, 0xc7, 0xf0) // rdrand eax RUAPU_INSTCODE(rdseed, 0x0f, 0xc7, 0xf8) // rdseed eax RUAPU_INSTCODE(tsx, 0x0f, 0x01, 0xd6) // xtest +RUAPU_INSTCODE(adx, 0x66, 0x0f, 0x38, 0xf6, 0xc0) // adcx eax,eax +RUAPU_INSTCODE(lzcnt, 0xf3, 0x0f, 0xbd, 0xc0) // lzcnt eax,eax (NOTE(review): CPUs without LZCNT decode these bytes as bsr, so no fault is raised and this probe may report a false positive) +RUAPU_INSTCODE(tbm, 0x8f, 0xe9, 0x78, 0x01, 0xc8) // blcfill eax,eax +RUAPU_INSTCODE(clzero, 0x48, 0x8d, 0x84, 0x24, 0x00, 0xff, 0xff, 0xff, 0x0f, 0x01, 0xfc) // lea rax,[rsp-256]; clzero (clzero zeroes the whole 64-byte line containing rax, so point it below rsp and the red zone instead of at live stack data) +RUAPU_INSTCODE(rdpru, 0x0f, 0x01, 0xfd) // rdpru (register selected by ecx, result in edx:eax) #elif __aarch64__ || defined(_M_ARM64) RUAPU_INSTCODE(neon, 0x4e20d400) // fadd v0.4s,v0.4s,v0.4s @@ -494,6 +503,10 @@ RUAPU_ISAENTRY(avx512ifma) RUAPU_ISAENTRY(avx512vbmi) RUAPU_ISAENTRY(avx512vbmi2) RUAPU_ISAENTRY(avx512fp16) +RUAPU_ISAENTRY(avx512bitalg) +RUAPU_ISAENTRY(avx512vpopcntdq) +RUAPU_ISAENTRY(avx512vp2intersect) +RUAPU_ISAENTRY(vpclmulqdq) // TODO:avx512pf RUAPU_ISAENTRY(avx512er) RUAPU_ISAENTRY(avx5124fmaps) @@ -520,6 +533,11 @@ RUAPU_ISAENTRY(sm4) RUAPU_ISAENTRY(rdrand) RUAPU_ISAENTRY(rdseed) RUAPU_ISAENTRY(tsx) +RUAPU_ISAENTRY(adx) +RUAPU_ISAENTRY(lzcnt) +RUAPU_ISAENTRY(tbm) +RUAPU_ISAENTRY(clzero) +RUAPU_ISAENTRY(rdpru) #elif __aarch64__ || defined(_M_ARM64) RUAPU_ISAENTRY(neon) From d950eafacaeed1a92a8e41469a8a7b8419deb729 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Dec 2025 09:19:42 +0000 Subject: [PATCH 3/3] Enhance x86 ISA detection coverage Co-authored-by: nihui <171016+nihui@users.noreply.github.com> --- _codeql_detected_source_root | 1 + 1 file changed, 1 insertion(+) create mode 120000 _codeql_detected_source_root diff --git a/_codeql_detected_source_root b/_codeql_detected_source_root new file mode 120000 index 
0000000..945c9b4 --- /dev/null +++ b/_codeql_detected_source_root @@ -0,0 +1 @@ +. \ No newline at end of file