From a753c213fb1b1549de27da7c1fd1d4de81a035da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Fri, 1 Mar 2024 23:49:07 +0800 Subject: [PATCH] Add more AVX512 ISA(AVX512ER AVX512_4VNNIW AVX512_4FMAPS). (#62) --- README.md | 14 +++++++++++++- main.c | 3 +++ ruapu.h | 10 ++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 55b4643..4996520 100644 --- a/README.md +++ b/README.md @@ -359,6 +359,9 @@ avx512ifma = 0 avx512vbmi = 0 avx512vbmi2 = 0 avx512fp16 = 0 +avx512er = 0 +avx5124fmaps = 0 +avx5124vnniw = 0 avxvnni = 0 avxvnniint8 = 0 avxifma = 0 @@ -394,6 +397,9 @@ avx512ifma = 0 avx512vbmi = 0 avx512vbmi2 = 0 avx512fp16 = 0 +avx512er = 0 +avx5124fmaps = 0 +avx5124vnniw = 0 avxvnni = 0 avxvnniint8 = 0 avxifma = 0 @@ -450,6 +456,9 @@ avx512ifma = 0 avx512vbmi = 0 avx512vbmi2 = 0 avx512fp16 = 0 +avx512er = 0 +avx5124fmaps = 0 +avx5124vnniw = 0 avxvnni = 0 avxvnniint8 = 0 avxifma = 0 @@ -486,6 +495,9 @@ avx512ifma = 0 avx512vbmi = 0 avx512vbmi2 = 0 avx512fp16 = 0 +avx512er = 0 +avx5124fmaps = 0 +avx5124vnniw = 0 avxvnni = 0 avxvnniint8 = 0 avxifma = 0 @@ -513,7 +525,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._ |CPU|ISA| |:---:|---| -|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avxvnni` `avxvnniint8` `avxifma`| +|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxifma`| |arm|`edsp` `neon` `vfpv4` `idiv`| |aarch64|`neon` `vfpv4` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `mte` `sve` `sve2` `svebf16` `svei8mm` `svef32mm` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `amx`| |mips|`msa`| diff --git a/main.c b/main.c index 3901690..145a5a8 100644 --- a/main.c +++ b/main.c @@ -41,6 +41,9 @@ int main() PRINT_ISA_SUPPORT(avx512vbmi) PRINT_ISA_SUPPORT(avx512vbmi2) PRINT_ISA_SUPPORT(avx512fp16) + PRINT_ISA_SUPPORT(avx512er) + PRINT_ISA_SUPPORT(avx5124fmaps) + PRINT_ISA_SUPPORT(avx5124vnniw) PRINT_ISA_SUPPORT(avxvnni) PRINT_ISA_SUPPORT(avxvnniint8) PRINT_ISA_SUPPORT(avxifma) diff --git a/ruapu.h b/ruapu.h index ee7132a..bcce189 100644 --- a/ruapu.h +++ b/ruapu.h @@ -204,8 +204,13 @@ RUAPU_INSTCODE(avx512ifma, 0x62, 0xf2, 0xfd, 0x48, 0xb4, 0xc0) // vpmadd52luq zm RUAPU_INSTCODE(avx512vbmi, 0x62, 0xf2, 0x7d, 0x48, 0x75, 0xc0) // vpermi2b zmm0,zmm0,zmm0 RUAPU_INSTCODE(avx512vbmi2, 0x62, 0xf2, 0x7d, 0x48, 0x71, 0xc0) // vpshldvd zmm0,zmm0,zmm0 RUAPU_INSTCODE(avx512fp16, 0x62, 0xf6, 0x7d, 0x48, 0x98, 0xc0) // vfmadd132ph zmm0,zmm0,zmm0 +// TODO:avx512pf, vgatherpf1dps DWORD PTR [esp+zmm0*1]{k1} +RUAPU_INSTCODE(avx512er, 0x62, 0xf2, 0xfd, 0x48, 0xc8, 0xc0) //vexp2pd zmm0,zmm0 +RUAPU_INSTCODE(avx5124fmaps, 0x67, 0x62, 0xf2, 0x7f, 0x48, 0x9a, 0x04, 0x24) //v4fmaddps zmm0,zmm0,XMMWORD PTR [esp] +RUAPU_INSTCODE(avx5124vnniw, 0x67, 0x62, 0xf2, 0x7f, 0x48, 0x52, 0x04, 0x24) //vp4dpwssd zmm0,zmm0,XMMWORD PTR [esp] RUAPU_INSTCODE(avxvnni, 0xc4, 0xe2, 0x7d, 0x52, 0xc0) // vpdpwssd ymm0,ymm0,ymm0 RUAPU_INSTCODE(avxvnniint8, 0xc4, 0xe2, 0x7f, 0x50, 0xc0) // vpdpbssd ymm0,ymm0,ymm0 +// TODO:avxvnniint16, vpdpwusd xmm,xmm,xmm RUAPU_INSTCODE(avxifma, 0xc4, 0xe2, 0xfd, 0xb4, 0xc0) // vpmadd52luq ymm0,ymm0,ymm0 #elif __aarch64__ || defined(_M_ARM64) @@ -316,8 +321,13 @@ RUAPU_ISAENTRY(avx512ifma) RUAPU_ISAENTRY(avx512vbmi) RUAPU_ISAENTRY(avx512vbmi2) RUAPU_ISAENTRY(avx512fp16) +// TODO:avx512pf +RUAPU_ISAENTRY(avx512er) +RUAPU_ISAENTRY(avx5124fmaps) +RUAPU_ISAENTRY(avx5124vnniw) RUAPU_ISAENTRY(avxvnni) RUAPU_ISAENTRY(avxvnniint8) +// TODO:avxvnniint16 RUAPU_ISAENTRY(avxifma) #elif __aarch64__ || defined(_M_ARM64)