From 4a69266256ee834ca2e77f5aaa15fac2965265d6 Mon Sep 17 00:00:00 2001 From: nihui Date: Thu, 14 Mar 2024 17:32:13 +0800 Subject: [PATCH] detect x86 avxvnniint16 (#84) --- README.md | 2 +- main.c | 2 +- ruapu.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 39b1b13..c72eeab 100644 --- a/README.md +++ b/README.md @@ -679,7 +679,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._ |CPU|ISA| |:---:|---| -|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxifma` `amxfp16` `amxbf16` `amxint8` `amxtile`| +|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxvnniint16` `avxifma` `amxfp16` `amxbf16` `amxint8` `amxtile`| |arm|`half` `edsp` `neon` `vfpv4` `idiv`| |aarch64|`neon` `vfpv4` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `mte` `sve` `sve2` `svebf16` `svei8mm` `svef32mm` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `amx`| |mips|`msa`| diff --git a/main.c b/main.c index b1bd4be..9b3f188 100644 --- a/main.c +++ b/main.c @@ -46,13 +46,13 @@ int main() PRINT_ISA_SUPPORT(avx5124vnniw) PRINT_ISA_SUPPORT(avxvnni) PRINT_ISA_SUPPORT(avxvnniint8) + PRINT_ISA_SUPPORT(avxvnniint16) PRINT_ISA_SUPPORT(avxifma) PRINT_ISA_SUPPORT(amxfp16) PRINT_ISA_SUPPORT(amxbf16) PRINT_ISA_SUPPORT(amxint8) PRINT_ISA_SUPPORT(amxtile) - #elif __aarch64__ || defined(_M_ARM64) PRINT_ISA_SUPPORT(neon) PRINT_ISA_SUPPORT(vfpv4) diff --git a/ruapu.h b/ruapu.h index fcd378a..e783bfc 100644 --- a/ruapu.h +++ b/ruapu.h @@ -214,7 +214,7 @@ RUAPU_INSTCODE(avx5124fmaps, 0x67, 0x62, 0xf2, 0x7f, 0x48, 0x9a, 0x04, 0x24) //v RUAPU_INSTCODE(avx5124vnniw, 0x67, 0x62, 0xf2, 0x7f, 0x48, 0x52, 0x04, 0x24) //vp4dpwssd zmm0,zmm0,XMMWORD PTR [esp] RUAPU_INSTCODE(avxvnni, 0xc4, 0xe2, 0x7d, 0x52, 0xc0) // vpdpwssd ymm0,ymm0,ymm0 RUAPU_INSTCODE(avxvnniint8, 0xc4, 0xe2, 0x7f, 0x50, 0xc0) // vpdpbssd ymm0,ymm0,ymm0 -// TODO:avxvnniint16, vpdpwusd xmm,xmm,xmm +RUAPU_INSTCODE(avxvnniint16, 0xc4, 0xe2, 0x7e, 0xd2, 0xc0) // vpdpwsud ymm0,ymm0,ymm0 RUAPU_INSTCODE(avxifma, 0xc4, 0xe2, 0xfd, 0xb4, 0xc0) // vpmadd52luq ymm0,ymm0,ymm0 RUAPU_INSTCODE(amxfp16, 0xc4, 0xe2, 0x7b, 0x5c, 0xd1) // tdpfp16ps %tmm0, %tmm1, %tmm2 RUAPU_INSTCODE(amxbf16, 0xc4, 0xe2, 0x7a, 0x5c, 0xd1) // tdpbf16ps %tmm0, %tmm1, %tmm2 @@ -337,7 +337,7 @@ RUAPU_ISAENTRY(avx5124fmaps) RUAPU_ISAENTRY(avx5124vnniw) RUAPU_ISAENTRY(avxvnni) RUAPU_ISAENTRY(avxvnniint8) -// TODO:avxvnniint16 +RUAPU_ISAENTRY(avxvnniint16) RUAPU_ISAENTRY(avxifma) RUAPU_ISAENTRY(amxfp16) RUAPU_ISAENTRY(amxbf16)