From 8dbb51001be5a930237a4d85b1c1b832ebf21fa3 Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 31 Mar 2024 15:01:48 +0800 Subject: [PATCH] detect aarch64 frint jscvt fcma mte2 svef64mm svepmull svebitperm sveaes svesha3 svesm4 (#88) --- README.md | 2 +- main.c | 10 ++++++++++ ruapu.h | 20 ++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8360143..c8b4bd8 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._ |:---:|---| |x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxvnniint16` `avxifma` `amxfp16` `amxbf16` `amxint8` `amxtile`| |arm|`half` `edsp` `neon` `vfpv4` `idiv`| -|aarch64|`neon` `vfpv4` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `mte` `sve` `sve2` `svebf16` `svei8mm` `svef32mm` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `amx`| +|aarch64|`neon` `vfpv4` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `frint` `jscvt` `fcma` `mte` `mte2` `sve` `sve2` `svebf16` `svei8mm` `svef32mm` `svef64mm` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `svepmull` `svebitperm` `sveaes` `svesha3` `svesm4` `amx`| |mips|`msa`| |powerpc|`vsx`| |s390x|`zvector`| diff --git a/main.c b/main.c index 9b3f188..695481b 100644 --- a/main.c +++ b/main.c @@ -63,12 +63,17 @@ int main() PRINT_ISA_SUPPORT(asimdfhm) PRINT_ISA_SUPPORT(bf16) PRINT_ISA_SUPPORT(i8mm) + PRINT_ISA_SUPPORT(frint) + PRINT_ISA_SUPPORT(jscvt) + PRINT_ISA_SUPPORT(fcma) PRINT_ISA_SUPPORT(mte) + PRINT_ISA_SUPPORT(mte2) PRINT_ISA_SUPPORT(sve) PRINT_ISA_SUPPORT(sve2) PRINT_ISA_SUPPORT(svebf16) PRINT_ISA_SUPPORT(svei8mm) PRINT_ISA_SUPPORT(svef32mm) + PRINT_ISA_SUPPORT(svef64mm) PRINT_ISA_SUPPORT(pmull) PRINT_ISA_SUPPORT(crc32) PRINT_ISA_SUPPORT(aes) @@ -78,6 +83,11 @@ int main() PRINT_ISA_SUPPORT(sha512) PRINT_ISA_SUPPORT(sm3) PRINT_ISA_SUPPORT(sm4) + PRINT_ISA_SUPPORT(svepmull) + PRINT_ISA_SUPPORT(svebitperm) + PRINT_ISA_SUPPORT(sveaes) + PRINT_ISA_SUPPORT(svesha3) + PRINT_ISA_SUPPORT(svesm4) PRINT_ISA_SUPPORT(amx) #elif __arm__ || defined(_M_ARM) diff --git a/ruapu.h b/ruapu.h index e783bfc..336f142 100644 --- a/ruapu.h +++ b/ruapu.h @@ -231,12 +231,17 @@ RUAPU_INSTCODE(asimddp, 0x4e809400) // sdot v0.4h,v0.16b,v0.16b RUAPU_INSTCODE(asimdfhm, 0x4e20ec00) // fmlal v0.4s,v0.4h,v0.4h RUAPU_INSTCODE(bf16, 0x6e40ec00) // bfmmla v0.4h,v0.8h,v0.8h RUAPU_INSTCODE(i8mm, 0x4e80a400) // smmla v0.4h,v0.16b,v0.16b +RUAPU_INSTCODE(frint, 0x4e21e800) // frint32z v0.4s,v0.4s +RUAPU_INSTCODE(jscvt, 0x1e7e0000) // fjcvtzs w0,d0 +RUAPU_INSTCODE(fcma, 0x6e80c400) // fcmla v0.4s,v0.4s,v0.4s,#0 RUAPU_INSTCODE(mte, 0xd96003e0) // ldg x0,[sp] +RUAPU_INSTCODE(mte2, 0xd9e003e0) // ldgm x0,[sp] RUAPU_INSTCODE(sve, 0x65608000) // fmad z0.h,p0/m,z0.h,z0.h RUAPU_INSTCODE(sve2, 0x44405000) // smlslb z0.h,z0.b,z0.b RUAPU_INSTCODE(svebf16, 0x6460e400) // bfmmla z0.s,z0.h,z0.h RUAPU_INSTCODE(svei8mm, 0x45009800) // smmla z0.s,z0.b,z0.b RUAPU_INSTCODE(svef32mm, 0x64a0e400) // fmmla z0.s,z0.s,z0.s +RUAPU_INSTCODE(svef64mm, 0x64e0e400) // fmmla z0.d,z0.d,z0.d RUAPU_INSTCODE(pmull, 0x0e20e000) // pmull v0.8h,v0.8b,v0.8b RUAPU_INSTCODE(crc32, 0x1ac04000) // crc32b w0,w0,w0 RUAPU_INSTCODE(aes, 0x4e285800) // aesd v0.16b,v0.16b @@ -246,6 +251,11 @@ RUAPU_INSTCODE(sha3, 0xce000000) // eor3 v0.16b, v0.16b, v0.16b, v0.16b RUAPU_INSTCODE(sha512, 0xce608000) // sha512h q0, q0, v0.2d RUAPU_INSTCODE(sm3, 0xce60c000) // sm3partw1 v0.4s, v0.4s, v0.4s RUAPU_INSTCODE(sm4, 0xcec08400) // sm4e v0.4s, v0.4s +RUAPU_INSTCODE(svepmull, 0x45006800) // pmullb z0.q,z0.d,z0.d +RUAPU_INSTCODE(svebitperm, 0x4500b000) // bext z0.b,z0.b,z0.b +RUAPU_INSTCODE(sveaes, 0x4522e400) // aesd z0.b,z0.b,z0.b +RUAPU_INSTCODE(svesha3, 0x4520f400) // rax1 z0.d,z0.d,z0.d +RUAPU_INSTCODE(svesm4, 0x4523e000) // sm4e z0.s,z0.s,z0.s RUAPU_INSTCODE(amx, 0x00201220) // amx setup @@ -354,12 +364,17 @@ RUAPU_ISAENTRY(asimddp) RUAPU_ISAENTRY(asimdfhm) RUAPU_ISAENTRY(bf16) RUAPU_ISAENTRY(i8mm) +RUAPU_ISAENTRY(frint) +RUAPU_ISAENTRY(jscvt) +RUAPU_ISAENTRY(fcma) RUAPU_ISAENTRY(mte) +RUAPU_ISAENTRY(mte2) RUAPU_ISAENTRY(sve) RUAPU_ISAENTRY(sve2) RUAPU_ISAENTRY(svebf16) RUAPU_ISAENTRY(svei8mm) RUAPU_ISAENTRY(svef32mm) +RUAPU_ISAENTRY(svef64mm) RUAPU_ISAENTRY(pmull) RUAPU_ISAENTRY(crc32) RUAPU_ISAENTRY(aes) @@ -369,6 +384,11 @@ RUAPU_ISAENTRY(sha3) RUAPU_ISAENTRY(sha512) RUAPU_ISAENTRY(sm3) RUAPU_ISAENTRY(sm4) +RUAPU_ISAENTRY(svepmull) +RUAPU_ISAENTRY(svebitperm) +RUAPU_ISAENTRY(sveaes) +RUAPU_ISAENTRY(svesha3) +RUAPU_ISAENTRY(svesm4) RUAPU_ISAENTRY(amx) #elif __arm__ || defined(_M_ARM)