From b7518ab97d1ae5fc0b812afe28896a5bacd6566b Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 31 Mar 2024 16:39:24 +0800 Subject: [PATCH] detect aarch64 lse and sme family (#89) --- README.md | 2 +- main.c | 5 +++++ ruapu.h | 10 ++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c8b4bd8..f2d0a76 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._ |:---:|---| |x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxvnniint16` `avxifma` `amxfp16` `amxbf16` `amxint8` `amxtile`| |arm|`half` `edsp` `neon` `vfpv4` `idiv`| -|aarch64|`neon` `vfpv4` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `frint` `jscvt` `fcma` `mte` `mte2` `sve` `sve2` `svebf16` `svei8mm` `svef32mm` `svef64mm` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `svepmull` `svebitperm` `sveaes` `svesha3` `svesm4` `amx`| +|aarch64|`neon` `vfpv4` `lse` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `frint` `jscvt` `fcma` `mte` `mte2` `sve` `sve2` `svebf16` `svei8mm` `svef32mm` `svef64mm` `sme` `smef16f16` `smef64f64` `smei64i64` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `svepmull` `svebitperm` `sveaes` `svesha3` `svesm4` `amx`| |mips|`msa`| |powerpc|`vsx`| |s390x|`zvector`| diff --git a/main.c b/main.c index 695481b..a13be32 100644 --- a/main.c +++ b/main.c @@ -56,6 +56,7 @@ int main() #elif __aarch64__ || defined(_M_ARM64) PRINT_ISA_SUPPORT(neon) PRINT_ISA_SUPPORT(vfpv4) + PRINT_ISA_SUPPORT(lse) PRINT_ISA_SUPPORT(cpuid) PRINT_ISA_SUPPORT(asimdrdm) PRINT_ISA_SUPPORT(asimdhp) @@ -74,6 +75,10 @@ int main() PRINT_ISA_SUPPORT(svei8mm) PRINT_ISA_SUPPORT(svef32mm) PRINT_ISA_SUPPORT(svef64mm) + PRINT_ISA_SUPPORT(sme) + PRINT_ISA_SUPPORT(smef16f16) + PRINT_ISA_SUPPORT(smef64f64) + PRINT_ISA_SUPPORT(smei64i64) PRINT_ISA_SUPPORT(pmull) PRINT_ISA_SUPPORT(crc32) PRINT_ISA_SUPPORT(aes) diff --git a/ruapu.h b/ruapu.h index 336f142..525b07e 100644 --- a/ruapu.h +++ b/ruapu.h @@ -224,6 +224,7 @@ RUAPU_INSTCODE(amxtile, 0xc4, 0xe2, 0x7a, 0x49, 0xc0) // tilezero %tmm0 #elif __aarch64__ || defined(_M_ARM64) RUAPU_INSTCODE(neon, 0x4e20d400) // fadd v0.4s,v0.4s,v0.4s RUAPU_INSTCODE(vfpv4, 0x1f000000) // fmadd s0,s0,s0,s0 +RUAPU_INSTCODE(lse, 0xf82083e0, 0xf82083e0) // swp x0,x0,[sp] + swp x0,x0,[sp] RUAPU_INSTCODE(cpuid, 0xd5380000) // mrs x0,midr_el1 RUAPU_INSTCODE(asimdrdm, 0x6e808400) // sqrdmlah v0.4s,v0.4s,v0.4s RUAPU_INSTCODE(asimdhp, 0x0e401400) // fadd v0.4h,v0.4h,v0.4h @@ -242,6 +243,10 @@ RUAPU_INSTCODE(svebf16, 0x6460e400) // bfmmla z0.s,z0.h,z0.h RUAPU_INSTCODE(svei8mm, 0x45009800) // smmla z0.s,z0.b,z0.b RUAPU_INSTCODE(svef32mm, 0x64a0e400) // fmmla z0.s,z0.s,z0.s RUAPU_INSTCODE(svef64mm, 0x64e0e400) // fmmla z0.d,z0.d,z0.d +RUAPU_INSTCODE(sme, 0x80800000) // fmopa za0.s,p0/m,p0/m,z0.s,z0.s +RUAPU_INSTCODE(smef16f16, 0x81800008) // fmopa za0.h,p0/m,p0/m,z0.h,z0.h +RUAPU_INSTCODE(smef64f64, 0x80c00000) // fmopa za0.d,p0/m,p0/m,z0.d,z0.d +RUAPU_INSTCODE(smei64i64, 0xa0c00000) // smopa za0.d,p0/m,p0/m,z0.h,z0.h RUAPU_INSTCODE(pmull, 0x0e20e000) // pmull v0.8h,v0.8b,v0.8b RUAPU_INSTCODE(crc32, 0x1ac04000) // crc32b w0,w0,w0 RUAPU_INSTCODE(aes, 0x4e285800) // aesd v0.16b,v0.16b @@ -357,6 +362,7 @@ RUAPU_ISAENTRY(amxtile) #elif __aarch64__ || defined(_M_ARM64) RUAPU_ISAENTRY(neon) RUAPU_ISAENTRY(vfpv4) +RUAPU_ISAENTRY(lse) RUAPU_ISAENTRY(cpuid) RUAPU_ISAENTRY(asimdrdm) RUAPU_ISAENTRY(asimdhp) @@ -375,6 +381,10 @@ RUAPU_ISAENTRY(svebf16) RUAPU_ISAENTRY(svei8mm) RUAPU_ISAENTRY(svef32mm) RUAPU_ISAENTRY(svef64mm) +RUAPU_ISAENTRY(sme) +RUAPU_ISAENTRY(smef16f16) +RUAPU_ISAENTRY(smef64f64) +RUAPU_ISAENTRY(smei64i64) RUAPU_ISAENTRY(pmull) RUAPU_ISAENTRY(crc32) RUAPU_ISAENTRY(aes)