detect rvv1.0 and its extensions (#107)

* detect rvv1.0 and its extensions

* use vaaddu to detect v extension

* Use `csrr a0, vlenb` for VLEN probe

* check if vcsr is available first

* inline helper functions into their callers
This commit is contained in:
Yingwei Zheng 2024-05-15 11:13:01 +08:00 committed by GitHub
parent 0659730b6b
commit fad2fe7dbf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 72 additions and 1 deletions

View File

@ -69,7 +69,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._
|powerpc|`vsx`|
|s390x|`zvector`|
|loongarch|`lsx` `lasx`|
|risc-v|`i` `m` `a` `f` `d` `c` `zba` `zbb` `zbc` `zbs` `zbkb` `zbkc` `zbkx` `zfa` `zfbfmin` `zfh` `zfhmin` `zicond` `zicsr` `zifencei` `zmmul` `xtheadba` `xtheadbb` `xtheadbs` `xtheadcondmov` `xtheadfmemidx` `xtheadfmv` `xtheadmac` `xtheadmemidx` `xtheadmempair` `xtheadsync` `xtheadvdot`|
|risc-v|`i` `m` `a` `f` `d` `c` `v` `zba` `zbb` `zbc` `zbs` `zbkb` `zbkc` `zbkx` `zfa` `zfbfmin` `zfh` `zfhmin` `zicond` `zicsr` `zifencei` `zmmul` `zvbb` `zvbc` `zvfh` `zvfhmin` `zvfbfmin` `zvfbfwma` `zvkb` `zvl32b` `zvl64b` `zvl128b` `zvl256b` `zvl512b` `zvl1024b` `xtheadba` `xtheadbb` `xtheadbs` `xtheadcondmov` `xtheadfmemidx` `xtheadfmv` `xtheadmac` `xtheadmemidx` `xtheadmempair` `xtheadsync` `xtheadvdot`|
|openrisc| `orbis32` `orbis64` `orfpx32` `orfpx64` `orvdx64` |
## Let's ruapu

14
main.c
View File

@ -127,6 +127,7 @@ int main()
PRINT_ISA_SUPPORT(f)
PRINT_ISA_SUPPORT(d)
PRINT_ISA_SUPPORT(c)
PRINT_ISA_SUPPORT(v)
PRINT_ISA_SUPPORT(zba)
PRINT_ISA_SUPPORT(zbb)
PRINT_ISA_SUPPORT(zbc)
@ -142,6 +143,19 @@ int main()
PRINT_ISA_SUPPORT(zicsr)
PRINT_ISA_SUPPORT(zifencei)
PRINT_ISA_SUPPORT(zmmul)
PRINT_ISA_SUPPORT(zvbb)
PRINT_ISA_SUPPORT(zvbc)
PRINT_ISA_SUPPORT(zvfh)
PRINT_ISA_SUPPORT(zvfhmin)
PRINT_ISA_SUPPORT(zvfbfmin)
PRINT_ISA_SUPPORT(zvfbfwma)
PRINT_ISA_SUPPORT(zvkb)
PRINT_ISA_SUPPORT(zvl32b)
PRINT_ISA_SUPPORT(zvl64b)
PRINT_ISA_SUPPORT(zvl128b)
PRINT_ISA_SUPPORT(zvl256b)
PRINT_ISA_SUPPORT(zvl512b)
PRINT_ISA_SUPPORT(zvl1024b)
PRINT_ISA_SUPPORT(xtheadba)
PRINT_ISA_SUPPORT(xtheadbb)

57
ruapu.h
View File

@ -19,6 +19,7 @@ const char* const* ruapu_rua();
#ifdef RUAPU_IMPLEMENTATION
#include <stdint.h>
#include <string.h>
typedef void (*ruapu_some_inst)();
@ -334,6 +335,48 @@ RUAPU_INSTCODE(xtheadmempair, 0xe0a1450b) // th.lwd a0,a0,(sp),#0,3
RUAPU_INSTCODE(xtheadsync, 0x0180000b) // th.sync
RUAPU_INSTCODE(xtheadvdot, 0x8000600b) // th.vmaqa.vv v0,v0,v0
// RVV 1.0 support
// Deliberately execute an illegal instruction so that a probe function fails.
// 0xc0001073 is the canonical `unimp` encoding (csrrw x0, cycle, x0): a write
// to the read-only `cycle` CSR. `.align 2` places the raw 32-bit word on a
// 4-byte boundary before it is emitted with `.word`.
#define RUAPU_RV_TRAP() asm volatile(".align 2\n.word 0xc0001073")
// vcsr is only defined in rvv 1.0, which doesn't exist in rvv 0.7.1 or xtheadvector,
// so reading it is expected to raise an illegal-instruction exception on anything
// that does not implement RVV 1.0.
// 0x00f02573 encodes `csrr a0, vcsr` (csrrs a0, vcsr, x0): the destination register
// is a0, not x0. a0 is therefore declared as clobbered; otherwise the compiler is
// free to keep a live value (for example the vtype operand of a following asm
// statement) in a0 across this probe and have it silently overwritten.
#define RUAPU_RVV1P0_AVAIL() asm volatile(".align 2\n.word 0x00f02573" : : : "a0")
// Probe for Zvl<len>b, i.e. a vector register length of at least <len> bits.
// Defines ruapu_some_zvl<len>b(), which:
//   1. runs the RVV 1.0 availability check,
//   2. reads vlenb with `csrr <reg>, vlenb` (emitted through `.insn i`; the CSR
//      number 0xC22 is written as the 12-bit signed immediate -990),
//   3. traps when the register length in bytes is smaller than len/8.
#define RUAPU_DETECT_ZVL(len) static void ruapu_some_zvl##len##b() { \
    RUAPU_RVV1P0_AVAIL(); \
    intptr_t vlen_bytes; \
    asm volatile(".align 2\n.insn i 0x73, 0x2, %0, x0, -990" : "=r"(vlen_bytes)); \
    if (vlen_bytes < len/8) { \
        RUAPU_RV_TRAP(); \
    } \
}
RUAPU_DETECT_ZVL(32)
RUAPU_DETECT_ZVL(64)
RUAPU_DETECT_ZVL(128)
RUAPU_DETECT_ZVL(256)
RUAPU_DETECT_ZVL(512)
RUAPU_DETECT_ZVL(1024)
#undef RUAPU_DETECT_ZVL
// vsetvl res, zero, vtype
// check vill bits after vsetvl
#define RUAPU_RVV_INSTCODE(isa, vtype, ...) static void ruapu_some_##isa() { \
RUAPU_RVV1P0_AVAIL(); \
intptr_t res; \
asm volatile(".align 2\n.insn r 0x57, 0x7, 0x40, %0, x0, %1" : "=r"(res) : "r"(vtype)); \
if (res < 0) RUAPU_RV_TRAP(); \
asm volatile(".align 2\n.word " #__VA_ARGS__ ); \
}
RUAPU_RVV_INSTCODE(zvbb, 0, 0x4a862257) // vclz.v v4, v8 with SEW = 8
RUAPU_RVV_INSTCODE(zvbc, 0, 0x32842257) // vclmul.vv v4, v8, v8 with SEW = 8
RUAPU_RVV_INSTCODE(zvfh, 8, 0x02841257) // vfadd.vv v4, v8, v8 with SEW = 16
RUAPU_RVV_INSTCODE(zvfhmin, 8, 0x4a8a1257) // vfncvt.f.f.v v4, v8 with SEW = 16
RUAPU_RVV_INSTCODE(zvfbfmin, 8, 0x4a8e9257) // vfncvtbf16.f.f.w v4, v8 with SEW = 16
RUAPU_RVV_INSTCODE(zvfbfwma, 8, 0xee855257) // vfwmaccbf16.vf v4, fa0, v8 with SEW = 16
RUAPU_RVV_INSTCODE(zvkb, 0, 0x56860257) // vrol.vv v4, v8, v12 with SEW = 8
RUAPU_RVV_INSTCODE(v, 24, 0x22842257) // vaaddu.vv v4, v8, v8 with SEW = 64
#undef RUAPU_RVV_INSTCODE
#undef RUAPU_RV_TRAP
#undef RUAPU_RVV1P0_AVAIL
#endif
#undef RUAPU_INSTCODE
@ -461,6 +504,7 @@ RUAPU_ISAENTRY(a)
RUAPU_ISAENTRY(f)
RUAPU_ISAENTRY(d)
RUAPU_ISAENTRY(c)
RUAPU_ISAENTRY(v)
RUAPU_ISAENTRY(zba)
RUAPU_ISAENTRY(zbb)
RUAPU_ISAENTRY(zbc)
@ -476,6 +520,19 @@ RUAPU_ISAENTRY(zicond)
RUAPU_ISAENTRY(zicsr)
RUAPU_ISAENTRY(zifencei)
RUAPU_ISAENTRY(zmmul)
RUAPU_ISAENTRY(zvbb)
RUAPU_ISAENTRY(zvbc)
RUAPU_ISAENTRY(zvfh)
RUAPU_ISAENTRY(zvfhmin)
RUAPU_ISAENTRY(zvfbfmin)
RUAPU_ISAENTRY(zvfbfwma)
RUAPU_ISAENTRY(zvkb)
RUAPU_ISAENTRY(zvl32b)
RUAPU_ISAENTRY(zvl64b)
RUAPU_ISAENTRY(zvl128b)
RUAPU_ISAENTRY(zvl256b)
RUAPU_ISAENTRY(zvl512b)
RUAPU_ISAENTRY(zvl1024b)
RUAPU_ISAENTRY(xtheadba)
RUAPU_ISAENTRY(xtheadbb)