detect rvv1.0 and its extensions (#107)
* detect rvv1.0 and its extensions * use vaaddu to detect v extension * Use `csrr a0, vlenb` for VLEN probe * check if vcsr is available first * inline helper functions into their callers
This commit is contained in:
parent
0659730b6b
commit
fad2fe7dbf
@ -69,7 +69,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._
|
||||
|powerpc|`vsx`|
|
||||
|s390x|`zvector`|
|
||||
|loongarch|`lsx` `lasx`|
|
||||
|risc-v|`i` `m` `a` `f` `d` `c` `zba` `zbb` `zbc` `zbs` `zbkb` `zbkc` `zbkx` `zfa` `zfbfmin` `zfh` `zfhmin` `zicond` `zicsr` `zifencei` `zmmul` `xtheadba` `xtheadbb` `xtheadbs` `xtheadcondmov` `xtheadfmemidx` `xtheadfmv` `xtheadmac` `xtheadmemidx` `xtheadmempair` `xtheadsync` `xtheadvdot`|
|
||||
|risc-v|`i` `m` `a` `f` `d` `c` `v` `zba` `zbb` `zbc` `zbs` `zbkb` `zbkc` `zbkx` `zfa` `zfbfmin` `zfh` `zfhmin` `zicond` `zicsr` `zifencei` `zmmul` `zvbb` `zvbc` `zvfh` `zvfhmin` `zvfbfmin` `zvfbfwma` `zvkb` `zvl32b` `zvl64b` `zvl128b` `zvl256b` `zvl512b` `zvl1024b` `xtheadba` `xtheadbb` `xtheadbs` `xtheadcondmov` `xtheadfmemidx` `xtheadfmv` `xtheadmac` `xtheadmemidx` `xtheadmempair` `xtheadsync` `xtheadvdot`|
|
||||
|openrisc| `orbis32` `orbis64` `orfpx32` `orfpx64` `orvdx64` |
|
||||
|
||||
## Let's ruapu
|
||||
|
14
main.c
14
main.c
@ -127,6 +127,7 @@ int main()
|
||||
PRINT_ISA_SUPPORT(f)
|
||||
PRINT_ISA_SUPPORT(d)
|
||||
PRINT_ISA_SUPPORT(c)
|
||||
PRINT_ISA_SUPPORT(v)
|
||||
PRINT_ISA_SUPPORT(zba)
|
||||
PRINT_ISA_SUPPORT(zbb)
|
||||
PRINT_ISA_SUPPORT(zbc)
|
||||
@ -142,6 +143,19 @@ int main()
|
||||
PRINT_ISA_SUPPORT(zicsr)
|
||||
PRINT_ISA_SUPPORT(zifencei)
|
||||
PRINT_ISA_SUPPORT(zmmul)
|
||||
PRINT_ISA_SUPPORT(zvbb)
|
||||
PRINT_ISA_SUPPORT(zvbc)
|
||||
PRINT_ISA_SUPPORT(zvfh)
|
||||
PRINT_ISA_SUPPORT(zvfhmin)
|
||||
PRINT_ISA_SUPPORT(zvfbfmin)
|
||||
PRINT_ISA_SUPPORT(zvfbfwma)
|
||||
PRINT_ISA_SUPPORT(zvkb)
|
||||
PRINT_ISA_SUPPORT(zvl32b)
|
||||
PRINT_ISA_SUPPORT(zvl64b)
|
||||
PRINT_ISA_SUPPORT(zvl128b)
|
||||
PRINT_ISA_SUPPORT(zvl256b)
|
||||
PRINT_ISA_SUPPORT(zvl512b)
|
||||
PRINT_ISA_SUPPORT(zvl1024b)
|
||||
|
||||
PRINT_ISA_SUPPORT(xtheadba)
|
||||
PRINT_ISA_SUPPORT(xtheadbb)
|
||||
|
57
ruapu.h
57
ruapu.h
@ -19,6 +19,7 @@ const char* const* ruapu_rua();
|
||||
|
||||
#ifdef RUAPU_IMPLEMENTATION
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
typedef void (*ruapu_some_inst)();
|
||||
@ -334,6 +335,48 @@ RUAPU_INSTCODE(xtheadmempair, 0xe0a1450b) // th.lwd a0,a0,(sp),#0,3
|
||||
RUAPU_INSTCODE(xtheadsync, 0x0180000b) // th.sync
|
||||
RUAPU_INSTCODE(xtheadvdot, 0x8000600b) // th.vmaqa.vv v0,v0,v0
|
||||
|
||||
// RVV 1.0 support
|
||||
// unimp (csrrw x0, cycle, x0)
|
||||
// Executed by the probe functions in this section when one of their checks fails
// (see the `if (...) RUAPU_RV_TRAP();` uses).
// NOTE(review): presumably this raises an illegal-instruction exception that the
// caller's signal handler interprets as "not supported" - confirm against the caller.
#define RUAPU_RV_TRAP() asm volatile(".align 2\n.word 0xc0001073")
|
||||
// vcsr is only defined in rvv 1.0, which doesn't exist in rvv 0.7.1 or xtheadvector.
|
||||
// csrr a0, vcsr
// The encoded destination register (bits 11:7 of 0x00f02573) is a0, not x0, so the
// instruction overwrites a0. Declare it as clobbered so the compiler never keeps a
// live value (e.g. the vtype operand of a following asm) in a0 across this probe.
#define RUAPU_RVV1P0_AVAIL() asm volatile(".align 2\n.word 0x00f02573" : : : "a0")
|
||||
// Defines ruapu_some_zvl<len>b(), the probe behind the zvl<len>b entry.
// The probe touches vcsr through RUAPU_RVV1P0_AVAIL(), then performs
// `csrr vlenb_bytes, vlenb` (CSR 0xC22, written below as the signed 12-bit
// immediate -990) and executes RUAPU_RV_TRAP() when the value read is
// smaller than len/8.
#define RUAPU_DETECT_ZVL(len) \
static void ruapu_some_zvl##len##b() \
{ \
    intptr_t vlenb_bytes; \
    RUAPU_RVV1P0_AVAIL(); \
    asm volatile(".align 2\n.insn i 0x73, 0x2, %0, x0, -990" : "=r"(vlenb_bytes)); \
    if (vlenb_bytes < len/8) \
    { \
        RUAPU_RV_TRAP(); \
    } \
}
|
||||
RUAPU_DETECT_ZVL(32)
|
||||
RUAPU_DETECT_ZVL(64)
|
||||
RUAPU_DETECT_ZVL(128)
|
||||
RUAPU_DETECT_ZVL(256)
|
||||
RUAPU_DETECT_ZVL(512)
|
||||
RUAPU_DETECT_ZVL(1024)
|
||||
#undef RUAPU_DETECT_ZVL
|
||||
// Defines ruapu_some_<isa>(), the probe for an extension built on RVV 1.0.
//   isa    extension name, pasted into the generated function name
//   vtype  value passed to vsetvl as the requested vtype
//   ...    32-bit encoding of the instruction to execute under that vtype
//
// Sequence: touch vcsr (RUAPU_RVV1P0_AVAIL), run `vsetvl res, zero, vtype`,
// then emit the instruction word.
//
// vsetvl writes the new vl (not vtype) to its destination register. With
// rs1 = x0 and rd != x0 it requests VLMAX, so res is at least 1 for a supported
// vtype; an unsupported vtype sets vill and forces vl to 0. The check therefore
// tests for a zero result; a `res < 0` test can never fire because vl is never
// negative.
#define RUAPU_RVV_INSTCODE(isa, vtype, ...) static void ruapu_some_##isa() { \
    RUAPU_RVV1P0_AVAIL(); \
    intptr_t res; \
    asm volatile(".align 2\n.insn r 0x57, 0x7, 0x40, %0, x0, %1" : "=r"(res) : "r"(vtype)); \
    if (res <= 0) { RUAPU_RV_TRAP(); } \
    asm volatile(".align 2\n.word " #__VA_ARGS__ ); \
}
|
||||
|
||||
RUAPU_RVV_INSTCODE(zvbb, 0, 0x4a862257) // vclz.v v4, v8 with SEW = 8
|
||||
RUAPU_RVV_INSTCODE(zvbc, 24, 0x32842257) // vclmul.vv v4, v8, v8 with SEW = 64 (Zvbc defines vclmul only for SEW = 64)
|
||||
RUAPU_RVV_INSTCODE(zvfh, 8, 0x02841257) // vfadd.vv v4, v8, v8 with SEW = 16
|
||||
RUAPU_RVV_INSTCODE(zvfhmin, 8, 0x4a8a1257) // vfncvt.f.f.w v4, v8 with SEW = 16
|
||||
RUAPU_RVV_INSTCODE(zvfbfmin, 8, 0x4a8e9257) // vfncvtbf16.f.f.w v4, v8 with SEW = 16
|
||||
RUAPU_RVV_INSTCODE(zvfbfwma, 8, 0xee855257) // vfwmaccbf16.vf v4, fa0, v8 with SEW = 16
|
||||
RUAPU_RVV_INSTCODE(zvkb, 0, 0x56860257) // vrol.vv v4, v8, v12 with SEW = 8
|
||||
RUAPU_RVV_INSTCODE(v, 24, 0x22842257) // vaaddu.vv v4, v8, v8 with SEW = 64
|
||||
|
||||
#undef RUAPU_RVV_INSTCODE
|
||||
#undef RUAPU_RV_TRAP
|
||||
#undef RUAPU_RVV1P0_AVAIL
|
||||
#endif
|
||||
|
||||
#undef RUAPU_INSTCODE
|
||||
@ -461,6 +504,7 @@ RUAPU_ISAENTRY(a)
|
||||
RUAPU_ISAENTRY(f)
|
||||
RUAPU_ISAENTRY(d)
|
||||
RUAPU_ISAENTRY(c)
|
||||
RUAPU_ISAENTRY(v)
|
||||
RUAPU_ISAENTRY(zba)
|
||||
RUAPU_ISAENTRY(zbb)
|
||||
RUAPU_ISAENTRY(zbc)
|
||||
@ -476,6 +520,19 @@ RUAPU_ISAENTRY(zicond)
|
||||
RUAPU_ISAENTRY(zicsr)
|
||||
RUAPU_ISAENTRY(zifencei)
|
||||
RUAPU_ISAENTRY(zmmul)
|
||||
RUAPU_ISAENTRY(zvbb)
|
||||
RUAPU_ISAENTRY(zvbc)
|
||||
RUAPU_ISAENTRY(zvfh)
|
||||
RUAPU_ISAENTRY(zvfhmin)
|
||||
RUAPU_ISAENTRY(zvfbfmin)
|
||||
RUAPU_ISAENTRY(zvfbfwma)
|
||||
RUAPU_ISAENTRY(zvkb)
|
||||
RUAPU_ISAENTRY(zvl32b)
|
||||
RUAPU_ISAENTRY(zvl64b)
|
||||
RUAPU_ISAENTRY(zvl128b)
|
||||
RUAPU_ISAENTRY(zvl256b)
|
||||
RUAPU_ISAENTRY(zvl512b)
|
||||
RUAPU_ISAENTRY(zvl1024b)
|
||||
|
||||
RUAPU_ISAENTRY(xtheadba)
|
||||
RUAPU_ISAENTRY(xtheadbb)
|
||||
|
Loading…
Reference in New Issue
Block a user