diff --git a/README.md b/README.md
index cdc1c45..29d8de8 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._
 |powerpc|`vsx`|
 |s390x|`zvector`|
 |loongarch|`lsx` `lasx`|
-|risc-v|`i` `m` `a` `f` `d` `c` `zba` `zbb` `zbc` `zbs` `zbkb` `zbkc` `zbkx` `zfa` `zfbfmin` `zfh` `zfhmin` `zicond` `zicsr` `zifencei` `zmmul` `xtheadba` `xtheadbb` `xtheadbs` `xtheadcondmov` `xtheadfmemidx` `xtheadfmv` `xtheadmac` `xtheadmemidx` `xtheadmempair` `xtheadsync` `xtheadvdot`|
+|risc-v|`i` `m` `a` `f` `d` `c` `v` `zba` `zbb` `zbc` `zbs` `zbkb` `zbkc` `zbkx` `zfa` `zfbfmin` `zfh` `zfhmin` `zicond` `zicsr` `zifencei` `zmmul` `zvbb` `zvbc` `zvfh` `zvfhmin` `zvfbfmin` `zvfbfwma` `zvkb` `zvl32b` `zvl64b` `zvl128b` `zvl256b` `zvl512b` `zvl1024b` `xtheadba` `xtheadbb` `xtheadbs` `xtheadcondmov` `xtheadfmemidx` `xtheadfmv` `xtheadmac` `xtheadmemidx` `xtheadmempair` `xtheadsync` `xtheadvdot`|
 |openrisc| `orbis32` `orbis64` `orfpx32` `orfpx64` `orvdx64` |
 
 ## Let's ruapu
diff --git a/main.c b/main.c
index dd0c790..fde0ee7 100644
--- a/main.c
+++ b/main.c
@@ -127,6 +127,7 @@ int main()
     PRINT_ISA_SUPPORT(f)
     PRINT_ISA_SUPPORT(d)
     PRINT_ISA_SUPPORT(c)
+    PRINT_ISA_SUPPORT(v)
     PRINT_ISA_SUPPORT(zba)
     PRINT_ISA_SUPPORT(zbb)
     PRINT_ISA_SUPPORT(zbc)
@@ -142,6 +143,19 @@ int main()
     PRINT_ISA_SUPPORT(zicsr)
     PRINT_ISA_SUPPORT(zifencei)
     PRINT_ISA_SUPPORT(zmmul)
+    PRINT_ISA_SUPPORT(zvbb)
+    PRINT_ISA_SUPPORT(zvbc)
+    PRINT_ISA_SUPPORT(zvfh)
+    PRINT_ISA_SUPPORT(zvfhmin)
+    PRINT_ISA_SUPPORT(zvfbfmin)
+    PRINT_ISA_SUPPORT(zvfbfwma)
+    PRINT_ISA_SUPPORT(zvkb)
+    PRINT_ISA_SUPPORT(zvl32b)
+    PRINT_ISA_SUPPORT(zvl64b)
+    PRINT_ISA_SUPPORT(zvl128b)
+    PRINT_ISA_SUPPORT(zvl256b)
+    PRINT_ISA_SUPPORT(zvl512b)
+    PRINT_ISA_SUPPORT(zvl1024b)
 
     PRINT_ISA_SUPPORT(xtheadba)
     PRINT_ISA_SUPPORT(xtheadbb)
diff --git a/ruapu.h b/ruapu.h
index a082033..900e0f1 100644
--- a/ruapu.h
+++ b/ruapu.h
@@ -19,6 +19,7 @@ const char* const* ruapu_rua();
 
 #ifdef RUAPU_IMPLEMENTATION
 
+#include <stdint.h>
 #include <string.h>
 
 typedef void (*ruapu_some_inst)();
@@ -334,6 +335,48 @@ RUAPU_INSTCODE(xtheadmempair, 0xe0a1450b) // th.lwd a0,a0,(sp),#0,3
 RUAPU_INSTCODE(xtheadsync, 0x0180000b) // th.sync
 RUAPU_INSTCODE(xtheadvdot, 0x8000600b) // th.vmaqa.vv v0,v0,v0
 
+// RVV 1.0 support
+// unimp (csrrw x0, cycle, x0): writing the read-only cycle CSR raises an illegal-instruction trap
+#define RUAPU_RV_TRAP() asm volatile(".align 2\n.word 0xc0001073")
+// vcsr is only defined in rvv 1.0; it does not exist in rvv 0.7.1 or xtheadvector.
+// csrr a0, vcsr (the encoding has rd = a0, so a0 must be listed as clobbered)
+#define RUAPU_RVV1P0_AVAIL() asm volatile(".align 2\n.word 0x00f02573" : : : "a0")
+// csrr res, vlenb; vlenb is VLEN in bytes, so trap when it is below len / 8
+#define RUAPU_DETECT_ZVL(len) static void ruapu_some_zvl##len##b() { \
+    RUAPU_RVV1P0_AVAIL(); \
+    intptr_t res; \
+    asm volatile(".align 2\n.insn i 0x73, 0x2, %0, x0, -990" : "=r"(res)); \
+    if (res < (len) / 8) RUAPU_RV_TRAP(); \
+}
+RUAPU_DETECT_ZVL(32)
+RUAPU_DETECT_ZVL(64)
+RUAPU_DETECT_ZVL(128)
+RUAPU_DETECT_ZVL(256)
+RUAPU_DETECT_ZVL(512)
+RUAPU_DETECT_ZVL(1024)
+#undef RUAPU_DETECT_ZVL
+// vsetvl res, zero, vtype: res receives the new vl (VLMAX here), not vtype
+// an unsupported vtype sets vill and forces vl to 0, so res == 0 means vill is set
+#define RUAPU_RVV_INSTCODE(isa, vtype, ...) static void ruapu_some_##isa() { \
+    RUAPU_RVV1P0_AVAIL(); \
+    intptr_t res; \
+    asm volatile(".align 2\n.insn r 0x57, 0x7, 0x40, %0, x0, %1" : "=r"(res) : "r"(vtype)); \
+    if (res <= 0) RUAPU_RV_TRAP(); \
+    asm volatile(".align 2\n.word " #__VA_ARGS__ ); \
+}
+
+RUAPU_RVV_INSTCODE(zvbb, 0, 0x4a862257) // vclz.v v4, v8 with SEW = 8
+RUAPU_RVV_INSTCODE(zvbc, 0, 0x32842257) // vclmul.vv v4, v8, v8 with SEW = 8
+RUAPU_RVV_INSTCODE(zvfh, 8, 0x02841257) // vfadd.vv v4, v8, v8 with SEW = 16
+RUAPU_RVV_INSTCODE(zvfhmin, 8, 0x4a8a1257) // vfncvt.f.f.v v4, v8 with SEW = 16
+RUAPU_RVV_INSTCODE(zvfbfmin, 8, 0x4a8e9257) // vfncvtbf16.f.f.w v4, v8 with SEW = 16
+RUAPU_RVV_INSTCODE(zvfbfwma, 8, 0xee855257) // vfwmaccbf16.vf v4, fa0, v8 with SEW = 16
+RUAPU_RVV_INSTCODE(zvkb, 0, 0x56860257) // vrol.vv v4, v8, v12 with SEW = 8
+RUAPU_RVV_INSTCODE(v, 24, 0x22842257) // vaaddu.vv v4, v8, v8 with SEW = 64
+
+#undef RUAPU_RVV_INSTCODE
+#undef RUAPU_RV_TRAP
+#undef RUAPU_RVV1P0_AVAIL
 #endif
 
 #undef RUAPU_INSTCODE
@@ -461,6 +504,7 @@ RUAPU_ISAENTRY(a)
 RUAPU_ISAENTRY(f)
 RUAPU_ISAENTRY(d)
 RUAPU_ISAENTRY(c)
+RUAPU_ISAENTRY(v)
 RUAPU_ISAENTRY(zba)
 RUAPU_ISAENTRY(zbb)
 RUAPU_ISAENTRY(zbc)
@@ -476,6 +520,19 @@ RUAPU_ISAENTRY(zicond)
 RUAPU_ISAENTRY(zicsr)
 RUAPU_ISAENTRY(zifencei)
 RUAPU_ISAENTRY(zmmul)
+RUAPU_ISAENTRY(zvbb)
+RUAPU_ISAENTRY(zvbc)
+RUAPU_ISAENTRY(zvfh)
+RUAPU_ISAENTRY(zvfhmin)
+RUAPU_ISAENTRY(zvfbfmin)
+RUAPU_ISAENTRY(zvfbfwma)
+RUAPU_ISAENTRY(zvkb)
+RUAPU_ISAENTRY(zvl32b)
+RUAPU_ISAENTRY(zvl64b)
+RUAPU_ISAENTRY(zvl128b)
+RUAPU_ISAENTRY(zvl256b)
+RUAPU_ISAENTRY(zvl512b)
+RUAPU_ISAENTRY(zvl1024b)
 
 RUAPU_ISAENTRY(xtheadba)
 RUAPU_ISAENTRY(xtheadbb)