detect rvv1.0 and its extensions (#107)
* detect rvv1.0 and its extensions * use vaaddu to detect v extension * Use `csrr a0, vlenb` for VLEN probe * check if vcsr is available first * inline helper functions into their callers
This commit is contained in:
		
							parent
							
								
									0659730b6b
								
							
						
					
					
						commit
						fad2fe7dbf
					
				| @ -69,7 +69,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._ | |||||||
| |powerpc|`vsx`| | |powerpc|`vsx`| | ||||||
| |s390x|`zvector`| | |s390x|`zvector`| | ||||||
| |loongarch|`lsx` `lasx`| | |loongarch|`lsx` `lasx`| | ||||||
| |risc-v|`i` `m` `a` `f` `d` `c` `zba` `zbb` `zbc` `zbs` `zbkb` `zbkc` `zbkx` `zfa` `zfbfmin` `zfh` `zfhmin` `zicond` `zicsr` `zifencei` `zmmul` `xtheadba` `xtheadbb` `xtheadbs` `xtheadcondmov` `xtheadfmemidx` `xtheadfmv` `xtheadmac` `xtheadmemidx` `xtheadmempair` `xtheadsync` `xtheadvdot`| | |risc-v|`i` `m` `a` `f` `d` `c` `v` `zba` `zbb` `zbc` `zbs` `zbkb` `zbkc` `zbkx` `zfa` `zfbfmin` `zfh` `zfhmin` `zicond` `zicsr` `zifencei` `zmmul` `zvbb` `zvbc` `zvfh` `zvfhmin` `zvfbfmin` `zvfbfwma` `zvkb` `zvl32b` `zvl64b` `zvl128b` `zvl256b` `zvl512b` `zvl1024b` `xtheadba` `xtheadbb` `xtheadbs` `xtheadcondmov` `xtheadfmemidx` `xtheadfmv` `xtheadmac` `xtheadmemidx` `xtheadmempair` `xtheadsync` `xtheadvdot`| | ||||||
| |openrisc| `orbis32` `orbis64` `orfpx32` `orfpx64` `orvdx64` | | |openrisc| `orbis32` `orbis64` `orfpx32` `orfpx64` `orvdx64` | | ||||||
| 
 | 
 | ||||||
| ## Let's ruapu | ## Let's ruapu | ||||||
|  | |||||||
							
								
								
									
										14
									
								
								main.c
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								main.c
									
									
									
									
									
								
							| @ -127,6 +127,7 @@ int main() | |||||||
|     PRINT_ISA_SUPPORT(f) |     PRINT_ISA_SUPPORT(f) | ||||||
|     PRINT_ISA_SUPPORT(d) |     PRINT_ISA_SUPPORT(d) | ||||||
|     PRINT_ISA_SUPPORT(c) |     PRINT_ISA_SUPPORT(c) | ||||||
|  |     PRINT_ISA_SUPPORT(v) | ||||||
|     PRINT_ISA_SUPPORT(zba) |     PRINT_ISA_SUPPORT(zba) | ||||||
|     PRINT_ISA_SUPPORT(zbb) |     PRINT_ISA_SUPPORT(zbb) | ||||||
|     PRINT_ISA_SUPPORT(zbc) |     PRINT_ISA_SUPPORT(zbc) | ||||||
| @ -142,6 +143,19 @@ int main() | |||||||
|     PRINT_ISA_SUPPORT(zicsr) |     PRINT_ISA_SUPPORT(zicsr) | ||||||
|     PRINT_ISA_SUPPORT(zifencei) |     PRINT_ISA_SUPPORT(zifencei) | ||||||
|     PRINT_ISA_SUPPORT(zmmul) |     PRINT_ISA_SUPPORT(zmmul) | ||||||
|  |     PRINT_ISA_SUPPORT(zvbb) | ||||||
|  |     PRINT_ISA_SUPPORT(zvbc) | ||||||
|  |     PRINT_ISA_SUPPORT(zvfh) | ||||||
|  |     PRINT_ISA_SUPPORT(zvfhmin) | ||||||
|  |     PRINT_ISA_SUPPORT(zvfbfmin) | ||||||
|  |     PRINT_ISA_SUPPORT(zvfbfwma) | ||||||
|  |     PRINT_ISA_SUPPORT(zvkb) | ||||||
|  |     PRINT_ISA_SUPPORT(zvl32b) | ||||||
|  |     PRINT_ISA_SUPPORT(zvl64b) | ||||||
|  |     PRINT_ISA_SUPPORT(zvl128b) | ||||||
|  |     PRINT_ISA_SUPPORT(zvl256b) | ||||||
|  |     PRINT_ISA_SUPPORT(zvl512b) | ||||||
|  |     PRINT_ISA_SUPPORT(zvl1024b) | ||||||
| 
 | 
 | ||||||
|     PRINT_ISA_SUPPORT(xtheadba) |     PRINT_ISA_SUPPORT(xtheadba) | ||||||
|     PRINT_ISA_SUPPORT(xtheadbb) |     PRINT_ISA_SUPPORT(xtheadbb) | ||||||
|  | |||||||
							
								
								
									
										57
									
								
								ruapu.h
									
									
									
									
									
								
							
							
						
						
									
										57
									
								
								ruapu.h
									
									
									
									
									
								
							| @ -19,6 +19,7 @@ const char* const* ruapu_rua(); | |||||||
| 
 | 
 | ||||||
| #ifdef RUAPU_IMPLEMENTATION | #ifdef RUAPU_IMPLEMENTATION | ||||||
| 
 | 
 | ||||||
|  | #include <stdint.h> | ||||||
| #include <string.h> | #include <string.h> | ||||||
| 
 | 
 | ||||||
| typedef void (*ruapu_some_inst)(); | typedef void (*ruapu_some_inst)(); | ||||||
| @ -334,6 +335,48 @@ RUAPU_INSTCODE(xtheadmempair, 0xe0a1450b) // th.lwd a0,a0,(sp),#0,3 | |||||||
| RUAPU_INSTCODE(xtheadsync, 0x0180000b) // th.sync
 | RUAPU_INSTCODE(xtheadsync, 0x0180000b) // th.sync
 | ||||||
| RUAPU_INSTCODE(xtheadvdot, 0x8000600b) // th.vmaqa.vv v0,v0,v0
 | RUAPU_INSTCODE(xtheadvdot, 0x8000600b) // th.vmaqa.vv v0,v0,v0
 | ||||||
| 
 | 
 | ||||||
|  | // RVV 1.0 support
 | ||||||
|  | // unimp (csrrw x0, cycle, x0)
 | ||||||
|  | #define RUAPU_RV_TRAP() asm volatile(".align 2\n.word 0xc0001073") | ||||||
|  | // vcsr is only defined in RVV 1.0; it does not exist in RVV 0.7.1 or xtheadvector,
 | ||||||
|  | // csrr a0, vcsr
 | ||||||
|  | #define RUAPU_RVV1P0_AVAIL() asm volatile(".align 2\n.word 0x00f02573") | ||||||
|  | // csrr res, vlenb
 | ||||||
|  | #define RUAPU_DETECT_ZVL(len) static void ruapu_some_zvl##len##b() { \ | ||||||
|  |         RUAPU_RVV1P0_AVAIL(); \ | ||||||
|  |         intptr_t res; \ | ||||||
|  |         asm volatile(".align 2\n.insn i 0x73, 0x2, %0, x0, -990" : "=r"(res)); \ | ||||||
|  |         if (res < len/8) RUAPU_RV_TRAP(); \ | ||||||
|  |     } | ||||||
|  | RUAPU_DETECT_ZVL(32) | ||||||
|  | RUAPU_DETECT_ZVL(64) | ||||||
|  | RUAPU_DETECT_ZVL(128) | ||||||
|  | RUAPU_DETECT_ZVL(256) | ||||||
|  | RUAPU_DETECT_ZVL(512) | ||||||
|  | RUAPU_DETECT_ZVL(1024) | ||||||
|  | #undef RUAPU_DETECT_ZVL | ||||||
|  | // vsetvl res, zero, vtype
 | ||||||
|  | // check vill bits after vsetvl
 | ||||||
|  | #define RUAPU_RVV_INSTCODE(isa, vtype, ...) static void ruapu_some_##isa() { \ | ||||||
|  |         RUAPU_RVV1P0_AVAIL(); \ | ||||||
|  |         intptr_t res; \ | ||||||
|  |         asm volatile(".align 2\n.insn r 0x57, 0x7, 0x40, %0, x0, %1" : "=r"(res) : "r"(vtype)); \ | ||||||
|  |         if (res < 0) RUAPU_RV_TRAP(); \ | ||||||
|  |         asm volatile(".align 2\n.word " #__VA_ARGS__ ); \ | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | RUAPU_RVV_INSTCODE(zvbb, 0, 0x4a862257) // vclz.v v4, v8 with SEW = 8
 | ||||||
|  | RUAPU_RVV_INSTCODE(zvbc, 0, 0x32842257) // vclmul.vv v4, v8, v8 with SEW = 8
 | ||||||
|  | RUAPU_RVV_INSTCODE(zvfh, 8, 0x02841257) // vfadd.vv v4, v8, v8 with SEW = 16
 | ||||||
|  | RUAPU_RVV_INSTCODE(zvfhmin, 8, 0x4a8a1257) // vfncvt.f.f.w v4, v8 with SEW = 16
 | ||||||
|  | RUAPU_RVV_INSTCODE(zvfbfmin, 8, 0x4a8e9257) // vfncvtbf16.f.f.w v4, v8 with SEW = 16
 | ||||||
|  | RUAPU_RVV_INSTCODE(zvfbfwma, 8, 0xee855257) // vfwmaccbf16.vf v4, fa0, v8 with SEW = 16
 | ||||||
|  | RUAPU_RVV_INSTCODE(zvkb, 0, 0x56860257) // vrol.vv v4, v8, v12 with SEW = 8
 | ||||||
|  | RUAPU_RVV_INSTCODE(v, 24, 0x22842257) // vaaddu.vv v4, v8, v8 with SEW = 64
 | ||||||
|  | 
 | ||||||
|  | #undef RUAPU_RVV_INSTCODE | ||||||
|  | #undef RUAPU_RV_TRAP | ||||||
|  | #undef RUAPU_RVV1P0_AVAIL | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #undef RUAPU_INSTCODE | #undef RUAPU_INSTCODE | ||||||
| @ -461,6 +504,7 @@ RUAPU_ISAENTRY(a) | |||||||
| RUAPU_ISAENTRY(f) | RUAPU_ISAENTRY(f) | ||||||
| RUAPU_ISAENTRY(d) | RUAPU_ISAENTRY(d) | ||||||
| RUAPU_ISAENTRY(c) | RUAPU_ISAENTRY(c) | ||||||
|  | RUAPU_ISAENTRY(v) | ||||||
| RUAPU_ISAENTRY(zba) | RUAPU_ISAENTRY(zba) | ||||||
| RUAPU_ISAENTRY(zbb) | RUAPU_ISAENTRY(zbb) | ||||||
| RUAPU_ISAENTRY(zbc) | RUAPU_ISAENTRY(zbc) | ||||||
| @ -476,6 +520,19 @@ RUAPU_ISAENTRY(zicond) | |||||||
| RUAPU_ISAENTRY(zicsr) | RUAPU_ISAENTRY(zicsr) | ||||||
| RUAPU_ISAENTRY(zifencei) | RUAPU_ISAENTRY(zifencei) | ||||||
| RUAPU_ISAENTRY(zmmul) | RUAPU_ISAENTRY(zmmul) | ||||||
|  | RUAPU_ISAENTRY(zvbb) | ||||||
|  | RUAPU_ISAENTRY(zvbc) | ||||||
|  | RUAPU_ISAENTRY(zvfh) | ||||||
|  | RUAPU_ISAENTRY(zvfhmin) | ||||||
|  | RUAPU_ISAENTRY(zvfbfmin) | ||||||
|  | RUAPU_ISAENTRY(zvfbfwma) | ||||||
|  | RUAPU_ISAENTRY(zvkb) | ||||||
|  | RUAPU_ISAENTRY(zvl32b) | ||||||
|  | RUAPU_ISAENTRY(zvl64b) | ||||||
|  | RUAPU_ISAENTRY(zvl128b) | ||||||
|  | RUAPU_ISAENTRY(zvl256b) | ||||||
|  | RUAPU_ISAENTRY(zvl512b) | ||||||
|  | RUAPU_ISAENTRY(zvl1024b) | ||||||
| 
 | 
 | ||||||
| RUAPU_ISAENTRY(xtheadba) | RUAPU_ISAENTRY(xtheadba) | ||||||
| RUAPU_ISAENTRY(xtheadbb) | RUAPU_ISAENTRY(xtheadbb) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user