Detect x86 AMX(AMX-FP16 AMX-BF16 AMX-INT8 AMX-TILE) (#79)
This commit is contained in:
		
							parent
							
								
									b113072379
								
							
						
					
					
						commit
						35b28cd404
					
				
							
								
								
									
										19
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								README.md
									
									
									
									
									
								
							@ -456,6 +456,10 @@ avx5124vnniw = 0
 | 
				
			|||||||
avxvnni = 0
 | 
					avxvnni = 0
 | 
				
			||||||
avxvnniint8 = 0
 | 
					avxvnniint8 = 0
 | 
				
			||||||
avxifma = 0
 | 
					avxifma = 0
 | 
				
			||||||
 | 
					amxfp16 = 0
 | 
				
			||||||
 | 
					amxbf16 = 0
 | 
				
			||||||
 | 
					amxint8 = 0
 | 
				
			||||||
 | 
					amxtile = 0
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
</details>
 | 
					</details>
 | 
				
			||||||
@ -494,6 +498,10 @@ avx5124vnniw = 0
 | 
				
			|||||||
avxvnni = 0
 | 
					avxvnni = 0
 | 
				
			||||||
avxvnniint8 = 0
 | 
					avxvnniint8 = 0
 | 
				
			||||||
avxifma = 0
 | 
					avxifma = 0
 | 
				
			||||||
 | 
					amxfp16 = 0
 | 
				
			||||||
 | 
					amxbf16 = 0
 | 
				
			||||||
 | 
					amxint8 = 0
 | 
				
			||||||
 | 
					amxtile = 0
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
</details>
 | 
					</details>
 | 
				
			||||||
@ -553,6 +561,10 @@ avx5124vnniw = 0
 | 
				
			|||||||
avxvnni = 0
 | 
					avxvnni = 0
 | 
				
			||||||
avxvnniint8 = 0
 | 
					avxvnniint8 = 0
 | 
				
			||||||
avxifma = 0
 | 
					avxifma = 0
 | 
				
			||||||
 | 
					amxfp16 = 0
 | 
				
			||||||
 | 
					amxbf16 = 0
 | 
				
			||||||
 | 
					amxint8 = 0
 | 
				
			||||||
 | 
					amxtile = 0
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
</details>
 | 
					</details>
 | 
				
			||||||
@ -592,6 +604,10 @@ avx5124vnniw = 0
 | 
				
			|||||||
avxvnni = 0
 | 
					avxvnni = 0
 | 
				
			||||||
avxvnniint8 = 0
 | 
					avxvnniint8 = 0
 | 
				
			||||||
avxifma = 0
 | 
					avxifma = 0
 | 
				
			||||||
 | 
					amxfp16 = 0
 | 
				
			||||||
 | 
					amxbf16 = 0
 | 
				
			||||||
 | 
					amxint8 = 0
 | 
				
			||||||
 | 
					amxtile = 0
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
</details>
 | 
					</details>
 | 
				
			||||||
@ -616,7 +632,7 @@ _`fma4` on zen1, ISA in hypervisor, etc._
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
|CPU|ISA|
 | 
					|CPU|ISA|
 | 
				
			||||||
|:---:|---|
 | 
					|:---:|---|
 | 
				
			||||||
|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxifma`|
 | 
					|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxifma` `amxfp16` `amxbf16` `amxint8` `amxtile`|
 | 
				
			||||||
|arm|`half` `edsp` `neon` `vfpv4` `idiv`|
 | 
					|arm|`half` `edsp` `neon` `vfpv4` `idiv`|
 | 
				
			||||||
|aarch64|`neon` `vfpv4` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `mte` `sve` `sve2` `svebf16` `svei8mm` `svef32mm` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `amx`|
 | 
					|aarch64|`neon` `vfpv4` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `mte` `sve` `sve2` `svebf16` `svei8mm` `svef32mm` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `amx`|
 | 
				
			||||||
|mips|`msa`|
 | 
					|mips|`msa`|
 | 
				
			||||||
@ -671,6 +687,7 @@ ruapu determines whether the CPU supports certain instruction sets by trying to
 | 
				
			|||||||
* [@dreamcmi](https://github.com/dreamcmi)  _Detect more risc-v ISA_
 | 
					* [@dreamcmi](https://github.com/dreamcmi)  _Detect more risc-v ISA_
 | 
				
			||||||
* [@cocoa-xu](https://github.com/cocoa-xu)  _Add FreeBSD support, python support_
 | 
					* [@cocoa-xu](https://github.com/cocoa-xu)  _Add FreeBSD support, python support_
 | 
				
			||||||
* [@YuzukiTsuru](https://github.com/YuzukiTsuru)  _Add OpenRISC support_
 | 
					* [@YuzukiTsuru](https://github.com/YuzukiTsuru)  _Add OpenRISC support_
 | 
				
			||||||
 | 
					* [@whyb](https://github.com/whyb)  _Detect x86 AMX_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## License
 | 
					## License
 | 
				
			||||||
MIT License
 | 
					MIT License
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										5
									
								
								main.c
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								main.c
									
									
									
									
									
								
							@ -47,6 +47,11 @@ int main()
 | 
				
			|||||||
    PRINT_ISA_SUPPORT(avxvnni)
 | 
					    PRINT_ISA_SUPPORT(avxvnni)
 | 
				
			||||||
    PRINT_ISA_SUPPORT(avxvnniint8)
 | 
					    PRINT_ISA_SUPPORT(avxvnniint8)
 | 
				
			||||||
    PRINT_ISA_SUPPORT(avxifma)
 | 
					    PRINT_ISA_SUPPORT(avxifma)
 | 
				
			||||||
 | 
					    PRINT_ISA_SUPPORT(amxfp16)
 | 
				
			||||||
 | 
					    PRINT_ISA_SUPPORT(amxbf16)
 | 
				
			||||||
 | 
					    PRINT_ISA_SUPPORT(amxint8)
 | 
				
			||||||
 | 
					    PRINT_ISA_SUPPORT(amxtile)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#elif __aarch64__ || defined(_M_ARM64)
 | 
					#elif __aarch64__ || defined(_M_ARM64)
 | 
				
			||||||
    PRINT_ISA_SUPPORT(neon)
 | 
					    PRINT_ISA_SUPPORT(neon)
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										8
									
								
								ruapu.h
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								ruapu.h
									
									
									
									
									
								
							@ -216,6 +216,10 @@ RUAPU_INSTCODE(avxvnni, 0xc4, 0xe2, 0x7d, 0x52, 0xc0) // vpdpwssd ymm0,ymm0,ymm0
 | 
				
			|||||||
RUAPU_INSTCODE(avxvnniint8, 0xc4, 0xe2, 0x7f, 0x50, 0xc0) // vpdpbssd ymm0,ymm0,ymm0
 | 
					RUAPU_INSTCODE(avxvnniint8, 0xc4, 0xe2, 0x7f, 0x50, 0xc0) // vpdpbssd ymm0,ymm0,ymm0
 | 
				
			||||||
// TODO:avxvnniint16, vpdpwusd xmm,xmm,xmm
 | 
					// TODO:avxvnniint16, vpdpwusd xmm,xmm,xmm
 | 
				
			||||||
RUAPU_INSTCODE(avxifma, 0xc4, 0xe2, 0xfd, 0xb4, 0xc0) // vpmadd52luq ymm0,ymm0,ymm0
 | 
					RUAPU_INSTCODE(avxifma, 0xc4, 0xe2, 0xfd, 0xb4, 0xc0) // vpmadd52luq ymm0,ymm0,ymm0
 | 
				
			||||||
 | 
					RUAPU_INSTCODE(amxfp16, 0xc4, 0xe2, 0x7b, 0x5c, 0xd1) // tdpfp16ps %tmm0, %tmm1, %tmm2
 | 
				
			||||||
 | 
					RUAPU_INSTCODE(amxbf16, 0xc4, 0xe2, 0x7a, 0x5c, 0xd1) // tdpbf16ps %tmm0, %tmm1, %tmm2
 | 
				
			||||||
 | 
					RUAPU_INSTCODE(amxint8, 0xc4, 0xe2, 0x7b, 0x5e, 0xd1) // tdpbssd %tmm0, %tmm1, %tmm2
 | 
				
			||||||
 | 
					RUAPU_INSTCODE(amxtile, 0xc4, 0xe2, 0x7a, 0x49, 0xc0) // tilezero %tmm0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#elif __aarch64__ || defined(_M_ARM64)
 | 
					#elif __aarch64__ || defined(_M_ARM64)
 | 
				
			||||||
RUAPU_INSTCODE(neon, 0x4e20d400) // fadd v0.4s,v0.4s,v0.4s
 | 
					RUAPU_INSTCODE(neon, 0x4e20d400) // fadd v0.4s,v0.4s,v0.4s
 | 
				
			||||||
@ -335,6 +339,10 @@ RUAPU_ISAENTRY(avxvnni)
 | 
				
			|||||||
RUAPU_ISAENTRY(avxvnniint8)
 | 
					RUAPU_ISAENTRY(avxvnniint8)
 | 
				
			||||||
// TODO:avxvnniint16
 | 
					// TODO:avxvnniint16
 | 
				
			||||||
RUAPU_ISAENTRY(avxifma)
 | 
					RUAPU_ISAENTRY(avxifma)
 | 
				
			||||||
 | 
					RUAPU_ISAENTRY(amxfp16)
 | 
				
			||||||
 | 
					RUAPU_ISAENTRY(amxbf16)
 | 
				
			||||||
 | 
					RUAPU_ISAENTRY(amxint8)
 | 
				
			||||||
 | 
					RUAPU_ISAENTRY(amxtile)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#elif __aarch64__ || defined(_M_ARM64)
 | 
					#elif __aarch64__ || defined(_M_ARM64)
 | 
				
			||||||
RUAPU_ISAENTRY(neon)
 | 
					RUAPU_ISAENTRY(neon)
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user