0
0
mirror of https://github.com/netwide-assembler/nasm.git synced 2025-10-10 00:25:06 -04:00
Files
nasm/travis/test/amx.asm
H. Peter Anvin (Intel) a398a41f0a travis: update AMX test
Update AMX test per previous commit from IntLatx64.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
2025-10-08 11:59:23 -07:00

118 lines
4.0 KiB
NASM

; Assemble everything that follows in 64-bit mode.
bits 64
;-----------------------------------------------------------------------
; amx <a>, <b>, <c>
;
; Emits one copy of every instruction form listed in the body, using
; registers chosen by the three arguments:
;
;   arg 1  pasted onto "tmm" to form treg and onto "zmm" to form zreg;
;          also used as-is as the third (immediate) operand in the
;          second line of each tile-row pair
;   arg 2  pasted onto "tmm" to form treg2
;   arg 3  pasted onto "tmm" to form treg3
;
; Memory operands use rsi / rdi for the tile configuration and rax
; (base) with rdx (index) everywhere else; eax is the register form of
; the third operand in the tile-row group.
;
; treg, treg2, treg3 and zreg are ordinary (not macro-local) single-line
; macros, so every invocation redefines them.
;
; The trailing tag on each line is kept from the original list and
; appears to name the AMX feature set the instruction belongs to.
;-----------------------------------------------------------------------
%macro amx 3
%define treg tmm %+ %1
%define treg2 tmm %+ %2
%define treg3 tmm %+ %3
%define zreg zmm %+ %1
; Tile configuration: load from [rsi], store to [rdi].
ldtilecfg [rsi] ;AMX_TILE
sttilecfg [rdi] ;AMX_TILE
; Zero the selected tile.
tilezero treg ;AMX_TILE
; Tile loads.  Each mnemonic is emitted with three addressing forms:
; base only, base+index, and base+index*2.
tileloadd treg, [rax] ;AMX_TILE
tileloadd treg, [rax+rdx] ;AMX_TILE
tileloadd treg, [rax+rdx*2] ;AMX_TILE
tileloaddt1 treg, [rax] ;AMX_TILE
tileloaddt1 treg, [rax+rdx] ;AMX_TILE
tileloaddt1 treg, [rax+rdx*2] ;AMX_TILE
; The same three addressing forms for the "rs" load mnemonics.
tileloaddrs treg, [rax] ;AMX-MOVRS
tileloaddrs treg, [rax+rdx] ;AMX-MOVRS
tileloaddrs treg, [rax+rdx*2] ;AMX-MOVRS
tileloaddrst1 treg, [rax] ;AMX-MOVRS
tileloaddrst1 treg, [rax+rdx] ;AMX-MOVRS
tileloaddrst1 treg, [rax+rdx*2] ;AMX-MOVRS
; Three-tile-operand forms: treg, treg2, treg3.
tdpbf16ps treg, treg2, treg3 ;AMX-BF16
tdpbssd treg, treg2, treg3 ;AMX_INT8
tdpbusd treg, treg2, treg3 ;AMX_INT8
tdpbsud treg, treg2, treg3 ;AMX_INT8
tdpbuud treg, treg2, treg3 ;AMX_INT8
tdpfp16ps treg, treg2, treg3 ;AMX-FP16
tcmmimfp16ps treg, treg2, treg3 ;AMX-COMPLEX
tcmmrlfp16ps treg, treg2, treg3 ;AMX-COMPLEX
tmmultf32ps treg, treg2, treg3 ;AMX_TF32
tdpbf8ps treg, treg2, treg3 ;AMX-FP8
tdpbhf8ps treg, treg2, treg3 ;AMX-FP8
tdphbf8ps treg, treg2, treg3 ;AMX-FP8
tdphf8ps treg, treg2, treg3 ;AMX-FP8
; Tile-row forms with a zmm destination.  Each mnemonic appears twice:
; once with eax as the third operand and once with the first macro
; argument used as an immediate.
tcvtrowd2ps zreg, treg, eax ;AMX-AVX512
tcvtrowd2ps zreg, treg, %1 ;AMX-AVX512
tcvtrowps2bf16h zreg, treg, eax ;AMX-AVX512
tcvtrowps2bf16h zreg, treg, %1 ;AMX-AVX512
tcvtrowps2bf16l zreg, treg, eax ;AMX-AVX512
tcvtrowps2bf16l zreg, treg, %1 ;AMX-AVX512
tcvtrowps2phh zreg, treg, eax ;AMX-AVX512
tcvtrowps2phh zreg, treg, %1 ;AMX-AVX512
tcvtrowps2phl zreg, treg, eax ;AMX-AVX512
tcvtrowps2phl zreg, treg, %1 ;AMX-AVX512
tilemovrow zreg, treg, eax ;AMX-AVX512
tilemovrow zreg, treg, %1 ;AMX-AVX512
; t2rpntlvwz* loads, again with base, base+index and base+index*2.
t2rpntlvwz0 treg, [rax] ;AMX-TRANSPOSE
t2rpntlvwz0 treg, [rax+rdx] ;AMX-TRANSPOSE
t2rpntlvwz0 treg, [rax+rdx*2] ;AMX-TRANSPOSE
t2rpntlvwz0t1 treg, [rax] ;AMX-TRANSPOSE
t2rpntlvwz0t1 treg, [rax+rdx] ;AMX-TRANSPOSE
t2rpntlvwz0t1 treg, [rax+rdx*2] ;AMX-TRANSPOSE
t2rpntlvwz1 treg, [rax] ;AMX-TRANSPOSE
t2rpntlvwz1 treg, [rax+rdx] ;AMX-TRANSPOSE
t2rpntlvwz1 treg, [rax+rdx*2] ;AMX-TRANSPOSE
t2rpntlvwz1t1 treg, [rax] ;AMX-TRANSPOSE
t2rpntlvwz1t1 treg, [rax+rdx] ;AMX-TRANSPOSE
t2rpntlvwz1t1 treg, [rax+rdx*2] ;AMX-TRANSPOSE
; Two-operand form; the same tile is given as both operands.
ttransposed treg, treg ;AMX-TRANSPOSE
; "rs" variants of the t2rpntlvwz* loads, same three addressing forms.
t2rpntlvwz0rs treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz0rs treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz0rs treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz0rst1 treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz0rst1 treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz0rst1 treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz1rs treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz1rs treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz1rs treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz1rst1 treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz1rst1 treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
t2rpntlvwz1rst1 treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
; Forms tagged as AMX-TRANSPOSE combined with another feature set.
; All take three tile operands except tconjtfp16, which takes two
; (the same tile twice).
ttdpbf16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-BF16
ttdpfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-FP16
ttcmmimfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-COMPLEX
ttcmmrlfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-COMPLEX
tconjtcmmimfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-COMPLEX
tconjtfp16 treg, treg ;AMX-TRANSPOSE + AMX-COMPLEX
ttmmultf32ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX_TF32
; Tile stores.  The indexed forms here are written with the
; comma-separated base,index spelling, while the loads above use "+".
; NOTE(review): presumably deliberate, so both spellings get
; exercised - confirm before normalising.
tilestored [rax], treg ;AMX_TILE
tilestored [rax,rdx], treg ;AMX_TILE
tilestored [rax,rdx*2], treg ;AMX_TILE
; Last instruction of the sequence; takes no operands.
tilerelease ;AMX_TILE
%endmacro
; Invoke the amx macro eight times.  The three register numbers start at
; 0, 1, 2 and each steps forward by one after every invocation, wrapping
; from 7 back to 0, so the argument triples run (0,1,2), (1,2,3), ...,
; (7,0,1).  After the loop n, m, l are back at 0, 1, 2.
%assign n 0
%assign m 1
%assign l 2
%rep 8
        amx     n, m, l
    ; Advance each number modulo 8, written as a 3-bit mask.
    %assign l ((l + 1) & 7)
    %assign m ((m + 1) & 7)
    %assign n ((n + 1) & 7)
%endrep