0
0
mirror of https://github.com/netwide-assembler/nasm.git synced 2025-10-10 00:25:06 -04:00
Files
nasm/test/avx512bf16-64.asm
InstLatx64 70ed10d2e2 AVX512BF16 fix
AVX512BF16 fix:
-- VCVTNEPS2BF16 operand count
-- VDPBF16PS operand size
AVX512BF16 test files
Checked with XED version: [v2025.06.08]
2025-10-06 19:17:51 +02:00

109 lines
3.0 KiB
NASM

; -----------------------------------------------------------------------
; Assembly-only test input for three AVX512-BF16 mnemonics:
;   vcvtne2ps2bf16  (three operands)
;   vcvtneps2bf16   (two operands)
;   vdpbf16ps       (three operands)
;
; The same nine-instruction pattern (three mnemonics x three vector
; widths) is repeated for every combination of
;   source operand : register, plain memory, broadcast memory {1toN}
;   destination    : unmasked, {k1}, {k1}{z}
; There are no labels, data or control flow; each line is one
; standalone test case.
;
; In every vcvtneps2bf16 line below the destination is xmm1 for both the
; xmm and the ymm source form, and ymm1 for the zmm source form.  The
; plain-memory forms with an xmm1 destination therefore carry an explicit
; oword/yword size, and the broadcast forms are told apart by
; {1to4}/{1to8}.
;
; NOTE(review): the displacements (+1, +64 on plain memory, +4 on
; broadcast memory) look chosen to exercise EVEX disp8 scaling versus a
; displacement that cannot be scaled -- confirm against the generator or
; reference output.
; -----------------------------------------------------------------------

; Assemble everything that follows in 64-bit mode.
BITS 64
; ---- unmasked, register source ----------------------------------------
vcvtne2ps2bf16 xmm1, xmm2, xmm3
vcvtne2ps2bf16 ymm1, ymm2, ymm3
vcvtne2ps2bf16 zmm1, zmm2, zmm3
vcvtneps2bf16 xmm1, xmm2
vcvtneps2bf16 xmm1, ymm2
vcvtneps2bf16 ymm1, zmm2
vdpbf16ps xmm1, xmm2, xmm3
vdpbf16ps ymm1, ymm2, ymm3
vdpbf16ps zmm1, zmm2, zmm3
; ---- unmasked, plain memory source ------------------------------------
vcvtne2ps2bf16 xmm1, xmm2, [rax]
vcvtne2ps2bf16 ymm1, ymm2, [rcx+1]
vcvtne2ps2bf16 zmm1, zmm2, [2*rdx+64]
; Explicit oword/yword: both source widths share the xmm1 destination.
vcvtneps2bf16 xmm1, oword [rax]
vcvtneps2bf16 xmm1, yword [rcx+1]
vcvtneps2bf16 ymm1, [2*rdx+64]
vdpbf16ps xmm1, xmm2, [rax]
vdpbf16ps ymm1, ymm2, [rcx+1]
vdpbf16ps zmm1, zmm2, [2*rdx+64]
; ---- unmasked, broadcast memory source --------------------------------
vcvtne2ps2bf16 xmm1, xmm2, [rax]{1to4}
vcvtne2ps2bf16 ymm1, ymm2, [rcx+1]{1to8}
vcvtne2ps2bf16 zmm1, zmm2, [2*rdx+4]{1to16}
; The {1toN} count selects the source width, so no size keyword is given.
vcvtneps2bf16 xmm1, [rax]{1to4}
vcvtneps2bf16 xmm1, [rcx+1]{1to8}
vcvtneps2bf16 ymm1, [2*rdx+4]{1to16}
vdpbf16ps xmm1, xmm2, [rax]{1to4}
vdpbf16ps ymm1, ymm2, [rcx+1]{1to8}
vdpbf16ps zmm1, zmm2, [2*rdx+4]{1to16}
; ---- {k1} opmask on destination, register source ----------------------
vcvtne2ps2bf16 xmm1 {k1}, xmm2, xmm3
vcvtne2ps2bf16 ymm1 {k1}, ymm2, ymm3
vcvtne2ps2bf16 zmm1 {k1}, zmm2, zmm3
vcvtneps2bf16 xmm1 {k1}, xmm2
vcvtneps2bf16 xmm1 {k1}, ymm2
vcvtneps2bf16 ymm1 {k1}, zmm2
vdpbf16ps xmm1 {k1}, xmm2, xmm3
vdpbf16ps ymm1 {k1}, ymm2, ymm3
vdpbf16ps zmm1 {k1}, zmm2, zmm3
; ---- {k1} opmask on destination, plain memory source ------------------
vcvtne2ps2bf16 xmm1 {k1}, xmm2, [rax]
vcvtne2ps2bf16 ymm1 {k1}, ymm2, [rcx+1]
vcvtne2ps2bf16 zmm1 {k1}, zmm2, [2*rdx+64]
vcvtneps2bf16 xmm1 {k1}, oword [rax]
vcvtneps2bf16 xmm1 {k1}, yword [rcx+1]
vcvtneps2bf16 ymm1 {k1}, [2*rdx+64]
vdpbf16ps xmm1 {k1}, xmm2, [rax]
vdpbf16ps ymm1 {k1}, ymm2, [rcx+1]
vdpbf16ps zmm1 {k1}, zmm2, [2*rdx+64]
; ---- {k1} opmask on destination, broadcast memory source --------------
vcvtne2ps2bf16 xmm1 {k1}, xmm2, [rax]{1to4}
vcvtne2ps2bf16 ymm1 {k1}, ymm2, [rcx+1]{1to8}
vcvtne2ps2bf16 zmm1 {k1}, zmm2, [2*rdx+4]{1to16}
vcvtneps2bf16 xmm1 {k1}, [rax]{1to4}
vcvtneps2bf16 xmm1 {k1}, [rcx+1]{1to8}
vcvtneps2bf16 ymm1 {k1}, [2*rdx+4]{1to16}
vdpbf16ps xmm1 {k1}, xmm2, [rax]{1to4}
vdpbf16ps ymm1 {k1}, ymm2, [rcx+1]{1to8}
vdpbf16ps zmm1 {k1}, zmm2, [2*rdx+4]{1to16}
; ---- {k1}{z} opmask + zeroing decorator, register source --------------
vcvtne2ps2bf16 xmm1 {k1}{z}, xmm2, xmm3
vcvtne2ps2bf16 ymm1 {k1}{z}, ymm2, ymm3
vcvtne2ps2bf16 zmm1 {k1}{z}, zmm2, zmm3
vcvtneps2bf16 xmm1 {k1}{z}, xmm2
vcvtneps2bf16 xmm1 {k1}{z}, ymm2
vcvtneps2bf16 ymm1 {k1}{z}, zmm2
vdpbf16ps xmm1 {k1}{z}, xmm2, xmm3
vdpbf16ps ymm1 {k1}{z}, ymm2, ymm3
vdpbf16ps zmm1 {k1}{z}, zmm2, zmm3
; ---- {k1}{z} opmask + zeroing decorator, plain memory source ----------
vcvtne2ps2bf16 xmm1 {k1}{z}, xmm2, [rax]
vcvtne2ps2bf16 ymm1 {k1}{z}, ymm2, [rcx+1]
vcvtne2ps2bf16 zmm1 {k1}{z}, zmm2, [2*rdx+64]
vcvtneps2bf16 xmm1 {k1}{z}, oword [rax]
vcvtneps2bf16 xmm1 {k1}{z}, yword [rcx+1]
; NOTE(review): the next line indexes with rax, whereas the matching
; unmasked and {k1} lines use [2*rdx+64] -- confirm whether this
; difference is intentional before "normalising" it.
vcvtneps2bf16 ymm1 {k1}{z}, [2*rax+64]
vdpbf16ps xmm1 {k1}{z}, xmm2, [rax]
vdpbf16ps ymm1 {k1}{z}, ymm2, [rcx+1]
vdpbf16ps zmm1 {k1}{z}, zmm2, [2*rdx+64]
; ---- {k1}{z} opmask + zeroing decorator, broadcast memory source ------
vcvtne2ps2bf16 xmm1 {k1}{z}, xmm2, [rax]{1to4}
vcvtne2ps2bf16 ymm1 {k1}{z}, ymm2, [rcx+1]{1to8}
vcvtne2ps2bf16 zmm1 {k1}{z}, zmm2, [2*rdx+4]{1to16}
vcvtneps2bf16 xmm1 {k1}{z}, [rax]{1to4}
vcvtneps2bf16 xmm1 {k1}{z}, [rcx+1]{1to8}
vcvtneps2bf16 ymm1 {k1}{z}, [2*rdx+4]{1to16}
vdpbf16ps xmm1 {k1}{z}, xmm2, [rax]{1to4}
vdpbf16ps ymm1 {k1}{z}, ymm2, [rcx+1]{1to8}
vdpbf16ps zmm1 {k1}{z}, zmm2, [2*rdx+4]{1to16}