mirror of
https://github.com/netwide-assembler/nasm.git
synced 2025-10-10 00:25:06 -04:00
AVX512BF16 fix: -- VCVTNEPS2BF16 operand count -- VDPBF16PS operand size AVX512BF16 test files Checked with XED version: [v2025.06.08]
109 lines
3.0 KiB
NASM
109 lines
3.0 KiB
NASM
; AVX512-BF16 assembler test input (NASM syntax, Intel operand order).
; Exercises vcvtne2ps2bf16, vcvtneps2bf16 and vdpbf16ps with register,
; memory and {1toN} broadcast sources, with and without opmask decorators.
; All effective addresses in this file use 32-bit base/index registers.
bits 32
; ---------------------------------------------------------------------
; Unmasked, register sources, at xmm / ymm / zmm widths.
; ---------------------------------------------------------------------
        vcvtne2ps2bf16  xmm1, xmm2, xmm3
        vcvtne2ps2bf16  ymm1, ymm2, ymm3
        vcvtne2ps2bf16  zmm1, zmm2, zmm3

        ; Two-operand convert: the xmm1 destination is paired with both
        ; an xmm and a ymm source; the ymm1 destination with a zmm source.
        vcvtneps2bf16   xmm1, xmm2
        vcvtneps2bf16   xmm1, ymm2
        vcvtneps2bf16   ymm1, zmm2

        vdpbf16ps       xmm1, xmm2, xmm3
        vdpbf16ps       ymm1, ymm2, ymm3
        vdpbf16ps       zmm1, zmm2, zmm3
; ---------------------------------------------------------------------
; Unmasked, full-width memory source as the last operand.
; Addressing covers: plain base, base + disp, scaled index + disp.
; NOTE(review): the +1 and +64 displacements presumably probe the
; uncompressed vs. compressed disp8 paths -- confirm against the
; reference output.
; ---------------------------------------------------------------------
        vcvtne2ps2bf16  xmm1, xmm2, [eax]
        vcvtne2ps2bf16  ymm1, ymm2, [ecx + 1]
        vcvtne2ps2bf16  zmm1, zmm2, [2*edx + 64]

        ; Both xmm1-destination forms take a memory source, so an explicit
        ; oword / yword size keyword states which source width is meant.
        vcvtneps2bf16   xmm1, oword [eax]
        vcvtneps2bf16   xmm1, yword [ecx + 1]
        vcvtneps2bf16   ymm1, [2*edx + 64]

        vdpbf16ps       xmm1, xmm2, [eax]
        vdpbf16ps       ymm1, ymm2, [ecx + 1]
        vdpbf16ps       zmm1, zmm2, [2*edx + 64]
; ---------------------------------------------------------------------
; Unmasked, memory source with a {1toN} broadcast decorator
; (N = 4 / 8 / 16 across the three cases of each instruction).
; ---------------------------------------------------------------------
        vcvtne2ps2bf16  xmm1, xmm2, [eax]{1to4}
        vcvtne2ps2bf16  ymm1, ymm2, [ecx + 1]{1to8}
        vcvtne2ps2bf16  zmm1, zmm2, [2*edx + 4]{1to16}

        ; No size keyword here: the {1to4} and {1to8} cases both write
        ; xmm1 and differ only in the broadcast count.
        vcvtneps2bf16   xmm1, [eax]{1to4}
        vcvtneps2bf16   xmm1, [ecx + 1]{1to8}
        vcvtneps2bf16   ymm1, [2*edx + 4]{1to16}

        vdpbf16ps       xmm1, xmm2, [eax]{1to4}
        vdpbf16ps       ymm1, ymm2, [ecx + 1]{1to8}
        vdpbf16ps       zmm1, zmm2, [2*edx + 4]{1to16}
; ---------------------------------------------------------------------
; Opmask {k1} on the destination (no {z}), register sources.
; ---------------------------------------------------------------------
        vcvtne2ps2bf16  xmm1 {k1}, xmm2, xmm3
        vcvtne2ps2bf16  ymm1 {k1}, ymm2, ymm3
        vcvtne2ps2bf16  zmm1 {k1}, zmm2, zmm3

        vcvtneps2bf16   xmm1 {k1}, xmm2
        vcvtneps2bf16   xmm1 {k1}, ymm2
        vcvtneps2bf16   ymm1 {k1}, zmm2

        vdpbf16ps       xmm1 {k1}, xmm2, xmm3
        vdpbf16ps       ymm1 {k1}, ymm2, ymm3
        vdpbf16ps       zmm1 {k1}, zmm2, zmm3
; ---------------------------------------------------------------------
; Opmask {k1} on the destination (no {z}), full-width memory source.
; ---------------------------------------------------------------------
        vcvtne2ps2bf16  xmm1 {k1}, xmm2, [eax]
        vcvtne2ps2bf16  ymm1 {k1}, ymm2, [ecx + 1]
        vcvtne2ps2bf16  zmm1 {k1}, zmm2, [2*edx + 64]

        ; oword / yword state the source width for the two
        ; xmm1-destination forms.
        vcvtneps2bf16   xmm1 {k1}, oword [eax]
        vcvtneps2bf16   xmm1 {k1}, yword [ecx + 1]
        vcvtneps2bf16   ymm1 {k1}, [2*edx + 64]

        vdpbf16ps       xmm1 {k1}, xmm2, [eax]
        vdpbf16ps       ymm1 {k1}, ymm2, [ecx + 1]
        vdpbf16ps       zmm1 {k1}, zmm2, [2*edx + 64]
; ---------------------------------------------------------------------
; Opmask {k1} on the destination (no {z}), {1toN} broadcast memory
; source.
; ---------------------------------------------------------------------
        vcvtne2ps2bf16  xmm1 {k1}, xmm2, [eax]{1to4}
        vcvtne2ps2bf16  ymm1 {k1}, ymm2, [ecx + 1]{1to8}
        vcvtne2ps2bf16  zmm1 {k1}, zmm2, [2*edx + 4]{1to16}

        vcvtneps2bf16   xmm1 {k1}, [eax]{1to4}
        vcvtneps2bf16   xmm1 {k1}, [ecx + 1]{1to8}
        vcvtneps2bf16   ymm1 {k1}, [2*edx + 4]{1to16}

        vdpbf16ps       xmm1 {k1}, xmm2, [eax]{1to4}
        vdpbf16ps       ymm1 {k1}, ymm2, [ecx + 1]{1to8}
        vdpbf16ps       zmm1 {k1}, zmm2, [2*edx + 4]{1to16}
; ---------------------------------------------------------------------
; Opmask with zeroing ({k1}{z}) on the destination, register sources.
; All nine instructions carry {k1}{z}, so this group covers the zeroing
; counterpart of the {k1}-only register cases instead of repeating them.
; NOTE(review): if a reference encoding is checked in for this file it
; must be regenerated -- the six convert instructions below now assemble
; with the zeroing decorator.
; ---------------------------------------------------------------------
        vcvtne2ps2bf16  xmm1 {k1}{z}, xmm2, xmm3
        vcvtne2ps2bf16  ymm1 {k1}{z}, ymm2, ymm3
        vcvtne2ps2bf16  zmm1 {k1}{z}, zmm2, zmm3

        vcvtneps2bf16   xmm1 {k1}{z}, xmm2
        vcvtneps2bf16   xmm1 {k1}{z}, ymm2
        vcvtneps2bf16   ymm1 {k1}{z}, zmm2

        vdpbf16ps       xmm1 {k1}{z}, xmm2, xmm3
        vdpbf16ps       ymm1 {k1}{z}, ymm2, ymm3
        vdpbf16ps       zmm1 {k1}{z}, zmm2, zmm3
; ---------------------------------------------------------------------
; Opmask with zeroing ({k1}{z}) on the destination, full-width memory
; source.
; ---------------------------------------------------------------------
        vcvtne2ps2bf16  xmm1 {k1}{z}, xmm2, [eax]
        vcvtne2ps2bf16  ymm1 {k1}{z}, ymm2, [ecx + 1]
        vcvtne2ps2bf16  zmm1 {k1}{z}, zmm2, [2*edx + 64]

        ; oword / yword state the source width for the two
        ; xmm1-destination forms.
        vcvtneps2bf16   xmm1 {k1}{z}, oword [eax]
        vcvtneps2bf16   xmm1 {k1}{z}, yword [ecx + 1]
        vcvtneps2bf16   ymm1 {k1}{z}, [2*edx + 64]

        vdpbf16ps       xmm1 {k1}{z}, xmm2, [eax]
        vdpbf16ps       ymm1 {k1}{z}, ymm2, [ecx + 1]
        vdpbf16ps       zmm1 {k1}{z}, zmm2, [2*edx + 64]
; ---------------------------------------------------------------------
; Opmask with zeroing ({k1}{z}) on the destination, {1toN} broadcast
; memory source.
; ---------------------------------------------------------------------
        vcvtne2ps2bf16  xmm1 {k1}{z}, xmm2, [eax]{1to4}
        vcvtne2ps2bf16  ymm1 {k1}{z}, ymm2, [ecx + 1]{1to8}
        vcvtne2ps2bf16  zmm1 {k1}{z}, zmm2, [2*edx + 4]{1to16}

        vcvtneps2bf16   xmm1 {k1}{z}, [eax]{1to4}
        vcvtneps2bf16   xmm1 {k1}{z}, [ecx + 1]{1to8}
        vcvtneps2bf16   ymm1 {k1}{z}, [2*edx + 4]{1to16}

        vdpbf16ps       xmm1 {k1}{z}, xmm2, [eax]{1to4}
        vdpbf16ps       ymm1 {k1}{z}, ymm2, [ecx + 1]{1to8}
        vdpbf16ps       zmm1 {k1}{z}, zmm2, [2*edx + 4]{1to16}