0
0
mirror of https://github.com/netwide-assembler/nasm.git synced 2025-10-10 00:25:06 -04:00

insns.pl: sanity-check that instruction encodings match operands

Error out if an encoding position is invalid for its operand, such as
an "r" encoding applied to an "xmmrm" operand.

Document the instruction encoding symbols; there are too many of them
by now.

Add symbols 'n' and 'w' meaning immediates that are supposed to be
encoded as if they were 'm' memory addresses and 'v' register numbers,
respectively; this is necessary to indicate a validation exception.

Remove broken ARPL "memory-like" encoding. It probably never worked
anyway.

This verification caught two bugs already:

- VPMASKMOV[DQ] cannot omit the second operand.

- Incorrect operand encoding order for VREDUCESH.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin
2025-09-30 10:48:13 -07:00
parent 54547eba35
commit 48f7170772
3 changed files with 133 additions and 43 deletions

View File

@@ -7,8 +7,22 @@ and consumed by asm/assemble.c and disasm/disasm.c.
Values prefixed with \ are in octal, values prefixed with \x are in
hexadecimal.
The mnemonics are the ones used in x86/insns.txt, where applicable.
The mnemonics are the ones used in x86/insns.dat, where applicable.
In x86/insns.dat, the encoding slot of each operand is encoded as:
- implicit operand (no encoding)
x+y multiple encoding slots for one operand
r "r" position in modr/m, or base register with "+r"
m "m" position in modr/m
n immediate encoded in the "m" position in modr/m
b register encoded in the "m" position in modr/m
x register encoded in the "x" position in modr/m + sib (MIB)
v "v" register position in vex/evex
s "s" register position in /is4
w immediate encoded in the "v" position in vex/evex
i first immediate or mem_offs
j second immediate or mem_offs
Codes Mnemonic Explanation

View File

@@ -96,7 +96,7 @@ $bwdq MOVRS reg#,mem# [rm: evex.nf0.nd0.l0.m4.o# 8a# /r] FUTURE,SM
;# Load effective address
$wdq LEA reg#,mem [rm: o# 8d /r] 8086
$wdq LEA reg#,imm# [rm: o# 8d /r] 8086,ND
$wdq LEA reg#,imm# [rn: o# 8d /r] 8086,ND
;# The basic 8 arithmetic operations
$arith nf=nf ADD OR nf=,ADC nf=,SBB AND SUB XOR nf=,!evex,CMP
@@ -448,7 +448,7 @@ WBNOINVD void [ f3 0f 09] WBNOINVD,PRIV
INVPCID reg32,mem128 [rm: 66 0f38 82 /r] INVPCID,PRIV,NOLONG
INVPCID reg64,mem128 [rm: 66 0f38 82 /r] INVPCID,PRIV,LONG
INVPCID reg64,mem128 [rm: evex.nf0.nd0.l0.f3.m4.w1 f2 /r] APX,INVPCID,PRIV,LONG
INVPCID reg64,mem128 [rm: evex.nf0.nd0.l0.f3.m4.w1 f2 /r] APX,INVPCID,PRIV,LONG
INVLPG mem [m: 0f 01 /7] 486,PRIV
$wdq INVLPGA ax#,reg_ecx [--: a# 0f 01 df] X86_64,AMD
INVLPGA void [ adf 0f 01 df] X86_64,AMD,ND
@@ -578,7 +578,7 @@ $dq RDGSBASE reg# [m: w# f3 0f ae /1] LONG
$dq WRFSBASE reg# [m: w# f3 0f ae /2] LONG
$dq WRGSBASE reg# [m: w# f3 0f ae /3] LONG
$zwd ARPL rm16,sel# [mr: optw# 63 /r] 286,PROT,SM,NOLONG
$wd ARPL rm16,reg# [mr: optw# 63 /r] 286,PROT,SM,NOLONG
$wdq LAR reg#,rm_sel [rm: optd# 0f 02 /r] 286,PROT
$wdq LSL reg#,rm_sel [rm: optd# 0f 03 /r] 286,PROT
@@ -984,14 +984,14 @@ FWAIT void [ wait] 8086
XLATB void [ d7] 8086
XLAT void [ d7] 8086,ND
$bwdq CCMPscc spec4,rm#,reg# [vmr: evex.scc.dfv.l0.m4.o# 38# /r ] APX,SM1-2
$bwdq CCMPscc spec4,reg#,rm# [vrm: evex.scc.dfv.l0.m4.o# 3a# /r ] APX,SM1-2
$wdq CCMPscc spec4,rm#,sbyte# [vmi: evex.scc.dfv.l0.m4.o# 83 /7 ib,s ] APX,SM1-2
$bwdq CCMPscc spec4,rm#,imm# [vmi: evex.scc.dfv.l0.m4.o# 80# /7 i# ] APX,SM1-2
$bwdq CCMPscc spec4,rm#,reg# [wmr: evex.scc.dfv.l0.m4.o# 38# /r ] APX,SM1-2
$bwdq CCMPscc spec4,reg#,rm# [wrm: evex.scc.dfv.l0.m4.o# 3a# /r ] APX,SM1-2
$wdq CCMPscc spec4,rm#,sbyte# [wmi: evex.scc.dfv.l0.m4.o# 83 /7 ib,s ] APX,SM1-2
$bwdq CCMPscc spec4,rm#,imm# [wmi: evex.scc.dfv.l0.m4.o# 80# /7 i# ] APX,SM1-2
$bwdq CTESTscc spec4,rm#,reg# [vmr: evex.scc.dfv.l0.m4.o# 84# /r ] APX,SM1-2
$bwdq CTESTscc spec4,rm#,imm# [vmi: evex.scc.dfv.l0.m4.o# f6# /0 i# ] APX,SM1-2
$bwdq CTESTscc spec4,rm#,imm# [vmi: evex.scc.dfv.l0.m4.o# f6# /1 i# ] APX,SM1-2
$bwdq CTESTscc spec4,rm#,reg# [wmr: evex.scc.dfv.l0.m4.o# 84# /r ] APX,SM1-2
$bwdq CTESTscc spec4,rm#,imm# [wmi: evex.scc.dfv.l0.m4.o# f6# /0 i# ] APX,SM1-2
$bwdq CTESTscc spec4,rm#,imm# [wmi: evex.scc.dfv.l0.m4.o# f6# /1 i# ] APX,SM1-2
;# Conditional instructions
$wdq CMOVcc reg#,rm# [rm: o# 0f 40+c /r] P6,SM
@@ -2957,10 +2957,10 @@ VPMASKMOVD ymmreg,ymmreg*,mem256 [rvm: vex.nds.256.66.0f38.w0 8c /r] AVX2
VPMASKMOVQ xmmreg,xmmreg*,mem128 [rvm: vex.nds.128.66.0f38.w1 8c /r] AVX2
VPMASKMOVQ ymmreg,ymmreg*,mem256 [rvm: vex.nds.256.66.0f38.w1 8c /r] AVX2
VPMASKMOVD mem128,xmmreg*,xmmreg [mvr: vex.nds.128.66.0f38.w0 8e /r] AVX2
VPMASKMOVD mem256,ymmreg*,ymmreg [mvr: vex.nds.256.66.0f38.w0 8e /r] AVX2
VPMASKMOVQ mem128,xmmreg*,xmmreg [mvr: vex.nds.128.66.0f38.w1 8e /r] AVX2
VPMASKMOVQ mem256,ymmreg*,ymmreg [mvr: vex.nds.256.66.0f38.w1 8e /r] AVX2
VPMASKMOVD mem128,xmmreg,xmmreg [mvr: vex.nds.128.66.0f38.w0 8e /r] AVX2
VPMASKMOVD mem256,ymmreg,ymmreg [mvr: vex.nds.256.66.0f38.w0 8e /r] AVX2
VPMASKMOVQ mem128,xmmreg,xmmreg [mvr: vex.nds.128.66.0f38.w1 8e /r] AVX2
VPMASKMOVQ mem256,ymmreg,ymmreg [mvr: vex.nds.256.66.0f38.w1 8e /r] AVX2
VPSLLVD xmmreg,xmmreg*,xmmrm128 [rvm: vex.nds.128.66.0f38.w0 47 /r] AVX2
VPSLLVQ xmmreg,xmmreg*,xmmrm128 [rvm: vex.nds.128.66.0f38.w1 47 /r] AVX2
@@ -5749,7 +5749,7 @@ VRCPSH xmmreg|mask|z,xmmreg*,xmmrm16|sae [rvm:t1s: evex.nds.lig.66.map6.w0 4d /
VREDUCEPH xmmreg|mask|z,xmmrm128|b16,imm8 [rmi:fv: evex.128.np.0f3a.w0 56 /r ib] AVX512FP16,AVX512VL
VREDUCEPH ymmreg|mask|z,ymmrm256|b16,imm8 [rmi:fv: evex.256.np.0f3a.w0 56 /r ib] AVX512FP16,AVX512VL
VREDUCEPH zmmreg|mask|z,zmmrm512|b16|sae,imm8 [rmi:fv: evex.512.np.0f3a.w0 56 /r ib] AVX512FP16
VREDUCESH xmmreg|mask|z,xmmreg*,xmmrm16|sae,imm8 [rmvi:t1s: evex.nds.lig.np.0f3a.w0 57 /r ib] AVX512FP16
VREDUCESH xmmreg|mask|z,xmmreg*,xmmrm16|sae,imm8 [rvmi:t1s: evex.nds.lig.np.0f3a.w0 57 /r ib] AVX512FP16
VENDSCALEPH xmmreg|mask|z,xmmrm128|b16,imm8 [rmi:fv: evex.128.np.0f3a.w0 08 /r ib] AVX512FP16,AVX512VL
VENDSCALEPH ymmreg|mask|z,ymmrm256|b16,imm8 [rmi:fv: evex.256.np.0f3a.w0 08 /r ib] AVX512FP16,AVX512VL
VENDSCALEPH zmmreg|mask|z,zmmrm512|b16|sae,imm8 [rmi:fv: evex.512.np.0f3a.w0 08 /r ib] AVX512FP16

View File

@@ -118,7 +118,7 @@ sub startseq($$) {
my $enc = 0; # Legacy
my $map = 0; # Map 0
@codes = decodify(undef, $codestr, {});
@codes = decodify(undef, $codestr, {}, undef);
while (defined($c0 = shift(@codes))) {
$c1 = $codes[0]; # The immediate following code
@@ -740,7 +740,7 @@ sub format_insn($$$$) {
my ($num, $flagsindex);
my @bytecode;
my ($op, @ops, @opsize, $opp, @opx, @oppx, @decos, @opevex);
my %oppos;
my $opinfo;
return (undef, undef) if $operands eq 'ignore';
@@ -751,7 +751,8 @@ sub format_insn($$$$) {
set_implied_flags(\%flags);
# Generate byte code. This may modify the flags.
@bytecode = (decodify($opcode, $codes, \%flags, \%oppos), 0);
@bytecode = (decodify($opcode, $codes, \%flags, \$opinfo), 0);
my($oppos, $openc) = @$opinfo;
push(@bytecode_list, [@bytecode]);
$codes = hexstr(@bytecode);
count_bytecodes(@bytecode);
@@ -766,8 +767,13 @@ sub format_insn($$$$) {
@opsize = ();
@decos = ();
if ($operands ne 'void') {
my $opnum = scalar(@ops);
foreach $op (split(/,/, $operands)) {
my $opnum = scalar(@ops);
my $isreg = 0;
my $ismem = 0;
my $ismoffs = 0;
my $isimm = 0;
my $isrm = 0;
my $iszero = 0;
my $opsz = 0;
@opx = ();
@@ -778,6 +784,8 @@ sub format_insn($$$$) {
push(@opevex, $1);
}
$opp =~ s/^reg([0-9]*)na$/reg_na$1/;
if ($opp =~ s/([^0-9]0?)(8|16|32|64|80|128|256|512|1024|1k)$/$1/) {
push(@oppx, "bits$2");
$opsz = $1 + 0;
@@ -789,35 +797,68 @@ sub format_insn($$$$) {
$opp .= 'reg';
}
}
$opp =~ s/^mem$/memory/;
$opp =~ s/^memory_offs$/mem_offs/;
$opp =~ s/^mem$/memory/;
if ($opp =~ s/^(spec|imm)4$/$1/) {
push(@oppx, 'fourbits');
$isimm = 1;
}
$opp =~ s/^spec$/immediate/; # Immediate or special immediate
$opp =~ s/^imm$/imm_normal/; # Normal immediates only
$opp =~ s/^spec$/immediate/; # Special or normal immediate
$opp =~ s/^imm$/imm_normal/; # Normal immediate only
if ($opp =~ /^(unity|sbyted?word|[su]dword)$/) {
push(@oppx, 'imm_normal');
$isimm = 1;
}
if ($opp =~ /^imm/) {
$isimm = 1;
}
$opp =~ s/^([a-z]+)rm$/rm_$1/;
$opp =~ s/^(rm|reg)$/$1_gpr/;
$opp =~ s/^rm_k$/rm_opmask/;
$opp =~ s/^kreg$/opmaskreg/;
my $isreg = ($opp =~ /(\brm_|\breg_|reg\b)/);
my $isrm = $isreg || ($opp =~ /\bmem/);
my $isvec = ($opp =~ /\b[xyzt]mm/);
if ($isrm &&
if ($opp =~ /\brm_/) {
$isrm = 1;
} elsif ($opp =~ /(\breg_|reg\b)/) {
$isreg = 1;
} elsif ($opp =~ /\b[xyzt]?mem/) {
$ismem = 1;
}
if ($opp =~ /\bmem_offs/) {
$ismoffs = 1;
}
if ($opp =~ /\b[xyzt]mm/) {
$isvec = 1;
}
if (($isrm || ($ismem && !$ismoffs) || $isreg) &&
!(($flags{'EVEX'} && $isvec) || !$flags{'NOAPX'})) {
# Register numbers >= 16 disallowed
push(@oppx, 'rn_l16');
}
if ($isreg && $isvec &&
defined($oppos->{'b'}) && $opnum == $oppos->{'b'}) {
if ($isreg && $isvec && $openc->[$opnum] =~ /b/) {
$flags{'MOPVEC'}++;
}
push(@opx, $opp, @oppx) if $opp;
}
$op = join('|', @opx);
# Sanity-check the encoding of this operand
my $opvalid = '-';
if ($isreg) {
$opvalid .= 'rvmsbx';
} elsif ($isimm || $ismoffs) {
$opvalid .= 'ijnw';
} elsif ($ismem || $isrm) {
$opvalid .= 'm';
}
foreach my $c (split(//, $openc->[$opnum])) {
if (index($opvalid, $c) < 0) {
die "$fname:$line: $opcode: operand $opnum \"$op\": '$c' must be one of '$opvalid'\n";
}
}
$op = join('|',@opx);
push(@ops, $op);
push(@opsize, $opsz);
push(@decos, (@opevex ? join('|', @opevex) : '0'));
@@ -954,17 +995,17 @@ sub show_iflags($) {
#
# Turn a code string into a sequence of bytes
#
sub decodify($$$) {
sub decodify($$$$) {
# Although these are C-syntax strings, by convention they should have
# only octal escapes (for directives) and hexadecimal escapes
# (for verbatim bytes)
my($opcode, $codestr, $flags) = @_;
my($opcode, $codestr, $flags, $opinfo) = @_;
my @codes;
if ($codestr eq 'ignore') {
@codes = ();
} elsif ($codestr =~ /^\s*\[([^\]]*)\]\s*$/) {
@codes = byte_code_compile($opcode, $1, $flags);
@codes = byte_code_compile($opcode, $1, $flags, $opinfo);
} else {
# This really shouldn't happen anymore...
warn "$fname:$line: raw bytecodes?!\n";
@@ -1056,7 +1097,7 @@ sub tupletype($) {
# enter it as e.g. "r+v".
#
sub byte_code_compile($$$$) {
my($opcode, $str, $flags, $oppos) = @_;
my($opcode, $str, $flags, $opinfo) = @_;
my $opr;
my $opc;
my @codes = ();
@@ -1158,14 +1199,49 @@ sub byte_code_compile($$$$) {
$opc = lc($4);
$op = 0;
$oppos = {};
my $oppos = {};
my $openc = [];
if (defined($opinfo)) {
$$opinfo = [$oppos, $openc];
}
for ($i = 0; $i < length($opr); $i++) {
my $c = substr($opr,$i,1);
if ($c eq '+') {
die "$fname:$line: $opcode: invalid use of '+' in '$opr'\n"
if ($op < 1);
$op--;
} elsif ($c =~ /^[rmnvwsijbx-]$/) {
# n means an immediate which is encoded as a memory address,
# but unlike a mem_offs it supports rel encoding on 64 bits.
# w means an immediate to be encoded into the v register
# position.
(my $realc = $c) =~ tr/nw/mv/;
$openc->[$op] = '' unless (defined($openc->[$op]));
$openc->[$op] .= $c;
if (defined($oppos->{$realc})) {
my $what = ($c eq $realc) ? "'$c'" : "[${realc}${c}]";
die "$fname:$line: $opcode: More than one $what operand in '$opr'\n";
}
$oppos->{$realc} = $op unless ($realc eq '-');
$op++;
} else {
$oppos->{$c} = $op++;
}
die "$fname:$line: $opcode: Unknown operand encoding '$c'\n";
}
}
if (defined($oppos->{'m'})) {
if (defined($oppos->{'b'})) {
die "$fname:$line: $opcode: [mn] operand mutually exclusive with 'b'\n";
} elsif (defined($oppos->{'x'})) {
# memory operand + x register operand requires MIB
$flags->{'MIB'}++;
}
}
if (defined($oppos->{'s'}) && defined($oppos->{'i'})) {
die "$fname:$line: $opcode: 's' operand mutually exclusive with 'i'\n";
}
if (defined($oppos->{'j'}) && !defined($oppos->{'i'})) {
die "$fname:$line: $opcode 'j' without 'i' operand\n";
}
$tup = tupletype($tuple);
@@ -1223,7 +1299,7 @@ sub byte_code_compile($$$$) {
$prefix_ok = 0;
} elsif ($op eq '/r') {
if (!defined($oppos->{'r'}) || !defined($oppos->{'m'})) {
die "$fname:$line: $opcode: $op requires r and m operands\n";
die "$fname:$line: $opcode: $op requires 'r' and [mn] operands\n";
}
$opex = (($oppos->{'m'} & 4) ? 06 : 0) |
(($oppos->{'r'} & 4) ? 05 : 0);
@@ -1234,14 +1310,14 @@ sub byte_code_compile($$$$) {
$prefix_ok = 0;
} elsif ($op =~ m:^/([0-7])$:) {
if (!defined($oppos->{'m'})) {
die "$fname:$line: $opcode: $op requires an m operand\n";
die "$fname:$line: $opcode: $op requires an [mn] operand\n";
}
push(@codes, 06) if ($oppos->{'m'} & 4);
push(@codes, 0200 + (($oppos->{'m'} & 3) << 3) + $1);
$prefix_ok = 0;
} elsif ($op =~ m:^/([0-3]?)r([0-7])$:) {
if (!defined($oppos->{'r'})) {
die "$fname:$line: $opcode: $op requires an r operand\n";
die "$fname:$line: $opcode: $op requires an 'r' operand\n";
}
push(@codes, 05) if ($oppos->{'r'} & 4);
push(@codes, 0171);
@@ -1332,7 +1408,7 @@ sub byte_code_compile($$$$) {
$m = $2+0;
} elsif ($oq eq 'nds' || $oq eq 'ndd' || $oq eq 'dds') {
if (!defined($oppos->{'v'})) {
die "$fname:$line: $opcode: $vexname.$oq without 'v' operand\n";
die "$fname:$line: $opcode: $vexname.$oq without [vw] operand\n";
}
$has_nds = 1;
} else {
@@ -1476,7 +1552,7 @@ sub byte_code_compile($$$$) {
$flags->{'ZU_E'}++;
} elsif ($oq =~ /^(nds|ndd|nd|dds)$/) {
if (!defined($oppos->{'v'})) {
die "$fname:$line: $opcode: evex.$oq without 'v' operand\n";
die "$fname:$line: $opcode: evex.$oq without [vw] operand\n";
}
$nds = 1;
$nd = $oq eq 'nd';
@@ -1544,7 +1620,7 @@ sub byte_code_compile($$$$) {
} elsif (defined $imm_codes{$op}) {
if ($op eq 'seg') {
if ($last_imm lt 'i') {
die "$fname:$line: $opcode: seg without an immediate operand\n";
die "$fname:$line: $opcode: seg without an [ij] operand\n";
}
} else {
$last_imm++;