From 48f7170772c1e7da64be586a7b51b2929d837e37 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 30 Sep 2025 10:48:13 -0700 Subject: [PATCH] insns.pl: sanity-check that instruction encodings match operands Error out if an encoding position is invalid, like an "r" operand matches an "xmmrm" operand. Document the instruction encoding symbols; there are too many of them by now. Add symbols 'n' and 'w' meaning immediates that are supposed to be encoded as if they were 'm' memory addresses and 'v' register numbers, respectively; this is necessary to indicate a validation exception. Remove broken ARPL "memory-like" encoding. It probably never worked anyway. This verification caught two bugs already: - VPMASKMOV[DQ] cannot omit the second operand. - Incorrect operand encoding order for VREDUCESH. Signed-off-by: H. Peter Anvin (Intel) --- x86/bytecode.txt | 16 +++++- x86/insns.dat | 30 +++++------ x86/insns.pl | 130 +++++++++++++++++++++++++++++++++++++---------- 3 files changed, 133 insertions(+), 43 deletions(-) diff --git a/x86/bytecode.txt b/x86/bytecode.txt index 08abf3c3..ee17e495 100644 --- a/x86/bytecode.txt +++ b/x86/bytecode.txt @@ -7,8 +7,22 @@ and consumed by asm/assemble.c and disasm/disasm.c. Values prefixed with \ are in octal, values prefixed with \x are in hexadecimal. -The mnemonics are the ones used in x86/insns.txt, where applicable. +The mnemonics are the ones used in x86/insns.dat, where applicable. 
+In x86/insns.dat, the encoding slot of each operand is encoded as: + + - implicit operand (no encoding) x+y multiple encoding slots for one operand r "r" position in modr/m, or base register with "+r" m "m" position in modr/m n immediate encoded in the "m" position in modr/m b register encoded in the "m" position in modr/m x register encoded in the "x" position in modr/m + sib (MIB) v "v" register position in vex/evex s "s" register position in /is4 w immediate encoded in the "v" position in vex/evex i first immediate or mem_offs j second immediate or mem_offs Codes Mnemonic Explanation diff --git a/x86/insns.dat b/x86/insns.dat index 26b9cae5..4da03ba4 100644 --- a/x86/insns.dat +++ b/x86/insns.dat @@ -96,7 +96,7 @@ $bwdq MOVRS reg#,mem# [rm: evex.nf0.nd0.l0.m4.o# 8a# /r] FUTURE,SM ;# Load effective address $wdq LEA reg#,mem [rm: o# 8d /r] 8086 -$wdq LEA reg#,imm# [rm: o# 8d /r] 8086,ND +$wdq LEA reg#,imm# [rn: o# 8d /r] 8086,ND ;# The basic 8 arithmetic operations $arith nf=nf ADD OR nf=,ADC nf=,SBB AND SUB XOR nf=,!evex,CMP @@ -448,7 +448,7 @@ WBNOINVD void [ f3 0f 09] WBNOINVD,PRIV INVPCID reg32,mem128 [rm: 66 0f38 82 /r] INVPCID,PRIV,NOLONG INVPCID reg64,mem128 [rm: 66 0f38 82 /r] INVPCID,PRIV,LONG -INVPCID reg64,mem128 [rm: evex.nf0.nd0.l0.f3.m4.w1 f2 /r] APX,INVPCID,PRIV,LONG +INVPCID reg64,mem128 [rm: evex.nf0.nd0.l0.f3.m4.w1 f2 /r] APX,INVPCID,PRIV,LONG INVLPG mem [m: 0f 01 /7] 486,PRIV $wdq INVLPGA ax#,reg_ecx [--: a# 0f 01 df] X86_64,AMD INVLPGA void [ adf 0f 01 df] X86_64,AMD,ND @@ -578,7 +578,7 @@ $dq RDGSBASE reg# [m: w# f3 0f ae /1] LONG $dq WRFSBASE reg# [m: w# f3 0f ae /2] LONG $dq WRGSBASE reg# [m: w# f3 0f ae /3] LONG -$zwd ARPL rm16,sel# [mr: optw# 63 /r] 286,PROT,SM,NOLONG +$wd ARPL rm16,reg# [mr: optw# 63 /r] 286,PROT,SM,NOLONG $wdq LAR reg#,rm_sel [rm: optd# 0f 02 /r] 286,PROT $wdq LSL reg#,rm_sel [rm: optd# 0f 03 /r] 286,PROT @@ -984,14 +984,14 @@ FWAIT void [ wait] 8086 XLATB void [ d7] 8086 XLAT void [ d7] 8086,ND 
-$bwdq CCMPscc spec4,rm#,reg# [vmr: evex.scc.dfv.l0.m4.o# 38# /r ] APX,SM1-2 -$bwdq CCMPscc spec4,reg#,rm# [vrm: evex.scc.dfv.l0.m4.o# 3a# /r ] APX,SM1-2 -$wdq CCMPscc spec4,rm#,sbyte# [vmi: evex.scc.dfv.l0.m4.o# 83 /7 ib,s ] APX,SM1-2 -$bwdq CCMPscc spec4,rm#,imm# [vmi: evex.scc.dfv.l0.m4.o# 80# /7 i# ] APX,SM1-2 +$bwdq CCMPscc spec4,rm#,reg# [wmr: evex.scc.dfv.l0.m4.o# 38# /r ] APX,SM1-2 +$bwdq CCMPscc spec4,reg#,rm# [wrm: evex.scc.dfv.l0.m4.o# 3a# /r ] APX,SM1-2 +$wdq CCMPscc spec4,rm#,sbyte# [wmi: evex.scc.dfv.l0.m4.o# 83 /7 ib,s ] APX,SM1-2 +$bwdq CCMPscc spec4,rm#,imm# [wmi: evex.scc.dfv.l0.m4.o# 80# /7 i# ] APX,SM1-2 -$bwdq CTESTscc spec4,rm#,reg# [vmr: evex.scc.dfv.l0.m4.o# 84# /r ] APX,SM1-2 -$bwdq CTESTscc spec4,rm#,imm# [vmi: evex.scc.dfv.l0.m4.o# f6# /0 i# ] APX,SM1-2 -$bwdq CTESTscc spec4,rm#,imm# [vmi: evex.scc.dfv.l0.m4.o# f6# /1 i# ] APX,SM1-2 +$bwdq CTESTscc spec4,rm#,reg# [wmr: evex.scc.dfv.l0.m4.o# 84# /r ] APX,SM1-2 +$bwdq CTESTscc spec4,rm#,imm# [wmi: evex.scc.dfv.l0.m4.o# f6# /0 i# ] APX,SM1-2 +$bwdq CTESTscc spec4,rm#,imm# [wmi: evex.scc.dfv.l0.m4.o# f6# /1 i# ] APX,SM1-2 ;# Conditional instructions $wdq CMOVcc reg#,rm# [rm: o# 0f 40+c /r] P6,SM @@ -2957,10 +2957,10 @@ VPMASKMOVD ymmreg,ymmreg*,mem256 [rvm: vex.nds.256.66.0f38.w0 8c /r] AVX2 VPMASKMOVQ xmmreg,xmmreg*,mem128 [rvm: vex.nds.128.66.0f38.w1 8c /r] AVX2 VPMASKMOVQ ymmreg,ymmreg*,mem256 [rvm: vex.nds.256.66.0f38.w1 8c /r] AVX2 -VPMASKMOVD mem128,xmmreg*,xmmreg [mvr: vex.nds.128.66.0f38.w0 8e /r] AVX2 -VPMASKMOVD mem256,ymmreg*,ymmreg [mvr: vex.nds.256.66.0f38.w0 8e /r] AVX2 -VPMASKMOVQ mem128,xmmreg*,xmmreg [mvr: vex.nds.128.66.0f38.w1 8e /r] AVX2 -VPMASKMOVQ mem256,ymmreg*,ymmreg [mvr: vex.nds.256.66.0f38.w1 8e /r] AVX2 +VPMASKMOVD mem128,xmmreg,xmmreg [mvr: vex.nds.128.66.0f38.w0 8e /r] AVX2 +VPMASKMOVD mem256,ymmreg,ymmreg [mvr: vex.nds.256.66.0f38.w0 8e /r] AVX2 +VPMASKMOVQ mem128,xmmreg,xmmreg [mvr: vex.nds.128.66.0f38.w1 8e /r] AVX2 +VPMASKMOVQ mem256,ymmreg,ymmreg [mvr: 
vex.nds.256.66.0f38.w1 8e /r] AVX2 VPSLLVD xmmreg,xmmreg*,xmmrm128 [rvm: vex.nds.128.66.0f38.w0 47 /r] AVX2 VPSLLVQ xmmreg,xmmreg*,xmmrm128 [rvm: vex.nds.128.66.0f38.w1 47 /r] AVX2 @@ -5749,7 +5749,7 @@ VRCPSH xmmreg|mask|z,xmmreg*,xmmrm16|sae [rvm:t1s: evex.nds.lig.66.map6.w0 4d / VREDUCEPH xmmreg|mask|z,xmmrm128|b16,imm8 [rmi:fv: evex.128.np.0f3a.w0 56 /r ib] AVX512FP16,AVX512VL VREDUCEPH ymmreg|mask|z,ymmrm256|b16,imm8 [rmi:fv: evex.256.np.0f3a.w0 56 /r ib] AVX512FP16,AVX512VL VREDUCEPH zmmreg|mask|z,zmmrm512|b16|sae,imm8 [rmi:fv: evex.512.np.0f3a.w0 56 /r ib] AVX512FP16 -VREDUCESH xmmreg|mask|z,xmmreg*,xmmrm16|sae,imm8 [rmvi:t1s: evex.nds.lig.np.0f3a.w0 57 /r ib] AVX512FP16 +VREDUCESH xmmreg|mask|z,xmmreg*,xmmrm16|sae,imm8 [rvmi:t1s: evex.nds.lig.np.0f3a.w0 57 /r ib] AVX512FP16 VENDSCALEPH xmmreg|mask|z,xmmrm128|b16,imm8 [rmi:fv: evex.128.np.0f3a.w0 08 /r ib] AVX512FP16,AVX512VL VENDSCALEPH ymmreg|mask|z,ymmrm256|b16,imm8 [rmi:fv: evex.256.np.0f3a.w0 08 /r ib] AVX512FP16,AVX512VL VENDSCALEPH zmmreg|mask|z,zmmrm512|b16|sae,imm8 [rmi:fv: evex.512.np.0f3a.w0 08 /r ib] AVX512FP16 diff --git a/x86/insns.pl b/x86/insns.pl index ece4540d..65d7e283 100755 --- a/x86/insns.pl +++ b/x86/insns.pl @@ -118,7 +118,7 @@ sub startseq($$) { my $enc = 0; # Legacy my $map = 0; # Map 0 - @codes = decodify(undef, $codestr, {}); + @codes = decodify(undef, $codestr, {}, undef); while (defined($c0 = shift(@codes))) { $c1 = $codes[0]; # The immediate following code @@ -740,7 +740,7 @@ sub format_insn($$$$) { my ($num, $flagsindex); my @bytecode; my ($op, @ops, @opsize, $opp, @opx, @oppx, @decos, @opevex); - my %oppos; + my $opinfo; return (undef, undef) if $operands eq 'ignore'; @@ -751,7 +751,8 @@ sub format_insn($$$$) { set_implied_flags(\%flags); # Generate byte code. This may modify the flags. 
- @bytecode = (decodify($opcode, $codes, \%flags, \%oppos), 0); + @bytecode = (decodify($opcode, $codes, \%flags, \$opinfo), 0); + my($oppos, $openc) = @$opinfo; push(@bytecode_list, [@bytecode]); $codes = hexstr(@bytecode); count_bytecodes(@bytecode); @@ -766,8 +767,13 @@ sub format_insn($$$$) { @opsize = (); @decos = (); if ($operands ne 'void') { - my $opnum = scalar(@ops); foreach $op (split(/,/, $operands)) { + my $opnum = scalar(@ops); + my $isreg = 0; + my $ismem = 0; + my $ismoffs = 0; + my $isimm = 0; + my $isrm = 0; my $iszero = 0; my $opsz = 0; @opx = (); @@ -778,6 +784,8 @@ sub format_insn($$$$) { push(@opevex, $1); } + $opp =~ s/^reg([0-9]*)na$/reg_na$1/; + if ($opp =~ s/([^0-9]0?)(8|16|32|64|80|128|256|512|1024|1k)$/$1/) { push(@oppx, "bits$2"); $opsz = $1 + 0; @@ -789,35 +797,68 @@ sub format_insn($$$$) { $opp .= 'reg'; } } - $opp =~ s/^mem$/memory/; + $opp =~ s/^memory_offs$/mem_offs/; + $opp =~ s/^mem$/memory/; + if ($opp =~ s/^(spec|imm)4$/$1/) { push(@oppx, 'fourbits'); + $isimm = 1; } - $opp =~ s/^spec$/immediate/; # Immediate or special immediate - $opp =~ s/^imm$/imm_normal/; # Normal immediates only + $opp =~ s/^spec$/immediate/; # Special or normal immediate + $opp =~ s/^imm$/imm_normal/; # Normal immediate only if ($opp =~ /^(unity|sbyted?word|[su]dword)$/) { push(@oppx, 'imm_normal'); + $isimm = 1; + } + if ($opp =~ /^imm/) { + $isimm = 1; } $opp =~ s/^([a-z]+)rm$/rm_$1/; $opp =~ s/^(rm|reg)$/$1_gpr/; $opp =~ s/^rm_k$/rm_opmask/; $opp =~ s/^kreg$/opmaskreg/; - my $isreg = ($opp =~ /(\brm_|\breg_|reg\b)/); - my $isrm = $isreg || ($opp =~ /\bmem/); - my $isvec = ($opp =~ /\b[xyzt]mm/); - if ($isrm && + if ($opp =~ /\brm_/) { + $isrm = 1; + } elsif ($opp =~ /(\breg_|reg\b)/) { + $isreg = 1; + } elsif ($opp =~ /\b[xyzt]?mem/) { + $ismem = 1; + } + if ($opp =~ /\bmem_offs/) { + $ismoffs = 1; + } + if ($opp =~ /\b[xyzt]mm/) { + $isvec = 1; + } + if (($isrm || ($ismem && !$ismoffs) || $isreg) && !(($flags{'EVEX'} && $isvec) || !$flags{'NOAPX'})) 
{ # Register numbers >= 16 disallowed push(@oppx, 'rn_l16'); } - if ($isreg && $isvec && - defined($oppos->{'b'}) && $opnum == $oppos->{'b'}) { + if ($isreg && $isvec && $openc->[$opnum] =~ /b/) { $flags{'MOPVEC'}++; } push(@opx, $opp, @oppx) if $opp; } - $op = join('|', @opx); + + # Sanity-check the encoding of this operand + my $opvalid = '-'; + if ($isreg) { + $opvalid .= 'rvmsbx'; + } elsif ($isimm || $ismoffs) { + $opvalid .= 'ijnw'; + } elsif ($ismem || $isrm) { + $opvalid .= 'm'; + } + + foreach my $c (split(//, $openc->[$opnum])) { + if (index($opvalid, $c) < 0) { + die "$fname:$line: $opcode: operand $opnum \"$op\": '$c' must be one of '$opvalid'\n"; + } + } + + $op = join('|',@opx); push(@ops, $op); push(@opsize, $opsz); push(@decos, (@opevex ? join('|', @opevex) : '0')); @@ -954,17 +995,17 @@ sub show_iflags($) { # # Turn a code string into a sequence of bytes # -sub decodify($$$) { +sub decodify($$$$) { # Although these are C-syntax strings, by convention they should have # only octal escapes (for directives) and hexadecimal escapes # (for verbatim bytes) - my($opcode, $codestr, $flags) = @_; + my($opcode, $codestr, $flags, $opinfo) = @_; my @codes; if ($codestr eq 'ignore') { @codes = (); } elsif ($codestr =~ /^\s*\[([^\]]*)\]\s*$/) { - @codes = byte_code_compile($opcode, $1, $flags); + @codes = byte_code_compile($opcode, $1, $flags, $opinfo); } else { # This really shouldn't happen anymore... warn "$fname:$line: raw bytecodes?!\n"; @@ -1056,7 +1097,7 @@ sub tupletype($) { # enter it as e.g. "r+v". 
# sub byte_code_compile($$$$) { - my($opcode, $str, $flags, $oppos) = @_; + my($opcode, $str, $flags, $opinfo) = @_; my $opr; my $opc; my @codes = (); @@ -1158,14 +1199,49 @@ sub byte_code_compile($$$$) { $opc = lc($4); $op = 0; - $oppos = {}; + my $oppos = {}; + my $openc = []; + if (defined($opinfo)) { + $$opinfo = [$oppos, $openc]; + } for ($i = 0; $i < length($opr); $i++) { my $c = substr($opr,$i,1); if ($c eq '+') { + die "$fname:$line: $opcode: invalid use of '+' in '$opr'\n" + if ($op < 1); $op--; + } elsif ($c =~ /^[rmnvwsijbx-]$/) { + # n means an immediate which is encoded as a memory address, + # but unlike a mem_offs it supports rel encoding on 64 bits. + # w means an immediate to be encoded into the v register + # position. + (my $realc = $c) =~ tr/nw/mv/; + $openc->[$op] = '' unless (defined($openc->[$op])); + $openc->[$op] .= $c; + if (defined($oppos->{$realc})) { + my $what = ($c eq $realc) ? "'$c'" : "[${realc}${c}]"; + die "$fname:$line: $opcode: More than one $what operand in '$opr'\n"; + } + $oppos->{$realc} = $op unless ($realc eq '-'); + $op++; } else { - $oppos->{$c} = $op++; - } + die "$fname:$line: $opcode: Unknown operand encoding '$c'\n"; + } + } + + if (defined($oppos->{'m'})) { + if (defined($oppos->{'b'})) { + die "$fname:$line: $opcode: [mn] operand mutually exclusive with 'b'\n"; + } elsif (defined($oppos->{'x'})) { + # memory operand + x register operand requires MIB + $flags->{'MIB'}++; + } + } + if (defined($oppos->{'s'}) && defined($oppos->{'i'})) { + die "$fname:$line: $opcode: 's' operand mutually exclusive with 'i'\n"; + } + if (defined($oppos->{'j'}) && !defined($oppos->{'i'})) { + die "$fname:$line: $opcode 'j' without 'i' operand\n"; } $tup = tupletype($tuple); @@ -1223,7 +1299,7 @@ sub byte_code_compile($$$$) { $prefix_ok = 0; } elsif ($op eq '/r') { if (!defined($oppos->{'r'}) || !defined($oppos->{'m'})) { - die "$fname:$line: $opcode: $op requires r and m operands\n"; + die "$fname:$line: $opcode: $op requires 'r' and 
[mn] operands\n"; } $opex = (($oppos->{'m'} & 4) ? 06 : 0) | (($oppos->{'r'} & 4) ? 05 : 0); @@ -1234,14 +1310,14 @@ sub byte_code_compile($$$$) { $prefix_ok = 0; } elsif ($op =~ m:^/([0-7])$:) { if (!defined($oppos->{'m'})) { - die "$fname:$line: $opcode: $op requires an m operand\n"; + die "$fname:$line: $opcode: $op requires an [mn] operand\n"; } push(@codes, 06) if ($oppos->{'m'} & 4); push(@codes, 0200 + (($oppos->{'m'} & 3) << 3) + $1); $prefix_ok = 0; } elsif ($op =~ m:^/([0-3]?)r([0-7])$:) { if (!defined($oppos->{'r'})) { - die "$fname:$line: $opcode: $op requires an r operand\n"; + die "$fname:$line: $opcode: $op requires an 'r' operand\n"; } push(@codes, 05) if ($oppos->{'r'} & 4); push(@codes, 0171); @@ -1332,7 +1408,7 @@ sub byte_code_compile($$$$) { $m = $2+0; } elsif ($oq eq 'nds' || $oq eq 'ndd' || $oq eq 'dds') { if (!defined($oppos->{'v'})) { - die "$fname:$line: $opcode: $vexname.$oq without 'v' operand\n"; + die "$fname:$line: $opcode: $vexname.$oq without [vw] operand\n"; } $has_nds = 1; } else { @@ -1476,7 +1552,7 @@ sub byte_code_compile($$$$) { $flags->{'ZU_E'}++; } elsif ($oq =~ /^(nds|ndd|nd|dds)$/) { if (!defined($oppos->{'v'})) { - die "$fname:$line: $opcode: evex.$oq without 'v' operand\n"; + die "$fname:$line: $opcode: evex.$oq without [vw] operand\n"; } $nds = 1; $nd = $oq eq 'nd'; @@ -1544,7 +1620,7 @@ sub byte_code_compile($$$$) { } elsif (defined $imm_codes{$op}) { if ($op eq 'seg') { if ($last_imm lt 'i') { - die "$fname:$line: $opcode: seg without an immediate operand\n"; + die "$fname:$line: $opcode: seg without an [ij] operand\n"; } } else { $last_imm++;