diff --git a/x86/bytecode.txt b/x86/bytecode.txt index 08abf3c3..ee17e495 100644 --- a/x86/bytecode.txt +++ b/x86/bytecode.txt @@ -7,8 +7,22 @@ and consumed by asm/assemble.c and disasm/disasm.c. Values prefixed with \ are in octal, values prefixed with \x are in hexadecimal. -The mnemonics are the ones used in x86/insns.txt, where applicable. +The mnemonics are the ones used in x86/insns.dat, where applicable. +In x86/insns.dat, the encoding slot of each operand is encoded as: + + - implicit operand (no encoding) + x+y multiple encoding slots for one operand + r "r" position in modr/m, or base register with "+r" + m "m" position in modr/m + n immediate encoded in the "m" position in modr/m + b register encoded in the "m" position in modr/m + x register encoded in the "x" position in modr/m + sib (MIB) + v "v" register position in vex/evex + s "s" register position in /is4 + w immediate encoded in the "v" position in vex/evex + i first immediate or mem_offs + j second immediate or mem_offs Codes Mnemonic Explanation diff --git a/x86/insns.dat b/x86/insns.dat index 26b9cae5..4da03ba4 100644 --- a/x86/insns.dat +++ b/x86/insns.dat @@ -96,7 +96,7 @@ $bwdq MOVRS reg#,mem# [rm: evex.nf0.nd0.l0.m4.o# 8a# /r] FUTURE,SM ;# Load effective address $wdq LEA reg#,mem [rm: o# 8d /r] 8086 -$wdq LEA reg#,imm# [rm: o# 8d /r] 8086,ND +$wdq LEA reg#,imm# [rn: o# 8d /r] 8086,ND ;# The basic 8 arithmetic operations $arith nf=nf ADD OR nf=,ADC nf=,SBB AND SUB XOR nf=,!evex,CMP @@ -448,7 +448,7 @@ WBNOINVD void [ f3 0f 09] WBNOINVD,PRIV INVPCID reg32,mem128 [rm: 66 0f38 82 /r] INVPCID,PRIV,NOLONG INVPCID reg64,mem128 [rm: 66 0f38 82 /r] INVPCID,PRIV,LONG -INVPCID reg64,mem128 [rm: evex.nf0.nd0.l0.f3.m4.w1 f2 /r] APX,INVPCID,PRIV,LONG +INVPCID reg64,mem128 [rm: evex.nf0.nd0.l0.f3.m4.w1 f2 /r] APX,INVPCID,PRIV,LONG INVLPG mem [m: 0f 01 /7] 486,PRIV $wdq INVLPGA ax#,reg_ecx [--: a# 0f 01 df] X86_64,AMD INVLPGA void [ adf 0f 01 df] X86_64,AMD,ND @@ -578,7 +578,7 @@ $dq RDGSBASE reg# [m: w# 
f3 0f ae /1] LONG $dq WRFSBASE reg# [m: w# f3 0f ae /2] LONG $dq WRGSBASE reg# [m: w# f3 0f ae /3] LONG -$zwd ARPL rm16,sel# [mr: optw# 63 /r] 286,PROT,SM,NOLONG +$wd ARPL rm16,reg# [mr: optw# 63 /r] 286,PROT,SM,NOLONG $wdq LAR reg#,rm_sel [rm: optd# 0f 02 /r] 286,PROT $wdq LSL reg#,rm_sel [rm: optd# 0f 03 /r] 286,PROT @@ -984,14 +984,14 @@ FWAIT void [ wait] 8086 XLATB void [ d7] 8086 XLAT void [ d7] 8086,ND -$bwdq CCMPscc spec4,rm#,reg# [vmr: evex.scc.dfv.l0.m4.o# 38# /r ] APX,SM1-2 -$bwdq CCMPscc spec4,reg#,rm# [vrm: evex.scc.dfv.l0.m4.o# 3a# /r ] APX,SM1-2 -$wdq CCMPscc spec4,rm#,sbyte# [vmi: evex.scc.dfv.l0.m4.o# 83 /7 ib,s ] APX,SM1-2 -$bwdq CCMPscc spec4,rm#,imm# [vmi: evex.scc.dfv.l0.m4.o# 80# /7 i# ] APX,SM1-2 +$bwdq CCMPscc spec4,rm#,reg# [wmr: evex.scc.dfv.l0.m4.o# 38# /r ] APX,SM1-2 +$bwdq CCMPscc spec4,reg#,rm# [wrm: evex.scc.dfv.l0.m4.o# 3a# /r ] APX,SM1-2 +$wdq CCMPscc spec4,rm#,sbyte# [wmi: evex.scc.dfv.l0.m4.o# 83 /7 ib,s ] APX,SM1-2 +$bwdq CCMPscc spec4,rm#,imm# [wmi: evex.scc.dfv.l0.m4.o# 80# /7 i# ] APX,SM1-2 -$bwdq CTESTscc spec4,rm#,reg# [vmr: evex.scc.dfv.l0.m4.o# 84# /r ] APX,SM1-2 -$bwdq CTESTscc spec4,rm#,imm# [vmi: evex.scc.dfv.l0.m4.o# f6# /0 i# ] APX,SM1-2 -$bwdq CTESTscc spec4,rm#,imm# [vmi: evex.scc.dfv.l0.m4.o# f6# /1 i# ] APX,SM1-2 +$bwdq CTESTscc spec4,rm#,reg# [wmr: evex.scc.dfv.l0.m4.o# 84# /r ] APX,SM1-2 +$bwdq CTESTscc spec4,rm#,imm# [wmi: evex.scc.dfv.l0.m4.o# f6# /0 i# ] APX,SM1-2 +$bwdq CTESTscc spec4,rm#,imm# [wmi: evex.scc.dfv.l0.m4.o# f6# /1 i# ] APX,SM1-2 ;# Conditional instructions $wdq CMOVcc reg#,rm# [rm: o# 0f 40+c /r] P6,SM @@ -2957,10 +2957,10 @@ VPMASKMOVD ymmreg,ymmreg*,mem256 [rvm: vex.nds.256.66.0f38.w0 8c /r] AVX2 VPMASKMOVQ xmmreg,xmmreg*,mem128 [rvm: vex.nds.128.66.0f38.w1 8c /r] AVX2 VPMASKMOVQ ymmreg,ymmreg*,mem256 [rvm: vex.nds.256.66.0f38.w1 8c /r] AVX2 -VPMASKMOVD mem128,xmmreg*,xmmreg [mvr: vex.nds.128.66.0f38.w0 8e /r] AVX2 -VPMASKMOVD mem256,ymmreg*,ymmreg [mvr: vex.nds.256.66.0f38.w0 8e /r] AVX2 
-VPMASKMOVQ mem128,xmmreg*,xmmreg [mvr: vex.nds.128.66.0f38.w1 8e /r] AVX2 -VPMASKMOVQ mem256,ymmreg*,ymmreg [mvr: vex.nds.256.66.0f38.w1 8e /r] AVX2 +VPMASKMOVD mem128,xmmreg,xmmreg [mvr: vex.nds.128.66.0f38.w0 8e /r] AVX2 +VPMASKMOVD mem256,ymmreg,ymmreg [mvr: vex.nds.256.66.0f38.w0 8e /r] AVX2 +VPMASKMOVQ mem128,xmmreg,xmmreg [mvr: vex.nds.128.66.0f38.w1 8e /r] AVX2 +VPMASKMOVQ mem256,ymmreg,ymmreg [mvr: vex.nds.256.66.0f38.w1 8e /r] AVX2 VPSLLVD xmmreg,xmmreg*,xmmrm128 [rvm: vex.nds.128.66.0f38.w0 47 /r] AVX2 VPSLLVQ xmmreg,xmmreg*,xmmrm128 [rvm: vex.nds.128.66.0f38.w1 47 /r] AVX2 @@ -5749,7 +5749,7 @@ VRCPSH xmmreg|mask|z,xmmreg*,xmmrm16|sae [rvm:t1s: evex.nds.lig.66.map6.w0 4d / VREDUCEPH xmmreg|mask|z,xmmrm128|b16,imm8 [rmi:fv: evex.128.np.0f3a.w0 56 /r ib] AVX512FP16,AVX512VL VREDUCEPH ymmreg|mask|z,ymmrm256|b16,imm8 [rmi:fv: evex.256.np.0f3a.w0 56 /r ib] AVX512FP16,AVX512VL VREDUCEPH zmmreg|mask|z,zmmrm512|b16|sae,imm8 [rmi:fv: evex.512.np.0f3a.w0 56 /r ib] AVX512FP16 -VREDUCESH xmmreg|mask|z,xmmreg*,xmmrm16|sae,imm8 [rmvi:t1s: evex.nds.lig.np.0f3a.w0 57 /r ib] AVX512FP16 +VREDUCESH xmmreg|mask|z,xmmreg*,xmmrm16|sae,imm8 [rvmi:t1s: evex.nds.lig.np.0f3a.w0 57 /r ib] AVX512FP16 VENDSCALEPH xmmreg|mask|z,xmmrm128|b16,imm8 [rmi:fv: evex.128.np.0f3a.w0 08 /r ib] AVX512FP16,AVX512VL VENDSCALEPH ymmreg|mask|z,ymmrm256|b16,imm8 [rmi:fv: evex.256.np.0f3a.w0 08 /r ib] AVX512FP16,AVX512VL VENDSCALEPH zmmreg|mask|z,zmmrm512|b16|sae,imm8 [rmi:fv: evex.512.np.0f3a.w0 08 /r ib] AVX512FP16 diff --git a/x86/insns.pl b/x86/insns.pl index ece4540d..65d7e283 100755 --- a/x86/insns.pl +++ b/x86/insns.pl @@ -118,7 +118,7 @@ sub startseq($$) { my $enc = 0; # Legacy my $map = 0; # Map 0 - @codes = decodify(undef, $codestr, {}); + @codes = decodify(undef, $codestr, {}, undef); while (defined($c0 = shift(@codes))) { $c1 = $codes[0]; # The immediate following code @@ -740,7 +740,7 @@ sub format_insn($$$$) { my ($num, $flagsindex); my @bytecode; my ($op, @ops, @opsize, $opp, @opx, 
@oppx, @decos, @opevex); - my %oppos; + my $opinfo; return (undef, undef) if $operands eq 'ignore'; @@ -751,7 +751,8 @@ sub format_insn($$$$) { set_implied_flags(\%flags); # Generate byte code. This may modify the flags. - @bytecode = (decodify($opcode, $codes, \%flags, \%oppos), 0); + @bytecode = (decodify($opcode, $codes, \%flags, \$opinfo), 0); + my($oppos, $openc) = @$opinfo; push(@bytecode_list, [@bytecode]); $codes = hexstr(@bytecode); count_bytecodes(@bytecode); @@ -766,8 +767,13 @@ sub format_insn($$$$) { @opsize = (); @decos = (); if ($operands ne 'void') { - my $opnum = scalar(@ops); foreach $op (split(/,/, $operands)) { + my $opnum = scalar(@ops); + my $isreg = 0; + my $ismem = 0; + my $ismoffs = 0; + my $isimm = 0; + my $isrm = 0; my $iszero = 0; my $opsz = 0; @opx = (); @@ -778,6 +784,8 @@ sub format_insn($$$$) { push(@opevex, $1); } + $opp =~ s/^reg([0-9]*)na$/reg_na$1/; + if ($opp =~ s/([^0-9]0?)(8|16|32|64|80|128|256|512|1024|1k)$/$1/) { push(@oppx, "bits$2"); $opsz = $1 + 0; @@ -789,35 +797,68 @@ sub format_insn($$$$) { $opp .= 'reg'; } } - $opp =~ s/^mem$/memory/; + $opp =~ s/^memory_offs$/mem_offs/; + $opp =~ s/^mem$/memory/; + if ($opp =~ s/^(spec|imm)4$/$1/) { push(@oppx, 'fourbits'); + $isimm = 1; } - $opp =~ s/^spec$/immediate/; # Immediate or special immediate - $opp =~ s/^imm$/imm_normal/; # Normal immediates only + $opp =~ s/^spec$/immediate/; # Special or normal immediate + $opp =~ s/^imm$/imm_normal/; # Normal immediate only if ($opp =~ /^(unity|sbyted?word|[su]dword)$/) { push(@oppx, 'imm_normal'); + $isimm = 1; + } + if ($opp =~ /^imm/) { + $isimm = 1; } $opp =~ s/^([a-z]+)rm$/rm_$1/; $opp =~ s/^(rm|reg)$/$1_gpr/; $opp =~ s/^rm_k$/rm_opmask/; $opp =~ s/^kreg$/opmaskreg/; - my $isreg = ($opp =~ /(\brm_|\breg_|reg\b)/); - my $isrm = $isreg || ($opp =~ /\bmem/); - my $isvec = ($opp =~ /\b[xyzt]mm/); - if ($isrm && + if ($opp =~ /\brm_/) { + $isrm = 1; + } elsif ($opp =~ /(\breg_|reg\b)/) { + $isreg = 1; + } elsif ($opp =~ /\b[xyzt]?mem/) 
{ + $ismem = 1; + } + if ($opp =~ /\bmem_offs/) { + $ismoffs = 1; + } + if ($opp =~ /\b[xyzt]mm/) { + $isvec = 1; + } + if (($isrm || ($ismem && !$ismoffs) || $isreg) && !(($flags{'EVEX'} && $isvec) || !$flags{'NOAPX'})) { # Register numbers >= 16 disallowed push(@oppx, 'rn_l16'); } - if ($isreg && $isvec && - defined($oppos->{'b'}) && $opnum == $oppos->{'b'}) { + if ($isreg && $isvec && $openc->[$opnum] =~ /b/) { $flags{'MOPVEC'}++; } push(@opx, $opp, @oppx) if $opp; } - $op = join('|', @opx); + + # Sanity-check the encoding of this operand + my $opvalid = '-'; + if ($isreg) { + $opvalid .= 'rvmsbx'; + } elsif ($isimm || $ismoffs) { + $opvalid .= 'ijnw'; + } elsif ($ismem || $isrm) { + $opvalid .= 'm'; + } + + foreach my $c (split(//, $openc->[$opnum])) { + if (index($opvalid, $c) < 0) { + die "$fname:$line: $opcode: operand $opnum \"$op\": '$c' must be one of '$opvalid'\n"; + } + } + + $op = join('|',@opx); push(@ops, $op); push(@opsize, $opsz); push(@decos, (@opevex ? join('|', @opevex) : '0')); @@ -954,17 +995,17 @@ sub show_iflags($) { # # Turn a code string into a sequence of bytes # -sub decodify($$$) { +sub decodify($$$$) { # Although these are C-syntax strings, by convention they should have # only octal escapes (for directives) and hexadecimal escapes # (for verbatim bytes) - my($opcode, $codestr, $flags) = @_; + my($opcode, $codestr, $flags, $opinfo) = @_; my @codes; if ($codestr eq 'ignore') { @codes = (); } elsif ($codestr =~ /^\s*\[([^\]]*)\]\s*$/) { - @codes = byte_code_compile($opcode, $1, $flags); + @codes = byte_code_compile($opcode, $1, $flags, $opinfo); } else { # This really shouldn't happen anymore... warn "$fname:$line: raw bytecodes?!\n"; @@ -1056,7 +1097,7 @@ sub tupletype($) { # enter it as e.g. "r+v". 
# sub byte_code_compile($$$$) { - my($opcode, $str, $flags, $oppos) = @_; + my($opcode, $str, $flags, $opinfo) = @_; my $opr; my $opc; my @codes = (); @@ -1158,14 +1199,49 @@ sub byte_code_compile($$$$) { $opc = lc($4); $op = 0; - $oppos = {}; + my $oppos = {}; + my $openc = []; + if (defined($opinfo)) { + $$opinfo = [$oppos, $openc]; + } for ($i = 0; $i < length($opr); $i++) { my $c = substr($opr,$i,1); if ($c eq '+') { + die "$fname:$line: $opcode: invalid use of '+' in '$opr'\n" + if ($op < 1); $op--; + } elsif ($c =~ /^[rmnvwsijbx-]$/) { + # n means an immediate which is encoded as a memory address, + # but unlike a mem_offs it supports rel encoding on 64 bits. + # w means an immediate to be encoded into the v register + # position. + (my $realc = $c) =~ tr/nw/mv/; + $openc->[$op] = '' unless (defined($openc->[$op])); + $openc->[$op] .= $c; + if (defined($oppos->{$realc})) { + my $what = ($c eq $realc) ? "'$c'" : "[${realc}${c}]"; + die "$fname:$line: $opcode: More than one $what operand in '$opr'\n"; + } + $oppos->{$realc} = $op unless ($realc eq '-'); + $op++; } else { - $oppos->{$c} = $op++; - } + die "$fname:$line: $opcode: Unknown operand encoding '$c'\n"; + } + } + + if (defined($oppos->{'m'})) { + if (defined($oppos->{'b'})) { + die "$fname:$line: $opcode: [mn] operand mutually exclusive with 'b'\n"; + } elsif (defined($oppos->{'x'})) { + # memory operand + x register operand requires MIB + $flags->{'MIB'}++; + } + } + if (defined($oppos->{'s'}) && defined($oppos->{'i'})) { + die "$fname:$line: $opcode: 's' operand mutually exclusive with 'i'\n"; + } + if (defined($oppos->{'j'}) && !defined($oppos->{'i'})) { + die "$fname:$line: $opcode 'j' without 'i' operand\n"; } $tup = tupletype($tuple); @@ -1223,7 +1299,7 @@ sub byte_code_compile($$$$) { $prefix_ok = 0; } elsif ($op eq '/r') { if (!defined($oppos->{'r'}) || !defined($oppos->{'m'})) { - die "$fname:$line: $opcode: $op requires r and m operands\n"; + die "$fname:$line: $opcode: $op requires 'r' and 
[mn] operands\n"; } $opex = (($oppos->{'m'} & 4) ? 06 : 0) | (($oppos->{'r'} & 4) ? 05 : 0); @@ -1234,14 +1310,14 @@ sub byte_code_compile($$$$) { $prefix_ok = 0; } elsif ($op =~ m:^/([0-7])$:) { if (!defined($oppos->{'m'})) { - die "$fname:$line: $opcode: $op requires an m operand\n"; + die "$fname:$line: $opcode: $op requires an [mn] operand\n"; } push(@codes, 06) if ($oppos->{'m'} & 4); push(@codes, 0200 + (($oppos->{'m'} & 3) << 3) + $1); $prefix_ok = 0; } elsif ($op =~ m:^/([0-3]?)r([0-7])$:) { if (!defined($oppos->{'r'})) { - die "$fname:$line: $opcode: $op requires an r operand\n"; + die "$fname:$line: $opcode: $op requires an 'r' operand\n"; } push(@codes, 05) if ($oppos->{'r'} & 4); push(@codes, 0171); @@ -1332,7 +1408,7 @@ sub byte_code_compile($$$$) { $m = $2+0; } elsif ($oq eq 'nds' || $oq eq 'ndd' || $oq eq 'dds') { if (!defined($oppos->{'v'})) { - die "$fname:$line: $opcode: $vexname.$oq without 'v' operand\n"; + die "$fname:$line: $opcode: $vexname.$oq without [vw] operand\n"; } $has_nds = 1; } else { @@ -1476,7 +1552,7 @@ sub byte_code_compile($$$$) { $flags->{'ZU_E'}++; } elsif ($oq =~ /^(nds|ndd|nd|dds)$/) { if (!defined($oppos->{'v'})) { - die "$fname:$line: $opcode: evex.$oq without 'v' operand\n"; + die "$fname:$line: $opcode: evex.$oq without [vw] operand\n"; } $nds = 1; $nd = $oq eq 'nd'; @@ -1544,7 +1620,7 @@ sub byte_code_compile($$$$) { } elsif (defined $imm_codes{$op}) { if ($op eq 'seg') { if ($last_imm lt 'i') { - die "$fname:$line: $opcode: seg without an immediate operand\n"; + die "$fname:$line: $opcode: seg without an [ij] operand\n"; } } else { $last_imm++;