0
0
mirror of https://github.com/netwide-assembler/nasm.git synced 2025-11-08 23:27:15 -05:00
Files
nasm/x86/insns.pl
H. Peter Anvin (Intel) a7457e66cf Fix matching of branch instructions with prefixes and sizes
Matching of branch instructions with prefixes and sizes is, to say the
least, tricky. Work through it, and add a new macro to help.

Fixes: https://github.com/netwide-assembler/nasm/issues/144
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
2025-10-10 13:07:46 -07:00

1669 lines
47 KiB
Perl
Executable File

#!/usr/bin/perl
# SPDX-License-Identifier: BSD-2-Clause
# Copyright 1996-2025 The NASM Authors - All Rights Reserved
#
# insns.pl
#
# Parse insns.dat and produce generated source code files
#
# See x86/bytecode.txt for the definition of the byte code
# output to x86/insnsb.c.
#
require 'x86/insns-iflags.ph';
# Create disassembly root tables
#
# @vex_class names the encoding classes; the index into this array is
# the class number used as the first index of @distable (and returned
# as the first element of each triplet by genseqs()).
my @vex_class = ( 'novex', 'vex', 'xop', 'evex', 'rex2' );
my $vex_classes = scalar(@vex_class);
# Per class: how many opcode maps exist and the number of the first one.
my @map_count = ( 4, 32, 24, 8, 2 );
my @map_start = ( 0, 0, 8, 0, 0 );
# Per class: highest valid map number, computed below.
my @map_max;
# Build @distable so that $distable[class][map] is a hash (later keyed
# by opcode byte).  Maps below $map_start[class] get undef placeholders
# so that the second index is always the absolute map number.
# NOTE(review): $c and $m are package globals here, not lexicals.
for ($c = 0; $c < $vex_classes; $c++) {
push(@distable, [(undef) x $map_start[$c]]);
push(@map_max, $map_start[$c] + $map_count[$c] - 1);
for ($m = $map_start[$c]; $m <= $map_max[$c]; $m++) {
push(@{$distable[$c]}, {});
}
}
# NOTE(review): @vexlist and the initial @disasm_prefixes are not set
# anywhere in this part of the file; presumably they come from
# x86/insns-iflags.ph -- confirm.
@disasm_prefixes = (@vexlist, @disasm_prefixes, '');
%disasm_prefixes = map { $_ => 1 } @disasm_prefixes;
# Usage counters, one per possible byte code value; see count_bytecodes()
@bytecode_count = (0) x 256;
# Append values to the array referenced by a scalar, given a reference
# to that scalar.  If the scalar is still undefined, a new empty array
# is created in it first.  Returns what push() returns: the new number
# of elements in the array.
sub xpush($@) {
    my ($slot, @items) = @_;

    if (!defined($$slot)) {
        $$slot = [];
    }
    return push(@{$$slot}, @items);
}
#
# Here we determine the set of possible [encoding, map, opcode] sets
# for a given instruction, used to generate the disassembler tables.
#
# It is only necessary to take into account the following byte codes:
# \[1234] mean literal bytes, of course
# \1[0123] mean byte plus register value
# \30[0123] 0F 1[8-F]
# \35[01] REX2 prefix
# \35[567] legacy map number
# \26x \270 VEX prefix and map
# \24x \250 EVEX prefix and map
# prefixes ignored
#
# @skip_bytecode is indexed by byte code value; a true entry marks a
# byte code that startseq() steps over while looking for the opcode.
my @skip_bytecode = (0) x 256;
foreach my $bc (05..07, 014..017, 0271..0273, 0310..0337,
                0341..0347, 0360..0372, 0374..0376) {
    $skip_bytecode[$bc] = 1;
}

# To assist with debugging: byte code => number of patterns for which
# startseq() gave up on reaching that byte code.
my %skipped_due_to;
# Create combinatorial sets of possible encodings.
#
# Arguments: the flags string, the encoding class, the opcode map, and
# a list of opcode bytes.  Returns a list of [class, map, opcode] array
# references, one per opcode for each applicable encoding class.
sub genseqs($$$@) {
    my ($flags, $enc, $map, @opcodes) = @_;

    # A class 0 pattern in map 0 or 1 is also reachable with a REX2
    # encoding (class 4) unless it is flagged NOAPX or NOREX.
    my $rex2_ok = ($enc == 0 && $map < 2 &&
                   $flags !~ /\bNO(?:APX|REX)\b/);

    my @seqs;
    foreach my $class ($rex2_ok ? ($enc, 4) : ($enc)) {
        foreach my $opcode (@opcodes) {
            push(@seqs, [$class, $map, $opcode]);
        }
    }
    return @seqs;
}
# Work out which [encoding class, map, opcode] triplets an instruction
# pattern can start with, for the disassembler tables.
#
# Arguments: the encoding string of the pattern and its flags string.
# Returns the list produced by genseqs(), or an empty list if a byte
# code which is neither understood nor skippable is found first.
sub startseq($$) {
    my ($codestr, $flags) = @_;
    my $enc = 0;                # Encoding class; 0 is legacy
    my $map = 0;                # Opcode map; map 0 unless told otherwise

    my @codes = decodify(undef, $codestr, {}, undef);
    while (defined(my $bc = shift(@codes))) {
        my $arg = $codes[0];    # The immediately following code

        # Byte codes which pin down the opcode byte: report and stop
        # Fixed byte string, this should be the opcode
        return genseqs($flags, $enc, $map, $arg)
            if ($bc >= 01 && $bc <= 04);
        return genseqs($flags, $enc, $map, $arg...($arg+7))
            if ($bc >= 010 && $bc <= 013);
        return genseqs($flags, $enc, $map, $arg, $arg|2)
            if (($bc & ~013) == 0144);
        return genseqs($flags, $enc, $map, 0x18...0x1f)
            if (($bc & ~3) == 0300);

        # Byte codes which select the map and/or encoding class
        if ($bc >= 0355 && $bc <= 0357) {
            $map = $bc - 0354;
        } elsif (($bc & ~1) == 0350) {
            # REX2
            $enc = 4;           # rex2 required
        } elsif (($bc & ~3) == 0260 || $bc == 0270) {
            # VEX/XOP: two argument bytes, the first holds class and map
            my ($cm) = splice(@codes, 0, 2);
            $enc = (($cm >> 6) & 1) + 1; # vex or xop
            $map = $cm & 31;
        } elsif (($bc & ~3) == 0240 || $bc == 0250) {
            # EVEX: three prefix bytes plus the tuple byte
            my ($p0) = splice(@codes, 0, 4);
            $map = $p0 & 7;
            $enc = 3;           # evex
        } elsif (!$skip_bytecode[$bc]) {
            # This cannot be an opcode
            $skipped_due_to{$bc}++;
            last;
        }
    }

    return ();
}
# Generate relaxed form patterns if applicable
# * is used for an optional source operand, duplicating the previous one
# in the encoding if it is missing.
# ? is used for an optional destination operand which is not encoded if
# missing; if combined with evex.ndx set the nd bit if present.
#
# Each argument is a reference to a field array in which [1] is the
# operand list, [2] the encoding and [3] the flags.  Patterns without
# any * or ? operand are passed through untouched; the others are
# replaced by one new field array per generated variant.  Returns the
# resulting list of field array references.
sub relaxed_forms(@) {
my @field_list = ();
foreach my $fields (@_) {
if ($fields->[1] !~ /([\*\?])/) {
push(@field_list, $fields);
next;
}
my $flag = $1;
# This instruction has relaxed form(s)
# The encoding must have the form [operand-encodings: ...] so that it
# can be split up; otherwise warn and pass the pattern through as is.
if ($fields->[2] !~ /^(\[\s*)([^:\s]+)(\s*:.*\])$/) {
warn "$fname:$line: has a $flag operand but uses raw bytecodes\n";
push(@field_list, $fields);
next;
}
# Subfields of the instruction encoding field;
# [1] is the encoding of the operands
my @f2o = ($1, $2, $3);
# Bitmasks indexed by operand number: $opmask has a bit set for every
# optional (* or ?) operand, $ndmask only for the ? operands.
my $opmask = 0;
my $ndmask = 0;
# NOTE(review): this $ndflag is assigned below but never read; the
# variant loop declares its own.
my $ndflag = 0;
my @ops = split(/,/, $fields->[1]);
for (my $oi = 0; $oi < scalar @ops; $oi++) {
if ($ops[$oi] =~ /\*$/) {
if ($oi == 0) {
warn "$fname:$line: has a first operand with a *\n";
next;
}
$opmask |= 1 << $oi;
} elsif ($ops[$oi] =~ /\?$/) {
$opmask |= 1 << $oi;
$ndmask |= 1 << $oi;
$ndflag = 1;
}
}
# If .ndx is present, then change it to .nd0 or .nd1
# Set to .nd0 if no ndmask fields are present, otherwise 1
# (This line applies to the full form instruction; see below
# for the modified versions.)
my $has_ndx = ($f2o[2] =~ /(\.nd)x\b/);
my %flags = split_flags($fields->[3]);
my $has_zu = $flags{'ZU'};
# Enumerate every subset $oi of operands to leave out; only subsets
# made up entirely of optional operands are acted upon.
for (my $oi = 0; $oi < (1 << scalar @ops); $oi++) {
if (($oi & ~$opmask) == 0) {
# First and ending variants. 0 means a ? operand is to
# be omitted entirely, a 1 that it is to be treated
# like the *next* operand (emitted with a relax annotation.)
# This is used to generate the {zu} forms of instructions
# which support ND - the {zu} form is created as the ND form
# with the destination and first source operand the same.
my $fvar = !($has_ndx && ($oi & $ndmask));
my $evar = $fvar || ($has_ndx && !$has_zu);
for (my $var = $fvar; $var <= $evar; $var++) {
my @xops = ();
# NOTE(review): this inner $ndflag is never read either
my $ndflag = 0;
my $ondflag = 0;
my $setzu = 0;
# Working copy of the omission mask; it may gain bits below
my $voi = $oi;
# The new encoding field is rebuilt piece by piece in $f2
my $f2 = $f2o[0];
# Bitmask of the operands left out of this variant
my $dropped = 0;
for (my $oj = 0; $oj < scalar(@ops); $oj++) {
my $ob = 1 << $oj;
my $emit = 0;
if ($ob & ~$voi) {
$emit = 1;
} elsif ($ob & $ndmask) {
# Two adjacent ? operands are not supported
die if (($ob << 1) & $ndmask);
if (!$var) {
# Make it disappear completely
$dropped |= $ob;
next;
} else {
# Treat the *next* operand as an omitted
# * operand
$emit = 1;
$voi |= $ob << 1;
}
}
if ($emit) {
push(@xops, $ops[$oj]);
if ($ndmask & $ob) {
$setzu |= $has_ndx;
$ondflag = 1;
}
} else {
# Omitted * operand: a '+' in the operand encoding merges the
# following encoding letter into the previous operand
$dropped |= $ob;
$f2 .= '+';
}
$f2 .= substr($f2o[1], $oj, 1);
}
$f2 .= $f2o[2];
my @ff = @$fields;
$ff[1] = join(',', @xops);
$f2 =~ s/(\.nd)x\b/$1$ondflag/ if ($has_ndx);
$ff[2] = $f2;
my %newfl = %flags;
$newfl{'ZU'}++ if ($setzu && !$has_zu);
$newfl{'ND'}++ if ($oi && $var);
# Change ARx and SMx if needed
# Flags for dropped operands are deleted; flags for the operands after
# them are renumbered downwards by the number of operands dropped so far.
foreach my $as ('AR', 'SM') {
my $ndelta = 0;
for (my $i = 0; $i < $MAX_OPERANDS; $i++) {
if ($dropped & (1 << $i)) {
delete $newfl{"$as$i"};
$ndelta++;
} elsif ($ndelta) {
my $newi = $i - $ndelta;
if ($newfl{"$as$i"}) {
delete $newfl{"$as$i"};
$newfl{"$as$newi"}++ if ($newi >= 0);
}
}
}
}
$ff[3] = merge_flags(\%newfl);
push(@field_list, [@ff]);
}
}
}
}
return @field_list;
}
# Condition codes.
#
# Each value in %conds holds the condition code number in its low four
# bits ($c_ccmask) plus optional marker bits:
#   $c_nd  - conditional_forms() adds the ND flag to patterns generated
#            from this name
#   $c_cc  - skipped when the opcode name contains "scc"
#   $c_scc - skipped when the opcode name does not contain "scc"
# The empty name is an scc-only entry, i.e. the plain opcode name with
# no condition suffix.
my $c_ccmask = 0x0f;
my $c_nd = 0x10; # Not for the disassembler
my $c_cc = 0x20; # cc only (not scc)
my $c_scc = 0x40; # scc only (not cc)
my %conds = (
'o' => 0, 'no' => 1, 'c' => 2, 'nc' => 3,
'z' => 4, 'nz' => 5, 'na' => 6, 'a' => 7,
's' => 8, 'ns' => 9,
'pe' => 10|$c_cc, 'po' => 11|$c_cc,
't' => 10|$c_scc, 'f' => 11|$c_scc,
'l' => 12, 'nl' => 13, 'ng' => 14, 'g' => 15,
'ae' => 3|$c_nd, 'b' => 2|$c_nd, 'be' => 6|$c_nd, 'e' => 4|$c_nd,
'ge' => 13|$c_nd, 'le' => 14|$c_nd, 'nae' => 2|$c_nd, 'nb' => 3|$c_nd,
'nbe' => 7|$c_nd, 'ne' => 5|$c_nd, 'nge' => 12|$c_nd, 'nle' => 15|$c_nd,
'np' => 11|$c_nd|$c_cc, 'p' => 10|$c_nd|$c_cc,
'' => 11|$c_nd|$c_scc);
# Condition names in sorted order, so the generated patterns come out
# in a stable order
my @conds = sort keys(%conds);
# Generate conditional form patterns if applicable.
#
# Each argument is a reference to a field array: [0] opcode name,
# [2] encoding, [3] flags.  A pattern whose name contains "cc" or "scc"
# (lower case) is replaced by one pattern per applicable condition
# name; any other pattern is passed through unchanged.  Returns the
# resulting list of field array references.
sub conditional_forms(@) {
    my @expanded = ();

  PATTERN:
    foreach my $fields (@_) {
        # This is a case sensitive match!
        unless ($fields->[0] =~ /s?cc/) {
            # Not a conditional instruction pattern
            push(@expanded, $fields);
            next PATTERN;
        }

        unless ($fields->[2] =~ /^\[/) {
            warn "$fname:$line: conditional instruction using raw bytecodes\n";
            next PATTERN;
        }

        # scc patterns skip the cc-only names and vice versa
        my $exclude_mask = $c_scc;
        $exclude_mask = $c_cc if ($fields->[0] =~ /scc/);

      CONDITION:
        foreach my $cc (@conds) {
            my $ccval = $conds{$cc};
            next CONDITION if ($ccval & $exclude_mask);

            my $ccnum = $ccval & $c_ccmask;
            my @ff = @$fields;
            $ff[0] =~ s/s?cc/\U$cc/;

            if ($ff[2] =~ /^(\[.*?)\b([0-9a-f]{2})\+c\b(.*\])$/) {
                # "xx+c": fold the condition number into the opcode byte
                my ($pre, $opbyte, $post) = ($1, $2, $3);
                $ff[2] = $pre . sprintf('%02x', hex($opbyte) ^ $ccnum) . $post;
            } elsif ($ff[2] =~ /^(\[.*?\.scc\b)(.*\])$/) {
                # ".scc": append the condition number in decimal
                my ($pre, $post) = ($1, $2);
                $ff[2] = $pre . sprintf('%d', $ccnum) . $post;
            } else {
                my $eerr = $ff[2];
                warn "$fname:$line: invalid conditional encoding: $eerr\n";
                next CONDITION;
            }

            # Names marked $c_nd get the ND flag unless already present
            if (($ccval & $c_nd) && $ff[3] !~ /\bND\b/) {
                $ff[3] .= ',ND';
            }

            push(@expanded, [@ff]);
        }
    }

    return @expanded;
}
# APX: EVEX promoted forms of VEX instructions
#
# Each argument is a reference to a field array: [2] is the encoding
# and [3] the flags.  A pattern whose encoding uses "vex+" and which is
# not flagged NOAPX is replaced by a plain "vex" pattern, followed by an
# "evex" copy carrying the additional APX flag.  The EVEX copy is only
# generated if the opcode map fits in the EVEX map field, i.e. it is
# not given as .m8, .m9 or a two-or-more-digit map number.  All other
# patterns are passed through unchanged.  Returns the resulting list of
# field array references.
sub apx_evex_forms(@) {
    my @field_list;

    foreach my $fields (@_) {
        my @ff = @$fields;

        if ($ff[3] =~ /\bNOAPX\b/ ||
            $ff[2] !~ /^(\[.*?)\b(vex\+)(\.[^\s]*)?(.*\])$/) {
            push(@field_list, $fields);
            next;
        }

        # Captures from the encoding match above; the ".xxx" modifier
        # string is optional
        my ($head, $vexargs, $tail) = ($1, $3, $4);
        $vexargs = '' unless (defined($vexargs));

        # Maps 8 and up cannot be promoted.  The previous pattern,
        # /\.m([89|[1-9][0-9]+)\b/, had a malformed character class and
        # so never matched the single-digit maps 8 and 9.  The .mapN
        # spelling accepted by the VEX parser is recognized as well.
        my $doapx = ($vexargs !~ /\.m(?:ap)?(?:[89]|[1-9][0-9]+)\b/);

        $ff[2] = $head.'vex'.$vexargs.$tail;
        push(@field_list, [@ff]);

        if ($doapx) {
            $ff[2] = $head.'evex'.$vexargs.$tail;
            $ff[3] .= ',APX';
            push(@field_list, [@ff]);
        }
    }

    return @field_list;
}
print STDERR "Reading insns.dat...\n";

# Parse the command line.  An argument starting with '-' selects the
# output to generate (the last one given wins); everything else is
# collected as a positional argument.  Exactly two positional arguments
# are required: the input file and the output file.
@args = ();
undef $output;
foreach $arg ( @ARGV ) {
    if ( $arg =~ /^\-/ ) {
        if ( $arg =~ /^\-([abdin]|f[hc])$/ ) {
            $output = $1;
        } else {
            die "$0: Unknown option: ${arg}\n";
        }
    } else {
        push (@args, $arg);
    }
}

# Say what is expected rather than dying without any explanation
die "Usage: $0 -{a|b|d|i|n|fh|fc} insns.dat output-file\n"
    if (scalar(@args) != 2); # input output
($fname, $oname) = @args;
# Read the instruction database.  Each non-comment line has four
# whitespace-separated fields; the third may instead be a bracketed
# encoding containing whitespace, and the fourth (the flags) is
# upper-cased.  Each line is expanded in turn by relaxed_forms(),
# conditional_forms() and apx_evex_forms(), and every resulting
# pattern is passed to format_insn().
open(F, '<', $fname) || die "unable to open $fname: $!";

@bytecode_list = ();
%aname = ();
$line = 0;
$insns = 0;
$n_opcodes = 0;
my @allpatterns = ();

while (<F>) {
    $line++;
    chomp;
    next if ( /^\s*(\;.*|)$/ ); # comments or blank lines

    unless (/^\s*(\S+)\s+(\S+)\s+(\S+|\[.*\])\s+(\S+)\s*$/) {
        warn "$fname:$line: line does not contain four fields\n";
        next;
    }
    my @field_list = ([$1, $2, $3, uc($4)]);
    @field_list = relaxed_forms(@field_list);
    @field_list = conditional_forms(@field_list);
    @field_list = apx_evex_forms(@field_list);

    foreach my $fields (@field_list) {
        ($formatted, $nd) = format_insn(@$fields);
        if ($formatted) {
            # Assembler tables: patterns grouped by opcode name
            $insns++;
            xpush(\$aname{$fields->[0]}, [$formatted, $fields]);
        }
        # Opcode names are numbered in order of first appearance,
        # even if no pattern was generated for them
        if (!defined($k_opcodes{$fields->[0]})) {
            $k_opcodes{$fields->[0]} = $n_opcodes++;
        }
        if ($formatted && !$nd) {
            # Disassembler tables: file the index into @big under every
            # [class, map, opcode] this pattern can start with
            push(@big, [$formatted, $fields]);
            my @sseq = startseq($fields->[2], $fields->[3]);
            foreach my $i (@sseq) {
                xpush(\$distable[$i->[0]][$i->[1]]{$i->[2]}, $#big);
            }
        }
    }
}

close F;
#
# Generate the bytecode array. At this point, @bytecode_list contains
# the full set of bytecodes.
#
# Sequences are processed longest first.  For each sequence stored,
# the position of every one of its suffixes is recorded in
# %bytecode_pos (keyed by hex string), so a later sequence which equals
# a suffix of a stored one is not stored again.
#
# Sort by descending length
@bytecode_list = sort { scalar(@$b) <=> scalar(@$a) } @bytecode_list;
@bytecode_array = ();
%bytecode_pos = ();
$bytecode_next = 0;
foreach $bl (@bytecode_list) {
my $h = hexstr(@$bl);
# Already available, either in full or as the tail of a longer sequence
next if (defined($bytecode_pos{$h}));
push(@bytecode_array, $bl);
# Record the offset of this sequence and of each of its suffixes;
# each byte is two hex digits, so drop two characters per step
while ($h ne '') {
$bytecode_pos{$h} = $bytecode_next;
$h = substr($h, 2);
$bytecode_next++;
}
}
undef @bytecode_list;
# Opcode names in order of first appearance in the input file
@opcodes = sort { $k_opcodes{$a} <=> $k_opcodes{$b} } keys(%k_opcodes);
# -b: write the packed byte code array, followed by a comment block
# with usage statistics for each byte code value.
if ( $output eq 'b') {
    print STDERR "Writing $oname...\n";

    # Fail right away if the output file cannot be created
    open(B, '>', $oname) or die "$0: unable to open $oname: $!\n";

    print B "/* This file auto-generated from insns.dat by insns.pl" .
        " - don't edit it */\n\n";

    print B "#include \"nasm.h\"\n";
    print B "#include \"insns.h\"\n\n";

    print B "const uint8_t nasm_bytecodes[$bytecode_next] = {\n";

    # One line per stored sequence, prefixed by its starting offset
    $p = 0;
    foreach $bl (@bytecode_array) {
        printf B " /* %5d */ ", $p;
        foreach $d (@$bl) {
            printf B "%#o,", $d;
            $p++;
        }
        printf B "\n";
    }
    print B "};\n";

    print B "\n";
    print B "/*\n";
    print B " * Bytecode frequencies (including reuse):\n";
    print B " *\n";
    # 32 rows by 8 columns covering all 256 byte code values
    for ($i = 0; $i < 32; $i++) {
        print B " *";
        for ($j = 0; $j < 256; $j += 32) {
            print B " |" if ($j);
            printf B " %3o:%5d", $i+$j, $bytecode_count[$i+$j];
        }
        print B "\n";
    }
    print B " */\n";

    # Buffered write errors only show up when the file is closed
    close(B) or die "$0: error writing $oname: $!\n";
}
# -a: write the assembler tables: one itemplate array per opcode name
# that has patterns, then the nasm_instructions[] index listing the
# pattern count and table for every opcode.
if ( $output eq 'a' ) {
    print STDERR "Writing $oname...\n";

    # Fail right away if the output file cannot be created
    open(A, '>', $oname) or die "$0: unable to open $oname: $!\n";

    print A "/* This file auto-generated from insns.dat by insns.pl" .
        " - don't edit it */\n\n";

    print A "#include \"nasm.h\"\n";
    print A "#include \"insns.h\"\n\n";

    foreach my $i (@opcodes) {
        my $pat = $aname{$i};
        if (scalar(@$pat)) {
            printf A "static const struct itemplate instrux_%s[%d] = {\n",
                $i, scalar(@$pat);
            my $n = 0;
            foreach $j (@$pat) {
                # Two comment lines for debugging, then the entry itself
                printf A " /* %3d : %s */\n", $n++, join(' ', @{$j->[1]});
                print A ' /* ', show_bytecodes($j->[0]), ' : ', show_iflags($j->[0]), " */\n";
                print A ' ', codesubst($j->[0]), "\n";
            }
            print A "};\n\n";
        }
    }

    printf A "const struct itemplate_list nasm_instructions[%d] = {\n",
        scalar(@opcodes);
    foreach my $i (@opcodes) {
        my $pat = $aname{$i};
        my $npat = scalar(@$pat);
        # Opcodes without any pattern get a NULL table
        my $tbl = $npat ? "instrux_$i" : "NULL /* $i */";
        printf A " { %3d, %s },\n", $npat, $tbl;
    }
    print A "};\n";

    # Buffered write errors only show up when the file is closed
    close(A) or die "$0: error writing $oname: $!\n";
}
# -d: write the disassembler tables: the flat instrux[] array of all
# disassemblable patterns, a pointer list per [class, map, opcode]
# with at least one pattern, a 256-entry table per [class, map], and
# finally the ndisasm_itable[] root table.
if ( $output eq 'd' ) {
    print STDERR "Writing $oname...\n";

    # Fail right away if the output file cannot be created
    open(D, '>', $oname) or die "$0: unable to open $oname: $!\n";

    print D "/* This file auto-generated from insns.dat by insns.pl" .
        " - don't edit it */\n\n";

    my @skipped = sort { $a <=> $b } keys(%skipped_due_to);
    if (scalar @skipped) {
        print D "/*\n";
        print D " * Note: skipped patterns due to byte codes:\n";
        print D " *", map { sprintf(' 0%o', $_) } @skipped;
        print D "\n */\n\n";
    }

    print D "#include \"nasm.h\"\n";
    print D "#include \"insns.h\"\n\n";

    print D "static const struct itemplate instrux[] = {\n";
    $n = 0;
    foreach $j (@big) {
        printf D " /* %3d : %s */\n", $n++, join(' ', @{$j->[1]});
        print D ' /* ', show_bytecodes($j->[0]), ' : ', show_iflags($j->[0]), " */\n";
        print D ' ', codesubst($j->[0]), "\n";
    }
    print D "};\n";

    # $dinstname[class][map] is the name of the table for that map, or
    # 'NULL' if the map has no instructions.  It is indexed by the
    # absolute map number, like @distable, so only the valid maps
    # $map_start..$map_max of each class are visited and filled in.
    # (This loop used to start from $map_base[$c], an array which is
    # not defined anywhere in view, i.e. from 0.)
    my @dinstname;
    for (my $c = 0; $c < $vex_classes; $c++) {
        push(@dinstname, []);
        for (my $m = $map_start[$c]; $m <= $map_max[$c]; $m++) {
            my $ninst = scalar(keys %{$distable[$c][$m]});
            if (!$ninst) {
                $dinstname[$c][$m] = 'NULL';
            } else {
                my $tname = sprintf("itbl_%s_map%d", $vex_class[$c], $m);
                $dinstname[$c][$m] = $tname;
                my @itbls = ();
                for (my $o = 0; $o < 256; $o++) {
                    my $tbl = $distable[$c][$m]{$o};
                    if (defined($tbl)) {
                        # NULL-terminated list of candidate patterns
                        my $name = sprintf("%s_%02x", $tname, $o);
                        push(@itbls, $name);
                        printf D "\nstatic const struct itemplate * const %s[] = {\n", $name;
                        foreach my $j (@$tbl) {
                            print D " instrux + $j,\n";
                        }
                        print D " NULL\n};\n";
                    } else {
                        push(@itbls, 'NULL');
                    }
                }

                printf D "\nstatic const struct itemplate * const * const %s[256] = {\n", $tname;
                print D map { " $_,\n" } @itbls;
                print D "};\n";
            }
        }
    }

    print D "\nconst struct itemplate * const * const * const\n";
    print D "ndisasm_itable[] = {\n";
    for (my $c = 0; $c < $vex_classes; $c++) {
        my $class = $vex_class[$c];
        printf D " /* ---- %s ---- */\n", $class;
        for (my $m = $map_start[$c]; $m <= $map_max[$c]; $m++) {
            printf D " /* %2d */ %s,\n", $m, $dinstname[$c][$m];
        }
    }
    print D "};\n";

    # Buffered write errors only show up when the file is closed
    close(D) or die "$0: error writing $oname: $!\n";
}
# -i: write the header with the opcode enumeration and the constants
# derived from the tables above.
if ( $output eq 'i' ) {
    print STDERR "Writing $oname...\n";

    # Fail right away if the output file cannot be created
    open(I, '>', $oname) or die "$0: unable to open $oname: $!\n";

    print I "/* This file is auto-generated from insns.dat by insns.pl" .
        " - don't edit it */\n\n";
    print I "/* This file in included by nasm.h */\n\n";

    print I "/* Instruction names */\n\n";
    print I "#ifndef NASM_INSNSI_H\n";
    print I "#define NASM_INSNSI_H 1\n\n";
    print I "enum opcode {\n";
    # Track the longest opcode name while emitting the enumeration
    $maxlen = 0;
    foreach $i (@opcodes) {
        print I "\tI_${i},\n";
        $len = length($i);
        $maxlen = $len if ( $len > $maxlen );
    }
    print I "\tI_none = -1\n";
    print I "};\n\n";
    print I "#define MAX_INSLEN ", $maxlen, "\n";
    print I "#define MAX_OPERANDS ", $MAX_OPERANDS, "\n";
    print I "#define NASM_VEX_CLASSES ", $vex_classes, "\n";

    # MAP_BASE_<class> is the number of map entries belonging to the
    # preceding classes minus the first map number of this class
    my $mapcnt = 0;
    for (my $c = 0; $c < $vex_classes; $c++) {
        printf I "#define MAP_BASE_%s (%d-%d)\n",
            uc($vex_class[$c]), $mapcnt, $map_start[$c];
        $mapcnt += $map_count[$c];
    }

    print I "#define NO_DECORATOR\t{", join(',',(0) x $MAX_OPERANDS), "}\n";
    print I "\n#endif /* NASM_INSNSI_H */\n";

    # Buffered write errors only show up when the file is closed
    close(I) or die "$0: error writing $oname: $!\n";
}
# -n: write the table of instruction names, in lower case and in the
# same order as the opcode enumeration.
if ( $output eq 'n' ) {
    print STDERR "Writing $oname...\n";

    # Fail right away if the output file cannot be created
    open(N, '>', $oname) or die "$0: unable to open $oname: $!\n";

    print N "/* This file is auto-generated from insns.dat by insns.pl" .
        " - don't edit it */\n\n";
    print N "#include \"tables.h\"\n\n";
    print N "const char * const nasm_insn_names[] = {";

    # Separate the entries with commas.  The previous code compared the
    # opcode *name* numerically against $#opcodes to decide whether a
    # comma was needed, which was always true and so left a trailing
    # comma after the last name.
    print N join(',', map { "\n\t\"\L$_\E\"" } @opcodes);

    print N "\n};\n";

    # Buffered write errors only show up when the file is closed
    close(N) or die "$0: error writing $oname: $!\n";
}
# -fh and -fc: instruction flag header and C source; these are written
# by write_iflaggen_h() and write_iflag_c() respectively.
write_iflaggen_h() if ( $output eq 'fh' );
write_iflag_c() if ( $output eq 'fc' );

# Summary: the number of patterns for which format_insn() produced output
printf STDERR "Done: %d instructions\n", $insns;
# Count primary bytecodes, for statistics.
#
# Takes a byte code sequence and increments @bytecode_count for every
# byte code in it, stepping over the argument bytes that follow
# certain byte codes so that they are not counted themselves.
sub count_bytecodes(@) {
    my $pending = 0;            # Argument bytes still to step over

    foreach my $code (@_) {
        if ($pending) {
            $pending--;
            next;
        }

        $bytecode_count[$code]++;

        # Number of argument bytes following this byte code
        $pending =
            ($code >= 01 && $code <= 04)            ? $code :
            (($code & ~03) == 010)                  ? 1 :
            (($code & ~013) == 0144)                ? 1 :
            ($code >= 0171 && $code <= 0173)        ? 1 :
            (($code & ~3) == 0260 || $code == 0270) ? 2 :   # VEX
            (($code & ~3) == 0240 || $code == 0250) ? 4 :   # EVEX
            (($code & ~3) == 0304)                  ? 2 :
            ($code == 0330)                         ? 1 :
                                                      0;
    }
}
# Convert one instruction pattern into a C initializer string.
#
# Arguments: opcode name, operand list, encoding and flags string.
# Returns a two-element list: the initializer text (with an
# @@CODES-xxx@@ placeholder which codesubst() later replaces) and a
# true/false value saying whether the pattern is to be left out of the
# disassembler tables (it carries the ND or PSEUDO flag).  Returns
# (undef, undef) if the operand list is 'ignore'.
#
# Side effects: appends the compiled byte code to @bytecode_list and
# updates the byte code statistics.  Dies on operand encodings or
# flags that are inconsistent with the operands.
sub format_insn($$$$) {
    my ($opcode, $operands, $codes, $flags) = @_;
    my $flagsindex;
    my @bytecode;
    my ($op, @ops, @opsize, $opp, @opx, @oppx, @decos, @opevex);
    my $opinfo;

    return (undef, undef) if $operands eq 'ignore';

    my %flags = split_flags($flags);
    set_implied_flags(\%flags);

    # Generate byte code. This may modify the flags.
    @bytecode = (decodify($opcode, $codes, \%flags, \$opinfo), 0);
    my($oppos, $openc) = @$opinfo;
    push(@bytecode_list, [@bytecode]);
    $codes = hexstr(@bytecode);
    count_bytecodes(@bytecode);

    # ND is consumed here rather than passed on as an instruction flag
    my $nd = !!($flags{'ND'} || $flags{'PSEUDO'});
    delete $flags{'ND'};

    # format the operands
    $operands =~ s/[\*\?]//g;
    $operands =~ s/:/|colon,/g;
    @ops = ();
    @opsize = ();
    @decos = ();
    if ($operands ne 'void') {
        foreach $op (split(/,/, $operands)) {
            my $opnum = scalar(@ops);
            my $isreg = 0;
            my $ismem = 0;
            my $ismoffs = 0;
            my $isimm = 0;
            my $isrm = 0;
            # Per-operand like the other is* markers.  This used to be
            # an undeclared global which, once set by any vector
            # operand, stayed set for every later operand and pattern.
            my $isvec = 0;
            my $iszero = 0;
            my $opsz = 0;
            @opx = ();
            @opevex = ();
            foreach $opp (split(/\|/, $op)) {
                @oppx = ();
                if ($opp =~ s/^(b(16|32|64)|mask|z|er|sae)$//) {
                    push(@opevex, $1);
                }

                $opp =~ s/^reg([0-9]*)na$/reg_na$1/;

                if ($opp =~ s/([^0-9]0?)(8|16|32|64|80|128|256|512|1024|1k)$/$1/) {
                    # $1 is the part of the name that is kept, $2 is
                    # the size suffix.  The size used to be taken from
                    # $1, which always gave 0 and thereby disabled the
                    # size consistency checks further down.
                    my $bits = $2;
                    push(@oppx, "bits$bits");
                    $opsz = ($bits eq '1k') ? 1024 : $bits + 0;
                }

                if ($opp =~ s/0$//) {
                    push(@oppx, 'rn_zero');
                    $iszero = 1;
                    if ($opp !~ /reg/) {
                        $opp .= 'reg';
                    }
                }

                $opp =~ s/^memory_offs$/mem_offs/;
                $opp =~ s/^mem$/memory/;
                if ($opp =~ s/^(spec|imm)4$/$1/) {
                    push(@oppx, 'fourbits');
                    $isimm = 1;
                }
                $opp =~ s/^spec$/immediate/; # Special or normal immediate
                $opp =~ s/^imm$/imm_normal/; # Normal immediate only
                if ($opp =~ /^(unity|sbyted?word|[su]dword)$/) {
                    push(@oppx, 'imm_normal');
                    $isimm = 1;
                }
                if ($opp =~ /^imm/) {
                    $isimm = 1;
                }
                $opp =~ s/^([a-z]+)rm$/rm_$1/;
                $opp =~ s/^(rm|reg)$/$1_gpr/;
                $opp =~ s/^rm_k$/rm_opmask/;
                $opp =~ s/^kreg$/opmaskreg/;
                if ($opp =~ /\brm_/) {
                    $isrm = 1;
                } elsif ($opp =~ /(\breg_|reg\b)/) {
                    $isreg = 1;
                } elsif ($opp =~ /\b[xyzt]?mem/) {
                    $ismem = 1;
                }
                if ($opp =~ /\bmem_offs/) {
                    $ismoffs = 1;
                }
                if ($opp =~ /\b[xyzt]mm/) {
                    $isvec = 1;
                }
                if (($isrm || ($ismem && !$ismoffs) || $isreg) &&
                    !(($flags{'EVEX'} && $isvec) || !$flags{'NOAPX'})) {
                    # Register numbers >= 16 disallowed
                    push(@oppx, 'rn_l16');
                }
                if ($isreg && $isvec && $openc->[$opnum] =~ /b/) {
                    $flags{'MOPVEC'}++;
                }
                push(@opx, $opp, @oppx) if $opp;
            }

            # Sanity-check the encoding of this operand
            my $opvalid = '-';
            if ($isreg) {
                $opvalid .= 'rvmsbx';
            } elsif ($isimm || $ismoffs) {
                $opvalid .= 'ijnw';
            } elsif ($ismem || $isrm) {
                $opvalid .= 'm';
            }
            foreach my $c (split(//, $openc->[$opnum])) {
                if (index($opvalid, $c) < 0) {
                    die "$fname:$line: $opcode: operand $opnum \"$op\": '$c' must be one of '$opvalid'\n";
                }
            }

            $op = join('|',@opx);
            push(@ops, $op);
            push(@opsize, $opsz);
            push(@decos, (@opevex ? join('|', @opevex) : '0'));
        }
    }

    # Pad out to the fixed number of operand slots
    my $nops = scalar(@ops);
    while (scalar(@ops) < $MAX_OPERANDS) {
        push(@ops, '0');
        push(@opsize, 0);
        push(@decos, '0');
    }
    $operands = join(',', @ops);
    $operands =~ tr/a-z/A-Z/;

    $decorators = "{" . join(',', @decos) . "}";
    if ($decorators =~ /^{(0,)+0}$/) {
        $decorators = "NO_DECORATOR";
    }
    $decorators =~ tr/a-z/A-Z/;

    # Tidy up the flags now then the operand count is known
    set_implied_flags(\%flags, $nops);

    # Look for SM flags clearly inconsistent with operand bitsizes
    # (The loop bound used to be $nopr, which is not set anywhere in
    # view, so this check never ran.)
    my $ssize = 0;
    for (my $i = 0; $i < $nops; $i++) {
        next if (!$flags{"SM$i"} || !$opsize[$i]);
        if (!$ssize) {
            $ssize = $opsize[$i];
        } elsif ($opsize[$i] != $ssize) {
            die "$fname:$line: $opcode: inconsistent SM flag for argument $i\n";
        }
    }

    # Look for ARx flags
    my $arx = opr_flags(\%flags, 'AR');

    # Look for Sx flags that can never match operand bitsizes. If the
    # intent is to never match (require explicit sizes), use the SX flag.
    # This doesn't apply to registers that pre-define specific sizes;
    # this should really be derived from include/opflags.h...
    my $fsize = size_flag(\%flags);
    if ($fsize) {
        for (my $i = 0; $i < $nops; $i++) {
            next unless ($arx & (1 << $i));
            if ($opsize[$i] && $ops[$i] !~ /(\breg_|reg\b)/ &&
                $opsize[$i] != $fsize) {
                die "$fname:$line: $opcode: inconsistent Sx flag for argument $i ($ops[$i])\n";
            }
        }
    }

    # Generate the final index into the flags table
    $flagsindex = insns_flag_index(\%flags);
    die "$fname:$line: $opcode: error in flags $flags\n" unless (defined($flagsindex));

    return ("{I_$opcode, $nops, {$operands}, $decorators, \@\@CODES-$codes\@\@, $flagsindex},", $nd);
}
#
# Replace every @@CODES-xxx@@ placeholder in a pattern string with
# "nasm_bytecodes+N", where N is the offset recorded for byte code
# string xxx in %bytecode_pos.  Dies if no offset has been recorded.
# Returns the resulting string.
#
sub codesubst($) {
    my ($text) = @_;

    while ($text =~ /\@\@CODES-([0-9A-F]+)\@\@/) {
        my $hex = $1;
        my ($from, $to) = ($-[0], $+[0]);   # Extent of the placeholder

        my $pos = $bytecode_pos{$hex};
        die "$fname:$line: no position assigned to byte code $hex\n"
            unless (defined($pos));

        substr($text, $from, $to - $from) = "nasm_bytecodes+${pos}";
    }

    return $text;
}
#
# Extract byte codes in human-friendly form. Added as a comment
# to insnsa.c/insnsd.c to help debugging.
#
# Takes a pattern string containing an @@CODES-xxx@@ placeholder whose
# hex string ends in 00.  Every byte code is shown in three-digit
# octal; for byte codes known to take argument bytes these are attached
# in parentheses, in hex or octal depending on the byte code.  Returns
# 'no bytecode' if no such placeholder is found.
#
sub show_bytecodes($) {
    my ($pattern) = @_;

    return 'no bytecode'
        unless ($pattern =~ /\@\@CODES-([0-9A-F]+)00\@\@/);

    # Split the hex string into byte values
    my @bytes = map { hex($_) } ($1 =~ /(..?)/g);

    my @shown = ();
    my $remaining = 0;          # Argument bytes still owed to $shown[-1]
    my $argfmt = '%03o';        # How those argument bytes are printed

    foreach my $byte (@bytes) {
        if ($remaining) {
            # Non-leading bytes
            $remaining--;
            $shown[-1] .= sprintf($argfmt, $byte) . ($remaining ? ' ' : ')');
            next;
        }

        push(@shown, sprintf("%03o", $byte));
        if ($byte <= 4) {
            ($remaining, $argfmt) = ($byte, '%02x');
        } elsif (($byte & ~3) == 0240 || $byte == 0250) {
            ($remaining, $argfmt) = (3, '%02x');
        } elsif (($byte & ~3) == 0260 || $byte == 0270) {
            ($remaining, $argfmt) = (2, '%03o');
        } elsif ($byte == 0171) {
            ($remaining, $argfmt) = (1, '%03o');
        } elsif ($byte == 0172) {
            ($remaining, $argfmt) = (1, '%02x');
        }
        $shown[-1] .= '(' if ($remaining);
    }

    return join(' ', @shown);
}
# Get the iflags from an insnsa.c pattern.
# Added as a comment to help debugging.
#
# The flags index is the number directly before the closing "}," of
# the pattern string; it is passed to get_iflags() and the result of
# that call is returned.  Returns undef if the pattern does not end
# that way.
sub show_iflags($) {
    my ($pattern) = @_;

    if ($pattern =~ /([0-9]+)\},$/) {
        return get_iflags($1);
    }
    return undef;
}
#
# Turn a code string into a sequence of bytes
#
# Arguments: opcode name, encoding string, reference to the flags hash
# and a reference through which byte_code_compile() reports operand
# information.  'ignore' produces no codes, a bracketed encoding is
# handed to byte_code_compile(), and anything else is parsed as raw
# escape sequences.  Returns the list of byte code values.
#
sub decodify($$$$) {
    my ($opcode, $codestr, $flags, $opinfo) = @_;
    my @codes = ();

    if ($codestr eq 'ignore') {
        # Nothing to generate
    } elsif ($codestr =~ /^\s*\[([^\]]*)\]\s*$/) {
        @codes = byte_code_compile($opcode, $1, $flags, $opinfo);
    } else {
        # This really shouldn't happen anymore...
        warn "$fname:$line: raw bytecodes?!\n";

        # Although these are C-syntax strings, by convention they
        # should have only octal escapes (for directives) and
        # hexadecimal escapes (for verbatim bytes)
        my $rest = $codestr;
        while ($rest ne '') {
            my $value;
            if ($rest =~ /^\\x([0-9a-f]+)(.*)$/i) {
                ($value, $rest) = (hex $1, $2);
            } elsif ($rest =~ /^\\([0-7]{1,3})(.*)$/) {
                ($value, $rest) = (oct $1, $2);
            } else {
                die "$fname:$line: unknown code format in \"$codestr\"\n";
            }
            push(@codes, $value);
        }
    }

    # Flags may have been updated
    set_implied_flags($flags);

    return @codes;
}
# Turn a numeric list into a hex string: each value becomes (at least)
# two upper-case hexadecimal digits, with no separators in between.
sub hexstr(@) {
    return join('', map { sprintf("%02X", $_) } @_);
}
# EVEX tuple types offset is 0300. e.g. 0301 is for full vector(fv).
#
# Takes a tuple type name (the empty string for none) and returns
# 0300 plus its code; dies if the name is not known.
sub tupletype($) {
    my ($tuplestr) = @_;

    # Tuple type names in code order: the position in this list is the
    # tuple code.
    my @tuple_names = (
        '',      'fv',    'hv',    'fvm',
        't1s8',  't1s16', 't1s',   't1f32',
        't1f64', 't2',    't4',    't8',
        'hvm',   'qvm',   'ovm',   'm128',
        'dup',
    );
    my %tuple_codes;
    @tuple_codes{@tuple_names} = (0 .. $#tuple_names);

    die "$fname:$line: undefined tuple type: $tuplestr\n"
        unless (defined $tuple_codes{$tuplestr});

    return 0300 + $tuple_codes{$tuplestr};
}
#
# This function takes a series of byte codes in a format which is more
# typical of the Intel documentation, and encode it.
#
# The format looks like:
#
# [operands: opcodes]
#
# The operands word lists the order of the operands:
#
# r = register field in the modr/m
# m = modr/m
# v = VEX "v" field or DFV
# i = immediate
# s = register field of is4/imz2 field
# - = implicit (unencoded) operand
# x = indeX register of mib. 014..017 bytecodes are used.
#
# For an operand that should be filled into more than one field,
# enter it as e.g. "r+v".
#
sub byte_code_compile($$$$) {
my($opcode, $str, $flags, $opinfo) = @_;
my $opr;
my $opc;
my @codes = ();
my $litix = undef;
my $i;
my ($op, $oq);
my $opex;
my %imm_codes = (
'ib' => 020, # imm8
'ib,u' => 024, # Unsigned imm8
'ib,s' => 0274, # imm8 sign-extended to opsize or bits
'iw' => 030, # imm16
'iw,s' => 030, # imm16 sign-extended: NOT IMPLEMENTED
'iw,u' => 030, # imm16 zero-extended: NOT IMPLEMENTED
'id' => 040, # imm32
'id,s' => 0254, # imm32 sign-extended to opsize
'id,u' => 0264, # imm32 zero-extended to opsize
'iq' => 054, # imm64
'iq,s' => 054, # for orthogonality
'iq,u' => 054, # for orthogonality
'iwd' => 034, # imm16 or imm32, depending on opsize
'iwdq' => 044, # imm16/32/64, depending on addrsize
'rel8' => 050,
'rel16' => 060,
'rel32' => 070,
'rel' => 064, # 16 or 32 bit relative operand
'seg' => 074, # 16-bit segment value
'ibn' => 0300, # A valid HINT_NOP opcode
);
my %plain_codes = (
'o8' => undef, # 8-bit operand size (for orthogonality)
'o16' => 0320, # 16-bit operand size
'o32' => 0321, # 32-bit operand size
'odf' => 0322, # Operand size is default
'o64' => 0324, # 64-bit operand size requiring REX.W
'w1' => 0324,
'a16' => 0310,
'a32' => 0311,
'adf' => 0312, # Address size is default (disassembly)
'asz' => 0312, # Alias for adf, for macro convenience
'asm' => 0312, # Alias for adf, for macro convenience
'a64' => 0313,
'!osp' => 0364,
'!asp' => 0365,
'osp' => 0366,
'osz' => 0330, # Operand size depending on default
'f2i' => 0332, # F2 prefix, but 66 for operand size is OK
'f3i' => 0333, # F3 prefix, but 66 for operand size is OK
'mustrep' => 0336,
'mustrepne' => 0337,
'rex.l' => 0334,
'norexb' => 0314,
'norexx' => 0315,
'norexr' => 0316,
'norexw' => 0317,
'w0' => 0317,
'repe' => 0335,
'nohi' => 0325, # Use spl/bpl/sil/dil even without REX
'nof3' => 0326, # No REP 0xF3 prefix permitted
'norep' => 0331, # No REP prefix permitted
'wait' => 0341, # Needs a wait prefix
'osm' => 0342, # Operand size = mode (16/32/64)
'rex.b' => 0344,
'rex.x' => 0345,
'rex.r' => 0346,
'rex.w' => 0347,
'resb' => 0340,
'np' => 0360, # No prefix
'jlen' => 0373, # Length of jump
'hlexr' => 0271,
'hlenl' => 0272,
'hle' => 0273,
'optw' => 0336,
'optd' => 0337,
# This instruction takes XMM VSIB
'vsibx' => 0374,
'vm32x' => 0374,
'vm64x' => 0374,
# This instruction takes YMM VSIB
'vsiby' => 0375,
'vm32y' => 0375,
'vm64y' => 0375,
# This instruction takes ZMM VSIB
'vsibz' => 0376,
'vm32z' => 0376,
'vm64z' => 0376,
);
unless ($str =~ /^(([^\s:]*)\:*([^\s:]*)\:|)\s*(.*\S)\s*$/) {
die "$fname:$line: cannot parse: [$str]\n";
}
$opr = lc($2);
$tuple = lc($3); # Tuple type for AVX512
$opc = lc($4);
$op = 0;
my $oppos = {};
my $openc = [];
if (defined($opinfo)) {
$$opinfo = [$oppos, $openc];
}
for ($i = 0; $i < length($opr); $i++) {
my $c = substr($opr,$i,1);
if ($c eq '+') {
die "$fname:$line: $opcode: invalid use of '+' in '$opr'\n"
if ($op < 1);
$op--;
} elsif ($c =~ /^[rmnvwsijbx-]$/) {
# n means an immediate which is encoded as a memory address,
# but unlike a mem_offs it supports rel encoding on 64 bits.
# w means an immediate to be encoded into the v register
# position.
(my $realc = $c) =~ tr/nw/mv/;
$openc->[$op] = '' unless (defined($openc->[$op]));
$openc->[$op] .= $c;
if (defined($oppos->{$realc})) {
my $what = ($c eq $realc) ? "'$c'" : "[${realc}${c}]";
die "$fname:$line: $opcode: More than one $what operand in '$opr'\n";
}
$oppos->{$realc} = $op unless ($realc eq '-');
$op++;
} else {
die "$fname:$line: $opcode: Unknown operand encoding '$c'\n";
}
}
if (defined($oppos->{'m'})) {
if (defined($oppos->{'b'})) {
die "$fname:$line: $opcode: [mn] operand mutually exclusive with 'b'\n";
} elsif (defined($oppos->{'x'})) {
# memory operand + x register operand requires MIB
$flags->{'MIB'}++;
}
}
if (defined($oppos->{'s'}) && defined($oppos->{'i'})) {
die "$fname:$line: $opcode: 's' operand mutually exclusive with 'i'\n";
}
if (defined($oppos->{'j'}) && !defined($oppos->{'i'})) {
die "$fname:$line: $opcode 'j' without 'i' operand\n";
}
$tup = tupletype($tuple);
# Opcode map. Map 0 is the default, but not explicitly defined yet.
my $opmap = undef;
my $last_imm = 'h';
my $prefix_ok = 1;
foreach $op (split(/\s*(?:\s|(?=[\/\\]))/, $opc)) {
if (exists($plain_codes{$op})) {
# Plain code
my $pc = $plain_codes{$op};
push(@codes, $pc) if (defined($pc));
} elsif ($op =~ /^(o64)?(nw)$/) {
push(@codes, $1 eq '' ? 0327 : 0323);
$flags->{'NWSIZE'}++;
} elsif ($prefix_ok && $op =~ /^(66|f2|f3)$/) {
# 66/F2/F3 prefix used as an opcode extension
if ($op eq '66') {
push(@codes, 0361);
} elsif ($op eq 'f2') {
push(@codes, 0332);
} else {
push(@codes, 0333);
}
} elsif ($prefix_ok && $op =~ /^(0f|0f38|0f3a|m([0-3]))$/) {
if ($2 ne '') {
$opmap = $2 + 0;
} elsif ($op eq '0f') {
$opmap = 1;
} elsif ($op eq '0f38') {
$opmap = 2;
} elsif ($op eq '0f3a') {
$opmap = 3;
}
push(@codes, 0354 + $opmap) if ($opmap > 0);
$prefix_ok = 0;
} elsif ($op =~ /^(m[0-9]+|0f38|0f3a)$/) {
if ($prefix_ok) {
die "$fname:$line: $opcode: invalid legacy map: $m\n";
} elsif (defined($opmap)) {
die "$fname:$line: $opcode: multiple legacy map specifiers\n";
} else {
die "$fname:$line: $opcode: legacy map must precede opcodes\n";
}
} elsif ($op =~ /^[0-9a-f]{2}$/) {
if (defined($litix) && $litix+$codes[$litix]+1 == scalar @codes &&
$codes[$litix] < 4) {
$codes[$litix]++;
push(@codes, hex $op);
} else {
$litix = scalar(@codes);
push(@codes, 01, hex $op);
}
$prefix_ok = 0;
} elsif ($op eq '/r') {
if (!defined($oppos->{'r'}) || !defined($oppos->{'m'})) {
die "$fname:$line: $opcode: $op requires 'r' and [mn] operands\n";
}
$opex = (($oppos->{'m'} & 4) ? 06 : 0) |
(($oppos->{'r'} & 4) ? 05 : 0);
push(@codes, $opex) if ($opex);
# if mib is composed with two separate operands - ICC style
push(@codes, 014 + ($oppos->{'x'} & 3)) if (defined($oppos->{'x'}));
push(@codes, 0100 + (($oppos->{'m'} & 3) << 3) + ($oppos->{'r'} & 3));
$prefix_ok = 0;
} elsif ($op =~ m:^/([0-7])$:) {
if (!defined($oppos->{'m'})) {
die "$fname:$line: $opcode: $op requires an [mn] operand\n";
}
push(@codes, 06) if ($oppos->{'m'} & 4);
push(@codes, 0200 + (($oppos->{'m'} & 3) << 3) + $1);
$prefix_ok = 0;
} elsif ($op =~ m:^/([0-3]?)r([0-7])$:) {
if (!defined($oppos->{'r'})) {
die "$fname:$line: $opcode: $op requires an 'r' operand\n";
}
push(@codes, 05) if ($oppos->{'r'} & 4);
push(@codes, 0171);
push(@codes, (($1+0) << 6) + (($oppos->{'r'} & 3) << 3) + $2);
$prefix_ok = 0;
} elsif ($op =~ /^(vex|xop)(|\..*)$/) {
my $vexname = $1;
my $c = $vexname eq 'xop' ? 1 : 0;
my ($m,$w,$l,$p) = (undef,undef,undef,undef);
my $has_nds = 0;
my @subops = split(/\./, $2);
my $opsize = undef;
if (defined($opmap)) {
warn "$fname:$line: $opcode: legacy prefix ignored with VEX\n";
}
foreach $oq (@subops) {
if ($oq eq '') {
next;
} elsif ($oq eq '128' || $oq eq 'l0' || $oq eq 'lz') {
$l = 0;
} elsif ($oq eq '256' || $oq eq 'l1') {
$l = 1;
} elsif ($oq eq 'lig') {
$l = 0 unless (defined($l));
$flags->{'LIG'}++;
} elsif ($oq eq 'w0') {
$w = 0;
} elsif ($oq eq 'w1') {
$w = 1;
} elsif ($oq eq 'wig') {
$w = 0;
$flags->{'WIG'}++;
} elsif ($oq eq 'ww') {
$flags->{'WW'}++;
} elsif ($oq eq 'o8') {
$p = 0 unless (defined($p)); # np
if (!defined($w)) { # wig
$w = 0;
$flags->{'WIG'}++;
}
} elsif ($oq eq 'o16') {
$p = 1 unless (defined($p)); # 66
$w = 0 unless (defined($w)); # w0
$opsize = 0320;
} elsif ($oq eq 'o32') {
$p = 0 unless (defined($p)); # np
if (!defined($w)) {
$w = 0 unless (defined($w)); # w0
$flags->{'WW'}++;
}
$opsize = 0321;
} elsif ($oq eq 'o64') {
$p = 0 unless (defined($p)); # np
$w = 1 unless (defined($w)); # w1
$flags->{'WW'}++;
$opsize = 0323 + $w;
} elsif ($oq eq 'ko8') {
$p = 1 unless (defined($p)); # 66
$w = 0 unless (defined($w)); # w0
} elsif ($oq eq 'ko16') {
$p = 0 unless (defined($p)); # np
$w = 0 unless (defined($w)); # w0
# $opsize = 0320;
} elsif ($oq eq 'ko32') {
$p = 1 unless (defined($p)); # 66
$w = 1 unless (defined($w)); # w1
# $opsize = 0321;
} elsif ($oq eq 'ko64') {
$p = 0 unless (defined($p)); # np
$w = 1 unless (defined($w)); # w1
# $opsize = 0323 + $w;
} elsif ($oq eq 'np' || $oq eq 'p0') {
$p = 0;
} elsif ($oq eq '66' || $oq eq 'p1') {
$p = 1;
} elsif ($oq eq 'f3' || $oq eq 'p2') {
$p = 2;
} elsif ($oq eq 'f2' || $oq eq 'p3') {
$p = 3;
} elsif ($oq eq '0f') {
$m = 1;
} elsif ($oq eq '0f38') {
$m = 2;
} elsif ($oq eq '0f3a') {
$m = 3;
} elsif ($oq =~ /^(m|map)([0-9]+)$/) {
$m = $2+0;
} elsif ($oq eq 'nds' || $oq eq 'ndd' || $oq eq 'dds') {
if (!defined($oppos->{'v'})) {
die "$fname:$line: $opcode: $vexname.$oq without [vw] operand\n";
}
$has_nds = 1;
} else {
die "$fname:$line: $opcode: undefined modifier: $vexname.$oq\n";
}
}
if (!defined($m) || !defined($l)) {
die "$fname:$line: $opcode: missing fields in \U$vexname\E specification\n";
}
if (!defined($w)) {
$w = 0;
$flags->{'WIG'}++;
}
my $minmap = ($c == 1) ? 8 : 0; # 0-31 for VEX, 8-31 for XOP
if ($m < $minmap || $m > 31) {
die "$fname:$line: $opcode: Only maps ${minmap}-31 are valid for \U${vexname}\n";
}
push(@codes, 05) if ($oppos->{'v'} > 3);
push(@codes, defined($oppos->{'v'}) ? 0260+$oppos->{'v'} : 0270,
($c << 6)+$m, ($w << 7)+($l << 2)+$p);
push(@codes, $opsize) if (defined($opsize));
$flags->{'VEX'}++;
$flags->{'NOAPX'}++; # VEX doesn't support registers 16+
$prefix_ok = 0;
} elsif ($op =~ /^(evex)(|\..*)$/) {
my $c = $vexmap{$1};
my ($m,$w,$l,$p,$scc,$nf,$u,$ndd) = (undef,undef,undef,undef,undef,undef,undef,0,0);
my ($nds,$nd,$dfv,$v) = (0, 0, 0, 0);
my $opsize = undef;
my @bad_op = ();
my @subops = split(/\./, $2);
if (defined($opmap)) {
warn "$fname:$line: $opcode: legacy prefix ignored with EVEX\n";
}
foreach $oq (@subops) {
if ($oq eq '') {
next;
} elsif ($oq =~ /^(128|ll?[0z])$/) {
$l = 0;
} elsif ($oq =~ /^(256|ll?1)$/) {
$l = 1;
} elsif ($oq =~ /^(512|ll?2)$/) {
$l = 2;
} elsif ($oq =~ /^(1024|1k|ll?3)$/) {
# Not actually defined, but...
$l = 3;
} elsif ($oq eq 'lig') {
$l = 0 unless (defined($l));
$flags->{'LIG'}++;
} elsif ($oq eq 'w0') {
$w = 0;
} elsif ($oq eq 'w1') {
$w = 1;
} elsif ($oq eq 'wig') {
$w = 0;
$flags->{'WIG'}++;
} elsif ($oq eq 'ww') {
$w = 0;
$flags->{'WW'}++;
} elsif ($oq eq 'np' || $oq eq 'p0') {
$p = 0;
} elsif ($oq eq '66' || $oq eq 'p1') {
$p = 1;
} elsif ($oq eq 'f3' || $oq eq 'p2') {
$p = 2;
} elsif ($oq eq 'f2' || $oq eq 'p3') {
$p = 3;
} elsif ($oq eq 'o8') {
$p = 0 unless (defined($p)); # np
if (!defined($w)) { # wig
$w = 0;
$flags->{'WIG'}++;
}
} elsif ($oq eq 'o16') {
$p = 1 unless (defined($p)); # 66
$w = 0 unless (defined($w)); # w0
$opsize = 0320;
} elsif ($oq eq 'o32') {
$p = 0 unless (defined($p)); # np
if (!defined($w)) {
$w = 0 unless (defined($w)); # w0
$flags->{'WW'}++;
}
$opsize = 0321;
} elsif ($oq eq 'o64') {
$p = 0 unless (defined($p)); # np
$w = 1 unless (defined($w)); # w1
$flags->{'WW'}++;
$opsize = 0323 + $w;
} elsif ($oq eq 'ko8') {
$p = 1 unless (defined($p)); # 66
$w = 0 unless (defined($w)); # w0
} elsif ($oq eq 'ko16') {
$p = 0 unless (defined($p)); # np
$w = 0 unless (defined($w)); # w0
# $opsize = 0320;
} elsif ($oq eq 'ko32') {
$p = 1 unless (defined($p)); # 66
$w = 1 unless (defined($w)); # w1
# $opsize = 0321;
} elsif ($oq eq 'ko64') {
$p = 0 unless (defined($p)); # np
$w = 1 unless (defined($w)); # w1
# $opsize = 0323 + $w;
} elsif ($oq eq 'ww') {
$flags->{'WW'}++;
} elsif ($oq eq '0f') {
$m = 1;
} elsif ($oq eq '0f38') {
$m = 2;
} elsif ($oq eq '0f3a') {
$m = 3;
} elsif ($oq =~ /^(m|map)([0-7])$/) {
$m = $2+0;
} elsif ($oq =~ /^scc([0-9]+)$/) {
$scc = $1+0;
$flags->{'SCC'}++;
push(@bad_op, ['v', $oq]);
} elsif ($oq eq 'u') {
$u = 1;
} elsif ($oq eq 'nf') {
$flags->{'NF_E'}++;
$nf = 0;
} elsif ($oq =~ /^nf([01])$/) {
$nf = $1 + 0;
} elsif ($oq =~ /^v([0-9]+)$/) {
$v = $1 + 0;
push(@bad_op, ['v', $oq]);
} elsif ($oq eq 'dfv') {
$flags->{'DFV'}++;
$dfv = 1;
push(@bad_op, ['v', $oq]);
} elsif ($oq =~ /^nd([01])$/) {
$nd = $1 + 0;
} elsif ($oq eq 'zu') {
# Set .ND if {zu} prefix is present
$flags->{'ZU_E'}++;
} elsif ($oq =~ /^(nds|ndd|nd|dds)$/) {
if (!defined($oppos->{'v'})) {
die "$fname:$line: $opcode: evex.$oq without [vw] operand\n";
}
$nds = 1;
$nd = $oq eq 'nd';
} else {
die "$fname:$line: $opcode: undefined modifier: evex.$oq\n";
}
}
# Currently too many patterns miss .w or .p; it would be good
# to figure out if they should be .[wp]0 or .[wp]ig, but
# don't warn yet to keep the noise down
if (!defined($m) || !defined($l)) {
warn "$fname:$line: $opcode: missing fields in EVEX specification\n";
}
if ($m > 7) {
die "$fname:$line: $opcode: Only maps 0-7 are valid for EVEX\n";
}
foreach my $bad (@bad_op) {
my($what, $because) = @$inv;
if (defined($oppos->{$what})) {
die "$fname:$line: $opcode: $what and evex.$because are mutually incompatible\n";
}
}
if ($scc && $nf) {
die "$fname:$line: $opcode: evex.scc and evex.nf are mutually incompatible\n";
}
my @p = ($m | 0xf0, $p | 0x7c, ($l << 5) | 0x08);
$v ^= 0x0f if ($dfv);
$v ^= 0x10 if (defined($scc));
$p[1] ^= ($v & 15) << 3;
$p[1] ^= $w << 7;
$p[2] ^= ($v & 16) >> 1;
$p[2] ^= $scc & 15;
$p[2] |= 0x04 if ($nf);
$p[2] |= 0x10 if ($nd);
push(@codes, 05) if ($oppos->{'v'} > 3);
push(@codes, defined($oppos->{'v'}) ? 0240+$oppos->{'v'} : 0250, @p);
push(@codes, $tup);
push(@codes, $opsize) if (defined($opsize));
$flags->{'EVEX'}++;
$prefix_ok = 0;
} elsif ($op =~ /^(rex2)(\..*)?$/) {
my $name = $1;
my @subops = split(/\./, $2);
my $c = 0350;
my $m = undef;
foreach $oq (@subops) {
if ($oq eq '') {
next;
} elsif ($oq eq 'w') {
$c |= 01;
} else {
die "$fname:$line: $opcode: unknown modifier: $name.$oq\n";
}
}
push(@codes, $c);
$flags->{'APX'}++;
$flags->{'REX2'}++;
$flags->{'LONG'}++;
$prefix_ok = 0;
} elsif (defined $imm_codes{$op}) {
if ($op eq 'seg') {
if ($last_imm lt 'i') {
die "$fname:$line: $opcode: seg without an [ij] operand\n";
}
} else {
$last_imm++;
if ($last_imm gt 'j') {
die "$fname:$line: $opcode: too many immediate operands\n";
}
}
if (!defined($oppos->{$last_imm})) {
die "$fname:$line: $opcode: $op without '$last_imm' operand\n";
}
push(@codes, 05) if ($oppos->{$last_imm} & 4);
push(@codes, $imm_codes{$op} + ($oppos->{$last_imm} & 3));
$prefix_ok = 0;
} elsif ($op =~ /^ib(,[us])?[\^]([0-9a-f]+)$/) {
my $type = $1 eq ',u' ? 2 : $1 eq ',s' ? 1 : 0;
my $mod = hex $2;
$last_imm++;
if ($last_imm gt 'j') {
die "$fname:$line: $opcode: too many immediate operands\n";
}
if (!defined($oppos->{$last_imm})) {
die "$fname:$line: $opcode: $op without '$last_imm' operand\n";
}
push(@codes, 05) if ($oppos->{$last_imm} & 4);
push(@codes, 0304 + ($oppos->{$last_imm} & 3));
push(@codes, $type, $mod);
$flags->{'ND'}++;
} elsif ($op eq '/is4') {
if (!defined($oppos->{'s'})) {
die "$fname:$line: $opcode: $op without 's' operand\n";
}
if (defined($oppos->{'i'})) {
push(@codes, 0172, ($oppos->{'s'} << 3)+$oppos->{'i'});
} else {
push(@codes, 05) if ($oppos->{'s'} & 4);
push(@codes, 0174+($oppos->{'s'} & 3));
}
$prefix_ok = 0;
} elsif ($op =~ /^\/is4\=([0-9]+)$/) {
my $imm = $1;
if (!defined($oppos->{'s'})) {
die "$fname:$line: $opcode: $op without 's' operand\n";
}
if ($imm < 0 || $imm > 15) {
die "$fname:$line: $opcode: invalid imm4 value for $op: $imm\n";
}
push(@codes, 0173, ($oppos->{'s'} << 4) + $imm);
$prefix_ok = 0;
} elsif ($op =~ /^([0-9a-f]{2})\+r$/) {
if (!defined($oppos->{'r'})) {
die "$fname:$line: $opcode: $op without 'r' operand\n";
}
push(@codes, 05) if ($oppos->{'r'} & 4);
push(@codes, 010 + ($oppos->{'r'} & 3), hex $1);
$prefix_ok = 0;
} elsif ($op =~ /^(jcc|jmp)8$/) {
# A relaxable jump instruction
push(@codes, $op eq 'jcc8' ? 0370 : 0371);
$flags->{'JMP_RELAX'}++;
} elsif ($op =~ /^\\([0-7]+|x[0-9a-f]{2})$/) {
# Escape to enter literal bytecodes
push(@codes, oct $1);
} else {
die "$fname:$line: $opcode: unknown operation: $op\n";
}
}
# Legacy maps 2+ do not support REX2 encodings
if ($opmap > 1 && !$flags{'EVEX'}) {
$flags->{'NOAPX'}++;
}
return @codes;
}