Fix matching of branch instructions with prefixes and sizes

Matching of branch instructions with prefixes and sizes is, to say the least, tricky. Work through it, and add a new macro to help.

Fixes: https://github.com/netwide-assembler/nasm/issues/144
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
2025-11-08 23:27:15 -05:00 · 2025-10-10 13:03:33 -07:00
parent 2c71e67762
commit a7457e66cf
22 changed files with 6875 additions and 100 deletions
--- a/x86/iflags.ph
+++ b/x86/iflags.ph
@@ -30,7 +30,6 @@ if_("AR2",               "SB, SW, SD applies to operand 2");
 if_("AR3",               "SB, SW, SD applies to operand 3");
 if_("AR4",               "SB, SW, SD applies to operand 4");
 # These must match the order of the BITSx flags in opflags.h
-# Are these obsolete?
 if_("SB",                "Unsized operands can't be non-byte");
 if_("SW",                "Unsized operands can't be non-word");
 if_("SD",                "Unsized operands can't be non-dword");
@@ -41,7 +40,8 @@ if_("SY",                "Unsized operands can't be non-yword");
 if_("SZ",                "Unsized operands can't be non-zword");
 # End BITSx order match requirement
 if_("NWSIZE",            "Operand size defaults to 64 in 64-bit mode");
-if_("OSIZE",             "Unsized operands must match the default operand size");
+# OSIZE can be modified by osp prefixes, but not by other operands
+if_("OSIZE",             "Unsized operands must match the operand size");
 if_("ASIZE",             "Unsized operands must match the address size");
 if_("ANYSIZE",           "Ignore operand size even if explicit");
 if_("SX",                "Unsized operands not allowed");
--- a/x86/insns.dat
+++ b/x86/insns.dat
@@ -262,19 +262,15 @@ $bwd  CMPXCHG486 rm#,reg#			[mr:	0f a6# /r]				486,SM,UNDOC,NOLONG,ND,LOCK,OBSOL

 ;# Jumps
 ; APX absolute 64-bit jmp
-      JMPABS	imm64				[i:	a64 np rex2 a1 iq		]	APX
-      JMP	imm64|abs			[i:	a64 np rex2 a1 iq		]	APX,ND
-      JMPABS	imm64|abs			[i:	a64 np rex2 a1 iq		]	APX,ND
-; Jump-over emulation of JMPABS on !APX
-      JMPABS	imm64				[i:	a64 ff 25 00 00 00 00 iq	]	NOAPX,LONG,ND
-      JMP	imm64|abs			[i:	a64 ff 25 00 00 00 00 iq	]	NOAPX,LONG,ND
-      JMPABS	imm64|abs			[i:	a64 ff 25 00 00 00 00 iq	]	NOAPX,LONG,ND
+      JMPABS	imm64|near			[i:	a64 np rex2 a1 iq		]	APX
+      JMP	imm64|abs|near			[i:	a64 np rex2 a1 iq		]	APX,ND
+      JMPABS	imm64|abs|near			[i:	a64 np rex2 a1 iq		]	APX,ND

 ; Call/jmp near imm/reg/mem are always 64-bit in long mode.
-      JMP	imm8|short			[i:	nw eb rel8]				8086,NOAPX
-      JMP	imm				[i:	jmp8 nw eb rel8]			8086,JMP_RELAX,NOAPX,ND
-$wdq  JMP	imm#|near			[i:	nw o# e9 rel]				8086,BND,(wd:NOLONG,OSIZE)
-$wdq  JMP	rm#|near			[m:	nw o# ff /4]				8086,BND,(wd:NOLONG,OSIZE)
+$br   JMP	short				[i:	os eb rel8]				8086,NOAPX,ND
+$br   JMP	near				[i:	jmp8 os eb rel8]			8086,NOAPX,JMP_RELAX
+$br   JMP	near				[i:	os e9 rel]				8086,BND
+$wdq  JMP	rm#|near			[m:	nw o# ff /4]				8086,BND,OSIZE

 $wd   JMP	imm#|far			[i:	o# ea iwd seg]				8086,OSIZE,ND,NOLONG
 ; These are hacks to support the legacy syntax "[d]word seg:offs" to mean "seg:[d]word offs"
@@ -287,37 +283,37 @@ $wd   JMP	imm16:imm#|far			[ji:	o# ea i# iw]				8086,OSIZE,AR1,NOLONG,ND
 ; This is an intentional "programmer friendliness" quirk.
 $wdq  JMP	mem#|far			[m:	o# ff /5]				8086,OSIZE,NWSIZE,(w:NOLONG)

-      Jcc	imm8|short			[i:	nw 70+c rel8]				8086,ND,BND,SX,JCC_HINT,NOAPX
-      Jcc	imm				[i:	jcc8 nw 70+c rel8]			8086,BND,SX,JCC_HINT,NOAPX
-$wdq  Jcc	imm#|near			[i:	nw o# 0f 80+c rel]			386,BND,NOAPX,JCC_HINT,(wd:NOLONG,OSIZE)
+$br   Jcc	short				[i:	os 70+c rel8]				8086,ND,BND,JCC_HINT,NOAPX
+$br   Jcc	near				[i:	jcc8 os 70+c rel8]			8086,BND,JCC_HINT,JMP_RELAX,NOAPX
+$br   Jcc	near				[i:	os 0f 80+c rel]				386,BND,NOAPX,JCC_HINT
 ; Jump-over emulation of Jcc on < 386
-      Jcc	imm16|near			[i:	nw 71+c jlen e9 rel16]			8086,ND,NOAPX,NOLONG
+; This could/should be improved to handle osp properly
+      Jcc	imm				[i:	71+c jlen e9 rel]			8086,ND,NOAPX,OSIZE

-; The following only have short forms, hence imm8|near|short
-      JCXZ	imm8|near|short			[i:	a16 e3 rel8]				8086,NOLONG
-      JECXZ	imm8|near|short			[i:	a32 e3 rel8]				386,NOAPX
-      JRCXZ	imm8|near|short			[i:	a64 e3 rel8]				X86_64,LONG,NOAPX
-$wdq  JCXZ	imm8|near|short,cx#		[i-:	a# e3 rel8]				8086,ND
+; The following only have short forms, but use the *address* size to encode
+; the size of the counter register.
+$br $wdq  JCX#Z		near|short		[i:	a# os e3 rel8]				8086,NOAPX
+$br $wdq  JCXZ		near|short,cx#		[i:	a# os e3 rel8]				8086,NOAPX,ND

-$zwdq LOOP%	imm8|near|short			[i:	a#  nw e2 rel8]				8086,NOAPX,(wdq:ND)
-$zwdq LOOPE%	imm8|near|short			[i:	a#  nw e1 rel8]				8086,NOAPX,(wdq:ND)
-$zwdq LOOPNE%	imm8|near|short			[i:	a#  nw e0 rel8]				8086,NOAPX,(wdq:ND)
-$zwdq LOOPZ%	imm8|near|short			[i:	a#  nw e1 rel8]				8086,NOAPX,ND
-$zwdq LOOPNZ%	imm8|near|short			[i:	a#  nw e0 rel8]				8086,NOAPX,ND
-$wdq  LOOP	imm8|near|short,cx#		[i-:	a#  nw e2 rel8]				8086,NOAPX
-$wdq  LOOPE	imm8|near|short,cx#		[i-:	a#  nw e1 rel8]				8086,NOAPX
-$wdq  LOOPNE	imm8|near|short,cx#		[i-:	a#  nw e0 rel8]				8086,NOAPX
-$wdq  LOOPZ	imm8|near|short,cx#		[i-:	a#  nw e1 rel8]				8086,NOAPX,ND
-$wdq  LOOPNZ	imm8|near|short,cx#		[i-:	a#  nw e0 rel8]				8086,NOAPX,ND
+$br $zwdq LOOP%		near|short		[i:	a# os e2 rel8]				8086,NOAPX,(wdq:ND)
+$br $zwdq LOOPE%	near|short		[i:	a# os e1 rel8]				8086,NOAPX,(wdq:ND)
+$br $zwdq LOOPNE%	near|short		[i:	a# os e0 rel8]				8086,NOAPX,(wdq:ND)
+$br $zwdq LOOPZ%	near|short		[i:	a# os e1 rel8]				8086,NOAPX,ND
+$br $zwdq LOOPNZ%	near|short		[i:	a# os e0 rel8]				8086,NOAPX,ND
+$br $wdq  LOOP		near|short,cx#		[i-:	a# os e2 rel8]				8086,NOAPX
+$br $wdq  LOOPE		near|short,cx#		[i-:	a# os e1 rel8]				8086,NOAPX
+$br $wdq  LOOPNE	near|short,cx#		[i-:	a# os e0 rel8]				8086,NOAPX
+$br $wdq  LOOPZ		near|short,cx#		[i-:	a# os e1 rel8]				8086,NOAPX
+$br $wdq  LOOPNZ	near|short,cx#		[i-:	a# os e0 rel8]				8086,NOAPX

 ; JMPE is obsolete, but seems to be used by a fair number of virtual environments?
-$zwdq JMPE	imm##|near			[i:	nw o# 0f b8 rel]			IA64
+$br   JMPE	near				[i:	os 0f b8 rel]				IA64
 ; 0f 00 /6 with a prefix has been repurposed in long mode
-$wdq  JMPE	rm#|near			[m:	nw o# np 0f 00 /6]			IA64
-$wd   JMPE	rm#|near			[m:	o#       0f 00 /6]			IA64,ND,NOLONG
+$wdq  JMPE	rm#|near			[m:	nw o# np 0f 00 /6]			IA64,OSIZE
+$wd   JMPE	rm#|near			[m:	o#       0f 00 /6]			IA64,OSIZE,NOLONG

 ;# Call and return
-$wdq  CALL	imm##|near			[i:	nw o# e8 rel]				8086,BND,NOAPX,(wd:OSIZE,NOLONG)
+$br   CALL	near				[i:	os e8 rel]				8086,BND,NOAPX
 $wdq  CALL	rm#|near			[m:	nw o# ff /2]				8086,BND,(wd:OSIZE,NOLONG)

 $wd   CALL	imm#|far			[i:	o# 9a iwd seg]				8086,ND,NOLONG,OSIZE
@@ -2913,18 +2909,10 @@ VPGATHERQQ	ymmreg,ymem64,ymmreg		[rmv:	vm64y vex.dds.256.66.0f38.w1 91 /r]	AVX2

 ;# Intel Transactional Synchronization Extensions (TSX)
 XABORT		imm8				[i:	c6 f8 ib]				RTM
-XBEGIN		imm				[i:	nw odf c7 f8 rel]			RTM
-XBEGIN		imm|near			[i:	nw odf c7 f8 rel]			RTM,SX,ND
-XBEGIN		imm16				[i:	   o16 c7 f8 rel]			RTM,NOLONG,SX
-XBEGIN		imm16|near			[i:	   o16 c7 f8 rel]			RTM,NOLONG,SX,ND
-XBEGIN		imm32				[i:	   o32 c7 f8 rel]			RTM,NOLONG,SX
-XBEGIN		imm32|near			[i:	   o32 c7 f8 rel]			RTM,NOLONG,SX,ND
-XBEGIN		imm64				[i:	 o64nw c7 f8 rel]			RTM,LONG,SX
-XBEGIN		imm64|near			[i:	 o64nw c7 f8 rel]			RTM,LONG,SX,ND
+$br XBEGIN	near				[i:	os c7 f8 rel]				RTM
 XEND		void				[	0f 01 d5]				RTM
 XTEST		void				[	0f 01 d6]				HLE,RTM

-
 PREFETCHWT1     mem8         [m:                               0f 0d /2 ]  PREFETCHWT1

 ;# Intel Memory Protection Extensions (MPX)
--- a/x86/insns.pl
+++ b/x86/insns.pl
@@ -487,7 +487,7 @@ if ( $output eq 'b') {
        print B " *";
        for ($j = 0; $j < 256; $j += 32) {
            print B " |" if ($j);
-            printf B " %3o:%4d", $i+$j, $bytecode_count[$i+$j];
+            printf B " %3o:%5d", $i+$j, $bytecode_count[$i+$j];
        }
        print B "\n";
    }
@@ -691,12 +691,14 @@ sub count_bytecodes(@) {
            $skip = 1;
        } elsif (($bc & ~013) == 0144) {
            $skip = 1;
-        } elsif ($bc == 0172 || $bc == 0173) {
+        } elsif ($bc >= 0171 && $bc <= 0173) {
            $skip = 1;
        } elsif (($bc & ~3) == 0260 || $bc == 0270) {   # VEX
            $skip = 2;
        } elsif (($bc & ~3) == 0240 || $bc == 0250) {   # EVEX
            $skip = 4;
+	} elsif (($bc & ~3) == 0304) {
+	    $skip = 2;
        } elsif ($bc == 0330) {
            $skip = 1;
        }
--- a/x86/preinsns.pl
+++ b/x86/preinsns.pl
@@ -152,7 +152,7 @@ sub func_multisize($$$) {
 	$ins = $o.$ins;
 	$o = '';

-	while ($ins =~ /^(.*?)((?:\b[0-9a-f]{2}(?:\+r)?|\bsbyte|\bimm|\bsel|\bopt\w?|\b[ioa]d?|\b(?:reg_)?[abcd]x|\bk?reg|\bk?rm|\bw)?\#{1,2}|\b(?:reg|rm)64\b|\b(?:o64)?nw\b|\b(?:NO)?LONG\w+\b|\%{1,2})(.*)$/) {
+	while ($ins =~ /^(.*?)((?:\b[0-9a-f]{2}(?:\+r)?|\bsbyte|\bimm|\bsel|\bopt\w?|\b[ioa]d?|\b(?:reg_)?[abcd]x|\bk?reg|\bk?rm|\bw|\bS\b)?\#{1,2}|\b(?:reg|rm)64\b|\b(?:o64)?nw\b|\b(?:NO)?LONG\w+\b|\%{1,2}|[ABCD]X\#)(.*)$/) {
 	    $o .= $1;
 	    my $mw = $2;
 	    $ins = $3;
@@ -178,15 +178,17 @@ sub func_multisize($$$) {
 		$o .= !$i ? 'iwd' : ($s >= 64) ? 'id,s' : "i$sn";
 	    } elsif ($mw eq 'i##') {
 		$o .= !$i ? 'iwdq' : "i$sn";
-	    } elsif ($mw =~ /^(?:reg_)?([abcd])x\#$/) {
+	    } elsif ($mw =~ /^(?:reg_)?([abcd])x\#$/i) {
+		my $rl = $1;
+		my $upr = ($rl =~ /^[A-Z]/);
 		if ($i == 1) {
-		    $o .= "reg_${1}l";
+		    $o .= $upr ? "${rl}L" : "reg_${rl}l";
 		} elsif ($i == 2) {
-		    $o .= "reg_${1}x";
+		    $o .= $upr ? "${rl}X" : "reg_${rl}x";
 		} elsif ($i == 3) {
-		    $o .= "reg_e${1}x";
+		    $o .= $upr ? "E${rl}X" : "reg_e${rl}x";
 		} elsif ($i == 4) {
-		    $o .= "reg_r${1}x";
+		    $o .= $upr ? "R${rl}X" : "reg_r${rl}x";
 		    $long |= 1;
 		} else {
 		    die "$0:$infile:$line: register cannot be used with z\n";
@@ -237,6 +239,8 @@ sub func_multisize($$$) {
 		}
 	    } elsif ($mw eq 'w##') {
 		$o .= 'w'.(($i-1) & 1);
+	    } elsif ($mw eq 'S#') {
+		$o .= 'S'
 	    } elsif ($mw eq '#') {
 		$o .= $s;
 	    } else {
@@ -260,6 +264,39 @@ sub func_multisize($$$) {
    return @ol;
 }

+# Near branch operand size patterns ($br): expand one template into
+# 16-, 32- and 64-bit forms, plus a 64-bit form taking a 32-bit operand
+# so that "jmp dword foo" is accepted in 64-bit mode, bogus as it is.
+# This leaves the "normal" size patterns free for address size
+# features, as used by JCXZ and LOOP.
+$macros{'br'} = {
+    'func' =>
+	# Only $rawargs is used; returns the list of expanded templates.
+	sub {
+	    my($mac, $args, $rawargs) = @_;
+	    my @ol;
+	    my $ins = join(' ', @$rawargs);
+
+	    foreach my $wx ([16,16], [32,32], [64,64], [64,32]) {
+		my($w,$iw) = @$wx; # size for o<w>, size for imm<iw>
+		my $i = $ins;
+		my $argn;	# ARn flag naming the branch target operand
+		if ($i =~ /^(.*)\b(near|short)\b/) {
+		    next if ($2 eq 'short' && $iw != $w);
+		    # n = number of ',' and ':' preceding the operand
+		    $argn = 'AR'.($1 =~ tr/,://);
+		}
+		$i =~ s/\b(near|short)\b/imm$iw|$1/;
+		$i =~ s/\bos\b/nw o$w/;
+		$i .= ",$argn" if (defined($argn));
+		$i .= ($iw != $w) ? ',SX,ND' : ',OSIZE';
+		$i .= ($w == 64) ? ',LONG' : ',NOLONG';
+		push(@ol, $i);
+	    }
+	    return(@ol);
+    }
+};
+
 # Common pattern for K-register instructions
 $macros{'k'} = {
    'func' =>