Reshuffle shift and rotate patterns for APX

The shift and rotate patterns are "interesting" in the following ways: 1. Even though only 4/5/6 bits of the input are ever used, for the regular instructions the input is specified as the CL register, but for the -X instructions as a size-matching register. This makes the optimization patterns "interesting." 2. The sequencing of the legacy, VEX -X, APX EVEX, and APX -X versions. For #1, allow any size register to contain the shift count. For #2, split up the macro generation of the patterns, and add a new "$xshift" macro to deal with the combinatorics of generating all the -X patterns. Written directly in Perl since it seemed easier than trying to make anything more general for what is very much a special case... Reported-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com> Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
2025-10-10 00:25:06 -04:00 · 2025-10-07 16:22:32 -07:00
parent e39b856bde
commit e3f26e25a2
2 changed files with 53 additions and 13 deletions
--- a/x86/insns.dat
+++ b/x86/insns.dat
@@ -80,19 +80,12 @@ $bwdq TEST	rm#,imm#			[mi:	o# f6# /0 i#		]		8086,SM
 ;# The basic shift and rotate operations
 $shift		ROL ROR RCL RCR SHL,SAL SHR - SAR
-$dq   RORX	reg#,rm#*,imm8			[rmi:	vex+.lz.f2.0f3a.w# f0 /r ib]		BMI2,SM0-1
+; RORX, SHLX, SARX
-$dq   ROLX	reg#,rm#*,imm_known8		[rmi:	vex+.lz.f2.0f3a.w# f0 /r ib^(d:1f/3f)]	BMI2,SM0-1
+$xshift evex=0
 $dq   SHLX	reg#,rm#*,reg#			[rmv:	vex+.lz.66.0f38.w# f7 /r]		BMI2,SM
 $dq   SALX	reg#,rm#*,reg#			[rmv:	vex+.lz.66.0f38.w# f7 /r]		BMI2,SM,ND
 $dq   SARX	reg#,rm#*,reg#			[rmv:	vex+.lz.f3.0f38.w# f7 /r]		BMI2,SM
 $dq   SHRX	reg#,rm#*,reg#			[rmv:	vex+.lz.f2.0f38.w# f7 /r]		BMI2,SM
-$dq   ROR	reg#,rm#,imm8			[rmi:	vex+.lz.f2.0f3a.w# f0 /r ib]		BMI2,SM0-1,ND,NF!,OPT
+;# APX EVEX versions
-$dq   ROL	reg#,rm#*,imm_known8		[rmi:	vex+.lz.f2.0f3a.w# f0 /r ib^(d:1f/3f)]	BMI2,SM0-1,ND,NF!,OPT
+$eshift		ROL ROR RCL RCR SHL,SAL SHR - SAR
-$dq   SHL	reg#,rm#*,reg#			[rmv:	vex+.lz.66.0f38.w# f7 /r]		BMI2,ND,NF!,OPT
+$xshift evex=1
 $dq   SAL	reg#,rm#*,reg#			[rmv:	vex+.lz.66.0f38.w# f7 /r]		BMI2,ND,NF!,OPT
 $dq   SAR	reg#,rm#*,reg#			[rmv:	vex+.lz.f3.0f38.w# f7 /r]		BMI2,ND,NF!,OPT
 $dq   SHR	reg#,rm#*,reg#			[rmv:	vex+.lz.f2.0f38.w# f7 /r]		BMI2,ND,NF!,OPT
 ;# Other basic integer arithmetic
 $wd   INC	reg#				[r:	o# 40+r]				8086,NOLONG
--- a/x86/preinsns.pl
+++ b/x86/preinsns.pl
@@ -35,18 +35,64 @@ EOL
 };
 # Common pattern for the basic shift and rotate instructions
 # Separate legacy and EVEX versions because additional patterns are
 # needed to handle the -X VEX versions
 #
 # This entry holds the legacy (non-EVEX) forms.  Each template line
 # describes one count-operand form: the constant 1 (unity), CL, the
 # wider aliases CX/ECX/RCX (flagged ND), and an 8-bit immediate.
 # $op and $n are placeholders left unexpanded by the non-interpolating
 # heredoc; presumably def_eightfold substitutes the mnemonic and its
 # slot number (the /$n opcode extension) for each of the eight names
 # listed on the "$shift" line in insns.dat -- confirm against
 # def_eightfold, which is defined elsewhere in this file.
 #
 # NOTE(review): every template line below closes the encoding with
 # "/$n]" and then has a second "]" before the flags, whereas the
 # 'eshift' templates have a single closing bracket.  Confirm whether
 # the extra "]" is intentional.
 $macros{'shift'} = {
    # Expansion handler (typeglob for def_eightfold)
    'def' => *def_eightfold,
 	# Pattern templates, one instruction form per line
 	'txt' => <<'EOL'
 $$bwdq $op	rm#,unity			[m-:	o# d0# /$n]				]	8086,FL
 $$bwdq $op	rm#,reg_cl			[m-:	o# d2# /$n]				]	8086,FL
 $$bwdq $op	rm#,reg_cx			[m-:	o# d2# /$n]				]	8086,FL,ND
 $$bwdq $op	rm#,reg_ecx			[m-:	o# d2# /$n]				]	8086,FL,ND
 $$bwdq $op	rm#,reg_rcx			[m-:	o# d2# /$n]				]	8086,FL,ND
 $$bwdq $op	rm#,imm8			[mi:	o# c0# /$n ib,u]			]	186,FL
 EOL
 };
 # APX EVEX versions
 #
 # Same six count-operand forms as the legacy 'shift' macro (unity, CL,
 # the CX/ECX/RCX aliases flagged ND, and imm8), but with an EVEX
 # encoding and an extra leading "reg#?" operand.  The "?" presumably
 # marks that operand as optional -- confirm against the pattern parser.
 # $op, $n and $apx are placeholders left unexpanded by the
 # non-interpolating heredoc; they are presumably filled in when the
 # macro is expanded through def_eightfold (defined elsewhere in this
 # file) -- TODO confirm.
 $macros{'eshift'} = {
    # Expansion handler (typeglob for def_eightfold)
    'def' => *def_eightfold,
 	# Pattern templates, one instruction form per line
 	'txt' => <<'EOL'
 $$bwdq $op	reg#?,rm#,unity			[vm-:	evex.ndx.nf.l0.m4.o#  d0# /$n		]	$apx,FL,SM0-1
 $$bwdq $op	reg#?,rm#,reg_cl		[vm-:	evex.ndx.nf.l0.m4.o#  d2# /$n		]	$apx,FL,SM0-1
 $$bwdq $op	reg#?,rm#,reg_cx		[vm-:	evex.ndx.nf.l0.m4.o#  d2# /$n		]	$apx,FL,SM0-1,ND
 $$bwdq $op	reg#?,rm#,reg_ecx		[vm-:	evex.ndx.nf.l0.m4.o#  d2# /$n		]	$apx,FL,SM0-1,ND
 $$bwdq $op	reg#?,rm#,reg_rcx		[vm-:	evex.ndx.nf.l0.m4.o#  d2# /$n		]	$apx,FL,SM0-1,ND
 $$bwdq $op	reg#?,rm#,imm8			[vmi:	evex.ndx.nf.l0.m4.o#  c0# /$n ib,u	]	$apx,FL,SM0-1
 EOL
 };
 # -X shifts
 #
 # Generator for the flag-less ("-X") rotate and shift patterns.
 #
 # Arguments: ($mac, $args, $rawargs).  Only $rawargs is examined: if
 # it contains the literal word "evex=1", the patterns are emitted with
 # an "evex" prefix and the APX flag; otherwise with a "vex" prefix and
 # no additional flag.
 #
 # Returns a list of pattern lines.  For each operand size (32, 64) and
 # for both the X-suffixed mnemonic and the plain mnemonic (the latter
 # additionally flagged ND,NF!,OPT) it emits:
 #   - ROR/ROL by immediate; the ROL form carries "ib^1f" or "ib^3f"
 #     (operand size minus one) on the same f0 opcode as ROR
 #   - SHL/SAL/SAR/SHR with a count register of every size 8/16/32/64
 $macros{'xshift'} = {
    'func' => sub {
 	my($mac, $args, $rawargs) = @_;
 	my @ol;
 	my $vex = 'vex';
 	my $vfl = '';
 	# Tolerate a missing raw argument list
 	if (grep { /^evex=1$/ } @{$rawargs || []}) {
 	    $vex = 'evex';
 	    $vfl = 'APX';
 	}
 	foreach my $xf (['X', $vfl], ['', "$vfl,ND,NF!,OPT"]) {
 	    my($x,$fl) = @$xf;
 	    foreach my $os (32, 64) {
 		my $w = ($os == 32) ? 'w0' : 'w1';
 		my $ixor = sprintf('%02x', $os-1);
 		push(@ol, "ROR$x reg$os,rm$os,imm8       [rmi: $vex.lz.f2.0f3a.$w f0 /r ib] BMI2,SM0-1,!FL,$fl");
 		push(@ol, "ROL$x reg$os,rm$os,imm_known8 [rmi: $vex.lz.f2.0f3a.$w f0 /r ib^$ixor] BMI2,SM0-1,!FL,$fl");
 		foreach my $ss (8, 16, 32, 64) {
 		    foreach my $opp (['SHL','66'], ['SAL','66'], ['SAR','f3'], ['SHR','f2']) {
 			my($op,$pp) = @$opp;
 			# Only the form whose count register matches the
 			# operand size, and whose mnemonic is not the SAL
 			# alias (same 66 prefix as SHL), stays un-flagged;
 			# every other form is marked ND.  SAR has its own
 			# f3 prefix and must not be flagged as an alias.
 			my $canonical = ($ss == $os && $op ne 'SAL');
 			# Always a defined string: "my ... unless" is
 			# undefined behaviour and would interpolate undef.
 			my $ndss = $canonical ? '' : ',ND';
 			push(@ol, "$op$x reg$os,rm${os}*,reg$ss [rmv: $vex.lz.$pp.0f38.$w f7 /r] BMI2,SM0-1,!FL,$fl,$ndss");
 		    }
 		}
 	    }
 	}
 	return @ol;
    }
 };
 #
 # Common pattern for multiple 32/64, 16/32/64, or 8/16/32/64 instructions.
 # 'z' is used for a null-prefixed default-sized instruction (osm/osd)
@@ -433,7 +479,8 @@ $line = 0;
 ## XXX: check: CMPSS, CMPSD
 ## XXX: check VEX encoded instructions that do not write
-# Instructions which (possibly) change the flags
+# Instructions which (possibly) change the flags without annotations
 # The FL or !FL flags will override this
 my $flaggy = '^(aa[adms]|ad[dc]|ad[co]x|aes\w*kl|and|andn|arpl|bextr|bl[sc]ic?|bl[sc]msk|bl[sc]r|\
 bs[rf]|bt|bt[crs]|bzhi|clac|clc|cld|cli|clrssbsy|cmc|cmp|cmpxchg.*|da[as]|dec|div|\
 encodekey.*|enqcmd.*|fu?comip?|idiv|imul|inc|iret.*|kortest.*|ktest.*|lar|loadiwkey|\