0
0
mirror of https://github.com/netwide-assembler/nasm.git synced 2025-10-10 00:25:06 -04:00

Reshuffle shift and rotate patterns for APX

The shift and rotate patterns are "interesting" in the following way:

1. Even though only 4/5/6 bits of the input are ever used, for the
   regular instructions the input is specified as the CL register, but
   for the -X instructions as a size-matching register. This makes the
   optimization patterns "interesting."

2. The sequencing of the legacy, VEX -X, APX EVEX, and APX -X patterns matters.

For #1, allow any size register to contain the shift count.

For #2, split up the macro generation of the patterns, and add a new
"$xmacro" macro to deal with the combinatorics of generating all the
-X patterns.  Written directly in Perl since it seemed easier than
trying to make anything more general for what is very much a special
case...

Reported-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin (Intel)
2025-10-07 16:22:32 -07:00
parent e39b856bde
commit e3f26e25a2
2 changed files with 53 additions and 13 deletions

View File

@@ -80,19 +80,12 @@ $bwdq TEST rm#,imm# [mi: o# f6# /0 i# ] 8086,SM
;# The basic shift and rotate operations
$shift ROL ROR RCL RCR SHL,SAL SHR - SAR
$dq RORX reg#,rm#*,imm8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib] BMI2,SM0-1
$dq ROLX reg#,rm#*,imm_known8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib^(d:1f/3f)] BMI2,SM0-1
$dq SHLX reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,SM
$dq SALX reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,SM,ND
$dq SARX reg#,rm#*,reg# [rmv: vex+.lz.f3.0f38.w# f7 /r] BMI2,SM
$dq SHRX reg#,rm#*,reg# [rmv: vex+.lz.f2.0f38.w# f7 /r] BMI2,SM
; RORX, SHLX, SARX
$xshift evex=0
$dq ROR reg#,rm#,imm8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib] BMI2,SM0-1,ND,NF!,OPT
$dq ROL reg#,rm#*,imm_known8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib^(d:1f/3f)] BMI2,SM0-1,ND,NF!,OPT
$dq SHL reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,ND,NF!,OPT
$dq SAL reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,ND,NF!,OPT
$dq SAR reg#,rm#*,reg# [rmv: vex+.lz.f3.0f38.w# f7 /r] BMI2,ND,NF!,OPT
$dq SHR reg#,rm#*,reg# [rmv: vex+.lz.f2.0f38.w# f7 /r] BMI2,ND,NF!,OPT
;# APX EVEX versions
$eshift ROL ROR RCL RCR SHL,SAL SHR - SAR
$xshift evex=1
;# Other basic integer arithmetic
$wd INC reg# [r: o# 40+r] 8086,NOLONG

View File

@@ -35,18 +35,64 @@ EOL
};
# Common pattern for the basic shift and rotate instructions
# Separate legacy and EVEX versions because additional patterns are
# needed to handle the -X VEX versions
#
# This entry holds only the legacy (non-VEX, non-EVEX) encodings.  The
# template is a single-quoted heredoc, so $$bwdq, $op and $n are literal
# text here and are not interpolated by Perl; presumably they are
# substituted later by the macro expander through def_eightfold
# (defined elsewhere in this file) -- confirm there.
#
# Each operation gets a shift-by-one form (unity), a shift-by-count-register
# form and a shift-by-imm8 form.  The count register may be written as
# CL, CX, ECX or RCX; all four spellings use the same d2# /$n encoding,
# and the spellings other than CL carry the ND flag.
#
# NOTE(review): every pattern line has a second ']' after the encoding
# ("/$n] ]"); the 'eshift' templates have only one.  Confirm that the
# pattern parser tolerates this or remove the stray bracket.
$macros{'shift'} = {
'def' => *def_eightfold,
'txt' => <<'EOL'
$$bwdq $op rm#,unity [m-: o# d0# /$n] ] 8086,FL
$$bwdq $op rm#,reg_cl [m-: o# d2# /$n] ] 8086,FL
$$bwdq $op rm#,reg_cx [m-: o# d2# /$n] ] 8086,FL,ND
$$bwdq $op rm#,reg_ecx [m-: o# d2# /$n] ] 8086,FL,ND
$$bwdq $op rm#,reg_rcx [m-: o# d2# /$n] ] 8086,FL,ND
$$bwdq $op rm#,imm8 [mi: o# c0# /$n ib,u] ] 186,FL
EOL
};
# APX EVEX versions
#
# Same set of operand forms as the legacy 'shift' templates (unity,
# CL/CX/ECX/RCX count register, imm8), but with an evex.ndx.nf.l0.m4
# encoding prefix and a leading "reg#?" operand (presumably an optional
# new-data destination register -- confirm against the pattern syntax).
#
# The template is a single-quoted heredoc, so $$bwdq, $op, $n and $apx
# are literal text at this point, not Perl variables; they are left for
# the macro expander to substitute.  As in the legacy templates, the
# count-register spellings other than CL carry the ND flag.
$macros{'eshift'} = {
'def' => *def_eightfold,
'txt' => <<'EOL'
$$bwdq $op reg#?,rm#,unity [vm-: evex.ndx.nf.l0.m4.o# d0# /$n ] $apx,FL,SM0-1
$$bwdq $op reg#?,rm#,reg_cl [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1
$$bwdq $op reg#?,rm#,reg_cx [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1,ND
$$bwdq $op reg#?,rm#,reg_ecx [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1,ND
$$bwdq $op reg#?,rm#,reg_rcx [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1,ND
$$bwdq $op reg#?,rm#,imm8 [vmi: evex.ndx.nf.l0.m4.o# c0# /$n ib,u ] $apx,FL,SM0-1
EOL
};
# -X shifts
#
# Generates the flag-preserving shift/rotate patterns (RORX, ROLX, SHLX,
# SALX, SARX, SHRX) together with the matching non-X mnemonics, which are
# emitted with the additional ND,NF!,OPT flags.
#
# Arguments (as passed by the macro expander):
#   $mac     - macro name (unused)
#   $args    - processed arguments (unused)
#   $rawargs - array ref of raw argument strings; if it contains
#              "evex=1" the patterns use an evex prefix and carry the
#              APX flag, otherwise they use a vex prefix
#
# Returns the list of generated pattern lines.
#
# For the register-count shifts the count register may be of any size
# (8/16/32/64).  Only the form whose count register matches the operand
# size is left without ND, and only for the primary mnemonics: SAL is
# an alias of SHL (same 66 prefix), so SAL always gets ND, as the
# hand-written SALX pattern did before this generator replaced it.
$macros{'xshift'} = {
    'func' => sub {
        my($mac, $args, $rawargs) = @_;
        my @ol;

        my $vex = 'vex';
        my @vfl = ();
        if (grep { /^evex=1$/ } @{$rawargs || []}) {
            $vex = 'evex';
            @vfl = ('APX');
        }

        foreach my $xf (['X', []], ['', ['ND', 'NF!', 'OPT']]) {
            my($x, $xfl) = @$xf;

            # Build the flag list from its parts so that no empty
            # fields ("!FL,," or ",,ND") end up in the output.
            my @fl     = ('BMI2', 'SM0-1', '!FL', @vfl, @$xfl);
            my $fl     = join(',', @fl);
            my $has_nd = grep { $_ eq 'ND' } @fl;

            foreach my $os (32, 64) {
                my $w = ($os == 32) ? 'w0' : 'w1';
                # ROL is encoded as ROR with the count xor'ed with size-1
                my $ixor = sprintf('%02x', $os-1);

                push(@ol, "ROR$x reg$os,rm$os,imm8 [rmi: $vex.lz.f2.0f3a.$w f0 /r ib] $fl");
                push(@ol, "ROL$x reg$os,rm$os,imm_known8 [rmi: $vex.lz.f2.0f3a.$w f0 /r ib^$ixor] $fl");

                foreach my $ss (8, 16, 32, 64) {
                    foreach my $opp (['SHL','66'], ['SAL','66'], ['SAR','f3'], ['SHR','f2']) {
                        my($op, $pp) = @$opp;

                        # Do not declare a variable with a statement
                        # modifier ("my $x = ... unless ..."): perlsyn
                        # documents that as undefined behaviour.
                        my $primary = ($ss == $os && $op ne 'SAL');
                        my $sfl = ($primary || $has_nd) ? $fl : "$fl,ND";

                        push(@ol, "$op$x reg$os,rm${os}*,reg$ss [rmv: $vex.lz.$pp.0f38.$w f7 /r] $sfl");
                    }
                }
            }
        }

        return @ol;
    }
};
#
# Common pattern for multiple 32/64, 16/32/64, or 8/16/32/64 instructions.
# 'z' is used for a null-prefixed default-sized instruction (osm/osd)
@@ -433,7 +479,8 @@ $line = 0;
## XXX: check: CMPSS, CMPSD
## XXX: check VEX encoded instructions that do not write
# Instructions which (possibly) change the flags
# Instructions which (possibly) change the flags without annotations
# The FL or !FL flags will override this
my $flaggy = '^(aa[adms]|ad[dc]|ad[co]x|aes\w*kl|and|andn|arpl|bextr|bl[sc]ic?|bl[sc]msk|bl[sc]r|\
bs[rf]|bt|bt[crs]|bzhi|clac|clc|cld|cli|clrssbsy|cmc|cmp|cmpxchg.*|da[as]|dec|div|\
encodekey.*|enqcmd.*|fu?comip?|idiv|imul|inc|iret.*|kortest.*|ktest.*|lar|loadiwkey|\