mirror of
https://github.com/netwide-assembler/nasm.git
synced 2025-10-10 00:25:06 -04:00
Reshuffle shift and rotate patterns for APX
The shift and rotate patterns are "interesting" in the following ways:
1. Even though only 4/5/6 bits of the input are ever used, the regular instructions specify the input as the CL register, whereas the -X instructions specify it as a size-matching register. This makes the optimization patterns "interesting."
2. The patterns have to be sequenced correctly: legacy, VEX -X versions, APX EVEX, and APX -X.
For #1, allow a register of any size to contain the shift count. For #2, split up the macro generation of the patterns, and add a new "$xshift" macro to deal with the combinatorics of generating all the -X patterns. It is written directly in Perl, since that seemed easier than trying to make anything more general for what is very much a special case...
Reported-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
@@ -80,19 +80,12 @@ $bwdq TEST rm#,imm# [mi: o# f6# /0 i# ] 8086,SM
|
|||||||
;# The basic shift and rotate operations
|
;# The basic shift and rotate operations
|
||||||
$shift ROL ROR RCL RCR SHL,SAL SHR - SAR
|
$shift ROL ROR RCL RCR SHL,SAL SHR - SAR
|
||||||
|
|
||||||
$dq RORX reg#,rm#*,imm8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib] BMI2,SM0-1
|
; RORX, SHLX, SARX
|
||||||
$dq ROLX reg#,rm#*,imm_known8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib^(d:1f/3f)] BMI2,SM0-1
|
$xshift evex=0
|
||||||
$dq SHLX reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,SM
|
|
||||||
$dq SALX reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,SM,ND
|
|
||||||
$dq SARX reg#,rm#*,reg# [rmv: vex+.lz.f3.0f38.w# f7 /r] BMI2,SM
|
|
||||||
$dq SHRX reg#,rm#*,reg# [rmv: vex+.lz.f2.0f38.w# f7 /r] BMI2,SM
|
|
||||||
|
|
||||||
$dq ROR reg#,rm#,imm8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib] BMI2,SM0-1,ND,NF!,OPT
|
;# APX EVEX versions
|
||||||
$dq ROL reg#,rm#*,imm_known8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib^(d:1f/3f)] BMI2,SM0-1,ND,NF!,OPT
|
$eshift ROL ROR RCL RCR SHL,SAL SHR - SAR
|
||||||
$dq SHL reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,ND,NF!,OPT
|
$xshift evex=1
|
||||||
$dq SAL reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,ND,NF!,OPT
|
|
||||||
$dq SAR reg#,rm#*,reg# [rmv: vex+.lz.f3.0f38.w# f7 /r] BMI2,ND,NF!,OPT
|
|
||||||
$dq SHR reg#,rm#*,reg# [rmv: vex+.lz.f2.0f38.w# f7 /r] BMI2,ND,NF!,OPT
|
|
||||||
|
|
||||||
;# Other basic integer arithmetic
|
;# Other basic integer arithmetic
|
||||||
$wd INC reg# [r: o# 40+r] 8086,NOLONG
|
$wd INC reg# [r: o# 40+r] 8086,NOLONG
|
||||||
|
@@ -35,18 +35,64 @@ EOL
|
|||||||
};
|
};
|
||||||
|
|
||||||
# Common pattern for the basic shift and rotate instructions
|
# Common pattern for the basic shift and rotate instructions
|
||||||
|
# Separate legacy and EVEX versions because additional patterns are
|
||||||
|
# needed to handle the -X VEX versions
|
||||||
$macros{'shift'} = {
|
$macros{'shift'} = {
|
||||||
'def' => *def_eightfold,
|
'def' => *def_eightfold,
|
||||||
'txt' => <<'EOL'
|
'txt' => <<'EOL'
|
||||||
$$bwdq $op rm#,unity [m-: o# d0# /$n] ] 8086,FL
|
$$bwdq $op rm#,unity [m-: o# d0# /$n] ] 8086,FL
|
||||||
$$bwdq $op rm#,reg_cl [m-: o# d2# /$n] ] 8086,FL
|
$$bwdq $op rm#,reg_cl [m-: o# d2# /$n] ] 8086,FL
|
||||||
|
$$bwdq $op rm#,reg_cx [m-: o# d2# /$n] ] 8086,FL,ND
|
||||||
|
$$bwdq $op rm#,reg_ecx [m-: o# d2# /$n] ] 8086,FL,ND
|
||||||
|
$$bwdq $op rm#,reg_rcx [m-: o# d2# /$n] ] 8086,FL,ND
|
||||||
$$bwdq $op rm#,imm8 [mi: o# c0# /$n ib,u] ] 186,FL
|
$$bwdq $op rm#,imm8 [mi: o# c0# /$n ib,u] ] 186,FL
|
||||||
|
EOL
|
||||||
|
};
|
||||||
|
|
||||||
|
# APX EVEX versions
|
||||||
|
$macros{'eshift'} = {
|
||||||
|
'def' => *def_eightfold,
|
||||||
|
'txt' => <<'EOL'
|
||||||
$$bwdq $op reg#?,rm#,unity [vm-: evex.ndx.nf.l0.m4.o# d0# /$n ] $apx,FL,SM0-1
|
$$bwdq $op reg#?,rm#,unity [vm-: evex.ndx.nf.l0.m4.o# d0# /$n ] $apx,FL,SM0-1
|
||||||
$$bwdq $op reg#?,rm#,reg_cl [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1
|
$$bwdq $op reg#?,rm#,reg_cl [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1
|
||||||
|
$$bwdq $op reg#?,rm#,reg_cx [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1,ND
|
||||||
|
$$bwdq $op reg#?,rm#,reg_ecx [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1,ND
|
||||||
|
$$bwdq $op reg#?,rm#,reg_rcx [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1,ND
|
||||||
$$bwdq $op reg#?,rm#,imm8 [vmi: evex.ndx.nf.l0.m4.o# c0# /$n ib,u ] $apx,FL,SM0-1
|
$$bwdq $op reg#?,rm#,imm8 [vmi: evex.ndx.nf.l0.m4.o# c0# /$n ib,u ] $apx,FL,SM0-1
|
||||||
EOL
|
EOL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# -X shifts
|
||||||
|
# Generator for the -X shift/rotate patterns (RORX, ROLX, SHLX, SALX,
# SARX, SHRX) and for the same encodings under the plain mnemonics
# (ROR, ROL, SHL, SAL, SAR, SHR).
#
# Arguments (taken from @_):
#   $mac, $args - accepted but not used by this generator
#   $rawargs    - array reference of raw argument strings; if any element
#                 matches "evex=1" the patterns use the "evex" prefix and
#                 carry the APX flag, otherwise they use "vex" and carry
#                 no extra flag.
#
# Returns the list of generated pattern strings.  For each of the two
# mnemonic forms ("X" suffix, then no suffix) and each operand size
# (32, then 64) it emits one ROR and one ROL immediate pattern followed
# by SHL/SAL/SAR/SHR patterns for every shift-count register size
# (8, 16, 32, 64).
$macros{'xshift'} = {
    'func' => sub {
        my($mac, $args, $rawargs) = @_;
        my @ol;

        # Encoding prefix and the extra flag that goes with it
        my($vex, $vfl) = ('vex', '');
        if (grep { /^evex=1$/ } @$rawargs) {
            $vex = 'evex';
            $vfl = 'APX';
        }

        # Mnemonic suffix paired with the flags for that form; the
        # unsuffixed form additionally carries ND,NF!,OPT.
        my @forms = (['X', "$vfl"], ['', "$vfl,ND,NF!,OPT"]);
        # Mnemonic paired with the prefix field used in its encoding
        my @shifts = (['SHL','66'], ['SAL','66'], ['SAR','f3'], ['SHR','f2']);

        foreach my $xf (@forms) {
            my($x, $fl) = @$xf;
            foreach my $os (32, 64) {
                my $w    = ($os == 32) ? 'w0' : 'w1';
                # ROL is encoded as ROR with the immediate XORed with
                # (operand size - 1), written as two hex digits.
                my $ixor = sprintf('%02x', $os-1);

                push(@ol, "ROR$x reg$os,rm$os,imm8 [rmi: $vex.lz.f2.0f3a.$w f0 /r ib] BMI2,SM0-1,!FL,$fl");
                push(@ol, "ROL$x reg$os,rm$os,imm_known8 [rmi: $vex.lz.f2.0f3a.$w f0 /r ib^$ixor] BMI2,SM0-1,!FL,$fl");

                foreach my $ss (8, 16, 32, 64) {
                    foreach my $opp (@shifts) {
                        my($op, $pp) = @$opp;
                        # Only the pattern whose count register matches the
                        # operand size is left without ND, and never for SAL,
                        # which shares its encoding with SHL.  This used to
                        # test for 'SAR', which flagged every SAR/SARX
                        # pattern ND and left the SAL alias unflagged.
                        # Always assign a value: "my ... unless" has
                        # undefined behaviour in Perl.
                        my $ndss = ($ss == $os && $op ne 'SAL') ? '' : ',ND';
                        # $fl and $ndss may be empty, which leaves empty
                        # fields in the comma-separated flag list; the output
                        # format is deliberately kept as it was.
                        # NOTE(review): presumably the consumer of these lines
                        # ignores empty flag fields - confirm.
                        push(@ol, "$op$x reg$os,rm${os}*,reg$ss [rmv: $vex.lz.$pp.0f38.$w f7 /r] BMI2,SM0-1,!FL,$fl,$ndss");
                    }
                }
            }
        }
        return @ol;
    }
};
|
||||||
|
|
||||||
#
|
#
|
||||||
# Common pattern for multiple 32/64, 16/32/64, or 8/16/32/64 instructions.
|
# Common pattern for multiple 32/64, 16/32/64, or 8/16/32/64 instructions.
|
||||||
# 'z' is used for a null-prefixed default-sized instruction (osm/osd)
|
# 'z' is used for a null-prefixed default-sized instruction (osm/osd)
|
||||||
@@ -433,7 +479,8 @@ $line = 0;
|
|||||||
## XXX: check: CMPSS, CMPSD
|
## XXX: check: CMPSS, CMPSD
|
||||||
## XXX: check VEX encoded instructions that do not write
|
## XXX: check VEX encoded instructions that do not write
|
||||||
|
|
||||||
# Instructions which (possibly) change the flags
|
# Instructions which (possibly) change the flags without annotations
|
||||||
|
# The FL or !FL flags will override this
|
||||||
my $flaggy = '^(aa[adms]|ad[dc]|ad[co]x|aes\w*kl|and|andn|arpl|bextr|bl[sc]ic?|bl[sc]msk|bl[sc]r|\
|
my $flaggy = '^(aa[adms]|ad[dc]|ad[co]x|aes\w*kl|and|andn|arpl|bextr|bl[sc]ic?|bl[sc]msk|bl[sc]r|\
|
||||||
bs[rf]|bt|bt[crs]|bzhi|clac|clc|cld|cli|clrssbsy|cmc|cmp|cmpxchg.*|da[as]|dec|div|\
|
bs[rf]|bt|bt[crs]|bzhi|clac|clc|cld|cli|clrssbsy|cmc|cmp|cmpxchg.*|da[as]|dec|div|\
|
||||||
encodekey.*|enqcmd.*|fu?comip?|idiv|imul|inc|iret.*|kortest.*|ktest.*|lar|loadiwkey|\
|
encodekey.*|enqcmd.*|fu?comip?|idiv|imul|inc|iret.*|kortest.*|ktest.*|lar|loadiwkey|\
|
||||||
|
Reference in New Issue
Block a user