Infrastructure support for AMD's new XOP prefix

Handle AMD's XOP prefixes. They use essentially the same encoding as VEX prefixes, so they are treated simply as a variant of VEX.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2025-10-10 00:25:06 -04:00 · 2009-05-03 21:42:34 -07:00
parent 34c768fcc5
commit a04019c7f4
5 changed files with 76 additions and 37 deletions
--- a/assemble.c
+++ b/assemble.c
@@ -56,19 +56,20 @@
 *                 is not equal to the truncated and sign-extended 32-bit
 *                 operand; used for 32-bit immediates in 64-bit mode.
 * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
- * \260..\263    - this instruction uses VEX rather than REX, with the
+ * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
 *		   V field taken from operand 0..3.
- * \270		 - this instruction uses VEX rather than REX, with the
+ * \270		 - this instruction uses VEX/XOP rather than REX, with the
 *		   V field set to 1111b.
 *
- * VEX prefixes are followed by the sequence:
- * \mm\wlp         where mm is the M field; and wlp is:
+ * VEX/XOP prefixes are followed by the sequence:
+ * \tmm\wlp        where mm is the M field; and wlp is:
 *                 00 0ww lpp
 *                 [w0] ww = 0 for W = 0
 *                 [w1] ww = 1 for W = 1
 *                 [wx] ww = 2 for W don't care (always assembled as 0)
 *                 [ww] ww = 3 for W used as REX.W
 *
+ * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 *
 * \274..\277    - a signed byte immediate operand, from operand 0..3,
 *                 which is to be extended to the operand size.
@@ -936,14 +937,14 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 	case4(0260):
 	    ins->rex |= REX_V;
 	    ins->drexdst = regval(opx);
-	    ins->vex_m = *codes++;
+	    ins->vex_cm = *codes++;
 	    ins->vex_wlp = *codes++;
 	    break;

 	case 0270:
 	    ins->rex |= REX_V;
 	    ins->drexdst = 0;
-	    ins->vex_m = *codes++;
+	    ins->vex_cm = *codes++;
 	    ins->vex_wlp = *codes++;
 	    break;

@@ -1141,7 +1142,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 	    errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
 	    return -1;
 	}
-	if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
+	if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
 	    length += 3;
 	else
 	    length += 2;
@@ -1536,9 +1537,9 @@ static void gencode(int32_t segment, int64_t offset, int bits,
 	case4(0260):
 	case 0270:
 	    codes += 2;
-	    if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
-		bytes[0] = 0xc4;
-		bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
+	    if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
+		bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
+		bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
 		bytes[2] = ((ins->rex & REX_W) << (7-3)) |
 		    ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
 		out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
--- a/disasm.c
+++ b/disasm.c
@@ -49,6 +49,7 @@ struct prefix_info {
    uint8_t wait;		/* WAIT "prefix" present */
    uint8_t lock;		/* Lock prefix present */
    uint8_t vex[3];		/* VEX prefix present */
+    uint8_t vex_c;		/* VEX "class" (VEX, XOP, ...) */
    uint8_t vex_m;		/* VEX.M field */
    uint8_t vex_v;
    uint8_t vex_lp;		/* VEX.LP fields */
@@ -1049,6 +1050,7 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
 		prefix.vex[1] = *data++;

 		prefix.rex = REX_V;
+		prefix.vex_c = 0;

 		if (prefix.vex[0] == 0xc4) {
 		    prefix.vex[2] = *data++;
@@ -1064,7 +1066,28 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
 		    prefix.vex_lp = prefix.vex[1] & 7;
 		}

-		ix = itable_VEX[prefix.vex_m][prefix.vex_lp];
+		ix = itable_VEX[0][prefix.vex_m][prefix.vex_lp];
+	    }
+	    end_prefix = true;
+	    break;
+
+	case 0x8F:
+	    if ((data[1] & 030) != 0 &&
+		(segsize == 64 || (data[1] & 0xc0) == 0xc0)) {
+		prefix.vex[0] = *data++;
+		prefix.vex[1] = *data++;
+		prefix.vex[2] = *data++;
+
+		prefix.rex = REX_V;
+		prefix.vex_c = 1;
+
+		prefix.rex |= (~prefix.vex[1] >> 5) & 7; /* REX_RXB */
+		prefix.rex |= (prefix.vex[2] >> (7-3)) & REX_W;
+		prefix.vex_m = prefix.vex[1] & 0x1f;
+		prefix.vex_v = (~prefix.vex[2] >> 3) & 15;
+		prefix.vex_lp = prefix.vex[2] & 7;
+
+		ix = itable_VEX[1][prefix.vex_m][prefix.vex_lp];
 	    }
 	    end_prefix = true;
 	    break;
--- a/insns.h
+++ b/insns.h
@@ -32,7 +32,7 @@ struct disasm_index {
 /* Tables for the assembler and disassembler, respectively */
 extern const struct itemplate * const nasm_instructions[];
 extern const struct disasm_index itable[256];
-extern const struct disasm_index * const itable_VEX[32][8];
+extern const struct disasm_index * const itable_VEX[2][32][8];

 /* Common table for the byte codes */
 extern const uint8_t nasm_bytecodes[];
--- a/insns.pl
+++ b/insns.pl
@@ -14,11 +14,17 @@
 # This should match MAX_OPERANDS from nasm.h
 $MAX_OPERANDS = 5;

-# Add VEX prefixes
+# Add VEX/XOP prefixes
+@vex_class = ( 'VEX', 'XOP' );
+$vex_classes = scalar(@vex_class);
@vexlist = ();
-for ($m = 0; $m < 32; $m++) {
-    for ($lp = 0; $lp < 8; $lp++) {
-	push(@vexlist, sprintf("VEX%02X%01X", $m, $lp));
+%vexmap = ();
+for ($c = 0; $c < $vex_classes; $c++) {
+    $vexmap{"\L$vex_class[$c]"} = $c;
+    for ($m = 0; $m < 32; $m++) {
+	for ($lp = 0; $lp < 8; $lp++) {
+	    push(@vexlist, sprintf("%s%02X%01X", $vex_class[$c], $m, $lp));
+	}
    }
 }
@disasm_prefixes = (@vexlist, @disasm_prefixes);
@@ -243,20 +249,26 @@ if ( !defined($output) || $output eq 'd' ) {
 	print D "};\n";
    }

-    print D "\nconst struct disasm_index * const itable_VEX[32][8] = {\n   ";
-    for ($m = 0; $m < 32; $m++) {
-	print D " {\n";
-	for ($lp = 0; $lp < 8; $lp++) {
-	    $vp = sprintf("VEX%02X%01X", $m, $lp);
-	    if ($is_prefix{$vp}) {
-		printf D "        itable_%s,\n", $vp;
-	    } else {
-		print  D "        NULL,\n";
+    printf D "\nconst struct disasm_index * const itable_VEX[%d][32][8] =\n",
+        $vex_classes;
+    print D "{\n";
+    for ($c = 0; $c < $vex_classes; $c++) {
+	print D "    {\n";
+	for ($m = 0; $m < 32; $m++) {
+	    print D "        {\n";
+	    for ($lp = 0; $lp < 8; $lp++) {
+		$vp = sprintf("%s%02X%01X", $vex_class[$c], $m, $lp);
+		if ($is_prefix{$vp}) {
+		    printf D "            itable_%s,\n", $vp;
+		} else {
+		    print  D "            NULL,\n";
+		}
 	    }
+	    print D "        },\n";
 	}
-	print D "    },";
+	print D "    },\n";
    }
-    print D "\n};\n";
+    print D "};\n";

    close D;
 }
@@ -521,10 +533,12 @@ sub startseq($) {
      } elsif ($c0 == 0347) {
 	  return addprefix($prefix, 0xA1, 0xA9);
      } elsif (($c0 & ~3) == 0260 || $c0 == 0270) {
-	  my $m,$wlp,$vxp;
+	  my $c,$m,$wlp;
 	  $m   = shift(@codes);
 	  $wlp = shift(@codes);
-	  $prefix .= sprintf('VEX%02X%01X', $m, $wlp & 7);
+	  $c = ($m >> 6);
+	  $m = $m & 31;
+	  $prefix .= sprintf('%s%02X%01X', $vex_class[$c], $m, $wlp & 7);
      } elsif ($c0 >= 0172 && $c0 <= 174) {
 	  shift(@codes);	# Skip is4 control byte
      } else {
@@ -644,13 +658,14 @@ sub byte_code_compile($) {
 	    push(@codes, 06) if ($oppos{'m'} & 4);
 	    push(@codes, 0200 + (($oppos{'m'} & 3) << 3) + $1);
 	    $prefix_ok = 0;
-	} elsif ($op =~ /^vex(|\..*)$/) {
+	} elsif ($op =~ /^(vex|xop)(|\..*)$/) {
+	    my $c = $vexmap{$1};
 	    my ($m,$w,$l,$p) = (undef,2,undef,0);
 	    my $has_nds = 0;
-	    foreach $oq (split(/\./, $op)) {
-		if ($oq eq 'vex') {
-		    # prefix
-		} elsif ($oq eq '128' || $oq eq 'l0') {
+	    my @subops = split(/\./, $op);
+	    shift @subops;	# Drop prefix
+	    foreach $oq (@subops) {
+		if ($oq eq '128' || $oq eq 'l0') {
 		    $l = 0;
 		} elsif ($oq eq '256' || $oq eq 'l1') {
 		    $l = 1;
@@ -692,7 +707,7 @@ sub byte_code_compile($) {
 		die "$fname: $line: 'v' operand without vex.nds or vex.ndd\n";
 	    }
 	    push(@codes, defined($oppos{'v'}) ? 0260+($oppos{'v'} & 3) : 0270,
-		 $m, ($w << 3)+($l << 2)+$p);
+		 ($c << 6)+$m, ($w << 3)+($l << 2)+$p);
 	    $prefix_ok = 0;
 	} elsif ($op =~ /^\/drex([01])$/) {
 	    my $oc0 = $1;
--- a/nasm.h
+++ b/nasm.h
@@ -599,7 +599,7 @@ enum ccode {			/* condition code names */
 #define REX_H		0x80	/* High register present, REX forbidden */
 #define REX_D		0x0100	/* Instruction uses DREX instead of REX */
 #define REX_OC		0x0200	/* DREX suffix has the OC0 bit set */
-#define REX_V		0x0400	/* Instruction uses VEX instead of REX */
+#define REX_V		0x0400	/* Instruction uses VEX/XOP instead of REX */

 /*
 * Note that because segment registers may be used as instruction
@@ -702,7 +702,7 @@ typedef struct insn {		/* an instruction itself */
    bool forw_ref;              /* is there a forward reference? */
    int rex;			/* Special REX Prefix */
    int drexdst;		/* Destination register for DREX/VEX suffix */
-    int vex_m;			/* M register for VEX prefix */
+    int vex_cm;			/* Class and M field for VEX prefix */
    int vex_wlp;		/* W, P and L information for VEX prefix */
 } insn;