diff --git a/assemble.c b/assemble.c index 60fe369e..0c3f5f1b 100644 --- a/assemble.c +++ b/assemble.c @@ -48,11 +48,25 @@ * kindly to a zero byte in the _middle_ of a compile time * string constant, so I had to put this hack in.) * \171 - placement of DREX suffix in the absence of an EA + * \172\ab - the register number from operand a in bits 7..4, with + * the 4-bit immediate from operand b in bits 0..3. * \2ab - a ModRM, calculated on EA in operand a, with the spare * field equal to digit b. * \250..\253 - same as \150..\153, except warn if the 64-bit operand * is not equal to the truncated and sign-extended 32-bit * operand; used for 32-bit immediates in 64-bit mode. + * \260..\263 - this instruction uses VEX rather than REX, with the + * V field taken from operand 0..3. + * \270 - this instruction uses VEX rather than REX, with the + * V field set to 1111b. + * + * VEX prefixes are followed by the sequence: + * \1mm\1wp where mm is the M field; and wp is: + * 01 0ww lpp + * ww = 0 for W = 0 + * ww = 1 for W = 1 + * ww = 2 for W used as REX.W + * * \310 - indicates fixed 16-bit address size, i.e. optional 0x67. * \311 - indicates fixed 32-bit address size, i.e. optional 0x67. * \312 - (disassembler only) marker on LOOP, LOOPxx instructions. @@ -190,7 +204,7 @@ static void out(int64_t offset, int32_t segto, const void *data, errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8"); return; } - + WRITEADDR(q, *(int64_t *)data, size); data = p; type = OUT_RAWDATA; @@ -964,7 +978,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, case 0163: length++; ins->rex |= REX_D; - ins->drexdst = regval(&ins->oprs[c & 3]); + ins->drexdst = regval(opx); break; case 0164: case 0165: @@ -972,19 +986,40 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, case 0167: length++; ins->rex |= REX_D|REX_OC; - ins->drexdst = regval(&ins->oprs[c & 3]); + ins->drexdst = regval(opx); break; case 0170: length++; break; case 0171: break; + case 0172: + codes++; + length++; + break; case 0250: case 0251: case 0252: case 0253: length += is_sbyte64(ins, c & 3) ? 1 : 4; break; + case 0260: + case 0261: + case 0262: + case 0263: + length += 2; + ins->rex |= REX_V; + ins->drexdst = regval(opx); + ins->vex_m = *codes++; + ins->vex_wlp = *codes++; + break; + case 0270: + length += 2; + ins->rex |= REX_V; + ins->drexdst = 0; + ins->vex_m = *codes++; + ins->vex_wlp = *codes++; + break; case 0300: case 0301: case 0302: @@ -1093,12 +1128,40 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, ins->rex &= rex_mask; - if (ins->rex & REX_D) { + if (ins->rex & REX_V) { + int bad32 = REX_R|REX_W|REX_X|REX_B; + + if (ins->rex & REX_H) { + errfunc(ERR_NONFATAL, "cannot use high register in vex instruction"); + return -1; + } + switch (ins->vex_wlp & 030) { + case 000: + ins->rex &= ~REX_W; + break; + case 010: + ins->rex |= REX_W; + bad32 &= ~REX_W; + break; + default: + /* Follow REX_W */ + break; + } + + if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) { + errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); + return -1; + } + if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B))) + length += 3; + else + length += 2; + } else if (ins->rex & REX_D) { if (ins->rex & REX_H) { errfunc(ERR_NONFATAL, "cannot use high register in drex instruction"); return -1; } - if (bits != 64 && ((ins->rex & (REX_W|REX_X|REX_B)) || + if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) || ins->drexdst > 7)) { errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); return -1; @@ -1126,7 +1189,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, } #define EMIT_REX() \ - if (!(ins->rex & REX_D) && (ins->rex & REX_REAL) && (bits == 64)) { \ + if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) && (bits == 64)) { \ ins->rex = (ins->rex & REX_REAL)|REX_P; \ out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \ ins->rex = 0; \ @@ -1507,6 +1570,26 @@ static void gencode(int32_t segment, int64_t offset, int bits, offset++; break; + case 0172: + c = *codes++; + opx = &ins->oprs[c >> 3]; + bytes[0] = regvals[opx->basereg] << 4; + opx = &ins->oprs[c & 7]; + if (opx->segment != NO_SEG || opx->wrt != NO_SEG) { + errfunc(ERR_NONFATAL, + "non-absolute expression not permitted as argument %d", + c & 7); + } else { + if (opx->offset & ~15) { + errfunc(ERR_WARNING | ERR_WARN_NOV, + "four-bit argument exceeds bounds"); + } + bytes[0] |= opx->offset & 15; + } + out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG); + offset++; + break; + case 0250: case 0251: case 0252: @@ -1525,6 +1608,28 @@ static void gencode(int32_t segment, int64_t offset, int bits, } break; + case 0260: + case 0261: + case 0262: + case 0263: + case 0270: + codes += 2; + if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) { + bytes[0] = 0xc4; + bytes[1] = ins->vex_m | ((ins->rex & 7) << 5); + bytes[2] = ((ins->rex & REX_W) << (7-3)) | + (ins->drexdst << 3) | (ins->vex_wlp & 07); + out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG); + offset += 3; + } else { + bytes[0] = 0xc5; + bytes[1] = ((ins->rex & REX_R) << (7-2)) | + (ins->drexdst << 3) | (ins->vex_wlp & 07); + out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG); + offset += 2; + } + break; + case 0300: case 0301: case 0302: @@ -1887,7 +1992,7 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) int32_t type = instruction->oprs[i].type; if (!(type & SIZE_MASK)) type |= size[i]; - + if (itemp->opd[i] & SAME_AS) { int j = itemp->opd[i] & ~SAME_AS; if (type != instruction->oprs[j].type || diff --git a/disasm.c b/disasm.c index e6b97ad7..8263d508 100644 --- a/disasm.c +++ b/disasm.c @@ -643,6 +643,16 @@ static int matches(const struct itemplate *t, uint8_t *data, return false; break; + case 0172: + { + uint8_t ximm = *data++; + c = *r++; + ins->oprs[c >> 3].basereg = ximm >> 4; + ins->oprs[c >> 3].segment |= SEG_RMREG; + ins->oprs[c & 7].offset = ximm & 15; + } + break; + case4(0200): case4(0204): case4(0210): diff --git a/insns.dat b/insns.dat index 23027035..7d8e050d 100644 --- a/insns.dat +++ b/insns.dat @@ -2023,6 +2023,12 @@ GETSEC void \2\x0F\x37 KATMAI PFRCP mmxreg,mmxrm \323\2\x0F\x0F\110\1\x86 PENT,3DNOW,SQ,CYRIX PFRSQRT mmxreg,mmxrm \323\2\x0F\x0F\110\1\x87 PENT,3DNOW,SQ,CYRIX +;# Intel AVX instructions +VPERMIL2PS xmmreg,xmmreg,xmmrm,xmmreg,imm \260\103\101\1\x48\123\172\34 AVX,SANDYBANKS +VPERMIL2PS xmmreg,xmmreg,xmmreg,xmmrm,imm \260\103\111\1\x48\132\172\14 AVX,SANDYBANKS +VPERMIL2PS ymmreg,ymmreg,ymmrm,ymmreg,imm \260\103\105\1\x48\123\172\34 AVX,SANDYBANKS +VPERMIL2PS ymmreg,ymmreg,ymmreg,ymmrm,imm \260\103\115\1\x48\132\172\14 AVX,SANDYBANKS + ;# VIA (Centaur) security instructions XSTORE void \3\x0F\xA7\xC0 PENT,CYRIX XCRYPTECB void \333\3\x0F\xA7\xC8 PENT,CYRIX diff --git a/insns.h b/insns.h index 49cc1851..7a0ecff7 100644 --- a/insns.h +++ b/insns.h @@ -98,6 +98,7 @@ extern const struct disasm_index itable[256]; #define IF_SSE41 0x00800000UL /* it's an SSE4.1 instruction */ #define IF_SSE42 0x00800000UL /* HACK NEED TO REORGANIZE THESE BITS */ #define IF_SSE5 0x00800000UL /* HACK NEED TO REORGANIZE THESE BITS */ +#define IF_AVX 0x00800000UL /* HACK NEED TO REORGANIZE THESE BITS */ #define IF_PMASK 0xFF000000UL /* the mask for processor types */ #define IF_PLEVEL 0x0F000000UL /* the mask for processor instr. level */ /* also the highest possible processor */ @@ -114,6 +115,7 @@ extern const struct disasm_index itable[256]; #define IF_PRESCOTT 0x09000000UL /* Prescott instructions */ #define IF_X86_64 0x0A000000UL /* x86-64 instruction (long or legacy mode) */ #define IF_NEHALEM 0x0B000000UL /* Nehalem instruction */ +#define IF_SANDYBANKS 0x0C000000UL /* Sandy Banks instruction */ #define IF_X64 (IF_LONG|IF_X86_64) #define IF_IA64 0x0F000000UL /* IA64 instructions (in x86 mode) */ #define IF_CYRIX 0x10000000UL /* Cyrix-specific instruction */ diff --git a/insns.pl b/insns.pl index 8192e90e..3218ef4e 100644 --- a/insns.pl +++ b/insns.pl @@ -241,8 +241,7 @@ sub format { $operands =~ s/imm(\d+)/imm|bits$1/g; $operands =~ s/imm/immediate/g; $operands =~ s/rm(\d+)/rm_gpr|bits$1/g; - $operands =~ s/mmxrm/rm_mmx/g; - $operands =~ s/xmmrm/rm_xmm/g; + $operands =~ s/(mmx|xmm|ymm)rm/rm_$1/g; $operands =~ s/\=([0-9]+)/same_as|$1/g; if ($operands eq 'void') { @ops = (); diff --git a/nasm.c b/nasm.c index 2af74ef1..85865e8e 100644 --- a/nasm.c +++ b/nasm.c @@ -216,7 +216,7 @@ static void define_macros_early(void) strftime(temp, sizeof temp, "__UTC_TIME_NUM__=%H%M%S", &gm); pp_pre_define(temp); } - + if (gm_p) posix_time = posix_mktime(&gm); else if (lt_p) @@ -502,7 +502,7 @@ static bool process_arg(char *p, char *q) case 'O': /* Optimization level */ { int opt; - + if (!*param) { /* Naked -O == -Ox */ optimizing = INT_MAX >> 1; /* Almost unlimited */ @@ -512,7 +512,7 @@ static bool process_arg(char *p, char *q) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': opt = strtoul(param, ¶m, 10); - + /* -O0 -> optimizing == -1, 0.98 behaviour */ /* -O1 -> optimizing == 0, 0.98.09 behaviour */ if (opt < 2) @@ -520,18 +520,18 @@ static bool process_arg(char *p, char *q) else optimizing = opt; break; - + case 'v': case '+': param++; opt_verbose_info = true; break; - + case 'x': param++; optimizing = INT_MAX >> 1; /* Almost unlimited */ break; - + default: report_error(ERR_FATAL, "unknown optimization option -O%c\n", diff --git a/nasm.h b/nasm.h index 0f8d51fc..052bbe73 100644 --- a/nasm.h +++ b/nasm.h @@ -540,7 +540,7 @@ typedef uint32_t opflags_t; #define IP_REL 0x0002c000U /* IP-relative offset */ /* memory which matches any type of r/m operand */ -#define MEMORY_ANY (MEMORY|RM_GPR|RM_MMX|RM_XMM) +#define MEMORY_ANY (MEMORY|RM_GPR|RM_MMX|RM_XMM|RM_YMM) /* special type of immediate operand */ #define UNITY 0x00012000U /* for shift/rotate instructions */ @@ -564,16 +564,17 @@ enum ccode { /* condition code names */ /* * REX flags */ -#define REX_OC 0x0200 /* DREX suffix has the OC0 bit set */ -#define REX_D 0x0100 /* Instruction uses DREX instead of REX */ -#define REX_H 0x80 /* High register present, REX forbidden */ -#define REX_P 0x40 /* REX prefix present/required */ -#define REX_L 0x20 /* Use LOCK prefix instead of REX.R */ -#define REX_W 0x08 /* 64-bit operand size */ -#define REX_R 0x04 /* ModRM reg extension */ -#define REX_X 0x02 /* SIB index extension */ -#define REX_B 0x01 /* ModRM r/m extension */ #define REX_REAL 0x4f /* Actual REX prefix bits */ +#define REX_B 0x01 /* ModRM r/m extension */ +#define REX_X 0x02 /* SIB index extension */ +#define REX_R 0x04 /* ModRM reg extension */ +#define REX_W 0x08 /* 64-bit operand size */ +#define REX_L 0x20 /* Use LOCK prefix instead of REX.R */ +#define REX_P 0x40 /* REX prefix present/required */ +#define REX_H 0x80 /* High register present, REX forbidden */ +#define REX_D 0x0100 /* Instruction uses DREX instead of REX */ +#define REX_OC 0x0200 /* DREX suffix has the OC0 bit set */ +#define REX_V 0x0400 /* Instruction uses VEX instead of REX */ /* * Note that because segment registers may be used as instruction @@ -651,7 +652,7 @@ enum prefix_pos { MAXPREFIX /* Total number of prefix slots */ }; -#define MAX_OPERANDS 4 +#define MAX_OPERANDS 5 typedef struct insn { /* an instruction itself */ char *label; /* the label defined, or NULL */ @@ -667,7 +668,9 @@ typedef struct insn { /* an instruction itself */ int32_t times; /* repeat count (TIMES prefix) */ int forw_ref; /* is there a forward reference? */ int rex; /* Special REX Prefix */ - int drexdst; /* Destination register for DREX suffix */ + int drexdst; /* Destination register for DREX/VEX suffix */ + int vex_m; /* M register for VEX prefix */ + int vex_wlp; /* W, P and L information for VEX prefix */ } insn; enum geninfo { GI_SWITCH }; diff --git a/nasmlib.c b/nasmlib.c index d5cf207f..03a28bb9 100644 --- a/nasmlib.c +++ b/nasmlib.c @@ -671,7 +671,7 @@ void saa_wleb128u(struct SAA *psaa, int value) ptemp++; len++; } while (value != 0); - saa_wbytes(psaa, temp, len); + saa_wbytes(psaa, temp, len); } /* write signed LEB128 value to SAA */ @@ -703,8 +703,8 @@ void saa_wleb128s(struct SAA *psaa, int value) *ptemp = byte; ptemp++; len++; - } - saa_wbytes(psaa, temp, len); + } + saa_wbytes(psaa, temp, len); } void saa_rewind(struct SAA *s) diff --git a/preproc.c b/preproc.c index 3a12fc84..f3ef2729 100644 --- a/preproc.c +++ b/preproc.c @@ -1585,14 +1585,14 @@ static bool if_condition(Token * tline, enum preproc_token ct) iftype: t = tline = expand_smacro(tline); - + while (tok_type_(t, TOK_WHITESPACE) || (needtype == TOK_NUMBER && tok_type_(t, TOK_OTHER) && (t->text[0] == '-' || t->text[0] == '+') && !t->text[1])) t = t->next; - + j = tok_type_(t, needtype); break; diff --git a/test/avx.asm b/test/avx.asm new file mode 100644 index 00000000..9f35940c --- /dev/null +++ b/test/avx.asm @@ -0,0 +1,9 @@ + bits 64 + vpermil2ps xmm0,xmm1,[rdi],xmm3,0 + vpermil2ps xmm0,xmm1,xmm2,[rdi],1 + vpermil2ps ymm0,ymm1,ymm2,ymm3,2 + vpermil2ps ymm0,ymm1,ymm2,[rdi],3 + vpermil2ps ymm0,ymm1,[rdi],ymm3,2 + vpermil2ps ymm0,ymm1,ymm2,[rdi],3 + + \ No newline at end of file diff --git a/wsaa.h b/wsaa.h index 69124d69..63f8335d 100644 --- a/wsaa.h +++ b/wsaa.h @@ -32,13 +32,13 @@ #else /* !X86_MEMORY */ -#define WSAACHAR(s,p,v) \ +#define WSAACHAR(s,p,v) \ do { \ *(uint8_t *)(p) = (v); \ saa_wbytes(s, p, 1); \ } while (0) -#define WSAASHORT(s,p,v) \ +#define WSAASHORT(s,p,v) \ do { \ uint16_t _wss_v = (v); \ uint8_t *_wss_p = (uint8_t *)(p); \ @@ -58,7 +58,7 @@ saa_wbytes(s, _wsl_p, 4); \ } while (0) -#define WSAADLONG(s,p,v) \ +#define WSAADLONG(s,p,v) \ do { \ uint64_t _wsq_v = (v); \ uint8_t *_wsq_p = (uint8_t *)(p); \