From 3e458a89d8c985527313305b55d5725cf382e862 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 1 May 2017 20:28:29 -0700 Subject: [PATCH] a) Fix handling of DZ/ZWORD; b) don't crash on TIMES JMP a) Fix a number of missing instances of DZ and ZWORD. b) NASM would crash if TIMES was used on an instruction which varies in size, e.g. JMP. Fix this by moving the handling of TIMES to a higher level, so we generate the instruction "de novo" for each iteration. The exception is INCBIN, so we can avoid reading the included file over and over. c) When using the RESx instructions, just fold TIMES into the reserved space size; there is absolutely no point in iterating over it. Signed-off-by: H. Peter Anvin --- asm/assemble.c | 99 ++++++++++---------------- asm/nasm.c | 177 ++++++++++++++++++++++++++-------------------- asm/parser.c | 47 +++--------- common/common.c | 29 ++++++++ include/insns.h | 19 +++++ include/nasm.h | 1 + include/nasmlib.h | 2 - output/codeview.c | 3 + output/outelf.c | 4 ++ test/incbin.asm | 1 + test/times.asm | 13 ++++ 11 files changed, 216 insertions(+), 179 deletions(-) diff --git a/asm/assemble.c b/asm/assemble.c index a7a5f468..9b720772 100644 --- a/asm/assemble.c +++ b/asm/assemble.c @@ -546,7 +546,6 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction) struct out_data data; const struct itemplate *temp; enum match_result m; - int32_t itimes; int64_t wsize; /* size for DB etc. */ nasm_zero(data); @@ -562,52 +561,40 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction) if (wsize) { extop *e; - int32_t t = instruction->times; - - if (t < 0) - nasm_panic(0, "instruction->times < 0 (%"PRId32") in assemble()", t); - - while (t--) { /* repeat TIMES times */ - list_for_each(e, instruction->eops) { - if (e->type == EOT_DB_NUMBER) { - if (wsize > 8) { - nasm_error(ERR_NONFATAL, - "integer supplied to a DT, DO or DY" - " instruction"); - } else { - data.insoffs = 0; - data.type = e->relative ? 
OUT_RELADDR : OUT_ADDRESS; - data.inslen = data.size = wsize; - data.toffset = e->offset; - data.tsegment = e->segment; - data.twrt = e->wrt; - data.relbase = 0; - out(&data); - } - } else if (e->type == EOT_DB_STRING || - e->type == EOT_DB_STRING_FREE) { - int align = e->stringlen % wsize; - if (align) - align = wsize - align; + list_for_each(e, instruction->eops) { + if (e->type == EOT_DB_NUMBER) { + if (wsize > 8) { + nasm_error(ERR_NONFATAL, + "integer supplied to a DT, DO, DY or DZ" + " instruction"); + } else { data.insoffs = 0; - data.inslen = e->stringlen + align; - - out_rawdata(&data, e->stringval, e->stringlen); - out_rawdata(&data, zero_buffer, align); + data.type = e->relative ? OUT_RELADDR : OUT_ADDRESS; + data.inslen = data.size = wsize; + data.toffset = e->offset; + data.tsegment = e->segment; + data.twrt = e->wrt; + data.relbase = 0; + out(&data); } - } - if (t > 0 && t == instruction->times - 1) { - lfmt->set_offset(start); - lfmt->uplevel(LIST_TIMES); + } else if (e->type == EOT_DB_STRING || + e->type == EOT_DB_STRING_FREE) { + int align = e->stringlen % wsize; + if (align) + align = wsize - align; + + data.insoffs = 0; + data.inslen = e->stringlen + align; + + out_rawdata(&data, e->stringval, e->stringlen); + out_rawdata(&data, zero_buffer, align); } } - if (instruction->times > 1) - lfmt->downlevel(LIST_TIMES); } else if (instruction->opcode == I_INCBIN) { const char *fname = instruction->eops->stringval; FILE *fp; - size_t t = instruction->times; + size_t t = instruction->times; /* INCBIN handles TIMES by itself */ off_t base = 0; off_t len; const void *map = NULL; @@ -717,6 +704,7 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction) nasm_unmap_file(map, len); fclose(fp); done: + instruction->times = 1; /* Tell the upper layer not to iterate */ ; } else { /* "Real" instruction */ @@ -730,27 +718,15 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction) /* Matches! 
*/ int64_t insn_size = calcsize(data.segment, data.offset, bits, instruction, temp); - itimes = instruction->times; - if (insn_size < 0) /* shouldn't be, on pass two */ - nasm_panic(0, "errors made it through from pass one"); + nasm_assert(insn_size >= 0); data.itemp = temp; data.bits = bits; + data.insoffs = 0; + data.inslen = insn_size; - while (itimes--) { - data.insoffs = 0; - data.inslen = insn_size; - - gencode(&data, instruction); - nasm_assert(data.insoffs == insn_size); - - if (itimes > 0 && itimes == instruction->times - 1) { - lfmt->set_offset(start); - lfmt->uplevel(LIST_TIMES); - } - } - if (instruction->times > 1) - lfmt->downlevel(LIST_TIMES); + gencode(&data, instruction); + nasm_assert(data.insoffs == insn_size); } else { /* No match */ switch (m) { @@ -795,6 +771,8 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction) "invalid combination of opcode and operands"); break; } + + instruction->times = 1; /* Avoid repeated error messages */ } } return data.offset - start; @@ -808,15 +786,13 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction) if (instruction->opcode == I_none) return 0; - if (instruction->opcode == I_DB || instruction->opcode == I_DW || - instruction->opcode == I_DD || instruction->opcode == I_DQ || - instruction->opcode == I_DT || instruction->opcode == I_DO || - instruction->opcode == I_DY) { + if (opcode_is_db(instruction->opcode)) { extop *e; int32_t isize, osize, wsize; isize = 0; wsize = idata_bytes(instruction->opcode); + nasm_assert(wsize > 0); list_for_each(e, instruction->eops) { int32_t align; @@ -860,6 +836,9 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction) } } + len *= instruction->times; + instruction->times = 1; /* Tell the upper layer to not iterate */ + return len; } diff --git a/asm/nasm.c b/asm/nasm.c index 288cb0e1..8d302811 100644 --- a/asm/nasm.c +++ b/asm/nasm.c @@ -1337,93 +1337,114 @@ static void assemble_file(char 
*fname, StrList **depend_ptr) } } } else { /* instruction isn't an EQU */ + int32_t n; - if (pass1 == 1) { - int64_t l = insn_size(location.segment, offs, globalbits, - &output_ins); - l *= output_ins.times; + nasm_assert(output_ins.times >= 0); - /* if (using_debug_info) && output_ins.opcode != -1) */ - if (using_debug_info) - { /* fbk 03/25/01 */ + for (n = 1; n <= output_ins.times; n++) { + if (pass1 == 1) { + int64_t l = insn_size(location.segment, offs, + globalbits, &output_ins); + + /* if (using_debug_info) && output_ins.opcode != -1) */ + if (using_debug_info) + { /* fbk 03/25/01 */ /* this is done here so we can do debug type info */ - int32_t typeinfo = - TYS_ELEMENTS(output_ins.operands); - switch (output_ins.opcode) { - case I_RESB: - typeinfo = - TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_BYTE; - break; - case I_RESW: - typeinfo = - TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_WORD; - break; - case I_RESD: - typeinfo = - TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_DWORD; - break; - case I_RESQ: - typeinfo = - TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_QWORD; - break; - case I_REST: - typeinfo = - TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_TBYTE; - break; - case I_RESO: - typeinfo = - TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_OWORD; - break; - case I_RESY: - typeinfo = - TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_YWORD; - break; - case I_DB: - typeinfo |= TY_BYTE; - break; - case I_DW: - typeinfo |= TY_WORD; - break; - case I_DD: - if (output_ins.eops_float) - typeinfo |= TY_FLOAT; - else - typeinfo |= TY_DWORD; - break; - case I_DQ: - typeinfo |= TY_QWORD; - break; - case I_DT: - typeinfo |= TY_TBYTE; - break; - case I_DO: - typeinfo |= TY_OWORD; - break; - case I_DY: - typeinfo |= TY_YWORD; - break; - default: - typeinfo = TY_LABEL; + int32_t typeinfo = + TYS_ELEMENTS(output_ins.operands); + switch (output_ins.opcode) { + case I_RESB: + typeinfo = + TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_BYTE; + break; + case I_RESW: + typeinfo = + 
TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_WORD; + break; + case I_RESD: + typeinfo = + TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_DWORD; + break; + case I_RESQ: + typeinfo = + TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_QWORD; + break; + case I_REST: + typeinfo = + TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_TBYTE; + break; + case I_RESO: + typeinfo = + TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_OWORD; + break; + case I_RESY: + typeinfo = + TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_YWORD; + break; + case I_RESZ: + typeinfo = + TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_ZWORD; + break; + case I_DB: + typeinfo |= TY_BYTE; + break; + case I_DW: + typeinfo |= TY_WORD; + break; + case I_DD: + if (output_ins.eops_float) + typeinfo |= TY_FLOAT; + else + typeinfo |= TY_DWORD; + break; + case I_DQ: + typeinfo |= TY_QWORD; + break; + case I_DT: + typeinfo |= TY_TBYTE; + break; + case I_DO: + typeinfo |= TY_OWORD; + break; + case I_DY: + typeinfo |= TY_YWORD; + break; + case I_DZ: + typeinfo |= TY_ZWORD; + break; + default: + typeinfo = TY_LABEL; + break; + } + dfmt->debug_typevalue(typeinfo); } - dfmt->debug_typevalue(typeinfo); - } - if (l != -1) { - offs += l; + /* + * For INCBIN, let the code in assemble + * handle TIMES, so we don't have to read the + * input file over and over. 
+ */ + if (l != -1) { + offs += l; + set_curr_offs(offs); + } + /* + * else l == -1 => invalid instruction, which will be + * flagged as an error on pass 2 + */ + } else { + if (n == 2) + lfmt->uplevel(LIST_TIMES); + offs += assemble(location.segment, offs, + globalbits, &output_ins); set_curr_offs(offs); } - /* - * else l == -1 => invalid instruction, which will be - * flagged as an error on pass 2 - */ + } /* not an EQU */ + } + if (output_ins.times > 1) + lfmt->downlevel(LIST_TIMES); - } else { - offs += assemble(location.segment, offs, globalbits, &output_ins); - set_curr_offs(offs); - - } - } /* not an EQU */ cleanup_insn(&output_ins); end_of_line: diff --git a/asm/parser.c b/asm/parser.c index d1e82ed0..d701d7fd 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -444,6 +444,9 @@ restart_parse: stdscan_set(buffer); i = stdscan(NULL, &tokval); + nasm_static_assert(P_none == 0); + memset(result->prefixes, P_none, sizeof(result->prefixes)); + result->times = 1; /* No TIMES either yet */ result->label = NULL; /* Assume no label */ result->eops = NULL; /* must do this, whatever happens */ result->operands = 0; /* must initialize this */ @@ -491,10 +494,6 @@ restart_parse: if (i == TOKEN_EOS) goto fail; - nasm_static_assert(P_none == 0); - memset(result->prefixes, P_none, sizeof(result->prefixes)); - result->times = 1L; - while (i == TOKEN_PREFIX || (i == TOKEN_REG && IS_SREG(tokval.t_integer))) { first = false; @@ -581,11 +580,7 @@ restart_parse: } else critical = (pass == 2 ? 
2 : 0); - if (result->opcode == I_DB || result->opcode == I_DW || - result->opcode == I_DD || result->opcode == I_DQ || - result->opcode == I_DT || result->opcode == I_DO || - result->opcode == I_DY || result->opcode == I_DZ || - result->opcode == I_INCBIN) { + if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) { extop *eop, **tail = &result->eops, **fixptr; int oper_num = 0; int32_t sign; @@ -1133,37 +1128,11 @@ is_expression: /* * Transform RESW, RESD, RESQ, REST, RESO, RESY, RESZ into RESB. */ - switch (result->opcode) { - case I_RESW: + if (opcode_is_resb(result->opcode)) { + result->oprs[0].offset *= resv_bytes(result->opcode); + result->oprs[0].offset *= result->times; + result->times = 1; result->opcode = I_RESB; - result->oprs[0].offset *= 2; - break; - case I_RESD: - result->opcode = I_RESB; - result->oprs[0].offset *= 4; - break; - case I_RESQ: - result->opcode = I_RESB; - result->oprs[0].offset *= 8; - break; - case I_REST: - result->opcode = I_RESB; - result->oprs[0].offset *= 10; - break; - case I_RESO: - result->opcode = I_RESB; - result->oprs[0].offset *= 16; - break; - case I_RESY: - result->opcode = I_RESB; - result->oprs[0].offset *= 32; - break; - case I_RESZ: - result->opcode = I_RESB; - result->oprs[0].offset *= 64; - break; - default: - break; } return result; diff --git a/common/common.c b/common/common.c index 9c91f910..5a546207 100644 --- a/common/common.c +++ b/common/common.c @@ -91,3 +91,32 @@ int idata_bytes(int opcode) return 0; } } + +/* + * Element size in bytes reserved by a RESx opcode (RESB = 1 up to RESZ = 64); returns -1 for I_none and 0 for any other opcode. NOTE(review): insns.h declares the parameter as enum opcode, not int -- confirm the difference is intended + */ +int resv_bytes(int opcode) +{ + switch (opcode) { + case I_RESB: + return 1; + case I_RESW: + return 2; + case I_RESD: + return 4; + case I_RESQ: + return 8; + case I_REST: + return 10; + case I_RESO: + return 16; + case I_RESY: + return 32; + case I_RESZ: + return 64; + case I_none: + return -1; + default: + return 0; + } +} diff --git a/include/insns.h b/include/insns.h index 8f04d13c..0a1cd741 100644 --- a/include/insns.h +++ 
b/include/insns.h @@ -48,4 +48,23 @@ extern const uint8_t nasm_bytecodes[]; */ #define ITEMPLATE_END {-1,-1,{-1,-1,-1,-1,-1},{-1,-1,-1,-1,-1},NULL,0} +/* Width in bytes of Dx and RESx instructions; the predicates below treat a positive result as membership in the class */ +int const_func idata_bytes(enum opcode opcode); +int const_func resv_bytes(enum opcode opcode); + +/* + * Pseudo-op tests + */ +/* True for a DB-type instruction (DB, DW, ...), i.e. when idata_bytes() is positive */ +static inline bool opcode_is_db(enum opcode opcode) +{ + return idata_bytes(opcode) > 0; +} + +/* True for a RESB-type instruction (RESB, RESW, ...), i.e. when resv_bytes() is positive */ +static inline bool opcode_is_resb(enum opcode opcode) +{ + return resv_bytes(opcode) > 0; +} + #endif /* NASM_INSNS_H */ diff --git a/include/nasm.h b/include/nasm.h index a475aa38..a50062ef 100644 --- a/include/nasm.h +++ b/include/nasm.h @@ -1053,6 +1053,7 @@ extern const struct dfmt *dfmt; #define TY_TBYTE 0x38 #define TY_OWORD 0x40 #define TY_YWORD 0x48 +#define TY_ZWORD 0x50 #define TY_COMMON 0xE0 #define TY_SEG 0xE8 #define TY_EXTERN 0xF0 diff --git a/include/nasmlib.h b/include/nasmlib.h index 672764c6..4510f187 100644 --- a/include/nasmlib.h +++ b/include/nasmlib.h @@ -505,8 +505,6 @@ static inline int64_t const_func signed_bits(int64_t value, int bits) return value; } -int const_func idata_bytes(int opcode); - /* check if value is power of 2 */ #define is_power2(v) ((v) && ((v) & ((v) - 1)) == 0) diff --git a/output/codeview.c b/output/codeview.c index 579ac8d6..c60d4920 100644 --- a/output/codeview.c +++ b/output/codeview.c @@ -282,6 +282,9 @@ static void cv8_typevalue(int32_t type) case TY_YWORD: cv8_state.last_sym->symtype = TYPE_REAL256; break; + case TY_ZWORD: + cv8_state.last_sym->symtype = TYPE_REAL512; + break; case TY_UNKNOWN: break; case TY_LABEL: diff --git a/output/outelf.c b/output/outelf.c index 1ee6dacc..133a8dc8 100644 --- a/output/outelf.c +++ b/output/outelf.c @@ -2435,6 +2435,10 @@ static void debug_typevalue(int32_t type) ssize = 32; stype = STT_OBJECT; break; + case TY_ZWORD: + ssize = 64; + stype = STT_OBJECT; + break; case TY_COMMON: ssize 
= 0; stype = STT_COMMON; diff --git a/test/incbin.asm b/test/incbin.asm index 20aa4d9f..40b18e49 100644 --- a/test/incbin.asm +++ b/test/incbin.asm @@ -4,3 +4,4 @@ section more start=0x1000000 db '*** TWELVE ***', 0Ah times 12 incbin "incbin.data",32 + db '', 0Ah diff --git a/test/times.asm b/test/times.asm index a8e3d58e..b8f7ed08 100644 --- a/test/times.asm +++ b/test/times.asm @@ -6,3 +6,16 @@ ; Broken per BR 3392279 bswap r12d times 4 bswap r12d + +; Forward jump + times 128 jmp there + +there: + nop + +; Backwards jump + times 128 jmp there + + section .bss + times 0x10 resb 0x20 + resb 1