mirror of
https://github.com/netwide-assembler/nasm.git
synced 2025-07-24 10:25:42 -04:00
masm.mac, parser: VERY limited MASM emulation package
Very limited MASM emulation. The parser has been extended to emulate the PTR keyword if the corresponding macro is enabled, and the syntax displacement[index] for memory operations is now recognized. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
parent
02b60ddd1c
commit
8981724f17
310
asm/parser.c
310
asm/parser.c
@ -234,96 +234,91 @@ static bool parse_braces(decoflags_t *decoflags)
|
||||
}
|
||||
}
|
||||
|
||||
static inline const expr *next_expr(const expr *e, const expr **next_list)
|
||||
{
|
||||
e++;
|
||||
if (!e->type) {
|
||||
if (next_list) {
|
||||
e = *next_list;
|
||||
*next_list = NULL;
|
||||
} else {
|
||||
e = NULL;
|
||||
}
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
static inline void init_operand(operand *op)
|
||||
{
|
||||
memset(op, 0, sizeof *op);
|
||||
|
||||
op->basereg = -1;
|
||||
op->indexreg = -1;
|
||||
op->segment = NO_SEG;
|
||||
op->wrt = NO_SEG;
|
||||
}
|
||||
|
||||
static int parse_mref(operand *op, const expr *e)
|
||||
{
|
||||
int b, i, s; /* basereg, indexreg, scale */
|
||||
int64_t o; /* offset */
|
||||
|
||||
b = i = -1;
|
||||
o = s = 0;
|
||||
op->segment = op->wrt = NO_SEG;
|
||||
b = op->basereg;
|
||||
i = op->indexreg;
|
||||
s = op->scale;
|
||||
o = op->offset;
|
||||
|
||||
if (e->type && e->type <= EXPR_REG_END) { /* this bit's a register */
|
||||
bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
|
||||
for (; e->type; e++) {
|
||||
if (e->type <= EXPR_REG_END) {
|
||||
bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
|
||||
|
||||
if (is_gpr && e->value == 1)
|
||||
b = e->type; /* It can be basereg */
|
||||
else /* No, it has to be indexreg */
|
||||
i = e->type, s = e->value;
|
||||
e++;
|
||||
}
|
||||
if (e->type && e->type <= EXPR_REG_END) { /* it's a 2nd register */
|
||||
bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
|
||||
|
||||
if (b != -1) /* If the first was the base, ... */
|
||||
i = e->type, s = e->value; /* second has to be indexreg */
|
||||
|
||||
else if (!is_gpr || e->value != 1) {
|
||||
/* If both want to be index */
|
||||
nasm_nonfatal("invalid effective address: two index registers");
|
||||
return -1;
|
||||
} else
|
||||
b = e->type;
|
||||
e++;
|
||||
}
|
||||
|
||||
if (e->type) { /* is there an offset? */
|
||||
if (e->type <= EXPR_REG_END) { /* in fact, is there an error? */
|
||||
nasm_nonfatal("invalid effective address: impossible register");
|
||||
return -1;
|
||||
} else {
|
||||
if (e->type == EXPR_UNKNOWN) {
|
||||
op->opflags |= OPFLAG_UNKNOWN;
|
||||
o = 0; /* doesn't matter what */
|
||||
while (e->type)
|
||||
e++; /* go to the end of the line */
|
||||
if (is_gpr && e->value == 1 && b == -1) {
|
||||
/* It can be basereg */
|
||||
b = e->type;
|
||||
} else if (i == -1) {
|
||||
/* Must be index register */
|
||||
i = e->type;
|
||||
s = e->value;
|
||||
} else {
|
||||
if (e->type == EXPR_SIMPLE) {
|
||||
o = e->value;
|
||||
e++;
|
||||
}
|
||||
if (e->type == EXPR_WRT) {
|
||||
op->wrt = e->value;
|
||||
e++;
|
||||
}
|
||||
/*
|
||||
* Look for a segment base type.
|
||||
*/
|
||||
for (; e->type; e++) {
|
||||
if (!e->value)
|
||||
continue;
|
||||
|
||||
if (e->type <= EXPR_REG_END) {
|
||||
nasm_nonfatal("invalid effective address: too many registers");
|
||||
return -1;
|
||||
} else if (e->type < EXPR_SEGBASE) {
|
||||
nasm_nonfatal("invalid effective address: bad subexpression type");
|
||||
return -1;
|
||||
} else if (e->value == 1) {
|
||||
if (op->segment != NO_SEG) {
|
||||
nasm_nonfatal("invalid effective address: multiple base segments");
|
||||
return -1;
|
||||
}
|
||||
op->segment = e->type - EXPR_SEGBASE;
|
||||
} else if (e->value == -1 &&
|
||||
e->type == location.segment + EXPR_SEGBASE &&
|
||||
!(op->opflags & OPFLAG_RELATIVE)) {
|
||||
op->opflags |= OPFLAG_RELATIVE;
|
||||
} else {
|
||||
nasm_nonfatal("invalid effective address: impossible segment base multiplier");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (b == -1)
|
||||
nasm_nonfatal("invalid effective address: two index registers");
|
||||
else if (!is_gpr)
|
||||
nasm_nonfatal("invalid effective address: impossible register");
|
||||
else
|
||||
nasm_nonfatal("invalid effective address: too many registers");
|
||||
return -1;
|
||||
}
|
||||
} else if (e->type == EXPR_UNKNOWN) {
|
||||
op->opflags |= OPFLAG_UNKNOWN;
|
||||
} else if (e->type == EXPR_SIMPLE) {
|
||||
o += e->value;
|
||||
} else if (e->type == EXPR_WRT) {
|
||||
op->wrt = e->value;
|
||||
} else if (e->type >= EXPR_SEGBASE) {
|
||||
if (e->value == 1) {
|
||||
if (op->segment != NO_SEG) {
|
||||
nasm_nonfatal("invalid effective address: multiple base segments");
|
||||
return -1;
|
||||
}
|
||||
op->segment = e->type - EXPR_SEGBASE;
|
||||
} else if (e->value == -1 &&
|
||||
e->type == location.segment + EXPR_SEGBASE &&
|
||||
!(op->opflags & OPFLAG_RELATIVE)) {
|
||||
op->opflags |= OPFLAG_RELATIVE;
|
||||
} else {
|
||||
nasm_nonfatal("invalid effective address: impossible segment base multiplier");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
nasm_nonfatal("invalid effective address: bad subexpression type");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nasm_assert(!e->type); /* We should be at the end */
|
||||
|
||||
op->basereg = b;
|
||||
op->basereg = b;
|
||||
op->indexreg = i;
|
||||
op->scale = s;
|
||||
op->offset = o;
|
||||
op->scale = s;
|
||||
op->offset = o;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -419,6 +414,7 @@ insn *parse_line(char *buffer, insn *result)
|
||||
bool critical;
|
||||
bool first;
|
||||
bool recover;
|
||||
bool far_jmp_ok;
|
||||
int i;
|
||||
|
||||
nasm_static_assert(P_none == 0);
|
||||
@ -740,20 +736,18 @@ is_expression:
|
||||
* Now we begin to parse the operands. There may be up to four
|
||||
* of these, separated by commas, and terminated by a zero token.
|
||||
*/
|
||||
far_jmp_ok = result->opcode == I_JMP || result->opcode == I_CALL;
|
||||
|
||||
for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
|
||||
operand *op = &result->oprs[opnum];
|
||||
expr *value; /* used most of the time */
|
||||
bool mref; /* is this going to be a memory ref? */
|
||||
bool bracket; /* is it a [] mref, or a & mref? */
|
||||
bool mref = false; /* is this going to be a memory ref? */
|
||||
int bracket = 0; /* is it a [] mref, or a "naked" mref? */
|
||||
bool mib; /* compound (mib) mref? */
|
||||
int setsize = 0;
|
||||
decoflags_t brace_flags = 0; /* flags for decorators in braces */
|
||||
|
||||
op->disp_size = 0; /* have to zero this whatever */
|
||||
op->eaflags = 0; /* and this */
|
||||
op->opflags = 0;
|
||||
op->decoflags = 0;
|
||||
init_operand(op);
|
||||
|
||||
i = stdscan(NULL, &tokval);
|
||||
if (i == TOKEN_EOS)
|
||||
@ -829,30 +823,55 @@ is_expression:
|
||||
i = stdscan(NULL, &tokval);
|
||||
}
|
||||
|
||||
if (i == '[' || i == '&') { /* memory reference */
|
||||
if (i == '[' || i == TOKEN_MASM_PTR || i == '&') {
|
||||
/* memory reference */
|
||||
mref = true;
|
||||
bracket = (i == '[');
|
||||
i = stdscan(NULL, &tokval); /* then skip the colon */
|
||||
while (i == TOKEN_SPECIAL || i == TOKEN_SIZE ||
|
||||
i == TOKEN_PREFIX) {
|
||||
process_size_override(result, op);
|
||||
i = stdscan(NULL, &tokval);
|
||||
}
|
||||
/* when a comma follows an opening bracket - [ , eax*4] */
|
||||
if (i == ',') {
|
||||
/* treat as if there is a zero displacement virtually */
|
||||
tokval.t_type = TOKEN_NUM;
|
||||
tokval.t_integer = 0;
|
||||
stdscan_set(stdscan_get() - 1); /* rewind the comma */
|
||||
}
|
||||
} else { /* immediate operand, or register */
|
||||
mref = false;
|
||||
bracket = false; /* placate optimisers */
|
||||
bracket += (i == '[');
|
||||
i = stdscan(NULL, &tokval);
|
||||
}
|
||||
|
||||
if ((op->type & FAR) && !mref &&
|
||||
result->opcode != I_JMP && result->opcode != I_CALL)
|
||||
nasm_nonfatal("invalid use of FAR operand specifier");
|
||||
mref_more:
|
||||
if (mref) {
|
||||
bool done = false;
|
||||
bool nofw = false;
|
||||
|
||||
while (!done) {
|
||||
switch (i) {
|
||||
case TOKEN_SPECIAL:
|
||||
case TOKEN_SIZE:
|
||||
case TOKEN_PREFIX:
|
||||
process_size_override(result, op);
|
||||
break;
|
||||
|
||||
case '[':
|
||||
bracket++;
|
||||
break;
|
||||
|
||||
case ',':
|
||||
tokval.t_type = TOKEN_NUM;
|
||||
tokval.t_integer = 0;
|
||||
stdscan_set(stdscan_get() - 1); /* rewind the comma */
|
||||
done = nofw = true;
|
||||
break;
|
||||
|
||||
case TOKEN_MASM_FLAT:
|
||||
i = stdscan(NULL, &tokval);
|
||||
if (i != ':') {
|
||||
nasm_nonfatal("unknown use of FLAT in MASM emulation");
|
||||
nofw = true;
|
||||
}
|
||||
done = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
done = nofw = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!nofw)
|
||||
i = stdscan(NULL, &tokval);
|
||||
}
|
||||
}
|
||||
|
||||
value = evaluate(stdscan, NULL, &tokval,
|
||||
&op->opflags, critical, &hints);
|
||||
@ -862,7 +881,18 @@ is_expression:
|
||||
}
|
||||
if (!value) /* Error in evaluator */
|
||||
goto fail;
|
||||
if (i == ':' && mref) { /* it was seg:offset */
|
||||
|
||||
if (i == '[' && !bracket) {
|
||||
/* displacement[regs] syntax */
|
||||
mref = true;
|
||||
parse_mref(op, value); /* Process what we have so far */
|
||||
goto mref_more;
|
||||
}
|
||||
|
||||
if (i == ':' && (mref || !far_jmp_ok)) {
|
||||
/* segment override? */
|
||||
mref = true;
|
||||
|
||||
/*
|
||||
* Process the segment override.
|
||||
*/
|
||||
@ -879,29 +909,15 @@ is_expression:
|
||||
}
|
||||
|
||||
i = stdscan(NULL, &tokval); /* then skip the colon */
|
||||
while (i == TOKEN_SPECIAL || i == TOKEN_SIZE ||
|
||||
i == TOKEN_PREFIX) {
|
||||
process_size_override(result, op);
|
||||
i = stdscan(NULL, &tokval);
|
||||
}
|
||||
value = evaluate(stdscan, NULL, &tokval,
|
||||
&op->opflags, critical, &hints);
|
||||
i = tokval.t_type;
|
||||
if (op->opflags & OPFLAG_FORWARD) {
|
||||
result->forw_ref = true;
|
||||
}
|
||||
/* and get the offset */
|
||||
if (!value) /* Error in evaluator */
|
||||
goto fail;
|
||||
goto mref_more;
|
||||
}
|
||||
|
||||
mib = false;
|
||||
if (mref && bracket && i == ',') {
|
||||
/* [seg:base+offset,index*scale] syntax (mib) */
|
||||
operand o2; /* Index operand */
|
||||
|
||||
operand o1, o2; /* Partial operands */
|
||||
|
||||
if (parse_mref(&o1, value))
|
||||
if (parse_mref(op, value))
|
||||
goto fail;
|
||||
|
||||
i = stdscan(NULL, &tokval); /* Eat comma */
|
||||
@ -911,6 +927,7 @@ is_expression:
|
||||
if (!value)
|
||||
goto fail;
|
||||
|
||||
init_operand(&o2);
|
||||
if (parse_mref(&o2, value))
|
||||
goto fail;
|
||||
|
||||
@ -920,18 +937,14 @@ is_expression:
|
||||
o2.basereg = -1;
|
||||
}
|
||||
|
||||
if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
|
||||
if (op->indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
|
||||
o2.segment != NO_SEG || o2.wrt != NO_SEG) {
|
||||
nasm_nonfatal("invalid mib expression");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
op->basereg = o1.basereg;
|
||||
op->indexreg = o2.indexreg;
|
||||
op->scale = o2.scale;
|
||||
op->offset = o1.offset;
|
||||
op->segment = o1.segment;
|
||||
op->wrt = o1.wrt;
|
||||
|
||||
if (op->basereg != -1) {
|
||||
op->hintbase = op->basereg;
|
||||
@ -948,21 +961,33 @@ is_expression:
|
||||
}
|
||||
|
||||
recover = false;
|
||||
if (mref && bracket) { /* find ] at the end */
|
||||
if (i != ']') {
|
||||
nasm_nonfatal("parser: expecting ]");
|
||||
recover = true;
|
||||
} else { /* we got the required ] */
|
||||
i = stdscan(NULL, &tokval);
|
||||
if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
|
||||
/* parse opmask (and zeroing) after an operand */
|
||||
recover = parse_braces(&brace_flags);
|
||||
i = tokval.t_type;
|
||||
}
|
||||
if (i != 0 && i != ',') {
|
||||
nasm_nonfatal("comma or end of line expected");
|
||||
if (mref) {
|
||||
if (bracket == 1) {
|
||||
if (i == ']') {
|
||||
bracket--;
|
||||
i = stdscan(NULL, &tokval);
|
||||
} else {
|
||||
nasm_nonfatal("expecting ] at end of memory operand");
|
||||
recover = true;
|
||||
}
|
||||
} else if (bracket == 0) {
|
||||
/* Do nothing */
|
||||
} else if (bracket > 0) {
|
||||
nasm_nonfatal("excess brackets in memory operand");
|
||||
recover = true;
|
||||
} else if (bracket < 0) {
|
||||
nasm_nonfatal("unmatched ] in memory operand");
|
||||
recover = true;
|
||||
}
|
||||
|
||||
if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
|
||||
/* parse opmask (and zeroing) after an operand */
|
||||
recover = parse_braces(&brace_flags);
|
||||
i = tokval.t_type;
|
||||
}
|
||||
if (!recover && i != 0 && i != ',') {
|
||||
nasm_nonfatal("comma, decorator or end of line expected, got %d", i);
|
||||
recover = true;
|
||||
}
|
||||
} else { /* immediate operand */
|
||||
if (i != 0 && i != ',' && i != ':' &&
|
||||
@ -998,6 +1023,9 @@ is_expression:
|
||||
op->hinttype = hints.type;
|
||||
}
|
||||
mref_set_optype(op);
|
||||
} else if ((op->type & FAR) && !far_jmp_ok) {
|
||||
nasm_nonfatal("invalid use of FAR operand specifier");
|
||||
recover = true;
|
||||
} else { /* it's not a memory reference */
|
||||
if (is_just_unknown(value)) { /* it's immediate but unknown */
|
||||
op->type |= IMMEDIATE;
|
||||
|
@ -125,6 +125,10 @@ __ilog2c__
|
||||
seg
|
||||
wrt
|
||||
|
||||
% TOKEN_{__*__}, 0, 0, 0
|
||||
__masm_ptr__
|
||||
__masm_flat__
|
||||
|
||||
% TOKEN_DECORATOR, 0, TFLAG_BRC | TFLAG_BRDCAST , BRC_1TO{1to*}
|
||||
1to2
|
||||
1to4
|
||||
|
@ -190,6 +190,8 @@ enum token_type { /* token types, other than chars */
|
||||
TOKEN_STRFUNC, /* __utf16*__, __utf32*__ */
|
||||
TOKEN_IFUNC, /* __ilog2*__ */
|
||||
TOKEN_DECORATOR, /* decorators such as {...} */
|
||||
TOKEN_MASM_PTR, /* __masm_ptr__ for the masm package */
|
||||
TOKEN_MASM_FLAT, /* __masm_flat__ for the masm package */
|
||||
TOKEN_OPMASK /* translated token for opmask registers */
|
||||
};
|
||||
|
||||
|
80
macros/masm.mac
Normal file
80
macros/masm.mac
Normal file
@ -0,0 +1,80 @@
|
||||
;; --------------------------------------------------------------------------
|
||||
;;
|
||||
;; Copyright 2019 The NASM Authors - All Rights Reserved
|
||||
;; See the file AUTHORS included with the NASM distribution for
|
||||
;; the specific copyright holders.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following
|
||||
;; conditions are met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;; * Redistributions in binary form must reproduce the above
|
||||
;; copyright notice, this list of conditions and the following
|
||||
;; disclaimer in the documentation and/or other materials provided
|
||||
;; with the distribution.
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
;; CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
;; INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
;; MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
;; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
;; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
;; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
;; EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
;;
|
||||
;; masm.mac
|
||||
;;
|
||||
;; Very limited MASM compatibility package; intended to be used
|
||||
;; primarily with machine-generated code. It does not include any
|
||||
;; "programmer friendly" shortcuts, nor does it in any way support
|
||||
;; ASSUME, symbol typing, or MASM-style structures.
|
||||
;;
|
||||
|
||||
USE: masm
|
||||
|
||||
%unimacro segment 1+
|
||||
|
||||
%imacro segment 0-1+.nolist
|
||||
%define __SECT__ [segment %00 %1]
|
||||
__SECT__
|
||||
%endmacro
|
||||
|
||||
%imacro ends 0+.nolist
|
||||
%pragma ignore ends %00
|
||||
%endmacro
|
||||
|
||||
%imacro proc 0-*.nolist
|
||||
%rep %0
|
||||
%ifidni %1,far
|
||||
%idefine ret retf
|
||||
%else
|
||||
%idefine ret retn
|
||||
%endif
|
||||
%rotate 1
|
||||
%endrep
|
||||
%endmacro
|
||||
|
||||
%imacro endp 0.nolist
|
||||
%pragma ignore endp %00
|
||||
%undef ret
|
||||
%endmacro
|
||||
|
||||
%idefine ptr __masm_ptr__
|
||||
%idefine flat __masm_flat__ ; is %idefine really correct here?
|
||||
%idefine offset
|
||||
|
||||
%imacro end 0+.nolist
|
||||
; Nothing
|
||||
%endmacro
|
||||
|
||||
default rel
|
Loading…
x
Reference in New Issue
Block a user