mirror of
https://github.com/netwide-assembler/nasm.git
synced 2025-09-22 10:43:39 -04:00
BR 3392906: error out on bad syntax "db 1 2"
NASM would try to "eat the comma token" in db expressions, even for cases where the token was not a comma. Fix that and error out properly.

To give better error messages, track where in the input string a token starts or ends. This information is only valid as long as the input string is kept, but that is just fine for error messages during parsing.

Reported-by: Peter Cordes <pcordes@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
60
asm/parser.c
60
asm/parser.c
@@ -1,6 +1,6 @@
|
|||||||
/* ----------------------------------------------------------------------- *
|
/* ----------------------------------------------------------------------- *
|
||||||
*
|
*
|
||||||
* Copyright 1996-2020 The NASM Authors - All Rights Reserved
|
* Copyright 1996-2023 The NASM Authors - All Rights Reserved
|
||||||
* See the file AUTHORS included with the NASM distribution for
|
* See the file AUTHORS included with the NASM distribution for
|
||||||
* the specific copyright holders.
|
* the specific copyright holders.
|
||||||
*
|
*
|
||||||
@@ -55,6 +55,21 @@ static int end_expression_next(void);
|
|||||||
|
|
||||||
static struct tokenval tokval;
|
static struct tokenval tokval;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Human-readable description of a token, intended for error messages.
|
||||||
|
* The resulting string needs to be freed.
|
||||||
|
*/
|
||||||
|
static char *tokstr(const struct tokenval *tok)
|
||||||
|
{
|
||||||
|
if (tok->t_type == TOKEN_EOS) {
|
||||||
|
return nasm_strdup("end of line");
|
||||||
|
} else if (tok->t_len) {
|
||||||
|
return nasm_asprintf("`%.*s'", tok->t_len, tok->t_start);
|
||||||
|
} else {
|
||||||
|
return nasm_strdup("invalid token");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void process_size_override(insn *result, operand *op)
|
static void process_size_override(insn *result, operand *op)
|
||||||
{
|
{
|
||||||
if (tasm_compatible_mode) {
|
if (tasm_compatible_mode) {
|
||||||
@@ -384,6 +399,7 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
|
|
||||||
/* End of string is obvious; ) ends a sub-expression list e.g. DUP */
|
/* End of string is obvious; ) ends a sub-expression list e.g. DUP */
|
||||||
for (i = tokval.t_type; i != TOKEN_EOS; i = stdscan(NULL, &tokval)) {
|
for (i = tokval.t_type; i != TOKEN_EOS; i = stdscan(NULL, &tokval)) {
|
||||||
|
bool skip;
|
||||||
char endparen = ')'; /* Is a right paren the end of list? */
|
char endparen = ')'; /* Is a right paren the end of list? */
|
||||||
|
|
||||||
if (i == ')')
|
if (i == ')')
|
||||||
@@ -397,12 +413,9 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
}
|
}
|
||||||
sign = +1;
|
sign = +1;
|
||||||
|
|
||||||
/*
|
|
||||||
* end_expression_next() here is to distinguish this from
|
|
||||||
* a string used as part of an expression...
|
|
||||||
*/
|
|
||||||
if (i == TOKEN_QMARK) {
|
if (i == TOKEN_QMARK) {
|
||||||
eop->type = EOT_DB_RESERVE;
|
eop->type = EOT_DB_RESERVE;
|
||||||
|
skip = true;
|
||||||
} else if (do_subexpr && i == '(') {
|
} else if (do_subexpr && i == '(') {
|
||||||
extop *subexpr;
|
extop *subexpr;
|
||||||
|
|
||||||
@@ -432,12 +445,13 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
|
|
||||||
/* We should have ended on a closing paren */
|
/* We should have ended on a closing paren */
|
||||||
if (tokval.t_type != ')') {
|
if (tokval.t_type != ')') {
|
||||||
nasm_nonfatal("expected `)' after subexpression, got `%s'",
|
char *tp = tokstr(&tokval);
|
||||||
i == TOKEN_EOS ?
|
nasm_nonfatal("expected `)' after subexpression, got %s", tp);
|
||||||
"end of line" : tokval.t_charptr);
|
nasm_free(tp);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
endparen = 0; /* This time the paren is not the end */
|
endparen = 0; /* This time the paren is not the end */
|
||||||
|
skip = true;
|
||||||
} else if (i == '%') {
|
} else if (i == '%') {
|
||||||
/* %(expression_list) */
|
/* %(expression_list) */
|
||||||
do_subexpr = true;
|
do_subexpr = true;
|
||||||
@@ -448,9 +462,14 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
do_subexpr = true;
|
do_subexpr = true;
|
||||||
continue;
|
continue;
|
||||||
} else if (i == TOKEN_STR && end_expression_next()) {
|
} else if (i == TOKEN_STR && end_expression_next()) {
|
||||||
|
/*
|
||||||
|
* end_expression_next() is to distinguish this from
|
||||||
|
* a string used as part of an expression...
|
||||||
|
*/
|
||||||
eop->type = EOT_DB_STRING;
|
eop->type = EOT_DB_STRING;
|
||||||
eop->val.string.data = tokval.t_charptr;
|
eop->val.string.data = tokval.t_charptr;
|
||||||
eop->val.string.len = tokval.t_inttwo;
|
eop->val.string.len = tokval.t_inttwo;
|
||||||
|
skip = true;
|
||||||
} else if (i == TOKEN_STRFUNC) {
|
} else if (i == TOKEN_STRFUNC) {
|
||||||
bool parens = false;
|
bool parens = false;
|
||||||
const char *funcname = tokval.t_charptr;
|
const char *funcname = tokval.t_charptr;
|
||||||
@@ -463,8 +482,10 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
i = stdscan(NULL, &tokval);
|
i = stdscan(NULL, &tokval);
|
||||||
}
|
}
|
||||||
if (i != TOKEN_STR) {
|
if (i != TOKEN_STR) {
|
||||||
nasm_nonfatal("%s must be followed by a string constant",
|
char *tp = tokstr(&tokval);
|
||||||
funcname);
|
nasm_nonfatal("%s must be followed by a string constant, got %s",
|
||||||
|
funcname, tp);
|
||||||
|
nasm_free(tp);
|
||||||
eop->type = EOT_NOTHING;
|
eop->type = EOT_NOTHING;
|
||||||
} else {
|
} else {
|
||||||
eop->type = EOT_DB_STRING_FREE;
|
eop->type = EOT_DB_STRING_FREE;
|
||||||
@@ -481,6 +502,7 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
if (i != ')')
|
if (i != ')')
|
||||||
nasm_nonfatal("unterminated %s function", funcname);
|
nasm_nonfatal("unterminated %s function", funcname);
|
||||||
}
|
}
|
||||||
|
skip = i != ',';
|
||||||
} else if (i == '-' || i == '+') {
|
} else if (i == '-' || i == '+') {
|
||||||
char *save = stdscan_get();
|
char *save = stdscan_get();
|
||||||
struct tokenval tmptok;
|
struct tokenval tmptok;
|
||||||
@@ -522,6 +544,7 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
}
|
}
|
||||||
if (!eop->val.string.len)
|
if (!eop->val.string.len)
|
||||||
eop->type = EOT_NOTHING;
|
eop->type = EOT_NOTHING;
|
||||||
|
skip = true;
|
||||||
} else {
|
} else {
|
||||||
/* anything else, assume it is an expression */
|
/* anything else, assume it is an expression */
|
||||||
expr *value;
|
expr *value;
|
||||||
@@ -548,6 +571,7 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
if (value_to_extop(value, eop, location.segment)) {
|
if (value_to_extop(value, eop, location.segment)) {
|
||||||
nasm_nonfatal("expression is not simple or relocatable");
|
nasm_nonfatal("expression is not simple or relocatable");
|
||||||
}
|
}
|
||||||
|
skip = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (eop->dup == 0 || eop->type == EOT_NOTHING) {
|
if (eop->dup == 0 || eop->type == EOT_NOTHING) {
|
||||||
@@ -568,6 +592,11 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
oper_num++;
|
oper_num++;
|
||||||
eop = NULL; /* Done with this operand */
|
eop = NULL; /* Done with this operand */
|
||||||
|
|
||||||
|
if (skip) {
|
||||||
|
/* Consume the (last) token if that didn't happen yet */
|
||||||
|
i = stdscan(NULL, &tokval);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We're about to call stdscan(), which will eat the
|
* We're about to call stdscan(), which will eat the
|
||||||
* comma that we're currently sitting on between
|
* comma that we're currently sitting on between
|
||||||
@@ -577,13 +606,10 @@ static int parse_eops(extop **result, bool critical, int elem)
|
|||||||
if (i == TOKEN_EOS || i == endparen) /* Already at end? */
|
if (i == TOKEN_EOS || i == endparen) /* Already at end? */
|
||||||
break;
|
break;
|
||||||
if (i != ',') {
|
if (i != ',') {
|
||||||
i = stdscan(NULL, &tokval); /* eat the comma or final paren */
|
char *tp = tokstr(&tokval);
|
||||||
if (i == TOKEN_EOS || i == ')') /* got end of expression */
|
nasm_nonfatal("comma expected after operand, got %s", tp);
|
||||||
break;
|
nasm_free(tp);
|
||||||
if (i != ',') {
|
goto fail;
|
||||||
nasm_nonfatal("comma expected after operand");
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
/* ----------------------------------------------------------------------- *
|
/* ----------------------------------------------------------------------- *
|
||||||
*
|
*
|
||||||
* Copyright 1996-2018 The NASM Authors - All Rights Reserved
|
* Copyright 1996-2023 The NASM Authors - All Rights Reserved
|
||||||
* See the file AUTHORS included with the NASM distribution for
|
* See the file AUTHORS included with the NASM distribution for
|
||||||
* the specific copyright holders.
|
* the specific copyright holders.
|
||||||
*
|
*
|
||||||
@@ -122,19 +122,33 @@ static int stdscan_handle_brace(struct tokenval *tv)
|
|||||||
return tv->t_type;
|
return tv->t_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int stdscan_token(struct tokenval *tv);
|
||||||
|
|
||||||
int stdscan(void *private_data, struct tokenval *tv)
|
int stdscan(void *private_data, struct tokenval *tv)
|
||||||
{
|
{
|
||||||
const char *r;
|
int i;
|
||||||
|
|
||||||
(void)private_data; /* Don't warn that this parameter is unused */
|
(void)private_data; /* Don't warn that this parameter is unused */
|
||||||
|
|
||||||
nasm_zero(*tv);
|
nasm_zero(*tv);
|
||||||
|
|
||||||
stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
|
stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
|
||||||
|
tv->t_start = stdscan_bufptr;
|
||||||
|
|
||||||
if (!*stdscan_bufptr)
|
if (!*stdscan_bufptr)
|
||||||
return tv->t_type = TOKEN_EOS;
|
return tv->t_type = TOKEN_EOS;
|
||||||
|
|
||||||
/* we have a token; either an id, a number or a char */
|
i = stdscan_token(tv);
|
||||||
|
tv->t_len = stdscan_bufptr - tv->t_start;
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int stdscan_token(struct tokenval *tv)
|
||||||
|
{
|
||||||
|
const char *r;
|
||||||
|
|
||||||
|
/* we have a token; either an id, a number, operator or char */
|
||||||
if (nasm_isidstart(*stdscan_bufptr) ||
|
if (nasm_isidstart(*stdscan_bufptr) ||
|
||||||
(*stdscan_bufptr == '$' && nasm_isidstart(stdscan_bufptr[1]))) {
|
(*stdscan_bufptr == '$' && nasm_isidstart(stdscan_bufptr[1]))) {
|
||||||
/* now we've got an identifier */
|
/* now we've got an identifier */
|
||||||
@@ -341,6 +355,8 @@ int stdscan(void *private_data, struct tokenval *tv)
|
|||||||
} else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
|
} else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
|
||||||
stdscan_bufptr += 2;
|
stdscan_bufptr += 2;
|
||||||
return tv->t_type = TOKEN_DBL_OR;
|
return tv->t_type = TOKEN_DBL_OR;
|
||||||
} else /* just an ordinary char */
|
} else {
|
||||||
|
/* just an ordinary char */
|
||||||
return tv->t_type = (uint8_t)(*stdscan_bufptr++);
|
return tv->t_type = (uint8_t)(*stdscan_bufptr++);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -316,6 +316,8 @@ struct tokenval {
|
|||||||
int64_t t_inttwo;
|
int64_t t_inttwo;
|
||||||
enum token_type t_type;
|
enum token_type t_type;
|
||||||
int8_t t_flag;
|
int8_t t_flag;
|
||||||
|
const char *t_start; /* Pointer to token in input buffer */
|
||||||
|
int t_len; /* Length of token in input buffer */
|
||||||
};
|
};
|
||||||
typedef int (*scanner)(void *private_data, struct tokenval *tv);
|
typedef int (*scanner)(void *private_data, struct tokenval *tv);
|
||||||
|
|
||||||
|
5
test/baddb.asm
Normal file
5
test/baddb.asm
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
;; This should error
|
||||||
|
db 1 2
|
||||||
|
|
||||||
|
;; This should work
|
||||||
|
db 1, 2
|
@@ -1,15 +1,15 @@
|
|||||||
./travis/test/utf.asm:63: error: __?utf16?__ must be followed by a string constant
|
./travis/test/utf.asm:63: error: __?utf16?__ must be followed by a string constant, got `33'
|
||||||
./travis/test/utf.asm:64: error: __?utf16?__ must be followed by a string constant
|
./travis/test/utf.asm:64: error: __?utf16?__ must be followed by a string constant, got `,'
|
||||||
./travis/test/utf.asm:65: error: unterminated __?utf16?__ function
|
./travis/test/utf.asm:65: error: unterminated __?utf16?__ function
|
||||||
./travis/test/utf.asm:66: error: unterminated __?utf16?__ function
|
./travis/test/utf.asm:66: error: unterminated __?utf16?__ function
|
||||||
./travis/test/utf.asm:67: error: invalid input string to __?utf16?__
|
./travis/test/utf.asm:67: error: invalid input string to __?utf16?__
|
||||||
./travis/test/utf.asm:69: error: __?utf16le?__ must be followed by a string constant
|
./travis/test/utf.asm:69: error: __?utf16le?__ must be followed by a string constant, got `33'
|
||||||
./travis/test/utf.asm:70: error: __?utf16le?__ must be followed by a string constant
|
./travis/test/utf.asm:70: error: __?utf16le?__ must be followed by a string constant, got `,'
|
||||||
./travis/test/utf.asm:71: error: unterminated __?utf16le?__ function
|
./travis/test/utf.asm:71: error: unterminated __?utf16le?__ function
|
||||||
./travis/test/utf.asm:72: error: unterminated __?utf16le?__ function
|
./travis/test/utf.asm:72: error: unterminated __?utf16le?__ function
|
||||||
./travis/test/utf.asm:73: error: invalid input string to __?utf16le?__
|
./travis/test/utf.asm:73: error: invalid input string to __?utf16le?__
|
||||||
./travis/test/utf.asm:75: error: __?utf16be?__ must be followed by a string constant
|
./travis/test/utf.asm:75: error: __?utf16be?__ must be followed by a string constant, got `33'
|
||||||
./travis/test/utf.asm:76: error: __?utf16be?__ must be followed by a string constant
|
./travis/test/utf.asm:76: error: __?utf16be?__ must be followed by a string constant, got `,'
|
||||||
./travis/test/utf.asm:77: error: unterminated __?utf16be?__ function
|
./travis/test/utf.asm:77: error: unterminated __?utf16be?__ function
|
||||||
./travis/test/utf.asm:78: error: unterminated __?utf16be?__ function
|
./travis/test/utf.asm:78: error: unterminated __?utf16be?__ function
|
||||||
./travis/test/utf.asm:79: error: invalid input string to __?utf16be?__
|
./travis/test/utf.asm:79: error: invalid input string to __?utf16be?__
|
||||||
|
Reference in New Issue
Block a user