diff --git a/Makefile.in b/Makefile.in index 949d0131..ba0ae26a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -97,7 +97,7 @@ LIBOBJ = stdlib/snprintf.$(O) stdlib/vsnprintf.$(O) stdlib/strlcpy.$(O) \ \ nasmlib/ver.$(O) \ nasmlib/crc64.$(O) nasmlib/malloc.$(O) \ - nasmlib/md5c.$(O) nasmlib/string.$(O) \ + nasmlib/md5c.$(O) nasmlib/string.$(O) nasmlib/nctype.$(O) \ nasmlib/file.$(O) nasmlib/mmap.$(O) nasmlib/ilog2.$(O) \ nasmlib/realpath.$(O) nasmlib/path.$(O) \ nasmlib/filename.$(O) nasmlib/srcfile.$(O) \ diff --git a/asm/directiv.c b/asm/directiv.c index d2c5e9c3..16a64c0b 100644 --- a/asm/directiv.c +++ b/asm/directiv.c @@ -321,12 +321,12 @@ bool process_directives(char *directive) value++; /* skip initial $ if present */ q = value; - if (!isidstart(*q)) { + if (!nasm_isidstart(*q)) { validid = false; } else { q++; while (*q && *q != ':' && !nasm_isspace(*q)) { - if (!isidchar(*q)) + if (!nasm_isidchar(*q)) validid = false; q++; } @@ -411,7 +411,7 @@ bool process_directives(char *directive) p = value; q = debugid; badid = overlong = false; - if (!isidstart(*p)) { + if (!nasm_isidstart(*p)) { badid = true; } else { while (*p && !nasm_isspace(*p)) { @@ -419,7 +419,7 @@ bool process_directives(char *directive) overlong = true; break; } - if (!isidchar(*p)) + if (!nasm_isidchar(*p)) badid = true; *q++ = *p++; } diff --git a/asm/nasm.c b/asm/nasm.c index 9b94f397..eb6f4760 100644 --- a/asm/nasm.c +++ b/asm/nasm.c @@ -41,11 +41,11 @@ #include #include #include -#include #include #include "nasm.h" #include "nasmlib.h" +#include "nctype.h" #include "error.h" #include "saa.h" #include "raa.h" @@ -456,7 +456,7 @@ int main(int argc, char **argv) error_file = stderr; - tolower_init(); + nasm_ctype_init(); src_init(); /* @@ -1026,8 +1026,10 @@ static bool process_arg(char *p, char *q, int pass) break; case 't': - if (pass == 2) + if (pass == 2) { tasm_compatible_mode = true; + nasm_ctype_tasm_mode(); + } break; case 'v': diff --git a/asm/preproc.c b/asm/preproc.c index 
0d6ea435..b27f81b2 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -953,11 +953,11 @@ static Token *tokenize(char *line) } else if (*p == '!') { type = TOK_PREPROC_ID; p++; - if (isidchar(*p)) { + if (nasm_isidchar(*p)) { do { p++; } - while (isidchar(*p)); + while (nasm_isidchar(*p)); } else if (*p == '\'' || *p == '\"' || *p == '`') { p = nasm_skip_string(p); if (*p) @@ -968,23 +968,23 @@ static Token *tokenize(char *line) /* %! without string or identifier */ type = TOK_OTHER; /* Legacy behavior... */ } - } else if (isidchar(*p) || + } else if (nasm_isidchar(*p) || ((*p == '!' || *p == '%' || *p == '$') && - isidchar(p[1]))) { + nasm_isidchar(p[1]))) { do { p++; } - while (isidchar(*p)); + while (nasm_isidchar(*p)); type = TOK_PREPROC_ID; } else { type = TOK_OTHER; if (*p == '%') p++; } - } else if (isidstart(*p) || (*p == '$' && isidstart(p[1]))) { + } else if (nasm_isidstart(*p) || (*p == '$' && nasm_isidstart(p[1]))) { type = TOK_ID; p++; - while (*p && isidchar(*p)) + while (*p && nasm_isidchar(*p)) p++; } else if (*p == '\'' || *p == '"' || *p == '`') { /* @@ -1003,7 +1003,7 @@ static Token *tokenize(char *line) } else if (p[0] == '$' && p[1] == '$') { type = TOK_OTHER; /* TOKEN_BASE */ p += 2; - } else if (isnumstart(*p)) { + } else if (nasm_isnumstart(*p)) { bool is_hex = false; bool is_float = false; bool has_e = false; @@ -1037,7 +1037,7 @@ static Token *tokenize(char *line) is_float = true; if (*p == '+' || *p == '-') p++; - } else if (isnumchar(c)) + } else if (nasm_isnumchar(c)) ; /* just advance */ else if (c == '.') { /* diff --git a/asm/stdscan.c b/asm/stdscan.c index 24f9b9c4..30cba3e2 100644 --- a/asm/stdscan.c +++ b/asm/stdscan.c @@ -137,8 +137,8 @@ int stdscan(void *private_data, struct tokenval *tv) return tv->t_type = TOKEN_EOS; /* we have a token; either an id, a number or a char */ - if (isidstart(*stdscan_bufptr) || - (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) { + if (nasm_isidstart(*stdscan_bufptr) || + (*stdscan_bufptr 
== '$' && nasm_isidstart(stdscan_bufptr[1]))) { /* now we've got an identifier */ bool is_sym = false; int token_type; @@ -150,7 +150,7 @@ int stdscan(void *private_data, struct tokenval *tv) r = stdscan_bufptr++; /* read the entire buffer to advance the buffer pointer but... */ - while (isidchar(*stdscan_bufptr)) + while (nasm_isidchar(*stdscan_bufptr)) stdscan_bufptr++; /* ... copy only up to IDLEN_MAX-1 characters */ @@ -178,7 +178,7 @@ int stdscan(void *private_data, struct tokenval *tv) } else { return tv->t_type = TOKEN_ID; } - } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) { + } else if (*stdscan_bufptr == '$' && !nasm_isnumchar(stdscan_bufptr[1])) { /* * It's a $ sign with no following hex number; this must * mean it's a Here token ($), evaluating to the current @@ -191,7 +191,7 @@ int stdscan(void *private_data, struct tokenval *tv) return tv->t_type = TOKEN_BASE; } return tv->t_type = TOKEN_HERE; - } else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */ + } else if (nasm_isnumstart(*stdscan_bufptr)) { /* now we've got a number */ bool rn_error; bool is_hex = false; bool is_float = false; @@ -224,7 +224,7 @@ int stdscan(void *private_data, struct tokenval *tv) is_float = true; if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-') stdscan_bufptr++; - } else if (isnumchar(c)) + } else if (nasm_isnumchar(c)) ; /* just advance */ else if (c == '.') is_float = true; @@ -273,7 +273,7 @@ int stdscan(void *private_data, struct tokenval *tv) * read the entire buffer to advance the buffer pointer * {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens. 
*/ - while (isbrcchar(*stdscan_bufptr)) + while (nasm_isbrcchar(*stdscan_bufptr)) stdscan_bufptr++; token_len = stdscan_bufptr - r; diff --git a/configure.ac b/configure.ac index d111e249..2b2858ec 100644 --- a/configure.ac +++ b/configure.ac @@ -115,6 +115,8 @@ AC_CHECK_FUNCS(strncasecmp strnicmp) AC_CHECK_FUNCS(strsep) AC_CHECK_FUNCS(strnlen) AC_CHECK_FUNCS(strrchrnul) +AC_CHECK_FUNCS(iscntrl) +AC_CHECK_FUNCS(isascii) AC_CHECK_FUNCS(getuid) AC_CHECK_FUNCS(getgid) diff --git a/disasm/ndisasm.c b/disasm/ndisasm.c index 2d0cf153..f2067a31 100644 --- a/disasm/ndisasm.c +++ b/disasm/ndisasm.c @@ -94,7 +94,7 @@ int main(int argc, char **argv) int64_t offset; FILE *fp; - tolower_init(); + nasm_ctype_init(); nasm_set_verror(ndisasm_verror); iflag_clear_all(&prefer); diff --git a/include/nasm.h b/include/nasm.h index 6fdd0c15..1d26df98 100644 --- a/include/nasm.h +++ b/include/nasm.h @@ -44,6 +44,7 @@ #include #include "nasmlib.h" +#include "nctype.h" #include "strlist.h" #include "preproc.h" #include "insnsi.h" /* For enum opcode */ @@ -378,46 +379,6 @@ extern struct strlist *depend_list; /* TASM mode changes some properties */ extern bool tasm_compatible_mode; -/* - * Some lexical properties of the NASM source language, included - * here because they are shared between the parser and preprocessor. - */ - -/* - * isidstart matches any character that may start an identifier, and isidchar - * matches any character that may appear at places other than the start of an - * identifier. E.g. a period may only appear at the start of an identifier - * (for local labels), whereas a number may appear anywhere *but* at the - * start. - * isbrcchar matches any character that may placed inside curly braces as a - * decorator. E.g. {rn-sae}, {1to8}, {k1}{z} - */ - -static inline bool isidstart(char c) -{ - return nasm_isalpha(c) || c == '_' || c == '.' 
|| c == '@' || - (tasm_compatible_mode && c == '?'); -} -static inline bool isidchar(char c) -{ - return isidstart(c) || nasm_isdigit(c) || c == '$' || c == '#' || c == '~'; -} - -static inline bool isbrcchar(char c) -{ - return isidchar(c) || c == '-'; -} - -static inline bool isnumstart(char c) -{ - return nasm_isdigit(c) || c == '$'; -} - -static inline bool isnumchar(char c) -{ - return nasm_isalnum(c) || c == '_'; -} - /* * inline function to skip past an identifier; returns the first character past * the identifier if valid, otherwise NULL. @@ -426,10 +387,10 @@ static inline char *nasm_skip_identifier(const char *str) { const char *p = str; - if (!isidstart(*p++)) { + if (!nasm_isidstart(*p++)) { p = NULL; } else { - while (isidchar(*p++)) + while (nasm_isidchar(*p++)) ; } return (char *)p; diff --git a/include/nasmlib.h b/include/nasmlib.h index e57d0e6d..ee8045e0 100644 --- a/include/nasmlib.h +++ b/include/nasmlib.h @@ -41,30 +41,12 @@ #include "compiler.h" #include "bytesex.h" -#include #include #include #ifdef HAVE_STRINGS_H # include #endif -/* - * tolower table -- avoids a function call on some platforms. - * NOTE: unlike the tolower() function in ctype, EOF is *NOT* - * a permitted value, for obvious reasons. - */ -void tolower_init(void); -extern unsigned char nasm_tolower_tab[256]; -#define nasm_tolower(x) nasm_tolower_tab[(unsigned char)(x)] - -/* Wrappers around functions */ -/* These are only valid for values that cannot include EOF */ -#define nasm_isspace(x) isspace((unsigned char)(x)) -#define nasm_isalpha(x) isalpha((unsigned char)(x)) -#define nasm_isdigit(x) isdigit((unsigned char)(x)) -#define nasm_isalnum(x) isalnum((unsigned char)(x)) -#define nasm_isxdigit(x) isxdigit((unsigned char)(x)) - /* * Wrappers around malloc, realloc and free. 
nasm_malloc will * fatal-error and die rather than return NULL; nasm_realloc will diff --git a/include/nctype.h b/include/nctype.h new file mode 100644 index 00000000..6c9bec81 --- /dev/null +++ b/include/nctype.h @@ -0,0 +1,123 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 1996-2018 The NASM Authors - All Rights Reserved + * See the file AUTHORS included with the NASM distribution for + * the specific copyright holders. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * ctype-like functions specific to NASM + */ +#ifndef NASM_NCTYPE_H +#define NASM_NCTYPE_H + +#include "compiler.h" + +void nasm_ctype_init(void); + +extern unsigned char nasm_tolower_tab[256]; +static inline char nasm_tolower(char x) +{ + return nasm_tolower_tab[(unsigned char)x]; +} + +/* + * NASM ctype table + */ +enum nasm_ctype { + NCT_CTRL = 0x001, + NCT_SPACE = 0x002, + NCT_ASCII = 0x004, + NCT_LOWER = 0x008, /* isalpha(x) && tolower(x) == x */ + NCT_UPPER = 0x010, /* isalpha(x) && tolower(x) != x */ + NCT_DIGIT = 0x020, + NCT_HEX = 0x040, + NCT_ID = 0x080, + NCT_IDSTART = 0x100, + NCT_MINUS = 0x200, /* - */ + NCT_DOLLAR = 0x400, /* $ */ + NCT_UNDER = 0x800 /* _ */ +}; + +extern uint16_t nasm_ctype_tab[256]; +static inline bool nasm_ctype(unsigned char x, enum nasm_ctype mask) +{ + return (nasm_ctype_tab[x] & mask) != 0; +} + +static inline bool nasm_isspace(char x) +{ + return nasm_ctype(x, NCT_SPACE); +} + +static inline bool nasm_isalpha(char x) +{ + return nasm_ctype(x, NCT_LOWER|NCT_UPPER); +} + +static inline bool nasm_isdigit(char x) +{ + return nasm_ctype(x, NCT_DIGIT); +} +static inline bool nasm_isalnum(char x) +{ + return nasm_ctype(x, NCT_LOWER|NCT_UPPER|NCT_DIGIT); +} +static inline bool nasm_isxdigit(char x) +{ + return nasm_ctype(x, NCT_HEX); +} +static inline bool nasm_isidstart(char x) +{ + return nasm_ctype(x, NCT_IDSTART); +} +static inline bool nasm_isidchar(char x) +{ + return nasm_ctype(x, NCT_ID); +} +static inline bool nasm_isbrcchar(char x) +{ + return nasm_ctype(x, NCT_ID|NCT_MINUS); +} +static inline bool nasm_isnumstart(char x) +{ + return nasm_ctype(x, NCT_DIGIT|NCT_DOLLAR); +} +static inline bool nasm_isnumchar(char x) +{ + return nasm_ctype(x, NCT_DIGIT|NCT_LOWER|NCT_UPPER|NCT_UNDER); +} + +/* TASM-compatible mode requires ? 
to be an identifier character */ +static inline void nasm_ctype_tasm_mode(void) +{ + nasm_ctype_tab['?'] |= NCT_ID|NCT_IDSTART; +} + +#endif /* NASM_NCTYPE_H */ diff --git a/nasmlib/crc64.c b/nasmlib/crc64.c index 338e0be4..f901d403 100644 --- a/nasmlib/crc64.c +++ b/nasmlib/crc64.c @@ -32,7 +32,7 @@ * ----------------------------------------------------------------------- */ #include "compiler.h" -#include "nasmlib.h" +#include "nctype.h" #include "hashtbl.h" static const uint64_t crc64_tab[256] = { diff --git a/nasmlib/nctype.c b/nasmlib/nctype.c new file mode 100644 index 00000000..657e4e74 --- /dev/null +++ b/nasmlib/nctype.c @@ -0,0 +1,112 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 1996-2018 The NASM Authors - All Rights Reserved + * See the file AUTHORS included with the NASM distribution for + * the specific copyright holders. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ----------------------------------------------------------------------- */ + +#include "nctype.h" +#include <ctype.h> + +/* + * Table of tolower() results. This avoids function calls + * on some platforms. + */ +unsigned char nasm_tolower_tab[256]; + +static void tolower_tab_init(void) +{ + int i; + + for (i = 0; i < 256; i++) + nasm_tolower_tab[i] = tolower(i); +} + +/* + * Table of character type flags; some are simply <ctype.h>, + * some are NASM-specific. + */ + +uint16_t nasm_ctype_tab[256]; + +#if !defined(HAVE_ISCNTRL) && !defined(iscntrl) +# define iscntrl(x) ((x) < 32) +#endif +#if !defined(HAVE_ISASCII) && !defined(isascii) +# define isascii(x) ((x) < 128) +#endif + +static void ctype_tab_init(void) +{ + int i; + + for (i = 0; i < 256; i++) { + enum nasm_ctype ct = 0; + + if (iscntrl(i)) + ct |= NCT_CTRL; + + if (isascii(i)) + ct |= NCT_ASCII; + + if (isspace(i) && i != '\n') + ct |= NCT_SPACE; + + if (isalpha(i)) { + ct |= (nasm_tolower(i) == i) ? NCT_LOWER : NCT_UPPER; + ct |= NCT_ID|NCT_IDSTART; + } + + if (isdigit(i)) + ct |= NCT_DIGIT|NCT_ID; + + if (isxdigit(i)) + ct |= NCT_HEX; + + /* Non-ASCII character, but no ctype returned (e.g. 
Unicode) */ + if (!ct && !ispunct(i)) + ct |= NCT_ID|NCT_IDSTART; + + nasm_ctype_tab[i] = ct; + } + + nasm_ctype_tab['-'] |= NCT_MINUS; + nasm_ctype_tab['$'] |= NCT_DOLLAR|NCT_ID; + nasm_ctype_tab['_'] |= NCT_UNDER|NCT_ID|NCT_IDSTART; + nasm_ctype_tab['.'] |= NCT_ID|NCT_IDSTART; + nasm_ctype_tab['@'] |= NCT_ID|NCT_IDSTART; + nasm_ctype_tab['#'] |= NCT_ID; + nasm_ctype_tab['~'] |= NCT_ID; +} + +void nasm_ctype_init(void) +{ + tolower_tab_init(); + ctype_tab_init(); +} diff --git a/nasmlib/string.c b/nasmlib/string.c index 907df32f..4ee3ecbb 100644 --- a/nasmlib/string.c +++ b/nasmlib/string.c @@ -36,26 +36,8 @@ */ #include "compiler.h" - -#include -#include - #include "nasmlib.h" - -/* - * Prepare a table of tolower() results. This avoids function calls - * on some platforms. - */ - -unsigned char nasm_tolower_tab[256]; - -void tolower_init(void) -{ - int i; - - for (i = 0; i < 256; i++) - nasm_tolower_tab[i] = tolower(i); -} +#include "nctype.h" #ifndef nasm_stricmp int nasm_stricmp(const char *s1, const char *s2)