0
0
mirror of https://github.com/netwide-assembler/nasm.git synced 2025-10-10 00:25:06 -04:00

ctype: create our own ctype table

Create our own ctype table where we can do the tests we want to do
cheaply, instead of calling ctype functions and then adding additional
tests all over the code.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin
2018-11-28 14:55:58 -08:00
parent 099cc17739
commit 1350620bf1
13 changed files with 269 additions and 105 deletions

View File

@@ -97,7 +97,7 @@ LIBOBJ = stdlib/snprintf.$(O) stdlib/vsnprintf.$(O) stdlib/strlcpy.$(O) \
\
nasmlib/ver.$(O) \
nasmlib/crc64.$(O) nasmlib/malloc.$(O) \
nasmlib/md5c.$(O) nasmlib/string.$(O) \
nasmlib/md5c.$(O) nasmlib/string.$(O) nasmlib/nctype.$(O) \
nasmlib/file.$(O) nasmlib/mmap.$(O) nasmlib/ilog2.$(O) \
nasmlib/realpath.$(O) nasmlib/path.$(O) \
nasmlib/filename.$(O) nasmlib/srcfile.$(O) \

View File

@@ -321,12 +321,12 @@ bool process_directives(char *directive)
value++; /* skip initial $ if present */
q = value;
if (!isidstart(*q)) {
if (!nasm_isidstart(*q)) {
validid = false;
} else {
q++;
while (*q && *q != ':' && !nasm_isspace(*q)) {
if (!isidchar(*q))
if (!nasm_isidchar(*q))
validid = false;
q++;
}
@@ -411,7 +411,7 @@ bool process_directives(char *directive)
p = value;
q = debugid;
badid = overlong = false;
if (!isidstart(*p)) {
if (!nasm_isidstart(*p)) {
badid = true;
} else {
while (*p && !nasm_isspace(*p)) {
@@ -419,7 +419,7 @@ bool process_directives(char *directive)
overlong = true;
break;
}
if (!isidchar(*p))
if (!nasm_isidchar(*p))
badid = true;
*q++ = *p++;
}

View File

@@ -41,11 +41,11 @@
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include "nasm.h"
#include "nasmlib.h"
#include "nctype.h"
#include "error.h"
#include "saa.h"
#include "raa.h"
@@ -456,7 +456,7 @@ int main(int argc, char **argv)
error_file = stderr;
tolower_init();
nasm_ctype_init();
src_init();
/*
@@ -1026,8 +1026,10 @@ static bool process_arg(char *p, char *q, int pass)
break;
case 't':
if (pass == 2)
if (pass == 2) {
tasm_compatible_mode = true;
nasm_ctype_tasm_mode();
}
break;
case 'v':

View File

@@ -953,11 +953,11 @@ static Token *tokenize(char *line)
} else if (*p == '!') {
type = TOK_PREPROC_ID;
p++;
if (isidchar(*p)) {
if (nasm_isidchar(*p)) {
do {
p++;
}
while (isidchar(*p));
while (nasm_isidchar(*p));
} else if (*p == '\'' || *p == '\"' || *p == '`') {
p = nasm_skip_string(p);
if (*p)
@@ -968,23 +968,23 @@ static Token *tokenize(char *line)
/* %! without string or identifier */
type = TOK_OTHER; /* Legacy behavior... */
}
} else if (isidchar(*p) ||
} else if (nasm_isidchar(*p) ||
((*p == '!' || *p == '%' || *p == '$') &&
isidchar(p[1]))) {
nasm_isidchar(p[1]))) {
do {
p++;
}
while (isidchar(*p));
while (nasm_isidchar(*p));
type = TOK_PREPROC_ID;
} else {
type = TOK_OTHER;
if (*p == '%')
p++;
}
} else if (isidstart(*p) || (*p == '$' && isidstart(p[1]))) {
} else if (nasm_isidstart(*p) || (*p == '$' && nasm_isidstart(p[1]))) {
type = TOK_ID;
p++;
while (*p && isidchar(*p))
while (*p && nasm_isidchar(*p))
p++;
} else if (*p == '\'' || *p == '"' || *p == '`') {
/*
@@ -1003,7 +1003,7 @@ static Token *tokenize(char *line)
} else if (p[0] == '$' && p[1] == '$') {
type = TOK_OTHER; /* TOKEN_BASE */
p += 2;
} else if (isnumstart(*p)) {
} else if (nasm_isnumstart(*p)) {
bool is_hex = false;
bool is_float = false;
bool has_e = false;
@@ -1037,7 +1037,7 @@ static Token *tokenize(char *line)
is_float = true;
if (*p == '+' || *p == '-')
p++;
} else if (isnumchar(c))
} else if (nasm_isnumchar(c))
; /* just advance */
else if (c == '.') {
/*

View File

@@ -137,8 +137,8 @@ int stdscan(void *private_data, struct tokenval *tv)
return tv->t_type = TOKEN_EOS;
/* we have a token; either an id, a number or a char */
if (isidstart(*stdscan_bufptr) ||
(*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
if (nasm_isidstart(*stdscan_bufptr) ||
(*stdscan_bufptr == '$' && nasm_isidstart(stdscan_bufptr[1]))) {
/* now we've got an identifier */
bool is_sym = false;
int token_type;
@@ -150,7 +150,7 @@ int stdscan(void *private_data, struct tokenval *tv)
r = stdscan_bufptr++;
/* read the entire buffer to advance the buffer pointer but... */
while (isidchar(*stdscan_bufptr))
while (nasm_isidchar(*stdscan_bufptr))
stdscan_bufptr++;
/* ... copy only up to IDLEN_MAX-1 characters */
@@ -178,7 +178,7 @@ int stdscan(void *private_data, struct tokenval *tv)
} else {
return tv->t_type = TOKEN_ID;
}
} else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
} else if (*stdscan_bufptr == '$' && !nasm_isnumchar(stdscan_bufptr[1])) {
/*
* It's a $ sign with no following hex number; this must
* mean it's a Here token ($), evaluating to the current
@@ -191,7 +191,7 @@ int stdscan(void *private_data, struct tokenval *tv)
return tv->t_type = TOKEN_BASE;
}
return tv->t_type = TOKEN_HERE;
} else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */
} else if (nasm_isnumstart(*stdscan_bufptr)) { /* now we've got a number */
bool rn_error;
bool is_hex = false;
bool is_float = false;
@@ -224,7 +224,7 @@ int stdscan(void *private_data, struct tokenval *tv)
is_float = true;
if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-')
stdscan_bufptr++;
} else if (isnumchar(c))
} else if (nasm_isnumchar(c))
; /* just advance */
else if (c == '.')
is_float = true;
@@ -273,7 +273,7 @@ int stdscan(void *private_data, struct tokenval *tv)
* read the entire buffer to advance the buffer pointer
* {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens.
*/
while (isbrcchar(*stdscan_bufptr))
while (nasm_isbrcchar(*stdscan_bufptr))
stdscan_bufptr++;
token_len = stdscan_bufptr - r;

View File

@@ -115,6 +115,8 @@ AC_CHECK_FUNCS(strncasecmp strnicmp)
AC_CHECK_FUNCS(strsep)
AC_CHECK_FUNCS(strnlen)
AC_CHECK_FUNCS(strrchrnul)
AC_CHECK_FUNCS(iscntrl)
AC_CHECK_FUNCS(isascii)
AC_CHECK_FUNCS(getuid)
AC_CHECK_FUNCS(getgid)

View File

@@ -94,7 +94,7 @@ int main(int argc, char **argv)
int64_t offset;
FILE *fp;
tolower_init();
nasm_ctype_init();
nasm_set_verror(ndisasm_verror);
iflag_clear_all(&prefer);

View File

@@ -44,6 +44,7 @@
#include <time.h>
#include "nasmlib.h"
#include "nctype.h"
#include "strlist.h"
#include "preproc.h"
#include "insnsi.h" /* For enum opcode */
@@ -378,46 +379,6 @@ extern struct strlist *depend_list;
/* TASM mode changes some properties */
extern bool tasm_compatible_mode;
/*
* Some lexical properties of the NASM source language, included
* here because they are shared between the parser and preprocessor.
*/
/*
* isidstart matches any character that may start an identifier, and isidchar
* matches any character that may appear at places other than the start of an
* identifier. E.g. a period may only appear at the start of an identifier
* (for local labels), whereas a number may appear anywhere *but* at the
* start.
* isbrcchar matches any character that may be placed inside curly braces as a
* decorator. E.g. {rn-sae}, {1to8}, {k1}{z}
*/
static inline bool isidstart(char c)
{
return nasm_isalpha(c) || c == '_' || c == '.' || c == '@' ||
(tasm_compatible_mode && c == '?');
}
static inline bool isidchar(char c)
{
return isidstart(c) || nasm_isdigit(c) || c == '$' || c == '#' || c == '~';
}
static inline bool isbrcchar(char c)
{
return isidchar(c) || c == '-';
}
static inline bool isnumstart(char c)
{
return nasm_isdigit(c) || c == '$';
}
static inline bool isnumchar(char c)
{
return nasm_isalnum(c) || c == '_';
}
/*
* inline function to skip past an identifier; returns the first character past
* the identifier if valid, otherwise NULL.
@@ -426,10 +387,10 @@ static inline char *nasm_skip_identifier(const char *str)
{
const char *p = str;
if (!isidstart(*p++)) {
if (!nasm_isidstart(*p++)) {
p = NULL;
} else {
while (isidchar(*p++))
while (nasm_isidchar(*p++))
;
}
return (char *)p;

View File

@@ -41,30 +41,12 @@
#include "compiler.h"
#include "bytesex.h"
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
/*
* tolower table -- avoids a function call on some platforms.
* NOTE: unlike the tolower() function in ctype, EOF is *NOT*
* a permitted value, for obvious reasons.
*/
void tolower_init(void);
extern unsigned char nasm_tolower_tab[256];
#define nasm_tolower(x) nasm_tolower_tab[(unsigned char)(x)]
/* Wrappers around <ctype.h> functions */
/* These are only valid for values that cannot include EOF */
#define nasm_isspace(x) isspace((unsigned char)(x))
#define nasm_isalpha(x) isalpha((unsigned char)(x))
#define nasm_isdigit(x) isdigit((unsigned char)(x))
#define nasm_isalnum(x) isalnum((unsigned char)(x))
#define nasm_isxdigit(x) isxdigit((unsigned char)(x))
/*
* Wrappers around malloc, realloc and free. nasm_malloc will
* fatal-error and die rather than return NULL; nasm_realloc will

123
include/nctype.h Normal file
View File

@@ -0,0 +1,123 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2018 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following
* conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ----------------------------------------------------------------------- */
/*
* ctype-like functions specific to NASM
*/
#ifndef NASM_NCTYPE_H
#define NASM_NCTYPE_H
#include "compiler.h"
void nasm_ctype_init(void);
extern unsigned char nasm_tolower_tab[256];
/*
 * Lower-case a character by table lookup in nasm_tolower_tab[].
 * The argument is converted to unsigned char first so that it is a
 * valid index into the 256-entry table whatever the signedness of
 * plain char.
 */
static inline char nasm_tolower(char x)
{
    const unsigned char idx = (unsigned char)x;

    return nasm_tolower_tab[idx];
}
/*
 * NASM ctype table: one bit per character class.  Bits may be OR'ed
 * together to form a mask matching any of several classes.
 */
enum nasm_ctype {
    NCT_CTRL    = 1 << 0,
    NCT_SPACE   = 1 << 1,
    NCT_ASCII   = 1 << 2,
    NCT_LOWER   = 1 << 3,   /* isalpha(x) && tolower(x) == x */
    NCT_UPPER   = 1 << 4,   /* isalpha(x) && tolower(x) != x */
    NCT_DIGIT   = 1 << 5,
    NCT_HEX     = 1 << 6,
    NCT_ID      = 1 << 7,
    NCT_IDSTART = 1 << 8,
    NCT_MINUS   = 1 << 9,   /* - */
    NCT_DOLLAR  = 1 << 10,  /* $ */
    NCT_UNDER   = 1 << 11   /* _ */
};
extern uint16_t nasm_ctype_tab[256];
/*
 * Test a character against a set of NCT_* class bits.
 * Returns true if the nasm_ctype_tab[] entry for x has at least one
 * of the bits in mask set, false otherwise.
 */
static inline bool nasm_ctype(unsigned char x, enum nasm_ctype mask)
{
    const uint16_t classes = nasm_ctype_tab[x];

    return (classes & mask) ? true : false;
}
/* True if x carries the NCT_SPACE class bit. */
static inline bool nasm_isspace(char x)
{
    const unsigned char uc = (unsigned char)x;

    return nasm_ctype(uc, NCT_SPACE);
}
static inline bool nasm_isalpha(char x)
{
return nasm_ctype(x, NCT_LOWER|NCT_UPPER);
}
/* True if x carries the NCT_DIGIT class bit. */
static inline bool nasm_isdigit(char x)
{
    const unsigned char uc = (unsigned char)x;

    return nasm_ctype(uc, NCT_DIGIT);
}
static inline bool nasm_isalnum(char x)
{
return nasm_ctype(x, NCT_LOWER|NCT_UPPER|NCT_DIGIT);
}
/* True if x carries the NCT_HEX class bit. */
static inline bool nasm_isxdigit(char x)
{
    const unsigned char uc = (unsigned char)x;

    return nasm_ctype(uc, NCT_HEX);
}
/* True if x may start an identifier, i.e. carries NCT_IDSTART. */
static inline bool nasm_isidstart(char x)
{
    const unsigned char uc = (unsigned char)x;

    return nasm_ctype(uc, NCT_IDSTART);
}
/* True if x may appear inside an identifier, i.e. carries NCT_ID. */
static inline bool nasm_isidchar(char x)
{
    const unsigned char uc = (unsigned char)x;

    return nasm_ctype(uc, NCT_ID);
}
static inline bool nasm_isbrcchar(char x)
{
return nasm_ctype(x, NCT_ID|NCT_MINUS);
}
static inline bool nasm_isnumstart(char x)
{
return nasm_ctype(x, NCT_DIGIT|NCT_DOLLAR);
}
static inline bool nasm_isnumchar(char x)
{
return nasm_ctype(x, NCT_DIGIT|NCT_LOWER|NCT_UPPER|NCT_UNDER);
}
/*
 * TASM-compatible mode requires ? to be an identifier character:
 * flag it in nasm_ctype_tab[] as valid both at the start of an
 * identifier and inside one.
 */
static inline void nasm_ctype_tasm_mode(void)
{
    uint16_t * const qmark = &nasm_ctype_tab['?'];

    *qmark |= NCT_ID | NCT_IDSTART;
}
#endif /* NASM_NCTYPE_H */

View File

@@ -32,7 +32,7 @@
* ----------------------------------------------------------------------- */
#include "compiler.h"
#include "nasmlib.h"
#include "nctype.h"
#include "hashtbl.h"
static const uint64_t crc64_tab[256] = {

112
nasmlib/nctype.c Normal file
View File

@@ -0,0 +1,112 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2018 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following
* conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ----------------------------------------------------------------------- */
#include "nctype.h"
#include <ctype.h>
/*
 * Table of tolower() results, indexed by unsigned character value.
 * Looking characters up here avoids function calls on some platforms.
 */
unsigned char nasm_tolower_tab[256];

/* Fill every entry of nasm_tolower_tab[] with tolower() of its index. */
static void tolower_tab_init(void)
{
    unsigned char *slot = nasm_tolower_tab;
    int ch = 0;

    while (ch < 256) {
        *slot++ = tolower(ch);
        ch++;
    }
}
/*
* Table of character type flags; some are simply <ctype.h>,
* some are NASM-specific.
*/
uint16_t nasm_ctype_tab[256];
#if !defined(HAVE_ISCNTRL) && !defined(iscntrl)
# define iscntrl(x) ((x) < 32)
#endif
#if !defined(HAVE_ISASCII) && !defined(isascii)
# define isascii(x) ((x) < 128)
#endif
static void ctype_tab_init(void)
{
int i;
for (i = 0; i < 256; i++) {
enum nasm_ctype ct = 0;
if (iscntrl(i))
ct |= NCT_CTRL;
if (isascii(i))
ct |= NCT_ASCII;
if (isspace(i) && i != '\n')
ct |= NCT_SPACE;
if (isalpha(i)) {
ct |= (nasm_tolower(i) == i) ? NCT_LOWER : NCT_UPPER;
ct |= NCT_ID|NCT_IDSTART;
}
if (isdigit(i))
ct |= NCT_DIGIT|NCT_ID;
if (isxdigit(i))
ct |= NCT_HEX;
/* Non-ASCII character, but no ctype returned (e.g. Unicode) */
if (!ct && !ispunct(i))
ct |= NCT_ID|NCT_IDSTART;
nasm_ctype_tab[i] = ct;
}
nasm_ctype_tab['-'] |= NCT_MINUS;
nasm_ctype_tab['$'] |= NCT_DOLLAR|NCT_ID;
nasm_ctype_tab['_'] |= NCT_UNDER|NCT_ID|NCT_IDSTART;
nasm_ctype_tab['.'] |= NCT_ID|NCT_IDSTART;
nasm_ctype_tab['@'] |= NCT_ID|NCT_IDSTART;
nasm_ctype_tab['#'] |= NCT_ID;
nasm_ctype_tab['~'] |= NCT_ID;
}
/*
 * Initialize both character lookup tables.
 *
 * The tolower table is filled first: ctype_tab_init() relies on it
 * to tell lower case letters from upper case ones.
 */
void nasm_ctype_init(void)
{
tolower_tab_init();
ctype_tab_init();
}

View File

@@ -36,26 +36,8 @@
*/
#include "compiler.h"
#include <stdlib.h>
#include <ctype.h>
#include "nasmlib.h"
/*
* Prepare a table of tolower() results. This avoids function calls
* on some platforms.
*/
unsigned char nasm_tolower_tab[256];
void tolower_init(void)
{
int i;
for (i = 0; i < 256; i++)
nasm_tolower_tab[i] = tolower(i);
}
#include "nctype.h"
#ifndef nasm_stricmp
int nasm_stricmp(const char *s1, const char *s2)