diff --git a/FIXES b/FIXES index 19afa0b..598608a 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,10 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +February 6, 2020: + Additional small cleanups from Christos Zoulas. awk is now + a little more robust about reporting I/O errors upon exit. + January 31, 2020: Merge PR #70, which avoids use of variable length arrays. Thanks to GitHub user michaelforney. Fix issue #60 ({0} in interval diff --git a/main.c b/main.c index 2476320..832d971 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20200131"; +const char *version = "version 20200206"; #define DEBUG #include diff --git a/proto.h b/proto.h index 6918e1f..aac2547 100644 --- a/proto.h +++ b/proto.h @@ -111,6 +111,7 @@ extern double getfval(Cell *); extern char *getsval(Cell *); extern char *getpssval(Cell *); /* for print */ extern char *tostring(const char *); +extern char *tostringN(const char *, size_t); extern char *qstring(const char *, int); extern Cell *catstr(Cell *, Cell *); @@ -135,9 +136,12 @@ extern void yyerror(const char *); extern void fpecatch(int); extern void bracecheck(void); extern void bcheck2(int, int, int); -extern void SYNTAX(const char *, ...); -extern void FATAL(const char *, ...) __attribute__((__noreturn__)); -extern void WARNING(const char *, ...); +extern void SYNTAX(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +extern void FATAL(const char *, ...) + __attribute__((__format__(__printf__, 1, 2), __noreturn__)); +extern void WARNING(const char *, ...) 
+ __attribute__((__format__(__printf__, 1, 2))); extern void error(void); extern void eprint(void); extern void bclass(int); diff --git a/run.c b/run.c index 6eafc0b..6abb8a3 100644 --- a/run.c +++ b/run.c @@ -25,6 +25,8 @@ THIS SOFTWARE. #define DEBUG #include #include +#include +#include #include #include #include @@ -37,11 +39,12 @@ THIS SOFTWARE. #include "awk.h" #include "ytab.h" -#define tempfree(x) if (istemp(x)) tfree(x); else - -/* -#undef tempfree +static void stdinit(void); +static void flush_all(void); +#if 1 +#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) +#else void tempfree(Cell *p) { if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { WARNING("bad csub %d in Cell %d %s", @@ -50,7 +53,7 @@ void tempfree(Cell *p) { if (istemp(p)) tfree(p); } -*/ +#endif /* do we really need these? */ /* #ifdef _NFILE */ @@ -131,7 +134,6 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, void run(Node *a) /* execution of parse tree starts here */ { - extern void stdinit(void); stdinit(); execute(a); @@ -220,11 +222,11 @@ struct Frame { /* stack frame for awk function calls */ struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ int nframe = 0; /* number of frames allocated */ -struct Frame *fp = NULL; /* frame pointer. bottom level unused */ +struct Frame *frp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ { - static Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL }; + static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL }; int i, ncall, ndef; int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; @@ -237,21 +239,21 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ if (!isfcn(fcn)) FATAL("calling undefined function %s", s); if (frame == NULL) { - fp = frame = calloc(nframe += 100, sizeof(*frame)); + frp = frame = calloc(nframe += 100, sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames calling %s", s); } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ ncall++; ndef = (int) fcn->fval; /* args in defn */ - dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) ); + dprintf( ("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)) ); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); if (ncall + ndef > NARGS) FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ - dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) ); + dprintf( ("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)) ); y = execute(x); oargs[i] = y; dprintf( ("args[%d]: %s %f <%s>, t=%o\n", @@ -268,25 +270,25 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ args[i] = gettemp(); *args[i] = newcopycell; } - fp++; /* now ok to up frame */ - if (fp >= frame + nframe) { - int dfp = fp - frame; /* old index */ + frp++; /* now ok to up frame */ + if (frp >= frame + nframe) { + int dfp = frp - frame; /* old index */ frame = realloc(frame, (nframe += 100) * sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames in %s", s); - fp = frame + dfp; + frp = frame + dfp; } - fp->fcncell = fcn; - fp->args = args; - fp->nargs = ndef; /* number defined with (excess are locals) */ - fp->retval = gettemp(); + frp->fcncell = fcn; + frp->args = args; + frp->nargs = ndef; /* number defined with (excess are locals) */ + frp->retval = gettemp(); - dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + dprintf( ("start exec of %s, frp=%d\n", s, (int) (frp-frame)) ); y = execute((Node *)(fcn->sval)); /* execute body */ - dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + dprintf( ("finished exec of %s, frp=%d\n", s, (int) (frp-frame)) ); for (i = 0; i < ndef; i++) { - Cell *t = fp->args[i]; + Cell *t = frp->args[i]; if (isarr(t)) { if (t->csub == CCOPY) { if (i >= ncall) { @@ -315,9 +317,9 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ if (freed == 0) { tempfree(y); /* don't free twice! 
*/ } - z = fp->retval; /* return value */ + z = frp->retval; /* return value */ dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); - fp--; + frp--; return(z); } @@ -344,11 +346,11 @@ Cell *arg(Node **a, int n) /* nth argument of a function */ { n = ptoi(a[0]); /* argument number, counting from 0 */ - dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); - if (n+1 > fp->nargs) + dprintf( ("arg(%d), frp->nargs=%d\n", n, frp->nargs) ); + if (n+1 > frp->nargs) FATAL("argument #%d of function %s was not supplied", - n+1, fp->fcncell->nval); - return fp->args[n]; + n+1, frp->fcncell->nval); + return frp->args[n]; } Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ @@ -367,14 +369,14 @@ Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ if (a[0] != NULL) { y = execute(a[0]); if ((y->tval & (STR|NUM)) == (STR|NUM)) { - setsval(fp->retval, getsval(y)); - fp->retval->fval = getfval(y); - fp->retval->tval |= NUM; + setsval(frp->retval, getsval(y)); + frp->retval->fval = getfval(y); + frp->retval->tval |= NUM; } else if (y->tval & STR) - setsval(fp->retval, getsval(y)); + setsval(frp->retval, getsval(y)); else if (y->tval & NUM) - setfval(fp->retval, getfval(y)); + setfval(frp->retval, getfval(y)); else /* can't happen */ FATAL("bad type variable %d", y->tval); tempfree(y); @@ -793,8 +795,8 @@ Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ z = gettemp(); for (p1 = s1; *p1 != '\0'; p1++) { - for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++) - ; + for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) + continue; if (*p2 == '\0') { v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ break; @@ -1064,7 +1066,7 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. 
also -a[0] */ case UMINUS: i = -i; break; - case UPLUS: /* handled by getfval(), above */ + case UPLUS: /* handled by getfval(), above */ break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ @@ -1301,7 +1303,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ setsymtab(num, s, 0.0, STR, (Array *) ap->sval); setptr(patbeg, temp); s = patbeg + patlen; - if (*(patbeg+patlen-1) == 0 || *s == 0) { + if (*(patbeg+patlen-1) == '\0' || *s == '\0') { n++; snprintf(num, sizeof(num), "%d", n); setsymtab(num, "", 0.0, STR, (Array *) ap->sval); @@ -1322,15 +1324,16 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ pfa = NULL; } else if (sep == ' ') { for (n = 0; ; ) { - while (*s == ' ' || *s == '\t' || *s == '\n') +#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') + while (ISWS(*s)) s++; - if (*s == 0) + if (*s == '\0') break; n++; t = s; do s++; - while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0'); + while (*s != '\0' && !ISWS(*s)); temp = *s; setptr(s, '\0'); snprintf(num, sizeof(num), "%d", n); @@ -1339,22 +1342,22 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); - if (*s != 0) + if (*s != '\0') s++; } } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ - for (n = 0; *s != 0; s++) { + for (n = 0; *s != '\0'; s++) { char buf[2]; n++; snprintf(num, sizeof(num), "%d", n); buf[0] = *s; - buf[1] = 0; + buf[1] = '\0'; if (isdigit((uschar)buf[0])) setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); else setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); } - } else if (*s != 0) { + } else if (*s != '\0') { for (;;) { n++; t = s; @@ -1368,7 +1371,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); - if (*s++ == 0) + if (*s++ == '\0') break; } } @@ -1505,16 +1508,73 @@ Cell 
*instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ return True; } +static char *nawk_convert(const char *s, int (*fun_c)(int), + wint_t (*fun_wc)(wint_t)) +{ /* copy s, mapping each character with fun_c (single-byte locale) or fun_wc (multibyte locale) */ + char *buf = NULL; + char *pbuf = NULL; + const char *ps = NULL; + size_t n = 0; + mbstate_t mbs, mbs2; + wchar_t wc; + size_t sz = MB_CUR_MAX; /* longest multibyte character, in bytes, for the current locale */ + + if (sz == 1) { + buf = tostring(s); + + for (pbuf = buf; *pbuf; pbuf++) + *pbuf = fun_c((uschar)*pbuf); + + return buf; + } else { + /* upper/lower character may be shorter/longer */ + buf = tostringN(s, strlen(s) * sz + 1); + + memset(&mbs, 0, sizeof(mbs)); + memset(&mbs2, 0, sizeof(mbs2)); + + ps = s; + pbuf = buf; + while (n = mbrtowc(&wc, ps, sz, &mbs), + n > 0 && n != (size_t)-1 && n != (size_t)-2) /* mbrtowc: 0 = reached the NUL, (size_t)-1 = invalid, (size_t)-2 = incomplete sequence */ + { + ps += n; + + n = wcrtomb(pbuf, fun_wc(wc), &mbs2); + if (n == (size_t)-1) + FATAL("illegal wide character %s", s); + + pbuf += n; + } + + *pbuf = '\0'; + + if (n) /* loop ended on an mbrtowc error rather than on the NUL */ + FATAL("illegal byte sequence %s", s); + + return buf; + } +} + +static char *nawk_toupper(const char *s) +{ /* upper-cased copy of s */ + return nawk_convert(s, toupper, towupper); +} + +static char *nawk_tolower(const char *s) +{ /* lower-cased copy of s */ + return nawk_convert(s, tolower, towlower); +} + Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ { Cell *x, *y; Awkfloat u; int t; Awkfloat tmp; - char *p, *buf; + char *buf; Node *nextarg; FILE *fp; - void flush_all(void); int status = 0; t = ptoi(a[0]); @@ -1585,16 +1645,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis break; case FTOUPPER: case FTOLOWER: - buf = tostring(getsval(x)); - if (t == FTOUPPER) { - for (p = buf; *p; p++) - if (islower((uschar) *p)) - *p = toupper((uschar)*p); - } else { - for (p = buf; *p; p++) - if (isupper((uschar) *p)) - *p = tolower((uschar)*p); - } + if (t == FTOUPPER) + buf = nawk_toupper(getsval(x)); + else + buf = nawk_tolower(getsval(x)); tempfree(x); x = gettemp(); setsval(x, buf); @@ -1677,14 +1731,14 @@ struct files { int mode; /* '|', 'a', 'w' => LE/LT, GT */ } *files; -int nfiles; +size_t nfiles; -void stdinit(void) /* in case stdin, etc., are not constants */ +static void stdinit(void) /* in case stdin, etc., are not constants */ { nfiles = FOPEN_MAX; files = calloc(nfiles, sizeof(*files)); if (files == NULL) - FATAL("can't allocate file memory for %u files", nfiles); + FATAL("can't allocate file memory for %zu files", nfiles); files[0].fp = stdin; files[0].fname = "/dev/stdin"; files[0].mode = LT; @@ -1699,12 +1753,13 @@ void stdinit(void) /* in case stdin, etc., are not constants */ FILE *openfile(int a, const char *us) { const char *s = us; - int i, m; + size_t i; + int m; FILE *fp = NULL; if (*s == '\0') FATAL("null file name in print or getline"); - for (i=0; i < nfiles; i++) + for (i = 0; i < nfiles; i++) if (files[i].fname && strcmp(s, files[i].fname) == 0) { if (a == files[i].mode || (a==APPEND && files[i].mode==GT)) return files[i].fp; @@ -1714,15 +1769,15 @@ FILE *openfile(int a, const char *us) if (a == FFLUSH) /* didn't find it, so don't create it! 
*/ return NULL; - for (i=0; i < nfiles; i++) + for (i = 0; i < nfiles; i++) if (files[i].fp == NULL) break; if (i >= nfiles) { struct files *nf; - int nnf = nfiles + FOPEN_MAX; + size_t nnf = nfiles + FOPEN_MAX; nf = realloc(files, nnf * sizeof(*nf)); if (nf == NULL) - FATAL("cannot grow files for %s and %d files", s, nnf); + FATAL("cannot grow files for %s and %zu files", s, nnf); memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); nfiles = nnf; files = nf; @@ -1754,7 +1809,7 @@ FILE *openfile(int a, const char *us) const char *filename(FILE *fp) { - int i; + size_t i; for (i = 0; i < nfiles; i++) if (fp == files[i].fp) @@ -1849,7 +1904,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ while (sptr < patbeg) *pb++ = *sptr++; sptr = getsval(y); - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1867,8 +1922,8 @@ Cell *sub(Node **a, int nnn) /* substitute command */ sptr = patbeg + patlen; if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); - while ((*pb++ = *sptr++) != 0) - ; + while ((*pb++ = *sptr++) != '\0') + continue; } if (pb > buf + bufsz) FATAL("sub result2 %.30s too big; can't happen", buf); @@ -1911,11 +1966,11 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ pb = buf; rptr = getsval(y); do { - if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ if (mflag == 0) { /* can replace empty */ num++; sptr = rptr; - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1928,7 +1983,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } } - if (*t == 0) /* at end */ + if (*t == '\0') /* at end */ goto done; adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); *pb++ = *t++; @@ -1943,7 +1998,7 @@ Cell 
*gsub(Node **a, int nnn) /* global substitute */ while (sptr < patbeg) *pb++ = *sptr++; sptr = rptr; - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1956,7 +2011,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } t = patbeg + patlen; - if (patlen == 0 || *t == 0 || *(t-1) == 0) + if (patlen == 0 || *t == '\0' || *(t-1) == '\0') goto done; if (pb > buf + bufsz) FATAL("gsub result1 %.30s too big; can't happen", buf); @@ -1965,8 +2020,8 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ } while (pmatch(pfa,t)); sptr = t; adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); - while ((*pb++ = *sptr++) != 0) - ; + while ((*pb++ = *sptr++) != '\0') + continue; done: if (pb < buf + bufsz) *pb = '\0'; else if (*(pb-1) != '\0') diff --git a/testdir/T.builtin b/testdir/T.builtin index 411a5e5..b36f6cb 100755 --- a/testdir/T.builtin +++ b/testdir/T.builtin @@ -29,6 +29,14 @@ $awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}' >foo1 echo 'hello, world!|HELLO, WORLD!|hello, WORLD!' >foo2 diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower)' + +if locale -a | grep -qsi de_DE.UTF-8; then + (export LANG=de_DE.UTF-8 && echo 'Dürst' | + $awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1 + echo 'dürst|DÜRST|Dürst' >foo2 + diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8' +fi + $awk 'BEGIN { j = 1; sprintf("%d", 99, ++j) # does j get incremented? 
if (j != 2)
diff --git a/tran.c b/tran.c
index d659cfa..4efaa21 100644
--- a/tran.c
+++ b/tran.c
@@ -344,7 +344,7 @@ void funnyvar(Cell *vp, const char *rw)
 	if (vp->tval & FCN)
 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
-		vp, vp->nval, vp->sval, vp->fval, vp->tval);
+		(void *)vp, vp->nval, vp->sval, vp->fval, vp->tval);
 }
 
 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
@@ -516,6 +516,27 @@ char *tostring(const char *s)	/* make a copy of string s */
 	return(p);
 }
 
+/*
+ * tostringN: return a malloc'ed copy of s held in a buffer of at least
+ * n bytes, for callers that need room to rewrite the string in place.
+ * The buffer is never smaller than strlen(s)+1, so the copy cannot
+ * overflow even when n is too small.  Does not return (FATAL) if the
+ * allocation fails.  The caller owns the returned buffer.
+ */
+char *tostringN(const char *s, size_t n)	/* copy s into a buffer of >= n bytes */
+{
+	size_t need = strlen(s) + 1;	/* s plus its terminating NUL */
+	char *p;
+
+	if (n < need)
+		n = need;
+	p = malloc(n);
+	if (p == NULL)
+		FATAL("out of space in tostringN on %s", s);
+	strcpy(p, s);
+	return(p);
+}
+
 Cell *catstr(Cell *a, Cell *b)	/* concatenate a and b */
 {
 	Cell *c;