From 40c6916307e503c77a03c512e1abb226e9c0ee03 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Wed, 29 Aug 2018 21:18:21 +0300 Subject: [PATCH 01/15] Fix REGRESS so all tests run on Mac OS X. --- ChangeLog | 6 ++++++ REGRESS | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index 17715fc..59d4b07 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2018-08-29 Arnold D. Robbins + + * REGRESS: Check for existence of a.out. If not there, run + make. Enable core dumps for T.arnold system status test + to work on MacOS X. + 2018-08-22 Arnold D. Robbins * awktest.tar (testdir/T.expr): Fix test for unary plus. diff --git a/REGRESS b/REGRESS index facbd83..7d3ded6 100755 --- a/REGRESS +++ b/REGRESS @@ -1,5 +1,15 @@ #! /bin/sh +case `uname` in +CYGWIN) EXE=a.exe ;; +*) EXE=a.out ;; +esac + +if [ ! -f $EXE ] +then + make || exit 1 +fi + if [ -d testdir ] then true # do nothing @@ -16,5 +26,10 @@ cd testdir pwd PATH=.:$PATH export PATH +if (ulimit -c unlimited > /dev/null 2>&1) +then + # Workaround broken default on MacOS X + ulimit -c unlimited +fi REGRESS From 6cf37e9d15c15a3ba5f5c93c05e2fcafc35a2338 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Fri, 14 Sep 2018 17:29:06 -0700 Subject: [PATCH 02/15] Check for format character precision argument before using it --- bugs-fixed/README | 3 +++ bugs-fixed/missing-precision.awk | 1 + bugs-fixed/missing-precision.ok | 2 ++ run.c | 3 +++ 4 files changed, 9 insertions(+) create mode 100644 bugs-fixed/missing-precision.awk create mode 100644 bugs-fixed/missing-precision.ok diff --git a/bugs-fixed/README b/bugs-fixed/README index 222ef68..629db08 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -23,3 +23,6 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. + +8. 
missing-precision: When using the format string "%*s", the precision +argument was used without checking if it was present first. diff --git a/bugs-fixed/missing-precision.awk b/bugs-fixed/missing-precision.awk new file mode 100644 index 0000000..4e7a74b --- /dev/null +++ b/bugs-fixed/missing-precision.awk @@ -0,0 +1 @@ +BEGIN { printf("%*s"); } diff --git a/bugs-fixed/missing-precision.ok b/bugs-fixed/missing-precision.ok new file mode 100644 index 0000000..608b4fa --- /dev/null +++ b/bugs-fixed/missing-precision.ok @@ -0,0 +1,2 @@ +./a.out: not enough args in printf(%*s) + source line number 1 diff --git a/run.c b/run.c index 81b75da..95380ef 100644 --- a/run.c +++ b/run.c @@ -863,6 +863,9 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co FATAL("'$' not permitted in awk formats"); } if (*s == '*') { + if (a == NULL) { + FATAL("not enough args in printf(%s)", os); + } x = execute(a); a = a->nnext; sprintf(t-1, "%d", fmtwd=(int) getfval(x)); From e059b3b197ce7da33b8b0b3529af65fb85b25186 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Fri, 14 Sep 2018 19:56:34 -0700 Subject: [PATCH 03/15] Protect against overflowing during OFMT/CONVFMT conversions --- bugs-fixed/README | 4 ++++ bugs-fixed/fmt-overflow.awk | 1 + bugs-fixed/fmt-overflow.ok | 1 + tran.c | 6 +++--- 4 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 bugs-fixed/fmt-overflow.awk create mode 100644 bugs-fixed/fmt-overflow.ok diff --git a/bugs-fixed/README b/bugs-fixed/README index 222ef68..7c18979 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -23,3 +23,7 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. + +X. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written +to with sprintf(), which meant that some conversions could write past the +end. 
diff --git a/bugs-fixed/fmt-overflow.awk b/bugs-fixed/fmt-overflow.awk new file mode 100644 index 0000000..bf5877e --- /dev/null +++ b/bugs-fixed/fmt-overflow.awk @@ -0,0 +1 @@ +BEGIN { OFMT = "%.1000f"; print 1.25; } diff --git a/bugs-fixed/fmt-overflow.ok b/bugs-fixed/fmt-overflow.ok new file mode 100644 index 0000000..5f7449e --- /dev/null +++ b/bugs-fixed/fmt-overflow.ok @@ -0,0 +1 @@ +1.2500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 diff --git a/tran.c b/tran.c index 72ca6ff..6775b01 100644 --- a/tran.c +++ b/tran.c @@ -395,7 +395,7 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ { - char s[100]; /* BUG: unchecked */ + char s[256]; double dtemp; if ((vp->tval & (NUM | STR)) == 0) @@ -434,9 +434,9 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel if (freeable(vp)) \ xfree(vp->sval); \ if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ - sprintf(s, "%.30g", vp->fval); \ + snprintf(s, sizeof (s), "%.30g", vp->fval); \ else \ - sprintf(s, *fmt, vp->fval); \ + snprintf(s, sizeof (s), *fmt, vp->fval); \ vp->sval = tostring(s); \ vp->tval &= ~DONTFREE; \ vp->tval |= STR; \ From e26237434fb769d9c1ea239101eb5b24be588eea Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Sat, 15 Sep 2018 01:38:39 -0700 Subject: [PATCH 04/15] Fix issues with assigning during concatenation --- bugs-fixed/README | 5 +++++ bugs-fixed/concat-assign-same.awk | 4 ++++ bugs-fixed/concat-assign-same.bad | 2 ++ bugs-fixed/concat-assign-same.ok | 2 ++ run.c | 23 ++++++++++++----------- 5 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 bugs-fixed/concat-assign-same.awk create mode 100644 bugs-fixed/concat-assign-same.bad create mode 100644 
bugs-fixed/concat-assign-same.ok diff --git a/bugs-fixed/README b/bugs-fixed/README index 222ef68..7bdae04 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -23,3 +23,8 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. + +X. concat-assign-same: Concatenation previously evaluated both sides of the +expression before doing its work, which, since assign() evaluates to the cell +being assigned to, meant that expressions like "print (a = 1) (a = 2)" would +print "22" rather than "12". diff --git a/bugs-fixed/concat-assign-same.awk b/bugs-fixed/concat-assign-same.awk new file mode 100644 index 0000000..ed19f35 --- /dev/null +++ b/bugs-fixed/concat-assign-same.awk @@ -0,0 +1,4 @@ +BEGIN { + print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5); + print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5); +} diff --git a/bugs-fixed/concat-assign-same.bad b/bugs-fixed/concat-assign-same.bad new file mode 100644 index 0000000..294725b --- /dev/null +++ b/bugs-fixed/concat-assign-same.bad @@ -0,0 +1,2 @@ +22345 +1 2 3 4 5 diff --git a/bugs-fixed/concat-assign-same.ok b/bugs-fixed/concat-assign-same.ok new file mode 100644 index 0000000..4475052 --- /dev/null +++ b/bugs-fixed/concat-assign-same.ok @@ -0,0 +1,2 @@ +12345 +1 2 3 4 5 diff --git a/run.c b/run.c index 81b75da..14b0e21 100644 --- a/run.c +++ b/run.c @@ -1175,25 +1175,26 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */ { Cell *x, *y, *z; int n1, n2; - char *s; + char *s = NULL; + int ssz = 0; x = execute(a[0]); + n1 = strlen(getsval(x)); + adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); + (void) strncpy(s, x->sval, ssz); + y = execute(a[1]); - getsval(x); - getsval(y); - n1 = strlen(x->sval); - n2 = strlen(y->sval); - s = (char *) malloc(n1 + n2 + 1); - if (s == NULL) - FATAL("out of space concatenating %.15s... 
and %.15s...", - x->sval, y->sval); - strcpy(s, x->sval); - strcpy(s+n1, y->sval); + n2 = strlen(getsval(y)); + adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); + (void) strncpy(s + n1, y->sval, ssz - n1); + tempfree(x); tempfree(y); + z = gettemp(); z->sval = s; z->tval = STR; + return(z); } From 97a4b7ed215ae6446d13fe0eab15b5b3ae4da7da Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Mon, 17 Sep 2018 11:59:04 -0700 Subject: [PATCH 05/15] Fix issues with numeric SUBSEP and large SUBSEP values --- awk.h | 1 + bugs-fixed/README | 6 ++++++ bugs-fixed/numeric-subsep.awk | 5 +++++ bugs-fixed/numeric-subsep.bad | 1 + bugs-fixed/numeric-subsep.ok | 1 + bugs-fixed/subsep-overflow.awk | 24 ++++++++++++++++++++++++ bugs-fixed/subsep-overflow.ok | 5 +++++ run.c | 11 +++++++---- tran.c | 4 +++- 9 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 bugs-fixed/numeric-subsep.awk create mode 100644 bugs-fixed/numeric-subsep.bad create mode 100644 bugs-fixed/numeric-subsep.ok create mode 100644 bugs-fixed/subsep-overflow.awk create mode 100644 bugs-fixed/subsep-overflow.ok diff --git a/awk.h b/awk.h index 70097b9..e0ae9f6 100644 --- a/awk.h +++ b/awk.h @@ -100,6 +100,7 @@ extern Cell *fnrloc; /* FNR */ extern Cell *nfloc; /* NF */ extern Cell *rstartloc; /* RSTART */ extern Cell *rlengthloc; /* RLENGTH */ +extern Cell *subseploc; /* SUBSEP */ /* Cell.tval values: */ #define NUM 01 /* number value is valid */ diff --git a/bugs-fixed/README b/bugs-fixed/README index 222ef68..a41ff53 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -23,3 +23,9 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. + +X. numeric-subsep: If SUBSEP was set to a numeric value, then its string +value wouldn't always be generated before being needed. + +X. 
subsep-overflow: The length of SUBSEP needs to be rechecked after +calling execute(), in case SUBSEP itself has been changed. diff --git a/bugs-fixed/numeric-subsep.awk b/bugs-fixed/numeric-subsep.awk new file mode 100644 index 0000000..1252e4a --- /dev/null +++ b/bugs-fixed/numeric-subsep.awk @@ -0,0 +1,5 @@ +BEGIN { + SUBSEP = 123.456; + a["hello", "world"] = "foo"; + print a["hello" SUBSEP "world"]; +} diff --git a/bugs-fixed/numeric-subsep.bad b/bugs-fixed/numeric-subsep.bad new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/bugs-fixed/numeric-subsep.bad @@ -0,0 +1 @@ + diff --git a/bugs-fixed/numeric-subsep.ok b/bugs-fixed/numeric-subsep.ok new file mode 100644 index 0000000..257cc56 --- /dev/null +++ b/bugs-fixed/numeric-subsep.ok @@ -0,0 +1 @@ +foo diff --git a/bugs-fixed/subsep-overflow.awk b/bugs-fixed/subsep-overflow.awk new file mode 100644 index 0000000..66c7c24 --- /dev/null +++ b/bugs-fixed/subsep-overflow.awk @@ -0,0 +1,24 @@ +function foo(c, n) { + s = ""; + for (i = 0; i < n; i++) { + s = s c; + } + return s; +} + +BEGIN { + str1 = foo("a", 4500); + str2 = foo("b", 9000); + + a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1; + + for (k in a) { + print length(k); + } + + print (((SUBSEP = str1), (SUBSEP = str2), "c") in a); + print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a); + delete a[(SUBSEP = str1), (SUBSEP = str2), "c"]; + print (((SUBSEP = str1), (SUBSEP = str2), "c") in a); + print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a); +} diff --git a/bugs-fixed/subsep-overflow.ok b/bugs-fixed/subsep-overflow.ok new file mode 100644 index 0000000..ddbbd78 --- /dev/null +++ b/bugs-fixed/subsep-overflow.ok @@ -0,0 +1,5 @@ +27001 +1 +1 +0 +0 diff --git a/run.c b/run.c index 81b75da..281bc64 100644 --- a/run.c +++ b/run.c @@ -462,7 +462,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ Node *np; char *buf; int bufsz = recsize; - int nsub = strlen(*SUBSEP); + int nsub; if ((buf = (char 
*) malloc(bufsz)) == NULL) FATAL("out of memory in array"); @@ -472,6 +472,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array")) FATAL("out of memory for %s[%s...]", x->nval, buf); strcat(buf, s); @@ -500,7 +501,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * Cell *x, *y; Node *np; char *s; - int nsub = strlen(*SUBSEP); + int nsub; x = execute(a[0]); /* Cell* for symbol table */ if (!isarr(x)) @@ -519,9 +520,10 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); - strcat(buf, s); + strcat(buf, s); if (np->nnext) strcat(buf, *SUBSEP); tempfree(y); @@ -540,7 +542,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ char *buf; char *s; int bufsz = recsize; - int nsub = strlen(*SUBSEP); + int nsub; ap = execute(a[1]); /* array name */ if (!isarr(ap)) { @@ -558,6 +560,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ for (p = a[0]; p; p = p->nnext) { x = execute(p); /* expr */ s = getsval(x); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); strcat(buf, s); diff --git a/tran.c b/tran.c index 72ca6ff..808b928 100644 --- a/tran.c +++ b/tran.c @@ -59,6 +59,7 @@ Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] 
*/ Cell *rstartloc; /* RSTART */ Cell *rlengthloc; /* RLENGTH */ +Cell *subseploc; /* SUBSEP */ Cell *symtabloc; /* SYMTAB */ Cell *nullloc; /* a guaranteed empty cell */ @@ -100,7 +101,8 @@ void syminit(void) /* initialize symbol table with builtin vars */ NR = &nrloc->fval; fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); FNR = &fnrloc->fval; - SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval; + subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab); + SUBSEP = &subseploc->sval; rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); RSTART = &rstartloc->fval; rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); From d45db5e9d802659e8e4ba3457c1f354fa99b18d2 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Tue, 18 Sep 2018 15:20:44 -0700 Subject: [PATCH 06/15] Fix calling split() with a third argument that lives in the target array --- bugs-fixed/README | 5 +++++ bugs-fixed/split-fs-from-array.awk | 5 +++++ bugs-fixed/split-fs-from-array.ok | 1 + run.c | 10 +++++----- 4 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 bugs-fixed/split-fs-from-array.awk create mode 100644 bugs-fixed/split-fs-from-array.ok diff --git a/bugs-fixed/README b/bugs-fixed/README index 222ef68..7a9e2c1 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -23,3 +23,8 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. + +X. split-fs-from-array: If the third argument to split() comes from the +array passed as the second argument, then split() would previously read +from the freed memory and possibly produce incorrect results (depending +on the system's malloc()/free() behaviour.) 
diff --git a/bugs-fixed/split-fs-from-array.awk b/bugs-fixed/split-fs-from-array.awk new file mode 100644 index 0000000..fce1607 --- /dev/null +++ b/bugs-fixed/split-fs-from-array.awk @@ -0,0 +1,5 @@ +BEGIN { + a[1] = "elephantie" + a[2] = "e" + print split(a[1],a,a[2]), a[2], a[3], split(a[2],a,a[2]) +} diff --git a/bugs-fixed/split-fs-from-array.ok b/bugs-fixed/split-fs-from-array.ok new file mode 100644 index 0000000..9402b94 --- /dev/null +++ b/bugs-fixed/split-fs-from-array.ok @@ -0,0 +1 @@ +4 l phanti 2 diff --git a/run.c b/run.c index 81b75da..4efa641 100644 --- a/run.c +++ b/run.c @@ -1240,8 +1240,9 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ { Cell *x = 0, *y, *ap; char *s, *origs; + char *fs, *origfs = NULL; int sep; - char *t, temp, num[50], *fs = 0; + char *t, temp, num[50]; int n, tempstat, arg3type; y = execute(a[0]); /* source string */ @@ -1251,7 +1252,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ fs = *FS; else if (arg3type == STRING) { /* split(str,arr,"string") */ x = execute(a[2]); - fs = getsval(x); + origfs = fs = strdup(getsval(x)); + tempfree(x); } else if (arg3type == REGEXPR) fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ else @@ -1366,9 +1368,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ tempfree(ap); tempfree(y); free(origs); - if (a[2] != 0 && arg3type == STRING) { - tempfree(x); - } + free(origfs); x = gettemp(); x->tval = NUM; x->fval = n; From 6fe0a049bb5d5e5608f399245f1e519664c6af5a Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Fri, 21 Sep 2018 11:16:27 -0700 Subject: [PATCH 07/15] Improve error reporting messages --- lex.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/lex.c b/lex.c index d09f550..ad8e878 100644 --- a/lex.c +++ b/lex.c @@ -198,6 +198,7 @@ int yylex(void) yylval.i = c; switch (c) { case '\n': /* {EOL} */ + lineno++; RET(NL); case '\r': /* assume \n is coming */ case ' 
': /* {WS}+ */ @@ -213,6 +214,7 @@ int yylex(void) case '\\': if (peek() == '\n') { input(); + lineno++; } else if (peek() == '\r') { input(); input(); /* \n */ lineno++; @@ -370,10 +372,11 @@ int string(void) case '\n': case '\r': case 0: + *bp = '\0'; SYNTAX( "non-terminated string %.10s...", buf ); - lineno++; if (c == 0) /* hopeless */ FATAL( "giving up" ); + lineno++; break; case '\\': c = input(); @@ -515,6 +518,7 @@ int regexpr(void) if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) FATAL("out of space for reg expr %.10s...", buf); if (c == '\n') { + *bp = '\0'; SYNTAX( "newline in regular expression %.10s...", buf ); unput('\n'); break; @@ -553,19 +557,19 @@ int input(void) /* get next lexical input character */ lexprog++; } else /* awk -f ... */ c = pgetc(); - if (c == '\n') - lineno++; - else if (c == EOF) + if (c == EOF) c = 0; if (ep >= ebuf + sizeof ebuf) ep = ebuf; - return *ep++ = c; + *ep = c; + if (c != 0) { + ep++; + } + return (c); } void unput(int c) /* put lexical character back on input */ { - if (c == '\n') - lineno--; if (yysptr >= yysbuf + sizeof(yysbuf)) FATAL("pushed back too much: %.20s...", yysbuf); *yysptr++ = c; From 52566c0aa464c3791cf7b7c282d29d184d913f89 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Tue, 18 Sep 2018 15:45:55 -0700 Subject: [PATCH 08/15] Handle numeric FS, RS, OFS, and ORS values --- awk.h | 4 ++++ bugs-fixed/README | 5 +++-- bugs-fixed/fs-overflow.awk | 13 +++++++++++++ bugs-fixed/numeric-fs.awk | 5 +++++ bugs-fixed/numeric-fs.ok | 3 +++ bugs-fixed/numeric-output-seps.awk | 8 ++++++++ bugs-fixed/numeric-output-seps.bad | 2 ++ bugs-fixed/numeric-output-seps.ok | 1 + bugs-fixed/numeric-rs.awk | 6 ++++++ bugs-fixed/numeric-rs.bad | 1 + bugs-fixed/numeric-rs.ok | 4 ++++ lib.c | 14 +++++++++----- run.c | 6 +++--- tran.c | 17 +++++++++++++---- 14 files changed, 75 insertions(+), 14 deletions(-) create mode 100644 bugs-fixed/fs-overflow.awk create mode 100644 bugs-fixed/numeric-fs.awk create mode 100644 
bugs-fixed/numeric-fs.ok create mode 100644 bugs-fixed/numeric-output-seps.awk create mode 100644 bugs-fixed/numeric-output-seps.bad create mode 100644 bugs-fixed/numeric-output-seps.ok create mode 100644 bugs-fixed/numeric-rs.awk create mode 100644 bugs-fixed/numeric-rs.bad create mode 100644 bugs-fixed/numeric-rs.ok diff --git a/awk.h b/awk.h index e0ae9f6..ddf2466 100644 --- a/awk.h +++ b/awk.h @@ -97,7 +97,11 @@ extern Array *symtab; extern Cell *nrloc; /* NR */ extern Cell *fnrloc; /* FNR */ +extern Cell *fsloc; /* FS */ extern Cell *nfloc; /* NF */ +extern Cell *ofsloc; /* OFS */ +extern Cell *orsloc; /* ORS */ +extern Cell *rsloc; /* RS */ extern Cell *rstartloc; /* RSTART */ extern Cell *rlengthloc; /* RLENGTH */ extern Cell *subseploc; /* SUBSEP */ diff --git a/bugs-fixed/README b/bugs-fixed/README index a41ff53..6d6203f 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -24,8 +24,9 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. -X. numeric-subsep: If SUBSEP was set to a numeric value, then its string -value wouldn't always be generated before being needed. +X. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP, +FS, RS, OFS, or ORS were set to a numeric value, then their string values +wouldn't always be generated before being needed. X. subsep-overflow: The length of SUBSEP needs to be rechecked after calling execute(), in case SUBSEP itself has been changed.
diff --git a/bugs-fixed/fs-overflow.awk b/bugs-fixed/fs-overflow.awk new file mode 100644 index 0000000..be10f5a --- /dev/null +++ b/bugs-fixed/fs-overflow.awk @@ -0,0 +1,13 @@ +function foo() { + a = ""; + for (i = 0; i < 10000; i++) { + a = a "c"; + } + return a; +} + +BEGIN { + FS = foo(); + $0="foo"; + print $1; +} diff --git a/bugs-fixed/numeric-fs.awk b/bugs-fixed/numeric-fs.awk new file mode 100644 index 0000000..01e438d --- /dev/null +++ b/bugs-fixed/numeric-fs.awk @@ -0,0 +1,5 @@ +BEGIN { + FS = 0; split("20202", a); print a[1]; + FS = 1; $0="31313"; print $1; + FS = 2; "echo 42424" | getline; print $1; +} diff --git a/bugs-fixed/numeric-fs.ok b/bugs-fixed/numeric-fs.ok new file mode 100644 index 0000000..dcf37cd --- /dev/null +++ b/bugs-fixed/numeric-fs.ok @@ -0,0 +1,3 @@ +2 +3 +4 diff --git a/bugs-fixed/numeric-output-seps.awk b/bugs-fixed/numeric-output-seps.awk new file mode 100644 index 0000000..daa0f72 --- /dev/null +++ b/bugs-fixed/numeric-output-seps.awk @@ -0,0 +1,8 @@ +BEGIN { + $0 = "a b c"; + OFS = 1; + ORS = 2; + NF = 2; + print; + print "d", "e"; +} diff --git a/bugs-fixed/numeric-output-seps.bad b/bugs-fixed/numeric-output-seps.bad new file mode 100644 index 0000000..95310f7 --- /dev/null +++ b/bugs-fixed/numeric-output-seps.bad @@ -0,0 +1,2 @@ +a b +d e diff --git a/bugs-fixed/numeric-output-seps.ok b/bugs-fixed/numeric-output-seps.ok new file mode 100644 index 0000000..de6b202 --- /dev/null +++ b/bugs-fixed/numeric-output-seps.ok @@ -0,0 +1 @@ +a1b2d1e2 \ No newline at end of file diff --git a/bugs-fixed/numeric-rs.awk b/bugs-fixed/numeric-rs.awk new file mode 100644 index 0000000..cc7a0a0 --- /dev/null +++ b/bugs-fixed/numeric-rs.awk @@ -0,0 +1,6 @@ +BEGIN { + RS = 1; + while ("echo a1b1c1d" | getline > 0) { + print $1; + } +} diff --git a/bugs-fixed/numeric-rs.bad b/bugs-fixed/numeric-rs.bad new file mode 100644 index 0000000..2027bc6 --- /dev/null +++ b/bugs-fixed/numeric-rs.bad @@ -0,0 +1 @@ +a1b1c1d diff --git 
a/bugs-fixed/numeric-rs.ok b/bugs-fixed/numeric-rs.ok new file mode 100644 index 0000000..d68dd40 --- /dev/null +++ b/bugs-fixed/numeric-rs.ok @@ -0,0 +1,4 @@ +a +b +c +d diff --git a/lib.c b/lib.c index ba6ebd4..4b1527e 100644 --- a/lib.c +++ b/lib.c @@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * int sep, c; char *rr, *buf = *pbuf; int bufsize = *pbufsize; + char *rs = getsval(rsloc); - if (strlen(*FS) >= sizeof(inputFS)) + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) FATAL("field separator %.10s... is too long", *FS); /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ strcpy(inputFS, *FS); /* for subsequent field splitting */ - if ((sep = **RS) == 0) { + if ((sep = *rs) == 0) { sep = '\n'; while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ ; @@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * FATAL("input record `%.30s...' too long", buf); *rr++ = c; } - if (**RS == sep || c == EOF) + if (*rs == sep || c == EOF) break; if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ break; @@ -283,6 +284,8 @@ void fldbld(void) /* create fields from current record */ } fr = fields; i = 0; /* number of fields accumulated here */ + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) + FATAL("field separator %.10s... is too long", *FS); strcpy(inputFS, *FS); if (strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); @@ -479,6 +482,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ { int i; char *r, *p; + char *sep = getsval(ofsloc); if (donerec == 1) return; @@ -490,9 +494,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ while ((*r = *p++) != 0) r++; if (i < *NF) { - if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2")) + if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2")) FATAL("created $0 `%.30s...' 
too long", record); - for (p = *OFS; (*r = *p++) != 0; ) + for (p = sep; (*r = *p++) != 0; ) r++; } } diff --git a/run.c b/run.c index 281bc64..ac354e2 100644 --- a/run.c +++ b/run.c @@ -1251,7 +1251,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ origs = s = strdup(getsval(y)); arg3type = ptoi(a[3]); if (a[2] == 0) /* fs string */ - fs = *FS; + fs = getsval(fsloc); else if (arg3type == STRING) { /* split(str,arr,"string") */ x = execute(a[2]); fs = getsval(x); @@ -1633,9 +1633,9 @@ Cell *printstat(Node **a, int n) /* print a[0] */ fputs(getpssval(y), fp); tempfree(y); if (x->nnext == NULL) - fputs(*ORS, fp); + fputs(getsval(orsloc), fp); else - fputs(*OFS, fp); + fputs(getsval(ofsloc), fp); } if (a[1] != 0) fflush(fp); diff --git a/tran.c b/tran.c index 808b928..f4a134b 100644 --- a/tran.c +++ b/tran.c @@ -55,6 +55,9 @@ Cell *fsloc; /* FS */ Cell *nrloc; /* NR */ Cell *nfloc; /* NF */ Cell *fnrloc; /* FNR */ +Cell *ofsloc; /* OFS */ +Cell *orsloc; /* ORS */ +Cell *rsloc; /* RS */ Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] 
*/ Cell *rstartloc; /* RSTART */ @@ -89,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */ fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); FS = &fsloc->sval; - RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; - OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; - ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab); + RS = &rsloc->sval; + ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab); + OFS = &ofsloc->sval; + orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab); + ORS = &orsloc->sval; OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; @@ -312,6 +318,9 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; + } else if (vp == ofsloc) { + if (donerec == 0) + recbld(); } if (freeable(vp)) xfree(vp->sval); /* free any previous string */ @@ -353,7 +362,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... 
invalid */ donerec = 1; - } else if (&vp->sval == OFS) { + } else if (vp == ofsloc) { if (donerec == 0) recbld(); } From 6315525dbef08bac66b6369f7d6952b67b5adcba Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Sun, 23 Sep 2018 17:59:52 -0700 Subject: [PATCH 09/15] Rebuild fields when NF is assigned to itself --- bugs-fixed/README | 2 ++ bugs-fixed/nf-self-assign.awk | 6 ++++++ bugs-fixed/nf-self-assign.bad | 1 + bugs-fixed/nf-self-assign.ok | 1 + run.c | 4 ++-- 5 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 bugs-fixed/nf-self-assign.awk create mode 100644 bugs-fixed/nf-self-assign.bad create mode 100644 bugs-fixed/nf-self-assign.ok diff --git a/bugs-fixed/README b/bugs-fixed/README index 222ef68..29c845f 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -23,3 +23,5 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. + +X. nf-self-assign: "NF = NF" wouldn't force the record to be rebuilt. diff --git a/bugs-fixed/nf-self-assign.awk b/bugs-fixed/nf-self-assign.awk new file mode 100644 index 0000000..6ae29ee --- /dev/null +++ b/bugs-fixed/nf-self-assign.awk @@ -0,0 +1,6 @@ +BEGIN { + $0="a b c"; + OFS=","; + NF = NF; + print; +} diff --git a/bugs-fixed/nf-self-assign.bad b/bugs-fixed/nf-self-assign.bad new file mode 100644 index 0000000..3774da6 --- /dev/null +++ b/bugs-fixed/nf-self-assign.bad @@ -0,0 +1 @@ +a b c diff --git a/bugs-fixed/nf-self-assign.ok b/bugs-fixed/nf-self-assign.ok new file mode 100644 index 0000000..b2ffb02 --- /dev/null +++ b/bugs-fixed/nf-self-assign.ok @@ -0,0 +1 @@ +a,b,c diff --git a/run.c b/run.c index 81b75da..14a0c74 100644 --- a/run.c +++ b/run.c @@ -1117,8 +1117,8 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. 
*/ y = execute(a[1]); x = execute(a[0]); if (n == ASSIGN) { /* ordinary assignment */ - if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */ - ; /* leave alone unless it's a field */ + if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) + ; /* self-assignment: leave alone unless it's a field or NF */ else if ((y->tval & (STR|NUM)) == (STR|NUM)) { setsval(x, getsval(y)); x->fval = getfval(y); From 179536a5160006db2bd59772a96501f469bb6029 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Tue, 25 Sep 2018 21:19:49 -0700 Subject: [PATCH 10/15] Print an error message for negative NF values --- bugs-fixed/README | 2 ++ bugs-fixed/negative-nf.awk | 1 + bugs-fixed/negative-nf.ok | 2 ++ lib.c | 2 ++ 4 files changed, 7 insertions(+) create mode 100644 bugs-fixed/negative-nf.awk create mode 100644 bugs-fixed/negative-nf.ok diff --git a/bugs-fixed/README b/bugs-fixed/README index 29c845f..dda3b2a 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -25,3 +25,5 @@ and also if CONVFMT changed. Instead, it should convert the value to numeric and give that value. X. nf-self-assign: "NF = NF" wouldn't force the record to be rebuilt. + +X. negative-nf: Setting NF to a negative value caused a segmentation fault. 
diff --git a/bugs-fixed/negative-nf.awk b/bugs-fixed/negative-nf.awk new file mode 100644 index 0000000..6caeee4 --- /dev/null +++ b/bugs-fixed/negative-nf.awk @@ -0,0 +1 @@ +BEGIN { NF = -5; } diff --git a/bugs-fixed/negative-nf.ok b/bugs-fixed/negative-nf.ok new file mode 100644 index 0000000..71c8604 --- /dev/null +++ b/bugs-fixed/negative-nf.ok @@ -0,0 +1,2 @@ +./a.out: cannot set NF to a negative value + source line number 1 diff --git a/lib.c b/lib.c index ba6ebd4..8078db0 100644 --- a/lib.c +++ b/lib.c @@ -390,6 +390,8 @@ void newfld(int n) /* add field n after end of existing lastfld */ void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */ { + if (n < 0) + FATAL("cannot set NF to a negative value"); if (n > nfields) growfldtab(n); From 9b093ea2d0be398b8ba7b3e838f99de17fd38018 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Tue, 9 Oct 2018 11:46:57 -0700 Subject: [PATCH 11/15] Flags that begin with "--" should not be treated as end of args --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index a183c45..1c38a1e 100644 --- a/main.c +++ b/main.c @@ -88,7 +88,7 @@ int main(int argc, char *argv[]) exit(0); break; } - if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */ + if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ argc--; argv++; break; From e8c280034fad30d5234590ac3c62ebd0fe3d25dd Mon Sep 17 00:00:00 2001 From: Brian Kernighan Date: Thu, 25 Oct 2018 13:28:54 -0400 Subject: [PATCH 12/15] fix maketab non-bug --- FIXES | 5 +++++ makefile | 4 ++-- maketab.c | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/FIXES b/FIXES index d414c6d..909afb7 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,11 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +Oct 25, 2018: + Added test in maketab.c to prevent generating a proctab entry + for YYSTYPE_IS_DECLARED.
It was harmless but some gcc settings + generated a warning message. Thanks to Nan Xiao for report. + Aug 27, 2018: Disallow '$' in printf formats; arguments evaluated in order and printed in order. diff --git a/makefile b/makefile index ae80e4d..e0a43da 100644 --- a/makefile +++ b/makefile @@ -34,8 +34,8 @@ CC = gcc -g -Wall -pedantic # yacc options. pick one; this varies a lot by system. #YFLAGS = -d -S -#YACC = bison -d -y -YACC = yacc -d +YACC = bison -d -y +#YACC = yacc -d # -S uses sprintf in yacc parser instead of sprint OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o diff --git a/maketab.c b/maketab.c index e23974c..bb8e317 100644 --- a/maketab.c +++ b/maketab.c @@ -135,6 +135,8 @@ int main(int argc, char *argv[]) n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok); if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */ continue; + if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0) + continue; if (tok < FIRSTTOKEN || tok > LASTTOKEN) { /* fprintf(stderr, "maketab funny token %d %s ignored\n", tok, buf); */ continue; From a6392ef31cbfee50d5b7dfd17dbfbbf2f49484b8 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Mon, 12 Nov 2018 10:25:44 -0800 Subject: [PATCH 13/15] Fix regular expressions containing [[:cntrl:]] --- b.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/b.c b/b.c index 89a7841..a54a234 100644 --- a/b.c +++ b/b.c @@ -823,7 +823,15 @@ int relex(void) /* lexical analyzer for reparse */ if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && prestr[2 + cc->cc_namelen] == ']') { prestr += cc->cc_namelen + 3; - for (i = 0; i < NCHARS; i++) { + /* + * BUG: We begin at 1, instead of 0, since we + * would otherwise prematurely terminate the + * string for classes like [[:cntrl:]]. This + * means that we can't match the NUL character, + * not without first adapting the entire + * program to track each string's length. 
+ */ for (i = 1; i < NCHARS; i++) { if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) FATAL("out of space for reg expr %.10s...", lastre); if (cc->cc_func(i)) { From 7580235939d2c4f300827b9444675f35341a00e0 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Mon, 12 Nov 2018 10:34:19 -0800 Subject: [PATCH 14/15] Fix initial "fields" buffer size --- lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib.c b/lib.c index ba6ebd4..247ec9a 100644 --- a/lib.c +++ b/lib.c @@ -59,7 +59,7 @@ void recinit(unsigned int n) { if ( (record = (char *) malloc(n)) == NULL || (fields = (char *) malloc(n+1)) == NULL - || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL + || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL ) FATAL("out of space for $0 and fields"); *fldtab[0] = dollar0; From e4bb3bcbf09082748f91d312dad150246ab8b6fb Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Sun, 30 Dec 2018 09:04:34 -0700 Subject: [PATCH 15/15] fixed parallel build Signed-off-by: Christoph Junghans --- makefile | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/makefile b/makefile index e0a43da..3f3c3c2 100644 --- a/makefile +++ b/makefile @@ -54,10 +54,15 @@ a.out: ytab.o $(OFILES) $(OFILES): awk.h ytab.h proto.h -ytab.c: awk.h proto.h awkgram.y +#Clear dependency for parallel build: (make -j) +#YACC generates y.tab.c and y.tab.h at the same time. +#This needs to be a pattern rule; otherwise the multiple targets +#are mapped onto multiple executions of yacc, which overwrite +#each other's outputs. +y%.c y%.h: awk.h proto.h awkgram.y $(YACC) $(YFLAGS) awkgram.y - mv y.tab.c ytab.c - mv y.tab.h ytab.h + mv y.$*.c y$*.c + mv y.$*.h y$*.h ytab.h: ytab.c