diff --git a/awk.h b/awk.h index 70097b9..ddf2466 100644 --- a/awk.h +++ b/awk.h @@ -97,9 +97,14 @@ extern Array *symtab; extern Cell *nrloc; /* NR */ extern Cell *fnrloc; /* FNR */ +extern Cell *fsloc; /* FS */ extern Cell *nfloc; /* NF */ +extern Cell *ofsloc; /* OFS */ +extern Cell *orsloc; /* ORS */ +extern Cell *rsloc; /* RS */ extern Cell *rstartloc; /* RSTART */ extern Cell *rlengthloc; /* RLENGTH */ +extern Cell *subseploc; /* SUBSEP */ /* Cell.tval values: */ #define NUM 01 /* number value is valid */ diff --git a/b.c b/b.c index 89a7841..a54a234 100644 --- a/b.c +++ b/b.c @@ -823,7 +823,15 @@ int relex(void) /* lexical analyzer for reparse */ if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && prestr[2 + cc->cc_namelen] == ']') { prestr += cc->cc_namelen + 3; - for (i = 0; i < NCHARS; i++) { + /* + * BUG: We begin at 1, instead of 0, since we + * would otherwise prematurely terminate the + * string for classes like [[:cntrl:]]. This + * means that we can't match the NUL character, + * not without first adapting the entire + * program to track each string's length. + */ + for (i = 1; i < NCHARS; i++) { if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) FATAL("out of space for reg expr %.10s...", lastre); if (cc->cc_func(i)) { diff --git a/bugs-fixed/README b/bugs-fixed/README index 1e1bdbe..e834a29 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -24,16 +24,30 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. -8. missing-precision: When using the format string "%*s", the precision +8. concat-assign-same: Concatenation previously evaluated both sides of the +expression before doing its work, which, since assign() evaluates to the cell +being assigned to, meant that expressions like "print (a = 1) (a = 2)" would +print "22" rather than "12". + +9. 
missing-precision: When using the format string "%*s", the precision argument was used without checking if it was present first. -9. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written +10. missing-precision: When using the format string "%*s", the precision +argument was used without checking if it was present first. + +11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written to with sprintf(), which meant that some conversions could write past the end. -X. split-fs-from-array: If the third argument to split() comes from the +12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP, +FS, RS, OFS, or ORS were set to a numeric value, then their string values +wouldn't always be generated before being needed. + +13. subsep-overflow: The length of SUBSEP needs to be rechecked after +calling execute(), in case SUBSEP itself has been changed. + +14. split-fs-from-array: If the third argument to split() comes from the array passed as the second argument, then split() would previously read from the freed memory and possibly produce incorrect results (depending on the system's malloc()/free() behaviour.) 
- diff --git a/bugs-fixed/concat-assign-same.awk b/bugs-fixed/concat-assign-same.awk new file mode 100644 index 0000000..ed19f35 --- /dev/null +++ b/bugs-fixed/concat-assign-same.awk @@ -0,0 +1,4 @@ +BEGIN { + print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5); + print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5); +} diff --git a/bugs-fixed/concat-assign-same.bad b/bugs-fixed/concat-assign-same.bad new file mode 100644 index 0000000..294725b --- /dev/null +++ b/bugs-fixed/concat-assign-same.bad @@ -0,0 +1,2 @@ +22345 +1 2 3 4 5 diff --git a/bugs-fixed/concat-assign-same.ok b/bugs-fixed/concat-assign-same.ok new file mode 100644 index 0000000..4475052 --- /dev/null +++ b/bugs-fixed/concat-assign-same.ok @@ -0,0 +1,2 @@ +12345 +1 2 3 4 5 diff --git a/bugs-fixed/fs-overflow.awk b/bugs-fixed/fs-overflow.awk new file mode 100644 index 0000000..be10f5a --- /dev/null +++ b/bugs-fixed/fs-overflow.awk @@ -0,0 +1,13 @@ +function foo() { + a = ""; + for (i = 0; i < 10000; i++) { + a = a "c"; + } + return a; +} + +BEGIN { + FS = foo(); + $0="foo"; + print $1; +} diff --git a/bugs-fixed/numeric-fs.awk b/bugs-fixed/numeric-fs.awk new file mode 100644 index 0000000..01e438d --- /dev/null +++ b/bugs-fixed/numeric-fs.awk @@ -0,0 +1,5 @@ +BEGIN { + FS = 0; split("20202", a); print a[1]; + FS = 1; $0="31313"; print $1; + FS = 2; "echo 42424" | getline; print $1; +} diff --git a/bugs-fixed/numeric-fs.ok b/bugs-fixed/numeric-fs.ok new file mode 100644 index 0000000..dcf37cd --- /dev/null +++ b/bugs-fixed/numeric-fs.ok @@ -0,0 +1,3 @@ +2 +3 +4 diff --git a/bugs-fixed/numeric-output-seps.awk b/bugs-fixed/numeric-output-seps.awk new file mode 100644 index 0000000..daa0f72 --- /dev/null +++ b/bugs-fixed/numeric-output-seps.awk @@ -0,0 +1,8 @@ +BEGIN { + $0 = "a b c"; + OFS = 1; + ORS = 2; + NF = 2; + print; + print "d", "e"; +} diff --git a/bugs-fixed/numeric-output-seps.bad b/bugs-fixed/numeric-output-seps.bad new file mode 100644 index 0000000..95310f7 --- /dev/null +++ 
b/bugs-fixed/numeric-output-seps.bad @@ -0,0 +1,2 @@ +a b +d e diff --git a/bugs-fixed/numeric-output-seps.ok b/bugs-fixed/numeric-output-seps.ok new file mode 100644 index 0000000..de6b202 --- /dev/null +++ b/bugs-fixed/numeric-output-seps.ok @@ -0,0 +1 @@ +a1b2d1e2 \ No newline at end of file diff --git a/bugs-fixed/numeric-rs.awk b/bugs-fixed/numeric-rs.awk new file mode 100644 index 0000000..cc7a0a0 --- /dev/null +++ b/bugs-fixed/numeric-rs.awk @@ -0,0 +1,6 @@ +BEGIN { + RS = 1; + while ("echo a1b1c1d" | getline > 0) { + print $1; + } +} diff --git a/bugs-fixed/numeric-rs.bad b/bugs-fixed/numeric-rs.bad new file mode 100644 index 0000000..2027bc6 --- /dev/null +++ b/bugs-fixed/numeric-rs.bad @@ -0,0 +1 @@ +a1b1c1d diff --git a/bugs-fixed/numeric-rs.ok b/bugs-fixed/numeric-rs.ok new file mode 100644 index 0000000..d68dd40 --- /dev/null +++ b/bugs-fixed/numeric-rs.ok @@ -0,0 +1,4 @@ +a +b +c +d diff --git a/bugs-fixed/numeric-subsep.awk b/bugs-fixed/numeric-subsep.awk new file mode 100644 index 0000000..1252e4a --- /dev/null +++ b/bugs-fixed/numeric-subsep.awk @@ -0,0 +1,5 @@ +BEGIN { + SUBSEP = 123.456; + a["hello", "world"] = "foo"; + print a["hello" SUBSEP "world"]; +} diff --git a/bugs-fixed/numeric-subsep.bad b/bugs-fixed/numeric-subsep.bad new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/bugs-fixed/numeric-subsep.bad @@ -0,0 +1 @@ + diff --git a/bugs-fixed/numeric-subsep.ok b/bugs-fixed/numeric-subsep.ok new file mode 100644 index 0000000..257cc56 --- /dev/null +++ b/bugs-fixed/numeric-subsep.ok @@ -0,0 +1 @@ +foo diff --git a/bugs-fixed/subsep-overflow.awk b/bugs-fixed/subsep-overflow.awk new file mode 100644 index 0000000..66c7c24 --- /dev/null +++ b/bugs-fixed/subsep-overflow.awk @@ -0,0 +1,24 @@ +function foo(c, n) { + s = ""; + for (i = 0; i < n; i++) { + s = s c; + } + return s; +} + +BEGIN { + str1 = foo("a", 4500); + str2 = foo("b", 9000); + + a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1; + + for (k in a) { + print length(k); + } + + 
print (((SUBSEP = str1), (SUBSEP = str2), "c") in a); + print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a); + delete a[(SUBSEP = str1), (SUBSEP = str2), "c"]; + print (((SUBSEP = str1), (SUBSEP = str2), "c") in a); + print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a); +} diff --git a/bugs-fixed/subsep-overflow.ok b/bugs-fixed/subsep-overflow.ok new file mode 100644 index 0000000..ddbbd78 --- /dev/null +++ b/bugs-fixed/subsep-overflow.ok @@ -0,0 +1,5 @@ +27001 +1 +1 +0 +0 diff --git a/lib.c b/lib.c index ba6ebd4..f09aeb2 100644 --- a/lib.c +++ b/lib.c @@ -59,7 +59,7 @@ void recinit(unsigned int n) { if ( (record = (char *) malloc(n)) == NULL || (fields = (char *) malloc(n+1)) == NULL - || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL + || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL ) FATAL("out of space for $0 and fields"); *fldtab[0] = dollar0; @@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * int sep, c; char *rr, *buf = *pbuf; int bufsize = *pbufsize; + char *rs = getsval(rsloc); - if (strlen(*FS) >= sizeof(inputFS)) + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) FATAL("field separator %.10s... is too long", *FS); /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ strcpy(inputFS, *FS); /* for subsequent field splitting */ - if ((sep = **RS) == 0) { + if ((sep = *rs) == 0) { sep = '\n'; while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ ; @@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * FATAL("input record `%.30s...' 
too long", buf); *rr++ = c; } - if (**RS == sep || c == EOF) + if (*rs == sep || c == EOF) break; if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ break; @@ -283,6 +284,8 @@ void fldbld(void) /* create fields from current record */ } fr = fields; i = 0; /* number of fields accumulated here */ + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) + FATAL("field separator %.10s... is too long", *FS); strcpy(inputFS, *FS); if (strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); @@ -479,6 +482,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ { int i; char *r, *p; + char *sep = getsval(ofsloc); if (donerec == 1) return; @@ -490,9 +494,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ while ((*r = *p++) != 0) r++; if (i < *NF) { - if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2")) + if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2")) FATAL("created $0 `%.30s...' too long", record); - for (p = *OFS; (*r = *p++) != 0; ) + for (p = sep; (*r = *p++) != 0; ) r++; } } diff --git a/main.c b/main.c index a183c45..1c38a1e 100644 --- a/main.c +++ b/main.c @@ -88,7 +88,7 @@ int main(int argc, char *argv[]) exit(0); break; } - if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */ + if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ argc--; argv++; break; diff --git a/run.c b/run.c index a450d7d..bf84b76 100644 --- a/run.c +++ b/run.c @@ -462,7 +462,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ Node *np; char *buf; int bufsz = recsize; - int nsub = strlen(*SUBSEP); + int nsub; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in array"); @@ -472,6 +472,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, 
strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array")) FATAL("out of memory for %s[%s...]", x->nval, buf); strcat(buf, s); @@ -500,7 +501,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * Cell *x, *y; Node *np; char *s; - int nsub = strlen(*SUBSEP); + int nsub; x = execute(a[0]); /* Cell* for symbol table */ if (!isarr(x)) @@ -519,9 +520,10 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); - strcat(buf, s); + strcat(buf, s); if (np->nnext) strcat(buf, *SUBSEP); tempfree(y); @@ -540,7 +542,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ char *buf; char *s; int bufsz = recsize; - int nsub = strlen(*SUBSEP); + int nsub; ap = execute(a[1]); /* array name */ if (!isarr(ap)) { @@ -558,6 +560,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ for (p = a[0]; p; p = p->nnext) { x = execute(p); /* expr */ s = getsval(x); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); strcat(buf, s); @@ -1178,25 +1181,26 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */ { Cell *x, *y, *z; int n1, n2; - char *s; + char *s = NULL; + int ssz = 0; x = execute(a[0]); + n1 = strlen(getsval(x)); + adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); + (void) strncpy(s, x->sval, ssz); + y = execute(a[1]); - getsval(x); - getsval(y); - n1 = strlen(x->sval); - n2 = strlen(y->sval); - s = (char *) malloc(n1 + n2 + 1); - if (s == NULL) - FATAL("out of space concatenating %.15s... 
and %.15s...", - x->sval, y->sval); - strcpy(s, x->sval); - strcpy(s+n1, y->sval); + n2 = strlen(getsval(y)); + adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); + (void) strncpy(s + n1, y->sval, ssz - n1); + tempfree(x); tempfree(y); + z = gettemp(); z->sval = s; z->tval = STR; + return(z); } @@ -1252,7 +1256,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ origs = s = strdup(getsval(y)); arg3type = ptoi(a[3]); if (a[2] == 0) /* fs string */ - fs = *FS; + fs = getsval(fsloc); else if (arg3type == STRING) { /* split(str,arr,"string") */ x = execute(a[2]); origfs = fs = strdup(getsval(x)); @@ -1633,9 +1637,9 @@ Cell *printstat(Node **a, int n) /* print a[0] */ fputs(getpssval(y), fp); tempfree(y); if (x->nnext == NULL) - fputs(*ORS, fp); + fputs(getsval(orsloc), fp); else - fputs(*OFS, fp); + fputs(getsval(ofsloc), fp); } if (a[1] != 0) fflush(fp); diff --git a/tran.c b/tran.c index 6775b01..d1dfe2b 100644 --- a/tran.c +++ b/tran.c @@ -55,10 +55,14 @@ Cell *fsloc; /* FS */ Cell *nrloc; /* NR */ Cell *nfloc; /* NF */ Cell *fnrloc; /* FNR */ +Cell *ofsloc; /* OFS */ +Cell *orsloc; /* ORS */ +Cell *rsloc; /* RS */ Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] 
*/ Cell *rstartloc; /* RSTART */ Cell *rlengthloc; /* RLENGTH */ +Cell *subseploc; /* SUBSEP */ Cell *symtabloc; /* SYMTAB */ Cell *nullloc; /* a guaranteed empty cell */ @@ -88,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */ fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); FS = &fsloc->sval; - RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; - OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; - ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab); + RS = &rsloc->sval; + ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab); + OFS = &ofsloc->sval; + orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab); + ORS = &orsloc->sval; OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; @@ -100,7 +107,8 @@ void syminit(void) /* initialize symbol table with builtin vars */ NR = &nrloc->fval; fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); FNR = &fnrloc->fval; - SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval; + subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab); + SUBSEP = &subseploc->sval; rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); RSTART = &rstartloc->fval; rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); @@ -310,6 +318,9 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; + } else if (vp == ofsloc) { + if (donerec == 0) + recbld(); } if (freeable(vp)) xfree(vp->sval); /* free any previous string */ @@ -351,7 +362,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... 
invalid */ donerec = 1; - } else if (&vp->sval == OFS) { + } else if (vp == ofsloc) { if (donerec == 0) recbld(); }