From 97a4b7ed215ae6446d13fe0eab15b5b3ae4da7da Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Mon, 17 Sep 2018 11:59:04 -0700 Subject: [PATCH] Fix issues with numeric SUBSEP and large SUBSEP values --- awk.h | 1 + bugs-fixed/README | 6 ++++++ bugs-fixed/numeric-subsep.awk | 5 +++++ bugs-fixed/numeric-subsep.bad | 1 + bugs-fixed/numeric-subsep.ok | 1 + bugs-fixed/subsep-overflow.awk | 24 ++++++++++++++++++++++++ bugs-fixed/subsep-overflow.ok | 5 +++++ run.c | 11 +++++++---- tran.c | 4 +++- 9 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 bugs-fixed/numeric-subsep.awk create mode 100644 bugs-fixed/numeric-subsep.bad create mode 100644 bugs-fixed/numeric-subsep.ok create mode 100644 bugs-fixed/subsep-overflow.awk create mode 100644 bugs-fixed/subsep-overflow.ok diff --git a/awk.h b/awk.h index 70097b9..e0ae9f6 100644 --- a/awk.h +++ b/awk.h @@ -100,6 +100,7 @@ extern Cell *fnrloc; /* FNR */ extern Cell *nfloc; /* NF */ extern Cell *rstartloc; /* RSTART */ extern Cell *rlengthloc; /* RLENGTH */ +extern Cell *subseploc; /* SUBSEP */ /* Cell.tval values: */ #define NUM 01 /* number value is valid */ diff --git a/bugs-fixed/README b/bugs-fixed/README index 222ef68..a41ff53 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -23,3 +23,9 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. + +X. numeric-subsep: If SUBSEP was set to a numeric value, then its string +value wouldn't always be generated before being needed. + +X. subsep-overflow: The length of SUBSEP needs to be rechecked after +calling execute(), in case SUBSEP itself has been changed. diff --git a/bugs-fixed/numeric-subsep.awk b/bugs-fixed/numeric-subsep.awk new file mode 100644 index 0000000..1252e4a --- /dev/null +++ b/bugs-fixed/numeric-subsep.awk @@ -0,0 +1,5 @@ +BEGIN { + SUBSEP = 123.456; + a["hello", "world"] = "foo"; + print a["hello" SUBSEP "world"]; +} diff --git a/bugs-fixed/numeric-subsep.bad b/bugs-fixed/numeric-subsep.bad new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/bugs-fixed/numeric-subsep.bad @@ -0,0 +1 @@ + diff --git a/bugs-fixed/numeric-subsep.ok b/bugs-fixed/numeric-subsep.ok new file mode 100644 index 0000000..257cc56 --- /dev/null +++ b/bugs-fixed/numeric-subsep.ok @@ -0,0 +1 @@ +foo diff --git a/bugs-fixed/subsep-overflow.awk b/bugs-fixed/subsep-overflow.awk new file mode 100644 index 0000000..66c7c24 --- /dev/null +++ b/bugs-fixed/subsep-overflow.awk @@ -0,0 +1,24 @@ +function foo(c, n) { + s = ""; + for (i = 0; i < n; i++) { + s = s c; + } + return s; +} + +BEGIN { + str1 = foo("a", 4500); + str2 = foo("b", 9000); + + a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1; + + for (k in a) { + print length(k); + } + + print (((SUBSEP = str1), (SUBSEP = str2), "c") in a); + print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a); + delete a[(SUBSEP = str1), (SUBSEP = str2), "c"]; + print (((SUBSEP = str1), (SUBSEP = str2), "c") in a); + print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a); +} diff --git a/bugs-fixed/subsep-overflow.ok b/bugs-fixed/subsep-overflow.ok new file mode 100644 index 0000000..ddbbd78 --- /dev/null +++ b/bugs-fixed/subsep-overflow.ok @@ -0,0 +1,5 @@ +27001 +1 +1 +0 +0 diff --git a/run.c b/run.c index 81b75da..281bc64 100644 --- a/run.c +++ b/run.c @@ -462,7 +462,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ Node *np; char *buf; int bufsz = recsize; - int nsub = strlen(*SUBSEP); + int nsub; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in array"); @@ -472,6 +472,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array")) FATAL("out of memory for %s[%s...]", x->nval, buf); strcat(buf, s); @@ -500,7 +501,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * Cell *x, *y; Node *np; char *s; - int nsub = strlen(*SUBSEP); + int nsub; x = execute(a[0]); /* Cell* for symbol table */ if (!isarr(x)) @@ -519,9 +520,10 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); - strcat(buf, s); + strcat(buf, s); if (np->nnext) strcat(buf, *SUBSEP); tempfree(y); @@ -540,7 +542,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ char *buf; char *s; int bufsz = recsize; - int nsub = strlen(*SUBSEP); + int nsub; ap = execute(a[1]); /* array name */ if (!isarr(ap)) { @@ -558,6 +560,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ for (p = a[0]; p; p = p->nnext) { x = execute(p); /* expr */ s = getsval(x); + nsub = strlen(getsval(subseploc)); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); strcat(buf, s); diff --git a/tran.c b/tran.c index 72ca6ff..808b928 100644 --- a/tran.c +++ b/tran.c @@ -59,6 +59,7 @@ Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] */ Cell *rstartloc; /* RSTART */ Cell *rlengthloc; /* RLENGTH */ +Cell *subseploc; /* SUBSEP */ Cell *symtabloc; /* SYMTAB */ Cell *nullloc; /* a guaranteed empty cell */ @@ -100,7 +101,8 @@ void syminit(void) /* initialize symbol table with builtin vars */ NR = &nrloc->fval; fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); FNR = &fnrloc->fval; - SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval; + subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab); + SUBSEP = &subseploc->sval; rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); RSTART = &rstartloc->fval; rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);