Merge branch 'master' into nf-self-assign

This commit is contained in:
onetrueawk 2019-01-21 14:20:28 -05:00 committed by GitHub
commit 79f008e853
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 182 additions and 40 deletions

5
awk.h
View File

@ -97,9 +97,14 @@ extern Array *symtab;
extern Cell *nrloc; /* NR */
extern Cell *fnrloc; /* FNR */
extern Cell *fsloc; /* FS */
extern Cell *nfloc; /* NF */
extern Cell *ofsloc; /* OFS */
extern Cell *orsloc; /* ORS */
extern Cell *rsloc; /* RS */
extern Cell *rstartloc; /* RSTART */
extern Cell *rlengthloc; /* RLENGTH */
extern Cell *subseploc; /* SUBSEP */
/* Cell.tval values: */
#define NUM 01 /* number value is valid */

10
b.c
View File

@ -823,7 +823,15 @@ int relex(void) /* lexical analyzer for reparse */
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
prestr[2 + cc->cc_namelen] == ']') {
prestr += cc->cc_namelen + 3;
for (i = 0; i < NCHARS; i++) {
/*
* BUG: We begin at 1, instead of 0, since we
* would otherwise prematurely terminate the
* string for classes like [[:cntrl:]]. This
* means that we can't match the NUL character,
* not without first adapting the entire
* program to track each string's length.
*/
for (i = 1; i < NCHARS; i++) {
if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
FATAL("out of space for reg expr %.10s...", lastre);
if (cc->cc_func(i)) {

View File

@ -24,14 +24,31 @@ and also if CONVFMT changed.
7. unary-plus: Unary plus on a string constant returned the string.
Instead, it should convert the value to numeric and give that value.
8. missing-precision: When using the format string "%*s", the precision
8. concat-assign-same: Concatenation previously evaluated both sides of the
expression before doing its work, which, since assign() evaluates to the cell
being assigned to, meant that expressions like "print (a = 1) (a = 2)" would
print "22" rather than "12".
9. missing-precision: When using the format string "%*s", the precision
argument was used without checking if it was present first.
9. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
10. missing-precision: When using the format string "%*s", the precision
argument was used without checking if it was present first.
11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
to with sprintf(), which meant that some conversions could write past the
end.
X. nf-self-assign: "NF = NF" wouldn't force the record to be rebuilt.
12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP,
FS, RS, OFS, or ORS were set to a numeric value, then their string values
wouldn't always be generated before being needed.
13. subsep-overflow: The length of SUBSEP needs to be rechecked after
calling execute(), in case SUBSEP itself has been changed.
14. split-fs-from-array: If the third argument to split() comes from the
array passed as the second argument, then split() would previously read
from the freed memory and possibly produce incorrect results (depending
on the system's malloc()/free() behaviour).
X. negative-nf: Setting NF to a negative value caused a segmentation fault.

View File

@ -0,0 +1,4 @@
# Regression test: the same variable is assigned inside every operand of a
# single expression, so the output shows which value each operand contributed.
BEGIN {
# Operands of string concatenation: each one assigns a new value to `a`.
print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5);
# The same assignments as separate print arguments, joined by OFS.
print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5);
}

View File

@ -0,0 +1,2 @@
22345
1 2 3 4 5

View File

@ -0,0 +1,2 @@
12345
1 2 3 4 5

View File

@ -0,0 +1,13 @@
# Build and return a string of 10000 "c" characters, one character per
# iteration. `a` and `i` are not declared as parameters, so they are globals.
function foo() {
a = "";
for (i = 0; i < 10000; i++) {
a = a "c";
}
return a;
}
# Use the very long string as the field separator, then assign a record and
# read its first field so that field splitting runs with that FS.
BEGIN {
FS = foo();
$0="foo";
print $1;
}

View File

@ -0,0 +1,5 @@
# Regression test: FS is assigned numbers rather than strings, and each line
# exercises a different consumer of FS.
BEGIN {
# split() with no third argument falls back to FS; "20202" split on "0".
FS = 0; split("20202", a); print a[1];
# Assigning $0 and reading $1 splits the record on FS; "31313" split on "1".
FS = 1; $0="31313"; print $1;
# A record read through a command pipe; "42424" split on "2".
FS = 2; "echo 42424" | getline; print $1;
}

3
bugs-fixed/numeric-fs.ok Normal file
View File

@ -0,0 +1,3 @@
2
3
4

View File

@ -0,0 +1,8 @@
# Regression test: OFS and ORS are assigned numbers rather than strings.
BEGIN {
$0 = "a b c";
OFS = 1;
ORS = 2;
# Changing NF drops $3 and makes $0 get rebuilt from $1..$NF joined by OFS.
NF = 2;
# Prints the rebuilt record followed by ORS.
print;
# Prints two arguments joined by OFS and terminated by ORS.
print "d", "e";
}

View File

@ -0,0 +1,2 @@
a b
d e

View File

@ -0,0 +1 @@
a1b2d1e2

View File

@ -0,0 +1,6 @@
# Regression test: RS is assigned a number rather than a string.
BEGIN {
RS = 1;
# Read the command's output record by record (records end at "1") and print
# the first field of each record.
while ("echo a1b1c1d" | getline > 0) {
print $1;
}
}

View File

@ -0,0 +1 @@
a1b1c1d

4
bugs-fixed/numeric-rs.ok Normal file
View File

@ -0,0 +1,4 @@
a
b
c
d

View File

@ -0,0 +1,5 @@
# Regression test: SUBSEP is assigned a number rather than a string.
BEGIN {
SUBSEP = 123.456;
# Store through a multi-dimensional subscript, which joins the parts with SUBSEP.
a["hello", "world"] = "foo";
# Look the element up with the key spelled out by explicit concatenation;
# both forms are expected to name the same element.
print a["hello" SUBSEP "world"];
}

View File

@ -0,0 +1 @@

View File

@ -0,0 +1 @@
foo

View File

@ -0,0 +1,5 @@
# Regression test: the separator passed to split() is an element of the very
# array that split() fills in.
BEGIN {
a[1] = "elephantie"
a[2] = "e"
# In both split() calls the source string and the separator come from `a`,
# which is also the destination array.
print split(a[1],a,a[2]), a[2], a[3], split(a[2],a,a[2])
}

View File

@ -0,0 +1 @@
4 l phanti 2

View File

@ -0,0 +1,24 @@
# Return the string `c` repeated `n` times. `s` and `i` are not declared as
# parameters, so they are globals.
function foo(c, n) {
s = "";
for (i = 0; i < n; i++) {
s = s c;
}
return s;
}
# Regression test: SUBSEP is reassigned to long strings from inside the
# subscript list itself, for element assignment, `in` tests and delete.
BEGIN {
str1 = foo("a", 4500);
str2 = foo("b", 9000);
# The subscript expressions change SUBSEP while the key is being assembled.
a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1;
# Report the length of every key that was actually stored.
for (k in a) {
print length(k);
}
# Membership test with the same subscript list, then with the key written
# out as an explicit concatenation.
print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
# Delete through the same subscript list and repeat both membership tests.
delete a[(SUBSEP = str1), (SUBSEP = str2), "c"];
print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
}

View File

@ -0,0 +1,5 @@
27001
1
1
0
0

16
lib.c
View File

@ -59,7 +59,7 @@ void recinit(unsigned int n)
{
if ( (record = (char *) malloc(n)) == NULL
|| (fields = (char *) malloc(n+1)) == NULL
|| (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
|| (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL
|| (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
FATAL("out of space for $0 and fields");
*fldtab[0] = dollar0;
@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
int sep, c;
char *rr, *buf = *pbuf;
int bufsize = *pbufsize;
char *rs = getsval(rsloc);
if (strlen(*FS) >= sizeof(inputFS))
if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS);
/*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
strcpy(inputFS, *FS); /* for subsequent field splitting */
if ((sep = **RS) == 0) {
if ((sep = *rs) == 0) {
sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
;
@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
FATAL("input record `%.30s...' too long", buf);
*rr++ = c;
}
if (**RS == sep || c == EOF)
if (*rs == sep || c == EOF)
break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
break;
@ -283,6 +284,8 @@ void fldbld(void) /* create fields from current record */
}
fr = fields;
i = 0; /* number of fields accumulated here */
if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS);
strcpy(inputFS, *FS);
if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS);
@ -481,6 +484,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
{
int i;
char *r, *p;
char *sep = getsval(ofsloc);
if (donerec == 1)
return;
@ -492,9 +496,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
while ((*r = *p++) != 0)
r++;
if (i < *NF) {
if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
FATAL("created $0 `%.30s...' too long", record);
for (p = *OFS; (*r = *p++) != 0; )
for (p = sep; (*r = *p++) != 0; )
r++;
}
}

2
main.c
View File

@ -88,7 +88,7 @@ int main(int argc, char *argv[])
exit(0);
break;
}
if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */
if (strcmp(argv[1], "--") == 0) { /* explicit end of args */
argc--;
argv++;
break;

50
run.c
View File

@ -462,7 +462,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
Node *np;
char *buf;
int bufsz = recsize;
int nsub = strlen(*SUBSEP);
int nsub;
if ((buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of memory in array");
@ -472,6 +472,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
for (np = a[1]; np; np = np->nnext) {
y = execute(np); /* subscript */
s = getsval(y);
nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
FATAL("out of memory for %s[%s...]", x->nval, buf);
strcat(buf, s);
@ -500,7 +501,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
Cell *x, *y;
Node *np;
char *s;
int nsub = strlen(*SUBSEP);
int nsub;
x = execute(a[0]); /* Cell* for symbol table */
if (!isarr(x))
@ -519,9 +520,10 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
for (np = a[1]; np; np = np->nnext) {
y = execute(np); /* subscript */
s = getsval(y);
nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf);
strcat(buf, s);
strcat(buf, s);
if (np->nnext)
strcat(buf, *SUBSEP);
tempfree(y);
@ -540,7 +542,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
char *buf;
char *s;
int bufsz = recsize;
int nsub = strlen(*SUBSEP);
int nsub;
ap = execute(a[1]); /* array name */
if (!isarr(ap)) {
@ -558,6 +560,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
for (p = a[0]; p; p = p->nnext) {
x = execute(p); /* expr */
s = getsval(x);
nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf);
strcat(buf, s);
@ -1178,25 +1181,26 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */
{
Cell *x, *y, *z;
int n1, n2;
char *s;
char *s = NULL;
int ssz = 0;
x = execute(a[0]);
n1 = strlen(getsval(x));
adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
(void) strncpy(s, x->sval, ssz);
y = execute(a[1]);
getsval(x);
getsval(y);
n1 = strlen(x->sval);
n2 = strlen(y->sval);
s = (char *) malloc(n1 + n2 + 1);
if (s == NULL)
FATAL("out of space concatenating %.15s... and %.15s...",
x->sval, y->sval);
strcpy(s, x->sval);
strcpy(s+n1, y->sval);
n2 = strlen(getsval(y));
adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
(void) strncpy(s + n1, y->sval, ssz - n1);
tempfree(x);
tempfree(y);
z = gettemp();
z->sval = s;
z->tval = STR;
return(z);
}
@ -1243,18 +1247,20 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
{
Cell *x = 0, *y, *ap;
char *s, *origs;
char *fs, *origfs = NULL;
int sep;
char *t, temp, num[50], *fs = 0;
char *t, temp, num[50];
int n, tempstat, arg3type;
y = execute(a[0]); /* source string */
origs = s = strdup(getsval(y));
arg3type = ptoi(a[3]);
if (a[2] == 0) /* fs string */
fs = *FS;
fs = getsval(fsloc);
else if (arg3type == STRING) { /* split(str,arr,"string") */
x = execute(a[2]);
fs = getsval(x);
origfs = fs = strdup(getsval(x));
tempfree(x);
} else if (arg3type == REGEXPR)
fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
else
@ -1369,9 +1375,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
tempfree(ap);
tempfree(y);
free(origs);
if (a[2] != 0 && arg3type == STRING) {
tempfree(x);
}
free(origfs);
x = gettemp();
x->tval = NUM;
x->fval = n;
@ -1633,9 +1637,9 @@ Cell *printstat(Node **a, int n) /* print a[0] */
fputs(getpssval(y), fp);
tempfree(y);
if (x->nnext == NULL)
fputs(*ORS, fp);
fputs(getsval(orsloc), fp);
else
fputs(*OFS, fp);
fputs(getsval(ofsloc), fp);
}
if (a[1] != 0)
fflush(fp);

21
tran.c
View File

@ -55,10 +55,14 @@ Cell *fsloc; /* FS */
Cell *nrloc; /* NR */
Cell *nfloc; /* NF */
Cell *fnrloc; /* FNR */
Cell *ofsloc; /* OFS */
Cell *orsloc; /* ORS */
Cell *rsloc; /* RS */
Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */
Cell *rlengthloc; /* RLENGTH */
Cell *subseploc; /* SUBSEP */
Cell *symtabloc; /* SYMTAB */
Cell *nullloc; /* a guaranteed empty cell */
@ -88,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */
fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
FS = &fsloc->sval;
RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
RS = &rsloc->sval;
ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
OFS = &ofsloc->sval;
orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
ORS = &orsloc->sval;
OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
@ -100,7 +107,8 @@ void syminit(void) /* initialize symbol table with builtin vars */
NR = &nrloc->fval;
fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
FNR = &fnrloc->fval;
SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
SUBSEP = &subseploc->sval;
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
RSTART = &rstartloc->fval;
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
@ -310,6 +318,9 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
} else if (vp == ofsloc) {
if (donerec == 0)
recbld();
}
if (freeable(vp))
xfree(vp->sval); /* free any previous string */
@ -351,7 +362,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
} else if (&vp->sval == OFS) {
} else if (vp == ofsloc) {
if (donerec == 0)
recbld();
}