Merge pull request #17 from melloc/subsep

Fix issues with numeric separator values
This commit is contained in:
onetrueawk 2019-01-21 14:18:07 -05:00 committed by GitHub
commit 0fd6135c23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 126 additions and 17 deletions

5
awk.h
View File

@ -97,9 +97,14 @@ extern Array *symtab;
extern Cell *nrloc; /* NR */ extern Cell *nrloc; /* NR */
extern Cell *fnrloc; /* FNR */ extern Cell *fnrloc; /* FNR */
extern Cell *fsloc; /* FS */
extern Cell *nfloc; /* NF */ extern Cell *nfloc; /* NF */
extern Cell *ofsloc; /* OFS */
extern Cell *orsloc; /* ORS */
extern Cell *rsloc; /* RS */
extern Cell *rstartloc; /* RSTART */ extern Cell *rstartloc; /* RSTART */
extern Cell *rlengthloc; /* RLENGTH */ extern Cell *rlengthloc; /* RLENGTH */
extern Cell *subseploc; /* SUBSEP */
/* Cell.tval values: */ /* Cell.tval values: */
#define NUM 01 /* number value is valid */ #define NUM 01 /* number value is valid */

View File

@ -39,3 +39,10 @@ argument was used without checking if it was present first.
to with sprintf(), which meant that some conversions could write past the to with sprintf(), which meant that some conversions could write past the
end. end.
12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP,
FS, RS, OFS, or ORS were set to a numeric value, then their string values
wouldn't always be generated before being needed.
13. subsep-overflow: The length of SUBSEP needs to be rechecked after
calling execute(), in case SUBSEP itself has been changed.

View File

@ -0,0 +1,13 @@
function foo() {  # builds and returns a string of 10000 "c" characters
a = "";  # a and i are not parameters, so they are global variables
for (i = 0; i < 10000; i++) {
a = a "c";
}
return a;
}
BEGIN {  # NOTE(review): presumably exercises the "field separator ... is too long" check -- confirm
FS = foo();  # FS becomes a 10000-character string
$0="foo";
print $1;  # referencing $1 makes awk split $0 using the current FS
}

View File

@ -0,0 +1,5 @@
BEGIN {  # FS is assigned numbers; every use below needs the string form of FS
FS = 0; split("20202", a); print a[1];  # split() with the default separator FS; expected output 2
FS = 1; $0="31313"; print $1;  # field splitting after assigning $0; expected output 3
FS = 2; "echo 42424" | getline; print $1;  # field splitting of a record read by getline; expected output 4
}

3
bugs-fixed/numeric-fs.ok Normal file
View File

@ -0,0 +1,3 @@
2
3
4

View File

@ -0,0 +1,8 @@
BEGIN {  # OFS and ORS are assigned numbers and must be used as the strings "1" and "2"
$0 = "a b c";
OFS = 1;  # numeric output field separator
ORS = 2;  # numeric output record separator
NF = 2;  # assigning NF causes $0 to be rebuilt from $1..$NF joined with OFS
print;  # with both separators honored this writes a1b2
print "d", "e";  # with both separators honored this writes d1e2
}

View File

@ -0,0 +1,2 @@
a b
d e

View File

@ -0,0 +1 @@
a1b2d1e2

View File

@ -0,0 +1,6 @@
BEGIN {  # RS is assigned a number and must act as the one-character separator "1"
RS = 1;
while ("echo a1b1c1d" | getline > 0) {  # loop until getline stops returning a record
print $1;  # expected output, one per line: a, b, c, d
}
}

View File

@ -0,0 +1 @@
a1b1c1d

4
bugs-fixed/numeric-rs.ok Normal file
View File

@ -0,0 +1,4 @@
a
b
c
d

View File

@ -0,0 +1,5 @@
BEGIN {  # SUBSEP is assigned a number; its string form must be used when joining subscripts
SUBSEP = 123.456;
a["hello", "world"] = "foo";  # the two subscripts are joined with SUBSEP to form the key
print a["hello" SUBSEP "world"];  # same key built by explicit concatenation; expected output foo
}

View File

@ -0,0 +1 @@

View File

@ -0,0 +1 @@
foo

View File

@ -0,0 +1,24 @@
function foo(c, n) {  # returns the string c repeated n times
s = "";  # s and i are not parameters, so they are global variables
for (i = 0; i < n; i++) {
s = s c;
}
return s;
}
BEGIN {  # SUBSEP is reassigned to long strings while the subscript list is being evaluated
str1 = foo("a", 4500);
str2 = foo("b", 9000);
a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1;  # each assignment changes the separator used for the join that follows it
for (k in a) {
print length(k);  # expected 27001 = 4500 + 4500 + 9000 + 9000 + 1
}
print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);  # membership test with the same subscript list; expected 1
print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);  # same key written as an explicit concatenation; expected 1
delete a[(SUBSEP = str1), (SUBSEP = str2), "c"];  # delete must build the same key to remove the element
print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);  # expected 0 after the delete
print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);  # expected 0 after the delete
}

View File

@ -0,0 +1,5 @@
27001
1
1
0
0

14
lib.c
View File

@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
int sep, c; int sep, c;
char *rr, *buf = *pbuf; char *rr, *buf = *pbuf;
int bufsize = *pbufsize; int bufsize = *pbufsize;
char *rs = getsval(rsloc);
if (strlen(*FS) >= sizeof(inputFS)) if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS); FATAL("field separator %.10s... is too long", *FS);
/*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
strcpy(inputFS, *FS); /* for subsequent field splitting */ strcpy(inputFS, *FS); /* for subsequent field splitting */
if ((sep = **RS) == 0) { if ((sep = *rs) == 0) {
sep = '\n'; sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
; ;
@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
FATAL("input record `%.30s...' too long", buf); FATAL("input record `%.30s...' too long", buf);
*rr++ = c; *rr++ = c;
} }
if (**RS == sep || c == EOF) if (*rs == sep || c == EOF)
break; break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
break; break;
@ -283,6 +284,8 @@ void fldbld(void) /* create fields from current record */
} }
fr = fields; fr = fields;
i = 0; /* number of fields accumulated here */ i = 0; /* number of fields accumulated here */
if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS);
strcpy(inputFS, *FS); strcpy(inputFS, *FS);
if (strlen(inputFS) > 1) { /* it's a regular expression */ if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS); i = refldbld(r, inputFS);
@ -479,6 +482,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
{ {
int i; int i;
char *r, *p; char *r, *p;
char *sep = getsval(ofsloc);
if (donerec == 1) if (donerec == 1)
return; return;
@ -490,9 +494,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
while ((*r = *p++) != 0) while ((*r = *p++) != 0)
r++; r++;
if (i < *NF) { if (i < *NF) {
if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2")) if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
FATAL("created $0 `%.30s...' too long", record); FATAL("created $0 `%.30s...' too long", record);
for (p = *OFS; (*r = *p++) != 0; ) for (p = sep; (*r = *p++) != 0; )
r++; r++;
} }
} }

17
run.c
View File

@ -462,7 +462,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
Node *np; Node *np;
char *buf; char *buf;
int bufsz = recsize; int bufsz = recsize;
int nsub = strlen(*SUBSEP); int nsub;
if ((buf = (char *) malloc(bufsz)) == NULL) if ((buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of memory in array"); FATAL("out of memory in array");
@ -472,6 +472,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
for (np = a[1]; np; np = np->nnext) { for (np = a[1]; np; np = np->nnext) {
y = execute(np); /* subscript */ y = execute(np); /* subscript */
s = getsval(y); s = getsval(y);
nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array")) if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
FATAL("out of memory for %s[%s...]", x->nval, buf); FATAL("out of memory for %s[%s...]", x->nval, buf);
strcat(buf, s); strcat(buf, s);
@ -500,7 +501,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
Cell *x, *y; Cell *x, *y;
Node *np; Node *np;
char *s; char *s;
int nsub = strlen(*SUBSEP); int nsub;
x = execute(a[0]); /* Cell* for symbol table */ x = execute(a[0]); /* Cell* for symbol table */
if (!isarr(x)) if (!isarr(x))
@ -519,9 +520,10 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
for (np = a[1]; np; np = np->nnext) { for (np = a[1]; np; np = np->nnext) {
y = execute(np); /* subscript */ y = execute(np); /* subscript */
s = getsval(y); s = getsval(y);
nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete")) if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf); FATAL("out of memory deleting %s[%s...]", x->nval, buf);
strcat(buf, s); strcat(buf, s);
if (np->nnext) if (np->nnext)
strcat(buf, *SUBSEP); strcat(buf, *SUBSEP);
tempfree(y); tempfree(y);
@ -540,7 +542,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
char *buf; char *buf;
char *s; char *s;
int bufsz = recsize; int bufsz = recsize;
int nsub = strlen(*SUBSEP); int nsub;
ap = execute(a[1]); /* array name */ ap = execute(a[1]); /* array name */
if (!isarr(ap)) { if (!isarr(ap)) {
@ -558,6 +560,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
for (p = a[0]; p; p = p->nnext) { for (p = a[0]; p; p = p->nnext) {
x = execute(p); /* expr */ x = execute(p); /* expr */
s = getsval(x); s = getsval(x);
nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest")) if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf); FATAL("out of memory deleting %s[%s...]", x->nval, buf);
strcat(buf, s); strcat(buf, s);
@ -1252,7 +1255,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
origs = s = strdup(getsval(y)); origs = s = strdup(getsval(y));
arg3type = ptoi(a[3]); arg3type = ptoi(a[3]);
if (a[2] == 0) /* fs string */ if (a[2] == 0) /* fs string */
fs = *FS; fs = getsval(fsloc);
else if (arg3type == STRING) { /* split(str,arr,"string") */ else if (arg3type == STRING) { /* split(str,arr,"string") */
x = execute(a[2]); x = execute(a[2]);
fs = getsval(x); fs = getsval(x);
@ -1634,9 +1637,9 @@ Cell *printstat(Node **a, int n) /* print a[0] */
fputs(getpssval(y), fp); fputs(getpssval(y), fp);
tempfree(y); tempfree(y);
if (x->nnext == NULL) if (x->nnext == NULL)
fputs(*ORS, fp); fputs(getsval(orsloc), fp);
else else
fputs(*OFS, fp); fputs(getsval(ofsloc), fp);
} }
if (a[1] != 0) if (a[1] != 0)
fflush(fp); fflush(fp);

21
tran.c
View File

@ -55,10 +55,14 @@ Cell *fsloc; /* FS */
Cell *nrloc; /* NR */ Cell *nrloc; /* NR */
Cell *nfloc; /* NF */ Cell *nfloc; /* NF */
Cell *fnrloc; /* FNR */ Cell *fnrloc; /* FNR */
Cell *ofsloc; /* OFS */
Cell *orsloc; /* ORS */
Cell *rsloc; /* RS */
Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */ Cell *rstartloc; /* RSTART */
Cell *rlengthloc; /* RLENGTH */ Cell *rlengthloc; /* RLENGTH */
Cell *subseploc; /* SUBSEP */
Cell *symtabloc; /* SYMTAB */ Cell *symtabloc; /* SYMTAB */
Cell *nullloc; /* a guaranteed empty cell */ Cell *nullloc; /* a guaranteed empty cell */
@ -88,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */
fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
FS = &fsloc->sval; FS = &fsloc->sval;
RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; RS = &rsloc->sval;
ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
OFS = &ofsloc->sval;
orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
ORS = &orsloc->sval;
OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
@ -100,7 +107,8 @@ void syminit(void) /* initialize symbol table with builtin vars */
NR = &nrloc->fval; NR = &nrloc->fval;
fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
FNR = &fnrloc->fval; FNR = &fnrloc->fval;
SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval; subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
SUBSEP = &subseploc->sval;
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
RSTART = &rstartloc->fval; RSTART = &rstartloc->fval;
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
@ -310,6 +318,9 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
} else if (isrec(vp)) { } else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */ donefld = 0; /* mark $1... invalid */
donerec = 1; donerec = 1;
} else if (vp == ofsloc) {
if (donerec == 0)
recbld();
} }
if (freeable(vp)) if (freeable(vp))
xfree(vp->sval); /* free any previous string */ xfree(vp->sval); /* free any previous string */
@ -351,7 +362,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
} else if (isrec(vp)) { } else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */ donefld = 0; /* mark $1... invalid */
donerec = 1; donerec = 1;
} else if (&vp->sval == OFS) { } else if (vp == ofsloc) {
if (donerec == 0) if (donerec == 0)
recbld(); recbld();
} }