Handle numeric FS, RS, OFS, and ORS values

This commit is contained in:
Cody Peter Mello 2018-09-18 15:45:55 -07:00
parent 97a4b7ed21
commit 52566c0aa4
14 changed files with 75 additions and 14 deletions

4
awk.h
View File

@ -97,7 +97,11 @@ extern Array *symtab;
extern Cell *nrloc; /* NR */ extern Cell *nrloc; /* NR */
extern Cell *fnrloc; /* FNR */ extern Cell *fnrloc; /* FNR */
extern Cell *fsloc; /* FS */
extern Cell *nfloc; /* NF */ extern Cell *nfloc; /* NF */
extern Cell *ofsloc; /* OFS */
extern Cell *orsloc; /* ORS */
extern Cell *rsloc; /* RS */
extern Cell *rstartloc; /* RSTART */ extern Cell *rstartloc; /* RSTART */
extern Cell *rlengthloc; /* RLENGTH */ extern Cell *rlengthloc; /* RLENGTH */
extern Cell *subseploc; /* SUBSEP */ extern Cell *subseploc; /* SUBSEP */

View File

@ -24,8 +24,9 @@ and also if CONVFMT changed.
7. unary-plus: Unary plus on a string constant returned the string. 7. unary-plus: Unary plus on a string constant returned the string.
Instead, it should convert the value to numeric and give that value. Instead, it should convert the value to numeric and give that value.
X. numeric-subsep: If SUBSEP was set to a numeric value, then its string X. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP,
value wouldn't always be generated before being needed. FS, RS, OFS, or ORS were set to a numeric value, then their string values
wouldn't always be generated before being needed.
X. subsep-overflow: The length of SUBSEP needs to be rechecked after X. subsep-overflow: The length of SUBSEP needs to be rechecked after
calling execute(), in case SUBSEP itself has been changed. calling execute(), in case SUBSEP itself has been changed.

View File

@ -0,0 +1,13 @@
# foo: build and return a string consisting of 10000 copies of "c".
# The names a and i are not listed as parameters, so they are global
# variables and remain set after the call returns.
function foo() {
a = "";
for (i = 0; i < 10000; i++) {
# Grow the string by one character on every pass.
a = a "c";
}
return a;
}
# Install the 10000-character string returned by foo() as FS, then assign a
# record and reference $1 so that the record has to be split using that FS.
# NOTE(review): presumably this exercises the "field separator ... is too
# long" check on the field-splitting path; the expected output for this
# script is not visible here, so confirm against its .ok file.
BEGIN {
FS = foo();
$0="foo";
print $1;
}

View File

@ -0,0 +1,5 @@
# Assign numeric values to FS and check that the number's string form is used
# as the field separator on three different splitting paths. The expected
# output is the three lines 2, 3 and 4.
BEGIN {
# Path 1: split() with no separator argument falls back to FS ("0").
FS = 0; split("20202", a); print a[1];
# Path 2: assigning $0 and then reading a field splits on FS ("1").
FS = 1; $0="31313"; print $1;
# Path 3: a record read by getline from a command is split on FS ("2").
FS = 2; "echo 42424" | getline; print $1;
}

3
bugs-fixed/numeric-fs.ok Normal file
View File

@ -0,0 +1,3 @@
2
3
4

View File

@ -0,0 +1,8 @@
# Assign numeric values to OFS and ORS and check that their string forms are
# used both when $0 is rebuilt and when print joins and terminates output.
# With OFS "1" and ORS "2" the combined output should be a1b2d1e2 (no
# newline characters are produced, because ORS is no longer "\n").
BEGIN {
$0 = "a b c";
OFS = 1;
ORS = 2;
# Changing NF drops the third field and causes $0 to be rebuilt from
# $1..$NF joined with OFS.
NF = 2;
# Prints the rebuilt record followed by ORS.
print;
# Prints two expressions joined with OFS and followed by ORS.
print "d", "e";
}

View File

@ -0,0 +1,2 @@
a b
d e

View File

@ -0,0 +1 @@
a1b2d1e2

View File

@ -0,0 +1,6 @@
# Assign a numeric value to RS and check that its string form ("1") is used
# as the record separator when reading from a command. The input a1b1c1d
# should be delivered as four records, printing a, b, c and d on separate
# lines.
BEGIN {
RS = 1;
# Loop while getline reports that a record was successfully read from the command.
while ("echo a1b1c1d" | getline > 0) {
# NOTE(review): presumably $1 is printed instead of $0 so that the newline
# echo appends to the final record does not show up in the output; confirm.
print $1;
}
}

View File

@ -0,0 +1 @@
a1b1c1d

4
bugs-fixed/numeric-rs.ok Normal file
View File

@ -0,0 +1,4 @@
a
b
c
d

14
lib.c
View File

@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
int sep, c; int sep, c;
char *rr, *buf = *pbuf; char *rr, *buf = *pbuf;
int bufsize = *pbufsize; int bufsize = *pbufsize;
char *rs = getsval(rsloc);
if (strlen(*FS) >= sizeof(inputFS)) if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS); FATAL("field separator %.10s... is too long", *FS);
/*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
strcpy(inputFS, *FS); /* for subsequent field splitting */ strcpy(inputFS, *FS); /* for subsequent field splitting */
if ((sep = **RS) == 0) { if ((sep = *rs) == 0) {
sep = '\n'; sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
; ;
@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
FATAL("input record `%.30s...' too long", buf); FATAL("input record `%.30s...' too long", buf);
*rr++ = c; *rr++ = c;
} }
if (**RS == sep || c == EOF) if (*rs == sep || c == EOF)
break; break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
break; break;
@ -283,6 +284,8 @@ void fldbld(void) /* create fields from current record */
} }
fr = fields; fr = fields;
i = 0; /* number of fields accumulated here */ i = 0; /* number of fields accumulated here */
if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS);
strcpy(inputFS, *FS); strcpy(inputFS, *FS);
if (strlen(inputFS) > 1) { /* it's a regular expression */ if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS); i = refldbld(r, inputFS);
@ -479,6 +482,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
{ {
int i; int i;
char *r, *p; char *r, *p;
char *sep = getsval(ofsloc);
if (donerec == 1) if (donerec == 1)
return; return;
@ -490,9 +494,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
while ((*r = *p++) != 0) while ((*r = *p++) != 0)
r++; r++;
if (i < *NF) { if (i < *NF) {
if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2")) if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
FATAL("created $0 `%.30s...' too long", record); FATAL("created $0 `%.30s...' too long", record);
for (p = *OFS; (*r = *p++) != 0; ) for (p = sep; (*r = *p++) != 0; )
r++; r++;
} }
} }

6
run.c
View File

@ -1251,7 +1251,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
origs = s = strdup(getsval(y)); origs = s = strdup(getsval(y));
arg3type = ptoi(a[3]); arg3type = ptoi(a[3]);
if (a[2] == 0) /* fs string */ if (a[2] == 0) /* fs string */
fs = *FS; fs = getsval(fsloc);
else if (arg3type == STRING) { /* split(str,arr,"string") */ else if (arg3type == STRING) { /* split(str,arr,"string") */
x = execute(a[2]); x = execute(a[2]);
fs = getsval(x); fs = getsval(x);
@ -1633,9 +1633,9 @@ Cell *printstat(Node **a, int n) /* print a[0] */
fputs(getpssval(y), fp); fputs(getpssval(y), fp);
tempfree(y); tempfree(y);
if (x->nnext == NULL) if (x->nnext == NULL)
fputs(*ORS, fp); fputs(getsval(orsloc), fp);
else else
fputs(*OFS, fp); fputs(getsval(ofsloc), fp);
} }
if (a[1] != 0) if (a[1] != 0)
fflush(fp); fflush(fp);

17
tran.c
View File

@ -55,6 +55,9 @@ Cell *fsloc; /* FS */
Cell *nrloc; /* NR */ Cell *nrloc; /* NR */
Cell *nfloc; /* NF */ Cell *nfloc; /* NF */
Cell *fnrloc; /* FNR */ Cell *fnrloc; /* FNR */
Cell *ofsloc; /* OFS */
Cell *orsloc; /* ORS */
Cell *rsloc; /* RS */
Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */ Cell *rstartloc; /* RSTART */
@ -89,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */
fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
FS = &fsloc->sval; FS = &fsloc->sval;
RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; RS = &rsloc->sval;
ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
OFS = &ofsloc->sval;
orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
ORS = &orsloc->sval;
OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
@ -312,6 +318,9 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
} else if (isrec(vp)) { } else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */ donefld = 0; /* mark $1... invalid */
donerec = 1; donerec = 1;
} else if (vp == ofsloc) {
if (donerec == 0)
recbld();
} }
if (freeable(vp)) if (freeable(vp))
xfree(vp->sval); /* free any previous string */ xfree(vp->sval); /* free any previous string */
@ -353,7 +362,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
} else if (isrec(vp)) { } else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */ donefld = 0; /* mark $1... invalid */
donerec = 1; donerec = 1;
} else if (&vp->sval == OFS) { } else if (vp == ofsloc) {
if (donerec == 0) if (donerec == 0)
recbld(); recbld();
} }