From 52566c0aa464c3791cf7b7c282d29d184d913f89 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Tue, 18 Sep 2018 15:45:55 -0700 Subject: [PATCH] Handle numeric FS, RS, OFS, and ORS values --- awk.h | 4 ++++ bugs-fixed/README | 5 +++-- bugs-fixed/fs-overflow.awk | 13 +++++++++++++ bugs-fixed/numeric-fs.awk | 5 +++++ bugs-fixed/numeric-fs.ok | 3 +++ bugs-fixed/numeric-output-seps.awk | 8 ++++++++ bugs-fixed/numeric-output-seps.bad | 2 ++ bugs-fixed/numeric-output-seps.ok | 1 + bugs-fixed/numeric-rs.awk | 6 ++++++ bugs-fixed/numeric-rs.bad | 1 + bugs-fixed/numeric-rs.ok | 4 ++++ lib.c | 14 +++++++++----- run.c | 6 +++--- tran.c | 17 +++++++++++++---- 14 files changed, 75 insertions(+), 14 deletions(-) create mode 100644 bugs-fixed/fs-overflow.awk create mode 100644 bugs-fixed/numeric-fs.awk create mode 100644 bugs-fixed/numeric-fs.ok create mode 100644 bugs-fixed/numeric-output-seps.awk create mode 100644 bugs-fixed/numeric-output-seps.bad create mode 100644 bugs-fixed/numeric-output-seps.ok create mode 100644 bugs-fixed/numeric-rs.awk create mode 100644 bugs-fixed/numeric-rs.bad create mode 100644 bugs-fixed/numeric-rs.ok diff --git a/awk.h b/awk.h index e0ae9f6..ddf2466 100644 --- a/awk.h +++ b/awk.h @@ -97,7 +97,11 @@ extern Array *symtab; extern Cell *nrloc; /* NR */ extern Cell *fnrloc; /* FNR */ +extern Cell *fsloc; /* FS */ extern Cell *nfloc; /* NF */ +extern Cell *ofsloc; /* OFS */ +extern Cell *orsloc; /* ORS */ +extern Cell *rsloc; /* RS */ extern Cell *rstartloc; /* RSTART */ extern Cell *rlengthloc; /* RLENGTH */ extern Cell *subseploc; /* SUBSEP */ diff --git a/bugs-fixed/README b/bugs-fixed/README index a41ff53..6d6203f 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -24,8 +24,9 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. -X. 
numeric-subsep: If SUBSEP was set to a numeric value, then its string -value wouldn't always be generated before being needed. +X. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP, +FS, RS, OFS, or ORS were set to a numeric value, then their string values +wouldn't always be generated before being needed. X. subsep-overflow: The length of SUBSEP needs to be rechecked after calling execute(), in case SUBSEP itself has been changed. diff --git a/bugs-fixed/fs-overflow.awk b/bugs-fixed/fs-overflow.awk new file mode 100644 index 0000000..be10f5a --- /dev/null +++ b/bugs-fixed/fs-overflow.awk @@ -0,0 +1,13 @@ +function foo() { + a = ""; + for (i = 0; i < 10000; i++) { + a = a "c"; + } + return a; +} + +BEGIN { + FS = foo(); + $0="foo"; + print $1; +} diff --git a/bugs-fixed/numeric-fs.awk b/bugs-fixed/numeric-fs.awk new file mode 100644 index 0000000..01e438d --- /dev/null +++ b/bugs-fixed/numeric-fs.awk @@ -0,0 +1,5 @@ +BEGIN { + FS = 0; split("20202", a); print a[1]; + FS = 1; $0="31313"; print $1; + FS = 2; "echo 42424" | getline; print $1; +} diff --git a/bugs-fixed/numeric-fs.ok b/bugs-fixed/numeric-fs.ok new file mode 100644 index 0000000..dcf37cd --- /dev/null +++ b/bugs-fixed/numeric-fs.ok @@ -0,0 +1,3 @@ +2 +3 +4 diff --git a/bugs-fixed/numeric-output-seps.awk b/bugs-fixed/numeric-output-seps.awk new file mode 100644 index 0000000..daa0f72 --- /dev/null +++ b/bugs-fixed/numeric-output-seps.awk @@ -0,0 +1,8 @@ +BEGIN { + $0 = "a b c"; + OFS = 1; + ORS = 2; + NF = 2; + print; + print "d", "e"; +} diff --git a/bugs-fixed/numeric-output-seps.bad b/bugs-fixed/numeric-output-seps.bad new file mode 100644 index 0000000..95310f7 --- /dev/null +++ b/bugs-fixed/numeric-output-seps.bad @@ -0,0 +1,2 @@ +a b +d e diff --git a/bugs-fixed/numeric-output-seps.ok b/bugs-fixed/numeric-output-seps.ok new file mode 100644 index 0000000..de6b202 --- /dev/null +++ b/bugs-fixed/numeric-output-seps.ok @@ -0,0 +1 @@ +a1b2d1e2 \ No newline at end of file diff 
--git a/bugs-fixed/numeric-rs.awk b/bugs-fixed/numeric-rs.awk new file mode 100644 index 0000000..cc7a0a0 --- /dev/null +++ b/bugs-fixed/numeric-rs.awk @@ -0,0 +1,6 @@ +BEGIN { + RS = 1; + while ("echo a1b1c1d" | getline > 0) { + print $1; + } +} diff --git a/bugs-fixed/numeric-rs.bad b/bugs-fixed/numeric-rs.bad new file mode 100644 index 0000000..2027bc6 --- /dev/null +++ b/bugs-fixed/numeric-rs.bad @@ -0,0 +1 @@ +a1b1c1d diff --git a/bugs-fixed/numeric-rs.ok b/bugs-fixed/numeric-rs.ok new file mode 100644 index 0000000..d68dd40 --- /dev/null +++ b/bugs-fixed/numeric-rs.ok @@ -0,0 +1,4 @@ +a +b +c +d diff --git a/lib.c b/lib.c index ba6ebd4..4b1527e 100644 --- a/lib.c +++ b/lib.c @@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * int sep, c; char *rr, *buf = *pbuf; int bufsize = *pbufsize; + char *rs = getsval(rsloc); - if (strlen(*FS) >= sizeof(inputFS)) + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) FATAL("field separator %.10s... is too long", *FS); /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ strcpy(inputFS, *FS); /* for subsequent field splitting */ - if ((sep = **RS) == 0) { + if ((sep = *rs) == 0) { sep = '\n'; while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ ; @@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * FATAL("input record `%.30s...' too long", buf); *rr++ = c; } - if (**RS == sep || c == EOF) + if (*rs == sep || c == EOF) break; if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ break; @@ -283,6 +284,8 @@ void fldbld(void) /* create fields from current record */ } fr = fields; i = 0; /* number of fields accumulated here */ + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) + FATAL("field separator %.10s... 
is too long", *FS); strcpy(inputFS, *FS); if (strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); @@ -479,6 +482,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ { int i; char *r, *p; + char *sep = getsval(ofsloc); if (donerec == 1) return; @@ -490,9 +494,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ while ((*r = *p++) != 0) r++; if (i < *NF) { - if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2")) + if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2")) FATAL("created $0 `%.30s...' too long", record); - for (p = *OFS; (*r = *p++) != 0; ) + for (p = sep; (*r = *p++) != 0; ) r++; } } diff --git a/run.c b/run.c index 281bc64..ac354e2 100644 --- a/run.c +++ b/run.c @@ -1251,7 +1251,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ origs = s = strdup(getsval(y)); arg3type = ptoi(a[3]); if (a[2] == 0) /* fs string */ - fs = *FS; + fs = getsval(fsloc); else if (arg3type == STRING) { /* split(str,arr,"string") */ x = execute(a[2]); fs = getsval(x); @@ -1633,9 +1633,9 @@ Cell *printstat(Node **a, int n) /* print a[0] */ fputs(getpssval(y), fp); tempfree(y); if (x->nnext == NULL) - fputs(*ORS, fp); + fputs(getsval(orsloc), fp); else - fputs(*OFS, fp); + fputs(getsval(ofsloc), fp); } if (a[1] != 0) fflush(fp); diff --git a/tran.c b/tran.c index 808b928..f4a134b 100644 --- a/tran.c +++ b/tran.c @@ -55,6 +55,9 @@ Cell *fsloc; /* FS */ Cell *nrloc; /* NR */ Cell *nfloc; /* NF */ Cell *fnrloc; /* FNR */ +Cell *ofsloc; /* OFS */ +Cell *orsloc; /* ORS */ +Cell *rsloc; /* RS */ Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] 
*/ Cell *rstartloc; /* RSTART */ @@ -89,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */ fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); FS = &fsloc->sval; - RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; - OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; - ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab); + RS = &rsloc->sval; + ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab); + OFS = &ofsloc->sval; + orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab); + ORS = &orsloc->sval; OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; @@ -312,6 +318,9 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; + } else if (vp == ofsloc) { + if (donerec == 0) + recbld(); } if (freeable(vp)) xfree(vp->sval); /* free any previous string */ @@ -353,7 +362,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; - } else if (&vp->sval == OFS) { + } else if (vp == ofsloc) { if (donerec == 0) recbld(); }