From e26237434fb769d9c1ea239101eb5b24be588eea Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Sat, 15 Sep 2018 01:38:39 -0700 Subject: [PATCH 1/5] Fix issues with assigning during concatenation --- bugs-fixed/README | 5 +++++ bugs-fixed/concat-assign-same.awk | 4 ++++ bugs-fixed/concat-assign-same.bad | 2 ++ bugs-fixed/concat-assign-same.ok | 2 ++ run.c | 23 ++++++++++++----------- 5 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 bugs-fixed/concat-assign-same.awk create mode 100644 bugs-fixed/concat-assign-same.bad create mode 100644 bugs-fixed/concat-assign-same.ok diff --git a/bugs-fixed/README b/bugs-fixed/README index 222ef68..7bdae04 100644 --- a/bugs-fixed/README +++ b/bugs-fixed/README @@ -23,3 +23,8 @@ and also if CONVFMT changed. 7. unary-plus: Unary plus on a string constant returned the string. Instead, it should convert the value to numeric and give that value. + +8. concat-assign-same: Concatenation previously evaluated both sides of the +expression before doing its work, which, since assign() evaluates to the cell +being assigned to, meant that expressions like "print (a = 1) (a = 2)" would +print "22" rather than "12". 
diff --git a/bugs-fixed/concat-assign-same.awk b/bugs-fixed/concat-assign-same.awk new file mode 100644 index 0000000..ed19f35 --- /dev/null +++ b/bugs-fixed/concat-assign-same.awk @@ -0,0 +1,4 @@ +BEGIN { + print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5); + print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5); +} diff --git a/bugs-fixed/concat-assign-same.bad b/bugs-fixed/concat-assign-same.bad new file mode 100644 index 0000000..294725b --- /dev/null +++ b/bugs-fixed/concat-assign-same.bad @@ -0,0 +1,2 @@ +22345 +1 2 3 4 5 diff --git a/bugs-fixed/concat-assign-same.ok b/bugs-fixed/concat-assign-same.ok new file mode 100644 index 0000000..4475052 --- /dev/null +++ b/bugs-fixed/concat-assign-same.ok @@ -0,0 +1,2 @@ +12345 +1 2 3 4 5 diff --git a/run.c b/run.c index 81b75da..14b0e21 100644 --- a/run.c +++ b/run.c @@ -1175,25 +1175,26 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */ { Cell *x, *y, *z; int n1, n2; - char *s; + char *s = NULL; + int ssz = 0; x = execute(a[0]); + n1 = strlen(getsval(x)); + adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); + (void) strncpy(s, x->sval, ssz); + y = execute(a[1]); - getsval(x); - getsval(y); - n1 = strlen(x->sval); - n2 = strlen(y->sval); - s = (char *) malloc(n1 + n2 + 1); - if (s == NULL) - FATAL("out of space concatenating %.15s... 
and %.15s...", - x->sval, y->sval); - strcpy(s, x->sval); - strcpy(s+n1, y->sval); + n2 = strlen(getsval(y)); + adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); + (void) strncpy(s + n1, y->sval, ssz - n1); + tempfree(x); tempfree(y); + z = gettemp(); z->sval = s; z->tval = STR; + return(z); } From 6fe0a049bb5d5e5608f399245f1e519664c6af5a Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Fri, 21 Sep 2018 11:16:27 -0700 Subject: [PATCH 2/5] Improve error reporting messages --- lex.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/lex.c b/lex.c index d09f550..ad8e878 100644 --- a/lex.c +++ b/lex.c @@ -198,6 +198,7 @@ int yylex(void) yylval.i = c; switch (c) { case '\n': /* {EOL} */ + lineno++; RET(NL); case '\r': /* assume \n is coming */ case ' ': /* {WS}+ */ @@ -213,6 +214,7 @@ int yylex(void) case '\\': if (peek() == '\n') { input(); + lineno++; } else if (peek() == '\r') { input(); input(); /* \n */ lineno++; @@ -370,10 +372,11 @@ int string(void) case '\n': case '\r': case 0: + *bp = '\0'; SYNTAX( "non-terminated string %.10s...", buf ); - lineno++; if (c == 0) /* hopeless */ FATAL( "giving up" ); + lineno++; break; case '\\': c = input(); @@ -515,6 +518,7 @@ int regexpr(void) if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) FATAL("out of space for reg expr %.10s...", buf); if (c == '\n') { + *bp = '\0'; SYNTAX( "newline in regular expression %.10s...", buf ); unput('\n'); break; @@ -553,19 +557,19 @@ int input(void) /* get next lexical input character */ lexprog++; } else /* awk -f ... 
*/ c = pgetc(); - if (c == '\n') - lineno++; - else if (c == EOF) + if (c == EOF) c = 0; if (ep >= ebuf + sizeof ebuf) ep = ebuf; - return *ep++ = c; + *ep = c; + if (c != 0) { + ep++; + } + return (c); } void unput(int c) /* put lexical character back on input */ { - if (c == '\n') - lineno--; if (yysptr >= yysbuf + sizeof(yysbuf)) FATAL("pushed back too much: %.20s...", yysbuf); *yysptr++ = c; From 9b093ea2d0be398b8ba7b3e838f99de17fd38018 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Tue, 9 Oct 2018 11:46:57 -0700 Subject: [PATCH 3/5] Flags that begin with "--" should not be treated as end of args --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index a183c45..1c38a1e 100644 --- a/main.c +++ b/main.c @@ -88,7 +88,7 @@ int main(int argc, char *argv[]) exit(0); break; } - if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */ + if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ argc--; argv++; break; From a6392ef31cbfee50d5b7dfd17dbfbbf2f49484b8 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Mon, 12 Nov 2018 10:25:44 -0800 Subject: [PATCH 4/5] Fix regular expressions containing [[:cntrl:]] --- b.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/b.c b/b.c index 89a7841..a54a234 100644 --- a/b.c +++ b/b.c @@ -823,7 +823,15 @@ int relex(void) /* lexical analyzer for reparse */ if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && prestr[2 + cc->cc_namelen] == ']') { prestr += cc->cc_namelen + 3; - for (i = 0; i < NCHARS; i++) { + /* + * BUG: We begin at 1, instead of 0, since we + * would otherwise prematurely terminate the + * string for classes like [[:cntrl:]]. This + * means that we can't match the NUL character, + * not without first adapting the entire + * program to track each string's length. 
+ */ + for (i = 1; i < NCHARS; i++) { if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) FATAL("out of space for reg expr %.10s...", lastre); if (cc->cc_func(i)) { From 7580235939d2c4f300827b9444675f35341a00e0 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Mon, 12 Nov 2018 10:34:19 -0800 Subject: [PATCH 5/5] Fix initial "fields" buffer size --- lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib.c b/lib.c index ba6ebd4..247ec9a 100644 --- a/lib.c +++ b/lib.c @@ -59,7 +59,7 @@ void recinit(unsigned int n) { if ( (record = (char *) malloc(n)) == NULL || (fields = (char *) malloc(n+1)) == NULL - || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL + || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL ) FATAL("out of space for $0 and fields"); *fldtab[0] = dollar0;