diff --git a/b.c b/b.c
index 89a7841..a54a234 100644
--- a/b.c
+++ b/b.c
@@ -823,7 +823,15 @@ int relex(void) /* lexical analyzer for reparse */
 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
 				    prestr[2 + cc->cc_namelen] == ']') {
 					prestr += cc->cc_namelen + 3;
-					for (i = 0; i < NCHARS; i++) {
+					/*
+					 * BUG: We begin at 1, instead of 0, since we
+					 * would otherwise prematurely terminate the
+					 * string for classes like [[:cntrl:]]. This
+					 * means that we can't match the NUL character,
+					 * not without first adapting the entire
+					 * program to track each string's length.
+					 */
+					for (i = 1; i < NCHARS; i++) {
 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
 						    FATAL("out of space for reg expr %.10s...", lastre);
 						if (cc->cc_func(i)) {
diff --git a/bugs-fixed/README b/bugs-fixed/README
index 4656e3e..0ee4cbe 100644
--- a/bugs-fixed/README
+++ b/bugs-fixed/README
@@ -24,17 +24,25 @@ and also if CONVFMT changed.
 7. unary-plus: Unary plus on a string constant returned the string.
 Instead, it should convert the value to numeric and give that value.
 
-8. missing-precision: When using the format string "%*s", the precision
+8. concat-assign-same: Concatenation previously evaluated both sides of the
+expression before doing its work, which, since assign() evaluates to the cell
+being assigned to, meant that expressions like "print (a = 1) (a = 2)" would
+print "22" rather than "12".
+
+9. missing-precision: When using the format string "%*s", the precision
 argument was used without checking if it was present first.
 
-9. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
+10. missing-precision: When using the format string "%*s", the precision
+argument was used without checking if it was present first.
+
+11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
 to with sprintf(), which meant that some conversions could write past the
 end.
 
-10. numeric-subsep, numeric-fs, numeric-output-seps, numerics-rs: If SUBSEP,
+12. numeric-subsep, numeric-fs, numeric-output-seps, numerics-rs: If SUBSEP,
 FS, RS, OFS, or ORS were set to a numeric value, then their string values
 wouldn't always be generated before being needed.
 
-11. subsep-overflow: The length of SUBSEP needs to be rechecked after
+13. subsep-overflow: The length of SUBSEP needs to be rechecked after
 calling execute(), in case SUBSEP itself has been changed.
 
diff --git a/bugs-fixed/concat-assign-same.awk b/bugs-fixed/concat-assign-same.awk
new file mode 100644
index 0000000..ed19f35
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.awk
@@ -0,0 +1,4 @@
+BEGIN {
+	print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5);
+	print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5);
+}
diff --git a/bugs-fixed/concat-assign-same.bad b/bugs-fixed/concat-assign-same.bad
new file mode 100644
index 0000000..294725b
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.bad
@@ -0,0 +1,2 @@
+22345
+1 2 3 4 5
diff --git a/bugs-fixed/concat-assign-same.ok b/bugs-fixed/concat-assign-same.ok
new file mode 100644
index 0000000..4475052
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.ok
@@ -0,0 +1,2 @@
+12345
+1 2 3 4 5
diff --git a/lex.c b/lex.c
index d09f550..ad8e878 100644
--- a/lex.c
+++ b/lex.c
@@ -198,6 +198,7 @@ int yylex(void)
 		yylval.i = c;
 		switch (c) {
 		case '\n':	/* {EOL} */
+			lineno++;
 			RET(NL);
 		case '\r':	/* assume \n is coming */
 		case ' ':	/* {WS}+ */
@@ -213,6 +214,7 @@ int yylex(void)
 		case '\\':
 			if (peek() == '\n') {
 				input();
+				lineno++;
 			} else if (peek() == '\r') {
 				input(); input();	/* \n */
 				lineno++;
@@ -370,10 +372,11 @@ int string(void)
 		case '\n':
 		case '\r':
 		case 0:
+			*bp = '\0';
 			SYNTAX( "non-terminated string %.10s...", buf );
-			lineno++;
 			if (c == 0)	/* hopeless */
 				FATAL( "giving up" );
+			lineno++;
 			break;
 		case '\\':
 			c = input();
@@ -515,6 +518,7 @@ int regexpr(void)
 		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
 			FATAL("out of space for reg expr %.10s...", buf);
 		if (c == '\n') {
+			*bp = '\0';
 			SYNTAX( "newline in regular expression %.10s...", buf );
 			unput('\n');
 			break;
@@ -553,19 +557,19 @@ int input(void) /* get next lexical input character */
 			lexprog++;
 	} else				/* awk -f ... */
 		c = pgetc();
-	if (c == '\n')
-		lineno++;
-	else if (c == EOF)
+	if (c == EOF)
 		c = 0;
 	if (ep >= ebuf + sizeof ebuf)
 		ep = ebuf;
-	return *ep++ = c;
+	*ep = c;
+	if (c != 0) {
+		ep++;
+	}
+	return (c);
 }
 
 void unput(int c)	/* put lexical character back on input */
 {
-	if (c == '\n')
-		lineno--;
 	if (yysptr >= yysbuf + sizeof(yysbuf))
 		FATAL("pushed back too much: %.20s...", yysbuf);
 	*yysptr++ = c;
diff --git a/lib.c b/lib.c
index 4b1527e..f09aeb2 100644
--- a/lib.c
+++ b/lib.c
@@ -59,7 +59,7 @@ void recinit(unsigned int n)
 {
 	if ( (record = (char *) malloc(n)) == NULL
 	  || (fields = (char *) malloc(n+1)) == NULL
-	  || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
+	  || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL
 	  || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
 		FATAL("out of space for $0 and fields");
 	*fldtab[0] = dollar0;
diff --git a/main.c b/main.c
index a183c45..1c38a1e 100644
--- a/main.c
+++ b/main.c
@@ -88,7 +88,7 @@ int main(int argc, char *argv[])
 			exit(0);
 			break;
 		}
-		if (strncmp(argv[1], "--", 2) == 0) {	/* explicit end of args */
+		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
 			argc--;
 			argv++;
 			break;
diff --git a/run.c b/run.c
index 96743f7..497810c 100644
--- a/run.c
+++ b/run.c
@@ -1181,25 +1181,26 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */
 {
 	Cell *x, *y, *z;
 	int n1, n2;
-	char *s;
+	char *s = NULL;
+	int ssz = 0;
 
 	x = execute(a[0]);
+	n1 = strlen(getsval(x));
+	adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
+	(void) strncpy(s, x->sval, ssz);
+
 	y = execute(a[1]);
-	getsval(x);
-	getsval(y);
-	n1 = strlen(x->sval);
-	n2 = strlen(y->sval);
-	s = (char *) malloc(n1 + n2 + 1);
-	if (s == NULL)
-		FATAL("out of space concatenating %.15s... and %.15s...",
-			x->sval, y->sval);
-	strcpy(s, x->sval);
-	strcpy(s+n1, y->sval);
+	n2 = strlen(getsval(y));
+	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
+	(void) strncpy(s + n1, y->sval, ssz - n1);
+
 	tempfree(x);
 	tempfree(y);
+
 	z = gettemp();
 	z->sval = s;
 	z->tval = STR;
+
 	return(z);
 }
 