diff --git a/ChangeLog b/ChangeLog index 22ad7d2..7d8fe86 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2019-12-27 Arnold D. Robbins + + * b.c (replace_repeat): Fix a bug whereby a{0,3} could match + four a's. Thanks to Anonymous AWK fan + for the report. Also, minor code formatting cleanups. + * testdir/T.int-expr: New file. + 2019-12-11 Arnold D. Robbins * README: Renamed to ... diff --git a/FIXES b/FIXES index 1ad5ecb..ad5585c 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,10 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +December 27, 2019: + Fix a bug whereby a{0,3} could match four a's. Thanks to + "Anonymous AWK fan" for the report. + December 11, 2019: Further printf-related fixes for 32 bit systems. Thanks again to Christos Zoulas. diff --git a/b.c b/b.c index 1e53652..ac88dac 100644 --- a/b.c +++ b/b.c @@ -908,7 +908,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, int i, j; uschar *buf = 0; int ret = 1; - int init_q = (firstnum == 0); /* first added char will be ? */ + bool init_q = (firstnum == 0); /* first added char will be ? */ int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */ int prefix_length = reptok - basestr; /* prefix includes first rep */ int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */ @@ -935,7 +935,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, buf[j++] = '('; buf[j++] = ')'; } - for (i=1; i < firstnum; i++) { /* copy x reps */ + for (i = 1; i < firstnum; i++) { /* copy x reps */ memcpy(&buf[j], atom, atomlen); j += atomlen; } @@ -944,7 +944,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, } else if (special_case == REPEAT_WITH_Q) { if (init_q) buf[j++] = '?'; - for (i = 0; i < n_q_reps; i++) { /* copy x? reps */ + for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */ memcpy(&buf[j], atom, atomlen); j += atomlen; buf[j++] = '?'; @@ -1166,15 +1166,17 @@ rescan: if (commafound) { if (digitfound) { /* {n,m} */ m = num; - if (m 0) { - if ((n==0) && (m==0)) { + if (n == 0 && m == 0) { return EMPTYRE; } /* must rescan input for next token */ @@ -1313,7 +1315,7 @@ void freefa(fa *f) /* free a finite automaton */ for (i = 0; i <= f->accept; i++) { xfree(f->re[i].lfollow); if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) - xfree((f->re[i].lval.np)); + xfree(f->re[i].lval.np); } xfree(f->restr); xfree(f->out); diff --git a/main.c b/main.c index 4d49518..c8a2f32 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20191211"; +const char *version = "version 20191227"; #define DEBUG #include diff --git a/testdir/T.int-expr b/testdir/T.int-expr new file mode 100755 index 0000000..e71a075 --- /dev/null +++ b/testdir/T.int-expr @@ -0,0 +1,82 @@ +echo T.int-expr: test interval expressions + +awk=${awk-../a.out} + +rm -f foo + +cat << \EOF > prog +NF == 0 { next } +$1 == "pat" { pattern = $2; next } +{ + check = ($1 ~ pattern) + printf("%s ~ /%s/ -> should be %d, is %d\n", $1, pattern, $2, check) +} +EOF + +cat << \EOF > foo.in +pat ab{0}c +ac 1 +abc 0 + +pat ab{1}c +ac 0 +abc 1 +abbc 0 + +pat ab{1,}c +ac 0 +abc 1 +abbc 1 +abbbc 1 +abbbbc 1 + +pat ab{0,1}c +ac 1 +abc 1 +abbc 0 + +pat ab{0,3}c +ac 1 +abc 1 +abbc 1 +abbbc 1 +abbbbc 0 + +pat ab{1,3}c +ac 0 +abc 1 +abbc 1 +abbbc 1 +abbbbc 0 +EOF + +cat << \EOF > foo1 +ac ~ /ab{0}c/ -> should be 1, is 1 +abc ~ /ab{0}c/ -> should be 0, is 0 +ac ~ /ab{1}c/ -> should be 0, is 0 +abc ~ /ab{1}c/ -> should be 1, is 1 +abbc ~ /ab{1}c/ -> should be 0, is 0 +ac ~ /ab{1,}c/ -> should be 0, is 0 +abc ~ /ab{1,}c/ -> should be 1, is 1 +abbc ~ /ab{1,}c/ -> should be 1, is 1 +abbbc ~ /ab{1,}c/ -> should be 1, is 1 +abbbbc ~ /ab{1,}c/ -> should be 1, is 1 +ac ~ /ab{0,1}c/ -> should be 1, is 1 +abc ~ /ab{0,1}c/ -> should be 1, is 1 +abbc ~ /ab{0,1}c/ -> should be 0, is 0 +ac ~ /ab{0,3}c/ -> should be 1, is 1 +abc ~ /ab{0,3}c/ -> should be 1, is 1 +abbc ~ /ab{0,3}c/ -> should be 1, is 1 +abbbc ~ /ab{0,3}c/ -> should be 1, is 1 +abbbbc ~ /ab{0,3}c/ -> should be 0, is 0 +ac ~ /ab{1,3}c/ -> should be 0, is 0 +abc ~ /ab{1,3}c/ -> should be 1, is 1 +abbc ~ /ab{1,3}c/ -> should be 1, is 1 +abbbc ~ /ab{1,3}c/ -> should be 1, is 1 +abbbbc ~ /ab{1,3}c/ -> should be 0, is 0 +EOF + + +$awk -f prog foo.in > foo2 +diff foo1 foo2 || echo 'BAD: T.int-expr (1)' +rm -f prog