Bug fix in interval expressions.

This commit is contained in:
Arnold D. Robbins 2019-12-27 12:02:52 +02:00
parent 1951e01288
commit 7db55ba13f
5 changed files with 105 additions and 10 deletions

View File

@ -1,3 +1,10 @@
2019-12-27 Arnold D. Robbins <arnold@skeeve.com>
* b.c (replace_repeat): Fix a bug whereby a{0,3} could match
four a's. Thanks to Anonymous AWK fan <awkfan77@mailfence.com>
for the report. Also, minor code formatting cleanups.
* testdir/T.int-expr: New file.
2019-12-11 Arnold D. Robbins <arnold@skeeve.com>
* README: Renamed to ...

4
FIXES
View File

@ -25,6 +25,10 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
December 27, 2019:
Fix a bug whereby a{0,3} could match four a's. Thanks to
"Anonymous AWK fan" for the report.
December 11, 2019:
Further printf-related fixes for 32 bit systems.
Thanks again to Christos Zoulas.

20
b.c
View File

@ -908,7 +908,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
int i, j;
uschar *buf = 0;
int ret = 1;
int init_q = (firstnum == 0); /* first added char will be ? */
bool init_q = (firstnum == 0); /* first added char will be ? */
int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
int prefix_length = reptok - basestr; /* prefix includes first rep */
int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */
@ -935,7 +935,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
buf[j++] = '(';
buf[j++] = ')';
}
for (i=1; i < firstnum; i++) { /* copy x reps */
for (i = 1; i < firstnum; i++) { /* copy x reps */
memcpy(&buf[j], atom, atomlen);
j += atomlen;
}
@ -944,7 +944,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
} else if (special_case == REPEAT_WITH_Q) {
if (init_q)
buf[j++] = '?';
for (i = 0; i < n_q_reps; i++) { /* copy x? reps */
for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */
memcpy(&buf[j], atom, atomlen);
j += atomlen;
buf[j++] = '?';
@ -1166,15 +1166,17 @@ rescan:
if (commafound) {
if (digitfound) { /* {n,m} */
m = num;
if (m<n)
if (m < n)
FATAL("illegal repetition expression: class %.20s",
lastre);
if ((n==0) && (m==1)) {
if (n == 0 && m == 1) {
return QUEST;
}
} else { /* {n,} */
if (n==0) return STAR;
if (n==1) return PLUS;
if (n == 0)
return STAR;
else if (n == 1)
return PLUS;
}
} else {
if (digitfound) { /* {n} same as {n,n} */
@ -1187,7 +1189,7 @@ rescan:
}
if (repeat(starttok, prestr-starttok, lastatom,
startreptok - lastatom, n, m) > 0) {
if ((n==0) && (m==0)) {
if (n == 0 && m == 0) {
return EMPTYRE;
}
/* must rescan input for next token */
@ -1313,7 +1315,7 @@ void freefa(fa *f) /* free a finite automaton */
for (i = 0; i <= f->accept; i++) {
xfree(f->re[i].lfollow);
if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
xfree((f->re[i].lval.np));
xfree(f->re[i].lval.np);
}
xfree(f->restr);
xfree(f->out);

2
main.c
View File

@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
const char *version = "version 20191211";
const char *version = "version 20191227";
#define DEBUG
#include <stdio.h>

82
testdir/T.int-expr Executable file
View File

@ -0,0 +1,82 @@
# T.int-expr: regression test for regular-expression interval expressions
# ({n}, {n,}, {n,m}) in the awk under test.
#
# Flow: write an awk driver (prog), a table of patterns and subject strings
# (foo.in), and the expected report (foo1); run the driver to produce foo2;
# then diff foo1 against foo2 and print a BAD line if they differ.
echo T.int-expr: test interval expressions
# Use $awk from the environment when it is set; otherwise fall back to ../a.out.
awk=${awk-../a.out}
# NOTE(review): nothing below creates a file named exactly "foo"; presumably
# this clears a leftover from another test -- confirm.
rm -f foo
# Driver program. The here-document delimiter is quoted (\EOF) so the shell
# does not expand $1/$2 inside the awk text.
#   - empty input lines are skipped
#   - "pat <regex>" lines select the pattern used for the following lines
#   - any other line is "<string> <expected 0|1>"; the driver prints both the
#     expected value and the actual result of matching <string> against the
#     pattern (used as a dynamic regex). It does not compare them itself;
#     a mismatch is detected by the diff against foo1 further down.
cat << \EOF > prog
NF == 0 { next }
$1 == "pat" { pattern = $2; next }
{
check = ($1 ~ pattern)
printf("%s ~ /%s/ -> should be %d, is %d\n", $1, pattern, $2, check)
}
EOF
# Test table: one "pat" line per interval form, followed by subject strings
# and whether each should match. The ab{0,3}c group ends with abbbbc (four
# b's between a and c), which must NOT match.
cat << \EOF > foo.in
pat ab{0}c
ac 1
abc 0
pat ab{1}c
ac 0
abc 1
abbc 0
pat ab{1,}c
ac 0
abc 1
abbc 1
abbbc 1
abbbbc 1
pat ab{0,1}c
ac 1
abc 1
abbc 0
pat ab{0,3}c
ac 1
abc 1
abbc 1
abbbc 1
abbbbc 0
pat ab{1,3}c
ac 0
abc 1
abbc 1
abbbc 1
abbbbc 0
EOF
# Expected driver output: one line per non-"pat" entry of foo.in, with the
# "is" value equal to the "should be" value.
cat << \EOF > foo1
ac ~ /ab{0}c/ -> should be 1, is 1
abc ~ /ab{0}c/ -> should be 0, is 0
ac ~ /ab{1}c/ -> should be 0, is 0
abc ~ /ab{1}c/ -> should be 1, is 1
abbc ~ /ab{1}c/ -> should be 0, is 0
ac ~ /ab{1,}c/ -> should be 0, is 0
abc ~ /ab{1,}c/ -> should be 1, is 1
abbc ~ /ab{1,}c/ -> should be 1, is 1
abbbc ~ /ab{1,}c/ -> should be 1, is 1
abbbbc ~ /ab{1,}c/ -> should be 1, is 1
ac ~ /ab{0,1}c/ -> should be 1, is 1
abc ~ /ab{0,1}c/ -> should be 1, is 1
abbc ~ /ab{0,1}c/ -> should be 0, is 0
ac ~ /ab{0,3}c/ -> should be 1, is 1
abc ~ /ab{0,3}c/ -> should be 1, is 1
abbc ~ /ab{0,3}c/ -> should be 1, is 1
abbbc ~ /ab{0,3}c/ -> should be 1, is 1
abbbbc ~ /ab{0,3}c/ -> should be 0, is 0
ac ~ /ab{1,3}c/ -> should be 0, is 0
abc ~ /ab{1,3}c/ -> should be 1, is 1
abbc ~ /ab{1,3}c/ -> should be 1, is 1
abbbc ~ /ab{1,3}c/ -> should be 1, is 1
abbbbc ~ /ab{1,3}c/ -> should be 0, is 0
EOF
# Run the awk under test over the table and capture the actual report.
$awk -f prog foo.in > foo2
# diff prints any differences and exits non-zero on mismatch, which triggers
# the BAD marker.
diff foo1 foo2 || echo 'BAD: T.int-expr (1)'
# Only the driver is removed here; foo.in, foo1 and foo2 are left in place.
rm -f prog