Bug fix in interval expressions.

This commit is contained in:
Arnold D. Robbins 2019-12-27 12:02:52 +02:00
parent 1951e01288
commit 7db55ba13f
5 changed files with 105 additions and 10 deletions

View File

@ -1,3 +1,10 @@
2019-12-27 Arnold D. Robbins <arnold@skeeve.com>
* b.c (replace_repeat): Fix a bug whereby a{0,3} could match
four a's. Thanks to Anonymous AWK fan <awkfan77@mailfence.com>
for the report. Also, minor code formatting cleanups.
* testdir/T.int-expr: New file.
2019-12-11 Arnold D. Robbins <arnold@skeeve.com> 2019-12-11 Arnold D. Robbins <arnold@skeeve.com>
* README: Renamed to ... * README: Renamed to ...

4
FIXES
View File

@ -25,6 +25,10 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987. was sent to the printers in August, 1987.
December 27, 2019:
Fix a bug whereby a{0,3} could match four a's. Thanks to
"Anonymous AWK fan" for the report.
December 11, 2019: December 11, 2019:
Further printf-related fixes for 32 bit systems. Further printf-related fixes for 32 bit systems.
Thanks again to Christos Zoulas. Thanks again to Christos Zoulas.

20
b.c
View File

@ -908,7 +908,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
int i, j; int i, j;
uschar *buf = 0; uschar *buf = 0;
int ret = 1; int ret = 1;
int init_q = (firstnum == 0); /* first added char will be ? */ bool init_q = (firstnum == 0); /* first added char will be ? */
int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */ int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
int prefix_length = reptok - basestr; /* prefix includes first rep */ int prefix_length = reptok - basestr; /* prefix includes first rep */
int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */ int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */
@ -935,7 +935,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
buf[j++] = '('; buf[j++] = '(';
buf[j++] = ')'; buf[j++] = ')';
} }
for (i=1; i < firstnum; i++) { /* copy x reps */ for (i = 1; i < firstnum; i++) { /* copy x reps */
memcpy(&buf[j], atom, atomlen); memcpy(&buf[j], atom, atomlen);
j += atomlen; j += atomlen;
} }
@ -944,7 +944,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
} else if (special_case == REPEAT_WITH_Q) { } else if (special_case == REPEAT_WITH_Q) {
if (init_q) if (init_q)
buf[j++] = '?'; buf[j++] = '?';
for (i = 0; i < n_q_reps; i++) { /* copy x? reps */ for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */
memcpy(&buf[j], atom, atomlen); memcpy(&buf[j], atom, atomlen);
j += atomlen; j += atomlen;
buf[j++] = '?'; buf[j++] = '?';
@ -1166,15 +1166,17 @@ rescan:
if (commafound) { if (commafound) {
if (digitfound) { /* {n,m} */ if (digitfound) { /* {n,m} */
m = num; m = num;
if (m<n) if (m < n)
FATAL("illegal repetition expression: class %.20s", FATAL("illegal repetition expression: class %.20s",
lastre); lastre);
if ((n==0) && (m==1)) { if (n == 0 && m == 1) {
return QUEST; return QUEST;
} }
} else { /* {n,} */ } else { /* {n,} */
if (n==0) return STAR; if (n == 0)
if (n==1) return PLUS; return STAR;
else if (n == 1)
return PLUS;
} }
} else { } else {
if (digitfound) { /* {n} same as {n,n} */ if (digitfound) { /* {n} same as {n,n} */
@ -1187,7 +1189,7 @@ rescan:
} }
if (repeat(starttok, prestr-starttok, lastatom, if (repeat(starttok, prestr-starttok, lastatom,
startreptok - lastatom, n, m) > 0) { startreptok - lastatom, n, m) > 0) {
if ((n==0) && (m==0)) { if (n == 0 && m == 0) {
return EMPTYRE; return EMPTYRE;
} }
/* must rescan input for next token */ /* must rescan input for next token */
@ -1313,7 +1315,7 @@ void freefa(fa *f) /* free a finite automaton */
for (i = 0; i <= f->accept; i++) { for (i = 0; i <= f->accept; i++) {
xfree(f->re[i].lfollow); xfree(f->re[i].lfollow);
if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
xfree((f->re[i].lval.np)); xfree(f->re[i].lval.np);
} }
xfree(f->restr); xfree(f->restr);
xfree(f->out); xfree(f->out);

2
main.c
View File

@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE. THIS SOFTWARE.
****************************************************************/ ****************************************************************/
const char *version = "version 20191211"; const char *version = "version 20191227";
#define DEBUG #define DEBUG
#include <stdio.h> #include <stdio.h>

82
testdir/T.int-expr Executable file
View File

@ -0,0 +1,82 @@
echo T.int-expr: test interval expressions
# Regression test for regular-expression interval expressions of the forms
# {n}, {n,} and {n,m}.  The script generates an awk driver program (prog),
# an input file (foo.in) and the expected output (foo1), runs the awk under
# test to produce the actual output (foo2), and reports BAD if they differ.

# awk binary under test: uses $awk if it is already set, else ../a.out.
awk=${awk-../a.out}
# NOTE(review): nothing in this script creates "foo"; presumably this clears
# a leftover from another test -- confirm before removing.
rm -f foo
# Driver program.  The here-document delimiter is quoted (\EOF), so the shell
# performs no expansion and $1/$2 reach awk literally.
#   - empty input lines are skipped;
#   - "pat REGEX" lines set the dynamic regex used for following lines;
#   - every other line is "STRING EXPECTED": STRING is matched against the
#     current pattern and both the expected and the actual result are printed.
cat << \EOF > prog
NF == 0 { next }
$1 == "pat" { pattern = $2; next }
{
check = ($1 ~ pattern)
printf("%s ~ /%s/ -> should be %d, is %d\n", $1, pattern, $2, check)
}
EOF
# Test cases, in the format the driver program reads: a "pat" line followed
# by "STRING EXPECTED" lines, where EXPECTED is 1 for a match and 0 otherwise.
# The ab{0,3}c group includes abbbbc with expected result 0, i.e. four b's
# must not match an upper bound of three.
cat << \EOF > foo.in
pat ab{0}c
ac 1
abc 0
pat ab{1}c
ac 0
abc 1
abbc 0
pat ab{1,}c
ac 0
abc 1
abbc 1
abbbc 1
abbbbc 1
pat ab{0,1}c
ac 1
abc 1
abbc 0
pat ab{0,3}c
ac 1
abc 1
abbc 1
abbbc 1
abbbbc 0
pat ab{1,3}c
ac 0
abc 1
abbc 1
abbbc 1
abbbbc 0
EOF
# Expected output: one line per test case, with the "is" value equal to the
# "should be" value on every line.
cat << \EOF > foo1
ac ~ /ab{0}c/ -> should be 1, is 1
abc ~ /ab{0}c/ -> should be 0, is 0
ac ~ /ab{1}c/ -> should be 0, is 0
abc ~ /ab{1}c/ -> should be 1, is 1
abbc ~ /ab{1}c/ -> should be 0, is 0
ac ~ /ab{1,}c/ -> should be 0, is 0
abc ~ /ab{1,}c/ -> should be 1, is 1
abbc ~ /ab{1,}c/ -> should be 1, is 1
abbbc ~ /ab{1,}c/ -> should be 1, is 1
abbbbc ~ /ab{1,}c/ -> should be 1, is 1
ac ~ /ab{0,1}c/ -> should be 1, is 1
abc ~ /ab{0,1}c/ -> should be 1, is 1
abbc ~ /ab{0,1}c/ -> should be 0, is 0
ac ~ /ab{0,3}c/ -> should be 1, is 1
abc ~ /ab{0,3}c/ -> should be 1, is 1
abbc ~ /ab{0,3}c/ -> should be 1, is 1
abbbc ~ /ab{0,3}c/ -> should be 1, is 1
abbbbc ~ /ab{0,3}c/ -> should be 0, is 0
ac ~ /ab{1,3}c/ -> should be 0, is 0
abc ~ /ab{1,3}c/ -> should be 1, is 1
abbc ~ /ab{1,3}c/ -> should be 1, is 1
abbbc ~ /ab{1,3}c/ -> should be 1, is 1
abbbbc ~ /ab{1,3}c/ -> should be 0, is 0
EOF
# Run the awk under test; $awk is intentionally left unquoted as in the
# original, so its value undergoes normal word splitting.
$awk -f prog foo.in > foo2
# diff exits non-zero when the files differ, which triggers the BAD message;
# diff's own output shows which cases disagreed.
diff foo1 foo2 || echo 'BAD: T.int-expr (1)'
# Only the generated program is removed here; foo.in, foo1 and foo2 are left
# in place by this script.
rm -f prog