diff --git a/b.c b/b.c
index 94b1534..31df715 100644
--- a/b.c
+++ b/b.c
@@ -616,6 +616,98 @@ int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
 	return (0);
 }
 
+
+/*
+ * NAME
+ *     fnematch
+ *
+ * DESCRIPTION
+ *     A stream-fed version of nematch which transfers characters to a
+ *     null-terminated buffer. All characters up to and including the last
+ *     character of the matching text or EOF are placed in the buffer. If
+ *     a match is found, patbeg and patlen are set appropriately.
+ *
+ * RETURN VALUES
+ *     0    No match found.
+ *     1    Match found.
+ */
+
+int fnematch(fa *pfa, FILE *f, uschar **pbuf, int *pbufsize, int quantum)
+{
+	uschar *buf = *pbuf;
+	int bufsize = *pbufsize;
+	int c, i, j, k, ns, s;
+
+	s = pfa->initstat;
+	patlen = 0;
+
+	/*
+	 * All indices relative to buf.
+	 * i <= j <= k <= bufsize
+	 *
+	 * i: origin of active substring
+	 * j: current character
+	 * k: destination of next getc()
+	 */
+	i = -1, k = 0;
+	do {
+		j = i++;
+		do {
+			if (++j == k) {
+				if (k == bufsize)
+					if (!adjbuf((char **) &buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
+						FATAL("stream '%.30s...' too long", buf);
+				buf[k++] = (c = getc(f)) != EOF ? c : 0;
+			}
+			c = buf[j];
+			/* assert(c < NCHARS); */
+
+			if ((ns = pfa->gototab[s][c]) != 0)
+				s = ns;
+			else
+				s = cgoto(pfa, s, c);
+
+			if (pfa->out[s]) {	/* final state */
+				patlen = j - i + 1;
+				if (c == 0)	/* don't count $ */
+					patlen--;
+			}
+		} while (buf[j] && s != 1);
+		s = 2;
+	} while (buf[i] && !patlen);
+
+	/* adjbuf() may have relocated a resized buffer. Inform the world. */
+	*pbuf = buf;
+	*pbufsize = bufsize;
+
+	if (patlen) {
+		patbeg = (char *) buf + i;
+		/*
+		 * Under no circumstances is the last character fed to
+		 * the automaton part of the match. It is EOF's nullbyte,
+		 * or it sent the automaton into a state with no further
+		 * transitions available (s==1), or both. Room for a
+		 * terminating nullbyte is guaranteed.
+		 *
+		 * ungetc any chars after the end of matching text
+		 * (except for EOF's nullbyte, if present) and null
+		 * terminate the buffer.
+		 *
+		 * NOTE: several characters may be pushed back here, but
+		 * ISO C only guarantees one character of pushback, so
+		 * this relies on the stdio implementation allowing more.
+		 */
+		do
+			if (buf[--k] && ungetc(buf[k], f) == EOF)
+				FATAL("unable to ungetc '%c'", buf[k]);
+		while (k > i + patlen);
+		buf[k] = 0;
+		return 1;
+	}
+	else
+		return 0;
+}
+
 Node *reparse(const char *p)	/* parses regular expression pointed to by p */
 {				/* uses relex() to scan regular expression */
 	Node *np;
diff --git a/lib.c b/lib.c
index 6ad0549..b08f0ba 100644
--- a/lib.c
+++ b/lib.c
@@ -204,41 +204,63 @@ void nextfile(void)
 
 int readrec(char **pbuf, int *pbufsize, FILE *inf)	/* read one record into buf */
 {
-	int sep, c;
+	int sep, c, isrec;
 	char *rr, *buf = *pbuf;
 	int bufsize = *pbufsize;
 	char *rs = getsval(rsloc);
 
-	if ((sep = *rs) == 0) {
-		sep = '\n';
-		while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
-			;
-		if (c != EOF)
-			ungetc(c, inf);
-	}
-	for (rr = buf; ; ) {
-		for (; (c=getc(inf)) != sep && c != EOF; ) {
-			if (rr-buf+1 > bufsize)
-				if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
-					FATAL("input record `%.30s...' too long", buf);
+#ifdef RS_AS_REGEXP
+	if (*rs && rs[1]) {
+		int found;
+
+		fa *pfa = makedfa(rs, 1);
+		found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+		if (found)
+			*patbeg = 0;
+	} else {
+#endif
+		if ((sep = *rs) == 0) {
+			sep = '\n';
+			while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
+				;
+			if (c != EOF)
+				ungetc(c, inf);
+		}
+		for (rr = buf; ; ) {
+			for (; (c=getc(inf)) != sep && c != EOF; ) {
+				if (rr-buf+1 > bufsize)
+					if (!adjbuf(&buf, &bufsize, 1+rr-buf,
+					    recsize, &rr, "readrec 1"))
+						FATAL("input record `%.30s...' too long", buf);
+				*rr++ = c;
+			}
+			if (*rs == sep || c == EOF)
+				break;
+			if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
+				break;
+			if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
+			    "readrec 2"))
+				FATAL("input record `%.30s...' too long", buf);
+			*rr++ = '\n';
 			*rr++ = c;
 		}
-		if (*rs == sep || c == EOF)
-			break;
-		if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
-			break;
-		if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
+		if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
 			FATAL("input record `%.30s...' too long", buf);
-		*rr++ = '\n';
-		*rr++ = c;
+		*rr = 0;
+#ifdef RS_AS_REGEXP
 	}
-	if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
-		FATAL("input record `%.30s...' too long", buf);
-	*rr = 0;
-	dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
+#endif
 	*pbuf = buf;
 	*pbufsize = bufsize;
-	return c == EOF && rr == buf ? 0 : 1;
+	/*
+	 * Report "no record" when nothing was read and the stream has
+	 * ended. A read error must end input just like EOF does: getc()
+	 * then returns EOF with only the error indicator set, so testing
+	 * feof() alone would keep reporting empty records.
+	 */
+	isrec = *buf || !(feof(inf) || ferror(inf));
+	dprintf( ("readrec saw <%s>, returns %d\n", buf, isrec) );
+	return isrec;
 }
 
 char *getargv(int n)	/* get ARGV[n] */
diff --git a/proto.h b/proto.h
index add45fb..f498f31 100644
--- a/proto.h
+++ b/proto.h
@@ -54,6 +54,7 @@ extern int member(int, const char *);
 extern	int	match(fa *, const char *);
 extern	int	pmatch(fa *, const char *);
 extern	int	nematch(fa *, const char *);
+extern	int	fnematch(fa *, FILE *, uschar **, int *, int);
 extern	Node	*reparse(const char *);
 extern	Node	*regexp(void);
 extern	Node	*primary(void);