Add RS as regex code, ifdefed-out, from NetBSD.

This commit is contained in:
Arnold D. Robbins 2019-07-28 20:12:05 +03:00
parent 34a6f41cca
commit 643a5a3dad
3 changed files with 130 additions and 25 deletions

88
b.c
View File

@ -616,6 +616,94 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
return (0); return (0);
} }
/*
 * NAME
 *	fnematch
 *
 * DESCRIPTION
 *	A stream-fed version of nematch which transfers characters to a
 *	null-terminated buffer. All characters up to and including the last
 *	character of the matching text or EOF are placed in the buffer. If
 *	a match is found, patbeg and patlen are set appropriately.
 *
 * PARAMETERS
 *	pfa	 automaton to run; its initstat, gototab and out members
 *		 are read here, and it is handed to cgoto() whenever
 *		 gototab has no entry for the current (state, char) pair.
 *	f	 input stream; read with getc(), and any look-ahead beyond
 *		 a match is returned to it with ungetc().
 *	pbuf	 in/out: address of the caller's buffer pointer. Updated on
 *		 return because adjbuf() may have moved the buffer.
 *	pbufsize in/out: address of the caller's buffer size; updated
 *		 together with *pbuf.
 *	quantum	 forwarded unchanged to adjbuf() (presumably its growth
 *		 increment -- confirm against adjbuf()).
 *
 * SIDE EFFECTS
 *	patlen is reset to 0 on entry. On a match, patlen holds the match
 *	length and patbeg points at the match start inside *pbuf. patbeg is
 *	not written when no match is found.
 *
 * RETURN VALUES
 *	0	No match found.
 *	1	Match found.
 *
 * NOTE(review)
 *	EOF is recorded in the buffer as a 0 byte, and both loops stop on a
 *	0 byte. A literal NUL read from the stream is therefore treated the
 *	same as end of input.
 */
int fnematch(fa *pfa, FILE *f, uschar **pbuf, int *pbufsize, int quantum)
{
/* Work on local copies; they are written back through pbuf/pbufsize below. */
uschar *buf = *pbuf;
int bufsize = *pbufsize;
int c, i, j, k, ns, s;
s = pfa->initstat;
patlen = 0;
/*
 * All indices relative to buf.
 * i <= j <= k <= bufsize
 *
 * i: origin of active substring
 * j: current character
 * k: destination of next getc()
 *
 * i starts at -1 because the outer loop pre-increments it (j = i++)
 * before the first character is examined.
 */
i = -1, k = 0;
/* Outer loop: try each successive start position i until a match is seen. */
do {
/* j takes the old i; the ++j below brings it level with the new i. */
j = i++;
/* Inner loop: feed characters from position i onward to the automaton. */
do {
/* Only read from the stream once the scan catches up with buffered data. */
if (++j == k) {
if (k == bufsize)
if (!adjbuf((char **) &buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
FATAL("stream '%.30s...' too long", buf);
/* EOF is stored as a 0 byte so it doubles as the string terminator. */
buf[k++] = (c = getc(f)) != EOF ? c : 0;
}
c = buf[j];
/* assert(c < NCHARS); */
/* Use the cached transition if present, otherwise let cgoto() supply it. */
if ((ns = pfa->gototab[s][c]) != 0)
s = ns;
else
s = cgoto(pfa, s, c);
/*
 * The scan does not stop at a final state, so patlen is overwritten
 * each time one is reached and ends up describing the longest match
 * that starts at i.
 */
if (pfa->out[s]) { /* final state */
patlen = j - i + 1;
if (c == 0) /* don't count $ */
patlen--;
}
/* Stop at the terminating 0 byte or when the automaton reaches state 1. */
} while (buf[j] && s != 1);
/*
 * Restart state for the next origin.
 * NOTE(review): presumably 2 is the automaton's restart state, as in
 * the string-based matchers -- confirm.
 */
s = 2;
} while (buf[i] && !patlen);
/* adjbuf() may have relocated a resized buffer. Inform the world. */
*pbuf = buf;
*pbufsize = bufsize;
if (patlen) {
patbeg = (char *) buf + i;
/*
 * Under no circumstances is the last character fed to
 * the automaton part of the match. It is EOF's nullbyte,
 * or it sent the automaton into a state with no further
 * transitions available (s==1), or both. Room for a
 * terminating nullbyte is guaranteed.
 *
 * ungetc any chars after the end of matching text
 * (except for EOF's nullbyte, if present) and null
 * terminate the buffer.
 *
 * Characters are pushed back last-read first, so the stream
 * returns them in their original order on the next read.
 *
 * NOTE(review): this can call ungetc() several times in a row;
 * the C standard guarantees only one character of push-back,
 * so long look-ahead depends on the stdio implementation.
 */
do
if (buf[--k] && ungetc(buf[k], f) == EOF)
FATAL("unable to ungetc '%c'", buf[k]);
while (k > i + patlen);
/* k now indexes the first byte past the match; cut the buffer there. */
buf[k] = 0;
return 1;
}
else
return 0;
}
Node *reparse(const char *p) /* parses regular expression pointed to by p */ Node *reparse(const char *p) /* parses regular expression pointed to by p */
{ /* uses relex() to scan regular expression */ { /* uses relex() to scan regular expression */
Node *np; Node *np;

66
lib.c
View File

@ -204,41 +204,57 @@ void nextfile(void)
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
{ {
int sep, c; int sep, c, isrec;
char *rr, *buf = *pbuf; char *rr, *buf = *pbuf;
int bufsize = *pbufsize; int bufsize = *pbufsize;
char *rs = getsval(rsloc); char *rs = getsval(rsloc);
if ((sep = *rs) == 0) { #ifdef RS_AS_REGEXP
sep = '\n'; if (*rs && rs[1]) {
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ int found;
;
if (c != EOF) fa *pfa = makedfa(rs, 1);
ungetc(c, inf); found = fnematch(pfa, inf, &buf, &bufsize, recsize);
} if (found)
for (rr = buf; ; ) { *patbeg = 0;
for (; (c=getc(inf)) != sep && c != EOF; ) { } else {
if (rr-buf+1 > bufsize) #endif
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1")) if ((sep = *rs) == 0) {
FATAL("input record `%.30s...' too long", buf); sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
;
if (c != EOF)
ungetc(c, inf);
}
for (rr = buf; ; ) {
for (; (c=getc(inf)) != sep && c != EOF; ) {
if (rr-buf+1 > bufsize)
if (!adjbuf(&buf, &bufsize, 1+rr-buf,
recsize, &rr, "readrec 1"))
FATAL("input record `%.30s...' too long", buf);
*rr++ = c;
}
if (*rs == sep || c == EOF)
break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
break;
if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
"readrec 2"))
FATAL("input record `%.30s...' too long", buf);
*rr++ = '\n';
*rr++ = c; *rr++ = c;
} }
if (*rs == sep || c == EOF) if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
break;
if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
FATAL("input record `%.30s...' too long", buf); FATAL("input record `%.30s...' too long", buf);
*rr++ = '\n'; *rr = 0;
*rr++ = c; #ifdef RS_AS_REGEXP
} }
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) #endif
FATAL("input record `%.30s...' too long", buf);
*rr = 0;
dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
*pbuf = buf; *pbuf = buf;
*pbufsize = bufsize; *pbufsize = bufsize;
return c == EOF && rr == buf ? 0 : 1; isrec = *buf || !feof(inf);
dprintf( ("readrec saw <%s>, returns %d\n", buf, isrec) );
return isrec;
} }
char *getargv(int n) /* get ARGV[n] */ char *getargv(int n) /* get ARGV[n] */

View File

@ -54,6 +54,7 @@ extern int member(int, const char *);
extern int match(fa *, const char *); extern int match(fa *, const char *);
extern int pmatch(fa *, const char *); extern int pmatch(fa *, const char *);
extern int nematch(fa *, const char *); extern int nematch(fa *, const char *);
extern int fnematch(fa *, FILE *, uschar **, int *, int);
extern Node *reparse(const char *); extern Node *reparse(const char *);
extern Node *regexp(void); extern Node *regexp(void);
extern Node *primary(void); extern Node *primary(void);