awk/lib.c

841 lines
20 KiB
C
Raw Normal View History

2012-12-22 10:35:39 -05:00
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
#define DEBUG
#include <stdio.h>
#include <string.h>
#include <strings.h>
2012-12-22 10:35:39 -05:00
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
#include <limits.h>
#include <math.h>
2012-12-22 10:35:39 -05:00
#include "awk.h"
/* Shared empty string; used as the initial/"cleared" value of several pointers. */
char	EMPTY[] = "";

/* ---- current input source ---- */
FILE	*infile	= NULL;
bool	innew;		/* true = infile has not been read by readrec */
char	*file	= EMPTY;

/* ---- record ($0) and field storage ---- */
char	*record;
int	recsize	= RECSIZE;
char	*fields;
int	fieldssize = RECSIZE;

Cell	**fldtab;	/* pointers to Cells */

/* ---- snapshot of FS taken by savefs() ---- */
static size_t	len_inputFS = 0;
static char	*inputFS = NULL;	/* FS at time of input, for field splitting */

#define	MAXFLD	2
int	nfields	= MAXFLD;	/* last allocated slot for $i */

/* ---- lazy split/rebuild state ---- */
bool	donefld;	/* true = implies rec broken into fields */
bool	donerec;	/* true = record is valid (no flds have changed) */

int	lastfld	= 0;	/* last used field */
int	argno	= 1;	/* current input argument number */
extern	Awkfloat *ARGC;

/* Templates copied into freshly allocated cells for $0 and for $1..$n. */
static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
2012-12-22 10:35:39 -05:00
void recinit(unsigned int n)
{
2020-10-13 13:52:43 -04:00
if ( (record = (char *) malloc(n)) == NULL
|| (fields = (char *) malloc(n+1)) == NULL
|| (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
|| (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
2012-12-22 10:35:39 -05:00
FATAL("out of space for $0 and fields");
*record = '\0';
2012-12-22 10:35:39 -05:00
*fldtab[0] = dollar0;
fldtab[0]->sval = record;
fldtab[0]->nval = tostring("0");
makefields(1, nfields);
}
void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
{
char temp[50];
int i;
for (i = n1; i <= n2; i++) {
2020-10-13 13:52:43 -04:00
fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
2012-12-22 10:35:39 -05:00
if (fldtab[i] == NULL)
FATAL("out of space in makefields %d", i);
*fldtab[i] = dollar1;
snprintf(temp, sizeof(temp), "%d", i);
2012-12-22 10:35:39 -05:00
fldtab[i]->nval = tostring(temp);
}
}
void initgetrec(void)
{
int i;
char *p;
for (i = 1; i < *ARGC; i++) {
p = getargv(i); /* find 1st real filename */
if (p == NULL || *p == '\0') { /* deleted or zapped */
argno++;
continue;
}
if (!isclvar(p)) {
setsval(lookup("FILENAME", symtab), p);
return;
}
setclvar(p); /* a commandline assignment before filename */
argno++;
}
infile = stdin; /* no filenames, so use stdin */
innew = true;
2012-12-22 10:35:39 -05:00
}
/*
 * savefs - remember the current value of FS in inputFS.
 *
 * POSIX requires fields to be evaluated as if they had been split with
 * the FS in effect when the record ($0) was read.  Because splitting is
 * done lazily, FS is copied here whenever a new record is read in
 * (implicitly or via getline) or a new value is assigned to $0.
 *
 * The copy buffer is reused when it is already large enough and grown
 * with realloc otherwise; failure to grow it is fatal.
 */
void savefs(void)
{
	size_t need = strlen(getsval(fsloc)) + 1;	/* bytes incl. the NUL */

	if (need <= len_inputFS) {
		strcpy(inputFS, *FS);	/* for subsequent field splitting */
		return;
	}

	len_inputFS = need;
	inputFS = (char *) realloc(inputFS, len_inputFS);
	if (inputFS == NULL)
		FATAL("field separator %.10s... is too long", *FS);
	memcpy(inputFS, *FS, len_inputFS);
}
2019-11-10 14:19:18 -05:00
static bool firsttime = true;
2012-12-22 10:35:39 -05:00
2019-11-10 14:19:18 -05:00
int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record */
2012-12-22 10:35:39 -05:00
{ /* note: cares whether buf == record */
int c;
char *buf = *pbuf;
uschar saveb0;
int bufsize = *pbufsize, savebufsize = bufsize;
if (firsttime) {
2019-11-10 14:19:18 -05:00
firsttime = false;
2012-12-22 10:35:39 -05:00
initgetrec();
}
DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
*RS, *FS, *ARGC, *FILENAME);
2012-12-22 10:35:39 -05:00
if (isrecord) {
2019-11-10 14:19:18 -05:00
donefld = false;
donerec = true;
savefs();
2012-12-22 10:35:39 -05:00
}
saveb0 = buf[0];
buf[0] = 0;
while (argno < *ARGC || infile == stdin) {
DPRINTF("argno=%d, file=|%s|\n", argno, file);
2012-12-22 10:35:39 -05:00
if (infile == NULL) { /* have to open a new file */
file = getargv(argno);
if (file == NULL || *file == '\0') { /* deleted or zapped */
argno++;
continue;
}
if (isclvar(file)) { /* a var=value arg */
setclvar(file);
argno++;
continue;
}
*FILENAME = file;
DPRINTF("opening file %s\n", file);
2012-12-22 10:35:39 -05:00
if (*file == '-' && *(file+1) == '\0')
infile = stdin;
else if ((infile = fopen(file, "r")) == NULL)
FATAL("can't open file %s", file);
innew = true;
2012-12-22 10:35:39 -05:00
setfval(fnrloc, 0.0);
}
c = readrec(&buf, &bufsize, infile, innew);
if (innew)
innew = false;
2012-12-22 10:35:39 -05:00
if (c != 0 || buf[0] != '\0') { /* normal record */
if (isrecord) {
double result;
2012-12-22 10:35:39 -05:00
if (freeable(fldtab[0]))
xfree(fldtab[0]->sval);
fldtab[0]->sval = buf; /* buf == record */
fldtab[0]->tval = REC | STR | DONTFREE;
if (is_number(fldtab[0]->sval, & result)) {
fldtab[0]->fval = result;
2012-12-22 10:35:39 -05:00
fldtab[0]->tval |= NUM;
}
}
setfval(nrloc, nrloc->fval+1);
setfval(fnrloc, fnrloc->fval+1);
*pbuf = buf;
*pbufsize = bufsize;
return 1;
}
/* EOF arrived on this file; set up next */
if (infile != stdin)
fclose(infile);
infile = NULL;
argno++;
}
buf[0] = saveb0;
*pbuf = buf;
*pbufsize = savebufsize;
return 0; /* true end of file */
}
/*
 * nextfile - abandon the current input file and advance to the next
 * argument.  stdin is never closed.
 */
void nextfile(void)
{
	FILE *cur = infile;

	infile = NULL;
	argno++;
	if (cur != NULL && cur != stdin)
		fclose(cur);
}
int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
2012-12-22 10:35:39 -05:00
{
int sep, c, isrec;
2012-12-22 10:35:39 -05:00
char *rr, *buf = *pbuf;
int bufsize = *pbufsize;
char *rs = getsval(rsloc);
2012-12-22 10:35:39 -05:00
if (*rs && rs[1]) {
2019-11-10 14:19:18 -05:00
bool found;
fa *pfa = makedfa(rs, 1);
if (newflag)
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
else {
int tempstat = pfa->initstat;
pfa->initstat = 2;
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
pfa->initstat = tempstat;
}
if (found)
setptr(patbeg, '\0');
Fix readrec's definition of a record I botched readrec's definition of a record, when I implemented RS regular expression support. This is the relevant hunk from the old diff: ``` - return c == EOF && rr == buf ? 0 : 1; + isrec = *buf || !feof(inf); + dprintf( ("readrec saw <%s>, returns %d\n", buf, isrec) ); + return isrec; ``` Problem #1 Unlike testing with EOF, `*buf || !feof(inf)` is blind to stdio errors. This can cause an infinite loop whose each iteration fabricates an empty record. The following demonstration uses standard terminal access control policy to produce a persistent error condition. Note that the "i/o error" message does not come from readrec(). It's produced much later by closeall() at shutdown. ``` $ trap '' SIGTTIN && awk 'END {print NR}' & [1] 33517 $ # After fg, type ^D $ fg trap '' SIGTTIN && awk 'END {print NR}' 13847376 awk: i/o error occurred on /dev/stdin input record number 13847376, file source line number 1 ``` Each time awk tries to read the terminal from the background, while ignoring SIGTTIN, the read fails with EIO, getc returns EOF, the stream's end-of-file indicator remains clear, and `!feof` erroneously promotoes the empty buffer to an empty record. So long as the error persists, the stream's position does not advance and end-of-file is never set. Problem #2: When RS is a regex, `*buf || !feof(inf)` can't see an empty record's terminator at the end of a stream. ``` $ echo a | awk 1 RS='a\n' $ ``` That pipeline should have found one empty record and printed a blank line, but `*buf || !feof(inf)` considers reaching the end of the stream the conclusion of a fruitless search. That's only correct when the terminator is a single character, because a regex RS search can set the end-of-file marker even when it succeeds. The Fix `isrec` must be 0 **iff** no record is found. The correct definition of "no record" is a failure to find a record terminator and a failure to find any data (possibly from a final, unterminated record). 
Conceptually, for any RS: ``` isrec = (noTERM && noDATA) ? 0 : 1 ``` noDATA is an expression that's true if `buf` is empty, false otherwise. When RS is null or a single character, noTERM is an expression that is true when the sought after character is not found, false otherwise. Since the search for a single character can only end with that character or EOF, noTERM is `c == EOF`. ``` isrec = (c == EOF && rr == buf) ? 0 : 1 ``` When RS is a regular expression: noTERM is an expression that is true if a match for RS is not found, false otherwise. This is simply the inverse of the result of the function that conducts the search, `!found`. ``` isrec = (found == 0 && *buf == '\0') ? 0 : 1 ```
2021-04-23 02:13:33 -04:00
isrec = (found == 0 && *buf == '\0') ? 0 : 1;
} else {
if ((sep = *rs) == 0) {
sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
;
if (c != EOF)
ungetc(c, inf);
}
for (rr = buf; ; ) {
for (; (c=getc(inf)) != sep && c != EOF; ) {
if (rr-buf+1 > bufsize)
if (!adjbuf(&buf, &bufsize, 1+rr-buf,
recsize, &rr, "readrec 1"))
FATAL("input record `%.30s...' too long", buf);
*rr++ = c;
}
if (*rs == sep || c == EOF)
break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
break;
if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
"readrec 2"))
FATAL("input record `%.30s...' too long", buf);
*rr++ = '\n';
2012-12-22 10:35:39 -05:00
*rr++ = c;
}
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
2012-12-22 10:35:39 -05:00
FATAL("input record `%.30s...' too long", buf);
*rr = 0;
Fix readrec's definition of a record I botched readrec's definition of a record, when I implemented RS regular expression support. This is the relevant hunk from the old diff: ``` - return c == EOF && rr == buf ? 0 : 1; + isrec = *buf || !feof(inf); + dprintf( ("readrec saw <%s>, returns %d\n", buf, isrec) ); + return isrec; ``` Problem #1 Unlike testing with EOF, `*buf || !feof(inf)` is blind to stdio errors. This can cause an infinite loop whose each iteration fabricates an empty record. The following demonstration uses standard terminal access control policy to produce a persistent error condition. Note that the "i/o error" message does not come from readrec(). It's produced much later by closeall() at shutdown. ``` $ trap '' SIGTTIN && awk 'END {print NR}' & [1] 33517 $ # After fg, type ^D $ fg trap '' SIGTTIN && awk 'END {print NR}' 13847376 awk: i/o error occurred on /dev/stdin input record number 13847376, file source line number 1 ``` Each time awk tries to read the terminal from the background, while ignoring SIGTTIN, the read fails with EIO, getc returns EOF, the stream's end-of-file indicator remains clear, and `!feof` erroneously promotoes the empty buffer to an empty record. So long as the error persists, the stream's position does not advance and end-of-file is never set. Problem #2: When RS is a regex, `*buf || !feof(inf)` can't see an empty record's terminator at the end of a stream. ``` $ echo a | awk 1 RS='a\n' $ ``` That pipeline should have found one empty record and printed a blank line, but `*buf || !feof(inf)` considers reaching the end of the stream the conclusion of a fruitless search. That's only correct when the terminator is a single character, because a regex RS search can set the end-of-file marker even when it succeeds. The Fix `isrec` must be 0 **iff** no record is found. The correct definition of "no record" is a failure to find a record terminator and a failure to find any data (possibly from a final, unterminated record). 
Conceptually, for any RS: ``` isrec = (noTERM && noDATA) ? 0 : 1 ``` noDATA is an expression that's true if `buf` is empty, false otherwise. When RS is null or a single character, noTERM is an expression that is true when the sought after character is not found, false otherwise. Since the search for a single character can only end with that character or EOF, noTERM is `c == EOF`. ``` isrec = (c == EOF && rr == buf) ? 0 : 1 ``` When RS is a regular expression: noTERM is an expression that is true if a match for RS is not found, false otherwise. This is simply the inverse of the result of the function that conducts the search, `!found`. ``` isrec = (found == 0 && *buf == '\0') ? 0 : 1 ```
2021-04-23 02:13:33 -04:00
isrec = (c == EOF && rr == buf) ? 0 : 1;
2012-12-22 10:35:39 -05:00
}
*pbuf = buf;
*pbufsize = bufsize;
DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
return isrec;
2012-12-22 10:35:39 -05:00
}
/*
 * getargv - get ARGV[n] as a string.
 *
 * Returns NULL when ARGV has no element n, otherwise the string value
 * of that element.
 */
char *getargv(int n)
{
	extern Array *ARGVtab;
	char key[50];
	Cell *elem;
	char *val;

	snprintf(key, sizeof(key), "%d", n);
	if (lookup(key, ARGVtab) == NULL)
		return NULL;
	elem = setsymtab(key, "", 0.0, STR, ARGVtab);
	val = getsval(elem);
	DPRINTF("getargv(%d) returns |%s|\n", n, val);
	return val;
}
void setclvar(char *s) /* set var=value from s */
{
char *p;
Cell *q;
double result;
2012-12-22 10:35:39 -05:00
for (p=s; *p != '='; p++)
;
*p++ = 0;
p = qstring(p, '\0');
q = setsymtab(s, p, 0.0, STR, symtab);
setsval(q, p);
if (is_number(q->sval, & result)) {
q->fval = result;
2012-12-22 10:35:39 -05:00
q->tval |= NUM;
}
DPRINTF("command line set %s to |%s|\n", s, p);
2012-12-22 10:35:39 -05:00
}
void fldbld(void) /* create fields from current record */
{
/* this relies on having fields[] the same length as $0 */
/* the fields are all stored in this one array with \0's */
/* possibly with a final trailing \0 not associated with any field */
char *r, *fr, sep;
Cell *p;
int i, j, n;
if (donefld)
return;
if (!isstr(fldtab[0]))
getsval(fldtab[0]);
r = fldtab[0]->sval;
n = strlen(r);
if (n > fieldssize) {
xfree(fields);
2020-10-13 13:52:43 -04:00
if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
2012-12-22 10:35:39 -05:00
FATAL("out of space for fields in fldbld %d", n);
fieldssize = n;
}
fr = fields;
i = 0; /* number of fields accumulated here */
if (inputFS == NULL) /* make sure we have a copy of FS */
savefs();
2012-12-22 10:35:39 -05:00
if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS);
} else if ((sep = *inputFS) == ' ') { /* default whitespace */
for (i = 0; ; ) {
while (*r == ' ' || *r == '\t' || *r == '\n')
r++;
if (*r == 0)
break;
i++;
if (i > nfields)
growfldtab(i);
if (freeable(fldtab[i]))
xfree(fldtab[i]->sval);
fldtab[i]->sval = fr;
fldtab[i]->tval = FLD | STR | DONTFREE;
do
*fr++ = *r++;
while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
*fr++ = 0;
}
*fr = 0;
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
for (i = 0; *r != '\0'; r += n) {
char buf[MB_LEN_MAX + 1];
2012-12-22 10:35:39 -05:00
i++;
if (i > nfields)
growfldtab(i);
if (freeable(fldtab[i]))
xfree(fldtab[i]->sval);
n = mblen(r, MB_LEN_MAX);
if (n < 0)
n = 1;
memcpy(buf, r, n);
buf[n] = '\0';
2012-12-22 10:35:39 -05:00
fldtab[i]->sval = tostring(buf);
fldtab[i]->tval = FLD | STR;
}
*fr = 0;
} else if (*r != 0) { /* if 0, it's a null field */
/* subtlecase : if length(FS) == 1 && length(RS > 0)
* \n is NOT a field separator (cf awk book 61,84).
* this variable is tested in the inner while loop.
*/
int rtest = '\n'; /* normal case */
if (strlen(*RS) > 0)
rtest = '\0';
for (;;) {
i++;
if (i > nfields)
growfldtab(i);
if (freeable(fldtab[i]))
xfree(fldtab[i]->sval);
fldtab[i]->sval = fr;
fldtab[i]->tval = FLD | STR | DONTFREE;
while (*r != sep && *r != rtest && *r != '\0') /* \n is always a separator */
*fr++ = *r++;
*fr++ = 0;
if (*r++ == 0)
break;
}
*fr = 0;
}
if (i > nfields)
FATAL("record `%.30s...' has too many fields; can't happen", r);
cleanfld(i+1, lastfld); /* clean out junk from previous record */
lastfld = i;
2019-11-10 14:19:18 -05:00
donefld = true;
2012-12-22 10:35:39 -05:00
for (j = 1; j <= lastfld; j++) {
double result;
2012-12-22 10:35:39 -05:00
p = fldtab[j];
if(is_number(p->sval, & result)) {
p->fval = result;
2012-12-22 10:35:39 -05:00
p->tval |= NUM;
}
}
setfval(nfloc, (Awkfloat) lastfld);
2019-11-10 14:19:18 -05:00
donerec = true; /* restore */
2012-12-22 10:35:39 -05:00
if (dbg) {
for (j = 0; j <= lastfld; j++) {
p = fldtab[j];
printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
}
}
}
void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */
{ /* nvals remain intact */
Cell *p;
int i;
for (i = n1; i <= n2; i++) {
p = fldtab[i];
if (freeable(p))
xfree(p->sval);
p->sval = EMPTY,
2012-12-22 10:35:39 -05:00
p->tval = FLD | STR | DONTFREE;
}
}
/*
 * newfld - add field n after the end of the existing lastfld.
 * The table is grown if needed, fields lastfld+1..n are cleared, and
 * both lastfld and NF become n.
 */
void newfld(int n)
{
	if (nfields < n)
		growfldtab(n);
	cleanfld(lastfld + 1, n);
	lastfld = n;
	setfval(nfloc, (Awkfloat) n);
}
/*
 * setlastfld - set lastfld to n, clearing field cells as necessary.
 * Growing clears the new fields lastfld+1..n; shrinking clears the
 * dropped fields n+1..lastfld.  A negative n is fatal.
 */
void setlastfld(int n)
{
	int lo, hi;

	if (n < 0)
		FATAL("cannot set NF to a negative value");
	if (n > nfields)
		growfldtab(n);

	if (n > lastfld) {
		lo = lastfld + 1;
		hi = n;
	} else {
		lo = n + 1;
		hi = lastfld;
	}
	cleanfld(lo, hi);
	lastfld = n;
}
2012-12-22 10:35:39 -05:00
/*
 * fieldadr - return the cell for the nth field.
 * A negative n is fatal.  Asking for a field beyond the allocated table
 * grows the table; fields after NF are empty and NF is not increased.
 */
Cell *fieldadr(int n)
{
	if (n < 0)
		FATAL("trying to access out of range field %d", n);
	if (nfields < n)
		growfldtab(n);
	return fldtab[n];
}
void growfldtab(int n) /* make new fields up to at least $n */
{
int nf = 2 * nfields;
size_t s;
if (n > nf)
nf = n;
s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */
if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
2020-10-13 13:52:43 -04:00
fldtab = (Cell **) realloc(fldtab, s);
2012-12-22 10:35:39 -05:00
else /* overflow sizeof int */
xfree(fldtab); /* make it null */
if (fldtab == NULL)
FATAL("out of space creating %d fields", nf);
makefields(nfields+1, nf);
nfields = nf;
}
int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */
{
/* this relies on having fields[] the same length as $0 */
/* the fields are all stored in this one array with \0's */
char *fr;
int i, tempstat, n;
fa *pfa;
n = strlen(rec);
if (n > fieldssize) {
xfree(fields);
2020-10-13 13:52:43 -04:00
if ((fields = (char *) malloc(n+1)) == NULL)
2012-12-22 10:35:39 -05:00
FATAL("out of space for fields in refldbld %d", n);
fieldssize = n;
}
fr = fields;
*fr = '\0';
if (*rec == '\0')
return 0;
pfa = makedfa(fs, 1);
DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
2012-12-22 10:35:39 -05:00
tempstat = pfa->initstat;
for (i = 1; ; i++) {
if (i > nfields)
growfldtab(i);
if (freeable(fldtab[i]))
xfree(fldtab[i]->sval);
fldtab[i]->tval = FLD | STR | DONTFREE;
fldtab[i]->sval = fr;
DPRINTF("refldbld: i=%d\n", i);
2012-12-22 10:35:39 -05:00
if (nematch(pfa, rec)) {
pfa->initstat = 2; /* horrible coupling to b.c */
DPRINTF("match %s (%d chars)\n", patbeg, patlen);
2012-12-22 10:35:39 -05:00
strncpy(fr, rec, patbeg-rec);
fr += patbeg - rec + 1;
*(fr-1) = '\0';
rec = patbeg + patlen;
} else {
DPRINTF("no match %s\n", rec);
2012-12-22 10:35:39 -05:00
strcpy(fr, rec);
pfa->initstat = tempstat;
break;
}
}
return i;
2012-12-22 10:35:39 -05:00
}
void recbld(void) /* create $0 from $1..$NF if necessary */
{
int i;
char *r, *p;
char *sep = getsval(ofsloc);
2012-12-22 10:35:39 -05:00
2019-11-10 14:19:18 -05:00
if (donerec)
2012-12-22 10:35:39 -05:00
return;
r = record;
for (i = 1; i <= *NF; i++) {
p = getsval(fldtab[i]);
if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
FATAL("created $0 `%.30s...' too long", record);
while ((*r = *p++) != 0)
r++;
if (i < *NF) {
if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
2012-12-22 10:35:39 -05:00
FATAL("created $0 `%.30s...' too long", record);
for (p = sep; (*r = *p++) != 0; )
2012-12-22 10:35:39 -05:00
r++;
}
}
if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
FATAL("built giant record `%.30s...'", record);
*r = '\0';
DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
2012-12-22 10:35:39 -05:00
if (freeable(fldtab[0]))
xfree(fldtab[0]->sval);
fldtab[0]->tval = REC | STR | DONTFREE;
fldtab[0]->sval = record;
DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
DPRINTF("recbld = |%s|\n", record);
2019-11-10 14:19:18 -05:00
donerec = true;
2012-12-22 10:35:39 -05:00
}
/* Set to 2 by SYNTAX() once a syntax error has been reported. */
int errorflag = 0;
/*
 * yyerror - report the message s as a syntax error.
 * The text is passed through "%s" so it is never used as a format.
 */
void yyerror(const char *s)
{
SYNTAX("%s", s);
}
void SYNTAX(const char *fmt, ...)
{
extern char *cmdname, *curfname;
static int been_here = 0;
va_list varg;
if (been_here++ > 2)
return;
fprintf(stderr, "%s: ", cmdname);
va_start(varg, fmt);
vfprintf(stderr, fmt, varg);
va_end(varg);
fprintf(stderr, " at source line %d", lineno);
if (curfname != NULL)
fprintf(stderr, " in function %s", curfname);
2019-11-10 14:19:18 -05:00
if (compile_time == COMPILING && cursource() != NULL)
2012-12-22 10:35:39 -05:00
fprintf(stderr, " source file %s", cursource());
fprintf(stderr, "\n");
errorflag = 2;
eprint();
}
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck - consume the remaining program text, counting brackets,
 * and report any imbalance of {}, [] and ().  Only the first call does
 * anything.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int ch;

	if (beenhere++)
		return;
	for (;;) {
		ch = input();
		if (ch == EOF || ch == '\0')
			break;
		bclass(ch);
	}
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
/*
 * bcheck2 - report an imbalance of n for one bracket pair on stderr.
 * A positive n means closing characters (c2) are missing, a negative n
 * means there are extra ones; nothing is printed for 0.  c1 is unused.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;

	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
/*
 * FATAL - print a printf-style message and terminate with status 2.
 * stdout is flushed first; the message is prefixed with the command
 * name and followed by the context printed by error().  When dbg is
 * greater than 1 the process aborts instead of exiting.
 */
void FATAL(const char *fmt, ...)
{
	extern char *cmdname;
	va_list args;

	fflush(stdout);
	fprintf(stderr, "%s: ", cmdname);
	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	error();

	if (dbg > 1) {		/* core dump if serious debugging on */
		abort();
	}
	exit(2);
}
/*
 * WARNING - print a printf-style message and continue.
 * stdout is flushed first; the message is prefixed with the command
 * name and followed by the context printed by error().
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list args;

	fflush(stdout);
	fprintf(stderr, "%s: ", cmdname);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	error();
}
void error()
{
extern Node *curnode;
fprintf(stderr, "\n");
2019-11-10 14:19:18 -05:00
if (compile_time != ERROR_PRINTING) {
if (NR && *NR > 0) {
fprintf(stderr, " input record number %d", (int) (*FNR));
if (strcmp(*FILENAME, "-") != 0)
fprintf(stderr, ", file %s", *FILENAME);
fprintf(stderr, "\n");
}
if (curnode)
fprintf(stderr, " source line number %d", curnode->lineno);
else if (lineno)
fprintf(stderr, " source line number %d", lineno);
if (compile_time == COMPILING && cursource() != NULL)
fprintf(stderr, " source file %s", cursource());
fprintf(stderr, "\n");
eprint();
2012-12-22 10:35:39 -05:00
}
}
void eprint(void) /* try to print context around error */
{
char *p, *q;
int c;
static int been_here = 0;
extern char ebuf[], *ep;
2019-11-10 14:19:18 -05:00
if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
2012-12-22 10:35:39 -05:00
return;
if (ebuf == ep)
return;
2012-12-22 10:35:39 -05:00
p = ep - 1;
if (p > ebuf && *p == '\n')
p--;
for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
;
while (*p == '\n')
p++;
fprintf(stderr, " context is\n\t");
for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
;
for ( ; p < q; p++)
if (*p)
putc(*p, stderr);
fprintf(stderr, " >>> ");
for ( ; p < ep; p++)
if (*p)
putc(*p, stderr);
fprintf(stderr, " <<< ");
if (*ep)
while ((c = input()) != '\n' && c != '\0' && c != EOF) {
putc(c, stderr);
bclass(c);
}
putc('\n', stderr);
ep = ebuf;
}
/*
 * bclass - update the brace, bracket and parenthesis counters for the
 * character c: an opening character increments its counter, a closing
 * one decrements it, anything else is ignored.
 */
void bclass(int c)
{
	if (c == '{')
		bracecnt++;
	else if (c == '}')
		bracecnt--;
	else if (c == '[')
		brackcnt++;
	else if (c == ']')
		brackcnt--;
	else if (c == '(')
		parencnt++;
	else if (c == ')')
		parencnt--;
}
/*
 * errcheck - check errno after a math library call.
 *
 * s names the function for the warning.  When errno is EDOM or ERANGE
 * it is cleared, a warning is issued and 1 is returned in place of x;
 * otherwise x is returned unchanged.
 */
double errcheck(double x, const char *s)
{
	switch (errno) {
	case EDOM:
		errno = 0;
		WARNING("%s argument out of domain", s);
		return 1.0;
	case ERANGE:
		errno = 0;
		WARNING("%s result out of range", s);
		return 1.0;
	default:
		return x;
	}
}
/*
 * isclvar - is s of the form var=something ?
 *
 * var must start with a letter or underscore and continue with letters,
 * digits or underscores.  Returns nonzero when such a name is
 * immediately followed by '=', 0 otherwise.
 */
int isclvar(const char *s)
{
	const char *p = s;

	if (*p != '_' && !isalpha((uschar) *p))
		return 0;
	while (*p != '\0' && (*p == '_' || isalnum((uschar) *p)))
		p++;
	return *p == '=' && p > s;
}
/*
 * is_valid_number - decide whether s looks like a number, converting it
 * at most once.
 *
 * strtod is supposed to be a proper test of what's a valid number, but
 * as of C99 it also accepts hexadecimal floating point as well as
 * "nan"/"inf" spellings, which is a bit of a mess.  We work around the
 * mess by checking for a hexadecimal value (with or without a sign) and
 * disallowing it.  Similarly, we follow gawk and allow only +nan, -nan,
 * +inf, and -inf for NaN and infinity values.
 *
 * The interface is meant to avoid a double conversion of the string and
 * to convey out, if requested, whether the number was only a leading
 * part of the string or all of it.
 *
 *   s                  string to examine; leading white space is skipped
 *   trailing_stuff_ok  accept a number followed by other text
 *   no_trailing        if not NULL, set to true when only white space
 *                      follows the number (false on every failure path)
 *   result             if not NULL, receives the converted value
 *                      whenever strtod succeeded in range
 *
 * Returns true if s is a number that runs to the end of the string, or
 * if it starts with a number and trailing_stuff_ok is set.
 *
 * The <ctype.h> functions are given unsigned char values, because
 * passing a negative plain char is undefined.
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
					bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *digits;	/* first character after an optional sign */
	bool has_sign;
	bool special = false;	/* a well-formed signed nan or inf */

	if (no_trailing)
		*no_trailing = false;

	while (isspace((unsigned char) *s))
		s++;

	has_sign = (s[0] == '+' || s[0] == '-');
	digits = has_sign ? s + 1 : s;

	// no hex floating point, sorry; the sign must not hide the prefix
	if (digits[0] == '0' && tolower((unsigned char) digits[1]) == 'x')
		return false;

	// allow +nan, -nan, +inf, -inf, any other letter, no
	if (has_sign) {
		bool is_nan = (strncasecmp(digits, "nan", 3) == 0);
		bool is_inf = (strncasecmp(digits, "inf", 3) == 0);

		// digits[3] exists here: the three letters matched
		special = (is_nan || is_inf)
		    && (isspace((unsigned char) digits[3]) || digits[3] == '\0');
	}
	if (!special && !isdigit((unsigned char) digits[0]) && digits[0] != '.')
		return false;

	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || errno == ERANGE)
		return false;

	// make sure "-nan" really carries the sign bit
	if (isnan(r) && s[0] == '-' && signbit(r) == 0)
		r = -r;

	if (result != NULL)
		*result = r;

	/*
	 * check for trailing stuff
	 */
	while (isspace((unsigned char) *ep))
		ep++;

	if (no_trailing != NULL)
		*no_trailing = (*ep == '\0');

	// return true if found the end, or trailing stuff is allowed
	return *ep == '\0' || trailing_stuff_ok;
}