3 more fixes (#75)

* LC_NUMERIC radix issue.

According to https://pubs.opengroup.org/onlinepubs/7990989775/xcu/awk.html
The period character is the character recognized in processing awk
programs.  Make it so that during output we also print the period
character, since this is what other awk implementations do, and it
makes sense from an interoperability point of view.

* print "T.builtin" in the error message

* Fix backslash continuation line handling.

* Keep track of RS processing so we apply the regex properly only once
per record.
This commit is contained in:
zoulasc 2020-02-28 06:23:54 -05:00 committed by GitHub
parent 91eaf7f701
commit ffee7780fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 57 additions and 16 deletions

1
lex.c
View File

@ -388,6 +388,7 @@ int string(void)
case '\\':
c = input();
switch (c) {
case '\n': break;
case '"': *bp++ = '"'; break;
case 'n': *bp++ = '\n'; break;
case 't': *bp++ = '\t'; break;

17
lib.c
View File

@ -35,6 +35,7 @@ THIS SOFTWARE.
char EMPTY[] = { '\0' };
FILE *infile = NULL;
bool innew; /* true = infile has not been read by readrec */
char *file = EMPTY;
char *record;
int recsize = RECSIZE;
@ -106,6 +107,7 @@ void initgetrec(void)
argno++;
}
infile = stdin; /* no filenames, so use stdin */
innew = true;
}
/*
@ -175,7 +177,9 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record *
FATAL("can't open file %s", file);
setfval(fnrloc, 0.0);
}
c = readrec(&buf, &bufsize, infile);
c = readrec(&buf, &bufsize, infile, innew);
if (innew)
innew = false;
if (c != 0 || buf[0] != '\0') { /* normal record */
if (isrecord) {
if (freeable(fldtab[0]))
@ -213,7 +217,7 @@ void nextfile(void)
argno++;
}
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
{
int sep, c, isrec;
char *rr, *buf = *pbuf;
@ -224,7 +228,14 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
bool found;
fa *pfa = makedfa(rs, 1);
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
if (newflag)
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
else {
int tempstat = pfa->initstat;
pfa->initstat = 2;
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
pfa->initstat = tempstat;
}
if (found)
setptr(patbeg, '\0');
} else {

1
main.c
View File

@ -214,7 +214,6 @@ int main(int argc, char *argv[])
if (!safe)
envinit(environ);
yyparse();
setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
if (fs)
*FS = qstring(fs, '\0');
dprintf( ("errorflag=%d\n", errorflag) );

View File

@ -122,7 +122,7 @@ extern void growfldtab(int n);
extern void savefs(void);
extern int getrec(char **, int *, bool);
extern void nextfile(void);
extern int readrec(char **buf, int *bufsize, FILE *inf);
extern int readrec(char **buf, int *bufsize, FILE *inf, bool isnew);
extern char *getargv(int);
extern void setclvar(char *);
extern void fldbld(void);
@ -191,7 +191,7 @@ extern Cell *bltin(Node **, int);
extern Cell *printstat(Node **, int);
extern Cell *nullproc(Node **, int);
extern FILE *redirect(int, Node *);
extern FILE *openfile(int, const char *);
extern FILE *openfile(int, const char *, bool *);
extern const char *filename(FILE *);
extern Cell *closefile(Node **, int);
extern void closeall(void);

24
run.c
View File

@ -405,6 +405,7 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
char *buf;
int bufsize = recsize;
int mode;
bool newflag;
if ((buf = malloc(bufsize)) == NULL)
FATAL("out of memory in getline");
@ -416,12 +417,12 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
mode = ptoi(a[1]);
if (mode == '|') /* input pipe */
mode = LE; /* arbitrary flag */
fp = openfile(mode, getsval(x));
fp = openfile(mode, getsval(x), &newflag);
tempfree(x);
if (fp == NULL)
n = -1;
else
n = readrec(&buf, &bufsize, fp);
n = readrec(&buf, &bufsize, fp, newflag);
if (n <= 0) {
;
} else if (a[0] != NULL) { /* getline var <file */
@ -1658,7 +1659,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
if (isrec(x) || strlen(getsval(x)) == 0) {
flush_all(); /* fflush() or fflush("") -> all */
u = 0;
} else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
u = EOF;
else
u = fflush(fp);
@ -1718,7 +1719,7 @@ FILE *redirect(int a, Node *b) /* set up all i/o redirections */
x = execute(b);
fname = getsval(x);
fp = openfile(a, fname);
fp = openfile(a, fname, NULL);
if (fp == NULL)
FATAL("can't open file %s", fname);
tempfree(x);
@ -1750,7 +1751,7 @@ static void stdinit(void) /* in case stdin, etc., are not constants */
files[2].mode = GT;
}
FILE *openfile(int a, const char *us)
FILE *openfile(int a, const char *us, bool *pnewflag)
{
const char *s = us;
size_t i;
@ -1760,11 +1761,12 @@ FILE *openfile(int a, const char *us)
if (*s == '\0')
FATAL("null file name in print or getline");
for (i = 0; i < nfiles; i++)
if (files[i].fname && strcmp(s, files[i].fname) == 0) {
if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
return files[i].fp;
if (a == FFLUSH)
return files[i].fp;
if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
(a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
a == FFLUSH)) {
if (pnewflag)
*pnewflag = false;
return files[i].fp;
}
if (a == FFLUSH) /* didn't find it, so don't create it! */
return NULL;
@ -1801,6 +1803,8 @@ FILE *openfile(int a, const char *us)
files[i].fname = tostring(s);
files[i].fp = fp;
files[i].mode = m;
if (pnewflag)
*pnewflag = true;
if (fp != stdin && fp != stdout && fp != stderr)
(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
}

View File

@ -35,6 +35,9 @@ if locale -a | grep -qsi de_DE.UTF-8; then
$awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1
echo 'dürst|DÜRST|Dürst' >foo2
diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8'
(export LC_NUMERIC=de_DE.UTF-8 && $awk 'BEGIN { print 0.01 }' /dev/null) >foo1
echo "0.01" >foo2
diff foo1 foo2 || echo 'BAD: T.builtin LC_NUMERIC radix (.) handling'
fi
$awk 'BEGIN {
@ -70,3 +73,18 @@ echo '1
3' >foo1
$awk '{ n = split($0, x); print length(x) }' <foo0 >foo2
diff foo1 foo2 || echo 'BAD: T.builtin length array'
# Test for backslash handling
cat << \EOF >foo0
BEGIN {
print "A\
B";
print "CD"
}
EOF
$awk -f foo0 /dev/null >foo1
cat << \EOF >foo2
AB
CD
EOF
diff foo1 foo2 || echo 'BAD: T.builtin continuation handling (backslash)'

View File

@ -186,6 +186,14 @@ BEGIN { RS = ""
}' >foo1
$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'
# Test for RS regex being reapplied
echo aaa1a2a | $awk 1 RS='^a' >foo1
cat << \EOF > foo2
aa1a2a
EOF
diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'
# The following should not produce a warning about changing a constant
# nor about a curdled tempcell list