3 more fixes (#75)

* LC_NUMERIC radix issue.

According to https://pubs.opengroup.org/onlinepubs/7990989775/xcu/awk.html,
the period character is the decimal-point character recognized in
processing awk programs, regardless of locale.  Make it so that during
output we also print the period character, since this is what other awk
implementations do, and it makes sense from an interoperability point of
view.

* print "T.builtin" in the error message

* Fix handling of backslash-newline line continuation inside string literals.

* Keep track of whether the input has already been read from, so that a
regular-expression RS is applied properly only once per record.
This commit is contained in:
zoulasc 2020-02-28 06:23:54 -05:00 committed by GitHub
parent 91eaf7f701
commit ffee7780fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 57 additions and 16 deletions

1
lex.c
View File

@ -388,6 +388,7 @@ int string(void)
case '\\': case '\\':
c = input(); c = input();
switch (c) { switch (c) {
case '\n': break;
case '"': *bp++ = '"'; break; case '"': *bp++ = '"'; break;
case 'n': *bp++ = '\n'; break; case 'n': *bp++ = '\n'; break;
case 't': *bp++ = '\t'; break; case 't': *bp++ = '\t'; break;

17
lib.c
View File

@ -35,6 +35,7 @@ THIS SOFTWARE.
char EMPTY[] = { '\0' }; char EMPTY[] = { '\0' };
FILE *infile = NULL; FILE *infile = NULL;
bool innew; /* true = infile has not been read by readrec */
char *file = EMPTY; char *file = EMPTY;
char *record; char *record;
int recsize = RECSIZE; int recsize = RECSIZE;
@ -106,6 +107,7 @@ void initgetrec(void)
argno++; argno++;
} }
infile = stdin; /* no filenames, so use stdin */ infile = stdin; /* no filenames, so use stdin */
innew = true;
} }
/* /*
@ -175,7 +177,9 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record *
FATAL("can't open file %s", file); FATAL("can't open file %s", file);
setfval(fnrloc, 0.0); setfval(fnrloc, 0.0);
} }
c = readrec(&buf, &bufsize, infile); c = readrec(&buf, &bufsize, infile, innew);
if (innew)
innew = false;
if (c != 0 || buf[0] != '\0') { /* normal record */ if (c != 0 || buf[0] != '\0') { /* normal record */
if (isrecord) { if (isrecord) {
if (freeable(fldtab[0])) if (freeable(fldtab[0]))
@ -213,7 +217,7 @@ void nextfile(void)
argno++; argno++;
} }
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */ int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
{ {
int sep, c, isrec; int sep, c, isrec;
char *rr, *buf = *pbuf; char *rr, *buf = *pbuf;
@ -224,7 +228,14 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
bool found; bool found;
fa *pfa = makedfa(rs, 1); fa *pfa = makedfa(rs, 1);
found = fnematch(pfa, inf, &buf, &bufsize, recsize); if (newflag)
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
else {
int tempstat = pfa->initstat;
pfa->initstat = 2;
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
pfa->initstat = tempstat;
}
if (found) if (found)
setptr(patbeg, '\0'); setptr(patbeg, '\0');
} else { } else {

1
main.c
View File

@ -214,7 +214,6 @@ int main(int argc, char *argv[])
if (!safe) if (!safe)
envinit(environ); envinit(environ);
yyparse(); yyparse();
setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
if (fs) if (fs)
*FS = qstring(fs, '\0'); *FS = qstring(fs, '\0');
dprintf( ("errorflag=%d\n", errorflag) ); dprintf( ("errorflag=%d\n", errorflag) );

View File

@ -122,7 +122,7 @@ extern void growfldtab(int n);
extern void savefs(void); extern void savefs(void);
extern int getrec(char **, int *, bool); extern int getrec(char **, int *, bool);
extern void nextfile(void); extern void nextfile(void);
extern int readrec(char **buf, int *bufsize, FILE *inf); extern int readrec(char **buf, int *bufsize, FILE *inf, bool isnew);
extern char *getargv(int); extern char *getargv(int);
extern void setclvar(char *); extern void setclvar(char *);
extern void fldbld(void); extern void fldbld(void);
@ -191,7 +191,7 @@ extern Cell *bltin(Node **, int);
extern Cell *printstat(Node **, int); extern Cell *printstat(Node **, int);
extern Cell *nullproc(Node **, int); extern Cell *nullproc(Node **, int);
extern FILE *redirect(int, Node *); extern FILE *redirect(int, Node *);
extern FILE *openfile(int, const char *); extern FILE *openfile(int, const char *, bool *);
extern const char *filename(FILE *); extern const char *filename(FILE *);
extern Cell *closefile(Node **, int); extern Cell *closefile(Node **, int);
extern void closeall(void); extern void closeall(void);

24
run.c
View File

@ -405,6 +405,7 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
char *buf; char *buf;
int bufsize = recsize; int bufsize = recsize;
int mode; int mode;
bool newflag;
if ((buf = malloc(bufsize)) == NULL) if ((buf = malloc(bufsize)) == NULL)
FATAL("out of memory in getline"); FATAL("out of memory in getline");
@ -416,12 +417,12 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
mode = ptoi(a[1]); mode = ptoi(a[1]);
if (mode == '|') /* input pipe */ if (mode == '|') /* input pipe */
mode = LE; /* arbitrary flag */ mode = LE; /* arbitrary flag */
fp = openfile(mode, getsval(x)); fp = openfile(mode, getsval(x), &newflag);
tempfree(x); tempfree(x);
if (fp == NULL) if (fp == NULL)
n = -1; n = -1;
else else
n = readrec(&buf, &bufsize, fp); n = readrec(&buf, &bufsize, fp, newflag);
if (n <= 0) { if (n <= 0) {
; ;
} else if (a[0] != NULL) { /* getline var <file */ } else if (a[0] != NULL) { /* getline var <file */
@ -1658,7 +1659,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
if (isrec(x) || strlen(getsval(x)) == 0) { if (isrec(x) || strlen(getsval(x)) == 0) {
flush_all(); /* fflush() or fflush("") -> all */ flush_all(); /* fflush() or fflush("") -> all */
u = 0; u = 0;
} else if ((fp = openfile(FFLUSH, getsval(x))) == NULL) } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
u = EOF; u = EOF;
else else
u = fflush(fp); u = fflush(fp);
@ -1718,7 +1719,7 @@ FILE *redirect(int a, Node *b) /* set up all i/o redirections */
x = execute(b); x = execute(b);
fname = getsval(x); fname = getsval(x);
fp = openfile(a, fname); fp = openfile(a, fname, NULL);
if (fp == NULL) if (fp == NULL)
FATAL("can't open file %s", fname); FATAL("can't open file %s", fname);
tempfree(x); tempfree(x);
@ -1750,7 +1751,7 @@ static void stdinit(void) /* in case stdin, etc., are not constants */
files[2].mode = GT; files[2].mode = GT;
} }
FILE *openfile(int a, const char *us) FILE *openfile(int a, const char *us, bool *pnewflag)
{ {
const char *s = us; const char *s = us;
size_t i; size_t i;
@ -1760,11 +1761,12 @@ FILE *openfile(int a, const char *us)
if (*s == '\0') if (*s == '\0')
FATAL("null file name in print or getline"); FATAL("null file name in print or getline");
for (i = 0; i < nfiles; i++) for (i = 0; i < nfiles; i++)
if (files[i].fname && strcmp(s, files[i].fname) == 0) { if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
if (a == files[i].mode || (a==APPEND && files[i].mode==GT)) (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
return files[i].fp; a == FFLUSH)) {
if (a == FFLUSH) if (pnewflag)
return files[i].fp; *pnewflag = false;
return files[i].fp;
} }
if (a == FFLUSH) /* didn't find it, so don't create it! */ if (a == FFLUSH) /* didn't find it, so don't create it! */
return NULL; return NULL;
@ -1801,6 +1803,8 @@ FILE *openfile(int a, const char *us)
files[i].fname = tostring(s); files[i].fname = tostring(s);
files[i].fp = fp; files[i].fp = fp;
files[i].mode = m; files[i].mode = m;
if (pnewflag)
*pnewflag = true;
if (fp != stdin && fp != stdout && fp != stderr) if (fp != stdin && fp != stdout && fp != stderr)
(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
} }

View File

@ -35,6 +35,9 @@ if locale -a | grep -qsi de_DE.UTF-8; then
$awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1 $awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1
echo 'dürst|DÜRST|Dürst' >foo2 echo 'dürst|DÜRST|Dürst' >foo2
diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8' diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8'
(export LC_NUMERIC=de_DE.UTF-8 && $awk 'BEGIN { print 0.01 }' /dev/null) >foo1
echo "0.01" >foo2
diff foo1 foo2 || echo 'BAD: T.builtin LC_NUMERIC radix (.) handling'
fi fi
$awk 'BEGIN { $awk 'BEGIN {
@ -70,3 +73,18 @@ echo '1
3' >foo1 3' >foo1
$awk '{ n = split($0, x); print length(x) }' <foo0 >foo2 $awk '{ n = split($0, x); print length(x) }' <foo0 >foo2
diff foo1 foo2 || echo 'BAD: T.builtin length array' diff foo1 foo2 || echo 'BAD: T.builtin length array'
# Test for backslash handling
cat << \EOF >foo0
BEGIN {
print "A\
B";
print "CD"
}
EOF
$awk -f foo0 /dev/null >foo1
cat << \EOF >foo2
AB
CD
EOF
diff foo1 foo2 || echo 'BAD: T.builtin continuation handling (backslash)'

View File

@ -186,6 +186,14 @@ BEGIN { RS = ""
}' >foo1 }' >foo1
$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails' $awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'
# Test for RS regex being reapplied
echo aaa1a2a | $awk 1 RS='^a' >foo1
cat << \EOF > foo2
aa1a2a
EOF
diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'
# The following should not produce a warning about changing a constant # The following should not produce a warning about changing a constant
# nor about a curdled tempcell list # nor about a curdled tempcell list