3 more fixes (#75)
* LC_NUMERIC radix issue: according to https://pubs.opengroup.org/onlinepubs/7990989775/xcu/awk.html, the period character is the radix character recognized when processing awk programs. Make output print the period character as well, since this is what other awk implementations do and it makes sense from an interoperability point of view.
* Print "T.builtin" in the error message.
* Fix backslash continuation line handling.
* Keep track of RS processing so that the regex is applied properly only once per record.
This commit is contained in:
parent
91eaf7f701
commit
ffee7780fe
1
lex.c
1
lex.c
@ -388,6 +388,7 @@ int string(void)
|
|||||||
case '\\':
|
case '\\':
|
||||||
c = input();
|
c = input();
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
case '\n': break;
|
||||||
case '"': *bp++ = '"'; break;
|
case '"': *bp++ = '"'; break;
|
||||||
case 'n': *bp++ = '\n'; break;
|
case 'n': *bp++ = '\n'; break;
|
||||||
case 't': *bp++ = '\t'; break;
|
case 't': *bp++ = '\t'; break;
|
||||||
|
17
lib.c
17
lib.c
@ -35,6 +35,7 @@ THIS SOFTWARE.
|
|||||||
|
|
||||||
char EMPTY[] = { '\0' };
|
char EMPTY[] = { '\0' };
|
||||||
FILE *infile = NULL;
|
FILE *infile = NULL;
|
||||||
|
bool innew; /* true = infile has not been read by readrec */
|
||||||
char *file = EMPTY;
|
char *file = EMPTY;
|
||||||
char *record;
|
char *record;
|
||||||
int recsize = RECSIZE;
|
int recsize = RECSIZE;
|
||||||
@ -106,6 +107,7 @@ void initgetrec(void)
|
|||||||
argno++;
|
argno++;
|
||||||
}
|
}
|
||||||
infile = stdin; /* no filenames, so use stdin */
|
infile = stdin; /* no filenames, so use stdin */
|
||||||
|
innew = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -175,7 +177,9 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record *
|
|||||||
FATAL("can't open file %s", file);
|
FATAL("can't open file %s", file);
|
||||||
setfval(fnrloc, 0.0);
|
setfval(fnrloc, 0.0);
|
||||||
}
|
}
|
||||||
c = readrec(&buf, &bufsize, infile);
|
c = readrec(&buf, &bufsize, infile, innew);
|
||||||
|
if (innew)
|
||||||
|
innew = false;
|
||||||
if (c != 0 || buf[0] != '\0') { /* normal record */
|
if (c != 0 || buf[0] != '\0') { /* normal record */
|
||||||
if (isrecord) {
|
if (isrecord) {
|
||||||
if (freeable(fldtab[0]))
|
if (freeable(fldtab[0]))
|
||||||
@ -213,7 +217,7 @@ void nextfile(void)
|
|||||||
argno++;
|
argno++;
|
||||||
}
|
}
|
||||||
|
|
||||||
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
|
int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
|
||||||
{
|
{
|
||||||
int sep, c, isrec;
|
int sep, c, isrec;
|
||||||
char *rr, *buf = *pbuf;
|
char *rr, *buf = *pbuf;
|
||||||
@ -224,7 +228,14 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
|
|||||||
bool found;
|
bool found;
|
||||||
|
|
||||||
fa *pfa = makedfa(rs, 1);
|
fa *pfa = makedfa(rs, 1);
|
||||||
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
|
if (newflag)
|
||||||
|
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
|
||||||
|
else {
|
||||||
|
int tempstat = pfa->initstat;
|
||||||
|
pfa->initstat = 2;
|
||||||
|
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
|
||||||
|
pfa->initstat = tempstat;
|
||||||
|
}
|
||||||
if (found)
|
if (found)
|
||||||
setptr(patbeg, '\0');
|
setptr(patbeg, '\0');
|
||||||
} else {
|
} else {
|
||||||
|
1
main.c
1
main.c
@ -214,7 +214,6 @@ int main(int argc, char *argv[])
|
|||||||
if (!safe)
|
if (!safe)
|
||||||
envinit(environ);
|
envinit(environ);
|
||||||
yyparse();
|
yyparse();
|
||||||
setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
|
|
||||||
if (fs)
|
if (fs)
|
||||||
*FS = qstring(fs, '\0');
|
*FS = qstring(fs, '\0');
|
||||||
dprintf( ("errorflag=%d\n", errorflag) );
|
dprintf( ("errorflag=%d\n", errorflag) );
|
||||||
|
4
proto.h
4
proto.h
@ -122,7 +122,7 @@ extern void growfldtab(int n);
|
|||||||
extern void savefs(void);
|
extern void savefs(void);
|
||||||
extern int getrec(char **, int *, bool);
|
extern int getrec(char **, int *, bool);
|
||||||
extern void nextfile(void);
|
extern void nextfile(void);
|
||||||
extern int readrec(char **buf, int *bufsize, FILE *inf);
|
extern int readrec(char **buf, int *bufsize, FILE *inf, bool isnew);
|
||||||
extern char *getargv(int);
|
extern char *getargv(int);
|
||||||
extern void setclvar(char *);
|
extern void setclvar(char *);
|
||||||
extern void fldbld(void);
|
extern void fldbld(void);
|
||||||
@ -191,7 +191,7 @@ extern Cell *bltin(Node **, int);
|
|||||||
extern Cell *printstat(Node **, int);
|
extern Cell *printstat(Node **, int);
|
||||||
extern Cell *nullproc(Node **, int);
|
extern Cell *nullproc(Node **, int);
|
||||||
extern FILE *redirect(int, Node *);
|
extern FILE *redirect(int, Node *);
|
||||||
extern FILE *openfile(int, const char *);
|
extern FILE *openfile(int, const char *, bool *);
|
||||||
extern const char *filename(FILE *);
|
extern const char *filename(FILE *);
|
||||||
extern Cell *closefile(Node **, int);
|
extern Cell *closefile(Node **, int);
|
||||||
extern void closeall(void);
|
extern void closeall(void);
|
||||||
|
24
run.c
24
run.c
@ -405,6 +405,7 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
|
|||||||
char *buf;
|
char *buf;
|
||||||
int bufsize = recsize;
|
int bufsize = recsize;
|
||||||
int mode;
|
int mode;
|
||||||
|
bool newflag;
|
||||||
|
|
||||||
if ((buf = malloc(bufsize)) == NULL)
|
if ((buf = malloc(bufsize)) == NULL)
|
||||||
FATAL("out of memory in getline");
|
FATAL("out of memory in getline");
|
||||||
@ -416,12 +417,12 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
|
|||||||
mode = ptoi(a[1]);
|
mode = ptoi(a[1]);
|
||||||
if (mode == '|') /* input pipe */
|
if (mode == '|') /* input pipe */
|
||||||
mode = LE; /* arbitrary flag */
|
mode = LE; /* arbitrary flag */
|
||||||
fp = openfile(mode, getsval(x));
|
fp = openfile(mode, getsval(x), &newflag);
|
||||||
tempfree(x);
|
tempfree(x);
|
||||||
if (fp == NULL)
|
if (fp == NULL)
|
||||||
n = -1;
|
n = -1;
|
||||||
else
|
else
|
||||||
n = readrec(&buf, &bufsize, fp);
|
n = readrec(&buf, &bufsize, fp, newflag);
|
||||||
if (n <= 0) {
|
if (n <= 0) {
|
||||||
;
|
;
|
||||||
} else if (a[0] != NULL) { /* getline var <file */
|
} else if (a[0] != NULL) { /* getline var <file */
|
||||||
@ -1658,7 +1659,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
|||||||
if (isrec(x) || strlen(getsval(x)) == 0) {
|
if (isrec(x) || strlen(getsval(x)) == 0) {
|
||||||
flush_all(); /* fflush() or fflush("") -> all */
|
flush_all(); /* fflush() or fflush("") -> all */
|
||||||
u = 0;
|
u = 0;
|
||||||
} else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
|
} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
|
||||||
u = EOF;
|
u = EOF;
|
||||||
else
|
else
|
||||||
u = fflush(fp);
|
u = fflush(fp);
|
||||||
@ -1718,7 +1719,7 @@ FILE *redirect(int a, Node *b) /* set up all i/o redirections */
|
|||||||
|
|
||||||
x = execute(b);
|
x = execute(b);
|
||||||
fname = getsval(x);
|
fname = getsval(x);
|
||||||
fp = openfile(a, fname);
|
fp = openfile(a, fname, NULL);
|
||||||
if (fp == NULL)
|
if (fp == NULL)
|
||||||
FATAL("can't open file %s", fname);
|
FATAL("can't open file %s", fname);
|
||||||
tempfree(x);
|
tempfree(x);
|
||||||
@ -1750,7 +1751,7 @@ static void stdinit(void) /* in case stdin, etc., are not constants */
|
|||||||
files[2].mode = GT;
|
files[2].mode = GT;
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE *openfile(int a, const char *us)
|
FILE *openfile(int a, const char *us, bool *pnewflag)
|
||||||
{
|
{
|
||||||
const char *s = us;
|
const char *s = us;
|
||||||
size_t i;
|
size_t i;
|
||||||
@ -1760,11 +1761,12 @@ FILE *openfile(int a, const char *us)
|
|||||||
if (*s == '\0')
|
if (*s == '\0')
|
||||||
FATAL("null file name in print or getline");
|
FATAL("null file name in print or getline");
|
||||||
for (i = 0; i < nfiles; i++)
|
for (i = 0; i < nfiles; i++)
|
||||||
if (files[i].fname && strcmp(s, files[i].fname) == 0) {
|
if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
|
||||||
if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
|
(a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
|
||||||
return files[i].fp;
|
a == FFLUSH)) {
|
||||||
if (a == FFLUSH)
|
if (pnewflag)
|
||||||
return files[i].fp;
|
*pnewflag = false;
|
||||||
|
return files[i].fp;
|
||||||
}
|
}
|
||||||
if (a == FFLUSH) /* didn't find it, so don't create it! */
|
if (a == FFLUSH) /* didn't find it, so don't create it! */
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -1801,6 +1803,8 @@ FILE *openfile(int a, const char *us)
|
|||||||
files[i].fname = tostring(s);
|
files[i].fname = tostring(s);
|
||||||
files[i].fp = fp;
|
files[i].fp = fp;
|
||||||
files[i].mode = m;
|
files[i].mode = m;
|
||||||
|
if (pnewflag)
|
||||||
|
*pnewflag = true;
|
||||||
if (fp != stdin && fp != stdout && fp != stderr)
|
if (fp != stdin && fp != stdout && fp != stderr)
|
||||||
(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
|
(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
|
||||||
}
|
}
|
||||||
|
@ -35,6 +35,9 @@ if locale -a | grep -qsi de_DE.UTF-8; then
|
|||||||
$awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1
|
$awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1
|
||||||
echo 'dürst|DÜRST|Dürst' >foo2
|
echo 'dürst|DÜRST|Dürst' >foo2
|
||||||
diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8'
|
diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8'
|
||||||
|
(export LC_NUMERIC=de_DE.UTF-8 && $awk 'BEGIN { print 0.01 }' /dev/null) >foo1
|
||||||
|
echo "0.01" >foo2
|
||||||
|
diff foo1 foo2 || echo 'BAD: T.builtin LC_NUMERIC radix (.) handling'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
$awk 'BEGIN {
|
$awk 'BEGIN {
|
||||||
@ -70,3 +73,18 @@ echo '1
|
|||||||
3' >foo1
|
3' >foo1
|
||||||
$awk '{ n = split($0, x); print length(x) }' <foo0 >foo2
|
$awk '{ n = split($0, x); print length(x) }' <foo0 >foo2
|
||||||
diff foo1 foo2 || echo 'BAD: T.builtin length array'
|
diff foo1 foo2 || echo 'BAD: T.builtin length array'
|
||||||
|
|
||||||
|
# Test for backslash handling
|
||||||
|
cat << \EOF >foo0
|
||||||
|
BEGIN {
|
||||||
|
print "A\
|
||||||
|
B";
|
||||||
|
print "CD"
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
$awk -f foo0 /dev/null >foo1
|
||||||
|
cat << \EOF >foo2
|
||||||
|
AB
|
||||||
|
CD
|
||||||
|
EOF
|
||||||
|
diff foo1 foo2 || echo 'BAD: T.builtin continuation handling (backslash)'
|
||||||
|
@ -186,6 +186,14 @@ BEGIN { RS = ""
|
|||||||
}' >foo1
|
}' >foo1
|
||||||
$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'
|
$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'
|
||||||
|
|
||||||
|
# Test for RS regex being reapplied
|
||||||
|
echo aaa1a2a | $awk 1 RS='^a' >foo1
|
||||||
|
cat << \EOF > foo2
|
||||||
|
|
||||||
|
aa1a2a
|
||||||
|
|
||||||
|
EOF
|
||||||
|
diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'
|
||||||
|
|
||||||
# The following should not produce a warning about changing a constant
|
# The following should not produce a warning about changing a constant
|
||||||
# nor about a curdled tempcell list
|
# nor about a curdled tempcell list
|
||||||
|
Loading…
x
Reference in New Issue
Block a user