3 more fixes (#75)
* LC_NUMERIC radix issue. According to https://pubs.opengroup.org/onlinepubs/7990989775/xcu/awk.html, the period character is the radix character recognized when processing awk programs. Make output print the period character as well, since this is what other awk implementations do and it makes sense from an interoperability point of view.
* Print "T.builtin" in the error message.
* Fix backslash continuation line handling.
* Keep track of RS processing so that the regex is applied properly, only once per record.
This commit is contained in:
parent
91eaf7f701
commit
ffee7780fe
1
lex.c
1
lex.c
@ -388,6 +388,7 @@ int string(void)
|
||||
case '\\':
|
||||
c = input();
|
||||
switch (c) {
|
||||
case '\n': break;
|
||||
case '"': *bp++ = '"'; break;
|
||||
case 'n': *bp++ = '\n'; break;
|
||||
case 't': *bp++ = '\t'; break;
|
||||
|
15
lib.c
15
lib.c
@ -35,6 +35,7 @@ THIS SOFTWARE.
|
||||
|
||||
char EMPTY[] = { '\0' };
|
||||
FILE *infile = NULL;
|
||||
bool innew; /* true = infile has not been read by readrec */
|
||||
char *file = EMPTY;
|
||||
char *record;
|
||||
int recsize = RECSIZE;
|
||||
@ -106,6 +107,7 @@ void initgetrec(void)
|
||||
argno++;
|
||||
}
|
||||
infile = stdin; /* no filenames, so use stdin */
|
||||
innew = true;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -175,7 +177,9 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record *
|
||||
FATAL("can't open file %s", file);
|
||||
setfval(fnrloc, 0.0);
|
||||
}
|
||||
c = readrec(&buf, &bufsize, infile);
|
||||
c = readrec(&buf, &bufsize, infile, innew);
|
||||
if (innew)
|
||||
innew = false;
|
||||
if (c != 0 || buf[0] != '\0') { /* normal record */
|
||||
if (isrecord) {
|
||||
if (freeable(fldtab[0]))
|
||||
@ -213,7 +217,7 @@ void nextfile(void)
|
||||
argno++;
|
||||
}
|
||||
|
||||
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
|
||||
int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
|
||||
{
|
||||
int sep, c, isrec;
|
||||
char *rr, *buf = *pbuf;
|
||||
@ -224,7 +228,14 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
|
||||
bool found;
|
||||
|
||||
fa *pfa = makedfa(rs, 1);
|
||||
if (newflag)
|
||||
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
|
||||
else {
|
||||
int tempstat = pfa->initstat;
|
||||
pfa->initstat = 2;
|
||||
found = fnematch(pfa, inf, &buf, &bufsize, recsize);
|
||||
pfa->initstat = tempstat;
|
||||
}
|
||||
if (found)
|
||||
setptr(patbeg, '\0');
|
||||
} else {
|
||||
|
1
main.c
1
main.c
@ -214,7 +214,6 @@ int main(int argc, char *argv[])
|
||||
if (!safe)
|
||||
envinit(environ);
|
||||
yyparse();
|
||||
setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
|
||||
if (fs)
|
||||
*FS = qstring(fs, '\0');
|
||||
dprintf( ("errorflag=%d\n", errorflag) );
|
||||
|
4
proto.h
4
proto.h
@ -122,7 +122,7 @@ extern void growfldtab(int n);
|
||||
extern void savefs(void);
|
||||
extern int getrec(char **, int *, bool);
|
||||
extern void nextfile(void);
|
||||
extern int readrec(char **buf, int *bufsize, FILE *inf);
|
||||
extern int readrec(char **buf, int *bufsize, FILE *inf, bool isnew);
|
||||
extern char *getargv(int);
|
||||
extern void setclvar(char *);
|
||||
extern void fldbld(void);
|
||||
@ -191,7 +191,7 @@ extern Cell *bltin(Node **, int);
|
||||
extern Cell *printstat(Node **, int);
|
||||
extern Cell *nullproc(Node **, int);
|
||||
extern FILE *redirect(int, Node *);
|
||||
extern FILE *openfile(int, const char *);
|
||||
extern FILE *openfile(int, const char *, bool *);
|
||||
extern const char *filename(FILE *);
|
||||
extern Cell *closefile(Node **, int);
|
||||
extern void closeall(void);
|
||||
|
22
run.c
22
run.c
@ -405,6 +405,7 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
|
||||
char *buf;
|
||||
int bufsize = recsize;
|
||||
int mode;
|
||||
bool newflag;
|
||||
|
||||
if ((buf = malloc(bufsize)) == NULL)
|
||||
FATAL("out of memory in getline");
|
||||
@ -416,12 +417,12 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
|
||||
mode = ptoi(a[1]);
|
||||
if (mode == '|') /* input pipe */
|
||||
mode = LE; /* arbitrary flag */
|
||||
fp = openfile(mode, getsval(x));
|
||||
fp = openfile(mode, getsval(x), &newflag);
|
||||
tempfree(x);
|
||||
if (fp == NULL)
|
||||
n = -1;
|
||||
else
|
||||
n = readrec(&buf, &bufsize, fp);
|
||||
n = readrec(&buf, &bufsize, fp, newflag);
|
||||
if (n <= 0) {
|
||||
;
|
||||
} else if (a[0] != NULL) { /* getline var <file */
|
||||
@ -1658,7 +1659,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
||||
if (isrec(x) || strlen(getsval(x)) == 0) {
|
||||
flush_all(); /* fflush() or fflush("") -> all */
|
||||
u = 0;
|
||||
} else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
|
||||
} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
|
||||
u = EOF;
|
||||
else
|
||||
u = fflush(fp);
|
||||
@ -1718,7 +1719,7 @@ FILE *redirect(int a, Node *b) /* set up all i/o redirections */
|
||||
|
||||
x = execute(b);
|
||||
fname = getsval(x);
|
||||
fp = openfile(a, fname);
|
||||
fp = openfile(a, fname, NULL);
|
||||
if (fp == NULL)
|
||||
FATAL("can't open file %s", fname);
|
||||
tempfree(x);
|
||||
@ -1750,7 +1751,7 @@ static void stdinit(void) /* in case stdin, etc., are not constants */
|
||||
files[2].mode = GT;
|
||||
}
|
||||
|
||||
FILE *openfile(int a, const char *us)
|
||||
FILE *openfile(int a, const char *us, bool *pnewflag)
|
||||
{
|
||||
const char *s = us;
|
||||
size_t i;
|
||||
@ -1760,10 +1761,11 @@ FILE *openfile(int a, const char *us)
|
||||
if (*s == '\0')
|
||||
FATAL("null file name in print or getline");
|
||||
for (i = 0; i < nfiles; i++)
|
||||
if (files[i].fname && strcmp(s, files[i].fname) == 0) {
|
||||
if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
|
||||
return files[i].fp;
|
||||
if (a == FFLUSH)
|
||||
if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
|
||||
(a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
|
||||
a == FFLUSH)) {
|
||||
if (pnewflag)
|
||||
*pnewflag = false;
|
||||
return files[i].fp;
|
||||
}
|
||||
if (a == FFLUSH) /* didn't find it, so don't create it! */
|
||||
@ -1801,6 +1803,8 @@ FILE *openfile(int a, const char *us)
|
||||
files[i].fname = tostring(s);
|
||||
files[i].fp = fp;
|
||||
files[i].mode = m;
|
||||
if (pnewflag)
|
||||
*pnewflag = true;
|
||||
if (fp != stdin && fp != stdout && fp != stderr)
|
||||
(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
|
||||
}
|
||||
|
@ -35,6 +35,9 @@ if locale -a | grep -qsi de_DE.UTF-8; then
|
||||
$awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1
|
||||
echo 'dürst|DÜRST|Dürst' >foo2
|
||||
diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8'
|
||||
(export LC_NUMERIC=de_DE.UTF-8 && $awk 'BEGIN { print 0.01 }' /dev/null) >foo1
|
||||
echo "0.01" >foo2
|
||||
diff foo1 foo2 || echo 'BAD: T.builtin LC_NUMERIC radix (.) handling'
|
||||
fi
|
||||
|
||||
$awk 'BEGIN {
|
||||
@ -70,3 +73,18 @@ echo '1
|
||||
3' >foo1
|
||||
$awk '{ n = split($0, x); print length(x) }' <foo0 >foo2
|
||||
diff foo1 foo2 || echo 'BAD: T.builtin length array'
|
||||
|
||||
# Test for backslash handling
|
||||
cat << \EOF >foo0
|
||||
BEGIN {
|
||||
print "A\
|
||||
B";
|
||||
print "CD"
|
||||
}
|
||||
EOF
|
||||
$awk -f foo0 /dev/null >foo1
|
||||
cat << \EOF >foo2
|
||||
AB
|
||||
CD
|
||||
EOF
|
||||
diff foo1 foo2 || echo 'BAD: T.builtin continuation handling (backslash)'
|
||||
|
@ -186,6 +186,14 @@ BEGIN { RS = ""
|
||||
}' >foo1
|
||||
$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'
|
||||
|
||||
# Test for RS regex being reapplied
|
||||
echo aaa1a2a | $awk 1 RS='^a' >foo1
|
||||
cat << \EOF > foo2
|
||||
|
||||
aa1a2a
|
||||
|
||||
EOF
|
||||
diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'
|
||||
|
||||
# The following should not produce a warning about changing a constant
|
||||
# nor about a curdled tempcell list
|
||||
|
Loading…
Reference in New Issue
Block a user