Rework floating point conversions. (#98)
This commit is contained in:
parent
e508d2861c
commit
cc9e9b68d1
53
awk.1
53
awk.1
@ -577,3 +577,56 @@ The scope rules for variables in functions are a botch;
|
|||||||
the syntax is worse.
|
the syntax is worse.
|
||||||
.PP
|
.PP
|
||||||
Only eight-bit character sets are handled correctly.
|
Only eight-bit character sets are handled correctly.
|
||||||
|
.SH UNUSUAL FLOATING-POINT VALUES
|
||||||
|
.I Awk
|
||||||
|
was designed before IEEE 754 arithmetic defined Not-A-Number (NaN)
|
||||||
|
and Infinity values, which are supported by all modern floating-point
|
||||||
|
hardware.
|
||||||
|
.PP
|
||||||
|
Because
|
||||||
|
.I awk
|
||||||
|
uses
|
||||||
|
.IR strtod (3)
|
||||||
|
and
|
||||||
|
.IR atof (3)
|
||||||
|
to convert string values to double-precision floating-point values,
|
||||||
|
modern C libraries also convert strings starting with
|
||||||
|
.B inf
|
||||||
|
and
|
||||||
|
.B nan
|
||||||
|
into infinity and NaN values respectively. This led to strange results,
|
||||||
|
with something like this:
|
||||||
|
.PP
|
||||||
|
.EX
|
||||||
|
.nf
|
||||||
|
echo nancy | awk '{ print $1 + 0 }'
|
||||||
|
.fi
|
||||||
|
.EE
|
||||||
|
.PP
|
||||||
|
printing
|
||||||
|
.B nan
|
||||||
|
instead of zero.
|
||||||
|
.PP
|
||||||
|
.I Awk
|
||||||
|
now follows GNU AWK, and prefilters string values before attempting
|
||||||
|
to convert them to numbers, as follows:
|
||||||
|
.TP
|
||||||
|
.I "Hexadecimal values"
|
||||||
|
Hexadecimal values (allowed since C99) convert to zero, as they did
|
||||||
|
prior to C99.
|
||||||
|
.TP
|
||||||
|
.I "NaN values"
|
||||||
|
The two strings
|
||||||
|
.B +nan
|
||||||
|
and
|
||||||
|
.B \-nan
|
||||||
|
(case independent) convert to NaN. No others do.
|
||||||
|
(NaNs can have signs.)
|
||||||
|
.TP
|
||||||
|
.I "Infinity values"
|
||||||
|
The two strings
|
||||||
|
.B +inf
|
||||||
|
and
|
||||||
|
.B \-inf
|
||||||
|
(case independent) convert to positive and negative infinity, respectively.
|
||||||
|
No others do.
|
||||||
|
7
lex.c
7
lex.c
@ -191,7 +191,12 @@ int yylex(void)
|
|||||||
return word(buf);
|
return word(buf);
|
||||||
if (isdigit(c)) {
|
if (isdigit(c)) {
|
||||||
char *cp = tostring(buf);
|
char *cp = tostring(buf);
|
||||||
yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab);
|
double result;
|
||||||
|
|
||||||
|
if (is_number(cp, & result))
|
||||||
|
yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
|
||||||
|
else
|
||||||
|
yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
|
||||||
free(cp);
|
free(cp);
|
||||||
/* should this also have STR set? */
|
/* should this also have STR set? */
|
||||||
RET(NUMBER);
|
RET(NUMBER);
|
||||||
|
79
lib.c
79
lib.c
@ -30,6 +30,7 @@ THIS SOFTWARE.
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
#include "awk.h"
|
#include "awk.h"
|
||||||
|
|
||||||
char EMPTY[] = { '\0' };
|
char EMPTY[] = { '\0' };
|
||||||
@ -181,12 +182,14 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record *
|
|||||||
innew = false;
|
innew = false;
|
||||||
if (c != 0 || buf[0] != '\0') { /* normal record */
|
if (c != 0 || buf[0] != '\0') { /* normal record */
|
||||||
if (isrecord) {
|
if (isrecord) {
|
||||||
|
double result;
|
||||||
|
|
||||||
if (freeable(fldtab[0]))
|
if (freeable(fldtab[0]))
|
||||||
xfree(fldtab[0]->sval);
|
xfree(fldtab[0]->sval);
|
||||||
fldtab[0]->sval = buf; /* buf == record */
|
fldtab[0]->sval = buf; /* buf == record */
|
||||||
fldtab[0]->tval = REC | STR | DONTFREE;
|
fldtab[0]->tval = REC | STR | DONTFREE;
|
||||||
if (is_number(fldtab[0]->sval)) {
|
if (is_number(fldtab[0]->sval, & result)) {
|
||||||
fldtab[0]->fval = atof(fldtab[0]->sval);
|
fldtab[0]->fval = result;
|
||||||
fldtab[0]->tval |= NUM;
|
fldtab[0]->tval |= NUM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -293,6 +296,7 @@ void setclvar(char *s) /* set var=value from s */
|
|||||||
{
|
{
|
||||||
char *p;
|
char *p;
|
||||||
Cell *q;
|
Cell *q;
|
||||||
|
double result;
|
||||||
|
|
||||||
for (p=s; *p != '='; p++)
|
for (p=s; *p != '='; p++)
|
||||||
;
|
;
|
||||||
@ -300,8 +304,8 @@ void setclvar(char *s) /* set var=value from s */
|
|||||||
p = qstring(p, '\0');
|
p = qstring(p, '\0');
|
||||||
q = setsymtab(s, p, 0.0, STR, symtab);
|
q = setsymtab(s, p, 0.0, STR, symtab);
|
||||||
setsval(q, p);
|
setsval(q, p);
|
||||||
if (is_number(q->sval)) {
|
if (is_number(q->sval, & result)) {
|
||||||
q->fval = atof(q->sval);
|
q->fval = result;
|
||||||
q->tval |= NUM;
|
q->tval |= NUM;
|
||||||
}
|
}
|
||||||
DPRINTF("command line set %s to |%s|\n", s, p);
|
DPRINTF("command line set %s to |%s|\n", s, p);
|
||||||
@ -402,9 +406,11 @@ void fldbld(void) /* create fields from current record */
|
|||||||
lastfld = i;
|
lastfld = i;
|
||||||
donefld = true;
|
donefld = true;
|
||||||
for (j = 1; j <= lastfld; j++) {
|
for (j = 1; j <= lastfld; j++) {
|
||||||
|
double result;
|
||||||
|
|
||||||
p = fldtab[j];
|
p = fldtab[j];
|
||||||
if(is_number(p->sval)) {
|
if(is_number(p->sval, & result)) {
|
||||||
p->fval = atof(p->sval);
|
p->fval = result;
|
||||||
p->tval |= NUM;
|
p->tval |= NUM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -756,24 +762,67 @@ int isclvar(const char *s) /* is s of form var=something ? */
|
|||||||
/* strtod is supposed to be a proper test of what's a valid number */
|
/* strtod is supposed to be a proper test of what's a valid number */
|
||||||
/* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
|
/* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
|
||||||
/* wrong: violates 4.10.1.4 of ansi C standard */
|
/* wrong: violates 4.10.1.4 of ansi C standard */
|
||||||
|
|
||||||
/* well, not quite. As of C99, hex floating point is allowed. so this is
|
/* well, not quite. As of C99, hex floating point is allowed. so this is
|
||||||
* a bit of a mess.
|
* a bit of a mess. We work around the mess by checking for a hexadecimal
|
||||||
|
* value and disallowing it. Similarly, we now follow gawk and allow only
|
||||||
|
* +nan, -nan, +inf, and -inf for NaN and infinity values.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <math.h>
|
/*
|
||||||
int is_number(const char *s)
|
* This routine now has a more complicated interface, the main point
|
||||||
|
* being to avoid the double conversion of a string to double, and
|
||||||
|
* also to convey out, if requested, the information that the numeric
|
||||||
|
* value was a leading string or is all of the string. The latter bit
|
||||||
|
* is used in getfval().
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * is_valid_number: decide whether the string s starts with (or consists
 * of) something awk accepts as a number, converting it exactly once.
 *
 *   s                  string to examine; leading blanks, tabs, newlines
 *                      and carriage returns are skipped.
 *   trailing_stuff_ok  if true, a valid numeric prefix followed by other
 *                      text still counts as success.
 *   no_trailing        optional (may be NULL); set to true when nothing
 *                      but whitespace follows the number, false otherwise.
 *   result             optional (may be NULL); receives the converted
 *                      value whenever a numeric prefix was parsed, even
 *                      if the function then returns false because of
 *                      disallowed trailing text.
 *
 * Returns true if s is acceptable under the rules above.
 *
 * Deliberate restrictions on what strtod() would otherwise accept:
 *   - hexadecimal values (optionally signed "0x...") are rejected;
 *   - only the exact strings +nan, -nan, +inf and -inf (any case, with
 *     a mandatory sign) yield NaN / infinity;
 *   - values that overflow or underflow (ERANGE) are rejected.
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
					bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *body;	/* s with any leading sign skipped */
	bool has_sign;

	if (no_trailing != NULL)
		*no_trailing = false;

	while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
		s++;

	has_sign = (s[0] == '+' || s[0] == '-');
	body = has_sign ? s + 1 : s;

	// allow +nan, -nan, +inf, -inf; the sign is mandatory so that
	// ordinary words such as "nancy" or "info" never become numbers
	if (has_sign) {
		bool is_nan = (strcasecmp(body, "nan") == 0);
		bool is_inf = (strcasecmp(body, "inf") == 0);

		if (is_nan || is_inf) {
			// callers use *result as soon as we return true,
			// so it must always be stored on this path too
			if (is_nan)
				r = (s[0] == '-') ? -NAN : NAN;
			else
				r = (s[0] == '-') ? -INFINITY : INFINITY;
			if (result != NULL)
				*result = r;
			if (no_trailing != NULL)
				*no_trailing = true;	/* exact match: nothing follows */
			return true;
		}
	}

	// no hex floating point, sorry (signed or unsigned)
	if (body[0] == '0' && tolower((unsigned char) body[1]) == 'x')
		return false;

	// anything else must start with a digit or a decimal point;
	// the cast keeps isdigit() defined for bytes >= 0x80
	if (! isdigit((unsigned char) body[0]) && body[0] != '.')
		return false;

	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || r == HUGE_VAL || r == -HUGE_VAL || errno == ERANGE)
		return false;

	if (result != NULL)
		*result = r;

	/*
	 * check for trailing stuff
	 * allow \r as well. windows files aren't going to go away.
	 */
	while (*ep == ' ' || *ep == '\t' || *ep == '\n' || *ep == '\r')
		ep++;

	if (no_trailing != NULL)
		*no_trailing = (*ep == '\0');

	// true if we reached the end, or trailing stuff is allowed
	return (*ep == '\0') || trailing_stuff_ok;
}
|
||||||
|
4
proto.h
4
proto.h
@ -146,7 +146,9 @@ extern void eprint(void);
|
|||||||
extern void bclass(int);
|
extern void bclass(int);
|
||||||
extern double errcheck(double, const char *);
|
extern double errcheck(double, const char *);
|
||||||
extern int isclvar(const char *);
|
extern int isclvar(const char *);
|
||||||
extern int is_number(const char *);
|
extern bool is_valid_number(const char *s, bool trailing_stuff_ok,
|
||||||
|
bool *no_trailing, double *result);
|
||||||
|
#define is_number(s, val) is_valid_number(s, false, NULL, val)
|
||||||
|
|
||||||
extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what);
|
extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what);
|
||||||
extern void run(Node *);
|
extern void run(Node *);
|
||||||
|
32
run.c
32
run.c
@ -407,6 +407,7 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
|
|||||||
int bufsize = recsize;
|
int bufsize = recsize;
|
||||||
int mode;
|
int mode;
|
||||||
bool newflag;
|
bool newflag;
|
||||||
|
double result;
|
||||||
|
|
||||||
if ((buf = (char *) malloc(bufsize)) == NULL)
|
if ((buf = (char *) malloc(bufsize)) == NULL)
|
||||||
FATAL("out of memory in getline");
|
FATAL("out of memory in getline");
|
||||||
@ -429,15 +430,15 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
|
|||||||
} else if (a[0] != NULL) { /* getline var <file */
|
} else if (a[0] != NULL) { /* getline var <file */
|
||||||
x = execute(a[0]);
|
x = execute(a[0]);
|
||||||
setsval(x, buf);
|
setsval(x, buf);
|
||||||
if (is_number(x->sval)) {
|
if (is_number(x->sval, & result)) {
|
||||||
x->fval = atof(x->sval);
|
x->fval = result;
|
||||||
x->tval |= NUM;
|
x->tval |= NUM;
|
||||||
}
|
}
|
||||||
tempfree(x);
|
tempfree(x);
|
||||||
} else { /* getline <file */
|
} else { /* getline <file */
|
||||||
setsval(fldtab[0], buf);
|
setsval(fldtab[0], buf);
|
||||||
if (is_number(fldtab[0]->sval)) {
|
if (is_number(fldtab[0]->sval, & result)) {
|
||||||
fldtab[0]->fval = atof(fldtab[0]->sval);
|
fldtab[0]->fval = result;
|
||||||
fldtab[0]->tval |= NUM;
|
fldtab[0]->tval |= NUM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -448,8 +449,8 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
|
|||||||
n = getrec(&buf, &bufsize, false);
|
n = getrec(&buf, &bufsize, false);
|
||||||
x = execute(a[0]);
|
x = execute(a[0]);
|
||||||
setsval(x, buf);
|
setsval(x, buf);
|
||||||
if (is_number(x->sval)) {
|
if (is_number(x->sval, & result)) {
|
||||||
x->fval = atof(x->sval);
|
x->fval = result;
|
||||||
x->tval |= NUM;
|
x->tval |= NUM;
|
||||||
}
|
}
|
||||||
tempfree(x);
|
tempfree(x);
|
||||||
@ -726,7 +727,7 @@ Cell *indirect(Node **a, int n) /* $( a[0] ) */
|
|||||||
if ((Awkfloat)INT_MAX < val)
|
if ((Awkfloat)INT_MAX < val)
|
||||||
FATAL("trying to access out of range field %s", x->nval);
|
FATAL("trying to access out of range field %s", x->nval);
|
||||||
m = (int) val;
|
m = (int) val;
|
||||||
if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
|
if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */
|
||||||
FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
|
FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
|
||||||
/* BUG: can x->nval ever be null??? */
|
/* BUG: can x->nval ever be null??? */
|
||||||
tempfree(x);
|
tempfree(x);
|
||||||
@ -1259,6 +1260,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
|||||||
int sep;
|
int sep;
|
||||||
char temp, num[50];
|
char temp, num[50];
|
||||||
int n, tempstat, arg3type;
|
int n, tempstat, arg3type;
|
||||||
|
double result;
|
||||||
|
|
||||||
y = execute(a[0]); /* source string */
|
y = execute(a[0]); /* source string */
|
||||||
origs = s = strdup(getsval(y));
|
origs = s = strdup(getsval(y));
|
||||||
@ -1303,8 +1305,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
|||||||
snprintf(num, sizeof(num), "%d", n);
|
snprintf(num, sizeof(num), "%d", n);
|
||||||
temp = *patbeg;
|
temp = *patbeg;
|
||||||
setptr(patbeg, '\0');
|
setptr(patbeg, '\0');
|
||||||
if (is_number(s))
|
if (is_number(s, & result))
|
||||||
setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
|
setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
|
||||||
else
|
else
|
||||||
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
|
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
|
||||||
setptr(patbeg, temp);
|
setptr(patbeg, temp);
|
||||||
@ -1322,8 +1324,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
|||||||
}
|
}
|
||||||
n++;
|
n++;
|
||||||
snprintf(num, sizeof(num), "%d", n);
|
snprintf(num, sizeof(num), "%d", n);
|
||||||
if (is_number(s))
|
if (is_number(s, & result))
|
||||||
setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
|
setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
|
||||||
else
|
else
|
||||||
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
|
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
|
||||||
spdone:
|
spdone:
|
||||||
@ -1343,8 +1345,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
|||||||
temp = *s;
|
temp = *s;
|
||||||
setptr(s, '\0');
|
setptr(s, '\0');
|
||||||
snprintf(num, sizeof(num), "%d", n);
|
snprintf(num, sizeof(num), "%d", n);
|
||||||
if (is_number(t))
|
if (is_number(t, & result))
|
||||||
setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
|
setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
|
||||||
else
|
else
|
||||||
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
|
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
|
||||||
setptr(s, temp);
|
setptr(s, temp);
|
||||||
@ -1372,8 +1374,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
|||||||
temp = *s;
|
temp = *s;
|
||||||
setptr(s, '\0');
|
setptr(s, '\0');
|
||||||
snprintf(num, sizeof(num), "%d", n);
|
snprintf(num, sizeof(num), "%d", n);
|
||||||
if (is_number(t))
|
if (is_number(t, & result))
|
||||||
setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
|
setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
|
||||||
else
|
else
|
||||||
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
|
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
|
||||||
setptr(s, temp);
|
setptr(s, temp);
|
||||||
|
24
tran.c
24
tran.c
@ -129,9 +129,11 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */
|
|||||||
free(cp->sval);
|
free(cp->sval);
|
||||||
cp->sval = (char *) ARGVtab;
|
cp->sval = (char *) ARGVtab;
|
||||||
for (i = 0; i < ac; i++) {
|
for (i = 0; i < ac; i++) {
|
||||||
|
double result;
|
||||||
|
|
||||||
sprintf(temp, "%d", i);
|
sprintf(temp, "%d", i);
|
||||||
if (is_number(*av))
|
if (is_number(*av, & result))
|
||||||
setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
|
setsymtab(temp, *av, result, STR|NUM, ARGVtab);
|
||||||
else
|
else
|
||||||
setsymtab(temp, *av, 0.0, STR, ARGVtab);
|
setsymtab(temp, *av, 0.0, STR, ARGVtab);
|
||||||
av++;
|
av++;
|
||||||
@ -148,13 +150,15 @@ void envinit(char **envp) /* set up ENVIRON variable */
|
|||||||
free(cp->sval);
|
free(cp->sval);
|
||||||
cp->sval = (char *) ENVtab;
|
cp->sval = (char *) ENVtab;
|
||||||
for ( ; *envp; envp++) {
|
for ( ; *envp; envp++) {
|
||||||
|
double result;
|
||||||
|
|
||||||
if ((p = strchr(*envp, '=')) == NULL)
|
if ((p = strchr(*envp, '=')) == NULL)
|
||||||
continue;
|
continue;
|
||||||
if( p == *envp ) /* no left hand side name in env string */
|
if( p == *envp ) /* no left hand side name in env string */
|
||||||
continue;
|
continue;
|
||||||
*p++ = 0; /* split into two strings at = */
|
*p++ = 0; /* split into two strings at = */
|
||||||
if (is_number(p))
|
if (is_number(p, & result))
|
||||||
setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
|
setsymtab(*envp, p, result, STR|NUM, ENVtab);
|
||||||
else
|
else
|
||||||
setsymtab(*envp, p, 0.0, STR, ENVtab);
|
setsymtab(*envp, p, 0.0, STR, ENVtab);
|
||||||
p[-1] = '='; /* restore in case env is passed down to a shell */
|
p[-1] = '='; /* restore in case env is passed down to a shell */
|
||||||
@ -399,9 +403,15 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */
|
|||||||
else if (isrec(vp) && !donerec)
|
else if (isrec(vp) && !donerec)
|
||||||
recbld();
|
recbld();
|
||||||
if (!isnum(vp)) { /* not a number */
|
if (!isnum(vp)) { /* not a number */
|
||||||
vp->fval = atof(vp->sval); /* best guess */
|
double fval;
|
||||||
if (is_number(vp->sval) && !(vp->tval&CON))
|
bool no_trailing;
|
||||||
vp->tval |= NUM; /* make NUM only sparingly */
|
|
||||||
|
if (is_valid_number(vp->sval, true, & no_trailing, & fval)) {
|
||||||
|
vp->fval = fval;
|
||||||
|
if (no_trailing && !(vp->tval&CON))
|
||||||
|
vp->tval |= NUM; /* make NUM only sparingly */
|
||||||
|
} else
|
||||||
|
vp->fval = 0.0;
|
||||||
}
|
}
|
||||||
DPRINTF("getfval %p: %s = %g, t=%o\n",
|
DPRINTF("getfval %p: %s = %g, t=%o\n",
|
||||||
(void*)vp, NN(vp->nval), vp->fval, vp->tval);
|
(void*)vp, NN(vp->nval), vp->fval, vp->tval);
|
||||||
|
Loading…
Reference in New Issue
Block a user