diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..17715fc --- /dev/null +++ b/ChangeLog @@ -0,0 +1,225 @@ +2018-08-22 Arnold D. Robbins + + * awktest.tar (testdir/T.expr): Fix test for unary plus. + +2018-08-22 Arnold D. Robbins + + * REGRESS: Extract tests if necessary, set PATH to include '.'. + * regdir/beebe.tar (Makefile): Fix longwrds test to prefix + sort with LC_ALL=C. + * awktest.tar: Updated from fixed test suite, directory + it extracts is now called 'testdir' to match what's in top-level + REGRESS script. + * regdir: Removed, as Brian wants to keep the test suite in + the tar file. + +2018-08-22 Arnold D. Robbins + + * FIXES, lib.c, run.c, makefile, main.c: Merge from Brian's tree. + * REGRESS: New file, from Brian. + * awktest.tar: Restored from Brian's tree. + +2018-08-22 Arnold D. Robbins + + * awkgram.y (UPLUS): New token. In the grammar, call op1() + with it. + * maketab.c (proc): Add entry for UPLUS. + * run.c (arith): Handle UPLUS. + * main.c (version): Updated. + * bugs-fixed/unary-plus.awk, bugs-fixed/unary-plus.bad, + bugs-fixed/unary-plus.ok: New files. + +2018-08-10 Arnold D. Robbins + + * TODO: Updated. + * awk.1: Improve use of macros, add some additional explanation + in a few places, alphabetize list of variables. + +2018-08-08 Arnold D. Robbins + + * awk.h (Cell): Add new field `fmt' to track xFMT value used + for a string conversion. + [CONVC, CONVO]: New flag macros. + * bugs-fixed/README: Updated. + * bugs-fixed/string-conv.awk, bugs-fixed/string-conv.bad, + bugs-fixed/string-conv.ok: New files. + * main.c (version): Updated. + * proto.h (flags2str): Add declaration. + * tran.c (setfval): Clear CONVC and CONVO flags and set vp->fmt + to NULL. + (setsval): Ditto. Add large comment and new code to manage + correct conversion of number to string based on various flags + and the value of vp->fmt. The idea is to not convert again + if xFMT is the same as before and we're doing the same conversion. 
+ Otherwise, clear the old flags, set the new, and reconvert. + (flags2str): New function. For debug prints and for use from a debugger. + +2018-08-05 Arnold D. Robbins + + Fix filename conflicts in regdir where the only difference was + in letter case. This caused problems on Windows systems. + + * regdir/Compare.T1: Renamed from regdir/Compare.T. + * regdir/t.delete0: Renamed from regdir/t.delete. + * regdir/t.getline1: Renamed from regdir/t.getline. + * regdir/t.redir1: Renamed from regdir/t.redir. + * regdir/t.split1: Renamed from regdir/t.split. + * regdir/t.sub0: Renamed from regdir/t.sub. + * regdir/REGRESS: Adjusted. + +2018-08-04 Arnold D. Robbins + + With scalpel, tweezers, magnifying glass and bated breath, + borrow code from the NetBSD version of nawk to fix the years-old + bug whereby decrementing the value of NF did not change the + record. + + * lib.c (fldbld): Set donerec to 1 when done. + (setlastfld): New function. + * proto.h (setlastfld): Add declaration. + * run.c (copycell): Make code smarter about flags (from NetBSD code). + * tran.c (setfree): New function. + * tran.c (setfval): Normalize negative zero to positive zero. + If setting NF, clear donerec and call setlastfld(). + (setsval): Remove call to save_old_OFS(). If setting OFS, call + recbld(). If setting NF, clear donerec and call setlastfld(). + + As part of the process, revert OFS-related changes of 2018-05-22: + + * awk.h (saveOFS, saveOFSlen, save_old_OFS): Remove declarations. + * lib.c (recbld): Use *OFS instead of saveOFS. + * run.c (saveOFS, saveOFSlen, save_old_OFS): Remove. + * tran.c (syminit): Remove initialization of saveOFS and saveOFSlen. + + General stuff that goes along with all this: + + * bugs-fixed/README: Updated. + * bugs-fixed/decr-NF.awk, bugs-fixed/decr-NF.bad, + bugs-fixed/decr-NF.ok: New files. + * main.c (version): Updated. + * regdir/README.TESTS: Fix awk book title. + * regdir/T.misc: Revise test to match fixed code.
+ * run.c (format): Increase size of buffer used for %a test. (Unrelated + to NF or OFS, but fixes a compiler complaint.) + +2018-06-07 Arnold D. Robbins + + * regdir/beebe.tar: Fix longwrds.ok so that the test will pass. + The file was incorrectly sorted. + +2018-06-06 Arnold D. Robbins + + * regdir/T.lilly: Fix the bug again in the second instance + of the code. Thanks to BWK for pointing this out. + +2018-05-31 Arnold D. Robbins + + * regdir/T.lilly: Fix a syntax error and ordering bug + in creating the 'foo' file. + +2018-05-23 Arnold D. Robbins + + * awk.1: Remove standalone 'awk' at the top of file, it messed up + the formatting. Arrange built-in variable list in alphabetical + order. + +2018-05-23 Arnold D. Robbins + + * main.c (version): Add my email address and a date so that + users can tell this isn't straight BWK awk. + * README.md: Minor updates. + * TODO: Updated. + +2018-05-22 Arnold D. Robbins + + Add POSIX-required formats %a and %A. + + * run.c (format): Check for %a support in C library. If there, + allow %a and %A as valid formats. + * TODO: Updated. + * bugs-fixed/README: Updated. + * bugs-fixed/a-format.awk, bugs-fixed/a-format.bad, + bugs-fixed/a-format.ok: New files. + +2018-05-22 Arnold D. Robbins + + * FIXES: Restored a line from a much earlier version that + apparently got lost when the dates were reordered. + * TODO: Updated. + +2018-05-22 Arnold D. Robbins + + * README.md: New file. + +2018-05-22 Arnold D. Robbins + + * regdir/echo.c, regdir/time.c: Minor fixes to compile without + warning on current GCC / Linux. + +2018-05-22 Arnold D. Robbins + + * TODO: New file. + +2018-05-22 Arnold D. Robbins + + * makefile (gitadd, gitpush): Remove these targets. They + should not be automated and were incorrect for things that + would be done regularly. + +2018-05-22 Arnold D. Robbins + + Fix nawk so that [[:blank:]] only matches space and tab instead + of any whitespace character, originally made May 10, 2018. + See bugs-fixed/space.awk. 
+ + This appears to have been a thinko on Brian's part. + + * b.c (charclasses): Use xisblank() function for [[:blank:]]. + * bugs-fixed/README: Updated. + * bugs-fixed/space.awk, bugs-fixed/space.bad, + bugs-fixed/space.ok: New files. + +2018-05-22 Arnold D. Robbins + + * .gitignore: New file. + +2018-05-22 Arnold D. Robbins + + Fix nawk to provide reasonable exit status for system(), + a la gawk, originally made March 12, 2016. See + bugs-fixed/system-status.awk. + + * run.c (bltin): For FSYSTEM, use the macros defined for wait(2) + to produce a reasonable exit value, instead of doing a floating-point + division by 256. + * awk.1: Document the return status values. + * bugs-fixed/README: Updated. + * bugs-fixed/system-status.awk, bugs-fixed/system-status.bad, + bugs-fixed/system-status.ok: New files. + +2018-05-22 Arnold D. Robbins + + Bug fix with respect to rebuilding a record, originally + made August 19, 2014. See bugs-fixed/ofs-rebuild.awk. + + * awk.h (saveOFS, saveOFSlen): Declare new variables. + * lib.c (recbld): Use them when rebuilding the record. + * run.c (saveOFS, saveOFSlen): Define new variables. + (save_old_OFS): New function to save OFS aside. + * tran.c (syminit): Initialize saveOFS and saveOFSlen. + (setsval): If setting a field, call save_old_OFS(). + * bugs-fixed/README, bugs-fixed/ofs-rebuild.awk, + bugs-fixed/ofs-rebuild.bad, bugs-fixed/ofs-rebuild.ok: New files. + +2018-05-22 Arnold D. Robbins + + * makefile (YACC): Use bison. + +2018-05-22 Arnold D. Robbins + + * ChangeLog: Created. + * regdir: Created. Based on contents of awktest.a. + * .gitattributes: Created, to preserve CR LF in regdir/t.crlf. + * awktest.a: Removed. + * regdir/T.gawk, regdir/T.latin1: Updated from awktest.tar. + * awktest.tar: Removed. diff --git a/FIXES b/FIXES index b17d8a3..5c33dd9 100644 --- a/FIXES +++ b/FIXES @@ -514,6 +514,8 @@ May 12, 1998: Mar 12, 1998: added -V to print version number and die. 
+[notify dave kerns, dkerns@dacsoup.ih.lucent.com] + Feb 11, 1998: subtle silent bug in lex.c: if the program ended with a number longer than 1 digit, part of the input would be pushed back and diff --git a/REGRESS b/REGRESS index f4372d6..facbd83 100755 --- a/REGRESS +++ b/REGRESS @@ -1,3 +1,20 @@ +#! /bin/sh + +if [ -d testdir ] +then + true # do nothing +elif [ -f awktest.tar ] +then + echo extracting testdir + tar -xpf awktest.tar +else + echo $0: No testdir directory and no awktest.tar to extract it from! >&2 + exit 1 +fi + cd testdir pwd +PATH=.:$PATH +export PATH + REGRESS diff --git a/TODO b/TODO new file mode 100644 index 0000000..5099569 --- /dev/null +++ b/TODO @@ -0,0 +1,20 @@ +Fri Aug 10 11:11:11 IDT 2018 +============================ + +A semi-random list of things to look into. + +1. DONE. Make sure the FIXES file has everything after the date order +was reversed. + +2. DONE. Look into the problem that NF-- doesn't throw away the last field. +This is a long-standing issue. + +3. DONE. Add support for %a and %A printf formats. + +4. DONE. Look into the problem that string conversions are sticky; once +done they never change, even if OFMT or CONVFMT do. Similarly, a +conversion from OFMT becomes the permanent string value, which is +incorrect. + +5. MAYBE: Improve the test suite enough so that it can be cleaned up after +it was run, and old results can be saved for comparison. diff --git a/awk.1 b/awk.1 index 6119613..5830143 100644 --- a/awk.1 +++ b/awk.1 @@ -7,7 +7,6 @@ .fi .ft 1 .. -awk .TH AWK 1 .CT 1 files prog_other .SH NAME @@ -36,7 +35,7 @@ awk \- pattern-directed scanning and processing language scans each input .I file for lines that match any of a set of patterns specified literally in -.IR prog +.I prog or in one or more files specified as .B \-f @@ -53,7 +52,7 @@ The file name .B \- means the standard input.
Any -.IR file +.I file of the form .I var=value is treated as an assignment, not a filename, @@ -70,12 +69,12 @@ any number of options may be present. The .B \-F -.IR fs +.I fs option defines the input field separator to be the regular expression -.IR fs. +.IR fs . .PP An input line is normally made up of fields separated by white space, -or by regular expression +or by the regular expression .BR FS . The fields are denoted .BR $1 , @@ -87,7 +86,7 @@ If .BR FS is null, the input line is split into one field per character. .PP -A pattern-action statement has the form +A pattern-action statement has the form: .IP .IB pattern " { " action " } .PP @@ -101,7 +100,7 @@ An action is a sequence of statements. A statement can be one of the following: .PP .EX -.ta \w'\f(CWdelete array[expression]'u +.ta \w'\f(CWdelete array[expression]\fR'u .RS .nf .ft CW @@ -145,7 +144,7 @@ The operators are also available in expressions. Variables may be scalars, array elements (denoted -.IB x [ i ] ) +.IB x [ i ] \fR) or fields. Variables are initialized to the null string. Array subscripts may be any string, @@ -161,11 +160,11 @@ The .B print statement prints its arguments on the standard output (or on a file if -.BI > file +.BI > " file or -.BI >> file +.BI >> " file is present or on a pipe if -.BI | cmd +.BI | " cmd is present), separated by the current output field separator, and terminated by the output record separator. .I file @@ -176,9 +175,10 @@ identical string values in different statements denote the same open file. The .B printf -statement formats its expression list according to the format +statement formats its expression list according to the +.I format (see -.IR printf (3)) . +.IR printf (3)). The built-in function .BI close( expr ) closes the file or pipe @@ -189,13 +189,13 @@ flushes any buffered output for the file or pipe .IR expr . 
.PP The mathematical functions +.BR atan2 , +.BR cos , .BR exp , .BR log , -.BR sqrt , .BR sin , -.BR cos , and -.BR atan2 +.B sqrt are built in. Other built-in functions: .TF length @@ -203,7 +203,8 @@ Other built-in functions: .B length the length of its argument taken as a string, -or of +number of elements in an array for an array argument, +or length of .B $0 if no argument. .TP @@ -218,14 +219,18 @@ and returns the previous seed. .B int truncates to an integer value .TP -.BI substr( s , " m" , " n\fB) +\fBsubstr(\fIs\fB, \fIm\fR [\fB, \fIn\^\fR]\fB)\fR the .IR n -character substring of .I s that begins at position -.IR m +.I m counted from 1. +If no +.IR n , +use the rest of the string +.IR s . .TP .BI index( s , " t" ) the position in @@ -246,14 +251,14 @@ and .B RLENGTH are set to the position and length of the matched string. .TP -.BI split( s , " a" , " fs\fB) +\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIfs\^\fR]\fB)\fR splits the string .I s into array elements -.IB a [1] , -.IB a [2] , +.IB a [1] \fR, +.IB a [2] \fR, \&..., -.IB a [ n ] , +.IB a [ n ] \fR, and returns .IR n . The separation is done with the regular expression @@ -266,7 +271,7 @@ is not given. An empty string as field separator splits the string into one array element per character. .TP -.BI sub( r , " t" , " s\fB) +\fBsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB) substitutes .I t for the first occurrence of the regular expression @@ -279,7 +284,7 @@ is not given, .B $0 is used. .TP -.B gsub +\fBgsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB) same as .B sub except that all occurrences of the regular expression @@ -289,18 +294,28 @@ and .B gsub return the number of replacements. .TP -.BI sprintf( fmt , " expr" , " ...\fB ) +.BI sprintf( fmt , " expr" , " ...\fB) the string resulting from formatting .I expr ... according to the .IR printf (3) format -.I fmt +.IR fmt . .TP .BI system( cmd ) executes .I cmd -and returns its exit status +and returns its exit status.
This will be \-1 upon error, +.IR cmd 's +exit status upon a normal exit, +256 + +.I sig +upon death-by-signal, where +.I sig +is the number of the murdering signal, +or 512 + +.I sig +if there was a core dump. .TP .BI tolower( str ) returns a copy of @@ -321,7 +336,7 @@ sets .B $0 to the next input record from the current input file; .B getline -.BI < file +.BI < " file sets .B $0 to the next record from @@ -359,7 +374,7 @@ Isolated regular expressions in a pattern apply to the entire line. Regular expressions may also occur in relational expressions, using the operators -.BR ~ +.B ~ and .BR !~ . .BI / re / @@ -383,8 +398,12 @@ A relational expression is one of the following: .br .BI ( expr , expr,... ") in " array-name .PP -where a relop is any of the six relational operators in C, -and a matchop is either +where a +.I relop +is any of the six relational operators in C, +and a +.I matchop +is either .B ~ (matches) or @@ -405,57 +424,68 @@ and after the last. and .B END do not combine with other patterns. +They may appear multiple times in a program and execute +in the order they are read by +.IR awk . .PP Variable names with special meanings: .TF FILENAME .TP +.B ARGC +argument count, assignable. +.TP +.B ARGV +argument array, assignable; +non-null members are taken as filenames. +.TP .B CONVFMT conversion format used when converting numbers (default -.BR "%.6g" ) +.BR "%.6g" ). +.TP +.B ENVIRON +array of environment variables; subscripts are names. +.TP +.B FILENAME +the name of the current input file. +.TP +.B FNR +ordinal number of the current record in the current file. .TP .B FS regular expression used to separate fields; also settable by option -.BI \-F fs. +.BI \-F fs\fR. .TP .BR NF -number of fields in the current record +number of fields in the current record. 
.TP .B NR -ordinal number of the current record -.TP -.B FNR -ordinal number of the current record in the current file -.TP -.B FILENAME -the name of the current input file -.TP -.B RS -input record separator (default newline) -.TP -.B OFS -output field separator (default blank) -.TP -.B ORS -output record separator (default newline) +ordinal number of the current record. .TP .B OFMT output format for numbers (default -.BR "%.6g" ) +.BR "%.6g" ). +.TP +.B OFS +output field separator (default space). +.TP +.B ORS +output record separator (default newline). +.TP +.B RLENGTH +the length of a string matched by +.BR match . +.TP +.B RS +input record separator (default newline). +.TP +.B RSTART +the start position of a string matched by +.BR match . .TP .B SUBSEP -separates multiple subscripts (default 034) -.TP -.B ARGC -argument count, assignable -.TP -.B ARGV -argument array, assignable; -non-null members are taken as filenames -.TP -.B ENVIRON -array of environment variables; subscripts are names. +separates multiple subscripts (default 034). .PD .PP Functions may be defined (at the position of a pattern-action statement) thus: @@ -486,7 +516,7 @@ BEGIN { FS = ",[ \et]*|[ \et]+" } .EE .ns .IP -Same, with input fields separated by comma and/or blanks and tabs. +Same, with input fields separated by comma and/or spaces and tabs. .PP .EX .nf @@ -512,13 +542,13 @@ BEGIN { # Simulate echo(1) .fi .EE .SH SEE ALSO +.IR grep (1), .IR lex (1), .IR sed (1) .br A. V. Aho, B. W. Kernighan, P. J. Weinberger, -.I -The AWK Programming Language, -Addison-Wesley, 1988. ISBN 0-201-07981-X +.IR "The AWK Programming Language" , +Addison-Wesley, 1988. ISBN 0-201-07981-X. .SH BUGS There are no explicit conversions between numbers and strings. To force an expression to be treated as a number add 0 to it; @@ -527,3 +557,7 @@ to force it to be treated as a string concatenate .br The scope rules for variables in functions are a botch; the syntax is worse. 
+.br +POSIX-standard interval expressions in regular expressions are not supported. +.br +Only eight-bit character sets are handled correctly. diff --git a/awk.h b/awk.h index a36cdb1..70097b9 100644 --- a/awk.h +++ b/awk.h @@ -81,7 +81,8 @@ typedef struct Cell { char *nval; /* name, for variables only */ char *sval; /* string value */ Awkfloat fval; /* value as number */ - int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */ + int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */ + char *fmt; /* CONVFMT/OFMT value used to convert from number */ struct Cell *cnext; /* ptr to next if chained */ } Cell; @@ -109,6 +110,8 @@ extern Cell *rlengthloc; /* RLENGTH */ #define FCN 040 /* this is a function name */ #define FLD 0100 /* this is a field $1, $2, ... */ #define REC 0200 /* this is $0 */ +#define CONVC 0400 /* string was converted from number via CONVFMT */ +#define CONVO 01000 /* string was converted from number via OFMT */ /* function types */ diff --git a/awkgram.y b/awkgram.y index 5b5c461..e4abeed 100644 --- a/awkgram.y +++ b/awkgram.y @@ -86,7 +86,7 @@ Node *arglist = 0; /* list of args for current function */ %left CAT %left '+' '-' %left '*' '/' '%' -%left NOT UMINUS +%left NOT UMINUS UPLUS %right POWER %right DECR INCR %left INDIRECT @@ -357,7 +357,7 @@ term: | term '%' term { $$ = op2(MOD, $1, $3); } | term POWER term { $$ = op2(POWER, $1, $3); } | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); } - | '+' term %prec UMINUS { $$ = $2; } + | '+' term %prec UMINUS { $$ = op1(UPLUS, $2); } | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); } | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); } | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); } diff --git a/awktest.tar b/awktest.tar index d1cff8c..959b452 100644 Binary files a/awktest.tar and b/awktest.tar differ diff --git a/b.c b/b.c index 4f78a60..89a7841 100644 --- a/b.c +++ b/b.c @@ -749,7 +749,7 @@ struct charclass { { "alnum", 5, isalnum }, { "alpha", 5,
isalpha }, #ifndef HAS_ISBLANK - { "blank", 5, isspace }, /* was isblank */ + { "blank", 5, xisblank }, #else { "blank", 5, isblank }, #endif diff --git a/bugs-fixed/README b/bugs-fixed/README new file mode 100644 index 0000000..222ef68 --- /dev/null +++ b/bugs-fixed/README @@ -0,0 +1,25 @@ +List of bugs fixed. + +1. ofs-rebuild: OFS value used to rebuild the record was incorrect. +Fixed August 19, 2014. Revised fix August 2018. + +2. system-status: Instead of a floating-point division by 256, use +the wait(2) macros to create a reasonable exit status. Fixed March 12, 2016. + +3. space: Use provided xisblank() function instead of isspace() for +matching [[:blank:]]. + +4. a-format: Add POSIX standard %a and %A to supported formats. Check +at runtime that this format is available. + +5. decr-NF: Decrementing NF did not change $0. This is a decades-old +bug. There are interactions with the old and new value of OFS as well. +Most of the fix came from the NetBSD awk. + +6. string-conv: String conversions of scalars were sticky. Once a +conversion to string happened, even with OFMT, that value was used until +a new numeric value was assigned, even if OFMT differed from CONVFMT, +and also if CONVFMT changed. + +7. unary-plus: Unary plus on a string constant returned the string. +Instead, it should convert the value to numeric and give that value.
diff --git a/bugs-fixed/a-format.awk b/bugs-fixed/a-format.awk new file mode 100644 index 0000000..5b7929e --- /dev/null +++ b/bugs-fixed/a-format.awk @@ -0,0 +1,3 @@ +BEGIN { + printf("%a\n", 42) +} diff --git a/bugs-fixed/a-format.bad b/bugs-fixed/a-format.bad new file mode 100644 index 0000000..1281825 --- /dev/null +++ b/bugs-fixed/a-format.bad @@ -0,0 +1,3 @@ +nawk: weird printf conversion %a + source line number 2 +%a42 diff --git a/bugs-fixed/a-format.ok b/bugs-fixed/a-format.ok new file mode 100644 index 0000000..e421e2d --- /dev/null +++ b/bugs-fixed/a-format.ok @@ -0,0 +1 @@ +0x1.5p+5 diff --git a/bugs-fixed/decr-NF.awk b/bugs-fixed/decr-NF.awk new file mode 100644 index 0000000..7474991 --- /dev/null +++ b/bugs-fixed/decr-NF.awk @@ -0,0 +1,11 @@ +BEGIN { + $0 = "a b c d e f" + print NF + OFS = ":" + NF-- + print $0 + print NF + NF++ + print $0 + print NF +} diff --git a/bugs-fixed/decr-NF.bad b/bugs-fixed/decr-NF.bad new file mode 100644 index 0000000..b634e06 --- /dev/null +++ b/bugs-fixed/decr-NF.bad @@ -0,0 +1,5 @@ +6 +a b c d e f +5 +a b c d e f +6 diff --git a/bugs-fixed/decr-NF.ok b/bugs-fixed/decr-NF.ok new file mode 100644 index 0000000..3359cf2 --- /dev/null +++ b/bugs-fixed/decr-NF.ok @@ -0,0 +1,5 @@ +6 +a:b:c:d:e +5 +a:b:c:d:e: +6 diff --git a/bugs-fixed/ofs-rebuild.awk b/bugs-fixed/ofs-rebuild.awk new file mode 100644 index 0000000..dd27000 --- /dev/null +++ b/bugs-fixed/ofs-rebuild.awk @@ -0,0 +1,17 @@ +# The bug here is that nawk should use the value of OFS that +# was current when $0 became invalid to rebuild the record. + +BEGIN { + OFS = ":" + $0 = "a b c d e f g" + $3 = "3333" + # Conceptually, $0 should now be "a:b:3333:d:e:f:g" + + # Change OFS after (conceptually) rebuilding the record + OFS = "<>" + + # Unmodified nawk prints "a<>b<>3333<>d<>e<>f<>g" because + # it delays rebuilding $0 until it's needed, and then it uses + # the current value of OFS. Oops.
+ print +} diff --git a/bugs-fixed/ofs-rebuild.bad b/bugs-fixed/ofs-rebuild.bad new file mode 100644 index 0000000..7570811 --- /dev/null +++ b/bugs-fixed/ofs-rebuild.bad @@ -0,0 +1 @@ +a<>b<>3333<>d<>e<>f<>g diff --git a/bugs-fixed/ofs-rebuild.ok b/bugs-fixed/ofs-rebuild.ok new file mode 100644 index 0000000..2689218 --- /dev/null +++ b/bugs-fixed/ofs-rebuild.ok @@ -0,0 +1 @@ +a:b:3333:d:e:f:g diff --git a/bugs-fixed/space.awk b/bugs-fixed/space.awk new file mode 100644 index 0000000..6aa87d2 --- /dev/null +++ b/bugs-fixed/space.awk @@ -0,0 +1,22 @@ +BEGIN { + c[" "] = "\" \"" + c["\a"] = "\\a" + c["\b"] = "\\b" + c["\f"] = "\\f" + c["\n"] = "\\n" + c["\r"] = "\\r" + c["\t"] = "\\t" + c["\v"] = "\\v" + + sort = "LC_ALL=C sort" + + for (i in c) + printf("%s %s [[:space:]]\n", c[i], + i ~ /[[:space:]]/ ? "~" : "!~") | sort + + for (i in c) + printf("%s %s [[:blank:]]\n", c[i], + i ~ /[[:blank:]]/ ? "~" : "!~") | sort + + close(sort) +} diff --git a/bugs-fixed/space.bad b/bugs-fixed/space.bad new file mode 100644 index 0000000..f92055f --- /dev/null +++ b/bugs-fixed/space.bad @@ -0,0 +1,16 @@ +" " ~ [[:blank:]] +" " ~ [[:space:]] +\a !~ [[:blank:]] +\a !~ [[:space:]] +\b !~ [[:blank:]] +\b !~ [[:space:]] +\f ~ [[:blank:]] +\f ~ [[:space:]] +\n ~ [[:blank:]] +\n ~ [[:space:]] +\r ~ [[:blank:]] +\r ~ [[:space:]] +\t ~ [[:blank:]] +\t ~ [[:space:]] +\v ~ [[:blank:]] +\v ~ [[:space:]] diff --git a/bugs-fixed/space.ok b/bugs-fixed/space.ok new file mode 100644 index 0000000..4278c5c --- /dev/null +++ b/bugs-fixed/space.ok @@ -0,0 +1,16 @@ +" " ~ [[:blank:]] +" " ~ [[:space:]] +\a !~ [[:blank:]] +\a !~ [[:space:]] +\b !~ [[:blank:]] +\b !~ [[:space:]] +\f !~ [[:blank:]] +\f ~ [[:space:]] +\n !~ [[:blank:]] +\n ~ [[:space:]] +\r !~ [[:blank:]] +\r ~ [[:space:]] +\t ~ [[:blank:]] +\t ~ [[:space:]] +\v !~ [[:blank:]] +\v ~ [[:space:]] diff --git a/bugs-fixed/string-conv.awk b/bugs-fixed/string-conv.awk new file mode 100644 index 0000000..a1f04ab --- /dev/null +++ 
b/bugs-fixed/string-conv.awk @@ -0,0 +1,13 @@ +BEGIN { + OFMT = ">>%.6g<<" + a = 12.1234 + print "a =", a + b = a "" + print "1 ->", b + CONVFMT = "%2.2f" + b = a "" + print "2 ->", b + CONVFMT = "%.12g" + b = a "" + print "3 ->", b +} diff --git a/bugs-fixed/string-conv.bad b/bugs-fixed/string-conv.bad new file mode 100644 index 0000000..2ab95e8 --- /dev/null +++ b/bugs-fixed/string-conv.bad @@ -0,0 +1,4 @@ +a = >>12.1234<< +1 -> >>12.1234<< +2 -> >>12.1234<< +3 -> >>12.1234<< diff --git a/bugs-fixed/string-conv.ok b/bugs-fixed/string-conv.ok new file mode 100644 index 0000000..7c09711 --- /dev/null +++ b/bugs-fixed/string-conv.ok @@ -0,0 +1,4 @@ +a = >>12.1234<< +1 -> 12.1234 +2 -> 12.12 +3 -> 12.1234 diff --git a/bugs-fixed/system-status.awk b/bugs-fixed/system-status.awk new file mode 100644 index 0000000..8daf563 --- /dev/null +++ b/bugs-fixed/system-status.awk @@ -0,0 +1,19 @@ +# Unmodified nawk prints the 16 bit exit status divided by 256, but +# does so using floating point arithmetic, yielding strange results. +# +# The fix is to use the various macros defined for wait(2) and to +# use the signal number + 256 for death by signal, or signal number + 512 +# for death by signal with core dump. 
+ +BEGIN { + status = system("exit 42") + print "normal status", status + + status = system("kill -HUP $$") + print "death by signal status", status + + status = system("kill -ABRT $$") + print "death by signal with core dump status", status + + system("rm -f core*") +} diff --git a/bugs-fixed/system-status.bad b/bugs-fixed/system-status.bad new file mode 100644 index 0000000..a1317db --- /dev/null +++ b/bugs-fixed/system-status.bad @@ -0,0 +1,3 @@ +normal status 42 +death by signal status 0.00390625 +death by signal with core dump status 0.523438 diff --git a/bugs-fixed/system-status.ok b/bugs-fixed/system-status.ok new file mode 100644 index 0000000..737828f --- /dev/null +++ b/bugs-fixed/system-status.ok @@ -0,0 +1,3 @@ +normal status 42 +death by signal status 257 +death by signal with core dump status 518 diff --git a/bugs-fixed/unary-plus.awk b/bugs-fixed/unary-plus.awk new file mode 100644 index 0000000..ba6185b --- /dev/null +++ b/bugs-fixed/unary-plus.awk @@ -0,0 +1,4 @@ +BEGIN { + print +"q" + print +"43.12345678912345678" +} diff --git a/bugs-fixed/unary-plus.bad b/bugs-fixed/unary-plus.bad new file mode 100644 index 0000000..76f57d5 --- /dev/null +++ b/bugs-fixed/unary-plus.bad @@ -0,0 +1,2 @@ +q +43.12345678912345678 diff --git a/bugs-fixed/unary-plus.ok b/bugs-fixed/unary-plus.ok new file mode 100644 index 0000000..90f97af --- /dev/null +++ b/bugs-fixed/unary-plus.ok @@ -0,0 +1,2 @@ +0 +43.1235 diff --git a/lib.c b/lib.c index 1b6d86e..ba6ebd4 100644 --- a/lib.c +++ b/lib.c @@ -356,6 +356,7 @@ void fldbld(void) /* create fields from current record */ } } setfval(nfloc, (Awkfloat) lastfld); + donerec = 1; /* restore */ if (dbg) { for (j = 0; j <= lastfld; j++) { p = fldtab[j]; @@ -387,6 +388,19 @@ void newfld(int n) /* add field n after end of existing lastfld */ setfval(nfloc, (Awkfloat) n); } +void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */ +{ + if (n > nfields) + growfldtab(n); + + if (lastfld < n) + cleanfld(lastfld+1, 
n); + else + cleanfld(n+1, lastfld); + + lastfld = n; +} + Cell *fieldadr(int n) /* get nth field */ { if (n < 0) diff --git a/main.c b/main.c index 45b4dbb..d292ad3 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20130105"; +const char *version = "version 20130105 + fixes by arnold@skeeve.com 20180822"; #define DEBUG #include diff --git a/makefile b/makefile index ff54372..8f5906f 100644 --- a/makefile +++ b/makefile @@ -26,13 +26,13 @@ CFLAGS = -g CFLAGS = -O2 CFLAGS = -CC = gcc -Wall -g -Wwrite-strings +#CC = gcc -Wall -g -Wwrite-strings CC = gcc -g -Wall -pedantic -CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing -CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov +#CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing +#CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov YACC = bison -d -y -YACC = yacc -d +# YACC = yacc -d #YFLAGS = -d -S # -S uses sprintf in yacc parser instead of sprint diff --git a/maketab.c b/maketab.c index 31acd75..e23974c 100644 --- a/maketab.c +++ b/maketab.c @@ -62,6 +62,7 @@ struct xx { DIVIDE, "arith", " / " }, { MOD, "arith", " % " }, { UMINUS, "arith", " -" }, + { UPLUS, "arith", " +" }, { POWER, "arith", " **" }, { PREINCR, "incrdecr", "++" }, { POSTINCR, "incrdecr", "++" }, diff --git a/proto.h b/proto.h index 9a657ef..ad6f2e8 100644 --- a/proto.h +++ b/proto.h @@ -124,6 +124,7 @@ extern void setclvar(char *); extern void fldbld(void); extern void cleanfld(int, int); extern void newfld(int); +extern void setlastfld(int); extern int refldbld(const char *, const char *); extern void recbld(void); extern Cell *fieldadr(int); @@ -193,3 +194,5 @@ extern Cell *gsub(Node **, int); extern FILE *popen(const char *, const char *); extern int pclose(FILE *); + +extern const char *flags2str(int flags); diff --git a/run.c 
b/run.c index d3f9c68..c818a08 100644 --- a/run.c +++ b/run.c @@ -31,6 +31,8 @@ THIS SOFTWARE. #include #include #include +#include +#include #include "awk.h" #include "ytab.h" @@ -323,14 +325,18 @@ Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ { Cell *y; + /* copy is not constant or field */ + y = gettemp(); + y->tval = x->tval & ~(CON|FLD|REC); y->csub = CCOPY; /* prevents freeing until call is over */ y->nval = x->nval; /* BUG? */ - if (isstr(x)) + if (isstr(x) /* || x->ctype == OCELL */) { y->sval = tostring(x->sval); + y->tval &= ~DONTFREE; + } else + y->tval |= DONTFREE; y->fval = x->fval; - y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */ - /* is DONTFREE right? */ return y; } @@ -817,6 +823,17 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co char *buf = *pbuf; int bufsize = *pbufsize; + static int first = 1; + static int have_a_format = 0; + + if (first) { + char buf[100]; + + sprintf(buf, "%a", 42.0); + have_a_format = (strcmp(buf, "0x1.5p+5") == 0); + first = 0; + } + os = s; p = buf; if ((fmt = (char *) malloc(fmtsz)) == NULL) @@ -859,6 +876,12 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); switch (*s) { + case 'a': case 'A': + if (have_a_format) + flag = *s; + else + flag = 'f'; + break; case 'f': case 'e': case 'g': case 'E': case 'G': flag = 'f'; break; @@ -901,6 +924,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co p += strlen(p); sprintf(p, "%s", t); break; + case 'a': + case 'A': case 'f': sprintf(p, fmt, getfval(x)); break; case 'd': sprintf(p, fmt, (long) getfval(x)); break; case 'u': sprintf(p, fmt, (int) getfval(x)); break; @@ -1003,7 +1028,7 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. 
also -a[0] */ x = execute(a[0]); i = getfval(x); tempfree(x); - if (n != UMINUS) { + if (n != UMINUS && n != UPLUS) { y = execute(a[1]); j = getfval(y); tempfree(y); @@ -1033,6 +1058,8 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ case UMINUS: i = -i; break; + case UPLUS: /* handled by getfval(), above */ + break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ i = ipow(i, (int) j); @@ -1479,6 +1506,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis Node *nextarg; FILE *fp; void flush_all(void); + int status = 0; t = ptoi(a[0]); x = execute(a[1]); @@ -1515,7 +1543,20 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis break; case FSYSTEM: fflush(stdout); /* in case something is buffered already */ - u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */ + status = system(getsval(x)); + u = status; + if (status != -1) { + if (WIFEXITED(status)) { + u = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + u = WTERMSIG(status) + 256; +#ifdef WCOREDUMP + if (WCOREDUMP(status)) + u += 256; +#endif + } else /* something else?!? 
*/ + u = 0; + } break; case FRAND: /* in principle, rand() returns something in 0..RAND_MAX */ diff --git a/tran.c b/tran.c index a9fa325..06f32fc 100644 --- a/tran.c +++ b/tran.c @@ -67,6 +67,18 @@ Cell *literal0; extern Cell **fldtab; +static void +setfree(Cell *vp) +{ + if (&vp->sval == FS || &vp->sval == RS || + &vp->sval == OFS || &vp->sval == ORS || + &vp->sval == OFMT || &vp->sval == CONVFMT || + &vp->sval == FILENAME || &vp->sval == SUBSEP) + vp->tval |= DONTFREE; + else + vp->tval &= ~DONTFREE; +} + void syminit(void) /* initialize symbol table with builtin vars */ { literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); @@ -282,6 +294,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ { int fldno; + f += 0.0; /* normalise negative zero to positive zero */ if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); if (isfld(vp)) { @@ -290,13 +303,18 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ if (fldno > *NF) newfld(fldno); dprintf( ("setting field %d to %g\n", fldno, f) ); + } else if (&vp->fval == NF) { + donerec = 0; /* mark $0 invalid */ + setlastfld(f); + dprintf( ("setting NF to %g\n", f) ); } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; } if (freeable(vp)) xfree(vp->sval); /* free any previous string */ - vp->tval &= ~STR; /* mark string invalid */ + vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */ + vp->fmt = NULL; vp->tval |= NUM; /* mark number ok */ if (f == -0) /* who would have thought this possible? */ f = 0; @@ -318,6 +336,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ { char *t; int fldno; + Awkfloat f; dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) ); @@ -332,16 +351,28 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... 
invalid */ donerec = 1; + } else if (&vp->sval == OFS) { + if (donerec == 0) + recbld(); } - t = tostring(s); /* in case it's self-assign */ + t = s ? tostring(s) : tostring(""); /* in case it's self-assign */ if (freeable(vp)) xfree(vp->sval); - vp->tval &= ~NUM; + vp->tval &= ~(NUM|CONVC|CONVO); vp->tval |= STR; - vp->tval &= ~DONTFREE; + vp->fmt = NULL; + setfree(vp); dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", (void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) ); - return(vp->sval = t); + vp->sval = t; + if (&vp->fval == NF) { + donerec = 0; /* mark $0 invalid */ + f = getfval(vp); + setlastfld(f); + dprintf( ("setting NF to %g\n", f) ); + } + + return(vp->sval); } Awkfloat getfval(Cell *vp) /* get float val of a Cell */ @@ -373,17 +404,78 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel fldbld(); else if (isrec(vp) && donerec == 0) recbld(); - if (isstr(vp) == 0) { - if (freeable(vp)) - xfree(vp->sval); - if (modf(vp->fval, &dtemp) == 0) /* it's integral */ - sprintf(s, "%.30g", vp->fval); - else - sprintf(s, *fmt, vp->fval); - vp->sval = tostring(s); - vp->tval &= ~DONTFREE; - vp->tval |= STR; + + /* + * ADR: This is complicated and more fragile than is desirable. + * Retrieving a string value for a number associates the string + * value with the scalar. Previously, the string value was + * sticky, meaning if converted via OFMT that became the value + * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT + * changed after a string value was retrieved, the original value + * was maintained and used. Also not per POSIX. + * + * We work around this design by adding two additional flags, + * CONVC and CONVO, indicating how the string value was + * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy + * of the pointer to the xFMT format string used for the + * conversion. This pointer is only read, **never** dereferenced. 
+ * The next time we do a conversion, if it's coming from the same + * xFMT as last time, and the pointer value is different, we + * know that the xFMT format string changed, and we need to + * redo the conversion. If it's the same, we don't have to. + * + * There are also several cases where we don't do a conversion, + * such as for a field (see the checks below). + */ + + /* Don't duplicate the code for actually updating the value */ +#define update_str_val(vp) \ + { \ + if (freeable(vp)) \ + xfree(vp->sval); \ + if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ + sprintf(s, "%.30g", vp->fval); \ + else \ + sprintf(s, *fmt, vp->fval); \ + vp->sval = tostring(s); \ + vp->tval &= ~DONTFREE; \ + vp->tval |= STR; \ } + + if (isstr(vp) == 0) { + update_str_val(vp); + if (fmt == OFMT) { + vp->tval &= ~CONVC; + vp->tval |= CONVO; + } else { + /* CONVFMT */ + vp->tval &= ~CONVO; + vp->tval |= CONVC; + } + vp->fmt = *fmt; + } else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) { + goto done; + } else if (isstr(vp)) { + if (fmt == OFMT) { + if ((vp->tval & CONVC) != 0 + || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) { + update_str_val(vp); + vp->tval &= ~CONVC; + vp->tval |= CONVO; + vp->fmt = *fmt; + } + } else { + /* CONVFMT */ + if ((vp->tval & CONVO) != 0 + || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) { + update_str_val(vp); + vp->tval &= ~CONVO; + vp->tval |= CONVC; + vp->fmt = *fmt; + } + } + } +done: dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", (void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) ); return(vp->sval); @@ -457,3 +549,37 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */ *bp++ = 0; return (char *) buf; } + +const char *flags2str(int flags) +{ + static const struct ftab { + const char *name; + int value; + } flagtab[] = { + { "NUM", NUM }, + { "STR", STR }, + { "DONTFREE", DONTFREE }, + { "CON", CON }, + { "ARR", ARR }, + { "FCN", FCN }, + { "FLD", FLD }, + { "REC", REC }, + { "CONVC", CONVC 
}, + { "CONVO", CONVO }, + { NULL, 0 } + }; + static char buf[100]; + int i; + char *cp = buf; + + for (i = 0; flagtab[i].name != NULL; i++) { + if ((flags & flagtab[i].value) != 0) { + if (cp > buf) + *cp++ = '|'; + strcpy(cp, flagtab[i].name); + cp += strlen(cp); + } + } + + return buf; +}