Fix from Ori Bernstein, ori@eigenstate.org: with FS="" in a multibyte locale, split the record into characters rather than bytes.

This commit is contained in:
Arnold D. Robbins 2019-11-08 14:40:18 +02:00
parent 0e1bebcc09
commit c879fbf013
3 changed files with 15 additions and 5 deletions

6
FIXES
View File

@ -25,6 +25,12 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
November 8, 2019:
Fix from Ori Bernstein to get multibyte (e.g. UTF-8) characters
instead of single bytes when FS = "". This is currently the only
bit of the One True Awk that understands multibyte characters.
From Arnold Robbins, apply some cleanups in the test suite.
October 25, 2019:
More fixes and cleanups from NetBSD, courtesy of Christos
Zoulas. Merges PRs 54 and 55.

12
lib.c
View File

@ -332,15 +332,19 @@ void fldbld(void) /* create fields from current record */
}
*fr = 0;
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
for (i = 0; *r != 0; r++) {
char buf[2];
for (i = 0; *r != '\0'; r += n) {
char buf[MB_CUR_MAX + 1];
i++;
if (i > nfields)
growfldtab(i);
if (freeable(fldtab[i]))
xfree(fldtab[i]->sval);
buf[0] = *r;
buf[1] = 0;
n = mblen(r, MB_CUR_MAX);
if (n < 0)
n = 1;
memcpy(buf, r, n);
buf[n] = '\0';
fldtab[i]->sval = tostring(buf);
fldtab[i]->tval = FLD | STR;
}

2
main.c
View File

@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
const char *version = "version 20191025";
const char *version = "version 20191108";
#define DEBUG
#include <stdio.h>