From c879fbf013b5314c27fa236c987ea56a521420e6 Mon Sep 17 00:00:00 2001
From: "Arnold D. Robbins"
Date: Fri, 8 Nov 2019 14:40:18 +0200
Subject: [PATCH] From Ori Bernstein, ori@eigenstate.org, for FS="" in multibyte locale.

---
 FIXES  |  6 ++++++
 lib.c  | 12 ++++++++----
 main.c |  2 +-
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/FIXES b/FIXES
index e22ee22..c7757f9 100644
--- a/FIXES
+++ b/FIXES
@@ -25,6 +25,12 @@ THIS SOFTWARE.
 This file lists all bug fixes, changes, etc., made since the AWK book
 was sent to the printers in August, 1987.
 
+November 8, 2019:
+	Fix from Ori Bernstein to get UTF-8 characters instead of
+	bytes when FS = "". This is currently the only bit of
+	the One True Awk that understands multibyte characters.
+	From Arnold Robbins, apply some cleanups in the test suite.
+
 October 25, 2019:
 	More fixes and cleanups from NetBSD, courtesy of Christos
 	Zoulas. Merges PRs 54 and 55.
diff --git a/lib.c b/lib.c
index b4a0d89..0fa90e4 100644
--- a/lib.c
+++ b/lib.c
@@ -332,15 +332,19 @@ void fldbld(void) /* create fields from current record */
 		}
 		*fr = 0;
 	} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
-		for (i = 0; *r != 0; r++) {
-			char buf[2];
+		for (i = 0; *r != '\0'; r += n) {
+			char buf[MB_CUR_MAX + 1];
+
 			i++;
 			if (i > nfields)
 				growfldtab(i);
 			if (freeable(fldtab[i]))
 				xfree(fldtab[i]->sval);
-			buf[0] = *r;
-			buf[1] = 0;
+			n = mblen(r, MB_CUR_MAX);
+			if (n < 0)
+				n = 1;
+			memcpy(buf, r, n);
+			buf[n] = '\0';
 			fldtab[i]->sval = tostring(buf);
 			fldtab[i]->tval = FLD | STR;
 		}
diff --git a/main.c b/main.c
index 8b14d27..255ded7 100644
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 THIS SOFTWARE.
 ****************************************************************/
 
-const char *version = "version 20191025";
+const char *version = "version 20191108";
 
 #define DEBUG
 #include