From 2f0b15201d182f25dcb75891fc43190760d7529a Mon Sep 17 00:00:00 2001
From: Richard Ipsum
Date: Wed, 15 Apr 2020 17:46:39 +0100
Subject: [PATCH] paste: Support -d '\0'

POSIX specifies that -d '\0' sets the delimiter to an empty string.
---
 libutf/utf.c          | 13 +++++++++++++
 libutf/utftorunestr.c | 16 +++++++++++++++-
 paste.c               | 29 ++++++++++++++++-------------
 utf.h                 |  2 ++
 4 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/libutf/utf.c b/libutf/utf.c
index 897c5ef..492e020 100644
--- a/libutf/utf.c
+++ b/libutf/utf.c
@@ -62,6 +62,19 @@ utfnlen(const char *s, size_t len)
 	return i;
 }
 
+size_t
+utfmemlen(const char *s, size_t len)
+{
+	const char *p = s;
+	size_t i;
+	Rune r;
+	int n;
+
+	for(i = 0; (n = charntorune(&r, p, len-(p-s))); i++)
+		p += n;
+	return i;
+}
+
 char *
 utfrune(const char *s, Rune r)
 {
diff --git a/libutf/utftorunestr.c b/libutf/utftorunestr.c
index ae3ad50..e182bc1 100644
--- a/libutf/utftorunestr.c
+++ b/libutf/utftorunestr.c
@@ -4,10 +4,24 @@
 size_t
 utftorunestr(const char *str, Rune *r)
 {
-	size_t i, n;
+	size_t i;
+	int n;
 
 	for (i = 0; (n = chartorune(&r[i], str)) && r[i]; i++)
 		str += n;
 
 	return i;
 }
+
+size_t
+utfntorunestr(const char *str, size_t len, Rune *r)
+{
+	size_t i;
+	int n;
+	const char *end = str + len;
+
+	for (i = 0; (n = charntorune(&r[i], str, end - str)); i++)
+		str += n;
+
+	return i;
+}
diff --git a/paste.c b/paste.c
index b0ac761..0385efa 100644
--- a/paste.c
+++ b/paste.c
@@ -52,8 +52,10 @@ nextline:
 		c = 0;
 
 		for (; efgetrune(&c, dsc[i].fp, dsc[i].name) ;) {
-			for (m = last + 1; m < i; m++)
-				efputrune(&(delim[m % delimlen]), stdout, "");
+			for (m = last + 1; m < i; m++) {
+				if (delim[m % delimlen] != '\0')
+					efputrune(&(delim[m % delimlen]), stdout, "");
+			}
 			last = i;
 			if (c == '\n') {
 				if (i != fdescrlen - 1)
@@ -67,7 +69,7 @@ nextline:
 		if (c == 0 && last != -1) {
 			if (i == fdescrlen - 1)
 				putchar('\n');
-			else
+			else if (d != '\0')
 				efputrune(&d, stdout, "");
 			last++;
 		}
@@ -86,18 +88,18 @@ int
 main(int argc, char *argv[])
 {
 	struct fdescr *dsc;
-	Rune *delim;
-	size_t delimlen, i;
+	Rune *delim_rune = NULL;
+	size_t delim_runelen, i, delim_bytelen = 1;
 	int seq = 0, ret = 0;
-	char *adelim = "\t";
+	char *delim = "\t";
 
 	ARGBEGIN {
 	case 's':
 		seq = 1;
 		break;
 	case 'd':
-		adelim = EARGF(usage());
-		unescape(adelim);
+		delim = EARGF(usage());
+		delim_bytelen = unescape(delim);
 		break;
 	default:
 		usage();
@@ -107,10 +109,11 @@ main(int argc, char *argv[])
 		usage();
 
 	/* populate delimiters */
-	/* TODO: fix libutf to accept sizes */
-	delim = ereallocarray(NULL, utflen(adelim) + 1, sizeof(*delim));
-	if (!(delimlen = utftorunestr(adelim, delim)))
+	delim_rune = ereallocarray(NULL,
+		utfmemlen(delim, delim_bytelen) + 1, sizeof(*delim_rune));
+	if (!(delim_runelen = utfntorunestr(delim, delim_bytelen, delim_rune))) {
 		usage();
+	}
 
 	/* populate file list */
 	dsc = ereallocarray(NULL, argc, sizeof(*dsc));
@@ -126,9 +129,9 @@ main(int argc, char *argv[])
 	}
 
 	if (seq) {
-		sequential(dsc, argc, delim, delimlen);
+		sequential(dsc, argc, delim_rune, delim_runelen);
 	} else {
-		parallel(dsc, argc, delim, delimlen);
+		parallel(dsc, argc, delim_rune, delim_runelen);
 	}
 
 	for (i = 0; i < argc; i++)
diff --git a/utf.h b/utf.h
index 18a41da..8e0707a 100644
--- a/utf.h
+++ b/utf.h
@@ -38,6 +38,7 @@ int fullrune(const char *, size_t);
 char *utfecpy(char *, char *, const char *);
 size_t utflen(const char *);
 size_t utfnlen(const char *, size_t);
+size_t utfmemlen(const char *, size_t);
 char *utfrune(const char *, Rune);
 char *utfrrune(const char *, Rune);
 char *utfutf(const char *, const char *);
@@ -60,6 +61,7 @@ Rune tolowerrune(Rune);
 Rune toupperrune(Rune);
 
 size_t utftorunestr(const char *, Rune *);
+size_t utfntorunestr(const char *, size_t, Rune *);
 
 int fgetrune(Rune *, FILE *);
 int efgetrune(Rune *, FILE *, const char *);