From 986a9de51a77e7f6803e1b2259ec0675762077db Mon Sep 17 00:00:00 2001
From: FRIGN
Date: Sun, 1 Feb 2015 04:06:06 +0100
Subject: [PATCH] Add even stricter UTF-8-support to wc(1) using readrune() and iswspace().

musl for instance doesn't differentiate between iswspace() and
isspace(), but when it does, the code will be ready.
It goes without saying that GNU coreutils don't use iswspace()[0].

[0]: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c
---
 wc.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/wc.c b/wc.c
index f283e1b..6af23d4 100644
--- a/wc.c
+++ b/wc.c
@@ -3,7 +3,9 @@
 #include
 #include
 #include
+#include
 
+#include "utf.h"
 #include "util.h"
 
 static int lflag = 0;
@@ -30,16 +32,16 @@ output(const char *str, size_t nc, size_t nl, size_t nw)
 void
 wc(FILE *fp, const char *str)
 {
-	int word = 0;
-	int c;
+	int word = 0, read;
+	Rune c;
 	size_t nc = 0, nl = 0, nw = 0;
 
-	while ((c = getc(fp)) != EOF) {
-		if (cmode != 'm' || UTF8_POINT(c))
-			nc++;
+	while ((read = readrune(str, fp, &c))) {
+		nc += (cmode == 'c') ? read :
+		      (c != Runeerror) ? 1 : 0;
 		if (c == '\n')
 			nl++;
-		if (!isspace(c))
+		if (!iswspace(c))
 			word = 1;
 		else if (word) {
 			word = 0;