Add even stricter UTF-8 support to wc(1)

using readrune() and iswspace().
musl, for instance, doesn't differentiate between iswspace() and
isspace(), but once it does, the code will be ready.
It goes without saying that GNU coreutils don't use iswspace()[0].

[0]: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c
This commit is contained in:
FRIGN 2015-02-01 04:06:06 +01:00
parent 696bb992c3
commit 986a9de51a
1 changed files with 8 additions and 6 deletions

14
wc.c
View File

@ -3,7 +3,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <wctype.h>
#include "utf.h"
#include "util.h"
static int lflag = 0;
@ -30,16 +32,16 @@ output(const char *str, size_t nc, size_t nl, size_t nw)
void
wc(FILE *fp, const char *str)
{
int word = 0;
int c;
int word = 0, read;
Rune c;
size_t nc = 0, nl = 0, nw = 0;
while ((c = getc(fp)) != EOF) {
if (cmode != 'm' || UTF8_POINT(c))
nc++;
while ((read = readrune(str, fp, &c))) {
nc += (cmode == 'c') ? read :
(c != Runeerror) ? 1 : 0;
if (c == '\n')
nl++;
if (!isspace(c))
if (!iswspace(c))
word = 1;
else if (word) {
word = 0;