Finish up wc(1)

Use size_t for all counts, fix the manpage and refactor the code.
Here's yet another place where GNU coreutils fail:

sbase:
$ echo "GNU/Turd sucks" | wc -cm
    15

coreutils:
$ echo "GNU/Turd sucks" | wc -cm
     15      15

Take a bloody guess which behaviour is correct[0].

[0]: http://pubs.opengroup.org/onlinepubs/009604499/utilities/wc.html
This commit is contained in:
FRIGN 2015-02-01 03:01:11 +01:00
parent d75cc2e556
commit 8ab096d2a4
3 changed files with 66 additions and 68 deletions

2
README
View File

@ -82,7 +82,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
= sha1sum non-posix none = sha1sum non-posix none
= sha256sum non-posix none = sha256sum non-posix none
= sha512sum non-posix none = sha512sum non-posix none
wc yes none #* wc yes none
= xargs no -I, -L, -p, -s, -t, -x = xargs no -I, -L, -p, -s, -t, -x
=* yes non-posix none =* yes non-posix none

35
wc.1
View File

@ -1,4 +1,4 @@
.Dd January 30, 2015 .Dd February 1, 2015
.Dt WC 1 .Dt WC 1
.Os sbase .Os sbase
.Sh NAME .Sh NAME
@ -6,26 +6,27 @@
.Nd word count .Nd word count
.Sh SYNOPSIS .Sh SYNOPSIS
.Nm .Nm
.Op Fl clmw .Op Fl c | Fl m
.Op Fl lw
.Op Ar file ... .Op Ar file ...
.Sh DESCRIPTION .Sh DESCRIPTION
.Nm .Nm
prints the number of lines, words, and bytes in each file. If any flags are prints the number of lines, words and bytes in each
given, .Ar file ,
unless set differently with flags.
If no
.Ar file
is given
.Nm .Nm
will print only the requested information. If no reads from stdin.
.Ar files
are given,
.Nm
reads stdin.
.Sh OPTIONS .Sh OPTIONS
.Bl -tag -width Ds .Bl -tag -width Ds
.It Fl c .It Fl c | Fl l | Fl m | Fl w
print the number of bytes. Print the number of bytes | lines | characters | words.
.It Fl l
print the number of lines.
.It Fl m
print the number of characters, not bytes.
.It Fl w
print the number of words.
.El .El
.Sh STANDARDS
The
.Nm
utility is compliant with the
.St -p1003.1-2008
specification.

97
wc.c
View File

@ -6,18 +6,58 @@
#include "util.h" #include "util.h"
static void output(const char *, long, long, long); static int lflag = 0;
static void wc(FILE *, const char *); static int wflag = 0;
static char cmode = 0;
static size_t tc = 0, tl = 0, tw = 0;
static int lflag = 0; void
static int wflag = 0; output(const char *str, size_t nc, size_t nl, size_t nw)
static char cmode = 0; {
static long tc = 0, tl = 0, tw = 0; int noflags = !cmode && !lflag && !wflag;
if (lflag || noflags)
printf(" %5zu", nl);
if (wflag || noflags)
printf(" %5zu", nw);
if (cmode || noflags)
printf(" %5zu", nc);
if (str)
printf(" %s", str);
putchar('\n');
}
void
wc(FILE *fp, const char *str)
{
int word = 0;
int c;
size_t nc = 0, nl = 0, nw = 0;
while ((c = getc(fp)) != EOF) {
if (cmode != 'm' || UTF8_POINT(c))
nc++;
if (c == '\n')
nl++;
if (!isspace(c))
word = 1;
else if (word) {
word = 0;
nw++;
}
}
if (word)
nw++;
tc += nc;
tl += nl;
tw += nw;
output(str, nc, nl, nw);
}
static void static void
usage(void) usage(void)
{ {
eprintf("usage: %s [-clmw] [files...]\n", argv0); eprintf("usage: %s [-c | -m] [-lw] [file ...]\n", argv0);
} }
int int
@ -59,46 +99,3 @@ main(int argc, char *argv[])
} }
return 0; return 0;
} }
void
output(const char *str, long nc, long nl, long nw)
{
int noflags = !cmode && !lflag && !wflag;
if (lflag || noflags)
printf(" %5ld", nl);
if (wflag || noflags)
printf(" %5ld", nw);
if (cmode || noflags)
printf(" %5ld", nc);
if (str)
printf(" %s", str);
putchar('\n');
}
void
wc(FILE *fp, const char *str)
{
int word = 0;
int c;
long nc = 0, nl = 0, nw = 0;
while ((c = getc(fp)) != EOF) {
if (cmode != 'm' || UTF8_POINT(c))
nc++;
if (c == '\n')
nl++;
if (!isspace(c))
word = 1;
else if (word) {
word = 0;
nw++;
}
}
if (word)
nw++;
tc += nc;
tl += nl;
tw += nw;
output(str, nc, nl, nw);
}