Audit col(1), add UTF-8 support

Nothing special here: this only renames variables and adds the
Rune utility functions.
The manpage was refactored as well.
This commit is contained in:
FRIGN 2015-03-22 21:43:59 +01:00
parent a0e5b565a7
commit 521f324319
3 changed files with 77 additions and 93 deletions

2
README
View File

@ -19,7 +19,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
=*| cksum yes none =*| cksum yes none
=*| cmp yes none =*| cmp yes none
#*| cols non-posix none #*| cols non-posix none
col yes none #*| col non-posix none
=*| comm yes none =*| comm yes none
=*| cp yes none (-i) =*| cp yes none (-i)
=*| cron non-posix none =*| cron non-posix none

50
col.1
View File

@ -1,4 +1,4 @@
.Dd March 6, 2014 .Dd March 22, 2015
.Dt COL 1 .Dt COL 1
.Os sbase .Os sbase
.Sh NAME .Sh NAME
@ -9,20 +9,15 @@
.Op Fl bfpx .Op Fl bfpx
.Op Fl l Ar num .Op Fl l Ar num
.Sh DESCRIPTION .Sh DESCRIPTION
The
.Nm .Nm
utility filters all the reverse (and half reverse) line feeds, filters all reverse (and half reverse) line feeds,
as they are produced by as produced by
.Xr nroff 1 .Xr nroff 1
with .2C of with .2C,
.Xr ms 6 .Xr ms 6
or by or
.Xr tbl 1 . .Xr tbl 1 .
.Nm The recognized control sequences are:
also replaces spaces by tabs when it is possible.
The control sequences managed by
.Nm
are:
.Bl -tag -width Ds .Bl -tag -width Ds
.It ESC-7 .It ESC-7
Reverse line-feed Reverse line-feed
@ -42,25 +37,24 @@ Carriage return
New line New line
.El .El
.Pp .Pp
All the other control codes and escape sequences are removed. All other control codes and escape sequences are removed.
.Nm .Nm
transforms all the spaces into tabulators. converts all spaces to tabs.
.Sh OPTIONS .Sh OPTIONS
.Bl -tag -width Ds .Bl -tag -width Ds
.It Fl p .It Fl p
Print unknown escape sequences to the output. Print unknown escape sequences.
.It Fl b .It Fl b
Do not print backspaces in output, Do not print backspaces and instead only print the last
and print only the last overstriked character in the output. character written to each column position.
.It Fl f .It Fl f
Allow forward half line feeds in the output. Allow forward half line feeds in the output.
.It Fl x .It Fl x
Do not convert spaces in tabulators. Do not convert spaces to tabs.
.It Fl l Ar num .It Fl l Ar num
Increment to Buffer
.Ar num .Ar num
the number of lines buffered for lines in memory.
.Nm
.El .El
.Sh SEE ALSO .Sh SEE ALSO
.Xr nroff 1 , .Xr nroff 1 ,
@ -68,13 +62,9 @@ the number of lines buffered for
.Xr ms 6 .Xr ms 6
.Sh BUGS .Sh BUGS
.Nm .Nm
only process text with a maximum of 256 lines with 800 bytes per line, only buffers up to 256 lines with up to 800 bytes per line
although the number of lines can be modified with the if the line-number hasn't been set differently with the
.Fl l .Op Fl l
option. flag.
When the number of lines is bigger, When the number of lines is bigger, the buffer is flushed and
the buffer is flushed to the output, reverse line feeds can not operate on the flushed lines.
so new reverse line feeds can not operate in the flushed lines.
This implementation ignores SI and SO selection character sets,
because it is supposed to work only with UTF-8 strings,
although the UTF-8 support is missed.

118
col.c
View File

@ -1,47 +1,47 @@
/* See LICENSE file for copyright and license details. */ /* See LICENSE file for copyright and license details. */
#include <limits.h>
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <ctype.h>
#include "utf.h"
#include "util.h" #include "util.h"
#define NLINES 256 #define NLINES 256
#define NCOLS 800 #define NCOLS 800
static char **buff; static Rune **buf;
static int obackspace, onotabs, ohalfline, oescape; static int backspace, notabs, halfline, escape;
static unsigned nline, ncol, nchar, nspaces, maxline, bs; static size_t nline, ncol, nchar, nspaces, maxline, bs, pagesize = NLINES;
static size_t pagsize = NLINES;
static void static void
flush(void) flush(void)
{ {
int c; Rune c;
unsigned i, j; size_t i, j;
for (i = 0; i < maxline; ++i) { for (i = 0; i < maxline; ++i) {
for (j = 0; j < NCOLS && (c = buff[i][j]) != '\0'; ++j) for (j = 0; j < NCOLS && (c = buf[i][j]); ++j)
putchar(c); efputrune(&c, stdout, "<stdout>");
putchar('\n'); putchar('\n');
} }
bs = nchar = nline = ncol = 0; bs = nchar = nline = ncol = 0;
} }
static void static void
forward(unsigned n) forward(size_t n)
{ {
unsigned lim; size_t lim;
for (lim = ncol + n; ncol != lim && nchar < NCOLS-1; ++nchar) { for (lim = ncol + n; ncol != lim && nchar < NCOLS - 1; ++nchar) {
switch (buff[nline][nchar]) { switch (buf[nline][nchar]) {
case '\b': case '\b':
--ncol; --ncol;
break; break;
case '\0': case '\0':
buff[nline][nchar] = ' '; buf[nline][nchar] = ' ';
/* FALLTHROUGH */ /* FALLTHROUGH */
default: default:
++ncol; ++ncol;
@ -53,31 +53,30 @@ forward(unsigned n)
static void static void
linefeed(int up, int rcarriage) linefeed(int up, int rcarriage)
{ {
unsigned oncol = ncol; size_t oncol = ncol;
nspaces = 0; nspaces = 0;
if (up > 0) { if (up > 0) {
if (nline == pagsize-1) { if (nline == pagesize - 1) {
flush(); flush();
} else { } else {
if (++nline > maxline) if (++nline > maxline)
maxline = nline; maxline = nline;
} }
} else { } else if (nline > 0) {
if (nline > 0) --nline;
--nline;
} }
bs = 0; bs = 0;
if (rcarriage) { if (rcarriage) {
forward(oncol); forward(oncol);
nchar = ncol = 0; nchar = ncol = 0;
} }
} }
static void static void
newchar(int c) newchar(Rune c)
{ {
char *cp; Rune *cp;
forward(nspaces); forward(nspaces);
nspaces = 0; nspaces = 0;
@ -90,7 +89,7 @@ newchar(int c)
nchar = ncol = 0; nchar = ncol = 0;
break; break;
case '\t': case '\t':
forward(8 - ncol%8); forward(8 - ncol % 8);
break; break;
case '\b': case '\b':
if (ncol > 0) if (ncol > 0)
@ -100,20 +99,18 @@ newchar(int c)
bs = 1; bs = 1;
break; break;
default: default:
cp = &buff[nline][nchar]; cp = &buf[nline][nchar];
if (*cp != '\0' && *cp != ' ' && bs && !obackspace) { if (*cp && *cp != ' ' && bs && !backspace && nchar != NCOLS - 3) {
if (nchar != NCOLS-3) { memmove(cp + 3, cp + 1, (NCOLS - nchar - 2) * sizeof(*cp));
memmove(cp + 3, cp + 1, NCOLS - nchar - 2); cp[1] = '\b';
cp[1] = '\b'; nchar += 2;
nchar += 2;
}
} }
if (nchar != NCOLS-1) { if (nchar != NCOLS - 1) {
for (cp = buff[nline]; cp < &buff[nline][nchar]; ++cp) { for (cp = buf[nline]; cp < &buf[nline][nchar]; ++cp) {
if (*cp == '\0') if (*cp == '\0')
*cp = ' '; *cp = ' ';
} }
buff[nline][nchar++] = c; buf[nline][nchar++] = c;
++ncol; ++ncol;
} }
bs = 0; bs = 0;
@ -123,50 +120,52 @@ newchar(int c)
static void static void
col(void) col(void)
{ {
int c; Rune r;
int ret;
while ((c = getchar()) != EOF) { while (efgetrune(&r, stdin, "<stdin>")) {
switch (c) { switch (r) {
case '\x1b': case '\x1b':
switch (c = getchar()) { ret = efgetrune(&r, stdin, "<stdin>");
switch (r) {
case '8': /* reverse half-line-feed */ case '8': /* reverse half-line-feed */
case '7': /* reverse line-feed */ case '7': /* reverse line-feed */
linefeed(-1, 0); linefeed(-1, 0);
continue; continue;
case '9': /* forward half-line-feed */ case '9': /* forward half-line-feed */
if (ohalfline) if (halfline)
break; break;
linefeed(1, 0); linefeed(1, 0);
continue; continue;
} }
if (!oescape) if (!escape)
continue; continue;
newchar('\x1b'); newchar('\x1b');
if (c != EOF) if (ret)
newchar(c); newchar(r);
break; break;
case '\v': case '\v':
linefeed(-1, 0); linefeed(-1, 0);
break; break;
case ' ': case ' ':
if (!onotabs) { if (!notabs) {
if (++nspaces != 8) if (++nspaces != 8)
continue; continue;
c = '\t'; r = '\t';
nspaces = 0; nspaces = 0;
} }
/* FALLTHROUGH */ /* FALLTHROUGH */
case '\r': case '\r':
case '\b': case '\b':
case '\t': case '\t':
newchar(c); newchar(r);
break; break;
case '\n': case '\n':
linefeed(1, 1); linefeed(1, 1);
break; break;
default: default:
if (!iscntrl(c)) if (!iscntrlrune(r))
newchar(c); newchar(r);
break; break;
} }
} }
@ -175,17 +174,17 @@ col(void)
static void static void
allocbuf(void) allocbuf(void)
{ {
char **bp; Rune **bp;
buff = ereallocarray(NULL, pagsize, sizeof(*buff)); buf = ereallocarray(NULL, pagesize, sizeof(*buf));
for (bp = buff; bp < &buff[pagsize]; ++bp) for (bp = buf; bp < buf + pagesize; ++bp)
*bp = emalloc(NCOLS); *bp = ereallocarray(NULL, NCOLS, sizeof(**buf));
} }
static void static void
usage(void) usage(void)
{ {
enprintf(2, "usage: %s [-p][-l num][-b][-f][-x]\n", argv0); enprintf(2, "usage: %s [-pbfx] [-l num]\n", argv0);
} }
int int
@ -193,35 +192,30 @@ main(int argc, char *argv[])
{ {
ARGBEGIN { ARGBEGIN {
case 'b': case 'b':
obackspace = 1; backspace = 1;
break; break;
case 'f': case 'f':
ohalfline = 1; halfline = 1;
break; break;
case 'l': case 'l':
pagsize = estrtonum(EARGF(usage()), 0, SIZE_MAX); pagesize = estrtonum(EARGF(usage()), 0, MIN(SIZE_MAX, LLONG_MAX));
break; break;
case 'p': case 'p':
oescape = 1; escape = 1;
break; break;
case 'x': case 'x':
onotabs = 1; notabs = 1;
break; break;
default: default:
usage(); usage();
} ARGEND; } ARGEND;
if (argc > 0) if (argc)
usage(); usage();
allocbuf(); allocbuf();
col(); col();
flush(); flush();
if (ferror(stdin))
enprintf(1, "error reading input");
if (ferror(stdout))
enprintf(2, "error writing output");
return 0; return 0;
} }