Audit col(1), add UTF-8-support

Nothing special here, only renaming of variables and adding the
Rune-utility-functions.
Also, I refactored the manpage.
This commit is contained in:
FRIGN 2015-03-22 21:43:59 +01:00
parent a0e5b565a7
commit 521f324319
3 changed files with 77 additions and 93 deletions

2
README
View File

@ -19,7 +19,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
=*| cksum yes none
=*| cmp yes none
#*| cols non-posix none
col yes none
#*| col non-posix none
=*| comm yes none
=*| cp yes none (-i)
=*| cron non-posix none

50
col.1
View File

@ -1,4 +1,4 @@
.Dd March 6, 2014
.Dd March 22, 2015
.Dt COL 1
.Os sbase
.Sh NAME
@ -9,20 +9,15 @@
.Op Fl bfpx
.Op Fl l Ar num
.Sh DESCRIPTION
The
.Nm
utility filters all the reverse (and half reverse) line feeds,
as they are produced by
filters all reverse (and half reverse) line feeds,
as produced by
.Xr nroff 1
with .2C of
with .2C,
.Xr ms 6
or by
or
.Xr tbl 1 .
.Nm
also replaces spaces by tabs when it is possible.
The control sequences managed by
.Nm
are:
The recognized control sequences are:
.Bl -tag -width Ds
.It ESC-7
Reverse line-feed
@ -42,25 +37,24 @@ Carriage return
New line
.El
.Pp
All the other control codes and escape sequences are removed.
All other control codes and escape sequences are removed.
.Nm
transforms all the spaces into tabulators.
converts all spaces to tabs.
.Sh OPTIONS
.Bl -tag -width Ds
.It Fl p
Print unknown escape sequences to the output.
Print unknown escape sequences.
.It Fl b
Do not print backspaces in output,
and print only the last overstriked character in the output.
Do not print backspaces and instead only print the last
character written to each column position.
.It Fl f
Allow forward half line feeds in the output.
.It Fl x
Do not convert spaces in tabulators.
Do not convert spaces to tabs.
.It Fl l Ar num
Increment to
Buffer
.Ar num
the number of lines buffered for
.Nm
lines in memory.
.El
.Sh SEE ALSO
.Xr nroff 1 ,
@ -68,13 +62,9 @@ the number of lines buffered for
.Xr ms 6
.Sh BUGS
.Nm
only process text with a maximum of 256 lines with 800 bytes per line,
although the number of lines can be modified with the
.Fl l
option.
When the number of lines is bigger,
the buffer is flushed to the output,
so new reverse line feeds can not operate in the flushed lines.
This implementation ignores SI and SO selection character sets,
because it is supposed to work only with UTF-8 strings,
although the UTF-8 support is missed.
only buffers up to 256 lines with up to 800 characters per line,
unless the number of lines has been changed with the
.Fl l
flag.
When the input exceeds that number of lines, the buffer is flushed and
reverse line feeds cannot operate on the flushed lines.

118
col.c
View File

@ -1,47 +1,47 @@
/* See LICENSE file for copyright and license details. */
#include <limits.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "utf.h"
#include "util.h"
#define NLINES 256
#define NCOLS 800
static char **buff;
static Rune **buf;
static int obackspace, onotabs, ohalfline, oescape;
static unsigned nline, ncol, nchar, nspaces, maxline, bs;
static size_t pagsize = NLINES;
static int backspace, notabs, halfline, escape;
static size_t nline, ncol, nchar, nspaces, maxline, bs, pagesize = NLINES;
static void
flush(void)
{
int c;
unsigned i, j;
Rune c;
size_t i, j;
for (i = 0; i < maxline; ++i) {
for (j = 0; j < NCOLS && (c = buff[i][j]) != '\0'; ++j)
putchar(c);
for (j = 0; j < NCOLS && (c = buf[i][j]); ++j)
efputrune(&c, stdout, "<stdout>");
putchar('\n');
}
bs = nchar = nline = ncol = 0;
}
static void
forward(unsigned n)
forward(size_t n)
{
unsigned lim;
size_t lim;
for (lim = ncol + n; ncol != lim && nchar < NCOLS-1; ++nchar) {
switch (buff[nline][nchar]) {
for (lim = ncol + n; ncol != lim && nchar < NCOLS - 1; ++nchar) {
switch (buf[nline][nchar]) {
case '\b':
--ncol;
break;
case '\0':
buff[nline][nchar] = ' ';
buf[nline][nchar] = ' ';
/* FALLTHROUGH */
default:
++ncol;
@ -53,31 +53,30 @@ forward(unsigned n)
static void
linefeed(int up, int rcarriage)
{
unsigned oncol = ncol;
size_t oncol = ncol;
nspaces = 0;
if (up > 0) {
if (nline == pagsize-1) {
if (nline == pagesize - 1) {
flush();
} else {
if (++nline > maxline)
maxline = nline;
}
} else {
if (nline > 0)
--nline;
} else if (nline > 0) {
--nline;
}
bs = 0;
if (rcarriage) {
forward(oncol);
nchar = ncol = 0;
nchar = ncol = 0;
}
}
static void
newchar(int c)
newchar(Rune c)
{
char *cp;
Rune *cp;
forward(nspaces);
nspaces = 0;
@ -90,7 +89,7 @@ newchar(int c)
nchar = ncol = 0;
break;
case '\t':
forward(8 - ncol%8);
forward(8 - ncol % 8);
break;
case '\b':
if (ncol > 0)
@ -100,20 +99,18 @@ newchar(int c)
bs = 1;
break;
default:
cp = &buff[nline][nchar];
if (*cp != '\0' && *cp != ' ' && bs && !obackspace) {
if (nchar != NCOLS-3) {
memmove(cp + 3, cp + 1, NCOLS - nchar - 2);
cp[1] = '\b';
nchar += 2;
}
cp = &buf[nline][nchar];
if (*cp && *cp != ' ' && bs && !backspace && nchar != NCOLS - 3) {
memmove(cp + 3, cp + 1, (NCOLS - nchar - 2) * sizeof(*cp));
cp[1] = '\b';
nchar += 2;
}
if (nchar != NCOLS-1) {
for (cp = buff[nline]; cp < &buff[nline][nchar]; ++cp) {
if (nchar != NCOLS - 1) {
for (cp = buf[nline]; cp < &buf[nline][nchar]; ++cp) {
if (*cp == '\0')
*cp = ' ';
}
buff[nline][nchar++] = c;
buf[nline][nchar++] = c;
++ncol;
}
bs = 0;
@ -123,50 +120,52 @@ newchar(int c)
static void
col(void)
{
int c;
Rune r;
int ret;
while ((c = getchar()) != EOF) {
switch (c) {
while (efgetrune(&r, stdin, "<stdin>")) {
switch (r) {
case '\x1b':
switch (c = getchar()) {
ret = efgetrune(&r, stdin, "<stdin>");
switch (r) {
case '8': /* reverse half-line-feed */
case '7': /* reverse line-feed */
linefeed(-1, 0);
continue;
case '9': /* forward half-line-feed */
if (ohalfline)
if (halfline)
break;
linefeed(1, 0);
continue;
}
if (!oescape)
if (!escape)
continue;
newchar('\x1b');
if (c != EOF)
newchar(c);
if (ret)
newchar(r);
break;
case '\v':
linefeed(-1, 0);
break;
case ' ':
if (!onotabs) {
if (!notabs) {
if (++nspaces != 8)
continue;
c = '\t';
r = '\t';
nspaces = 0;
}
/* FALLTHROUGH */
case '\r':
case '\b':
case '\t':
newchar(c);
newchar(r);
break;
case '\n':
linefeed(1, 1);
break;
default:
if (!iscntrl(c))
newchar(c);
if (!iscntrlrune(r))
newchar(r);
break;
}
}
@ -175,17 +174,17 @@ col(void)
static void
allocbuf(void)
{
char **bp;
Rune **bp;
buff = ereallocarray(NULL, pagsize, sizeof(*buff));
for (bp = buff; bp < &buff[pagsize]; ++bp)
*bp = emalloc(NCOLS);
buf = ereallocarray(NULL, pagesize, sizeof(*buf));
for (bp = buf; bp < buf + pagesize; ++bp)
*bp = ereallocarray(NULL, NCOLS, sizeof(**buf));
}
static void
usage(void)
{
enprintf(2, "usage: %s [-p][-l num][-b][-f][-x]\n", argv0);
enprintf(2, "usage: %s [-pbfx] [-l num]\n", argv0);
}
int
@ -193,35 +192,30 @@ main(int argc, char *argv[])
{
ARGBEGIN {
case 'b':
obackspace = 1;
backspace = 1;
break;
case 'f':
ohalfline = 1;
halfline = 1;
break;
case 'l':
pagsize = estrtonum(EARGF(usage()), 0, SIZE_MAX);
pagesize = estrtonum(EARGF(usage()), 0, MIN(SIZE_MAX, LLONG_MAX));
break;
case 'p':
oescape = 1;
escape = 1;
break;
case 'x':
onotabs = 1;
notabs = 1;
break;
default:
usage();
} ARGEND;
if (argc > 0)
if (argc)
usage();
allocbuf();
col();
flush();
if (ferror(stdin))
enprintf(1, "error reading input");
if (ferror(stdout))
enprintf(2, "error writing output");
return 0;
}