Audit col(1), add UTF-8-support
Nothing special here: this only renames variables and switches to the Rune utility functions. I also refactored the manpage.
This commit is contained in:
parent
a0e5b565a7
commit
521f324319
2
README
2
README
@ -19,7 +19,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
|
||||
=*| cksum yes none
|
||||
=*| cmp yes none
|
||||
#*| cols non-posix none
|
||||
col yes none
|
||||
#*| col non-posix none
|
||||
=*| comm yes none
|
||||
=*| cp yes none (-i)
|
||||
=*| cron non-posix none
|
||||
|
50
col.1
50
col.1
@ -1,4 +1,4 @@
|
||||
.Dd March 6, 2014
|
||||
.Dd March 22, 2014
|
||||
.Dt COL 1
|
||||
.Os sbase
|
||||
.Sh NAME
|
||||
@ -9,20 +9,15 @@
|
||||
.Op Fl bfpx
|
||||
.Op Fl l Ar num
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
utility filters all the reverse (and half reverse) line feeds,
|
||||
as they are produced by
|
||||
filters all reverse (and half reverse) line feeds,
|
||||
as produced by
|
||||
.Xr nroff 1
|
||||
with .2C of
|
||||
with .2C,
|
||||
.Xr ms 6
|
||||
or by
|
||||
or
|
||||
.Xr tbl 1 .
|
||||
.Nm
|
||||
also replaces spaces by tabs when it is possible.
|
||||
The control sequences managed by
|
||||
.Nm
|
||||
are:
|
||||
The recognized control sequences are:
|
||||
.Bl -tag -width Ds
|
||||
.It ESC-7
|
||||
Reverse line-feed
|
||||
@ -42,25 +37,24 @@ Carriage return
|
||||
New line
|
||||
.El
|
||||
.Pp
|
||||
All the other control codes and escape sequences are removed.
|
||||
All other control codes and escape sequences are removed.
|
||||
.Nm
|
||||
transforms all the spaces into tabulators.
|
||||
converts all spaces to tabs.
|
||||
.Sh OPTIONS
|
||||
.Bl -tag -width Ds
|
||||
.It Fl p
|
||||
Print unknown escape sequences to the output.
|
||||
Print unknown escape sequences.
|
||||
.It Fl b
|
||||
Do not print backspaces in output,
|
||||
and print only the last overstriked character in the output.
|
||||
Do not print backspaces and instead only print the last
|
||||
character written to each column position.
|
||||
.It Fl f
|
||||
Allow forward half line feeds in the output.
|
||||
.It Fl x
|
||||
Do not convert spaces in tabulators.
|
||||
Do not convert spaces to tabs.
|
||||
.It Fl l Ar num
|
||||
Increment to
|
||||
Buffer
|
||||
.Ar num
|
||||
the number of lines buffered for
|
||||
.Nm
|
||||
lines in memory.
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr nroff 1 ,
|
||||
@ -68,13 +62,9 @@ the number of lines buffered for
|
||||
.Xr ms 6
|
||||
.Sh BUGS
|
||||
.Nm
|
||||
only process text with a maximum of 256 lines with 800 bytes per line,
|
||||
although the number of lines can be modified with the
|
||||
.Fl l
|
||||
option.
|
||||
When the number of lines is bigger,
|
||||
the buffer is flushed to the output,
|
||||
so new reverse line feeds can not operate in the flushed lines.
|
||||
This implementation ignores SI and SO selection character sets,
|
||||
because it is supposed to work only with UTF-8 strings,
|
||||
although the UTF-8 support is missed.
|
||||
only buffers up to 256 lines with up to 800 characters per line
|
||||
unless the number of lines has been changed with the
|
||||
.Fl l
|
||||
flag.
|
||||
When the number of lines is larger, the buffer is flushed and
|
||||
reverse line feeds cannot operate on the flushed lines.
|
||||
|
118
col.c
118
col.c
@ -1,47 +1,47 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "utf.h"
|
||||
#include "util.h"
|
||||
|
||||
#define NLINES 256
|
||||
#define NCOLS 800
|
||||
|
||||
static char **buff;
|
||||
static Rune **buf;
|
||||
|
||||
static int obackspace, onotabs, ohalfline, oescape;
|
||||
static unsigned nline, ncol, nchar, nspaces, maxline, bs;
|
||||
static size_t pagsize = NLINES;
|
||||
static int backspace, notabs, halfline, escape;
|
||||
static size_t nline, ncol, nchar, nspaces, maxline, bs, pagesize = NLINES;
|
||||
|
||||
static void
|
||||
flush(void)
|
||||
{
|
||||
int c;
|
||||
unsigned i, j;
|
||||
Rune c;
|
||||
size_t i, j;
|
||||
|
||||
for (i = 0; i < maxline; ++i) {
|
||||
for (j = 0; j < NCOLS && (c = buff[i][j]) != '\0'; ++j)
|
||||
putchar(c);
|
||||
for (j = 0; j < NCOLS && (c = buf[i][j]); ++j)
|
||||
efputrune(&c, stdout, "<stdout>");
|
||||
putchar('\n');
|
||||
}
|
||||
bs = nchar = nline = ncol = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
forward(unsigned n)
|
||||
forward(size_t n)
|
||||
{
|
||||
unsigned lim;
|
||||
size_t lim;
|
||||
|
||||
for (lim = ncol + n; ncol != lim && nchar < NCOLS-1; ++nchar) {
|
||||
switch (buff[nline][nchar]) {
|
||||
for (lim = ncol + n; ncol != lim && nchar < NCOLS - 1; ++nchar) {
|
||||
switch (buf[nline][nchar]) {
|
||||
case '\b':
|
||||
--ncol;
|
||||
break;
|
||||
case '\0':
|
||||
buff[nline][nchar] = ' ';
|
||||
buf[nline][nchar] = ' ';
|
||||
/* FALLTHROUGH */
|
||||
default:
|
||||
++ncol;
|
||||
@ -53,31 +53,30 @@ forward(unsigned n)
|
||||
static void
|
||||
linefeed(int up, int rcarriage)
|
||||
{
|
||||
unsigned oncol = ncol;
|
||||
size_t oncol = ncol;
|
||||
|
||||
nspaces = 0;
|
||||
if (up > 0) {
|
||||
if (nline == pagsize-1) {
|
||||
if (nline == pagesize - 1) {
|
||||
flush();
|
||||
} else {
|
||||
if (++nline > maxline)
|
||||
maxline = nline;
|
||||
}
|
||||
} else {
|
||||
if (nline > 0)
|
||||
--nline;
|
||||
} else if (nline > 0) {
|
||||
--nline;
|
||||
}
|
||||
bs = 0;
|
||||
if (rcarriage) {
|
||||
forward(oncol);
|
||||
nchar = ncol = 0;
|
||||
nchar = ncol = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
newchar(int c)
|
||||
newchar(Rune c)
|
||||
{
|
||||
char *cp;
|
||||
Rune *cp;
|
||||
|
||||
forward(nspaces);
|
||||
nspaces = 0;
|
||||
@ -90,7 +89,7 @@ newchar(int c)
|
||||
nchar = ncol = 0;
|
||||
break;
|
||||
case '\t':
|
||||
forward(8 - ncol%8);
|
||||
forward(8 - ncol % 8);
|
||||
break;
|
||||
case '\b':
|
||||
if (ncol > 0)
|
||||
@ -100,20 +99,18 @@ newchar(int c)
|
||||
bs = 1;
|
||||
break;
|
||||
default:
|
||||
cp = &buff[nline][nchar];
|
||||
if (*cp != '\0' && *cp != ' ' && bs && !obackspace) {
|
||||
if (nchar != NCOLS-3) {
|
||||
memmove(cp + 3, cp + 1, NCOLS - nchar - 2);
|
||||
cp[1] = '\b';
|
||||
nchar += 2;
|
||||
}
|
||||
cp = &buf[nline][nchar];
|
||||
if (*cp && *cp != ' ' && bs && !backspace && nchar != NCOLS - 3) {
|
||||
memmove(cp + 3, cp + 1, (NCOLS - nchar - 2) * sizeof(*cp));
|
||||
cp[1] = '\b';
|
||||
nchar += 2;
|
||||
}
|
||||
if (nchar != NCOLS-1) {
|
||||
for (cp = buff[nline]; cp < &buff[nline][nchar]; ++cp) {
|
||||
if (nchar != NCOLS - 1) {
|
||||
for (cp = buf[nline]; cp < &buf[nline][nchar]; ++cp) {
|
||||
if (*cp == '\0')
|
||||
*cp = ' ';
|
||||
}
|
||||
buff[nline][nchar++] = c;
|
||||
buf[nline][nchar++] = c;
|
||||
++ncol;
|
||||
}
|
||||
bs = 0;
|
||||
@ -123,50 +120,52 @@ newchar(int c)
|
||||
static void
|
||||
col(void)
|
||||
{
|
||||
int c;
|
||||
Rune r;
|
||||
int ret;
|
||||
|
||||
while ((c = getchar()) != EOF) {
|
||||
switch (c) {
|
||||
while (efgetrune(&r, stdin, "<stdin>")) {
|
||||
switch (r) {
|
||||
case '\x1b':
|
||||
switch (c = getchar()) {
|
||||
ret = efgetrune(&r, stdin, "<stdin>");
|
||||
switch (r) {
|
||||
case '8': /* reverse half-line-feed */
|
||||
case '7': /* reverse line-feed */
|
||||
linefeed(-1, 0);
|
||||
continue;
|
||||
case '9': /* forward half-line-feed */
|
||||
if (ohalfline)
|
||||
if (halfline)
|
||||
break;
|
||||
linefeed(1, 0);
|
||||
continue;
|
||||
}
|
||||
if (!oescape)
|
||||
if (!escape)
|
||||
continue;
|
||||
newchar('\x1b');
|
||||
if (c != EOF)
|
||||
newchar(c);
|
||||
if (ret)
|
||||
newchar(r);
|
||||
break;
|
||||
case '\v':
|
||||
linefeed(-1, 0);
|
||||
break;
|
||||
case ' ':
|
||||
if (!onotabs) {
|
||||
if (!notabs) {
|
||||
if (++nspaces != 8)
|
||||
continue;
|
||||
c = '\t';
|
||||
r = '\t';
|
||||
nspaces = 0;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
case '\r':
|
||||
case '\b':
|
||||
case '\t':
|
||||
newchar(c);
|
||||
newchar(r);
|
||||
break;
|
||||
case '\n':
|
||||
linefeed(1, 1);
|
||||
break;
|
||||
default:
|
||||
if (!iscntrl(c))
|
||||
newchar(c);
|
||||
if (!iscntrlrune(r))
|
||||
newchar(r);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -175,17 +174,17 @@ col(void)
|
||||
static void
|
||||
allocbuf(void)
|
||||
{
|
||||
char **bp;
|
||||
Rune **bp;
|
||||
|
||||
buff = ereallocarray(NULL, pagsize, sizeof(*buff));
|
||||
for (bp = buff; bp < &buff[pagsize]; ++bp)
|
||||
*bp = emalloc(NCOLS);
|
||||
buf = ereallocarray(NULL, pagesize, sizeof(*buf));
|
||||
for (bp = buf; bp < buf + pagesize; ++bp)
|
||||
*bp = ereallocarray(NULL, NCOLS, sizeof(**buf));
|
||||
}
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
{
|
||||
enprintf(2, "usage: %s [-p][-l num][-b][-f][-x]\n", argv0);
|
||||
enprintf(2, "usage: %s [-pbfx] [-l num]\n", argv0);
|
||||
}
|
||||
|
||||
int
|
||||
@ -193,35 +192,30 @@ main(int argc, char *argv[])
|
||||
{
|
||||
ARGBEGIN {
|
||||
case 'b':
|
||||
obackspace = 1;
|
||||
backspace = 1;
|
||||
break;
|
||||
case 'f':
|
||||
ohalfline = 1;
|
||||
halfline = 1;
|
||||
break;
|
||||
case 'l':
|
||||
pagsize = estrtonum(EARGF(usage()), 0, SIZE_MAX);
|
||||
pagesize = estrtonum(EARGF(usage()), 0, MIN(SIZE_MAX, LLONG_MAX));
|
||||
break;
|
||||
case 'p':
|
||||
oescape = 1;
|
||||
escape = 1;
|
||||
break;
|
||||
case 'x':
|
||||
onotabs = 1;
|
||||
notabs = 1;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
} ARGEND;
|
||||
|
||||
if (argc > 0)
|
||||
if (argc)
|
||||
usage();
|
||||
|
||||
allocbuf();
|
||||
col();
|
||||
flush();
|
||||
|
||||
if (ferror(stdin))
|
||||
enprintf(1, "error reading input");
|
||||
if (ferror(stdout))
|
||||
enprintf(2, "error writing output");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user