Audit col(1), add UTF-8-support
Nothing special here, only renaming variables and adding the Rune utility functions. Also, I refactored the manpage.
This commit is contained in:
parent
a0e5b565a7
commit
521f324319
2
README
2
README
@ -19,7 +19,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
|
|||||||
=*| cksum yes none
|
=*| cksum yes none
|
||||||
=*| cmp yes none
|
=*| cmp yes none
|
||||||
#*| cols non-posix none
|
#*| cols non-posix none
|
||||||
col yes none
|
#*| col non-posix none
|
||||||
=*| comm yes none
|
=*| comm yes none
|
||||||
=*| cp yes none (-i)
|
=*| cp yes none (-i)
|
||||||
=*| cron non-posix none
|
=*| cron non-posix none
|
||||||
|
50
col.1
50
col.1
@ -1,4 +1,4 @@
|
|||||||
.Dd March 6, 2014
|
.Dd March 22, 2014
|
||||||
.Dt COL 1
|
.Dt COL 1
|
||||||
.Os sbase
|
.Os sbase
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
@ -9,20 +9,15 @@
|
|||||||
.Op Fl bfpx
|
.Op Fl bfpx
|
||||||
.Op Fl l Ar num
|
.Op Fl l Ar num
|
||||||
.Sh DESCRIPTION
|
.Sh DESCRIPTION
|
||||||
The
|
|
||||||
.Nm
|
.Nm
|
||||||
utility filters all the reverse (and half reverse) line feeds,
|
filters all reverse (and half reverse) line feeds,
|
||||||
as they are produced by
|
as produced by
|
||||||
.Xr nroff 1
|
.Xr nroff 1
|
||||||
with .2C of
|
with .2C,
|
||||||
.Xr ms 6
|
.Xr ms 6
|
||||||
or by
|
or
|
||||||
.Xr tbl 1 .
|
.Xr tbl 1 .
|
||||||
.Nm
|
The recognized control sequences are:
|
||||||
also replaces spaces by tabs when it is possible.
|
|
||||||
The control sequences managed by
|
|
||||||
.Nm
|
|
||||||
are:
|
|
||||||
.Bl -tag -width Ds
|
.Bl -tag -width Ds
|
||||||
.It ESC-7
|
.It ESC-7
|
||||||
Reverse line-feed
|
Reverse line-feed
|
||||||
@ -42,25 +37,24 @@ Carriage return
|
|||||||
New line
|
New line
|
||||||
.El
|
.El
|
||||||
.Pp
|
.Pp
|
||||||
All the other control codes and escape sequences are removed.
|
All other control codes and escape sequences are removed.
|
||||||
.Nm
|
.Nm
|
||||||
transforms all the spaces into tabulators.
|
converts all spaces to tabs.
|
||||||
.Sh OPTIONS
|
.Sh OPTIONS
|
||||||
.Bl -tag -width Ds
|
.Bl -tag -width Ds
|
||||||
.It Fl p
|
.It Fl p
|
||||||
Print unknown escape sequences to the output.
|
Print unknown escape sequences.
|
||||||
.It Fl b
|
.It Fl b
|
||||||
Do not print backspaces in output,
|
Do not print backspaces and instead only print the last
|
||||||
and print only the last overstriked character in the output.
|
character written to each column position.
|
||||||
.It Fl f
|
.It Fl f
|
||||||
Allow forward half line feeds in the output.
|
Allow forward half line feeds in the output.
|
||||||
.It Fl x
|
.It Fl x
|
||||||
Do not convert spaces in tabulators.
|
Do not convert spaces to tabs.
|
||||||
.It Fl l Ar num
|
.It Fl l Ar num
|
||||||
Increment to
|
Buffer
|
||||||
.Ar num
|
.Ar num
|
||||||
the number of lines buffered for
|
lines in memory.
|
||||||
.Nm
|
|
||||||
.El
|
.El
|
||||||
.Sh SEE ALSO
|
.Sh SEE ALSO
|
||||||
.Xr nroff 1 ,
|
.Xr nroff 1 ,
|
||||||
@ -68,13 +62,9 @@ the number of lines buffered for
|
|||||||
.Xr ms 6
|
.Xr ms 6
|
||||||
.Sh BUGS
|
.Sh BUGS
|
||||||
.Nm
|
.Nm
|
||||||
only process text with a maximum of 256 lines with 800 bytes per line,
|
only buffers up to 256 lines with up to 800 bytes per line
|
||||||
although the number of lines can be modified with the
|
unless the number of lines has been changed with the
|
||||||
.Fl l
|
.Fl l
|
||||||
option.
|
flag.
|
||||||
When the number of lines is bigger,
|
When the input exceeds that number of lines, the buffer is flushed and
|
||||||
the buffer is flushed to the output,
|
reverse line feeds cannot operate on the flushed lines.
|
||||||
so new reverse line feeds can not operate in the flushed lines.
|
|
||||||
This implementation ignores SI and SO selection character sets,
|
|
||||||
because it is supposed to work only with UTF-8 strings,
|
|
||||||
although the UTF-8 support is missed.
|
|
||||||
|
118
col.c
118
col.c
@ -1,47 +1,47 @@
|
|||||||
/* See LICENSE file for copyright and license details. */
|
/* See LICENSE file for copyright and license details. */
|
||||||
|
#include <limits.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
|
#include "utf.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
#define NLINES 256
|
#define NLINES 256
|
||||||
#define NCOLS 800
|
#define NCOLS 800
|
||||||
|
|
||||||
static char **buff;
|
static Rune **buf;
|
||||||
|
|
||||||
static int obackspace, onotabs, ohalfline, oescape;
|
static int backspace, notabs, halfline, escape;
|
||||||
static unsigned nline, ncol, nchar, nspaces, maxline, bs;
|
static size_t nline, ncol, nchar, nspaces, maxline, bs, pagesize = NLINES;
|
||||||
static size_t pagsize = NLINES;
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
flush(void)
|
flush(void)
|
||||||
{
|
{
|
||||||
int c;
|
Rune c;
|
||||||
unsigned i, j;
|
size_t i, j;
|
||||||
|
|
||||||
for (i = 0; i < maxline; ++i) {
|
for (i = 0; i < maxline; ++i) {
|
||||||
for (j = 0; j < NCOLS && (c = buff[i][j]) != '\0'; ++j)
|
for (j = 0; j < NCOLS && (c = buf[i][j]); ++j)
|
||||||
putchar(c);
|
efputrune(&c, stdout, "<stdout>");
|
||||||
putchar('\n');
|
putchar('\n');
|
||||||
}
|
}
|
||||||
bs = nchar = nline = ncol = 0;
|
bs = nchar = nline = ncol = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
forward(unsigned n)
|
forward(size_t n)
|
||||||
{
|
{
|
||||||
unsigned lim;
|
size_t lim;
|
||||||
|
|
||||||
for (lim = ncol + n; ncol != lim && nchar < NCOLS-1; ++nchar) {
|
for (lim = ncol + n; ncol != lim && nchar < NCOLS - 1; ++nchar) {
|
||||||
switch (buff[nline][nchar]) {
|
switch (buf[nline][nchar]) {
|
||||||
case '\b':
|
case '\b':
|
||||||
--ncol;
|
--ncol;
|
||||||
break;
|
break;
|
||||||
case '\0':
|
case '\0':
|
||||||
buff[nline][nchar] = ' ';
|
buf[nline][nchar] = ' ';
|
||||||
/* FALLTHROUGH */
|
/* FALLTHROUGH */
|
||||||
default:
|
default:
|
||||||
++ncol;
|
++ncol;
|
||||||
@ -53,31 +53,30 @@ forward(unsigned n)
|
|||||||
static void
|
static void
|
||||||
linefeed(int up, int rcarriage)
|
linefeed(int up, int rcarriage)
|
||||||
{
|
{
|
||||||
unsigned oncol = ncol;
|
size_t oncol = ncol;
|
||||||
|
|
||||||
nspaces = 0;
|
nspaces = 0;
|
||||||
if (up > 0) {
|
if (up > 0) {
|
||||||
if (nline == pagsize-1) {
|
if (nline == pagesize - 1) {
|
||||||
flush();
|
flush();
|
||||||
} else {
|
} else {
|
||||||
if (++nline > maxline)
|
if (++nline > maxline)
|
||||||
maxline = nline;
|
maxline = nline;
|
||||||
}
|
}
|
||||||
} else {
|
} else if (nline > 0) {
|
||||||
if (nline > 0)
|
--nline;
|
||||||
--nline;
|
|
||||||
}
|
}
|
||||||
bs = 0;
|
bs = 0;
|
||||||
if (rcarriage) {
|
if (rcarriage) {
|
||||||
forward(oncol);
|
forward(oncol);
|
||||||
nchar = ncol = 0;
|
nchar = ncol = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
newchar(int c)
|
newchar(Rune c)
|
||||||
{
|
{
|
||||||
char *cp;
|
Rune *cp;
|
||||||
|
|
||||||
forward(nspaces);
|
forward(nspaces);
|
||||||
nspaces = 0;
|
nspaces = 0;
|
||||||
@ -90,7 +89,7 @@ newchar(int c)
|
|||||||
nchar = ncol = 0;
|
nchar = ncol = 0;
|
||||||
break;
|
break;
|
||||||
case '\t':
|
case '\t':
|
||||||
forward(8 - ncol%8);
|
forward(8 - ncol % 8);
|
||||||
break;
|
break;
|
||||||
case '\b':
|
case '\b':
|
||||||
if (ncol > 0)
|
if (ncol > 0)
|
||||||
@ -100,20 +99,18 @@ newchar(int c)
|
|||||||
bs = 1;
|
bs = 1;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
cp = &buff[nline][nchar];
|
cp = &buf[nline][nchar];
|
||||||
if (*cp != '\0' && *cp != ' ' && bs && !obackspace) {
|
if (*cp && *cp != ' ' && bs && !backspace && nchar != NCOLS - 3) {
|
||||||
if (nchar != NCOLS-3) {
|
memmove(cp + 3, cp + 1, (NCOLS - nchar - 2) * sizeof(*cp));
|
||||||
memmove(cp + 3, cp + 1, NCOLS - nchar - 2);
|
cp[1] = '\b';
|
||||||
cp[1] = '\b';
|
nchar += 2;
|
||||||
nchar += 2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (nchar != NCOLS-1) {
|
if (nchar != NCOLS - 1) {
|
||||||
for (cp = buff[nline]; cp < &buff[nline][nchar]; ++cp) {
|
for (cp = buf[nline]; cp < &buf[nline][nchar]; ++cp) {
|
||||||
if (*cp == '\0')
|
if (*cp == '\0')
|
||||||
*cp = ' ';
|
*cp = ' ';
|
||||||
}
|
}
|
||||||
buff[nline][nchar++] = c;
|
buf[nline][nchar++] = c;
|
||||||
++ncol;
|
++ncol;
|
||||||
}
|
}
|
||||||
bs = 0;
|
bs = 0;
|
||||||
@ -123,50 +120,52 @@ newchar(int c)
|
|||||||
static void
|
static void
|
||||||
col(void)
|
col(void)
|
||||||
{
|
{
|
||||||
int c;
|
Rune r;
|
||||||
|
int ret;
|
||||||
|
|
||||||
while ((c = getchar()) != EOF) {
|
while (efgetrune(&r, stdin, "<stdin>")) {
|
||||||
switch (c) {
|
switch (r) {
|
||||||
case '\x1b':
|
case '\x1b':
|
||||||
switch (c = getchar()) {
|
ret = efgetrune(&r, stdin, "<stdin>");
|
||||||
|
switch (r) {
|
||||||
case '8': /* reverse half-line-feed */
|
case '8': /* reverse half-line-feed */
|
||||||
case '7': /* reverse line-feed */
|
case '7': /* reverse line-feed */
|
||||||
linefeed(-1, 0);
|
linefeed(-1, 0);
|
||||||
continue;
|
continue;
|
||||||
case '9': /* forward half-line-feed */
|
case '9': /* forward half-line-feed */
|
||||||
if (ohalfline)
|
if (halfline)
|
||||||
break;
|
break;
|
||||||
linefeed(1, 0);
|
linefeed(1, 0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!oescape)
|
if (!escape)
|
||||||
continue;
|
continue;
|
||||||
newchar('\x1b');
|
newchar('\x1b');
|
||||||
if (c != EOF)
|
if (ret)
|
||||||
newchar(c);
|
newchar(r);
|
||||||
break;
|
break;
|
||||||
case '\v':
|
case '\v':
|
||||||
linefeed(-1, 0);
|
linefeed(-1, 0);
|
||||||
break;
|
break;
|
||||||
case ' ':
|
case ' ':
|
||||||
if (!onotabs) {
|
if (!notabs) {
|
||||||
if (++nspaces != 8)
|
if (++nspaces != 8)
|
||||||
continue;
|
continue;
|
||||||
c = '\t';
|
r = '\t';
|
||||||
nspaces = 0;
|
nspaces = 0;
|
||||||
}
|
}
|
||||||
/* FALLTHROUGH */
|
/* FALLTHROUGH */
|
||||||
case '\r':
|
case '\r':
|
||||||
case '\b':
|
case '\b':
|
||||||
case '\t':
|
case '\t':
|
||||||
newchar(c);
|
newchar(r);
|
||||||
break;
|
break;
|
||||||
case '\n':
|
case '\n':
|
||||||
linefeed(1, 1);
|
linefeed(1, 1);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (!iscntrl(c))
|
if (!iscntrlrune(r))
|
||||||
newchar(c);
|
newchar(r);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -175,17 +174,17 @@ col(void)
|
|||||||
static void
|
static void
|
||||||
allocbuf(void)
|
allocbuf(void)
|
||||||
{
|
{
|
||||||
char **bp;
|
Rune **bp;
|
||||||
|
|
||||||
buff = ereallocarray(NULL, pagsize, sizeof(*buff));
|
buf = ereallocarray(NULL, pagesize, sizeof(*buf));
|
||||||
for (bp = buff; bp < &buff[pagsize]; ++bp)
|
for (bp = buf; bp < buf + pagesize; ++bp)
|
||||||
*bp = emalloc(NCOLS);
|
*bp = ereallocarray(NULL, NCOLS, sizeof(**buf));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
usage(void)
|
usage(void)
|
||||||
{
|
{
|
||||||
enprintf(2, "usage: %s [-p][-l num][-b][-f][-x]\n", argv0);
|
enprintf(2, "usage: %s [-pbfx] [-l num]\n", argv0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
@ -193,35 +192,30 @@ main(int argc, char *argv[])
|
|||||||
{
|
{
|
||||||
ARGBEGIN {
|
ARGBEGIN {
|
||||||
case 'b':
|
case 'b':
|
||||||
obackspace = 1;
|
backspace = 1;
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
ohalfline = 1;
|
halfline = 1;
|
||||||
break;
|
break;
|
||||||
case 'l':
|
case 'l':
|
||||||
pagsize = estrtonum(EARGF(usage()), 0, SIZE_MAX);
|
pagesize = estrtonum(EARGF(usage()), 0, MIN(SIZE_MAX, LLONG_MAX));
|
||||||
break;
|
break;
|
||||||
case 'p':
|
case 'p':
|
||||||
oescape = 1;
|
escape = 1;
|
||||||
break;
|
break;
|
||||||
case 'x':
|
case 'x':
|
||||||
onotabs = 1;
|
notabs = 1;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
usage();
|
usage();
|
||||||
} ARGEND;
|
} ARGEND;
|
||||||
|
|
||||||
if (argc > 0)
|
if (argc)
|
||||||
usage();
|
usage();
|
||||||
|
|
||||||
allocbuf();
|
allocbuf();
|
||||||
col();
|
col();
|
||||||
flush();
|
flush();
|
||||||
|
|
||||||
if (ferror(stdin))
|
|
||||||
enprintf(1, "error reading input");
|
|
||||||
if (ferror(stdout))
|
|
||||||
enprintf(2, "error writing output");
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user