Add tablist support and a mandoc-manpage to expand(1)

and mark it as finished in the README.

This is another example showing how broken the GNU coreutils are:

$ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
äää    üüü    ööö
$ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
äää  üüü  ööö

This is because they are still not UTF-8-aware and actually treat
"ä" as two separate characters (its two UTF-8 bytes), so "äää" is
counted as 6 columns and padded with 4 spaces up to the tabstop at
column 10.
The correct behaviour, however, is to count "äää" as 3 columns and
pad it with 2 spaces up to the tabstop at column 5.
One can only imagine how this silently breaks a lot of code around
the world.
WHAT WERE THEY THINKING?
This commit is contained in:
FRIGN 2015-01-25 14:31:02 +01:00
parent 48bf88851a
commit 692c11bf2b
3 changed files with 143 additions and 77 deletions

2
README
View File

@ -28,7 +28,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
= du no -H, -L, (-x) = du no -H, -L, (-x)
=* echo yes none =* echo yes none
=* env yes none =* env yes none
# expand yes none #* expand yes none
expr yes none expr yes none
=* false yes none =* false yes none
fold yes none fold yes none

View File

@ -1,25 +1,50 @@
.TH EXPAND 1 sbase\-VERSION .Dd January 25, 2015
.SH NAME .Dt EXPAND 1 sbase\-VERSION
expand \- expand tabs to spaces .Sh NAME
.SH SYNOPSIS .Nm expand
.B expand .Nd expand tabs to spaces
.RB [ \-t .Sh SYNOPSIS
.IR n ] .Nm expand
.RI [ file ...] .Op Fl i
.SH DESCRIPTION .Op Fl t Ar tablist
expand processes the named files or the standard input, writing the .Op Ar file ...
standard output with tabs changed into spaces. Backspace characters .Sh DESCRIPTION
are preserved into the output and decrement the column count for tab .Nm
calculations. converts tabs to spaces in each
.SH OPTIONS .Ar file
.TP as specified in
.BI \-i .Ar tablist .
Only change tabs to spaces at the start of lines. If no file is given,
.TP .Nm
.BI \-t " n" reads from stdin.
Expand tabs to .Pp
.I n Backspace characters are preserved and decrement the column count
spaces. We currently support only a single numerical argument. for tab calculations.
.SH SEE ALSO .Sh OPTIONS
.IR unexpand (1), .Bl -tag -width Ds
.IR fold (1) .It Fl i
Only expand tabs at the beginning of lines, i.e. expand each
line until a character different from '\et' and ' ' is reached.
.It Fl t Ar tablist
Specify tab size or tabstops.
.Ar tablist
is a list of one (in the former case) or multiple (in the latter case)
strictly positive integers separated by ' ' or ','.
.Pp
The default
.Ar tablist
is "8".
.El
.Sh SEE ALSO
.Xr unexpand 1 ,
.Xr fold 1
.Sh STANDARDS
The
.Nm
utility is compliant with the
.St -p1003.1-2008
specification.
.Pp
The
.Op Fl i
flag is an extension to that specification.

143
expand.c
View File

@ -1,89 +1,86 @@
/* See LICENSE file for copyright and license details. */ /* See LICENSE file for copyright and license details. */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include "utf.h" #include "utf.h"
#include "util.h" #include "util.h"
static int expand(const char *, FILE *, int); static int iflag = 0;
static size_t *tablist = NULL;
static size_t tablistlen = 0;
static int iflag = 0; static size_t
parselist(const char *s, size_t slen)
static void
usage(void)
{ {
eprintf("usage: %s [-i] [-t n] [file...]\n", argv0); size_t i, m, len;
} char *sep;
int if (s[0] == ',' || s[0] == ' ')
main(int argc, char *argv[]) eprintf("expand: tablist can't begin with a ',' or ' '.\n");
{ if (s[slen - 1] == ',' || s[slen - 1] == ' ')
FILE *fp; eprintf("expand: tablist can't end with a ',' or ' '.\n");
int tabstop = 8;
int ret = 0;
ARGBEGIN { len = 1;
case 'i': for (i = 0; i < slen; i++) {
iflag = 1; if (s[i] == ',' || s[i] == ' ') {
break; if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' '))
case 't': eprintf("expand: empty field in tablist.\n");
tabstop = estrtol(EARGF(usage()), 0); len++;
if (!tabstop)
eprintf("tab size cannot be zero\n");
break;
default:
usage();
} ARGEND;
if (argc == 0) {
expand("<stdin>", stdin, tabstop);
} else {
for (; argc > 0; argc--, argv++) {
if (!(fp = fopen(argv[0], "r"))) {
weprintf("fopen %s:", argv[0]);
ret = 1;
continue;
}
expand(argv[0], fp, tabstop);
fclose(fp);
} }
} }
return ret; tablist = emalloc((len + 1) * sizeof(size_t));
m = 0;
for (i = 0; i < slen; i += sep - (s + i) + 1) {
tablist[m++] = strtol(s + i, &sep, 0);
if (tablist[m - 1] == 0)
eprintf("expand: tab size can't be zero.\n");
if (*sep && *sep != ',' && *sep != ' ')
eprintf("expand: invalid number in tablist.\n");
if (m > 1 && tablist[m - 1] < tablist[m - 2])
eprintf("expand: tablist must be ascending.\n");
}
/* tab length = 1 for the overflowing case later in the matcher */
tablist[len] = 1;
return len;
} }
static int static int
expand(const char *file, FILE *fp, int tabstop) expand(const char *file, FILE *fp)
{ {
int col = 0; size_t bol = 1, col = 0, i;
Rune r; Rune r;
int bol = 1;
for (;;) {
if (!readrune(file, fp, &r))
break;
while (readrune(file, fp, &r)) {
switch (r) { switch (r) {
case '\t': case '\t':
if (tablistlen == 1)
i = 0;
else for (i = 0; i < tablistlen; i++)
if (col < tablist[i])
break;
if (bol || !iflag) { if (bol || !iflag) {
do { do {
col++; col++;
putchar(' '); putchar(' ');
} while (col % tabstop); } while (col % tablist[i]);
} else { } else {
putchar('\t'); putchar('\t');
col += tabstop - col % tabstop; col = tablist[i];
} }
break; break;
case '\b': case '\b':
bol = 0;
if (col) if (col)
col--; col--;
bol = 0; putchar('\b');
writerune("<stdout>", stdout, &r);
break; break;
case '\n': case '\n':
col = 0;
bol = 1; bol = 1;
writerune("<stdout>", stdout, &r); col = 0;
putchar('\n');
break; break;
default: default:
col++; col++;
@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop)
return 0; return 0;
} }
static void
usage(void)
{
eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0);
}
int
main(int argc, char *argv[])
{
FILE *fp;
char *tl = "8";
int ret = 0;
ARGBEGIN {
case 'i':
iflag = 1;
break;
case 't':
tl = EARGF(usage());
if (!*tl)
eprintf("expand: tablist cannot be empty.\n");
break;
default:
usage();
} ARGEND;
tablistlen = parselist(tl, strlen(tl));
if (argc == 0)
expand("<stdin>", stdin);
else {
for (; argc > 0; argc--, argv++) {
if (!(fp = fopen(argv[0], "r"))) {
weprintf("fopen %s:", argv[0]);
ret = 1;
continue;
}
expand(argv[0], fp);
fclose(fp);
}
}
return ret;
}