Add tablist support and a mandoc-manpage to expand(1)
and mark it as finished in the README.

This is another example showing how broken the GNU coreutils are:

$ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
äää    üüü    ööö
$ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
äää  üüü  ööö

This is due to the fact that the GNU coreutils are still not UTF-8-aware: they see each "ä" as two separate characters (its two bytes), so "äää" is counted as six columns and padded with 4 spaces up to the tab stop at column 10. The correct way, however, is to count "äää" as three columns and pad it with 2 spaces up to the tab stop at column 5. One can only imagine how this silently breaks a lot of code around the world. WHAT WERE THEY THINKING?
This commit is contained in:
parent
48bf88851a
commit
692c11bf2b
2
README
2
README
@ -28,7 +28,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
|
||||
= du no -H, -L, (-x)
|
||||
=* echo yes none
|
||||
=* env yes none
|
||||
# expand yes none
|
||||
#* expand yes none
|
||||
expr yes none
|
||||
=* false yes none
|
||||
fold yes none
|
||||
|
75
expand.1
75
expand.1
@ -1,25 +1,50 @@
|
||||
.TH EXPAND 1 sbase\-VERSION
|
||||
.SH NAME
|
||||
expand \- expand tabs to spaces
|
||||
.SH SYNOPSIS
|
||||
.B expand
|
||||
.RB [ \-t
|
||||
.IR n ]
|
||||
.RI [ file ...]
|
||||
.SH DESCRIPTION
|
||||
expand processes the named files or the standard input, writing the
|
||||
standard output with tabs changed into spaces. Backspace characters
|
||||
are preserved into the output and decrement the column count for tab
|
||||
calculations.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.BI \-i
|
||||
Only change tabs to spaces at the start of lines.
|
||||
.TP
|
||||
.BI \-t " n"
|
||||
Expand tabs to
|
||||
.I n
|
||||
spaces. We currently support only a single numerical argument.
|
||||
.SH SEE ALSO
|
||||
.IR unexpand (1),
|
||||
.IR fold (1)
|
||||
.Dd January 25, 2015
|
||||
.Dt EXPAND 1 sbase\-VERSION
|
||||
.Sh NAME
|
||||
.Nm expand
|
||||
.Nd expand tabs to spaces
|
||||
.Sh SYNOPSIS
|
||||
.Nm expand
|
||||
.Op Fl i
|
||||
.Op Fl t Ar tablist
|
||||
.Op Ar file ...
|
||||
.Sh DESCRIPTION
|
||||
.Nm
|
||||
converts tabs to spaces in each
|
||||
.Ar file
|
||||
as specified in
|
||||
.Ar tablist .
|
||||
If no file is given,
|
||||
.Nm
|
||||
reads from stdin.
|
||||
.Pp
|
||||
Backspace characters are preserved and decrement the column count
|
||||
for tab calculations.
|
||||
.Sh OPTIONS
|
||||
.Bl -tag -width Ds
|
||||
.It Fl i
|
||||
Only expand tabs at the beginning of lines, i.e. expand each
|
||||
line until a character different from '\et' and ' ' is reached.
|
||||
.It Fl t Ar tablist
|
||||
Specify tab size or tabstops.
|
||||
.Ar tablist
|
||||
is a list of one (in the former case) or multiple (in the latter case)
|
||||
strictly positive integers separated by ' ' or ','.
|
||||
.Pp
|
||||
The default
|
||||
.Ar tablist
|
||||
is "8".
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr unexpand 1 ,
|
||||
.Xr fold 1
|
||||
.Sh STANDARDS
|
||||
The
|
||||
.Nm
|
||||
utility is compliant with the
|
||||
.St -p1003.1-2008
|
||||
specification.
|
||||
.Pp
|
||||
The
|
||||
.Op Fl i
|
||||
flag is an extension to that specification.
|
||||
|
143
expand.c
143
expand.c
@ -1,89 +1,86 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "utf.h"
|
||||
#include "util.h"
|
||||
|
||||
static int expand(const char *, FILE *, int);
|
||||
static int iflag = 0;
|
||||
static size_t *tablist = NULL;
|
||||
static size_t tablistlen = 0;
|
||||
|
||||
static int iflag = 0;
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
static size_t
|
||||
parselist(const char *s, size_t slen)
|
||||
{
|
||||
eprintf("usage: %s [-i] [-t n] [file...]\n", argv0);
|
||||
}
|
||||
size_t i, m, len;
|
||||
char *sep;
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
FILE *fp;
|
||||
int tabstop = 8;
|
||||
int ret = 0;
|
||||
if (s[0] == ',' || s[0] == ' ')
|
||||
eprintf("expand: tablist can't begin with a ',' or ' '.\n");
|
||||
if (s[slen - 1] == ',' || s[slen - 1] == ' ')
|
||||
eprintf("expand: tablist can't end with a ',' or ' '.\n");
|
||||
|
||||
ARGBEGIN {
|
||||
case 'i':
|
||||
iflag = 1;
|
||||
break;
|
||||
case 't':
|
||||
tabstop = estrtol(EARGF(usage()), 0);
|
||||
if (!tabstop)
|
||||
eprintf("tab size cannot be zero\n");
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
} ARGEND;
|
||||
|
||||
if (argc == 0) {
|
||||
expand("<stdin>", stdin, tabstop);
|
||||
} else {
|
||||
for (; argc > 0; argc--, argv++) {
|
||||
if (!(fp = fopen(argv[0], "r"))) {
|
||||
weprintf("fopen %s:", argv[0]);
|
||||
ret = 1;
|
||||
continue;
|
||||
}
|
||||
expand(argv[0], fp, tabstop);
|
||||
fclose(fp);
|
||||
len = 1;
|
||||
for (i = 0; i < slen; i++) {
|
||||
if (s[i] == ',' || s[i] == ' ') {
|
||||
if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' '))
|
||||
eprintf("expand: empty field in tablist.\n");
|
||||
len++;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
tablist = emalloc((len + 1) * sizeof(size_t));
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < slen; i += sep - (s + i) + 1) {
|
||||
tablist[m++] = strtol(s + i, &sep, 0);
|
||||
if (tablist[m - 1] == 0)
|
||||
eprintf("expand: tab size can't be zero.\n");
|
||||
if (*sep && *sep != ',' && *sep != ' ')
|
||||
eprintf("expand: invalid number in tablist.\n");
|
||||
if (m > 1 && tablist[m - 1] < tablist[m - 2])
|
||||
eprintf("expand: tablist must be ascending.\n");
|
||||
}
|
||||
|
||||
/* tab length = 1 for the overflowing case later in the matcher */
|
||||
tablist[len] = 1;
|
||||
return len;
|
||||
}
|
||||
|
||||
static int
|
||||
expand(const char *file, FILE *fp, int tabstop)
|
||||
expand(const char *file, FILE *fp)
|
||||
{
|
||||
int col = 0;
|
||||
size_t bol = 1, col = 0, i;
|
||||
Rune r;
|
||||
int bol = 1;
|
||||
|
||||
for (;;) {
|
||||
if (!readrune(file, fp, &r))
|
||||
break;
|
||||
|
||||
while (readrune(file, fp, &r)) {
|
||||
switch (r) {
|
||||
case '\t':
|
||||
if (tablistlen == 1)
|
||||
i = 0;
|
||||
else for (i = 0; i < tablistlen; i++)
|
||||
if (col < tablist[i])
|
||||
break;
|
||||
if (bol || !iflag) {
|
||||
do {
|
||||
col++;
|
||||
putchar(' ');
|
||||
} while (col % tabstop);
|
||||
} while (col % tablist[i]);
|
||||
} else {
|
||||
putchar('\t');
|
||||
col += tabstop - col % tabstop;
|
||||
col = tablist[i];
|
||||
}
|
||||
break;
|
||||
case '\b':
|
||||
bol = 0;
|
||||
if (col)
|
||||
col--;
|
||||
bol = 0;
|
||||
writerune("<stdout>", stdout, &r);
|
||||
putchar('\b');
|
||||
break;
|
||||
case '\n':
|
||||
col = 0;
|
||||
bol = 1;
|
||||
writerune("<stdout>", stdout, &r);
|
||||
col = 0;
|
||||
putchar('\n');
|
||||
break;
|
||||
default:
|
||||
col++;
|
||||
@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Print the command-line synopsis for expand through eprintf(), with
 * argv0 substituted for the program name.
 * NOTE(review): eprintf() and argv0 are declared outside this view
 * (presumably via util.h); eprintf() presumably terminates the program
 * after printing -- confirm.
 */
static void
|
||||
usage(void)
|
||||
{
|
||||
eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0);
|
||||
}
|
||||
|
||||
/*
 * Entry point: parse the -i and -t options, turn the tablist string into
 * the global tablist/tablistlen via parselist(), then run expand() on
 * stdin (no operands) or on each named file in order.
 * Returns 0, or 1 if at least one file could not be opened.
 * NOTE(review): ARGBEGIN/ARGEND/EARGF are option-parsing macros defined
 * outside this view; they are assumed to consume the options and leave
 * argc/argv describing the remaining file operands -- confirm.
 */
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
FILE *fp;
|
||||
char *tl = "8"; /* default tablist, used when -t is not given */
|
||||
int ret = 0; /* exit status; set to 1 when a file cannot be opened */
|
||||
|
||||
ARGBEGIN {
|
||||
case 'i':
|
||||
iflag = 1;
|
||||
break;
|
||||
case 't':
|
||||
tl = EARGF(usage()); /* usage() is the handler for a missing argument */
|
||||
if (!*tl)
|
||||
eprintf("expand: tablist cannot be empty.\n");
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
} ARGEND;
|
||||
|
||||
tablistlen = parselist(tl, strlen(tl)); /* parsed once, before any input is read */
|
||||
|
||||
if (argc == 0)
|
||||
expand("<stdin>", stdin);
|
||||
else {
|
||||
for (; argc > 0; argc--, argv++) {
|
||||
if (!(fp = fopen(argv[0], "r"))) {
|
||||
weprintf("fopen %s:", argv[0]);
|
||||
ret = 1;
|
||||
continue; /* skip this operand, keep processing the rest */
|
||||
}
|
||||
expand(argv[0], fp); /* NOTE(review): the int result of expand() is ignored */
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user