diff --git a/README b/README index 8400dad..daedbc3 100644 --- a/README +++ b/README @@ -73,7 +73,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support, =* true yes none =* tty yes none =* uname yes none -# unexpand yes none +#* unexpand yes none = uniq no -f, -s = unlink yes none = uudecode no -o diff --git a/unexpand.1 b/unexpand.1 index 506d9ed..c5a44b9 100644 --- a/unexpand.1 +++ b/unexpand.1 @@ -1,30 +1,45 @@ -.Dd January 30, 2015 +.Dd February 8, 2015 .Dt UNEXPAND 1 .Os sbase .Sh NAME .Nm unexpand -.Nd convert blanks to tabs +.Nd unexpand spaces to tabs .Sh SYNOPSIS .Nm .Op Fl a -.Op Fl t Ar n +.Op Fl t Ar tablist .Op Ar file ... .Sh DESCRIPTION .Nm -processes the named -.Ar files -or the standard input, writing the -standard output with consecutive blanks (spaces and tabs) converted -into tabs. Backspace characters are preserved into the output and -decrement the column count for tab calculations. +converts spaces to tabs in each +.Ar file +as specified in +.Ar tablist . +If no file is given, +.Nm +reads from stdin. +.Pp +Backspace characters are preserved and decrement the column count +for tab calculations. .Sh OPTIONS .Bl -tag -width Ds .It Fl a -Convert blanks to tabs everywhere, not just at the start of lines. -.It Fl t Ar n -Set tab size to -.Ar n -spaces (default: 8). +Convert spaces to tabs everywhere, not just at the start of lines. +.It Fl t Ar tablist +Specify tab size or tabstops. +.Ar tablist +is a list of one (in the former case) or multiple (in the latter case) +strictly positive integers separated by ' ' or ','. +.Pp +The default +.Ar tablist +is "8". .El .Sh SEE ALSO .Xr expand 1 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. diff --git a/unexpand.c b/unexpand.c index 839eac6..8f1fe07 100644 --- a/unexpand.c +++ b/unexpand.c @@ -1,5 +1,5 @@ /* See LICENSE file for copyright and license details. */ -#include +#include #include #include #include @@ -7,15 +7,126 @@ #include "utf.h" #include "util.h" -static void unexpand(const char *, FILE *); +static int aflag = 0; +static size_t *tablist = NULL; +static int tablistlen = 8; -static int aflag = 0; -static int tabsize = 8; +static size_t +parselist(const char *s) +{ + size_t i; + char *p, *tmp; + + tmp = estrdup(s); + for (i = 0; (p = strsep(&tmp, " ,")); i++) { + if (*p == '\0') + eprintf("empty field in tablist\n"); + tablist = erealloc(tablist, (i + 1) * sizeof(*tablist)); + tablist[i] = estrtonum(p, 1, MIN(LLONG_MAX, SIZE_MAX)); + if (i > 0 && tablist[i - 1] >= tablist[i]) + eprintf("tablist must be ascending\n"); + } + tablist = erealloc(tablist, (i + 1) * sizeof(*tablist)); + return i; +} + +static void +unexpandspan(size_t last, size_t col) +{ + size_t off, i, j; + Rune r; + + if (tablistlen == 1) { + i = 0; + off = last % tablist[i]; + + if ((col - last) + off >= tablist[i] && last < col) + last -= off; + + r = '\t'; + for (; last + tablist[i] <= col; last += tablist[i]) + writerune("", stdout, &r); + r = ' '; + for (; last < col; last++) + writerune("", stdout, &r); + } else { + for (i = 0; i < tablistlen; i++) + if (col < tablist[i]) + break; + for (j = 0; j < tablistlen; j++) + if (last < tablist[j]) + break; + r = '\t'; + for (; j < i; j++) { + writerune("", stdout, &r); + last = tablist[j]; + } + r = ' '; + for (; last < col; last++) + writerune("", stdout, &r); + } + +} + +static void +unexpand(const char *file, FILE *fp) +{ + Rune r; + size_t last = 0, col = 0, i; + int bol = 1; + + while (readrune(file, fp, &r)) { + switch (r) { + case ' ': + if (!bol && !aflag) + last++; + col++; + break; + case '\t': + if (tablistlen == 1) { + if (!bol && !aflag) + last += tablist[0] - col % tablist[0]; + col += tablist[0] - col % tablist[0]; + } else { + for (i = 0; i < tablistlen; i++) + if (col < tablist[i]) + break; + if (!bol && !aflag) + last = tablist[i]; + col = tablist[i]; + } + break; + case '\b': + if (bol || aflag) + unexpandspan(last, col); + col -= (col > 0); + last = col; + bol = 0; + break; + case '\n': + if (bol || aflag) + unexpandspan(last, col); + last = col = 0; + bol = 1; + break; + default: + if (bol || aflag) + unexpandspan(last, col); + last = ++col; + bol = 0; + break; + } + if ((r != ' ' && r != '\t') || (!aflag && !bol)) + writerune("", stdout, &r); + } + if (last < col && (bol || aflag)) + unexpandspan(last, col); +} static void usage(void) { - eprintf("usage: %s [-a] [-t n] [file ...]\n", argv0); + eprintf("usage: %s [-a] [-t tablist] [file ...]\n", argv0); } int @@ -23,12 +134,13 @@ main(int argc, char *argv[]) { FILE *fp; int ret = 0; + char *tl = "8"; ARGBEGIN { case 't': - tabsize = estrtonum(EARGF(usage()), 0, INT_MAX); - if (tabsize <= 0) - eprintf("unexpand: invalid tabsize\n"); + tl = EARGF(usage()); + if (!*tl) + eprintf("tablist cannot be empty\n"); /* Fallthrough: -t implies -a */ case 'a': aflag = 1; @@ -37,9 +149,11 @@ main(int argc, char *argv[]) usage(); } ARGEND; - if (argc == 0) { + tablistlen = parselist(tl); + + if (argc == 0) unexpand("", stdin); - } else { + else { for (; argc > 0; argc--, argv++) { if (!(fp = fopen(argv[0], "r"))) { weprintf("fopen %s:", argv[0]); @@ -52,69 +166,3 @@ main(int argc, char *argv[]) } return ret; } - -static void -unexpandspan(unsigned int n, unsigned int col) -{ - unsigned int off = (col-n) % tabsize; - Rune r; - - if (n + off >= tabsize && n > 1) - n += off; - - r = '\t'; - for (; n >= tabsize; n -= tabsize) - writerune("", stdout, &r); - r = ' '; - while (n--) - writerune("", stdout, &r); -} - -static void -unexpand(const char *file, FILE *fp) -{ - unsigned int n = 0, col = 0; - Rune r; - int bol = 1; - - while (1) { - if (!readrune(file, fp, &r)) - break; - - switch (r) { - case ' ': - if (bol || aflag) - n++; - col++; - break; - case '\t': - if (bol || aflag) - n += tabsize - col % tabsize; - col += tabsize - col % tabsize; - break; - case '\b': - if (bol || aflag) - unexpandspan(n, col); - col -= (col > 0); - n = 0; - bol = 0; - break; - case '\n': - if (bol || aflag) - unexpandspan(n, col); - n = col = 0; - bol = 1; - break; - default: - if (bol || aflag) - unexpandspan(n, col); - n = 0; - col++; - bol = 0; - } - if ((r != ' ' && r != '\t') || (!aflag && !bol)) - writerune("", stdout, &r); - } - if (n > 0 && (bol || aflag)) - unexpandspan(n, col); -}