From 8b87d0098a1c91c3e7b3ba6cc77a176d178a6812 Mon Sep 17 00:00:00 2001 From: Tuukka Kataja Date: Mon, 9 Jun 2014 16:52:20 +0100 Subject: [PATCH] Add unexpand(1) --- LICENSE | 1 + Makefile | 1 + README | 4 +- TODO | 2 - expand.1 | 1 + unexpand.1 | 25 ++++++++++ unexpand.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 170 insertions(+), 4 deletions(-) create mode 100644 unexpand.1 create mode 100644 unexpand.c diff --git a/LICENSE b/LICENSE index d46b353..ac9bee2 100644 --- a/LICENSE +++ b/LICENSE @@ -26,6 +26,7 @@ MIT/X Consortium License © 2014 Silvan Jegen © 2014 Laslo Hunhold © 2014 Daniel Bainton +© 2014 Tuukka Kataja Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff --git a/Makefile b/Makefile index 27a6693..5919c8d 100644 --- a/Makefile +++ b/Makefile @@ -89,6 +89,7 @@ SRC = \ uudecode.c \ uuencode.c \ uname.c \ + unexpand.c \ uniq.c \ unlink.c \ seq.c \ diff --git a/README b/README index 39a6857..1a7f74d 100644 --- a/README +++ b/README @@ -11,8 +11,8 @@ The following programs are currently implemented: kill ln ls mc md5sum mkdir mkfifo mktemp mv nice nl nohup paste printenv pwd readlink renice rm rmdir sleep setsid sort split sponge strings sync tail tar tee test touch tr true tty uudecode - uuencode uname uniq unlink seq sha1sum sha256sum sha512sum wc - xargs yes + uuencode uname unexpand uniq unlink seq sha1sum sha256sum + sha512sum wc xargs yes sbase is mostly following POSIX but we deviate wherever we think it is appropriate. diff --git a/TODO b/TODO index 59bbd24..288f158 100644 --- a/TODO +++ b/TODO @@ -11,8 +11,6 @@ test [expression...] tr: support for character classes [:alnum:] -unexpand [-a] [-t N] [file...] - od/hd uuencode, uudecode: diff --git a/expand.1 b/expand.1 index 82be286..98c66ab 100644 --- a/expand.1 +++ b/expand.1 @@ -18,4 +18,5 @@ Expand tabs to .I n spaces. 
We currently support only a single numerical argument. .SH SEE ALSO +.IR unexpand (1), .IR fold (1) diff --git a/unexpand.1 b/unexpand.1 new file mode 100644 index 0000000..62dd957 --- /dev/null +++ b/unexpand.1 @@ -0,0 +1,25 @@ +.TH UNEXPAND 1 sbase\-VERSION +.SH NAME +unexpand \- convert blanks to tabs +.SH SYNOPSIS +.B unexpand +.RB [ \-a ] +.RB [ \-t +.IR n ] +.RI [ file ...] +.SH DESCRIPTION +unexpand processes the named files or the standard input, writing to the +standard output with consecutive blanks (spaces and tabs) converted +into tabs. Backspace characters are preserved in the output and +decrement the column count for tab calculations. +.SH OPTIONS +.TP +.BI \-a +convert blanks to tabs everywhere, not just at the start of lines +.TP +.BI \-t " n" +set tab size to +.I n +spaces (default: 8) +.SH SEE ALSO +.IR expand (1) diff --git a/unexpand.c b/unexpand.c new file mode 100644 index 0000000..5416803 --- /dev/null +++ b/unexpand.c @@ -0,0 +1,140 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include +#include +#include +#include +#include "util.h" + +typedef struct { + FILE *fp; + const char *name; +} Fdescr; + +static void unexpand(Fdescr *dsc); + +static bool aflag = false; +static int tabsize = 8; + +static void +usage(void) +{ + eprintf("usage: %s [-a] [-t n] [file ...]\n", argv0); +} + +int +main(int argc, char *argv[]) +{ + Fdescr dsc; + FILE *fp; + + ARGBEGIN { + case 't': + tabsize = estrtol(EARGF(usage()), 0); + if(tabsize <= 0) + eprintf("unexpand: invalid tabsize\n", argv[0]); + /* Fallthrough: -t implies -a */ + case 'a': + aflag = true; + break; + default: + usage(); + } ARGEND; + + if (argc == 0) { + dsc.name = ""; + dsc.fp = stdin; + unexpand(&dsc); + } else { + for (; argc > 0; argc--, argv++) { + if (!(fp = fopen(*argv, "r"))) { + weprintf("fopen %s:", *argv); + continue; + } + dsc.name = *argv; + dsc.fp = fp; + unexpand(&dsc); + fclose(fp); + } + } + + return EXIT_SUCCESS; +} + +static wint_t +in(Fdescr *f) +{ + wint_t c = fgetwc(f->fp); + + if (c == WEOF && ferror(f->fp)) + eprintf("'%s' read error:", f->name); + + return c; +} + +static void +out(wint_t c) +{ + putwchar(c); + if (ferror(stdout)) + eprintf("write error:"); +} + +static void +unexpandspan(unsigned int n, unsigned int col) +{ + unsigned int off = (col-n) % tabsize; + + if(n + off >= tabsize && n > 1) + n += off; + + for(; n >= tabsize; n -= tabsize) + out('\t'); + while(n--) + out(' '); +} + +static void +unexpand(Fdescr *dsc) +{ + unsigned int n = 0, col = 0; + bool bol = true; + wint_t c; + + while ((c = in(dsc)) != EOF) { + switch (c) { + case ' ': + if (bol || aflag) + n++; + col++; + break; + case '\t': + if (bol || aflag) + n += tabsize - col % tabsize; + col += tabsize - col % tabsize; + break; + case '\b': + if (bol || aflag) + unexpandspan(n, col); + col -= (col > 0); + n = 0; + bol = false; + break; + case '\n': + if (bol || aflag) + unexpandspan(n, col); + n = col = 0; + bol = true; + break; + default: + if (bol || aflag) + unexpandspan(n, col); + n = 
0; + col++; + bol = false; + } + if ((c != ' ' && c != '\t') || (!aflag && !bol)) + out(c); + } + if (n > 0 && (bol || aflag)) + unexpandspan(n, col); +}