2013-04-29 12:54:36 -04:00
|
|
|
/* See LICENSE file for copyright and license details. */
|
|
|
|
#include <locale.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <wchar.h>
|
|
|
|
#include "util.h"
|
|
|
|
|
|
|
|
/* One input operand: the stream being read and the name used in diagnostics. */
typedef struct {
	FILE *fp;         /* open stream; stdin when the operand is "-" */
	const char *name; /* operand exactly as given on the command line */
} Fdescr;
|
|
|
|
|
|
|
|
static size_t unescape(wchar_t *);
|
|
|
|
static wint_t in(Fdescr *);
|
|
|
|
static void out(wchar_t);
|
|
|
|
static void sequential(Fdescr *, int, const wchar_t *, size_t);
|
|
|
|
static void parallel(Fdescr *, int, const wchar_t *, size_t);
|
|
|
|
|
2013-06-14 14:20:47 -04:00
|
|
|
static void
|
|
|
|
usage(void)
|
|
|
|
{
|
|
|
|
eprintf("usage: %s [-s] [-d list] file...\n", argv0);
|
|
|
|
}
|
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
int
|
2013-06-14 14:20:47 -04:00
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
2013-04-29 12:54:36 -04:00
|
|
|
const char *adelim = NULL;
|
|
|
|
bool seq = false;
|
|
|
|
wchar_t *delim;
|
|
|
|
size_t len;
|
|
|
|
Fdescr *dsc;
|
2013-06-14 14:20:47 -04:00
|
|
|
int i;
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
setlocale(LC_CTYPE, "");
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-06-14 14:20:47 -04:00
|
|
|
ARGBEGIN {
|
|
|
|
case 's':
|
|
|
|
seq = true;
|
|
|
|
break;
|
|
|
|
case 'd':
|
|
|
|
adelim = EARGF(usage());
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
usage();
|
|
|
|
} ARGEND;
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
if(argc == 0)
|
2013-06-14 14:20:47 -04:00
|
|
|
usage();
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
/* populate delimeters */
|
|
|
|
if(!adelim)
|
|
|
|
adelim = "\t";
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
len = mbstowcs(NULL, adelim, 0);
|
|
|
|
if(len == (size_t)-1)
|
|
|
|
eprintf("invalid delimiter\n");
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
delim = malloc((len + 1) * sizeof(*delim));
|
|
|
|
if(!delim)
|
|
|
|
eprintf("out of memory\n");
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
mbstowcs(delim, adelim, len);
|
|
|
|
len = unescape(delim);
|
|
|
|
if(len == 0)
|
|
|
|
eprintf("no delimiters specified\n");
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
/* populate file list */
|
|
|
|
dsc = malloc(argc * sizeof(*dsc));
|
|
|
|
if(!dsc)
|
|
|
|
eprintf("out of memory\n");
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
for(i = 0; i < argc; i++) {
|
|
|
|
const char *name = argv[i];
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
if(strcmp(name, "-") == 0)
|
|
|
|
dsc[i].fp = stdin;
|
|
|
|
else
|
|
|
|
dsc[i].fp = fopen(name, "r");
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
if(!dsc[i].fp)
|
|
|
|
eprintf("can't open '%s':", name);
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
dsc[i].name = name;
|
|
|
|
}
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
if(seq)
|
|
|
|
sequential(dsc, argc, delim, len);
|
|
|
|
else
|
|
|
|
parallel(dsc, argc, delim, len);
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
for(i = 0; i < argc; i++) {
|
|
|
|
if(dsc[i].fp != stdin)
|
|
|
|
(void)fclose(dsc[i].fp);
|
|
|
|
}
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
free(delim);
|
|
|
|
free(dsc);
|
2013-06-14 14:20:47 -04:00
|
|
|
|
2013-10-07 11:41:55 -04:00
|
|
|
return EXIT_SUCCESS;
|
2013-04-29 12:54:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Expand the backslash escapes of a delimiter list in place.
 *
 * Recognized sequences: \n (newline), \t (tab), \\ (backslash) and
 * \0, which is stored as a null element inside the list.  Any other
 * escape, including a lone trailing backslash, ends processing at that
 * point (POSIX leaves the result unspecified).
 *
 * delim  null-terminated wide string; overwritten with the expanded
 *        list.  No terminator is written after the expanded elements.
 *
 * Returns the number of elements stored in delim.
 */
static size_t
unescape(wchar_t *delim)
{
	const wchar_t *src = delim; /* next character to read */
	wchar_t *dst = delim;       /* next slot to write; never ahead of src */

	while(*src != L'\0') {
		wchar_t ch = *src++;

		if(ch == L'\\') {
			wchar_t esc = *src++;

			if(esc == L'n')
				ch = L'\n';
			else if(esc == L't')
				ch = L'\t';
			else if(esc == L'0')
				ch = L'\0';
			else if(esc == L'\\')
				ch = L'\\';
			else
				/* POSIX: unspecified results */
				return (size_t)(dst - delim);
		}
		*dst++ = ch;
	}

	return (size_t)(dst - delim);
}
|
|
|
|
|
|
|
|
static wint_t
|
2013-06-14 14:20:47 -04:00
|
|
|
in(Fdescr *f)
|
|
|
|
{
|
2013-04-29 12:54:36 -04:00
|
|
|
wint_t c = fgetwc(f->fp);
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
if(c == WEOF && ferror(f->fp))
|
|
|
|
eprintf("'%s' read error:", f->name);
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Write one wide character to stdout, reporting a write error through
 * eprintf() when the stream's error indicator is set afterwards.
 */
static void
out(wchar_t c)
{
	putwchar(c);

	if(!ferror(stdout))
		return;

	eprintf("write error:");
}
|
|
|
|
|
|
|
|
static void
|
2013-06-14 14:20:47 -04:00
|
|
|
sequential(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt)
|
|
|
|
{
|
2013-04-29 12:54:36 -04:00
|
|
|
int i;
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
for(i = 0; i < len; i++) {
|
|
|
|
size_t d = 0;
|
|
|
|
wint_t c, last = WEOF;
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
while((c = in(&dsc[i])) != WEOF) {
|
|
|
|
if(last == '\n') {
|
|
|
|
if(delim[d] != '\0')
|
|
|
|
out(delim[d]);
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
d++;
|
|
|
|
d %= cnt;
|
|
|
|
}
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
if(c != '\n')
|
|
|
|
out((wchar_t)c);
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
last = c;
|
|
|
|
}
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
if(last == '\n')
|
|
|
|
out((wchar_t)last);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2013-06-14 14:20:47 -04:00
|
|
|
parallel(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt)
|
|
|
|
{
|
2013-04-29 12:54:36 -04:00
|
|
|
int last;
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
do {
|
|
|
|
int i;
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
last = 0;
|
|
|
|
for(i = 0; i < len; i++) {
|
|
|
|
wint_t c;
|
|
|
|
wchar_t d = delim[i % cnt];
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
do {
|
|
|
|
wint_t o = in(&dsc[i]);
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
c = o;
|
|
|
|
switch(c) {
|
|
|
|
case WEOF:
|
|
|
|
if(last == 0)
|
|
|
|
break;
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
o = '\n';
|
|
|
|
/* fallthrough */
|
|
|
|
case '\n':
|
|
|
|
if(i != len - 1)
|
|
|
|
o = d;
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
if(o != WEOF) {
|
|
|
|
/* pad with delimiters up to this point */
|
|
|
|
while(++last < i) {
|
|
|
|
if(d != '\0')
|
|
|
|
out(d);
|
|
|
|
}
|
2013-05-04 01:49:51 -04:00
|
|
|
|
2013-04-29 12:54:36 -04:00
|
|
|
out((wchar_t)o);
|
|
|
|
}
|
|
|
|
} while(c != '\n' && c != WEOF);
|
|
|
|
}
|
|
|
|
} while(last > 0);
|
|
|
|
}
|
2013-05-04 01:49:51 -04:00
|
|
|
|