0545d32ce9
In general, POSIX does not define /dev/std{in, out, err} because it does not want to depend on the dev-filesystem. For utilities, it thus introduced the '-' keyword to denote standard input (and, in some cases, standard output), and programs have to handle it accordingly. Sadly, the design of many tools doesn't allow strict shell redirections, and many scripts don't use this feature even when it is possible. Thus, we decided to implement it consistently across all tools where it makes sense (namely those which read files). Along the way, I spotted some behavioural bugs in libutil/crypt.c, as well as other places where the files were not closed with fshut after use.
553 lines
9.5 KiB
C
553 lines
9.5 KiB
C
/* See LICENSE file for copyright and license details. */
|
|
#include <ctype.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "arg.h"
|
|
#include "text.h"
|
|
#include "utf.h"
|
|
#include "util.h"
|
|
|
|
/* Initial capacity and growth factor of the dynamically sized arrays. */
enum {
	INIT = 1,
	GROW = 2,
};

/* Modes for addtospan(): append to the span, or start it over. */
enum {
	EXPAND = 0,
	RESET  = 1,
};

/* linecmp() result when neither line has its join field. */
enum {
	FIELD_ERROR = -2,
};
|
|
|
|
/* One field of an input line: a pointer into the line text plus a length. */
struct field {
	char *s;     /* first byte of the field (not NUL-terminated per field) */
	size_t len;  /* number of bytes in the field */
};
|
|
|
|
/* One input line together with its growable array of fields. */
struct line {
	char *text;            /* the line buffer the fields point into */
	size_t nf;             /* number of fields in use */
	size_t maxf;           /* allocated capacity of fields */
	struct field *fields;  /* the fields, in input order */
};
|
|
|
|
/* One element of the -o output list. */
struct spec {
	size_t fileno;  /* 1 or 2 selects a file; 0 selects the join field */
	size_t fldno;   /* zero-based field index within that file */
};
|
|
|
|
/* Growable list of output specs built from the -o argument. */
struct outlist {
	size_t ns;            /* number of specs in use */
	size_t maxs;          /* allocated capacity of specs */
	struct spec **specs;  /* the specs, in the order given */
};
|
|
|
|
/* Growable run of consecutive lines read from one input file. */
struct span {
	size_t nl;            /* number of lines counted in the span */
	size_t maxl;          /* allocated capacity of lines */
	struct line **lines;  /* the lines, in input order */
};
|
|
|
|
static char *sep = NULL;
|
|
static char *replace = NULL;
|
|
static const char defaultofs = ' ';
|
|
static const int jfield = 1; /* POSIX default join field */
|
|
static int unpairsa = 0, unpairsb = 0;
|
|
static int oflag = 0;
|
|
static int pairs = 1;
|
|
static size_t seplen;
|
|
static struct outlist output;
|
|
|
|
static void
|
|
usage(void)
|
|
{
|
|
eprintf("usage: %s [-1 field] [-2 field] [-o list] [-e string] "
|
|
"[-a | -v fileno] [-t delim] file1 file2\n", argv0);
|
|
}
|
|
|
|
static void
|
|
prfield(struct field *fp)
|
|
{
|
|
if (fwrite(fp->s, 1, fp->len, stdout) != fp->len)
|
|
eprintf("fwrite:");
|
|
}
|
|
|
|
static void
|
|
prsep(void)
|
|
{
|
|
if (sep)
|
|
fwrite(sep, 1, seplen, stdout);
|
|
else
|
|
putchar(defaultofs);
|
|
}
|
|
|
|
static void
|
|
swaplines(struct line *la, struct line *lb)
|
|
{
|
|
struct line tmp;
|
|
|
|
tmp = *la;
|
|
*la = *lb;
|
|
*lb = tmp;
|
|
}
|
|
|
|
static void
|
|
prjoin(struct line *la, struct line *lb, size_t jfa, size_t jfb)
|
|
{
|
|
struct spec *sp;
|
|
struct field *joinfield;
|
|
size_t i;
|
|
|
|
if (jfa >= la->nf || jfb >= lb->nf)
|
|
return;
|
|
|
|
joinfield = &la->fields[jfa];
|
|
|
|
if (oflag) {
|
|
for (i = 0; i < output.ns; i++) {
|
|
sp = output.specs[i];
|
|
|
|
if (sp->fileno == 1) {
|
|
if (sp->fldno < la->nf)
|
|
prfield(&la->fields[sp->fldno]);
|
|
else if (replace)
|
|
fputs(replace, stdout);
|
|
} else if (sp->fileno == 2) {
|
|
if (sp->fldno < lb->nf)
|
|
prfield(&lb->fields[sp->fldno]);
|
|
else if (replace)
|
|
fputs(replace, stdout);
|
|
} else if (sp->fileno == 0) {
|
|
prfield(joinfield);
|
|
}
|
|
|
|
if (i < output.ns - 1)
|
|
prsep();
|
|
}
|
|
} else {
|
|
prfield(joinfield);
|
|
prsep();
|
|
|
|
for (i = 0; i < la->nf; i++) {
|
|
if (i != jfa) {
|
|
prfield(&la->fields[i]);
|
|
prsep();
|
|
}
|
|
}
|
|
for (i = 0; i < lb->nf; i++) {
|
|
if (i != jfb) {
|
|
prfield(&lb->fields[i]);
|
|
if (i < la->nf - 1)
|
|
prsep();
|
|
}
|
|
}
|
|
}
|
|
|
|
putchar('\n');
|
|
}
|
|
|
|
static void
|
|
prline(struct line *lp)
|
|
{
|
|
size_t len = strlen(lp->text);
|
|
|
|
if (fwrite(lp->text, 1, len, stdout) != len)
|
|
eprintf("fwrite:");
|
|
|
|
putchar('\n');
|
|
}
|
|
|
|
static int
|
|
linecmp(struct line *la, struct line *lb, size_t jfa, size_t jfb)
|
|
{
|
|
int status;
|
|
|
|
/* return FIELD_ERROR if both lines are short */
|
|
if (jfa >= la->nf) {
|
|
status = jfb >= lb->nf ? FIELD_ERROR : -1;
|
|
} else if (jfb >= lb->nf) {
|
|
status = 1;
|
|
} else {
|
|
status = memcmp(la->fields[jfa].s, lb->fields[jfb].s,
|
|
MAX (la->fields[jfa].len, lb->fields[jfb].len));
|
|
if (status > 0)
|
|
status = 1;
|
|
else if (status < 0)
|
|
status = -1;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
static void
|
|
addfield(struct line *lp, char *sp, size_t len)
|
|
{
|
|
if (lp->nf >= lp->maxf) {
|
|
lp->fields = ereallocarray(lp->fields, (GROW * lp->maxf),
|
|
sizeof(struct field));
|
|
lp->maxf *= GROW;
|
|
}
|
|
lp->fields[lp->nf].s = sp;
|
|
lp->fields[lp->nf].len = len;
|
|
lp->nf++;
|
|
}
|
|
|
|
static void
|
|
prspanjoin(struct span *spa, struct span *spb, size_t jfa, size_t jfb)
|
|
{
|
|
size_t i, j;
|
|
|
|
for (i = 0; i < (spa->nl - 1); i++)
|
|
for (j = 0; j < (spb->nl - 1); j++)
|
|
prjoin(spa->lines[i], spb->lines[j], jfa, jfb);
|
|
}
|
|
|
|
static struct line *
|
|
makeline(char *s, size_t len)
|
|
{
|
|
struct line *lp;
|
|
char *sp, *beg, *end;
|
|
size_t i;
|
|
int eol = 0;
|
|
|
|
if (s[len-1] == '\n')
|
|
s[len-1] = '\0';
|
|
|
|
lp = ereallocarray(NULL, INIT, sizeof(struct line));
|
|
lp->text = s;
|
|
lp->fields = ereallocarray(NULL, INIT, sizeof(struct field));
|
|
lp->nf = 0;
|
|
lp->maxf = INIT;
|
|
|
|
for (sp = lp->text; isblank(*sp); sp++)
|
|
;
|
|
|
|
while (!eol) {
|
|
beg = sp;
|
|
|
|
if (sep) {
|
|
if (!(end = utfutf(sp, sep)))
|
|
eol = 1;
|
|
|
|
if (!eol) {
|
|
addfield(lp, beg, end - beg);
|
|
for (i = 0; i < seplen; i++)
|
|
end++;
|
|
}
|
|
} else {
|
|
for (end = sp; !(isblank(*end)); end++) {
|
|
if (*end == '\0') {
|
|
eol = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!eol)
|
|
addfield(lp, beg, end - beg);
|
|
while (isblank(*++end))
|
|
;
|
|
}
|
|
|
|
if (eol)
|
|
addfield(lp, beg, strlen(sp));
|
|
|
|
sp = end;
|
|
}
|
|
|
|
return lp;
|
|
}
|
|
|
|
static int
|
|
addtospan(struct span *sp, FILE *fp, int reset)
|
|
{
|
|
char *newl = NULL;
|
|
size_t len, size = 0;
|
|
|
|
if ((len = getline(&newl, &size, fp)) == -1) {
|
|
if (ferror(fp))
|
|
eprintf("getline:");
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
if (reset)
|
|
sp->nl = 0;
|
|
|
|
if (sp->nl >= sp->maxl) {
|
|
sp->lines = ereallocarray(sp->lines, (GROW * sp->maxl),
|
|
sizeof(struct line *));
|
|
sp->maxl *= GROW;
|
|
}
|
|
|
|
sp->lines[sp->nl] = makeline(newl, len);
|
|
sp->nl++;
|
|
return 1;
|
|
}
|
|
|
|
static void
|
|
initspan(struct span *sp)
|
|
{
|
|
sp->nl = 0;
|
|
sp->maxl = INIT;
|
|
sp->lines = ereallocarray(NULL, INIT, sizeof(struct line *));
|
|
}
|
|
|
|
static void
|
|
freespan(struct span *sp)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < sp->nl; i++) {
|
|
free(sp->lines[i]->fields);
|
|
free(sp->lines[i]->text);
|
|
}
|
|
|
|
free(sp->lines);
|
|
}
|
|
|
|
static void
|
|
initolist(struct outlist *olp)
|
|
{
|
|
olp->ns = 0;
|
|
olp->maxs = 1;
|
|
olp->specs = ereallocarray(NULL, INIT, sizeof(struct spec *));
|
|
}
|
|
|
|
static void
|
|
addspec(struct outlist *olp, struct spec *sp)
|
|
{
|
|
if (olp->ns >= olp->maxs) {
|
|
olp->specs = ereallocarray(olp->specs, (GROW * olp->maxs),
|
|
sizeof(struct spec *));
|
|
olp->maxs *= GROW;
|
|
}
|
|
olp->specs[olp->ns] = sp;
|
|
olp->ns++;
|
|
}
|
|
|
|
static struct spec *
|
|
makespec(char *s)
|
|
{
|
|
struct spec *sp;
|
|
int fileno;
|
|
size_t fldno;
|
|
|
|
switch (s[0]) {
|
|
case '0': /* join field */
|
|
fileno = 0;
|
|
fldno = 0;
|
|
break;
|
|
case '1': case '2':
|
|
if (sscanf(s, "%d.%zu", &fileno, &fldno) != 2)
|
|
eprintf("\"%s\": invalid format\n", s);
|
|
fldno--; /* ugly */
|
|
break;
|
|
default:
|
|
eprintf("%c: invalid file number (must be 0, 1 or 2)\n", s[0]);
|
|
break;
|
|
}
|
|
|
|
sp = ereallocarray(NULL, INIT, sizeof(struct spec));
|
|
sp->fileno = fileno;
|
|
sp->fldno = fldno;
|
|
return sp;
|
|
}
|
|
|
|
/*
 * Split the -o argument s at commas, spaces and tabs and append one
 * spec per element to the output list.  The string is modified in
 * place: each delimiter found is overwritten with NUL.
 */
static void
makeolist(struct outlist *olp, char *s)
{
	char *item, *next;

	for (item = s; item; item = next) {
		next = strpbrk(item, ", \t");
		if (next)
			*next++ = '\0';
		addspec(olp, makespec(item));
	}
}
|
|
|
|
static void
|
|
freespecs(struct outlist *olp)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < olp->ns; i++)
|
|
free(olp->specs[i]);
|
|
}
|
|
|
|
static void
|
|
join(FILE *fa, FILE *fb, size_t jfa, size_t jfb)
|
|
{
|
|
struct span spa, spb;
|
|
int cmp, eofa, eofb;
|
|
|
|
initspan(&spa);
|
|
initspan(&spb);
|
|
cmp = eofa = eofb = 0;
|
|
|
|
addtospan(&spa, fa, RESET);
|
|
addtospan(&spb, fb, RESET);
|
|
|
|
while (spa.nl && spb.nl) {
|
|
if ((cmp = linecmp(spa.lines[0], spb.lines[0], jfa, jfb)) < 0) {
|
|
if (unpairsa)
|
|
prline(spa.lines[0]);
|
|
if (!addtospan(&spa, fa, RESET)) {
|
|
if (unpairsb) { /* a is EOF'd; print the rest of b */
|
|
do
|
|
prline(spb.lines[0]);
|
|
while (addtospan(&spb, fb, RESET));
|
|
}
|
|
eofa = eofb = 1;
|
|
} else {
|
|
continue;
|
|
}
|
|
} else if (cmp > 0) {
|
|
if (unpairsb)
|
|
prline(spb.lines[0]);
|
|
if (!addtospan(&spb, fb, RESET)) {
|
|
if (unpairsa) { /* b is EOF'd; print the rest of a */
|
|
do
|
|
prline(spa.lines[0]);
|
|
while (addtospan(&spa, fa, RESET));
|
|
}
|
|
eofa = eofb = 1;
|
|
} else {
|
|
continue;
|
|
}
|
|
} else if (cmp == 0) {
|
|
/* read all consecutive matching lines from a */
|
|
do {
|
|
if (!addtospan(&spa, fa, EXPAND)) {
|
|
eofa = 1;
|
|
spa.nl++;
|
|
break;
|
|
}
|
|
} while (linecmp(spa.lines[spa.nl-1], spb.lines[0], jfa, jfb) == 0);
|
|
|
|
/* read all consecutive matching lines from b */
|
|
do {
|
|
if (!addtospan(&spb, fb, EXPAND)) {
|
|
eofb = 1;
|
|
spb.nl++;
|
|
break;
|
|
}
|
|
} while (linecmp(spa.lines[0], spb.lines[spb.nl-1], jfa, jfb) == 0);
|
|
|
|
if (pairs)
|
|
prspanjoin(&spa, &spb, jfa, jfb);
|
|
|
|
} else { /* FIELD_ERROR: both lines lacked join fields */
|
|
if (unpairsa)
|
|
prline(spa.lines[0]);
|
|
if (unpairsb)
|
|
prline(spb.lines[0]);
|
|
eofa = addtospan(&spa, fa, RESET) ? 0 : 1;
|
|
eofb = addtospan(&spb, fb, RESET) ? 0 : 1;
|
|
if (!eofa && !eofb)
|
|
continue;
|
|
}
|
|
|
|
if (eofa) {
|
|
spa.nl = 0;
|
|
} else {
|
|
swaplines(spa.lines[0], spa.lines[spa.nl - 1]); /* ugly */
|
|
spa.nl = 1;
|
|
}
|
|
|
|
if (eofb) {
|
|
spb.nl = 0;
|
|
} else {
|
|
swaplines(spb.lines[0], spb.lines[spb.nl - 1]); /* ugly */
|
|
spb.nl = 1;
|
|
}
|
|
}
|
|
freespan(&spa);
|
|
freespan(&spb);
|
|
}
|
|
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
size_t jf[2] = { jfield, jfield, };
|
|
FILE *fp[2];
|
|
int n;
|
|
char *fno;
|
|
|
|
ARGBEGIN {
|
|
case '1':
|
|
jf[0] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX));
|
|
break;
|
|
case '2':
|
|
jf[1] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX));
|
|
break;
|
|
case 'a':
|
|
fno = EARGF(usage());
|
|
if (strcmp(fno, "1") == 0)
|
|
unpairsa = 1;
|
|
else if (strcmp(fno, "2") == 0)
|
|
unpairsb = 1;
|
|
else
|
|
usage();
|
|
break;
|
|
case 'e':
|
|
replace = EARGF(usage());
|
|
break;
|
|
case 'o':
|
|
oflag = 1;
|
|
initolist(&output);
|
|
makeolist(&output, EARGF(usage()));
|
|
break;
|
|
case 't':
|
|
sep = EARGF(usage());
|
|
break;
|
|
case 'v':
|
|
pairs = 0;
|
|
fno = EARGF(usage());
|
|
if (strcmp(fno, "1") == 0)
|
|
unpairsa = 1;
|
|
else if (strcmp(fno, "2") == 0)
|
|
unpairsb = 1;
|
|
else
|
|
usage();
|
|
break;
|
|
default:
|
|
usage();
|
|
} ARGEND;
|
|
|
|
if (sep)
|
|
seplen = unescape(sep);
|
|
|
|
if (argc != 2)
|
|
usage();
|
|
|
|
for (n = 0; n < 2; n++) {
|
|
if (argv[n][0] == '-' && !argv[n][1]) {
|
|
argv[n] = "<stdin>";
|
|
fp[n] = stdin;
|
|
} else if (!(fp[n] = fopen(argv[n], "r"))) {
|
|
eprintf("fopen %s:", argv[n]);
|
|
}
|
|
}
|
|
|
|
jf[0]--;
|
|
jf[1]--;
|
|
|
|
join(fp[0], fp[1], jf[0], jf[1]);
|
|
|
|
if (oflag)
|
|
freespecs(&output);
|
|
|
|
enfshut(2, fp[0], argv[0]);
|
|
if (fp[0] != fp[1])
|
|
enfshut(2, fp[1], argv[1]);
|
|
enfshut(2, stdout, "<stdout>");
|
|
return 0;
|
|
}
|