2011-06-02 08:03:34 -04:00
|
|
|
/* See LICENSE file for copyright and license details. */
|
2014-04-12 11:53:10 -04:00
|
|
|
#include <ctype.h>
|
2011-06-02 08:03:34 -04:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2014-11-13 12:29:30 -05:00
|
|
|
|
2011-06-02 08:03:34 -04:00
|
|
|
#include "text.h"
|
|
|
|
#include "util.h"
|
|
|
|
|
2014-04-12 11:53:10 -04:00
|
|
|
/*
 * One sort key, as parsed from a key definition string by parse_keydef().
 * Columns (fields) and characters are counted from 1.
 */
struct keydef {
	int start_column;	/* field in which the key starts */
	int end_column;		/* field in which the key ends; 0 means end of line */
	int start_char;		/* first character of the key within the start field */
	int end_char;		/* last character within the end field; 0 means up to the next field */
	int flags;		/* bitwise OR of MOD_* modifiers */
};
|
|
|
|
|
|
|
|
/* Modifier bits stored in keydef.flags. */
enum {
	MOD_N      = 1 << 1,	/* compare the key numerically */
	MOD_STARTB = 1 << 2,	/* skip leading blanks before the key start */
	MOD_ENDB   = 1 << 3,	/* skip leading blanks before the key end */
	MOD_R      = 1 << 4,	/* invert the result of the comparison */
};
|
|
|
|
|
|
|
|
struct kdlist {
|
|
|
|
struct keydef keydef;
|
|
|
|
struct kdlist *next;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* First and last node of the key definition list; both NULL while it is empty. */
static struct kdlist *head = NULL, *tail = NULL;
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2014-05-06 10:07:05 -04:00
|
|
|
/* Forward declarations of the file-local helpers. */
static void addkeydef(char *def, int flags);
static void check(FILE *fp);
static int linecmp(const char **a, const char **b);
static char *skipblank(char *s);
static int parse_flags(char **s, int *flags, int bflag);
static int parse_keydef(struct keydef *kd, char *s, int flags);
static char *nextcol(char *s);
static char *columns(char *line, const struct keydef *kd);
|
2011-06-02 08:03:34 -04:00
|
|
|
|
2014-11-23 14:35:56 -05:00
|
|
|
/* Set by the -C, -c and -u command-line options respectively. */
static int Cflag = 0;
static int cflag = 0;
static int uflag = 0;
/* Single-character field separator given with -t; NULL means fields are delimited by blanks. */
static char *fieldsep = NULL;
|
2012-05-21 16:09:44 -04:00
|
|
|
|
2014-04-12 11:53:10 -04:00
|
|
|
static void
|
2014-05-06 10:07:05 -04:00
|
|
|
addkeydef(char *def, int flags)
|
2014-04-12 11:53:10 -04:00
|
|
|
{
|
|
|
|
struct kdlist *node;
|
|
|
|
|
2015-02-10 20:08:17 -05:00
|
|
|
node = enmalloc(2, sizeof(*node));
|
2014-11-13 12:29:30 -05:00
|
|
|
if (!head)
|
2014-04-12 11:53:10 -04:00
|
|
|
head = node;
|
2014-11-13 12:29:30 -05:00
|
|
|
if (parse_keydef(&node->keydef, def, flags))
|
2014-05-03 13:06:20 -04:00
|
|
|
enprintf(2, "faulty key definition\n");
|
2014-11-13 12:29:30 -05:00
|
|
|
if (tail)
|
2014-05-06 12:47:02 -04:00
|
|
|
tail->next = node;
|
2014-04-12 11:53:10 -04:00
|
|
|
node->next = NULL;
|
2014-05-06 12:47:02 -04:00
|
|
|
tail = node;
|
2014-04-12 11:53:10 -04:00
|
|
|
}
|
|
|
|
|
2014-11-23 14:35:56 -05:00
|
|
|
static void
|
|
|
|
check(FILE *fp)
|
|
|
|
{
|
|
|
|
static struct { char *buf; size_t size; } prev, cur, tmp;
|
|
|
|
|
|
|
|
if (!prev.buf)
|
|
|
|
getline(&prev.buf, &prev.size, fp);
|
|
|
|
while (getline(&cur.buf, &cur.size, fp) != -1) {
|
|
|
|
if (uflag > linecmp((const char **) &cur.buf, (const char **) &prev.buf)) {
|
|
|
|
if (!Cflag)
|
|
|
|
weprintf("disorder: %s", cur.buf);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
tmp = cur;
|
|
|
|
cur = prev;
|
|
|
|
prev = tmp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-12 11:53:10 -04:00
|
|
|
static int
|
2011-06-02 08:03:34 -04:00
|
|
|
linecmp(const char **a, const char **b)
|
|
|
|
{
|
2014-04-12 11:53:10 -04:00
|
|
|
char *s1, *s2;
|
|
|
|
int res = 0;
|
2015-01-31 13:12:18 -05:00
|
|
|
long double x, y;
|
2014-04-12 11:53:10 -04:00
|
|
|
struct kdlist *node;
|
|
|
|
|
2014-11-13 12:29:30 -05:00
|
|
|
for (node = head; node && res == 0; node = node->next) {
|
2014-04-12 11:53:10 -04:00
|
|
|
s1 = columns((char *)*a, &node->keydef);
|
|
|
|
s2 = columns((char *)*b, &node->keydef);
|
|
|
|
|
2014-05-06 12:47:02 -04:00
|
|
|
/* if -u is given, don't use default key definition
|
|
|
|
* unless it is the only one */
|
2015-01-31 13:12:18 -05:00
|
|
|
if (uflag && node == tail && head != tail) {
|
2014-04-12 11:53:10 -04:00
|
|
|
res = 0;
|
2015-01-31 13:12:18 -05:00
|
|
|
} else if (node->keydef.flags & MOD_N) {
|
|
|
|
x = strtold(s1, NULL);
|
|
|
|
y = strtold(s2, NULL);
|
|
|
|
res = x < y ? -1 : x > y;
|
|
|
|
} else {
|
2014-04-12 11:53:10 -04:00
|
|
|
res = strcmp(s1, s2);
|
2015-01-31 13:12:18 -05:00
|
|
|
}
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2014-11-13 12:29:30 -05:00
|
|
|
if (node->keydef.flags & MOD_R)
|
2014-05-06 10:07:05 -04:00
|
|
|
res = -res;
|
|
|
|
|
2014-04-12 11:53:10 -04:00
|
|
|
free(s1);
|
|
|
|
free(s2);
|
|
|
|
}
|
2015-03-22 18:37:37 -04:00
|
|
|
|
2014-05-06 10:07:05 -04:00
|
|
|
return res;
|
2014-04-12 11:53:10 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2014-05-06 10:07:05 -04:00
|
|
|
parse_flags(char **s, int *flags, int bflag)
|
|
|
|
{
|
2015-03-23 15:30:07 -04:00
|
|
|
while (isalpha((int)**s)) {
|
2014-11-13 12:29:30 -05:00
|
|
|
switch (*((*s)++)) {
|
2014-05-06 10:07:05 -04:00
|
|
|
case 'b':
|
|
|
|
*flags |= bflag;
|
|
|
|
break;
|
|
|
|
case 'n':
|
|
|
|
*flags |= MOD_N;
|
|
|
|
break;
|
|
|
|
case 'r':
|
|
|
|
*flags |= MOD_R;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -1;
|
|
|
|
}
|
2015-03-22 18:37:37 -04:00
|
|
|
}
|
|
|
|
|
2014-05-06 10:07:05 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
parse_keydef(struct keydef *kd, char *s, int flags)
|
2014-04-12 11:53:10 -04:00
|
|
|
{
|
|
|
|
char *rest = s;
|
2014-05-03 13:06:20 -04:00
|
|
|
|
2014-04-12 11:53:10 -04:00
|
|
|
kd->start_column = 1;
|
|
|
|
kd->start_char = 1;
|
|
|
|
/* 0 means end of line */
|
|
|
|
kd->end_column = 0;
|
|
|
|
kd->end_char = 0;
|
2014-05-06 10:07:05 -04:00
|
|
|
kd->flags = flags;
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2014-05-06 07:35:06 -04:00
|
|
|
kd->start_column = strtol(rest, &rest, 10);
|
2014-11-13 12:29:30 -05:00
|
|
|
if (kd->start_column < 1)
|
2014-05-06 07:35:06 -04:00
|
|
|
return -1;
|
2014-11-13 12:29:30 -05:00
|
|
|
if (*rest == '.')
|
2014-05-06 07:35:06 -04:00
|
|
|
kd->start_char = strtol(rest+1, &rest, 10);
|
2014-11-13 12:29:30 -05:00
|
|
|
if (kd->start_char < 1)
|
2014-05-06 07:35:06 -04:00
|
|
|
return -1;
|
2014-11-19 14:59:37 -05:00
|
|
|
if (parse_flags(&rest, &kd->flags, MOD_STARTB) < 0)
|
2014-05-06 10:07:05 -04:00
|
|
|
return -1;
|
2014-11-13 12:29:30 -05:00
|
|
|
if (*rest == ',') {
|
2014-05-06 07:35:06 -04:00
|
|
|
kd->end_column = strtol(rest+1, &rest, 10);
|
2014-11-13 12:29:30 -05:00
|
|
|
if (kd->end_column && kd->end_column < kd->start_column)
|
2014-05-06 07:35:06 -04:00
|
|
|
return -1;
|
2014-11-13 12:29:30 -05:00
|
|
|
if (*rest == '.') {
|
2014-05-06 07:35:06 -04:00
|
|
|
kd->end_char = strtol(rest+1, &rest, 10);
|
2014-11-13 12:29:30 -05:00
|
|
|
if (kd->end_char < 1)
|
2014-05-06 07:35:06 -04:00
|
|
|
return -1;
|
|
|
|
}
|
2014-11-19 14:59:37 -05:00
|
|
|
if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0)
|
2014-05-06 10:07:05 -04:00
|
|
|
return -1;
|
2013-12-12 08:08:49 -05:00
|
|
|
}
|
2015-03-22 18:37:37 -04:00
|
|
|
|
|
|
|
return -(*rest);
|
2011-06-02 08:03:34 -04:00
|
|
|
}
|
2013-06-14 14:20:47 -04:00
|
|
|
|
2014-04-12 11:53:10 -04:00
|
|
|
/*
 * Return a pointer to the first character of `s` that is not a blank
 * (as classified by isblank()), or to the terminating NUL if the rest
 * of the string consists of blanks only.
 */
static char *
skipblank(char *s)
{
	/* cast: passing a negative plain char to isblank() is undefined;
	 * the NUL terminator is not a blank, so it also ends the loop */
	while (isblank((unsigned char)*s))
		s++;

	return s;
}
|
|
|
|
|
|
|
|
static char *
|
2014-05-15 14:08:17 -04:00
|
|
|
nextcol(char *s)
|
2014-04-12 11:53:10 -04:00
|
|
|
{
|
2014-11-13 16:16:29 -05:00
|
|
|
if (!fieldsep) {
|
2014-05-15 14:08:17 -04:00
|
|
|
s = skipblank(s);
|
2015-02-20 07:28:24 -05:00
|
|
|
while (*s && !isblank(*s))
|
2014-05-15 14:08:17 -04:00
|
|
|
s++;
|
|
|
|
} else {
|
2014-11-13 16:16:29 -05:00
|
|
|
if (!strchr(s, *fieldsep))
|
2014-05-15 14:08:17 -04:00
|
|
|
s = strchr(s, '\0');
|
|
|
|
else
|
|
|
|
s = strchr(s, *fieldsep) + 1;
|
2014-05-03 12:34:51 -04:00
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2014-04-12 11:53:10 -04:00
|
|
|
static char *
|
|
|
|
columns(char *line, const struct keydef *kd)
|
|
|
|
{
|
|
|
|
char *start, *end;
|
2014-05-15 14:08:17 -04:00
|
|
|
int i;
|
2014-04-18 12:21:31 -04:00
|
|
|
|
2014-11-13 12:29:30 -05:00
|
|
|
for (i = 1, start = line; i < kd->start_column; i++)
|
2014-05-15 14:08:17 -04:00
|
|
|
start = nextcol(start);
|
2014-11-13 12:29:30 -05:00
|
|
|
if (kd->flags & MOD_STARTB)
|
2014-05-15 14:08:17 -04:00
|
|
|
start = skipblank(start);
|
|
|
|
start += MIN(kd->start_char, nextcol(start) - start) - 1;
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2014-11-13 12:29:30 -05:00
|
|
|
if (kd->end_column) {
|
|
|
|
for (i = 1, end = line; i < kd->end_column; i++)
|
2014-05-15 14:08:17 -04:00
|
|
|
end = nextcol(end);
|
2014-11-13 12:29:30 -05:00
|
|
|
if (kd->flags & MOD_ENDB)
|
2014-05-15 14:08:17 -04:00
|
|
|
end = skipblank(end);
|
2014-11-13 12:29:30 -05:00
|
|
|
if (kd->end_char)
|
2014-05-15 14:08:17 -04:00
|
|
|
end += MIN(kd->end_char, nextcol(end) - end);
|
2014-04-18 12:21:31 -04:00
|
|
|
else
|
2014-05-15 14:08:17 -04:00
|
|
|
end = nextcol(end);
|
2014-05-03 12:34:51 -04:00
|
|
|
} else {
|
2014-11-13 16:16:29 -05:00
|
|
|
if (!(end = strchr(line, '\n')))
|
2014-05-06 07:37:05 -04:00
|
|
|
end = strchr(line, '\0');
|
2014-05-03 12:34:51 -04:00
|
|
|
}
|
|
|
|
|
2015-02-10 20:08:17 -05:00
|
|
|
return enstrndup(2, start, end - start);
|
2014-04-12 11:53:10 -04:00
|
|
|
}
|
2015-03-07 09:39:39 -05:00
|
|
|
|
|
|
|
static void
|
|
|
|
usage(void)
|
|
|
|
{
|
2015-03-22 18:37:37 -04:00
|
|
|
enprintf(2, "usage: %s [-Cbcmnru] [-o outfile] [-t delim] [-k def]... [file ...]\n", argv0);
|
2015-03-07 09:39:39 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
2015-03-22 18:37:37 -04:00
|
|
|
FILE *fp, *ofp = stdout;
|
2015-03-07 09:39:39 -05:00
|
|
|
struct linebuf linebuf = EMPTY_LINEBUF;
|
2015-03-22 18:37:37 -04:00
|
|
|
size_t i;
|
2015-03-07 09:39:39 -05:00
|
|
|
int global_flags = 0;
|
2015-03-22 18:37:37 -04:00
|
|
|
char *outfile = NULL;
|
2015-03-07 09:39:39 -05:00
|
|
|
|
|
|
|
ARGBEGIN {
|
|
|
|
case 'C':
|
|
|
|
Cflag = 1;
|
|
|
|
break;
|
|
|
|
case 'b':
|
|
|
|
global_flags |= MOD_STARTB | MOD_ENDB;
|
|
|
|
break;
|
|
|
|
case 'c':
|
|
|
|
cflag = 1;
|
|
|
|
break;
|
|
|
|
case 'k':
|
|
|
|
addkeydef(EARGF(usage()), global_flags);
|
|
|
|
break;
|
2015-03-22 18:37:37 -04:00
|
|
|
case 'm':
|
|
|
|
/* more or less for free, but for perfomance-reasons,
|
|
|
|
* we should keep this flag in mind and maybe some later
|
|
|
|
* day implement it properly so we don't run out of memory
|
|
|
|
* while merging large sorted files.
|
|
|
|
*/
|
|
|
|
break;
|
2015-03-07 09:39:39 -05:00
|
|
|
case 'n':
|
|
|
|
global_flags |= MOD_N;
|
|
|
|
break;
|
2015-03-22 18:37:37 -04:00
|
|
|
case 'o':
|
|
|
|
outfile = EARGF(usage());
|
|
|
|
break;
|
2015-03-07 09:39:39 -05:00
|
|
|
case 'r':
|
|
|
|
global_flags |= MOD_R;
|
|
|
|
break;
|
|
|
|
case 't':
|
|
|
|
fieldsep = EARGF(usage());
|
|
|
|
if (strlen(fieldsep) != 1)
|
|
|
|
usage();
|
|
|
|
break;
|
|
|
|
case 'u':
|
|
|
|
uflag = 1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
usage();
|
|
|
|
} ARGEND;
|
|
|
|
|
|
|
|
if (!head && global_flags)
|
|
|
|
addkeydef("1", global_flags);
|
|
|
|
addkeydef("1", global_flags & MOD_R);
|
|
|
|
|
2015-03-22 18:37:37 -04:00
|
|
|
if (!argc) {
|
2015-03-07 09:39:39 -05:00
|
|
|
if (Cflag || cflag) {
|
|
|
|
check(stdin);
|
|
|
|
} else {
|
|
|
|
getlines(stdin, &linebuf);
|
|
|
|
}
|
2015-03-22 18:37:37 -04:00
|
|
|
} else for (; *argv; argc--, argv++) {
|
|
|
|
if (!(fp = fopen(*argv, "r"))) {
|
|
|
|
enprintf(2, "fopen %s:", *argv);
|
2015-03-07 09:39:39 -05:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (Cflag || cflag) {
|
|
|
|
check(fp);
|
|
|
|
} else {
|
|
|
|
getlines(fp, &linebuf);
|
|
|
|
}
|
|
|
|
fclose(fp);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Cflag && !cflag) {
|
2015-03-22 18:37:37 -04:00
|
|
|
if (outfile && !(ofp = fopen(outfile, "w")))
|
|
|
|
eprintf("fopen %s:", outfile);
|
|
|
|
|
2015-03-07 09:39:39 -05:00
|
|
|
qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines,
|
|
|
|
(int (*)(const void *, const void *))linecmp);
|
|
|
|
|
|
|
|
for (i = 0; i < linebuf.nlines; i++) {
|
|
|
|
if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i],
|
|
|
|
(const char **)&linebuf.lines[i-1])) {
|
2015-03-22 18:37:37 -04:00
|
|
|
fputs(linebuf.lines[i], ofp);
|
2015-03-07 09:39:39 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|