sbase/sort.c
FRIGN 51390a3c51 Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
   functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
   calculated nearly each time skipcolumn() is called.
3) Rename next_col to skip_to_next_col so the purpose is clear,
   also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
   so we can cleanly fshut() everything.
6) OFF-POSIX: POSIX, for no apparent reason, does not allow more than
   one file when the -c or -C flags are given.
   This can be problematic when you want to check multiple files.
   With change 5), rewriting check() to return a value, I went
   off-POSIX after discussing this with Dimitris to just allow
   an arbitrary number of files. Obviously, this does not break scripts
   and is convenient for everybody who wants to quickly check a large
   number of files.
   As soon as 1 file is "unsorted", the return value is 1, as expected.
   For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
   return 1 in the following case:
   $ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 12:08:13 +01:00

348 lines
7.1 KiB
C

/* See LICENSE file for copyright and license details. */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "queue.h"
#include "text.h"
#include "utf.h"
#include "util.h"
/*
 * One sort key definition, as parsed by addkeydef() and consumed by
 * columns() and linecmp(). Definitions are chained on the kdhead list.
 */
struct keydef {
/* 1-based column in which the key starts (addkeydef rejects values < 1) */
int start_column;
/* column in which the key ends; 0 means end of line */
int end_column;
/* 1-based character offset of the key start (addkeydef rejects values < 1) */
int start_char;
/* character count bounding the key end; 0 means end of column */
int end_char;
/* bitwise OR of the MOD_* modifiers applying to this key */
int flags;
/* list linkage for kdhead */
TAILQ_ENTRY(keydef) entry;
};
/* Per-key modifier bits stored in keydef.flags. */
enum {
/* compare the key numerically (linecmp converts both keys with strtold) */
MOD_N = 1 << 0,
/* skip blanks before locating the start of the key */
MOD_STARTB = 1 << 1,
/* skip blanks before locating the end of the key */
MOD_ENDB = 1 << 2,
/* negate the comparison result for this key */
MOD_R = 1 << 3,
};
/* list of key definitions, in the order addkeydef() appended them */
static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead);
/* set by -C, -c and -u respectively; Cflag silences check()'s warning */
static int Cflag = 0, cflag = 0, uflag = 0;
/* field separator given with -t, or NULL to split columns on blanks */
static char *fieldsep = NULL;
/* strlen(fieldsep), computed once when -t is parsed */
static size_t fieldseplen = 0;
/* scratch buffers holding the two keys extracted by linecmp() */
static char *col1, *col2;
/* largest key length stored so far in col1 and col2 (see columns()) */
static size_t col1siz, col2siz;
/*
 * Return a pointer to the first character of s that is neither a
 * space nor a tab. Newlines and the terminating NUL stop the scan.
 */
static char *
skipblank(char *s)
{
	char *p;

	for (p = s; ; p++) {
		if (*p != ' ' && *p != '\t')
			return p;
	}
}
/*
 * Return a pointer to the first space, tab, newline or terminating
 * NUL found in s, i.e. skip over a run of non-blank characters.
 */
static char *
skipnonblank(char *s)
{
	char *p;

	for (p = s; *p != '\0'; p++) {
		if (*p == '\n' || *p == ' ' || *p == '\t')
			break;
	}

	return p;
}
/*
 * Move s to the end of the current column.
 *
 * With a field separator set (-t): return the next occurrence of the
 * separator, stepping over it when skip_to_next_col is non-zero, or
 * eol when the separator does not occur again.
 * Without one: skip leading blanks, then the following non-blanks.
 */
static char *
skipcolumn(char *s, char *eol, int skip_to_next_col)
{
	char *sep;

	if (!fieldsep)
		return skipnonblank(skipblank(s));

	sep = strstr(s, fieldsep);
	if (!sep)
		return eol;

	return skip_to_next_col ? sep + fieldseplen : sep;
}
/*
 * Extract the sort key described by kd from line.
 *
 * The key is copied, NUL-terminated, into the reusable buffer *col,
 * which is grown with erealloc() when it is unset or too small;
 * *colsiz records the largest key length stored so far.
 *
 * Returns the length of the key in bytes (0 when the computed start
 * lies beyond the computed end).
 */
static size_t
columns(char *line, const struct keydef *kd, char **col, size_t *colsiz)
{
	Rune r;
	char *start, *end, *eol;
	size_t len, utflen, rlen;
	int i;

	/*
	 * A line may lack its trailing newline (e.g. the last line handed
	 * over by check()); use the terminating NUL as end of line then,
	 * instead of comparing pointers against NULL.
	 */
	eol = strchr(line, '\n');
	if (!eol)
		eol = line + strlen(line);

	/* locate the start of the key */
	for (i = 1, start = line; i < kd->start_column; i++)
		start = skipcolumn(start, eol, 1);
	if (kd->flags & MOD_STARTB)
		start = skipblank(start);
	/* start_char is 1-based: step over start_char - 1 characters */
	for (utflen = 0; start < eol && utflen < kd->start_char - 1;) {
		rlen = chartorune(&r, start);
		start += rlen;
		utflen++;
	}

	/* locate the end of the key */
	if (kd->end_column) {
		for (i = 1, end = line; i < kd->end_column; i++)
			end = skipcolumn(end, eol, 1);
		if (kd->flags & MOD_ENDB)
			end = skipblank(end);
		if (kd->end_char) {
			for (utflen = 0; end < eol && utflen < kd->end_char;) {
				rlen = chartorune(&r, end);
				end += rlen;
				utflen++;
			}
		} else {
			/* end_char == 0: the key runs to the end of the column */
			end = skipcolumn(end, eol, 0);
		}
	} else {
		/* end_column == 0: the key runs to the end of the line */
		end = eol;
	}

	len = (start > end) ? 0 : (end - start);
	if (!*col || *colsiz < len)
		*col = erealloc(*col, len + 1);
	memcpy(*col, start, len);
	(*col)[len] = '\0';
	if (*colsiz < len)
		*colsiz = len;

	return len;
}
/*
 * Compare the lines *a and *b key by key, in kdhead order, and return
 * the result of the first key that does not compare equal (0 when all
 * keys compare equal). Usable as a qsort() comparator after a cast.
 *
 * Keys flagged MOD_N are compared as long doubles, all others with
 * strcmp(); MOD_R negates the per-key result.
 */
static int
linecmp(const char **a, const char **b)
{
	struct keydef *kd, *last;
	long double lhs, rhs;
	int order = 0;

	TAILQ_FOREACH(kd, &kdhead, entry) {
		columns((char *)*a, kd, &col1, &col1siz);
		columns((char *)*b, kd, &col2, &col2siz);

		last = TAILQ_LAST(&kdhead, kdhead);
		if (uflag && kd == last && last != TAILQ_FIRST(&kdhead)) {
			/* if -u is given, don't use default key definition
			 * unless it is the only one */
			order = 0;
		} else if (!(kd->flags & MOD_N)) {
			order = strcmp(col1, col2);
		} else {
			lhs = strtold(col1, NULL);
			rhs = strtold(col2, NULL);
			if (lhs < rhs)
				order = -1;
			else
				order = (lhs > rhs);
		}

		if (kd->flags & MOD_R)
			order = -order;
		if (order)
			return order;
	}

	return order;
}
/*
 * Verify that the lines read from fp are in sorted order (-c / -C).
 *
 * fname is only used in the warning message. Returns 1 as soon as a
 * line is out of order (or, with -u, equal to its predecessor) and 0
 * otherwise. Unless -C is given, the offending line is reported with
 * weprintf(). A read error on the first line terminates the program
 * with status 2, so that it cannot be mistaken for the "disorder"
 * status 1.
 *
 * The line buffers are static: the last line kept in prev survives
 * the call, so when several files are checked in turn the first line
 * of a later file is compared against it.
 */
static int
check(FILE *fp, const char *fname)
{
	static struct { char *buf; size_t size; } prev, cur, tmp;

	if (!prev.buf && getline(&prev.buf, &prev.size, fp) < 0) {
		if (ferror(fp))
			enprintf(2, "getline:");
		/*
		 * Plain end of file: empty input is trivially sorted.
		 * getline() may have allocated a buffer without storing
		 * a line in it, so release it to keep "no previous line
		 * yet" recognisable on the next call.
		 */
		free(prev.buf);
		prev.buf = NULL;
		prev.size = 0;
		return 0;
	}

	while (getline(&cur.buf, &cur.size, fp) > 0) {
		/* uflag == 1 additionally rejects equal neighbours */
		if (uflag > linecmp((const char **)&cur.buf,
		                    (const char **)&prev.buf)) {
			if (!Cflag)
				weprintf("disorder %s: %s", fname, cur.buf);
			return 1;
		}
		/* swap the buffers so the old prev buffer gets reused */
		tmp = cur;
		cur = prev;
		prev = tmp;
	}

	return 0;
}
/*
 * Consume the run of alphabetic modifier letters at *s and OR the
 * corresponding bits into *flags:
 *   'b' -> bflag (the caller passes MOD_STARTB or MOD_ENDB)
 *   'n' -> MOD_N
 *   'r' -> MOD_R
 *
 * *s is advanced past every letter examined. Returns 0 on success and
 * -1 as soon as an unknown letter is encountered.
 */
static int
parse_flags(char **s, int *flags, int bflag)
{
	/* <ctype.h> functions require a value representable as unsigned
	 * char; a plain (possibly negative) char is undefined behaviour */
	while (isalpha((unsigned char)**s)) {
		switch (*((*s)++)) {
		case 'b':
			*flags |= bflag;
			break;
		case 'n':
			*flags |= MOD_N;
			break;
		case 'r':
			*flags |= MOD_R;
			break;
		default:
			return -1;
		}
	}

	return 0;
}
/*
 * Parse the key definition kdstr, which has the format
 *   start_column[.start_char][flags][,end_column[.end_char][flags]]
 * and append the resulting keydef to kdhead. flags supplies the
 * initial modifier bits; letters in kdstr add to them.
 *
 * Any malformed part is reported through enprintf() with status 2.
 */
static void
addkeydef(char *kdstr, int flags)
{
	struct keydef *def = enmalloc(2, sizeof(*def));
	char *p = kdstr;

	def->flags = flags;
	def->start_char = 1;
	def->end_column = 0; /* 0 means end of line */
	def->end_char = 0; /* 0 means end of column */

	/* start_column[.start_char][flags] */
	def->start_column = strtol(p, &p, 10);
	if (def->start_column < 1)
		enprintf(2, "invalid start column in key definition\n");
	if (*p == '.') {
		def->start_char = strtol(p + 1, &p, 10);
		if (def->start_char < 1)
			enprintf(2, "invalid start character in key definition\n");
	}
	if (parse_flags(&p, &def->flags, MOD_STARTB) < 0)
		enprintf(2, "invalid start flags in key definition\n");

	/* [,end_column[.end_char][flags]] */
	if (*p == ',') {
		def->end_column = strtol(p + 1, &p, 10);
		if (def->end_column < 0)
			enprintf(2, "invalid end column in key definition\n");
		if (*p == '.') {
			def->end_char = strtol(p + 1, &p, 10);
			if (def->end_char < 0)
				enprintf(2, "invalid end character in key definition\n");
		}
		if (parse_flags(&p, &def->flags, MOD_ENDB) < 0)
			enprintf(2, "invalid end flags in key definition\n");
	}

	/* nothing may follow a complete definition */
	if (*p != '\0')
		enprintf(2, "invalid key definition\n");

	TAILQ_INSERT_TAIL(&kdhead, def, entry);
}
/* Report the command-line synopsis through enprintf() with status 2. */
static void
usage(void)
{
	enprintf(2,
	         "usage: %s [-Cbcmnru] [-o outfile] [-t delim] [-k def]... [file ...]\n",
	         argv0);
}
/*
 * sort: parse the options, then either check the inputs for order
 * (-c / -C) or read every input line, sort the lines and write them
 * to stdout or to the file named with -o.
 *
 * The return value is 0 on success, 1 when check() reported disorder,
 * and 2 when closing an input, the output file or a standard stream
 * failed. Failing to open an input or the output file is reported via
 * enprintf() with status 2.
 */
int
main(int argc, char *argv[])
{
	FILE *fp, *ofp = stdout;
	struct linebuf linebuf = EMPTY_LINEBUF;
	size_t i;
	int global_flags = 0, ret = 0;
	char *outfile = NULL;

	ARGBEGIN {
	case 'C':
		Cflag = 1;
		break;
	case 'b':
		global_flags |= MOD_STARTB | MOD_ENDB;
		break;
	case 'c':
		cflag = 1;
		break;
	case 'k':
		addkeydef(EARGF(usage()), global_flags);
		break;
	case 'm':
		/* more or less for free, but for performance-reasons,
		 * we should keep this flag in mind and maybe some later
		 * day implement it properly so we don't run out of memory
		 * while merging large sorted files.
		 */
		break;
	case 'n':
		global_flags |= MOD_N;
		break;
	case 'o':
		outfile = EARGF(usage());
		break;
	case 'r':
		global_flags |= MOD_R;
		break;
	case 't':
		fieldsep = EARGF(usage());
		fieldseplen = strlen(fieldsep);
		break;
	case 'u':
		uflag = 1;
		break;
	default:
		usage();
	} ARGEND;

	/* -b shall only apply to custom key definitions */
	if (TAILQ_EMPTY(&kdhead) && global_flags)
		addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB));
	/* always append a final whole-line key, honouring only -r */
	addkeydef("1", global_flags & MOD_R);

	if (!argc) {
		if (Cflag || cflag) {
			if (check(stdin, "<stdin>") && !ret)
				ret = 1;
		} else {
			getlines(stdin, &linebuf);
		}
	} else for (; *argv; argc--, argv++) {
		if (!strcmp(*argv, "-")) {
			*argv = "<stdin>";
			fp = stdin;
		} else if (!(fp = fopen(*argv, "r"))) {
			enprintf(2, "fopen %s:", *argv);
			continue;
		}
		if (Cflag || cflag) {
			if (check(fp, *argv) && !ret)
				ret = 1;
		} else {
			getlines(fp, &linebuf);
		}
		/* stdin is shut once, at the very end */
		if (fp != stdin && fshut(fp, *argv))
			ret = 2;
	}

	if (!Cflag && !cflag) {
		/* the output file is opened only after all input has been
		 * read; errors use status 2 like every other failure here */
		if (outfile && !(ofp = fopen(outfile, "w")))
			enprintf(2, "fopen %s:", outfile);

		qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines,
		      (int (*)(const void *, const void *))linecmp);

		for (i = 0; i < linebuf.nlines; i++) {
			/* with -u, print a line only if it differs from
			 * the one before it */
			if (!uflag || i == 0 ||
			    linecmp((const char **)&linebuf.lines[i],
			            (const char **)&linebuf.lines[i - 1])) {
				fputs(linebuf.lines[i], ofp);
			}
		}

		/* flush and close the -o file so write errors are noticed */
		if (ofp != stdout && fshut(ofp, outfile))
			ret = 2;
	}

	/* bitwise | on purpose: all three streams must be shut */
	if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
	    fshut(stderr, "<stderr>"))
		ret = 2;

	return ret;
}