2011-06-02 08:03:34 -04:00
|
|
|
/* See LICENSE file for copyright and license details. */
|
2014-04-12 11:53:10 -04:00
|
|
|
#include <ctype.h>
|
2011-06-02 08:03:34 -04:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2014-11-13 12:29:30 -05:00
|
|
|
|
2015-08-02 17:55:54 -04:00
|
|
|
#include "queue.h"
|
2011-06-02 08:03:34 -04:00
|
|
|
#include "text.h"
|
2015-08-03 11:35:01 -04:00
|
|
|
#include "utf.h"
|
2011-06-02 08:03:34 -04:00
|
|
|
#include "util.h"
|
|
|
|
|
2014-04-12 11:53:10 -04:00
|
|
|
struct keydef {
|
2014-05-06 07:35:06 -04:00
|
|
|
int start_column;
|
|
|
|
int end_column;
|
|
|
|
int start_char;
|
|
|
|
int end_char;
|
2014-05-06 10:07:05 -04:00
|
|
|
int flags;
|
2015-08-02 17:55:54 -04:00
|
|
|
TAILQ_ENTRY(keydef) entry;
|
2014-05-06 10:07:05 -04:00
|
|
|
};
|
|
|
|
|
2016-05-14 21:56:54 -04:00
|
|
|
struct column {
|
|
|
|
struct line line;
|
|
|
|
size_t cap;
|
|
|
|
};
|
|
|
|
|
2014-05-06 10:07:05 -04:00
|
|
|
/* Per-key ordering modifiers; combined as a bit mask in keydef.flags. */
enum {
	MOD_N      = 0x01, /* 'n': compare keys as numbers */
	MOD_STARTB = 0x02, /* 'b' on the start position: skip leading blanks */
	MOD_ENDB   = 0x04, /* 'b' on the end position: skip leading blanks */
	MOD_R      = 0x08, /* 'r': reverse the comparison result */
	MOD_D      = 0x10, /* 'd': only blanks and alphanumerics are significant */
	MOD_F      = 0x20, /* 'f': fold case before comparing */
	MOD_I      = 0x40, /* 'i': ignore non-printable characters */
};
|
|
|
|
|
2015-08-02 17:55:54 -04:00
|
|
|
static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead);
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2014-11-23 14:35:56 -05:00
|
|
|
static int Cflag = 0, cflag = 0, uflag = 0;
|
2014-05-15 14:08:17 -04:00
|
|
|
static char *fieldsep = NULL;
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: Posix for no apparent reason does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
static size_t fieldseplen = 0;
|
2016-05-14 21:56:54 -04:00
|
|
|
static struct column col1, col2;
|
2012-05-21 16:09:44 -04:00
|
|
|
|
2016-02-28 18:47:10 -05:00
|
|
|
static void
|
2016-03-06 20:04:29 -05:00
|
|
|
skipblank(struct line *a)
|
2015-08-02 19:12:29 -04:00
|
|
|
{
|
2016-02-28 18:47:10 -05:00
|
|
|
while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) {
|
|
|
|
a->data++;
|
|
|
|
a->len--;
|
|
|
|
}
|
2015-08-02 19:12:29 -04:00
|
|
|
}
|
|
|
|
|
2016-02-28 18:47:10 -05:00
|
|
|
static void
|
2016-03-06 20:04:29 -05:00
|
|
|
skipnonblank(struct line *a)
|
2014-04-12 11:53:10 -04:00
|
|
|
{
|
2016-02-28 18:47:10 -05:00
|
|
|
while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' &&
|
|
|
|
*(a->data) != '\t')) {
|
|
|
|
a->data++;
|
|
|
|
a->len--;
|
|
|
|
}
|
2015-08-02 19:32:21 -04:00
|
|
|
}
|
2014-11-23 14:35:56 -05:00
|
|
|
|
2016-02-28 18:47:10 -05:00
|
|
|
static void
|
2016-03-06 20:04:29 -05:00
|
|
|
skipcolumn(struct line *a, int skip_to_next_col)
|
2015-08-02 19:32:21 -04:00
|
|
|
{
|
2016-02-28 18:47:10 -05:00
|
|
|
char *s;
|
|
|
|
|
2015-08-02 19:32:21 -04:00
|
|
|
if (fieldsep) {
|
2016-02-28 18:47:10 -05:00
|
|
|
if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))) {
|
|
|
|
if (skip_to_next_col) {
|
2016-03-06 18:20:24 -05:00
|
|
|
s += fieldseplen;
|
2016-02-28 18:47:10 -05:00
|
|
|
a->data = s;
|
2016-03-06 18:20:24 -05:00
|
|
|
a->len = a->len - (s - a->data);
|
2016-02-28 18:47:10 -05:00
|
|
|
}
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: Posix for no apparent reason does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
} else {
|
2016-02-28 18:47:10 -05:00
|
|
|
a->data += a->len - 1;
|
|
|
|
a->len = 1;
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: Posix for no apparent reason does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
}
|
2015-08-02 19:32:21 -04:00
|
|
|
} else {
|
2016-02-28 18:47:10 -05:00
|
|
|
skipblank(a);
|
|
|
|
skipnonblank(a);
|
2015-08-02 19:12:29 -04:00
|
|
|
}
|
2015-08-02 19:32:21 -04:00
|
|
|
}
|
2015-08-02 19:12:29 -04:00
|
|
|
|
2016-05-14 21:56:54 -04:00
|
|
|
static void
|
|
|
|
columns(struct line *line, const struct keydef *kd, struct column *col)
|
2015-08-02 19:32:21 -04:00
|
|
|
{
|
2015-08-03 11:35:01 -04:00
|
|
|
Rune r;
|
2016-03-06 20:04:29 -05:00
|
|
|
struct line start, end;
|
2016-05-14 21:56:54 -04:00
|
|
|
size_t utflen, rlen;
|
2015-08-02 19:32:21 -04:00
|
|
|
int i;
|
2015-08-02 19:12:29 -04:00
|
|
|
|
2016-02-28 18:47:10 -05:00
|
|
|
start.data = line->data;
|
|
|
|
start.len = line->len;
|
|
|
|
for (i = 1; i < kd->start_column; i++)
|
|
|
|
skipcolumn(&start, 1);
|
2015-08-02 19:32:21 -04:00
|
|
|
if (kd->flags & MOD_STARTB)
|
2016-02-28 18:47:10 -05:00
|
|
|
skipblank(&start);
|
|
|
|
for (utflen = 0; start.len > 1 && utflen < kd->start_char - 1;) {
|
|
|
|
rlen = chartorune(&r, start.data);
|
|
|
|
start.data += rlen;
|
|
|
|
start.len -= rlen;
|
2015-08-03 11:35:01 -04:00
|
|
|
utflen++;
|
|
|
|
}
|
2015-08-02 19:12:29 -04:00
|
|
|
|
2016-02-28 18:47:10 -05:00
|
|
|
end.data = line->data;
|
|
|
|
end.len = line->len;
|
2015-08-02 19:32:21 -04:00
|
|
|
if (kd->end_column) {
|
2016-02-28 18:47:10 -05:00
|
|
|
for (i = 1; i < kd->end_column; i++)
|
|
|
|
skipcolumn(&end, 1);
|
2015-08-02 19:32:21 -04:00
|
|
|
if (kd->flags & MOD_ENDB)
|
2016-02-28 18:47:10 -05:00
|
|
|
skipblank(&end);
|
2015-08-03 11:35:01 -04:00
|
|
|
if (kd->end_char) {
|
2016-02-28 18:47:10 -05:00
|
|
|
for (utflen = 0; end.len > 1 && utflen < kd->end_char;) {
|
|
|
|
rlen = chartorune(&r, end.data);
|
|
|
|
end.data += rlen;
|
|
|
|
end.len -= rlen;
|
2015-08-03 11:35:01 -04:00
|
|
|
utflen++;
|
|
|
|
}
|
|
|
|
} else {
|
2016-02-28 18:47:10 -05:00
|
|
|
skipcolumn(&end, 0);
|
2015-08-03 11:35:01 -04:00
|
|
|
}
|
2015-08-02 19:32:21 -04:00
|
|
|
} else {
|
2016-02-28 18:47:10 -05:00
|
|
|
end.data += end.len - 1;
|
|
|
|
end.len = 1;
|
2015-08-02 19:32:21 -04:00
|
|
|
}
|
2016-05-14 21:56:54 -04:00
|
|
|
col->line.len = MAX(0, end.data - start.data);
|
|
|
|
if (!(col->line.data) || col->cap < col->line.len + 1) {
|
|
|
|
free(col->line.data);
|
|
|
|
col->line.data = emalloc(col->line.len + 1);
|
|
|
|
}
|
|
|
|
memcpy(col->line.data, start.data, col->line.len);
|
|
|
|
col->line.data[col->line.len] = '\0';
|
2014-11-23 14:35:56 -05:00
|
|
|
}
|
|
|
|
|
2016-02-15 19:42:25 -05:00
|
|
|
static int
|
2016-03-06 20:04:29 -05:00
|
|
|
skipmodcmp(struct line *a, struct line *b, int flags)
|
2016-02-15 19:42:25 -05:00
|
|
|
{
|
|
|
|
Rune r1, r2;
|
2016-02-28 18:47:10 -05:00
|
|
|
size_t offa = 0, offb = 0;
|
2016-02-15 19:42:25 -05:00
|
|
|
|
|
|
|
do {
|
2016-02-28 18:47:10 -05:00
|
|
|
offa += chartorune(&r1, a->data + offa);
|
|
|
|
offb += chartorune(&r2, b->data + offb);
|
2016-02-15 19:42:25 -05:00
|
|
|
|
|
|
|
if (flags & MOD_D && flags & MOD_I) {
|
2016-02-28 18:47:10 -05:00
|
|
|
while (offa < a->len && ((!isblankrune(r1) &&
|
|
|
|
!isalnumrune(r1)) || (!isprintrune(r1))))
|
|
|
|
offa += chartorune(&r1, a->data + offa);
|
|
|
|
while (offb < b->len && ((!isblankrune(r2) &&
|
|
|
|
!isalnumrune(r2)) || (!isprintrune(r2))))
|
|
|
|
offb += chartorune(&r2, b->data + offb);
|
2016-02-15 19:42:25 -05:00
|
|
|
}
|
|
|
|
else if (flags & MOD_D) {
|
2016-02-28 18:47:10 -05:00
|
|
|
while (offa < a->len && !isblankrune(r1) &&
|
|
|
|
!isalnumrune(r1))
|
|
|
|
offa += chartorune(&r1, a->data + offa);
|
|
|
|
while (offb < b->len && !isblankrune(r2) &&
|
|
|
|
!isalnumrune(r2))
|
|
|
|
offb += chartorune(&r2, b->data + offb);
|
2016-02-15 19:42:25 -05:00
|
|
|
}
|
|
|
|
else if (flags & MOD_I) {
|
2016-02-28 18:47:10 -05:00
|
|
|
while (offa < a->len && !isprintrune(r1))
|
|
|
|
offa += chartorune(&r1, a->data + offa);
|
|
|
|
while (offb < b->len && !isprintrune(r2))
|
|
|
|
offb += chartorune(&r2, b->data + offb);
|
2016-02-15 19:42:25 -05:00
|
|
|
}
|
|
|
|
if (flags & MOD_F) {
|
|
|
|
r1 = toupperrune(r1);
|
|
|
|
r2 = toupperrune(r2);
|
|
|
|
}
|
|
|
|
} while (r1 && r1 == r2);
|
|
|
|
|
|
|
|
return r1 - r2;
|
|
|
|
}
|
|
|
|
|
2014-04-12 11:53:10 -04:00
|
|
|
static int
|
2016-03-06 20:04:29 -05:00
|
|
|
slinecmp(struct line *a, struct line *b)
|
2011-06-02 08:03:34 -04:00
|
|
|
{
|
2014-04-12 11:53:10 -04:00
|
|
|
int res = 0;
|
2019-02-22 17:26:39 -05:00
|
|
|
double x, y;
|
2015-08-02 17:55:54 -04:00
|
|
|
struct keydef *kd;
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2015-08-02 17:55:54 -04:00
|
|
|
TAILQ_FOREACH(kd, &kdhead, entry) {
|
2016-02-28 18:47:10 -05:00
|
|
|
columns(a, kd, &col1);
|
|
|
|
columns(b, kd, &col2);
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2014-05-06 12:47:02 -04:00
|
|
|
/* if -u is given, don't use default key definition
|
|
|
|
* unless it is the only one */
|
2015-08-02 17:55:54 -04:00
|
|
|
if (uflag && kd == TAILQ_LAST(&kdhead, kdhead) &&
|
|
|
|
TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead)) {
|
2014-04-12 11:53:10 -04:00
|
|
|
res = 0;
|
2015-08-02 17:55:54 -04:00
|
|
|
} else if (kd->flags & MOD_N) {
|
2019-02-22 17:26:39 -05:00
|
|
|
x = strtod(col1.line.data, NULL);
|
|
|
|
y = strtod(col2.line.data, NULL);
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: Posix for no apparent reason does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
res = (x < y) ? -1 : (x > y);
|
2016-02-15 19:42:25 -05:00
|
|
|
} else if (kd->flags & (MOD_D | MOD_F | MOD_I)) {
|
2016-05-14 21:56:54 -04:00
|
|
|
res = skipmodcmp(&col1.line, &col2.line, kd->flags);
|
2015-01-31 13:12:18 -05:00
|
|
|
} else {
|
2016-05-14 21:56:54 -04:00
|
|
|
res = linecmp(&col1.line, &col2.line);
|
2015-01-31 13:12:18 -05:00
|
|
|
}
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2015-08-02 17:55:54 -04:00
|
|
|
if (kd->flags & MOD_R)
|
2014-05-06 10:07:05 -04:00
|
|
|
res = -res;
|
2015-08-02 17:55:54 -04:00
|
|
|
if (res)
|
|
|
|
break;
|
2014-04-12 11:53:10 -04:00
|
|
|
}
|
2015-03-22 18:37:37 -04:00
|
|
|
|
2014-05-06 10:07:05 -04:00
|
|
|
return res;
|
2014-04-12 11:53:10 -04:00
|
|
|
}
|
|
|
|
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: Posix for no apparent reason does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
static int
|
|
|
|
check(FILE *fp, const char *fname)
|
2014-04-12 11:53:10 -04:00
|
|
|
{
|
2016-03-06 20:04:29 -05:00
|
|
|
static struct line prev, cur, tmp;
|
2016-02-28 18:47:10 -05:00
|
|
|
static size_t prevsize, cursize, tmpsize;
|
2016-03-12 14:46:31 -05:00
|
|
|
ssize_t len;
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2016-03-12 14:46:31 -05:00
|
|
|
if (!prev.data) {
|
|
|
|
if ((len = getline(&prev.data, &prevsize, fp)) < 0)
|
|
|
|
eprintf("getline:");
|
|
|
|
prev.len = len;
|
|
|
|
}
|
|
|
|
while ((len = getline(&cur.data, &cursize, fp)) > 0) {
|
|
|
|
cur.len = len;
|
2016-03-06 19:58:31 -05:00
|
|
|
if (uflag > slinecmp(&cur, &prev)) {
|
2016-02-28 18:47:10 -05:00
|
|
|
if (!Cflag) {
|
|
|
|
weprintf("disorder %s: ", fname);
|
|
|
|
fwrite(cur.data, 1, cur.len, stderr);
|
|
|
|
}
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: Posix for no apparent reason does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
return 1;
|
2014-05-06 07:35:06 -04:00
|
|
|
}
|
2015-08-02 19:12:29 -04:00
|
|
|
tmp = cur;
|
2016-02-28 18:47:10 -05:00
|
|
|
tmpsize = cursize;
|
2015-08-02 19:12:29 -04:00
|
|
|
cur = prev;
|
2016-02-28 18:47:10 -05:00
|
|
|
cursize = prevsize;
|
2015-08-02 19:12:29 -04:00
|
|
|
prev = tmp;
|
2016-02-28 18:47:10 -05:00
|
|
|
prevsize = tmpsize;
|
2013-12-12 08:08:49 -05:00
|
|
|
}
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: Posix for no apparent reason does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
|
|
|
|
return 0;
|
2011-06-02 08:03:34 -04:00
|
|
|
}
|
2013-06-14 14:20:47 -04:00
|
|
|
|
2015-08-02 19:32:21 -04:00
|
|
|
static int
|
|
|
|
parse_flags(char **s, int *flags, int bflag)
|
2014-04-12 11:53:10 -04:00
|
|
|
{
|
2015-08-02 19:32:21 -04:00
|
|
|
while (isalpha((int)**s)) {
|
|
|
|
switch (*((*s)++)) {
|
|
|
|
case 'b':
|
|
|
|
*flags |= bflag;
|
|
|
|
break;
|
2016-02-15 19:42:25 -05:00
|
|
|
case 'd':
|
|
|
|
*flags |= MOD_D;
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
*flags |= MOD_F;
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
*flags |= MOD_I;
|
|
|
|
break;
|
2015-08-02 19:32:21 -04:00
|
|
|
case 'n':
|
|
|
|
*flags |= MOD_N;
|
|
|
|
break;
|
|
|
|
case 'r':
|
|
|
|
*flags |= MOD_R;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
2015-03-22 18:37:37 -04:00
|
|
|
|
2015-08-02 19:32:21 -04:00
|
|
|
return 0;
|
2014-04-12 11:53:10 -04:00
|
|
|
}
|
|
|
|
|
2015-08-02 19:32:21 -04:00
|
|
|
static void
|
|
|
|
addkeydef(char *kdstr, int flags)
|
2014-04-12 11:53:10 -04:00
|
|
|
{
|
2015-08-02 19:32:21 -04:00
|
|
|
struct keydef *kd;
|
2014-05-03 12:34:51 -04:00
|
|
|
|
2015-08-02 19:32:21 -04:00
|
|
|
kd = enmalloc(2, sizeof(*kd));
|
2014-04-18 12:21:31 -04:00
|
|
|
|
2015-08-02 19:32:21 -04:00
|
|
|
/* parse key definition kdstr with format
|
|
|
|
* start_column[.start_char][flags][,end_column[.end_char][flags]]
|
|
|
|
*/
|
|
|
|
kd->start_column = 1;
|
|
|
|
kd->start_char = 1;
|
|
|
|
kd->end_column = 0; /* 0 means end of line */
|
|
|
|
kd->end_char = 0; /* 0 means end of column */
|
|
|
|
kd->flags = flags;
|
2014-04-12 11:53:10 -04:00
|
|
|
|
2015-08-02 19:32:21 -04:00
|
|
|
if ((kd->start_column = strtol(kdstr, &kdstr, 10)) < 1)
|
|
|
|
enprintf(2, "invalid start column in key definition\n");
|
|
|
|
|
|
|
|
if (*kdstr == '.') {
|
|
|
|
if ((kd->start_char = strtol(kdstr + 1, &kdstr, 10)) < 1)
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: Posix for no apparent reason does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
enprintf(2, "invalid start character in key "
|
|
|
|
"definition\n");
|
2014-05-03 12:34:51 -04:00
|
|
|
}
|
2015-08-02 19:32:21 -04:00
|
|
|
if (parse_flags(&kdstr, &kd->flags, MOD_STARTB) < 0)
|
|
|
|
enprintf(2, "invalid start flags in key definition\n");
|
|
|
|
|
|
|
|
if (*kdstr == ',') {
|
|
|
|
if ((kd->end_column = strtol(kdstr + 1, &kdstr, 10)) < 0)
|
|
|
|
enprintf(2, "invalid end column in key definition\n");
|
|
|
|
if (*kdstr == '.') {
|
|
|
|
if ((kd->end_char = strtol(kdstr + 1, &kdstr, 10)) < 0)
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: Posix for no apparent reason does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
enprintf(2, "invalid end character in key "
|
|
|
|
"definition\n");
|
2015-08-02 19:32:21 -04:00
|
|
|
}
|
|
|
|
if (parse_flags(&kdstr, &kd->flags, MOD_ENDB) < 0)
|
|
|
|
enprintf(2, "invalid end flags in key definition\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*kdstr != '\0')
|
|
|
|
enprintf(2, "invalid key definition\n");
|
|
|
|
|
|
|
|
TAILQ_INSERT_TAIL(&kdhead, kd, entry);
|
2014-04-12 11:53:10 -04:00
|
|
|
}
|
2015-03-07 09:39:39 -05:00
|
|
|
|
|
|
|
static void
|
|
|
|
usage(void)
|
|
|
|
{
|
2016-02-15 19:42:25 -05:00
|
|
|
enprintf(2, "usage: %s [-Cbcdfimnru] [-o outfile] [-t delim] "
|
2015-08-02 19:12:29 -04:00
|
|
|
"[-k def]... [file ...]\n", argv0);
|
2015-03-07 09:39:39 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
2015-03-22 18:37:37 -04:00
|
|
|
FILE *fp, *ofp = stdout;
|
2015-03-07 09:39:39 -05:00
|
|
|
struct linebuf linebuf = EMPTY_LINEBUF;
|
2015-03-22 18:37:37 -04:00
|
|
|
size_t i;
|
Add *fshut() functions to properly flush file streams
This has been a known issue for a long time. Example:
printf "word" > /dev/full
wouldn't report there's not enough space on the device.
This is due to the fact that every libc has internal buffers
for stdout which store fragments of written data until they reach
a certain size or on some callback to flush them all at once to the
kernel.
You can force the libc to flush them with fflush(). In case flushing
fails, you can check the return value of fflush() and report an error.
However, previously, sbase didn't have such checks and without fflush(),
the libc silently flushes the buffers on exit without checking the errors.
No offense, but there's no way for the libc to report errors in the exit-
condition.
GNU coreutils solve this by having onexit-callbacks to handle the flushing
and report issues, but they have obvious deficiencies.
After long discussions on IRC, we came to the conclusion that checking the
return value of every io-function would be a bit too much, and having a
general-purpose fclose-wrapper would be the best way to go.
It turned out that fclose() alone is not enough to detect errors. The right
way to do it is to fflush() + check ferror on the fp and then to a fclose().
This is what fshut does and that's how it's done before each return.
The return value is obviously affected, reporting an error in case a flush
or close failed, but also when reading failed for some reason, the error-
state is caught.
the !!( ... + ...) construction is used to call all functions inside the
brackets and not "terminating" on the first.
We want errors to be reported, but there's no reason to stop flushing buffers
when one other file buffer has issues.
Obviously, functionales come before the flush and ret-logic comes after to
prevent early exits as well without reporting warnings if there are any.
One more advantage of fshut() is that it is even able to report errors
on obscure NFS-setups which the other coreutils are unable to detect,
because they only check the return-value of fflush() and fclose(),
not ferror() as well.
2015-04-04 15:25:17 -04:00
|
|
|
int global_flags = 0, ret = 0;
|
2015-03-22 18:37:37 -04:00
|
|
|
char *outfile = NULL;
|
2015-03-07 09:39:39 -05:00
|
|
|
|
|
|
|
ARGBEGIN {
|
|
|
|
case 'C':
|
|
|
|
Cflag = 1;
|
|
|
|
break;
|
|
|
|
case 'b':
|
|
|
|
global_flags |= MOD_STARTB | MOD_ENDB;
|
|
|
|
break;
|
|
|
|
case 'c':
|
|
|
|
cflag = 1;
|
|
|
|
break;
|
2016-02-15 19:42:25 -05:00
|
|
|
case 'd':
|
|
|
|
global_flags |= MOD_D;
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
global_flags |= MOD_F;
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
global_flags |= MOD_I;
|
|
|
|
break;
|
2015-03-07 09:39:39 -05:00
|
|
|
case 'k':
|
|
|
|
addkeydef(EARGF(usage()), global_flags);
|
|
|
|
break;
|
2015-03-22 18:37:37 -04:00
|
|
|
case 'm':
|
2015-05-05 07:41:43 -04:00
|
|
|
/* more or less for free, but for performance-reasons,
|
2015-03-22 18:37:37 -04:00
|
|
|
* we should keep this flag in mind and maybe some later
|
|
|
|
* day implement it properly so we don't run out of memory
|
|
|
|
* while merging large sorted files.
|
|
|
|
*/
|
|
|
|
break;
|
2015-03-07 09:39:39 -05:00
|
|
|
case 'n':
|
|
|
|
global_flags |= MOD_N;
|
|
|
|
break;
|
2015-03-22 18:37:37 -04:00
|
|
|
case 'o':
|
|
|
|
outfile = EARGF(usage());
|
|
|
|
break;
|
2015-03-07 09:39:39 -05:00
|
|
|
case 'r':
|
|
|
|
global_flags |= MOD_R;
|
|
|
|
break;
|
|
|
|
case 't':
|
|
|
|
fieldsep = EARGF(usage());
|
2016-03-07 05:18:30 -05:00
|
|
|
if (!*fieldsep)
|
|
|
|
eprintf("empty delimiter\n");
|
2016-02-28 18:47:10 -05:00
|
|
|
fieldseplen = unescape(fieldsep);
|
2015-03-07 09:39:39 -05:00
|
|
|
break;
|
|
|
|
case 'u':
|
|
|
|
uflag = 1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
usage();
|
2015-11-01 05:16:49 -05:00
|
|
|
} ARGEND
|
2015-03-07 09:39:39 -05:00
|
|
|
|
2015-04-05 14:31:28 -04:00
|
|
|
/* -b shall only apply to custom key definitions */
|
2015-08-02 17:55:54 -04:00
|
|
|
if (TAILQ_EMPTY(&kdhead) && global_flags)
|
|
|
|
addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB));
|
2015-03-07 09:39:39 -05:00
|
|
|
addkeydef("1", global_flags & MOD_R);
|
|
|
|
|
2015-03-22 18:37:37 -04:00
|
|
|
if (!argc) {
|
2015-03-07 09:39:39 -05:00
|
|
|
if (Cflag || cflag) {
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: POSIX, for no apparent reason, does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
if (check(stdin, "<stdin>") && !ret)
|
|
|
|
ret = 1;
|
2015-03-07 09:39:39 -05:00
|
|
|
} else {
|
|
|
|
getlines(stdin, &linebuf);
|
|
|
|
}
|
2015-03-22 18:37:37 -04:00
|
|
|
} else for (; *argv; argc--, argv++) {
|
2015-05-19 11:44:15 -04:00
|
|
|
if (!strcmp(*argv, "-")) {
|
2015-05-15 07:28:39 -04:00
|
|
|
*argv = "<stdin>";
|
|
|
|
fp = stdin;
|
|
|
|
} else if (!(fp = fopen(*argv, "r"))) {
|
2015-03-22 18:37:37 -04:00
|
|
|
enprintf(2, "fopen %s:", *argv);
|
2015-03-07 09:39:39 -05:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (Cflag || cflag) {
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: POSIX, for no apparent reason, does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
if (check(fp, *argv) && !ret)
|
|
|
|
ret = 1;
|
2015-03-07 09:39:39 -05:00
|
|
|
} else {
|
|
|
|
getlines(fp, &linebuf);
|
|
|
|
}
|
2015-05-15 07:28:39 -04:00
|
|
|
if (fp != stdin && fshut(fp, *argv))
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: POSIX, for no apparent reason, does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
ret = 2;
|
2015-03-07 09:39:39 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!Cflag && !cflag) {
|
2015-03-22 18:37:37 -04:00
|
|
|
if (outfile && !(ofp = fopen(outfile, "w")))
|
|
|
|
eprintf("fopen %s:", outfile);
|
|
|
|
|
2016-02-28 18:47:10 -05:00
|
|
|
qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lines),
|
2016-03-06 19:58:31 -05:00
|
|
|
(int (*)(const void *, const void *))slinecmp);
|
2015-03-07 09:39:39 -05:00
|
|
|
|
|
|
|
for (i = 0; i < linebuf.nlines; i++) {
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: POSIX, for no apparent reason, does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
if (!uflag || i == 0 ||
|
2016-03-06 19:58:31 -05:00
|
|
|
slinecmp(&linebuf.lines[i], &linebuf.lines[i - 1])) {
|
2016-02-28 18:47:10 -05:00
|
|
|
fwrite(linebuf.lines[i].data, 1,
|
|
|
|
linebuf.lines[i].len, ofp);
|
2015-03-07 09:39:39 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Audit sort(1) and mark it as finished
1) Remove the function prototypes. No need for them, as the
functions are ordered.
2) Add fieldseplen, so the length of the field-separator is not
calculated nearly each time skipcolumn() is called.
3) rename next_col to skip_to_next_col so the purpose is clear,
also reorder the conditional accordingly.
4) Put parentheses around certain ternary expressions.
5) BUGFIX: Don't just exit() in check(), but make it return something,
so we can cleanly fshut() everything.
6) OFF-POSIX: POSIX, for no apparent reason, does not allow more than
one file when the -c or -C flags are given.
This can be problematic when you want to check multiple files.
With the change 5), rewriting check() to return a value, I went
off-posix after discussing this with Dimitris to just allow
arbitrary numbers of files. Obviously, this does not break scripts
and is convenient for everybody who wants to quickly check a big
amount of files.
As soon as 1 file is "unsorted", the return value is 1, as expected.
For convenience reasons, check()'s warning now includes the filename.
7) BUGFIX: Set ret to 2 instead of 1 when the fshut(fp, *argv) fails.
8) BUGFIX: Don't forget to fshut stderr at the end. This would improperly
return 1 in the following case:
$ sort -c unsorted_file 2> /dev/full
9) Other style changes, line length, empty line before return.
2015-08-04 06:45:59 -04:00
|
|
|
if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
|
|
|
|
fshut(stderr, "<stderr>"))
|
2015-05-24 19:33:19 -04:00
|
|
|
ret = 2;
|
|
|
|
|
Add *fshut() functions to properly flush file streams
This has been a known issue for a long time. Example:
printf "word" > /dev/full
wouldn't report there's not enough space on the device.
This is due to the fact that every libc has internal buffers
for stdout which store fragments of written data until they reach
a certain size or on some callback to flush them all at once to the
kernel.
You can force the libc to flush them with fflush(). In case flushing
fails, you can check the return value of fflush() and report an error.
However, previously, sbase didn't have such checks and without fflush(),
the libc silently flushes the buffers on exit without checking the errors.
No offense, but there's no way for the libc to report errors in the exit-
condition.
GNU coreutils solve this by having onexit-callbacks to handle the flushing
and report issues, but they have obvious deficiencies.
After long discussions on IRC, we came to the conclusion that checking the
return value of every io-function would be a bit too much, and having a
general-purpose fclose-wrapper would be the best way to go.
It turned out that fclose() alone is not enough to detect errors. The right
way to do it is to fflush() + check ferror() on the fp and then do an fclose().
This is what fshut does and that's how it's done before each return.
The return value is obviously affected, reporting an error in case a flush
or close failed, but also when reading failed for some reason, the error-
state is caught.
The !!( ... + ...) construction is used to call all functions inside the
brackets instead of "terminating" (short-circuiting) on the first one.
We want errors to be reported, but there's no reason to stop flushing buffers
when one other file buffer has issues.
Obviously, the functional code comes before the flush and the ret-logic comes
after it, which also prevents early exits that skip reporting any warnings.
One more advantage of fshut() is that it is even able to report errors
on obscure NFS-setups which the other coreutils are unable to detect,
because they only check the return-value of fflush() and fclose(),
not ferror() as well.
2015-04-04 15:25:17 -04:00
|
|
|
return ret;
|
2015-03-07 09:39:39 -05:00
|
|
|
}
|