Add -m and -o flags to sort(1)
The -m flag comes pretty much for free: since no script relies on the undefined behaviour of the input _not_ being sorted, we might as well simply sort the already-sorted input. The only downside is memory usage, which can be an issue for large files. The -o flag was trivial to implement.
This commit is contained in:
parent
71adaed519
commit
49e27c1b0c
2
README
2
README
@ -66,7 +66,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
|
||||
=*| sha256sum non-posix none
|
||||
=*| sha512sum non-posix none
|
||||
=*| sleep yes none
|
||||
sort no -m, -o, -d, -f, -i
|
||||
sort no -d, -f, -i
|
||||
=*| split yes none
|
||||
=*| sponge non-posix none
|
||||
#*| strings yes none
|
||||
|
30
sort.1
30
sort.1
@ -1,4 +1,4 @@
|
||||
.Dd January 30, 2015
|
||||
.Dd March 22, 2015
|
||||
.Dt SORT 1
|
||||
.Os sbase
|
||||
.Sh NAME
|
||||
@ -6,17 +6,19 @@
|
||||
.Nd sort lines
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl bnru
|
||||
.Op Fl Cbcmnru
|
||||
.Op Fl o Ar outfile
|
||||
.Op Fl t Ar delim
|
||||
.Op Fl k Ar key ...
|
||||
.Op Ar file ...
|
||||
.Sh DESCRIPTION
|
||||
.Nm
|
||||
writes the sorted concatenation of the given
|
||||
.Ar files
|
||||
to stdout. If no
|
||||
writes the sorted concatenation of each
|
||||
.Ar file
|
||||
is given,
|
||||
to stdout.
|
||||
If no
|
||||
.Ar file
|
||||
is given
|
||||
.Nm
|
||||
reads from stdin.
|
||||
.Sh OPTIONS
|
||||
@ -31,10 +33,10 @@ Skip leading whitespace of columns when sorting.
|
||||
.It Fl c
|
||||
The same as
|
||||
.Fl C
|
||||
except that when disorder is detected, a message is printed to stderr
|
||||
except that when disorder is detected, a message is written to stderr
|
||||
indicating the location of the disorder.
|
||||
.It Fl k Ar key
|
||||
Specifies a key definition of the form
|
||||
Specify a key definition of the form
|
||||
.Sm off
|
||||
.Sy S
|
||||
.No [.
|
||||
@ -67,12 +69,20 @@ can be used to specify options
|
||||
that only apply to this key definition.
|
||||
.Sy b
|
||||
is special in that it only applies to the column that it was specified after.
|
||||
.It Fl m
|
||||
Assume sorted input, merge only.
|
||||
.It Fl n
|
||||
Perform a numeric sort.
|
||||
.It Fl o Ar outfile
|
||||
Write output to
|
||||
.Ar outfile
|
||||
rather than stdout.
|
||||
.It Fl r
|
||||
Reverses the sort.
|
||||
.It Fl t Ar delim
|
||||
Specifies the field delimiter.
|
||||
Set
|
||||
.Ar delim
|
||||
as the field delimiter.
|
||||
.It Fl u
|
||||
Prints equal lines only once.
|
||||
Print equal lines only once.
|
||||
.El
|
||||
|
39
sort.c
39
sort.c
@ -107,6 +107,7 @@ linecmp(const char **a, const char **b)
|
||||
free(s1);
|
||||
free(s2);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -127,6 +128,8 @@ parse_flags(char **s, int *flags, int bflag)
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -163,9 +166,8 @@ parse_keydef(struct keydef *kd, char *s, int flags)
|
||||
if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0)
|
||||
return -1;
|
||||
}
|
||||
if (*rest != '\0')
|
||||
return -1;
|
||||
return 0;
|
||||
|
||||
return -(*rest);
|
||||
}
|
||||
|
||||
static char *
|
||||
@ -173,6 +175,7 @@ skipblank(char *s)
|
||||
{
|
||||
while (*s && isblank(*s))
|
||||
s++;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -224,16 +227,17 @@ columns(char *line, const struct keydef *kd)
|
||||
static void
|
||||
usage(void)
|
||||
{
|
||||
enprintf(2, "usage: %s [-Cbcnru] [-t delim] [-k def]... [file...]\n", argv0);
|
||||
enprintf(2, "usage: %s [-Cbcmnru] [-o outfile] [-t delim] [-k def]... [file ...]\n", argv0);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
size_t i;
|
||||
FILE *fp;
|
||||
FILE *fp, *ofp = stdout;
|
||||
struct linebuf linebuf = EMPTY_LINEBUF;
|
||||
size_t i;
|
||||
int global_flags = 0;
|
||||
char *outfile = NULL;
|
||||
|
||||
ARGBEGIN {
|
||||
case 'C':
|
||||
@ -248,9 +252,19 @@ main(int argc, char *argv[])
|
||||
case 'k':
|
||||
addkeydef(EARGF(usage()), global_flags);
|
||||
break;
|
||||
case 'm':
|
||||
/* more or less for free, but for performance reasons,
|
||||
* we should keep this flag in mind and maybe some later
|
||||
* day implement it properly so we don't run out of memory
|
||||
* while merging large sorted files.
|
||||
*/
|
||||
break;
|
||||
case 'n':
|
||||
global_flags |= MOD_N;
|
||||
break;
|
||||
case 'o':
|
||||
outfile = EARGF(usage());
|
||||
break;
|
||||
case 'r':
|
||||
global_flags |= MOD_R;
|
||||
break;
|
||||
@ -270,15 +284,15 @@ main(int argc, char *argv[])
|
||||
addkeydef("1", global_flags);
|
||||
addkeydef("1", global_flags & MOD_R);
|
||||
|
||||
if (argc == 0) {
|
||||
if (!argc) {
|
||||
if (Cflag || cflag) {
|
||||
check(stdin);
|
||||
} else {
|
||||
getlines(stdin, &linebuf);
|
||||
}
|
||||
} else for (; argc > 0; argc--, argv++) {
|
||||
if (!(fp = fopen(argv[0], "r"))) {
|
||||
enprintf(2, "fopen %s:", argv[0]);
|
||||
} else for (; *argv; argc--, argv++) {
|
||||
if (!(fp = fopen(*argv, "r"))) {
|
||||
enprintf(2, "fopen %s:", *argv);
|
||||
continue;
|
||||
}
|
||||
if (Cflag || cflag) {
|
||||
@ -290,13 +304,16 @@ main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
if (!Cflag && !cflag) {
|
||||
if (outfile && !(ofp = fopen(outfile, "w")))
|
||||
eprintf("fopen %s:", outfile);
|
||||
|
||||
qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines,
|
||||
(int (*)(const void *, const void *))linecmp);
|
||||
|
||||
for (i = 0; i < linebuf.nlines; i++) {
|
||||
if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i],
|
||||
(const char **)&linebuf.lines[i-1])) {
|
||||
fputs(linebuf.lines[i], stdout);
|
||||
fputs(linebuf.lines[i], ofp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user