Add -m and -o flags to sort(1)

Merging (-m) comes pretty much automatically: since no script relies on
the undefined behaviour of the input _not_ being sorted, we might as
well just sort the already sorted input.
The only downside is memory usage, which can be an issue for large
files.
The o-flag was trivial to implement.
This commit is contained in:
FRIGN 2015-03-22 23:37:37 +01:00
parent 71adaed519
commit 49e27c1b0c
3 changed files with 49 additions and 22 deletions

2
README
View File

@ -66,7 +66,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
=*| sha256sum non-posix none =*| sha256sum non-posix none
=*| sha512sum non-posix none =*| sha512sum non-posix none
=*| sleep yes none =*| sleep yes none
sort no -m, -o, -d, -f, -i sort no -d, -f, -i
=*| split yes none =*| split yes none
=*| sponge non-posix none =*| sponge non-posix none
#*| strings yes none #*| strings yes none

30
sort.1
View File

@ -1,4 +1,4 @@
.Dd January 30, 2015 .Dd March 22, 2015
.Dt SORT 1 .Dt SORT 1
.Os sbase .Os sbase
.Sh NAME .Sh NAME
@ -6,17 +6,19 @@
.Nd sort lines .Nd sort lines
.Sh SYNOPSIS .Sh SYNOPSIS
.Nm .Nm
.Op Fl bnru .Op Fl Cbcmnru
.Op Fl o Ar outfile
.Op Fl t Ar delim .Op Fl t Ar delim
.Op Fl k Ar key ... .Op Fl k Ar key ...
.Op Ar file ... .Op Ar file ...
.Sh DESCRIPTION .Sh DESCRIPTION
.Nm .Nm
writes the sorted concatenation of the given writes the sorted concatenation of each
.Ar files
to stdout. If no
.Ar file .Ar file
is given, to stdout.
If no
.Ar file
is given
.Nm .Nm
reads from stdin. reads from stdin.
.Sh OPTIONS .Sh OPTIONS
@ -31,10 +33,10 @@ Skip leading whitespace of columns when sorting.
.It Fl c .It Fl c
The same as The same as
.Fl C .Fl C
except that when disorder is detected, a message is printed to stderr except that when disorder is detected, a message is written to stderr
indicating the location of the disorder. indicating the location of the disorder.
.It Fl k Ar key .It Fl k Ar key
Specifies a key definition of the form Specify a key definition of the form
.Sm off .Sm off
.Sy S .Sy S
.No [. .No [.
@ -67,12 +69,20 @@ can be used to specify options
that only apply to this key definition. that only apply to this key definition.
.Sy b .Sy b
is special in that it only applies to the column that it was specified after. is special in that it only applies to the column that it was specified after.
.It Fl m
Assume sorted input, merge only.
.It Fl n .It Fl n
Perform a numeric sort. Perform a numeric sort.
.It Fl o Ar outfile
Write output to
.Ar outfile
rather than stdout.
.It Fl r .It Fl r
Reverses the sort. Reverses the sort.
.It Fl t Ar delim .It Fl t Ar delim
Specifies the field delimiter. Set
.Ar delim
as the field delimiter.
.It Fl u .It Fl u
Prints equal lines only once. Print equal lines only once.
.El .El

39
sort.c
View File

@ -107,6 +107,7 @@ linecmp(const char **a, const char **b)
free(s1); free(s1);
free(s2); free(s2);
} }
return res; return res;
} }
@ -127,6 +128,8 @@ parse_flags(char **s, int *flags, int bflag)
default: default:
return -1; return -1;
} }
}
return 0; return 0;
} }
@ -163,9 +166,8 @@ parse_keydef(struct keydef *kd, char *s, int flags)
if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0) if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0)
return -1; return -1;
} }
if (*rest != '\0')
return -1; return -(*rest);
return 0;
} }
static char * static char *
@ -173,6 +175,7 @@ skipblank(char *s)
{ {
while (*s && isblank(*s)) while (*s && isblank(*s))
s++; s++;
return s; return s;
} }
@ -224,16 +227,17 @@ columns(char *line, const struct keydef *kd)
static void static void
usage(void) usage(void)
{ {
enprintf(2, "usage: %s [-Cbcnru] [-t delim] [-k def]... [file...]\n", argv0); enprintf(2, "usage: %s [-Cbcmnru] [-o outfile] [-t delim] [-k def]... [file ...]\n", argv0);
} }
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
size_t i; FILE *fp, *ofp = stdout;
FILE *fp;
struct linebuf linebuf = EMPTY_LINEBUF; struct linebuf linebuf = EMPTY_LINEBUF;
size_t i;
int global_flags = 0; int global_flags = 0;
char *outfile = NULL;
ARGBEGIN { ARGBEGIN {
case 'C': case 'C':
@ -248,9 +252,19 @@ main(int argc, char *argv[])
case 'k': case 'k':
addkeydef(EARGF(usage()), global_flags); addkeydef(EARGF(usage()), global_flags);
break; break;
case 'm':
/* more or less for free, but for performance reasons,
* we should keep this flag in mind and maybe some later
* day implement it properly so we don't run out of memory
* while merging large sorted files.
*/
break;
case 'n': case 'n':
global_flags |= MOD_N; global_flags |= MOD_N;
break; break;
case 'o':
outfile = EARGF(usage());
break;
case 'r': case 'r':
global_flags |= MOD_R; global_flags |= MOD_R;
break; break;
@ -270,15 +284,15 @@ main(int argc, char *argv[])
addkeydef("1", global_flags); addkeydef("1", global_flags);
addkeydef("1", global_flags & MOD_R); addkeydef("1", global_flags & MOD_R);
if (argc == 0) { if (!argc) {
if (Cflag || cflag) { if (Cflag || cflag) {
check(stdin); check(stdin);
} else { } else {
getlines(stdin, &linebuf); getlines(stdin, &linebuf);
} }
} else for (; argc > 0; argc--, argv++) { } else for (; *argv; argc--, argv++) {
if (!(fp = fopen(argv[0], "r"))) { if (!(fp = fopen(*argv, "r"))) {
enprintf(2, "fopen %s:", argv[0]); enprintf(2, "fopen %s:", *argv);
continue; continue;
} }
if (Cflag || cflag) { if (Cflag || cflag) {
@ -290,13 +304,16 @@ main(int argc, char *argv[])
} }
if (!Cflag && !cflag) { if (!Cflag && !cflag) {
if (outfile && !(ofp = fopen(outfile, "w")))
eprintf("fopen %s:", outfile);
qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines, qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines,
(int (*)(const void *, const void *))linecmp); (int (*)(const void *, const void *))linecmp);
for (i = 0; i < linebuf.nlines; i++) { for (i = 0; i < linebuf.nlines; i++) {
if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i], if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i],
(const char **)&linebuf.lines[i-1])) { (const char **)&linebuf.lines[i-1])) {
fputs(linebuf.lines[i], stdout); fputs(linebuf.lines[i], ofp);
} }
} }
} }