Add -m and -o flags to sort(1)
Merging (-m) comes pretty much automatically: since no script relies on the undefined behaviour of the input _not_ being sorted, we might as well sort the already-sorted input again. The only downside is memory usage, which can be an issue for large files. The -o flag was trivial to implement.
This commit is contained in:
parent
71adaed519
commit
49e27c1b0c
2
README
2
README
@ -66,7 +66,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
|
|||||||
=*| sha256sum non-posix none
|
=*| sha256sum non-posix none
|
||||||
=*| sha512sum non-posix none
|
=*| sha512sum non-posix none
|
||||||
=*| sleep yes none
|
=*| sleep yes none
|
||||||
sort no -m, -o, -d, -f, -i
|
sort no -d, -f, -i
|
||||||
=*| split yes none
|
=*| split yes none
|
||||||
=*| sponge non-posix none
|
=*| sponge non-posix none
|
||||||
#*| strings yes none
|
#*| strings yes none
|
||||||
|
30
sort.1
30
sort.1
@ -1,4 +1,4 @@
|
|||||||
.Dd January 30, 2015
|
.Dd March 22, 2015
|
||||||
.Dt SORT 1
|
.Dt SORT 1
|
||||||
.Os sbase
|
.Os sbase
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
@ -6,17 +6,19 @@
|
|||||||
.Nd sort lines
|
.Nd sort lines
|
||||||
.Sh SYNOPSIS
|
.Sh SYNOPSIS
|
||||||
.Nm
|
.Nm
|
||||||
.Op Fl bnru
|
.Op Fl Cbcmnru
|
||||||
|
.Op Fl o Ar outfile
|
||||||
.Op Fl t Ar delim
|
.Op Fl t Ar delim
|
||||||
.Op Fl k Ar key ...
|
.Op Fl k Ar key ...
|
||||||
.Op Ar file ...
|
.Op Ar file ...
|
||||||
.Sh DESCRIPTION
|
.Sh DESCRIPTION
|
||||||
.Nm
|
.Nm
|
||||||
writes the sorted concatenation of the given
|
writes the sorted concatenation of each
|
||||||
.Ar files
|
|
||||||
to stdout. If no
|
|
||||||
.Ar file
|
.Ar file
|
||||||
is given,
|
to stdout.
|
||||||
|
If no
|
||||||
|
.Ar file
|
||||||
|
is given,
|
||||||
.Nm
|
.Nm
|
||||||
reads from stdin.
|
reads from stdin.
|
||||||
.Sh OPTIONS
|
.Sh OPTIONS
|
||||||
@ -31,10 +33,10 @@ Skip leading whitespace of columns when sorting.
|
|||||||
.It Fl c
|
.It Fl c
|
||||||
The same as
|
The same as
|
||||||
.Fl C
|
.Fl C
|
||||||
except that when disorder is detected, a message is printed to stderr
|
except that when disorder is detected, a message is written to stderr
|
||||||
indicating the location of the disorder.
|
indicating the location of the disorder.
|
||||||
.It Fl k Ar key
|
.It Fl k Ar key
|
||||||
Specifies a key definition of the form
|
Specify a key definition of the form
|
||||||
.Sm off
|
.Sm off
|
||||||
.Sy S
|
.Sy S
|
||||||
.No [.
|
.No [.
|
||||||
@ -67,12 +69,20 @@ can be used to specify options
|
|||||||
that only apply to this key definition.
|
that only apply to this key definition.
|
||||||
.Sy b
|
.Sy b
|
||||||
is special in that it only applies to the column that it was specified after.
|
is special in that it only applies to the column that it was specified after.
|
||||||
|
.It Fl m
|
||||||
|
Assume sorted input, merge only.
|
||||||
.It Fl n
|
.It Fl n
|
||||||
Perform a numeric sort.
|
Perform a numeric sort.
|
||||||
|
.It Fl o Ar outfile
|
||||||
|
Write output to
|
||||||
|
.Ar outfile
|
||||||
|
rather than stdout.
|
||||||
.It Fl r
|
.It Fl r
|
||||||
Reverses the sort.
|
Reverses the sort.
|
||||||
.It Fl t Ar delim
|
.It Fl t Ar delim
|
||||||
Specifies the field delimiter.
|
Set
|
||||||
|
.Ar delim
|
||||||
|
as the field delimiter.
|
||||||
.It Fl u
|
.It Fl u
|
||||||
Prints equal lines only once.
|
Print equal lines only once.
|
||||||
.El
|
.El
|
||||||
|
39
sort.c
39
sort.c
@ -107,6 +107,7 @@ linecmp(const char **a, const char **b)
|
|||||||
free(s1);
|
free(s1);
|
||||||
free(s2);
|
free(s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -127,6 +128,8 @@ parse_flags(char **s, int *flags, int bflag)
|
|||||||
default:
|
default:
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -163,9 +166,8 @@ parse_keydef(struct keydef *kd, char *s, int flags)
|
|||||||
if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0)
|
if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (*rest != '\0')
|
|
||||||
return -1;
|
return -(*rest);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *
|
static char *
|
||||||
@ -173,6 +175,7 @@ skipblank(char *s)
|
|||||||
{
|
{
|
||||||
while (*s && isblank(*s))
|
while (*s && isblank(*s))
|
||||||
s++;
|
s++;
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -224,16 +227,17 @@ columns(char *line, const struct keydef *kd)
|
|||||||
static void
|
static void
|
||||||
usage(void)
|
usage(void)
|
||||||
{
|
{
|
||||||
enprintf(2, "usage: %s [-Cbcnru] [-t delim] [-k def]... [file...]\n", argv0);
|
enprintf(2, "usage: %s [-Cbcmnru] [-o outfile] [-t delim] [-k def]... [file ...]\n", argv0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char *argv[])
|
main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
size_t i;
|
FILE *fp, *ofp = stdout;
|
||||||
FILE *fp;
|
|
||||||
struct linebuf linebuf = EMPTY_LINEBUF;
|
struct linebuf linebuf = EMPTY_LINEBUF;
|
||||||
|
size_t i;
|
||||||
int global_flags = 0;
|
int global_flags = 0;
|
||||||
|
char *outfile = NULL;
|
||||||
|
|
||||||
ARGBEGIN {
|
ARGBEGIN {
|
||||||
case 'C':
|
case 'C':
|
||||||
@ -248,9 +252,19 @@ main(int argc, char *argv[])
|
|||||||
case 'k':
|
case 'k':
|
||||||
addkeydef(EARGF(usage()), global_flags);
|
addkeydef(EARGF(usage()), global_flags);
|
||||||
break;
|
break;
|
||||||
|
case 'm':
|
||||||
|
/* more or less for free, but for performance reasons,
|
||||||
|
* we should keep this flag in mind and maybe some later
|
||||||
|
* day implement it properly so we don't run out of memory
|
||||||
|
* while merging large sorted files.
|
||||||
|
*/
|
||||||
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
global_flags |= MOD_N;
|
global_flags |= MOD_N;
|
||||||
break;
|
break;
|
||||||
|
case 'o':
|
||||||
|
outfile = EARGF(usage());
|
||||||
|
break;
|
||||||
case 'r':
|
case 'r':
|
||||||
global_flags |= MOD_R;
|
global_flags |= MOD_R;
|
||||||
break;
|
break;
|
||||||
@ -270,15 +284,15 @@ main(int argc, char *argv[])
|
|||||||
addkeydef("1", global_flags);
|
addkeydef("1", global_flags);
|
||||||
addkeydef("1", global_flags & MOD_R);
|
addkeydef("1", global_flags & MOD_R);
|
||||||
|
|
||||||
if (argc == 0) {
|
if (!argc) {
|
||||||
if (Cflag || cflag) {
|
if (Cflag || cflag) {
|
||||||
check(stdin);
|
check(stdin);
|
||||||
} else {
|
} else {
|
||||||
getlines(stdin, &linebuf);
|
getlines(stdin, &linebuf);
|
||||||
}
|
}
|
||||||
} else for (; argc > 0; argc--, argv++) {
|
} else for (; *argv; argc--, argv++) {
|
||||||
if (!(fp = fopen(argv[0], "r"))) {
|
if (!(fp = fopen(*argv, "r"))) {
|
||||||
enprintf(2, "fopen %s:", argv[0]);
|
enprintf(2, "fopen %s:", *argv);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (Cflag || cflag) {
|
if (Cflag || cflag) {
|
||||||
@ -290,13 +304,16 @@ main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!Cflag && !cflag) {
|
if (!Cflag && !cflag) {
|
||||||
|
if (outfile && !(ofp = fopen(outfile, "w")))
|
||||||
|
eprintf("fopen %s:", outfile);
|
||||||
|
|
||||||
qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines,
|
qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines,
|
||||||
(int (*)(const void *, const void *))linecmp);
|
(int (*)(const void *, const void *))linecmp);
|
||||||
|
|
||||||
for (i = 0; i < linebuf.nlines; i++) {
|
for (i = 0; i < linebuf.nlines; i++) {
|
||||||
if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i],
|
if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i],
|
||||||
(const char **)&linebuf.lines[i-1])) {
|
(const char **)&linebuf.lines[i-1])) {
|
||||||
fputs(linebuf.lines[i], stdout);
|
fputs(linebuf.lines[i], ofp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user