Add cut(1)

This commit is contained in:
Truls Becken 2013-10-08 20:39:08 +01:00 committed by sin
parent 9eb15ff232
commit 576a5ce55e
4 changed files with 226 additions and 0 deletions

View File

@ -14,6 +14,7 @@ MIT/X Consortium License
© 2012 Robert Ransom <rransom.8774@gmail.com>
© 2013 Jakob Kramer <jakob.kramer@gmx.de>
© 2013 Anselm R Garbe <anselm@garbe.us>
© 2013 Truls Becken <truls.becken@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),

View File

@ -36,6 +36,7 @@ SRC = \
cmp.c \
comm.c \
cp.c \
cut.c \
date.c \
dirname.c \
echo.c \

60
cut.1 Normal file
View File

@ -0,0 +1,60 @@
.TH CUT 1 sbase\-VERSION
.SH NAME
cut \- extract columns of data
.SH SYNOPSIS
.B cut \-b
.I list
.RB [ \-n ]
.RI [ file ...]
.br
.B cut \-c
.I list
.RI [ file ...]
.br
.B cut \-f
.I list
.RB [ \-d
.IR delim ]
.RB [ \-s ]
.RI [ file ...]
.SH DESCRIPTION
.B cut
extracts bytes, characters, or delimited fields from each line of the given
files and writes them to stdout. With no file, or when file is `-', cut reads
from stdin.
.P
.I list
is a comma- or space-separated list of numbers and ranges where numbering
starts from 1. Ranges are of the form `N-M'. If N or M is missing, the
beginning or end of line is assumed. Numbers and ranges may be repeated,
overlapping, and in any order. Selected input is written in the same
order that it is read, and is written exactly once.
.SH OPTIONS
.TP
.BI \-b \ list
The
.I list
specifies byte positions.
.TP
.BI \-c \ list
The
.I list
specifies character positions.
.TP
.BI \-d \ delim
Use first byte of
.I delim
as field delimiter, instead of tab.
.TP
.BI \-f \ list
The
.I list
specifies field numbers. Lines not containing field delimiters are
passed through untouched.
.TP
.B \-n
Do not split characters. A character is output if its last byte is
selected.
.TP
.B \-s
Suppress lines not containing field delimiters.

164
cut.c Normal file
View File

@ -0,0 +1,164 @@
/* See LICENSE file for copyright and license details. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "text.h"
#include "util.h"
/*
 * Report the three accepted invocation forms through eprintf().
 * Callers do not expect control to come back from here (main() falls
 * through to real work right after calling it), so eprintf() is
 * presumably what terminates the process.
 */
static void
usage(void)
{
	static const char synopsis[] =
		"usage: cut -b list [-n] [file...]\n"
		" cut -c list [file...]\n"
		" cut -f list [-d delim] [-s] [file...]\n";

	eprintf("%s", synopsis);
}
/*
 * One selected span of positions (bytes, characters or fields, depending
 * on mode). Both ends are inclusive; max == 0 marks a span that runs to
 * the end of the line.
 */
typedef struct Range {
	size_t min, max;
	struct Range *next;
} Range;

/* Selected spans, kept ordered by min with touching spans merged by insert(). */
static Range *list = NULL;
/* Option letter picked on the command line ('b', 'c' or 'f'); 0 until set. */
static char mode = 0;
/* Field delimiter used in 'f' mode. */
static char delim = '\t';
/* Set by -n. */
static bool nflag = false;
/* Set by -s. */
static bool sflag = false;

/*
 * Add r to the global list.
 *
 * If r ends at least one position short of an existing node it is linked
 * in front of that node. If it overlaps or directly abuts a node, that
 * node is widened instead (swallowing every following node r also
 * reaches) and r itself is left unlinked. Otherwise r is appended at the
 * tail; in that case r->next is left as the caller set it.
 */
static void
insert(Range *r)
{
	Range *cur, *before = NULL;
	Range *last, *rest;

	for(cur = list; cur; before = cur, cur = cur->next) {
		/* r lies wholly before cur with a gap in between. */
		if(r->max && r->max + 1 < cur->min) {
			r->next = cur;
			break;
		}
		/* cur is bounded and ends well before r starts: keep walking. */
		if(cur->max && r->min >= cur->max + 2)
			continue;

		/* r touches cur: grow cur to cover both. */
		if(r->min < cur->min)
			cur->min = r->min;
		last = cur;
		rest = cur->next;
		while(rest && !(r->max && r->max + 1 < rest->min)) {
			last = rest;
			rest = rest->next;
		}
		/* An open end on either side makes the merged span open-ended. */
		if(!last->max || !r->max)
			cur->max = 0;
		else
			cur->max = (last->max > r->max) ? last->max : r->max;
		cur->next = rest;
		return;
	}

	if(before)
		before->next = r;
	else
		list = r;
}
/*
 * Read a run of decimal digits at *sp into *val and advance *sp past it.
 * Returns false (with *val set to 0 and *sp untouched) when *sp does not
 * start with a digit. No whitespace or sign is skipped, so input such as
 * "1--3" or "+2" is not silently accepted. Values large enough to make
 * the max+1 / max+2 arithmetic in insert() wrap are rejected.
 */
static bool
parsenum(char **sp, size_t *val)
{
	const size_t limit = (size_t)-1 - 2;
	char *s = *sp;
	size_t v = 0, d;

	for(; *s >= '0' && *s <= '9'; s++) {
		d = (size_t)(*s - '0');
		if(v > (limit - d) / 10)
			eprintf("cut: bad list value\n");
		v = v * 10 + d;
	}
	*val = v;
	if(s == *sp)
		return false;
	*sp = s;
	return true;
}

/*
 * Parse a comma or space separated list of numbers and ranges and feed
 * each element to insert().
 *
 * str is modified in place (spaces become commas). Accepted elements are
 * "N", "N-M", "-M" (from position 1) and "N-" (to end of line, stored as
 * max == 0). Anything else, including position 0, an explicit "N-0" or a
 * decreasing range, is reported as a bad list value via eprintf().
 *
 * All Range nodes come from a single malloc'd array that is never freed
 * here, since the nodes are linked into the global list.
 */
static void
parselist(char *str)
{
	char *s;
	size_t n = 1;
	Range *r;

	if(!str)
		eprintf("cut: bad list value\n");
	/* One element per separator, plus one. */
	for(s = str; *s; s++) {
		if(*s == ' ') *s = ',';
		if(*s == ',') n++;
	}
	if(!(r = malloc(n * sizeof *r)))
		eprintf("malloc:");
	for(s = str; n; n--, s++) {
		if(*s == '-')
			r->min = 1;
		else if(!parsenum(&s, &r->min) || !r->min)
			eprintf("cut: bad list value\n");

		if(*s == '-') {
			s++;
			if(!parsenum(&s, &r->max))
				r->max = 0; /* "N-": open-ended */
			else if(r->max < r->min) /* also catches an explicit 0 */
				eprintf("cut: bad list value\n");
		} else {
			r->max = r->min;
		}
		r->next = NULL;
		/* Each element must be followed by a separator or the end. */
		if(*s && *s != ',')
			eprintf("cut: bad list value\n");
		insert(r++);
	}
}
/*
 * Work out how many bytes to advance from s to get from position *prev
 * to position pos, in the unit selected by mode.
 *
 * s      current location inside the line
 * pos    position to reach
 * prev   position s currently corresponds to; updated on return
 * count  how many seeks were already done on this line (field mode only)
 *
 * 'b' mode: advance pos - *prev bytes, or stop at the terminating NUL if
 * it comes first (in which case *prev is left alone). With nflag set the
 * advance is shortened until s[n] satisfies UTF8_POINT, and *prev is
 * bumped by the shortened amount only.
 *
 * 'c' mode: advance until one more than pos - *prev bytes satisfying
 * UTF8_POINT have been seen, stopping on the last of them.
 *
 * Field mode: walk over pos - *prev delimiters. The first seek on a line
 * (count == 0) ends just past the last delimiter; later seeks stop on it.
 * From the third seek on (count >= 2) scanning starts at s+1, i.e. the
 * byte at s is skipped without being examined.
 */
static size_t
seek(const char *s, size_t pos, size_t *prev, size_t count)
{
	size_t todo = pos - *prev;
	const char *end;

	switch(mode) {
	case 'b':
		end = memchr(s, 0, todo);
		if(end)
			return end - s;
		if(nflag) {
			for(; todo; todo--)
				if(UTF8_POINT(s[todo]))
					break;
		}
		*prev += todo;
		return todo;
	case 'c':
		todo++;
		for(end = s; *end; end++) {
			if(!UTF8_POINT(*end))
				continue;
			if(!--todo)
				break;
		}
		break;
	default:
		end = s;
		if(count >= 2)
			end++;
		while(todo && *end) {
			if(*end == delim && !--todo && count)
				break;
			end++;
		}
		break;
	}
	*prev = pos;
	return end - s;
}
/*
 * Read fp line by line with afgets() and write the parts selected by the
 * global range list to stdout, one output line per input line.
 *
 * In field mode a line without any delimiter is printed unchanged, or
 * dropped when sflag is set. The line buffer is static and reused across
 * calls.
 */
static void
cut(FILE *fp)
{
	static char *buf = NULL;
	static size_t size = 0;
	char *s;
	size_t i, n, p, len;
	Range *r;

	while(afgets(&buf, &size, fp)) {
		/*
		 * Strip the trailing newline. Guard against an empty string so
		 * that strlen()-1 cannot wrap around and index out of bounds.
		 */
		len = strlen(buf);
		if(len && buf[len-1] == '\n')
			buf[len-1] = 0;
		if(mode == 'f' && !strchr(buf, delim)) {
			if(!sflag)
				puts(buf);
			continue;
		}
		/*
		 * i counts seek() calls on this line, p is the position s
		 * corresponds to. Stop as soon as s sits on the terminator:
		 * from its third call on, seek() starts scanning at s+1 in
		 * field mode, which would read past the end of the string.
		 */
		for(i = 0, p = 1, s = buf, r = list; r && *s; r = r->next, s += n) {
			s += seek(s, r->min, &p, i++);
			if(!*s) break;
			if(!r->max) {
				/* Open-ended range: the rest of the line is selected. */
				fputs(s, stdout);
				break;
			}
			n = seek(s, r->max + 1, &p, i++);
			if(fwrite(s, 1, n, stdout) != n)
				eprintf("write error:");
		}
		putchar('\n');
	}
}
/*
 * Parse options, then run cut() over every named file, or over stdin when
 * no file is given or the file name is "-".
 */
int
main(int argc, char *argv[])
{
	FILE *fp;
	char *arg;

	ARGBEGIN {
	case 'b':
	case 'c':
	case 'f':
		mode = ARGC();
		/* ARGF() gives NULL when the option argument is missing. */
		if(!(arg = ARGF()))
			usage();
		parselist(arg);
		break;
	case 'd':
		if(!(arg = ARGF()))
			usage();
		delim = *arg;
		break;
	case 'n':
		nflag = true;
		break;
	case 's':
		sflag = true;
		break;
	default:
		usage();
	} ARGEND;

	if(!mode)
		usage();
	if(!argc)
		cut(stdin);
	else for(; argc--; argv++) {
		if(!(fp = strcmp(*argv, "-") ? fopen(*argv, "r") : stdin))
			eprintf("fopen %s:", *argv);
		cut(fp);
		/* Leave stdin open so that a later "-" operand stays valid. */
		if(fp != stdin)
			fclose(fp);
	}
	return EXIT_SUCCESS;
}