Added POSIX paste(1) command implementation.

This commit adds a simple implementation of the POSIX
standard paste(1) command, and its man page.
TODO and Makefile have been updated accordingly.

Signed-off-by: Christoph Lohmann <20h@r-36.net>
This commit is contained in:
Lorenzo Cogotti 2013-04-29 18:54:36 +02:00 committed by Christoph Lohmann
parent e509d56bae
commit c451683c3f
4 changed files with 354 additions and 2 deletions

View File

@ -43,6 +43,7 @@ SRC = \
mv.c \
nl.c \
nohup.c \
paste.c \
pwd.c \
rm.c \
sleep.c \

2
TODO
View File

@ -28,8 +28,6 @@ md5sum [-c] [file...]
nice [-n N] [command]
paste [-s] [-d list] [file...]
printenv [variable...]
printf [format] [data...]

122
paste.1 Normal file
View File

@ -0,0 +1,122 @@
.TH PASTE 1 paste-VERSION "Apr 2013"
.SH NAME
paste \- merge corresponding or subsequent lines of files
.SH "SYNOPSIS"
.PP
.B paste
[
.B \-s
]
[
.B \-d
.I list
]
.I file...
.SH DESCRIPTION
The
.B paste
utility concatenates the corresponding lines of the given input files,
and writes the resulting lines to standard output. The default operation
of
.B paste
concatenates the corresponding lines of the input files.
The newline of every line except the line from the last input file is
replaced with a tab.
If an end-of-file condition is detected on one or more input files,
but not all input files,
.B paste
behaves as though empty lines were read from the files on which
end-of-file was detected, unless the
.B \-s
option is specified.
.SH OPTIONS
.TP
.B \-d list
Unless a backslash character appears in
.I list
each character is an element specifying a delimiter.
If a backslash character appears, that and one or more characters
following it are an element specifying a delimiter.
These elements specify one or more characters to use,
instead of the default tab, to replace the newline of the input
lines. The elements in
.I list
are used circularly; that is, when the
.I list
is exhausted the first element from the list is reused.
When the
.B \-s
option is specified, the last newline in a file is not modified.
The delimiter is reset to the first element of list after each file
operand is processed.
If a backslash character appears in list, it and the character following
it represent one of the following delimiters:
.RS
.TP
.I \en
newline character
.TP
.I \et
tab character
.TP
.I \e\e
backslash character
.TP
.I \e0
empty string (not a null character)
.PP
If any other character follows the backslash, the results are unspecified.
.RE
.TP
.B \-s
concatenate all of the lines of each separate input file in command line
order. The newline of every line except the last line in each input file
is replaced with a tab, unless otherwise specified by the
.B \-d
option.
.PP
If '\-' is specified for one or more input files, the standard input is
used; standard input is read one line at a time, circularly for each
instance of '\-'.
.SH EXIT VALUES
The
.B paste
utility exits 0 on successful completion, and >0 if an error
occurs.
.SH ENVIRONMENT VARIABLES
The following environment variables affect the execution:
.TP
.B LANG
provide a default value for the internationalization variables
that are unset or null.
.TP
.B LC_ALL
if set to a non-empty string value, override the values of all the
other internationalization variables.
.TP
.B LC_CTYPE
determine the locale for the interpretation of sequences of bytes
of text data as characters (for example, single-byte as opposed to
multi-byte characters in arguments and input files).
.TP
.B LC_MESSAGES
determine the locale that should be used to affect the format and
contents of diagnostic messages written to standard error.
.SH CONFORMING TO
The
.B paste
utility is IEEE Std 1003.2 (POSIX.2) compatible.
.SH EXAMPLES
.TP
.I "ls | paste - - - -"
.PP
Write out a directory in four columns.
.TP
.I "paste -s -d '\et\en' file"
.PP
Combine pairs of lines from a file into single lines.
.SH AUTHOR
Written by Lorenzo Cogotti.
.SH SEE ALSO
.BR cut (1),
.BR lam (1)

231
paste.c Normal file
View File

@ -0,0 +1,231 @@
/* See LICENSE file for copyright and license details. */
#include <locale.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include "util.h"
/* An input stream paired with the name used for it in diagnostics. */
typedef struct {
FILE *fp; /* stream to read from; main() sets it to stdin for the operand "-" */
const char *name; /* operand string as given on the command line */
} Fdescr;
/* Forward declarations of the file-local helpers defined after main(). */
/* print the usage message through eprintf() */
static void eusage(void);
/* resolve backslash escapes in place, return the number of delimiters */
static size_t unescape(wchar_t *);
/* read one wide character from a descriptor, reporting read errors */
static wint_t in(Fdescr *);
/* write one wide character to stdout, reporting write errors */
static void out(wchar_t);
/* -s mode: join all lines of each file in turn */
static void sequential(Fdescr *, int, const wchar_t *, size_t);
/* default mode: join corresponding lines of all files */
static void parallel(Fdescr *, int, const wchar_t *, size_t);
/*
 * paste: merge corresponding (default) or subsequent (-s) lines of the
 * given files and write the result to standard output.
 *
 * Options:
 *   -s       handle each file in turn, joining all of its lines
 *   -d list  delimiter list used circularly instead of the default tab
 *
 * An operand of "-" selects standard input.  Returns 0 on success.
 * NOTE(review): the error paths assume eprintf()/eusage() from util.h do
 * not return (the code after them uses the failed result) -- confirm.
 */
int
main(int argc, char **argv) {
	const char *adelim = NULL;
	bool seq = false;
	wchar_t *delim;
	size_t len;
	Fdescr *dsc;
	int i, c;

	setlocale(LC_CTYPE, "");

	while((c = getopt(argc, argv, "sd:")) != -1)
		switch(c) {
		case 's':
			seq = true;
			break;
		case 'd':
			adelim = optarg;
			break;
		case '?':
		default:
			eusage();
			break;
		}
	argc -= optind;
	argv += optind;
	if(argc == 0)
		eusage();

	/* populate delimiters */
	if(!adelim)
		adelim = "\t";

	/* first pass only measures the list, in wide characters */
	len = mbstowcs(NULL, adelim, 0);
	if(len == (size_t)-1)
		eprintf("invalid delimiter\n");

	delim = malloc((len + 1) * sizeof(*delim));
	if(!delim)
		eprintf("out of memory\n");

	/*
	 * Convert len + 1 elements: with a limit of len, mbstowcs() stops
	 * before storing the terminating null wide character, and
	 * unescape() would then scan past the initialized part of the buffer.
	 */
	mbstowcs(delim, adelim, len + 1);

	len = unescape(delim);
	if(len == 0)
		eprintf("no delimiters specified\n");

	/* populate file list */
	dsc = malloc(argc * sizeof(*dsc));
	if(!dsc)
		eprintf("out of memory\n");

	for(i = 0; i < argc; i++) {
		const char *name = argv[i];

		if(strcmp(name, "-") == 0)
			dsc[i].fp = stdin;
		else
			dsc[i].fp = fopen(name, "r");

		if(!dsc[i].fp)
			eprintf("can't open '%s':", name);

		dsc[i].name = name;
	}

	if(seq)
		sequential(dsc, argc, delim, len);
	else
		parallel(dsc, argc, delim, len);

	/* stdin may appear several times in the list and is never closed here */
	for(i = 0; i < argc; i++) {
		if(dsc[i].fp != stdin)
			(void)fclose(dsc[i].fp);
	}

	free(delim);
	free(dsc);

	return 0;
}
/*
 * Report the command synopsis through eprintf().
 * NOTE(review): callers do not handle a return from this function, so
 * eprintf() (util.h) presumably terminates the program -- confirm.
 */
static void
eusage(void) {
eprintf("usage: paste [-s][-d list] file...\n");
}
/*
 * Resolve the backslash escapes of a delimiter list in place.
 *
 * delim: null-terminated wide string; on return its first N elements hold
 *        the resolved delimiters (N is the return value).  The result is
 *        not null-terminated by this function, and a stored '\0' element
 *        stands for the "\0" escape.
 *
 * Recognized escapes are \n, \t, \\ and \0.  Any other escape, including a
 * lone trailing backslash, stops the conversion: only the delimiters
 * resolved so far are counted.
 *
 * Returns the number of delimiter elements written.
 */
static size_t
unescape(wchar_t *delim) {
	const wchar_t *rd = delim; /* next element to examine */
	wchar_t *wr = delim;       /* next slot to fill; never ahead of rd */

	while(*rd != '\0') {
		wchar_t ch = *rd++;
		wchar_t repl;

		if(ch != '\\') {
			*wr++ = ch;
			continue;
		}

		/* escape sequence: inspect the character after the backslash */
		ch = *rd++;
		if(ch == 'n')
			repl = '\n';
		else if(ch == 't')
			repl = '\t';
		else if(ch == '0')
			repl = '\0';
		else if(ch == '\\')
			repl = '\\';
		else
			/* POSIX: unspecified results */
			return (size_t)(wr - delim);

		*wr++ = repl;
	}
	return (size_t)(wr - delim);
}
/*
 * Read the next wide character from f->fp.
 *
 * Returns the character, or WEOF when nothing could be read.  When WEOF is
 * caused by a stream error (rather than end-of-file), the error is reported
 * through eprintf() using f->name.
 */
static wint_t
in(Fdescr *f) {
	wint_t wc;

	wc = fgetwc(f->fp);
	if(wc != WEOF)
		return wc;

	/* WEOF: tell a read failure apart from plain end-of-file */
	if(ferror(f->fp))
		eprintf("'%s' read error:", f->name);

	return wc;
}
/*
 * Write the wide character c to standard output and report a write error
 * through eprintf() if the stream's error indicator is set afterwards.
 */
static void
out(wchar_t c) {
	(void)putwchar(c);

	if(!ferror(stdout))
		return;

	eprintf("write error:");
}
/*
 * Sequential (-s) mode: for each file in turn, join all of its lines into
 * one output line.
 *
 * dsc:   array of len input descriptors, processed in order
 * delim: cnt delimiter elements used circularly, restarting at the first
 *        element for every file; a '\0' element means "write nothing"
 * cnt:   number of elements in delim; must be non-zero (used as a modulus)
 *
 * Every newline that is followed by more input is replaced by the next
 * delimiter.  A final newline is written for a file only when the last
 * character read from it was a newline.
 */
static void
sequential(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
	int file = 0;

	while(file < len) {
		Fdescr *cur = &dsc[file];
		size_t next = 0;    /* index of the delimiter to use next */
		wint_t prev = WEOF; /* previously read character, WEOF at start */
		wint_t ch;

		for(;;) {
			ch = in(cur);
			if(ch == WEOF)
				break;

			/*
			 * The newline was held back; now that more input
			 * follows, it becomes a delimiter.
			 */
			if(prev == '\n') {
				wchar_t sep = delim[next];

				if(sep != '\0')
					out(sep);
				next = (next + 1) % cnt;
			}

			if(ch != '\n')
				out((wchar_t)ch);

			prev = ch;
		}

		/* the file's last newline is passed through unmodified */
		if(prev == '\n')
			out((wchar_t)prev);

		file++;
	}
}
/*
 * Parallel (default) mode: build each output line from the corresponding
 * input line of every file.
 *
 * dsc:   array of len input descriptors
 * delim: cnt delimiter elements; the field taken from file i is followed
 *        by delim[i % cnt], and a '\0' element means "write nothing"
 * cnt:   number of elements in delim; must be non-zero (used as a modulus)
 *
 * The field of the last file is followed by a newline instead of a
 * delimiter.  A file that has reached end-of-file contributes an empty
 * field for as long as at least one other file still yields data; the
 * function returns once a full pass over the files produces no output.
 */
static void
parallel(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
	bool started; /* true once anything was written for the current row */

	do {
		int i;

		started = false;
		for(i = 0; i < len; i++) {
			wint_t c;

			do {
				c = in(&dsc[i]);

				/*
				 * An exhausted file produces nothing until some
				 * file shows that this row exists at all.
				 */
				if(c == WEOF && !started)
					break;

				if(!started) {
					int j;

					/*
					 * Files 0..i-1 were exhausted and skipped:
					 * emit their empty fields now, each one
					 * followed by its own delimiter.
					 */
					for(j = 0; j < i; j++) {
						wchar_t pad = delim[j % cnt];

						if(pad != '\0')
							out(pad);
					}
					started = true;
				}

				if(c != '\n' && c != WEOF) {
					out((wchar_t)c);
				} else if(i == len - 1) {
					/* end of the last field terminates the row */
					out('\n');
				} else {
					wchar_t d = delim[i % cnt];

					/* '\0' is the empty delimiter, not a NUL to print */
					if(d != '\0')
						out(d);
				}
			} while(c != '\n' && c != WEOF);
		}
	} while(started);
}