sbase/sed.c

1721 lines
40 KiB
C
Raw Normal View History

/* FIXME: summary
* decide whether we enforce valid UTF-8, right now it's enforced in certain
* parts of the script, but not the input...
* nul bytes cause explosions due to use of libc string functions. thoughts?
* lack of newline at end of file, currently we add one. what should we do?
* allow "\\t" for "\t" etc. in regex? in replacement text?
* POSIX says don't flush on N when out of input, but GNU and busybox do.
*/
#include <ctype.h>
#include <errno.h>
#include <regex.h>
#include <stdlib.h>
#include <string.h>
#include "utf.h"
#include "util.h"
/* Types */
/* Growable array of untyped pointers.
 * Used as a queue for scheduled writes and as a stack for {, :, b and t.
 */
typedef struct {
void **data; /* element storage, grown by push() */
size_t size; /* number of elements currently stored */
size_t cap; /* number of elements data has room for */
} Vec;
/* Growable C string.
 * FIXME: does it make sense to keep track of length? or just rely on libc
 * string functions? If we want to support nul bytes everything changes
 */
typedef struct {
char *str; /* nul terminated contents */
size_t cap; /* bytes allocated for str; the str* helpers treat 0 as "nothing allocated yet" */
} String;
typedef struct Cmd Cmd;
/* Dispatch entry describing one sed function (see the fns[] table) */
typedef struct {
void (*fn)(Cmd *); /* executes the command while the script runs */
char *(*getarg)(Cmd *, char *); /* parses the argument while compiling, NULL if the command takes none */
void (*freearg)(Cmd *); /* frees what getarg allocated, NULL if there is nothing to free */
unsigned char naddr; /* maximum number of addresses the command accepts */
} Fninfo;
/* A single address. Which member of u is valid depends on type:
 * lineno for LINE, re for REGEX, neither for the other types.
 */
typedef struct {
union {
size_t lineno;
regex_t *re;
} u;
enum {
IGNORE, /* empty address, ignore */
EVERY , /* every line */
LINE , /* line number */
LAST , /* last line ($) */
REGEX , /* use included regex */
LASTRE, /* use most recently used regex */
} type;
} Addr;
/* Address range of a command, as filled in by make_range().
 * DISCUSS: naddr is not strictly necessary, but very helpful
 * naddr == 0 iff beg.type == EVERY && end.type == IGNORE
 * naddr == 1 iff beg.type != IGNORE && end.type == IGNORE
 * naddr == 2 iff beg.type != IGNORE && end.type != IGNORE
 */
typedef struct {
Addr beg; /* first address */
Addr end; /* second address, type IGNORE when absent */
unsigned char naddr; /* number of addresses given, see above */
} Range;
/* Argument of the s command, filled in by get_s_arg() */
typedef struct {
regex_t *re; /* if NULL use last regex */
String repl; /* replacement text */
FILE *file; /* stream for the w flag, NULL when the flag is absent */
size_t occurrence; /* 0 for all (g flag) */
Rune delim; /* delimiter, remembered so continuation lines can be parsed */
unsigned int p:1; /* p flag was given */
} Sarg;
/* Argument of the y command: two Rune arrays built by strtorunes().
 * get_y_arg() rejects sets of different length.
 */
typedef struct {
Rune *set1; /* runes to be replaced */
Rune *set2; /* replacement runes */
} Yarg;
/* Argument shared by the a, c, i and r commands */
typedef struct {
String str; /* a,c,i text. r file path */
void (*print)(char *, FILE *); /* check_puts for a, write_file for r, unused for c,i */
} ACIRarg;
/* One compiled command of the script */
struct Cmd {
Range range; /* addresses selecting the lines the command applies to */
Fninfo *fninfo; /* entry of fns[] for the command character */
union {
Cmd *jump; /* used for b,t when running */
char *label; /* used for :,b,t when building */
ptrdiff_t offset; /* used for { (pointers break during realloc) */
FILE *file; /* used for w */
/* FIXME: Should the following be in the union? or pointers and malloc? */
Sarg s;
Yarg y;
ACIRarg acir;
} u; /* I find your lack of anonymous unions disturbing */
unsigned int in_match:1; /* maintained by in_range(): a two address range has started and not yet ended */
unsigned int negate :1; /* command was prefixed with ! */
};
/* Files for w command (and s' w flag).
 * get_w_arg() keeps one entry per distinct path so a path is opened only once.
 */
typedef struct {
char *path; /* file name as written in the script */
FILE *file; /* stream opened for writing */
} Wfile;
/*
 * Function Declarations
 *
 * Prototypes for the file-local functions defined further down.
 * leprintf() has no prototype here; it is defined before its first use.
 */
/* Dynamically allocated arrays and strings */
static void resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next);
static void *pop(Vec *v);
static void push(Vec *v, void *p);
static void stracat(String *dst, char *src);
static void strnacat(String *dst, char *src, size_t n);
static void stracpy(String *dst, char *src);
/* Cleanup and errors */
static void usage(void);
/* Parsing functions and related utilities */
static void compile(char *s, int isfile);
static int read_line(FILE *f, String *s);
static char *make_range(Range *range, char *s);
static char *make_addr(Addr *addr, char *s);
static char *find_delim(char *s, Rune delim, int do_brackets);
static char *chompr(char *s, Rune rune);
static char *chomp(char *s);
static Rune *strtorunes(char *s, size_t nrunes);
static long stol(char *s, char **endp);
static size_t escapes(char *beg, char *end, Rune delim, int n_newline);
static size_t echarntorune(Rune *r, char *s, size_t n);
static void insert_labels(void);
/* Get and Free arg and related utilities */
static char *get_aci_arg(Cmd *c, char *s);
static void aci_append(Cmd *c, char *s);
static void free_acir_arg(Cmd *c);
static char *get_bt_arg(Cmd *c, char *s);
static char *get_r_arg(Cmd *c, char *s);
static char *get_s_arg(Cmd *c, char *s);
static void free_s_arg(Cmd *c);
static char *get_w_arg(Cmd *c, char *s);
static char *get_y_arg(Cmd *c, char *s);
static void free_y_arg(Cmd *c);
static char *get_colon_arg(Cmd *c, char *s);
static char *get_lbrace_arg(Cmd *c, char *s);
static char *get_rbrace_arg(Cmd *c, char *s);
static char *semicolon_arg(char *s);
/* Running */
static void run(void);
static int in_range(Cmd *c);
static int match_addr(Addr *a);
static int next_file(void);
static int is_eof(FILE *f);
static void do_writes(void);
static void write_file(char *path, FILE *out);
static void check_puts(char *s, FILE *f);
static void update_ranges(Cmd *beg, Cmd *end);
/* Sed functions */
static void cmd_y(Cmd *c);
static void cmd_x(Cmd *c);
static void cmd_w(Cmd *c);
static void cmd_t(Cmd *c);
static void cmd_s(Cmd *c);
static void cmd_r(Cmd *c);
static void cmd_q(Cmd *c);
static void cmd_P(Cmd *c);
static void cmd_p(Cmd *c);
static void cmd_N(Cmd *c);
static void cmd_n(Cmd *c);
static void cmd_l(Cmd *c);
static void cmd_i(Cmd *c);
static void cmd_H(Cmd *c);
static void cmd_h(Cmd *c);
static void cmd_G(Cmd *c);
static void cmd_g(Cmd *c);
static void cmd_D(Cmd *c);
static void cmd_d(Cmd *c);
static void cmd_c(Cmd *c);
static void cmd_b(Cmd *c);
static void cmd_a(Cmd *c);
static void cmd_colon(Cmd *c);
static void cmd_equal(Cmd *c);
static void cmd_lbrace(Cmd *c);
static void cmd_rbrace(Cmd *c);
static void cmd_last(Cmd *c);
/* Actions */
static void new_line(void);
static void app_line(void);
static void new_next(void);
static void old_next(void);
/*
 * Globals
 */
static Vec braces, labels, branches; /* holds ptrdiff_t. addrs of {, :, bt */
static Vec writes; /* holds cmd*. writes scheduled by a and r commands */
static Vec wfiles; /* holds Wfile*. files for w and s///w commands */
static Cmd *prog, *pc; /* Program, program counter */
static size_t pcap; /* number of Cmd slots allocated for prog */
static size_t lineno; /* incremented per script line by compile(), reset to 0 by run(); prefixed to leprintf() messages */
static regex_t *lastre; /* last used regex for empty regex search */
static char **files; /* list of file names from argv */
static FILE *file; /* current file we are reading */
static String patt, hold, genbuf; /* genbuf is the scratch line buffer used by compile() and write_file(); NOTE(review): patt/hold presumably pattern and hold space */
static struct {
unsigned int n :1; /* -n (no print) */
unsigned int s :1; /* s/// replacement happened */
unsigned int aci_cont:1; /* a,c,i text continuation */
unsigned int s_cont :1; /* s/// replacement text continuation */
unsigned int halt :1; /* halt execution */
} gflags;
/* Table of sed functions indexed by command character.
 * Columns: run function, argument parser, argument destructor, maximum number
 * of addresses. Characters without an entry are zero filled, so compile()
 * rejects them because their fn is NULL.
 * FIXME: move character inside Fninfo and only use 26*sizeof(Fninfo) instead of 127*sizeof(Fninfo) bytes
 */
static Fninfo fns[] = {
['a'] = { cmd_a , get_aci_arg , free_acir_arg , 1 }, /* schedule write of text for later */
['b'] = { cmd_b , get_bt_arg , NULL , 2 }, /* branch to label char *label when building, Cmd *jump when running */
['c'] = { cmd_c , get_aci_arg , free_acir_arg , 2 }, /* delete pattern space. at 0 or 1 addr or end of 2 addr, write text */
['d'] = { cmd_d , NULL , NULL , 2 }, /* delete pattern space */
['D'] = { cmd_D , NULL , NULL , 2 }, /* delete to first newline and start new cycle without reading (if no newline, d) */
['g'] = { cmd_g , NULL , NULL , 2 }, /* replace pattern space with hold space */
['G'] = { cmd_G , NULL , NULL , 2 }, /* append newline and hold space to pattern space */
['h'] = { cmd_h , NULL , NULL , 2 }, /* replace hold space with pattern space */
['H'] = { cmd_H , NULL , NULL , 2 }, /* append newline and pattern space to hold space */
['i'] = { cmd_i , get_aci_arg , free_acir_arg , 1 }, /* write text */
['l'] = { cmd_l , NULL , NULL , 2 }, /* write pattern space in 'visually unambiguous form' */
['n'] = { cmd_n , NULL , NULL , 2 }, /* write pattern space (unless -n) read to replace pattern space (if no input, quit) */
['N'] = { cmd_N , NULL , NULL , 2 }, /* append to pattern space separated by newline, line number changes (if no input, quit) */
['p'] = { cmd_p , NULL , NULL , 2 }, /* write pattern space */
['P'] = { cmd_P , NULL , NULL , 2 }, /* write pattern space up to first newline */
['q'] = { cmd_q , NULL , NULL , 1 }, /* quit */
['r'] = { cmd_r , get_r_arg , free_acir_arg , 1 }, /* write contents of file (unable to open/read treated as empty file) */
['s'] = { cmd_s , get_s_arg , free_s_arg , 2 }, /* find/replace/all that crazy s stuff */
['t'] = { cmd_t , get_bt_arg , NULL , 2 }, /* if s/// succeeded (since input or last t) branch to label (branch to end if no label) */
['w'] = { cmd_w , get_w_arg , NULL , 2 }, /* append pattern space to file */
['x'] = { cmd_x , NULL , NULL , 2 }, /* exchange pattern and hold spaces */
['y'] = { cmd_y , get_y_arg , free_y_arg , 2 }, /* replace runes in set1 with runes in set2 */
[':'] = { cmd_colon , get_colon_arg , NULL , 0 }, /* defines label for later b and t commands */
['='] = { cmd_equal , NULL , NULL , 1 }, /* printf("%d\n", line_number); */
['{'] = { cmd_lbrace, get_lbrace_arg, NULL , 2 }, /* if we match, run commands, otherwise jump to close */
['}'] = { cmd_rbrace, get_rbrace_arg, NULL , 0 }, /* noop, hold onto open for ease of building scripts */
[0x7f] = { NULL, NULL, NULL, 0 }, /* index is checked with isascii(3p). fill out rest of array */
};
/*
* Function Definitions
*/
/* Grow or shrink the array at *ptr, which currently holds *nmemb members of
 * size bytes each, so that it holds new_nmemb members.
 * On return *ptr is the new array and *nmemb is new_nmemb. If next is not
 * NULL, *next is set to one past the old end inside the new array.
 * A new_nmemb of 0 frees the array and leaves *ptr NULL.
 * Allocation failure is fatal (ereallocarray).
 */
static void
resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next)
{
	size_t old_bytes = *nmemb * size;
	void *fresh = NULL;

	if (new_nmemb == 0)
		free(*ptr); /* turns out realloc(*ptr, 0) != free(*ptr) */
	else
		fresh = ereallocarray(*ptr, new_nmemb, size);

	*ptr = fresh;
	*nmemb = new_nmemb;
	if (next != NULL)
		*next = (char *)fresh + old_bytes;
}
/* remove and return the most recently pushed element, NULL if v is empty */
static void *
pop(Vec *v)
{
	if (v->size == 0)
		return NULL;
	v->size--;
	return v->data[v->size];
}
/* append p to v, growing the storage to 2 * cap + 1 slots when it is full */
static void
push(Vec *v, void *p)
{
	size_t slot;

	if (v->cap == v->size)
		resize((void **)&v->data, &v->cap, sizeof(*v->data), 2 * v->cap + 1, NULL);
	slot = v->size++;
	v->data[slot] = p;
}
/* append src to dst, growing dst as needed
 * a dst with cap == 0 is treated as an empty, not yet allocated string
 */
static void
stracat(String *dst, char *src)
{
	int fresh = (dst->cap == 0);
	size_t have, need;

	have = fresh ? 0 : strlen(dst->str);
	need = have + strlen(src) + 1;

	if (need > dst->cap)
		resize((void **)&dst->str, &dst->cap, 1, need * 2, NULL);
	if (fresh)
		dst->str[0] = '\0';

	strcat(dst->str, src);
}
/* append at most n bytes of src to dst, growing dst as needed
 * a dst with cap == 0 is treated as an empty, not yet allocated string
 */
static void
strnacat(String *dst, char *src, size_t n)
{
	int fresh = (dst->cap == 0);
	size_t have, take, need;

	take = strlen(src);
	if (n < take)
		take = n;
	have = fresh ? 0 : strlen(dst->str);
	need = have + take + 1;

	if (need > dst->cap)
		resize((void **)&dst->str, &dst->cap, 1, need * 2, NULL);
	if (fresh)
		dst->str[0] = '\0';

	/* the size given to strlcat is the full buffer size, so this copies take bytes */
	strlcat(dst->str, src, need);
}
/* replace the contents of dst with a copy of src, growing dst as needed */
static void
stracpy(String *dst, char *src)
{
	size_t need = strlen(src) + 1; /* includes the terminating nul */

	if (need > dst->cap)
		resize((void **)&dst->str, &dst->cap, 1, need * 2, NULL);
	memcpy(dst->str, src, need);
}
/* report s through eprintf, prefixed with the current line number
 * when errno is set its description is appended
 */
static void
leprintf(char *s)
{
	if (!errno) {
		eprintf("%zu: %s\n", lineno, s);
		return;
	}
	eprintf("%zu: %s: %s\n", lineno, s, strerror(errno));
}
/* print the usage message through eprintf
 * FIXME: write usage message
 */
static void
usage(void)
{
	eprintf("USAGE\n");
}
/* Compile a script into prog, appending one Cmd per sed function.
 * s is a file name when isfile is set and the script text itself otherwise.
 * Each script line is read into genbuf and parsed in place; continuation
 * lines of a,c,i text and of s replacement text are handed to the command
 * that was compiled last.
 *
 * Differences from POSIX
 * we allow semicolons and trailing blanks inside {}
 * we allow spaces after ! (and in between !s)
 */
static void
compile(char *s, int isfile)
{
	FILE *f;
	char *name = s; /* s is reused as a cursor below, keep the original for fshut() */

	if (!isfile && !*s) /* empty string script */
		return;

	f = isfile ? fopen(s, "r") : fmemopen(s, strlen(s), "r");
	if (!f)
		eprintf("fopen/fmemopen:");

	/* NOTE: get arg functions can't use genbuf */
	while (read_line(f, &genbuf) != EOF) {
		s = genbuf.str;

		/* if the first two characters of the script are "#n" default output shall be suppressed */
		if (++lineno == 1 && *s == '#' && s[1] == 'n') {
			gflags.n = 1;
			continue;
		}

		if (gflags.aci_cont) {
			aci_append(pc - 1, s);
			continue;
		}
		if (gflags.s_cont)
			s = (pc - 1)->fninfo->getarg(pc - 1, s);

		while (*s) {
			s = chompr(s, ';');
			if (!*s || *s == '#')
				break;

			/* grow the program; resize() re-points pc into the new array */
			if ((size_t)(pc - prog) == pcap)
				resize((void **)&prog, &pcap, sizeof(*prog), pcap * 2 + 1, (void **)&pc);

			pc->range.beg.type = pc->range.end.type = IGNORE;
			pc->fninfo = NULL;
			pc->in_match = 0;

			s = make_range(&pc->range, s);
			s = chomp(s);
			pc->negate = *s == '!';
			s = chompr(s, '!');

			/* isascii() guarantees the index stays inside fns[] */
			if (!isascii(*s) || !(pc->fninfo = &fns[(unsigned)*s])->fn)
				leprintf("bad sed function");
			if (pc->range.naddr > pc->fninfo->naddr)
				leprintf("wrong number of addresses");
			s++;

			if (pc->fninfo->getarg)
				s = pc->fninfo->getarg(pc, s);

			pc++;
		}
	}

	fshut(f, name);
}
/* Read the next line of f into s with getline, dropping one trailing newline.
 * Returns 0 on success and EOF when f is NULL or no line could be read.
 * A read error is fatal.
 * FIXME: if we decide to honor lack of trailing newline, set/clear a global
 * flag when reading a line
 */
static int
read_line(FILE *f, String *s)
{
	ssize_t got;

	if (f == NULL)
		return EOF;

	got = getline(&s->str, &s->cap, f);
	if (got < 0) {
		if (ferror(f))
			eprintf("getline:");
		return EOF;
	}

	if (s->str[got - 1] == '\n')
		s->str[got - 1] = '\0';
	return 0;
}
/* Parse the address range at the start of s into range.
 * Sets range->naddr to the number of addresses found.
 * Returns a pointer to one past the end of the range.
 */
static char *
make_range(Range *range, char *s)
{
	int has_beg, has_end;

	s = make_addr(&range->beg, s);
	if (*s != ',')
		range->end.type = IGNORE;
	else
		s = make_addr(&range->end, s + 1);

	has_beg = range->beg.type != IGNORE;
	has_end = range->end.type != IGNORE;

	if (range->beg.type == EVERY && !has_end)
		range->naddr = 0;
	else if (has_beg && !has_end)
		range->naddr = 1;
	else if (has_beg && has_end)
		range->naddr = 2;
	else
		leprintf("this is impossible...");

	return s;
}
/* read first addr from s, return pointer to one past end of addr
 *
 * recognised forms: $ (LAST), a decimal number (LINE), /re/ or \cREc (REGEX,
 * or LASTRE when the regex is empty); anything else yields EVERY and
 * consumes nothing
 * the regex text is unescaped and nul terminated in place before compiling,
 * so s is modified
 */
static char *
make_addr(Addr *addr, char *s)
{
Rune r;
char *p = s + strlen(s);
size_t rlen = echarntorune(&r, s, p - s);
if (r == '$') {
addr->type = LAST;
s += rlen;
} else if (isdigitrune(r)) {
addr->type = LINE;
/* stol() advances s past the number */
addr->u.lineno = stol(s, &s);
} else if (r == '/' || r == '\\') {
Rune delim;
/* \c form: the rune following the backslash is the delimiter */
if (r == '\\') {
s += rlen;
rlen = echarntorune(&r, s, p - s);
}
if (r == '\\')
leprintf("bad delimiter '\\'");
delim = r;
s += rlen;
rlen = echarntorune(&r, s, p - s);
if (r == delim) {
/* delimiter directly followed by delimiter: empty regex */
addr->type = LASTRE;
s += rlen;
} else {
addr->type = REGEX;
p = find_delim(s, delim, 1);
if (!*p)
leprintf("unclosed regex");
/* escapes() shortens the text in place, p follows the closing delimiter */
p -= escapes(s, p, delim, 0);
/* overwrite the closing delimiter so s is the bare regex */
*p++ = '\0';
addr->u.re = emalloc(sizeof(*addr->u.re));
eregcomp(addr->u.re, s, 0);
s = p;
}
} else {
addr->type = EVERY;
}
return s;
}
/* return pointer to first delim in s that is not escaped
 * and if do_brackets is set, not in [] (note possible [::], [..], [==], inside [])
 * return pointer to trailing nul byte if no delim found
 *
 * any escaped character that is not special is just itself (POSIX undefined)
 * FIXME: pull out into some util thing, will be useful for ed as well
 */
static char *
find_delim(char *s, Rune delim, int do_brackets)
{
enum {
OUTSIDE , /* not in brackets */
BRACKETS_OPENING, /* last char was first [ or last two were first [^ */
BRACKETS_INSIDE , /* inside [] */
INSIDE_OPENING , /* inside [] and last char was [ */
CLASS_INSIDE , /* inside class [::], or collating element [..] or [==], inside [] */
CLASS_CLOSING , /* inside class [::], or collating element [..] or [==], and last character was the respective : . or = */
} state = OUTSIDE;
Rune r, c = 0; /* no c won't be used uninitialized, shutup -Wall */
size_t rlen;
int escape = 0;
char *end = s + strlen(s);
/* one rune per iteration; the first matching transition below wins */
for (; *s; s += rlen) {
rlen = echarntorune(&r, s, end - s);
/* a ^ or ] directly after the opening [ does not close the bracket expression */
if (state == BRACKETS_OPENING && r == '^' ) { continue; }
else if (state == BRACKETS_OPENING && r == ']' ) { state = BRACKETS_INSIDE ; continue; }
else if (state == BRACKETS_OPENING ) { state = BRACKETS_INSIDE ; }
/* c remembers which of : . = opened the class so only the same one closes it */
if (state == CLASS_CLOSING && r == ']' ) { state = BRACKETS_INSIDE ; }
else if (state == CLASS_CLOSING ) { state = CLASS_INSIDE ; }
else if (state == CLASS_INSIDE && r == c ) { state = CLASS_CLOSING ; }
else if (state == INSIDE_OPENING && (r == ':' ||
r == '.' ||
r == '=') ) { state = CLASS_INSIDE ; c = r; }
else if (state == INSIDE_OPENING && r == ']' ) { state = OUTSIDE ; }
else if (state == BRACKETS_INSIDE && r == '[' ) { state = INSIDE_OPENING ; }
else if (state == BRACKETS_INSIDE && r == ']' ) { state = OUTSIDE ; }
/* the rune after a backslash is skipped without being examined */
else if (state == OUTSIDE && escape ) { escape = 0 ; }
else if (state == OUTSIDE && r == '\\' ) { escape = 1 ; }
else if (state == OUTSIDE && do_brackets && r == '[' ) { state = BRACKETS_OPENING; }
else if (state == OUTSIDE && r == delim) return s;
}
return s;
}
/* eat all leading whitespace; chompr() with rune 0, which never matches
 * because chompr() stops at the nul byte
 */
static char *
chomp(char *s)
{
	return chompr(s, 0);
}
/* skip leading whitespace and occurrences of rune
 * return pointer to the first rune that is neither, or to the nul byte
 */
static char *
chompr(char *s, Rune rune)
{
	char *stop = s + strlen(s);
	Rune cur;
	size_t width;

	for (; *s; s += width) {
		width = echarntorune(&cur, s, stop - s);
		if (!width)
			break;
		if (!isspacerune(cur) && cur != rune)
			break;
	}
	return s;
}
/* decode the first nrunes runes of UTF-8 string s into a newly allocated,
 * zero terminated Rune array which the caller frees
 * NOTE: sequence must be valid UTF-8, check first
 */
static Rune *
strtorunes(char *s, size_t nrunes)
{
	Rune *out = ereallocarray(NULL, nrunes + 1, sizeof(*out));
	size_t i;

	for (i = 0; i < nrunes; i++)
		s += chartorune(&out[i], s);
	out[nrunes] = '\0';

	return out;
}
/* strtol in base 10 with error checking
 * *endp is set to the first unconverted character
 * a conversion error or a string without digits is reported via leprintf
 */
static long
stol(char *s, char **endp)
{
	long val;

	errno = 0;
	val = strtol(s, endp, 10);
	if (errno != 0)
		leprintf("strtol:");
	if (s == *endp)
		leprintf("strtol: invalid number");

	return val;
}
/* from beg to end replace "\\d" with "d" and "\\n" with "\n" (where d is delim)
 * if delim is 'n' and n_newline is 0 then "\\n" is replaced with "n" (normal)
 * if delim is 'n' and n_newline is 1 then "\\n" is replaced with "\n" (y command)
 * if delim is 0 all escaped characters represent themselves (aci text)
 * memmove rest of string (beyond end) into place
 * return the number of converted escapes (backslashes removed)
 * NOTE: backslashes dropped in the delim == 0 case are not counted
 * FIXME: this has had too many corner cases slapped on and is ugly. rewrite better
 */
static size_t
escapes(char *beg, char *end, Rune delim, int n_newline)
{
size_t num = 0;
/* src reads ahead of dst; the text is compacted in place */
char *src = beg, *dst = beg;
while (src < end) {
/* handle escaped backslash specially so we don't think the second
 * backslash is escaping something */
if (*src == '\\' && src[1] == '\\') {
*dst++ = *src++;
/* with a delimiter both backslashes are kept, for aci text only one */
if (delim)
*dst++ = *src++;
else
src++;
} else if (*src == '\\' && !delim) {
/* aci text: drop the backslash, the next byte is copied as is */
src++;
} else if (*src == '\\' && src[1]) {
Rune r;
size_t rlen;
num++;
src++;
rlen = echarntorune(&r, src, end - src);
if (r == 'n' && delim == 'n') {
*src = n_newline ? '\n' : 'n'; /* src so we can still memmove() */
} else if (r == 'n') {
*src = '\n';
} else if (r != delim) {
/* not an escape we convert: keep the backslash and undo the count */
*dst++ = '\\';
num--;
}
memmove(dst, src, rlen);
dst += rlen;
src += rlen;
} else {
*dst++ = *src++;
}
}
/* pull the remainder of the string, including its nul, up to the new end */
memmove(dst, src, strlen(src) + 1);
return num;
}
/* charntorune that reports an empty or invalid sequence via leprintf
 * returns the number of bytes the rune occupies
 */
static size_t
echarntorune(Rune *r, char *s, size_t n)
{
	size_t width = charntorune(r, s, n);

	if (width == 0 || *r == Runeerror)
		leprintf("invalid UTF-8");
	return width;
}
static void
insert_labels(void)
{
size_t i;
Cmd *from, *to;
while (branches.size) {
from = prog + (ptrdiff_t)pop(&branches);
if (!from->u.label) {/* no label branch to end of script */
from->u.jump = pc - 1;
} else {
for (i = 0; i < labels.size; i++) {
to = prog + (ptrdiff_t)labels.data[i];
if (!strcmp(from->u.label, to->u.label)) {
from->u.jump = to;
break;
}
}
if (i == labels.size)
leprintf("bad label");
}
}
}
/*
* Getargs / Freeargs
* Read argument from s, return pointer to one past last character of argument
*/
/* Parse the text argument of a, c and i.
 *
 * POSIX compliant
 * i\
 * foobar
 *
 * also allow the following non POSIX compliant
 * i # empty line
 * ifoobar
 * ifoobar\
 * baz
 *
 * FIXME: GNU and busybox discard leading spaces
 * i foobar
 * i foobar
 * ifoobar
 * are equivalent in GNU and busybox. We don't. Should we?
 *
 * The whole rest of the line is consumed; returns a pointer to its nul byte.
 */
static char *
get_aci_arg(Cmd *c, char *s)
{
	c->u.acir.print = check_puts;
	c->u.acir.str = (String){ NULL, 0 };

	/* no continue flag if empty string */
	gflags.aci_cont = (*s != '\0');

	/* text on the same line: neither empty string nor POSIX compliant */
	if (*s != '\0' && (s[0] != '\\' || s[1] != '\0'))
		aci_append(c, s);

	/* aci_append may have shortened s, so locate the end afterwards */
	return strchr(s, '\0');
}
/* append one line of a,c,i text to c
 * an odd number of trailing backslashes means the text continues on the next
 * script line: the last backslash becomes a newline and gflags.aci_cont is set
 */
static void
aci_append(Cmd *c, char *s)
{
	char *end = s + strlen(s);
	char *p;
	unsigned int odd = 0;

	/* count trailing backslashes modulo two */
	for (p = end; p > s && p[-1] == '\\'; p--)
		odd = !odd;
	gflags.aci_cont = odd;

	if (odd)
		*--end = '\n';

	escapes(s, end, 0, 0);
	stracat(&c->u.acir.str, s);
}
/* release the text or path held by an a, c, i or r command */
static void
free_acir_arg(Cmd *c)
{
	char *text = c->u.acir.str.str;

	free(text);
}
/* Parse the optional label of b and t and record the command in branches.
 *
 * POSIX dictates that label is rest of line, including semicolons, trailing
 * whitespace, closing braces, etc. and can be limited to 8 bytes
 *
 * I allow a semicolon or closing brace to terminate a label name, it's not
 * POSIX compliant, but it's useful and every sed version I've tried to date
 * does the same.
 *
 * FIXME: POSIX dictates that leading whitespace is ignored but trailing
 * whitespace is not. This is annoying and we should probably get rid of it.
 */
static char *
get_bt_arg(Cmd *c, char *s)
{
	char *stop;

	s = chomp(s);
	stop = semicolon_arg(s);

	/* an empty label means branch to end of script, see insert_labels() */
	c->u.label = (stop == s) ? NULL : estrndup(s, stop - s);

	push(&branches, (void *)(c - prog));
	return stop;
}
/* Parse the file name of r.
 *
 * POSIX dictates file name is rest of line including semicolons, trailing
 * whitespace, closing braces, etc. and file name must be preceded by a space
 *
 * I allow a semicolon or closing brace to terminate a file name and don't
 * enforce leading space.
 *
 * FIXME: decide whether trailing whitespace should be included and fix
 * accordingly
 */
static char *
get_r_arg(Cmd *c, char *s)
{
	char *stop;

	s = chomp(s);
	stop = semicolon_arg(s);
	if (stop == s)
		leprintf("no file name");

	c->u.acir.str.str = estrndup(s, stop - s);
	c->u.acir.print = write_file;
	return stop;
}
/* Parse the argument of s: s/Find/Replace/Flags
 *
 * May be entered a second time for the same command when the replacement
 * text continues on the next script line (gflags.s_cont); in that case s is
 * the continuation line and only the Replace and Flags parts are parsed.
 * Returns a pointer to one past the argument.
 *
 * we allow "\\n" in replacement text to mean "\n" (undefined in POSIX)
 *
 * FIXME: allow other escapes in regex and replacement? if so change escapes()
 */
static char *
get_s_arg(Cmd *c, char *s)
{
	Rune delim, r;
	Cmd buf;
	char *p;
	int esc, empty_re;

	/* s/Find/Replace/Flags */

	/* Find */
	if (!gflags.s_cont) { /* NOT continuing from literal newline in replacement text */
		empty_re = 0;
		c->u.s.repl = (String){ NULL, 0 };
		c->u.s.occurrence = 1;
		c->u.s.file = NULL;
		c->u.s.p = 0;

		if (!*s || *s == '\\')
			leprintf("bad delimiter");

		p = s + strlen(s);
		s += echarntorune(&delim, s, p - s);
		c->u.s.delim = delim;

		echarntorune(&r, s, p - s);
		if (r == delim) /* empty regex */
			empty_re = 1;

		p = find_delim(s, delim, 1);
		if (!*p)
			leprintf("missing second delimiter");
		p -= escapes(s, p, delim, 0);
		*p = '\0';

		if (empty_re) {
			c->u.s.re = NULL;
		} else {
			c->u.s.re = emalloc(sizeof(*c->u.s.re));
			/* FIXME: different eregcomp that calls fatal */
			eregcomp(c->u.s.re, s, 0);
		}
		s = p + runelen(delim);
	}

	/* Replace */
	delim = c->u.s.delim;

	p = find_delim(s, delim, 0);
	p -= escapes(s, p, delim, 0);
	if (!*p) { /* no third delimiter */
		/* FIXME: same backslash counting as aci_append() */
		/* p == s guards the p[-1] read when a continuation line is empty */
		if (p == s || p[-1] != '\\')
			leprintf("missing third delimiter or <backslash><newline>");
		p[-1] = '\n';
		gflags.s_cont = 1;
	} else {
		gflags.s_cont = 0;
	}

	/* check for bad references in replacement text */
	*p = '\0';
	for (esc = 0, p = s; *p; p++) {
		if (esc) {
			esc = 0;
			if (isdigit((unsigned char)*p) && c->u.s.re && (size_t)(*p - '0') > c->u.s.re->re_nsub)
				leprintf("back reference number greater than number of groups");
		} else if (*p == '\\') {
			esc = 1;
		}
	}
	/* p now points at the nul that replaced the delimiter (or the line end) */
	stracat(&c->u.s.repl, s);

	if (gflags.s_cont)
		return p;

	s = p + runelen(delim);

	/* Flags */
	p = semicolon_arg(s = chomp(s));

	/* FIXME: currently for simplicity take last of g or occurrence flags and
	 * ignore multiple p flags. need to fix that */
	for (; s < p; s++) {
		if (isdigit((unsigned char)*s)) {
			c->u.s.occurrence = stol(s, &s);
			s--; /* the for loop advances past the last digit */
		} else {
			switch (*s) {
			case 'g': c->u.s.occurrence = 0; break;
			case 'p': c->u.s.p = 1; break;
			case 'w':
				/* must be last flag, take everything up to newline/semicolon
				 * the file name starts after the w itself
				 * s == p after this */
				s = get_w_arg(&buf, s + 1);
				c->u.s.file = buf.u.file;
				break;
			}
		}
	}
	return p;
}
/* release the compiled regex (if any) and the replacement text of an s command */
static void
free_s_arg(Cmd *c)
{
	regex_t *re = c->u.s.re;

	if (re != NULL) {
		regfree(re);
		free(re);
	}
	free(c->u.s.repl.str);
}
/* Parse the file name of w (also used for the w flag of s) and store the
 * stream to write to in c->u.file.
 * The name ends at a semicolon, closing brace or end of line; leading
 * whitespace is skipped. A path seen before reuses its already open stream,
 * otherwise the file is opened for writing and remembered in wfiles.
 */
static char *
get_w_arg(Cmd *c, char *s)
{
	char *stop;
	size_t namelen, i;
	Wfile *w;

	s = chomp(s);
	stop = semicolon_arg(s);
	if (stop == s)
		leprintf("no file name");
	namelen = stop - s;

	for (i = 0; i < wfiles.size; i++) {
		w = wfiles.data[i];
		if (strlen(w->path) == namelen && strncmp(s, w->path, namelen) == 0) {
			c->u.file = w->file;
			return stop;
		}
	}

	w = emalloc(sizeof(*w));
	w->path = estrndup(s, namelen);
	w->file = fopen(w->path, "w");
	if (w->file == NULL)
		leprintf("fopen failed");

	c->u.file = w->file;
	push(&wfiles, w);
	return stop;
}
/* Parse the argument of y: y/set1/set2/
 * The first rune is the delimiter. Both sets are unescaped in place
 * ("\\n" becomes a newline even when the delimiter is n) and converted to
 * Rune arrays of equal length.
 * Returns a pointer to one past the closing delimiter.
 * A missing delimiter or sets of different length are reported via leprintf.
 */
static char *
get_y_arg(Cmd *c, char *s)
{
	Rune delim;
	char *p = s + strlen(s);
	size_t rlen = echarntorune(&delim, s, p - s);
	size_t nrunes1, nrunes2;

	c->u.y.set1 = c->u.y.set2 = NULL;

	/* set1 */
	s += rlen;
	p = find_delim(s, delim, 0);
	if (!*p) /* without this check s would be moved past the nul below */
		leprintf("missing second delimiter");
	p -= escapes(s, p, delim, 1);
	nrunes1 = utfnlen(s, p - s);
	c->u.y.set1 = strtorunes(s, nrunes1);

	/* set2 */
	s = p + rlen;
	p = find_delim(s, delim, 0);
	if (!*p) /* the returned pointer must not lie beyond the nul */
		leprintf("missing third delimiter");
	p -= escapes(s, p, delim, 1);
	nrunes2 = utfnlen(s, p - s);

	if (nrunes1 != nrunes2)
		leprintf("different set lengths");

	c->u.y.set2 = strtorunes(s, nrunes2);

	return p + rlen;
}
/* release both rune sets of a y command */
static void
free_y_arg(Cmd *c)
{
	Yarg *sets = &c->u.y;

	free(sets->set1);
	free(sets->set2);
}
/* Parse the label name of : and record the command in labels.
 * The name ends at a semicolon, closing brace or end of line; leading
 * whitespace is skipped. An empty name is an error.
 */
static char *
get_colon_arg(Cmd *c, char *s)
{
	char *stop;

	s = chomp(s);
	stop = semicolon_arg(s);
	if (stop == s)
		leprintf("no label name");

	c->u.label = estrndup(s, stop - s);
	push(&labels, (void *)(c - prog));
	return stop;
}
/* { takes no argument: remember its index in prog on the braces stack so the
 * matching } can find it; an index is stored because prog may be reallocated
 */
static char *
get_lbrace_arg(Cmd *c, char *s)
{
	ptrdiff_t index = c - prog;

	push(&braces, (void *)index);
	return s;
}
/* } takes no argument: pop the matching { and store this command's index in
 * prog as the offset of that {. a } without an open { is an error
 */
static char *
get_rbrace_arg(Cmd *c, char *s)
{
	ptrdiff_t open;

	if (braces.size == 0)
		leprintf("extra }");

	open = (ptrdiff_t)pop(&braces);
	prog[open].u.offset = c - prog;
	return s;
}
/* s points to the beginning of an argument that may be terminated by a
 * semicolon, or by a closing brace so that ; is not required before }
 * return pointer to that terminator, or to the nul byte if there is none
 * FIXME: decide whether or not to eat trailing whitespace for arguments that
 * we allow semicolon/brace termination that POSIX doesn't
 * b, r, t, w, :
 * POSIX says trailing whitespace is part of label name, file name, etc.
 * we should probably eat it
 */
static char *
semicolon_arg(char *s)
{
	/* length of the prefix free of terminators; the whole string if none */
	return s + strcspn(s, ";}");
}
/* Execute the compiled program.
 * Rejects an unclosed {, resolves branch labels, opens the first input and
 * then runs commands until gflags.halt is set. Each command function may
 * move pc; the loop always advances to the command after it.
 */
static void
run(void)
{
	lineno = 0;
	if (braces.size)
		leprintf("extra {");

	/* genbuf has already been initialized, patt will be in new_line
	 * (or we'll halt) */
	stracpy(&hold, "");

	insert_labels();
	next_file();
	new_line();

	pc = prog;
	while (!gflags.halt) {
		pc->fninfo->fn(pc);
		pc++;
	}
}
/* return true if the current line is selected by c (taking ! into account)
 * and update c->in_match, which tracks whether a two address range is open
 */
static int
in_range(Cmd *c)
{
	Range *rg = &c->range;
	int ends_here;

	if (match_addr(&rg->beg)) {
		if (rg->naddr == 2) {
			/* a line number end that is not after this line closes the range at once */
			ends_here = (rg->end.type == LINE && rg->end.u.lineno <= lineno);
			c->in_match = !ends_here;
		}
		return !c->negate;
	}

	/* not at the start and no open range: only selected when negated */
	if (!c->in_match)
		return c->negate;

	/* inside an open range: the line is selected, close the range if it ends here */
	if (match_addr(&rg->end))
		c->in_match = 0;
	return !c->negate;
}
/* return true if addr matches current line
 * REGEX addresses also become the last used regex; LAST may advance to the
 * next input file to find out whether any input remains
 */
static int
match_addr(Addr *a)
{
	if (a->type == EVERY)
		return 1;

	if (a->type == LINE)
		return lineno == a->u.lineno;

	if (a->type == LAST) {
		/* skip over exhausted files until one has input or none are left */
		while (is_eof(file) && !next_file())
			;
		return !file;
	}

	if (a->type == REGEX) {
		lastre = a->u.re;
		return !regexec(a->u.re, patt.str, 0, NULL, 0);
	}

	if (a->type == LASTRE) {
		if (!lastre)
			leprintf("no previous regex");
		return !regexec(lastre, patt.str, 0, NULL, 0);
	}

	/* IGNORE and anything unexpected */
	return 0;
}
/* move to next input file
* stdin if first call and no files
* return 0 for success and 1 for no more files
*/
static int
next_file(void)
{
static unsigned char first = 1;
if (file == stdin)
clearerr(file);
else if (file)
Add *fshut() functions to properly flush file streams This has been a known issue for a long time. Example: printf "word" > /dev/full wouldn't report there's not enough space on the device. This is due to the fact that every libc has internal buffers for stdout which store fragments of written data until they reach a certain size or on some callback to flush them all at once to the kernel. You can force the libc to flush them with fflush(). In case flushing fails, you can check the return value of fflush() and report an error. However, previously, sbase didn't have such checks and without fflush(), the libc silently flushes the buffers on exit without checking the errors. No offense, but there's no way for the libc to report errors in the exit- condition. GNU coreutils solve this by having onexit-callbacks to handle the flushing and report issues, but they have obvious deficiencies. After long discussions on IRC, we came to the conclusion that checking the return value of every io-function would be a bit too much, and having a general-purpose fclose-wrapper would be the best way to go. It turned out that fclose() alone is not enough to detect errors. The right way to do it is to fflush() + check ferror on the fp and then to a fclose(). This is what fshut does and that's how it's done before each return. The return value is obviously affected, reporting an error in case a flush or close failed, but also when reading failed for some reason, the error- state is caught. the !!( ... + ...) construction is used to call all functions inside the brackets and not "terminating" on the first. We want errors to be reported, but there's no reason to stop flushing buffers when one other file buffer has issues. Obviously, functionales come before the flush and ret-logic comes after to prevent early exits as well without reporting warnings if there are any. 
One more advantage of fshut() is that it is even able to report errors on obscure NFS-setups which the other coreutils are unable to detect, because they only check the return-value of fflush() and fclose(), not ferror() as well.
2015-04-04 15:25:17 -04:00
fshut(file, "<file>");
file = NULL;
do {
if (!*files) {
if (first) /* given no files, default to stdin */
file = stdin;
/* else we've used all our files, leave file = NULL */
} else if (!strcmp(*files, "-")) {
file = stdin;
files++;
} else if (!(file = fopen(*files++, "r"))) {
/* warn this file didn't open, but move on to next */
weprintf("fopen:");
}
} while (!file && *files);
first = 0;
return !file;
}
/* test if stream is at EOF by reading one character and pushing it back
 * a NULL stream counts as EOF; read and push back errors are fatal
 */
static int
is_eof(FILE *f)
{
	int ch;

	if (f == NULL || feof(f))
		return 1;

	ch = fgetc(f);
	if (ch == EOF) {
		if (ferror(f))
			eprintf("fgetc:");
		return 1;
	}

	if (ungetc(ch, f) == EOF)
		eprintf("ungetc EOF\n");
	return 0;
}
/* perform the scheduled writes in the order they were queued, then empty
 * the queue
 * for aci this is check_puts(string, stdout)
 * for r this is write_file(path, stdout)
 */
static void
do_writes(void)
{
	Cmd *pending;
	size_t k;

	for (k = 0; k < writes.size; k++) {
		pending = writes.data[k];
		pending->u.acir.print(pending->u.acir.str.str, stdout);
	}
	writes.size = 0;
}
/* used for r's u.acir.print(): copy the file at path to out, one line at
 * a time through genbuf. a file that cannot be opened is treated as an
 * empty file, so nothing is written and no error is reported.
 * FIXME: something like util's concat() would be better
 */
static void
write_file(char *path, FILE *out)
{
	FILE *in = fopen(path, "r");

	if (!in) /* no file is treated as empty file */
		return;

	while (read_line(in, &genbuf) != EOF)
		check_puts(genbuf.str, out);

	/* flush/close through fshut() so stream errors get reported */
	fshut(in, path);
}
/* write s (skipped when NULL) and then a newline to f
 * a failing fputs() is reported through eprintf()
 */
static void
check_puts(char *s, FILE *f)
{
	if (s != NULL) {
		if (fputs(s, f) == EOF)
			eprintf("fputs:");
	}

	if (fputs("\n", f) == EOF)
		eprintf("fputs:");
}
/* call in_range() on every command in [beg, end) so that commands we are
 * about to skip still get their range state updated
 * e.g. sed -n '1d;1,3p' should still print lines 2 and 3
 */
static void
update_ranges(Cmd *beg, Cmd *end)
{
	Cmd *cur;

	for (cur = beg; cur < end; cur++)
		in_range(cur);
}
/*
* Sed functions
*/
/* a: when in range, queue this command on writes; do_writes() later calls
 * its u.acir.print() on the stored text
 */
static void
cmd_a(Cmd *c)
{
	if (!in_range(c))
		return;

	push(&writes, c);
}
/* b: when in range, continue execution at c->u.jump */
static void
cmd_b(Cmd *c)
{
	Cmd *stop;

	if (!in_range(c))
		return;

	/* if we jump backwards update to end, otherwise update to destination */
	if (c->u.jump > c)
		stop = c->u.jump;
	else
		stop = prog + pcap;
	update_ranges(c + 1, stop);

	pc = c->u.jump;
}
/* c: when in range, drop the pattern space and start the next cycle;
 * the replacement text is written once, when c->in_match is clear
 */
static void
cmd_c(Cmd *c)
{
	if (in_range(c)) {
		/* write the text on the last line of the match */
		if (!c->in_match)
			check_puts(c->u.acir.str.str, stdout);

		/* either way start the next cycle without printing pattern
		 * space, effectively deleting the text */
		new_next();
	}
}
/* d: when in range, read a new line and start a new cycle (new_next) */
static void
cmd_d(Cmd *c)
{
	if (in_range(c))
		new_next();
}
/* D: when in range, remove patt up to and including its first newline and
 * start a new cycle keeping what is left (old_next); when patt holds no
 * newline behave like d (new_next)
 */
static void
cmd_D(Cmd *c)
{
	char *nl, *rest;

	if (!in_range(c))
		return;

	nl = strchr(patt.str, '\n');
	if (!nl) {
		new_next();
		return;
	}

	/* shift the remainder, terminator included, to the front of the buffer */
	rest = nl + 1;
	memmove(patt.str, rest, strlen(rest) + 1);
	old_next();
}
/* g: when in range, stracpy() the contents of hold into patt */
static void
cmd_g(Cmd *c)
{
	if (!in_range(c))
		return;

	stracpy(&patt, hold.str);
}
/* G: when in range, append a newline and then the contents of hold to patt */
static void
cmd_G(Cmd *c)
{
	if (in_range(c)) {
		stracat(&patt, "\n");
		stracat(&patt, hold.str);
	}
}
/* h: when in range, stracpy() the contents of patt into hold */
static void
cmd_h(Cmd *c)
{
	if (!in_range(c))
		return;

	stracpy(&hold, patt.str);
}
/* H: when in range, append a newline and then the contents of patt to hold */
static void
cmd_H(Cmd *c)
{
	if (in_range(c)) {
		stracat(&hold, "\n");
		stracat(&hold, patt.str);
	}
}
/* i: when in range, write the command's text to stdout right away */
static void
cmd_i(Cmd *c)
{
	if (!in_range(c))
		return;

	check_puts(c->u.acir.str.str, stdout);
}
/* l: when in range, write patt to stdout in a visually unambiguous form,
 * terminated by "$" and a newline.
 * bytes with an entry in the escapes table are written as that escape,
 * incomplete or invalid UTF-8 sequences are written byte by byte in octal
 * (I think that satisfies the "visually unambiguous form" of sed(1p)),
 * every other rune is written as is
 */
static void
cmd_l(Cmd *c)
{
	char *escapes[] = { /* FIXME: 7 entries and search instead of 127 */
		['\\'] = "\\\\", ['\a'] = "\\a", ['\b'] = "\\b",
		['\f'] = "\\f" , ['\r'] = "\\r", ['\t'] = "\\t",
		['\v'] = "\\v" , [0x7f] = NULL, /* fill out the table */
	};
	Rune rune;
	char *cur, *stop;
	size_t nbytes;

	if (!in_range(c))
		return;

	/* FIXME: line wrapping. sed(1p) says "length at which folding occurs is
	 * unspecified, but should be appropriate for the output device"
	 * just wrap at 80 Runes?
	 */
	cur = patt.str;
	stop = cur + strlen(cur);
	while (cur < stop) {
		/* isascii() guards the table lookup against out of range indices */
		if (isascii(*cur) && escapes[(unsigned int)*cur]) {
			printf("%s", escapes[(unsigned int)*cur]);
			cur++;
			continue;
		}

		nbytes = charntorune(&rune, cur, stop - cur);
		if (!nbytes) {
			/* ran out of chars, print the bytes of the short sequence */
			for (; cur < stop; cur++)
				printf("\\%03hho", (unsigned char)*cur);
			break;
		}

		if (rune == Runeerror) {
			/* bad sequence, print each of its bytes in octal */
			for (; nbytes; nbytes--, cur++)
				printf("\\%03hho", (unsigned char)*cur);
			continue;
		}

		/* retry the write for as long as it fails with EINTR */
		while (fwrite(cur, nbytes, 1, stdout) < 1 && errno == EINTR)
			;
		if (ferror(stdout))
			eprintf("fwrite:");
		cur += nbytes;
	}
	check_puts("$", stdout);
}
/* n: when in range, write patt to stdout unless gflags.n is set, perform
 * the scheduled writes, then read the next line and continue this cycle
 */
static void
cmd_n(Cmd *c)
{
	if (in_range(c)) {
		if (!gflags.n)
			check_puts(patt.str, stdout);
		do_writes();
		new_line();
	}
}
/* N: when in range, perform the scheduled writes and append the next
 * input line to patt (app_line)
 */
static void
cmd_N(Cmd *c)
{
	if (in_range(c)) {
		do_writes();
		app_line();
	}
}
/* p: when in range, write patt and a newline to stdout */
static void
cmd_p(Cmd *c)
{
	if (!in_range(c))
		return;

	check_puts(patt.str, stdout);
}
/* P: when in range, write patt up to (not including) its first newline,
 * or all of patt when it holds none, followed by a newline, to stdout
 */
static void
cmd_P(Cmd *c)
{
	char *nl;

	if (!in_range(c))
		return;

	nl = strchr(patt.str, '\n');
	if (nl) {
		/* cut the string at the newline just for the write */
		*nl = '\0';
		check_puts(patt.str, stdout);
		*nl = '\n';
	} else {
		check_puts(patt.str, stdout);
	}
}
/* q: when in range, write patt to stdout unless gflags.n is set, perform
 * the scheduled writes and set gflags.halt
 */
static void
cmd_q(Cmd *c)
{
	if (in_range(c)) {
		if (!gflags.n)
			check_puts(patt.str, stdout);
		do_writes();
		gflags.halt = 1;
	}
}
/* r: when in range, queue this command on writes; do_writes() later calls
 * its u.acir.print() on the stored path
 */
static void
cmd_r(Cmd *c)
{
	if (!in_range(c))
		return;

	push(&writes, c);
}
/* s: when in range, substitute matches of a regex in patt.
 * the regex is c->u.s.re, or the most recently used one (lastre) when that
 * is NULL; having neither is an error. the result is built up in genbuf.
 * c->u.s.occurrence selects which match gets replaced, 0 replaces all.
 * in the replacement text & stands for the whole match, \digit for the
 * matching subexpression and \ followed by anything else for that byte.
 * when nothing was replaced patt is left alone. otherwise gflags.s is set
 * (tested by t), genbuf and patt are swapped, and the new patt is written
 * to stdout if c->u.s.p is set and to c->u.s.file if that is not NULL
 */
static void
cmd_s(Cmd *c)
{
	String tmp;
	Rune r;
	size_t plen, rlen, len;
	char *p, *s, *end;
	unsigned int matches = 0, last_empty = 1, qflag = 0, cflags = 0;
	regex_t *re;
	regmatch_t *rm, *pmatch = NULL;

	if (!in_range(c))
		return;

	if (!c->u.s.re && !lastre)
		leprintf("no previous regex");

	re = c->u.s.re ? c->u.s.re : lastre;
	lastre = re;

	/* one slot for the whole match plus one per subexpression */
	plen = re->re_nsub + 1;
	pmatch = ereallocarray(NULL, plen, sizeof(regmatch_t));

	*genbuf.str = '\0';
	s = patt.str;

	while (!qflag && !regexec(re, s, plen, pmatch, cflags)) {
		cflags = REG_NOTBOL; /* match against beginning of line first time, but not again */
		if (!*s) /* match against empty string first time, but not again */
			qflag = 1;

		/* don't substitute if last match was not empty but this one is.
		 * s_a*_._g
		 * foobar -> .f.o.o.b.r.
		 */
		if ((last_empty || pmatch[0].rm_eo) &&
		    (++matches == c->u.s.occurrence || !c->u.s.occurrence)) {
			/* copy over everything before the match */
			strnacat(&genbuf, s, pmatch[0].rm_so);

			/* copy over replacement text, taking into account &, backreferences, and \ escapes */
			for (p = c->u.s.repl.str, len = strcspn(p, "\\&"); *p; len = strcspn(++p, "\\&")) {
				strnacat(&genbuf, p, len);
				p += len;
				/* strcspn() stops only on '\\', '&' or the terminator */
				switch (*p) {
				default: leprintf("this shouldn't be possible");
				case '\0':
					/* we're at the end, back up one so the ++p will put us on
					 * the null byte to break out of the loop */
					--p;
					break;
				case '&':
					strnacat(&genbuf, s + pmatch[0].rm_so, pmatch[0].rm_eo - pmatch[0].rm_so);
					break;
				case '\\':
					/* <ctype.h> functions need an unsigned char value, a
					 * negative plain char would be undefined behaviour */
					if (isdigit((unsigned char)*++p)) { /* backreference */
						/* only need to check here if using lastre, otherwise we checked when building */
						if (!c->u.s.re && (size_t)(*p - '0') > re->re_nsub)
							leprintf("back reference number greater than number of groups");
						rm = &pmatch[*p - '0'];
						strnacat(&genbuf, s + rm->rm_so, rm->rm_eo - rm->rm_so);
					} else { /* character after backslash taken literally (well one byte, but it works) */
						strnacat(&genbuf, p, 1);
					}
					break;
				}
			}
		} else {
			/* not replacing, copy over everything up to and including the match */
			strnacat(&genbuf, s, pmatch[0].rm_eo);
		}

		if (!pmatch[0].rm_eo) { /* empty match, advance one rune and add it to output */
			end = s + strlen(s);
			rlen = charntorune(&r, s, end - s);

			if (!rlen) { /* ran out of bytes, copy short sequence */
				stracat(&genbuf, s);
				s = end;
			} else { /* copy whether or not it's a good rune */
				strnacat(&genbuf, s, rlen);
				s += rlen;
			}
		}
		last_empty = !pmatch[0].rm_eo;
		s += pmatch[0].rm_eo;
	}
	free(pmatch);

	if (!(matches && matches >= c->u.s.occurrence)) /* no replacement */
		return;

	gflags.s = 1;

	/* copy over whatever follows the last match, then make genbuf the
	 * new pattern space by swapping the two buffers */
	stracat(&genbuf, s);

	tmp = patt;
	patt = genbuf;
	genbuf = tmp;

	if (c->u.s.p)
		check_puts(patt.str, stdout);
	if (c->u.s.file)
		check_puts(patt.str, c->u.s.file);
}
/* t: when in range and gflags.s is set, continue execution at c->u.jump
 * and clear gflags.s
 */
static void
cmd_t(Cmd *c)
{
	Cmd *stop;

	if (!in_range(c))
		return;
	if (!gflags.s)
		return;

	/* if we jump backwards update to end, otherwise update to destination */
	if (c->u.jump > c)
		stop = c->u.jump;
	else
		stop = prog + pcap;
	update_ranges(c + 1, stop);

	pc = c->u.jump;
	gflags.s = 0;
}
/* w: when in range, write patt and a newline to the stream c->u.file */
static void
cmd_w(Cmd *c)
{
	if (!in_range(c))
		return;

	check_puts(patt.str, c->u.file);
}
/* x: when in range, exchange patt and hold (the String structs are
 * swapped, nothing is copied)
 */
static void
cmd_x(Cmd *c)
{
	String saved;

	if (in_range(c)) {
		saved = hold;
		hold = patt;
		patt = saved;
	}
}
/* y: when in range, replace every rune of patt that occurs in
 * c->u.y.set1 by the rune at the same position in c->u.y.set2.
 * the result is built in genbuf, which is then swapped with patt.
 * invalid UTF-8 and runes not in set1 are copied unchanged
 */
static void
cmd_y(Cmd *c)
{
	String swap;
	Rune rune, *hit;
	size_t inlen, outlen;
	char *cur, *stop, enc[UTFmax];

	if (!in_range(c))
		return;

	*genbuf.str = '\0';
	cur = patt.str;
	stop = cur + strlen(cur);
	while (*cur) {
		inlen = charntorune(&rune, cur, stop - cur);
		if (!inlen) { /* ran out of chars, copy rest */
			stracat(&genbuf, cur);
			break;
		}

		if (rune != Runeerror) {
			/* search set1, which ends with a zero rune */
			hit = c->u.y.set1;
			while (*hit && *hit != rune)
				hit++;

			if (*hit) { /* found rune in set1, replace with Rune from set2 */
				outlen = runetochar(enc, c->u.y.set2 + (hit - c->u.y.set1));
				strnacat(&genbuf, enc, outlen);
				cur += inlen;
				continue;
			}
		}

		/* bad UTF-8 sequence or rune not in set1, copy bytes */
		strnacat(&genbuf, cur, inlen);
		cur += inlen;
	}

	swap = patt;
	patt = genbuf;
	genbuf = swap;
}
/* : does nothing when executed */
static void
cmd_colon(Cmd *c)
{
	(void)c; /* unused */
}
/* =: when in range, write the current value of lineno and a newline to stdout */
static void
cmd_equal(Cmd *c)
{
	if (!in_range(c))
		return;

	printf("%zu\n", lineno);
}
/* {: when in range do nothing, so execution continues inside the group;
 * otherwise jump to prog + c->u.offset
 */
static void
cmd_lbrace(Cmd *c)
{
	Cmd *target;

	if (!in_range(c)) {
		target = prog + c->u.offset;
		/* update ranges on all commands we skip */
		update_ranges(c + 1, target);
		pc = target;
	}
}
/* } does nothing when executed */
static void
cmd_rbrace(Cmd *c)
{
	(void)c; /* unused */
}
/* not actually a sed function, but acts like one, put in last spot of script
 * ends the cycle: write patt to stdout unless gflags.n is set, perform the
 * scheduled writes, then read a new line and start a new cycle
 */
static void
cmd_last(Cmd *c)
{
	(void)c; /* unused, there is no address to check */

	if (!gflags.n)
		check_puts(patt.str, stdout);

	do_writes();
	new_next();
}
/*
* Actions
*/
/* read new line into patt, continue current cycle
 * when the current file is at EOF move on with next_file(); once that
 * returns nonzero set gflags.halt and leave patt and lineno alone
 */
static void
new_line(void)
{
	for (;;) {
		if (read_line(file, &patt) != EOF)
			break;
		if (next_file()) {
			gflags.halt = 1;
			return;
		}
	}

	gflags.s = 0;
	lineno++;
}
/* append new line to patt (after a newline), continue current cycle
 * the line is read into genbuf first; when no input is left gflags.halt is
 * set and patt is not modified
 * FIXME: used for N, POSIX specifies do not print pattern space when out of
 *        input, but GNU does so busybox does as well. Currently we don't.
 *        Should we?
 */
static void
app_line(void)
{
	for (;;) {
		if (read_line(file, &genbuf) != EOF)
			break;
		if (next_file()) {
			gflags.halt = 1;
			return;
		}
	}

	stracat(&patt, "\n");
	stracat(&patt, genbuf.str);
	gflags.s = 0;
	lineno++;
}
/* read new line, start new cycle */
static void
new_next(void)
{
update_ranges(pc + 1, prog + pcap);
new_line();
pc = prog - 1;
}
/* keep old pattern space, start new cycle */
static void
old_next(void)
{
update_ranges(pc + 1, prog + pcap);
pc = prog - 1;
}
/* parse the options, compile the script, run it over the remaining
 * arguments (the input files) and report stream errors.
 * -n sets gflags.n; every -e and -f argument is handed to compile(), with
 * a second argument of 0 and 1 respectively. without -e and -f the first
 * operand is the script.
 * returns nonzero when fshut() reports a problem on stdin or stdout
 */
int
main(int argc, char *argv[])
{
	char *arg;
	int script = 0;

	ARGBEGIN {
	case 'n':
		gflags.n = 1;
		break;
	case 'e':
		arg = EARGF(usage());
		compile(arg, 0);
		script = 1;
		break;
	case 'f':
		arg = EARGF(usage());
		compile(arg, 1);
		script = 1;
		break;
	default : usage();
	} ARGEND;

	/* no script to run */
	if (!script && !argc)
		usage();

	/* no script yet, next argument is script */
	if (!script)
		compile(*argv++, 0);

	/* shrink/grow memory to fit and add our last instruction */
	resize((void **)&prog, &pcap, sizeof(*prog), pc - prog + 1, NULL);
	pc = prog + pcap - 1;
	pc->fninfo = &(Fninfo){ cmd_last, NULL, NULL, 0 };

	files = argv;
	run();

	/* + instead of || so that both streams get shut even if the first fails */
	return !!(fshut(stdin, "<stdin>") + fshut(stdout, "<stdout>"));
}