sbase/sed.c

1734 lines
40 KiB
C
Raw Normal View History

/* FIXME: summary
* decide whether we enforce valid UTF-8, right now it's enforced in certain
* parts of the script, but not the input...
* nul bytes cause explosions due to use of libc string functions. thoughts?
* lack of newline at end of file, currently we add one. what should we do?
* allow "\\t" for "\t" etc. in regex? in replacement text?
* POSIX says don't flush on N when out of input, but GNU and busybox do.
*/
#include <ctype.h>
#include <errno.h>
#include <regex.h>
#include <stdlib.h>
#include <string.h>
#include "utf.h"
#include "util.h"
/* Types */
/* used as queue for writes and stack for {,:,b,t */
typedef struct {
void **data;
size_t size;
size_t cap;
} Vec;
/* used for arbitrary growth, str is a C string
* FIXME: does it make sense to keep track of length? or just rely on libc
* string functions? If we want to support nul bytes everything changes
*/
typedef struct {
char *str;
size_t cap;
} String;
typedef struct Cmd Cmd;
typedef struct {
void (*fn)(Cmd *);
char *(*getarg)(Cmd *, char *);
void (*freearg)(Cmd *);
unsigned char naddr;
} Fninfo;
typedef struct {
union {
size_t lineno;
regex_t *re;
} u;
enum {
IGNORE, /* empty address, ignore */
EVERY , /* every line */
LINE , /* ilne number */
LAST , /* last line ($) */
REGEX , /* use included regex */
LASTRE, /* use most recently used regex */
} type;
} Addr;
/* DISCUSS: naddr is not strictly necessary, but very helpful
* naddr == 0 iff beg.type == EVERY && end.type == IGNORE
* naddr == 1 iff beg.type != IGNORE && end.type == IGNORE
* naddr == 2 iff beg.type != IGNORE && end.type != IGNORE
*/
typedef struct {
Addr beg;
Addr end;
unsigned char naddr;
} Range;
typedef struct {
regex_t *re; /* if NULL use last regex */
String repl;
FILE *file;
size_t occurrence; /* 0 for all (g flag) */
Rune delim;
unsigned int p:1;
} Sarg;
typedef struct {
Rune *set1;
Rune *set2;
} Yarg;
typedef struct {
String str; /* a,c,i text. r file path */
void (*print)(char *, FILE *); /* check_puts for a, write_file for r, unused for c,i */
} ACIRarg;
struct Cmd {
Range range;
Fninfo *fninfo;
union {
Cmd *jump; /* used for b,t when running */
char *label; /* used for :,b,t when building */
ptrdiff_t offset; /* used for { (pointers break during realloc) */
FILE *file; /* used for w */
/* FIXME: Should the following be in the union? or pointers and malloc? */
Sarg s;
Yarg y;
ACIRarg acir;
} u; /* I find your lack of anonymous unions disturbing */
unsigned int in_match:1;
unsigned int negate :1;
};
/* Files for w command (and s' w flag) */
typedef struct {
char *path;
FILE *file;
} Wfile;
/*
* Function Declarations
*/
/* Dynamically allocated arrays and strings */
void resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next);
void *pop(Vec *v);
void push(Vec *v, void *p);
void stracat(String *dst, char *src);
void strnacat(String *dst, char *src, size_t n);
void stracpy(String *dst, char *src);
void strnacpy(String *dst, char *src, size_t n);
/* Cleanup and errors */
static void usage(void);
/* Parsing functions and related utilities */
void compile(char *s, int isfile);
int read_line(FILE *f, String *s);
char *make_range(Range *range, char *s);
char *make_addr(Addr *addr, char *s);
char *find_delim(char *s, Rune delim, int do_brackets);
char *chompr(char *s, Rune rune);
char *chomp(char *s);
Rune *strtorunes(char *s, size_t nrunes);
long stol(char *s, char **endp);
size_t escapes(char *beg, char *end, Rune delim, int n_newline);
size_t echarntorune(Rune *r, char *s, size_t n);
void insert_labels(void);
/* Get and Free arg and related utilities */
char *get_aci_arg(Cmd *c, char *s);
void aci_append(Cmd *c, char *s);
void free_acir_arg(Cmd *c);
char *get_bt_arg(Cmd *c, char *s);
char *get_r_arg(Cmd *c, char *s);
char *get_s_arg(Cmd *c, char *s);
void free_s_arg(Cmd *c);
char *get_w_arg(Cmd *c, char *s);
char *get_y_arg(Cmd *c, char *s);
void free_y_arg(Cmd *c);
char *get_colon_arg(Cmd *c, char *s);
char *get_lbrace_arg(Cmd *c, char *s);
char *get_rbrace_arg(Cmd *c, char *s);
char *semicolon_arg(char *s);
/* Running */
void run(void);
int in_range(Cmd *c);
int match_addr(Addr *a);
int next_file(void);
int is_eof(FILE *f);
void do_writes(void);
void write_file(char *path, FILE *out);
void check_puts(char *s, FILE *f);
void update_ranges(Cmd *beg, Cmd *end);
/* Sed functions */
void cmd_y(Cmd *c);
void cmd_x(Cmd *c);
void cmd_w(Cmd *c);
void cmd_t(Cmd *c);
void cmd_s(Cmd *c);
void cmd_r(Cmd *c);
void cmd_q(Cmd *c);
void cmd_P(Cmd *c);
void cmd_p(Cmd *c);
void cmd_N(Cmd *c);
void cmd_n(Cmd *c);
void cmd_l(Cmd *c);
void cmd_i(Cmd *c);
void cmd_H(Cmd *c);
void cmd_h(Cmd *c);
void cmd_G(Cmd *c);
void cmd_g(Cmd *c);
void cmd_D(Cmd *c);
void cmd_d(Cmd *c);
void cmd_c(Cmd *c);
void cmd_b(Cmd *c);
void cmd_a(Cmd *c);
void cmd_colon(Cmd *c);
void cmd_equal(Cmd *c);
void cmd_lbrace(Cmd *c);
void cmd_rbrace(Cmd *c);
void cmd_last(Cmd *c);
/* Actions */
void new_line(void);
void app_line(void);
void new_next(void);
void old_next(void);
/*
* Globals
*/
Vec braces, labels, branches; /* holds ptrdiff_t. addrs of {, :, bt */
Vec writes; /* holds cmd*. writes scheduled by a and r commands */
Vec wfiles; /* holds Wfile*. files for w and s///w commands */
Cmd *prog, *pc; /* Program, program counter */
size_t pcap;
size_t lineno;
regex_t *lastre; /* last used regex for empty regex search */
char **files; /* list of file names from argv */
FILE *file; /* current file we are reading */
String patt, hold, genbuf;
struct {
unsigned int n :1; /* -n (no print) */
unsigned int s :1; /* s/// replacement happened */
unsigned int aci_cont:1; /* a,c,i text continuation */
unsigned int s_cont :1; /* s/// replacement text continuation */
unsigned int halt :1; /* halt execution */
} gflags;
/* FIXME: move character inside Fninfo and only use 26*sizeof(Fninfo) instead of 127*sizeof(Fninfo) bytes */
Fninfo fns[] = {
['a'] = { cmd_a , get_aci_arg , free_acir_arg , 1 }, /* schedule write of text for later */
['b'] = { cmd_b , get_bt_arg , NULL , 2 }, /* branch to label char *label when building, Cmd *jump when running */
['c'] = { cmd_c , get_aci_arg , free_acir_arg , 2 }, /* delete pattern space. at 0 or 1 addr or end of 2 addr, write text */
['d'] = { cmd_d , NULL , NULL , 2 }, /* delete pattern space */
['D'] = { cmd_D , NULL , NULL , 2 }, /* delete to first newline and start new cycle without reading (if no newline, d) */
['g'] = { cmd_g , NULL , NULL , 2 }, /* replace pattern space with hold space */
['G'] = { cmd_G , NULL , NULL , 2 }, /* append newline and hold space to pattern space */
['h'] = { cmd_h , NULL , NULL , 2 }, /* replace hold space with pattern space */
['H'] = { cmd_H , NULL , NULL , 2 }, /* append newline and pattern space to hold space */
['i'] = { cmd_i , get_aci_arg , free_acir_arg , 1 }, /* write text */
['l'] = { cmd_l , NULL , NULL , 2 }, /* write pattern space in 'visually unambiguous form' */
['n'] = { cmd_n , NULL , NULL , 2 }, /* write pattern space (unless -n) read to replace pattern space (if no input, quit) */
['N'] = { cmd_N , NULL , NULL , 2 }, /* append to pattern space separated by newline, line number changes (if no input, quit) */
['p'] = { cmd_p , NULL , NULL , 2 }, /* write pattern space */
['P'] = { cmd_P , NULL , NULL , 2 }, /* write pattern space up to first newline */
['q'] = { cmd_q , NULL , NULL , 1 }, /* quit */
['r'] = { cmd_r , get_r_arg , free_acir_arg , 1 }, /* write contents of file (unable to open/read treated as empty file) */
['s'] = { cmd_s , get_s_arg , free_s_arg , 2 }, /* find/replace/all that crazy s stuff */
['t'] = { cmd_t , get_bt_arg , NULL , 2 }, /* if s/// succeeded (since input or last t) branch to label (branch to end if no label) */
['w'] = { cmd_w , get_w_arg , NULL , 2 }, /* append pattern space to file */
['x'] = { cmd_x , NULL , NULL , 2 }, /* exchange pattern and hold spaces */
['y'] = { cmd_y , get_y_arg , free_y_arg , 2 }, /* replace runes in set1 with runes in set2 */
[':'] = { cmd_colon , get_colon_arg , NULL , 0 }, /* defines label for later b and t commands */
['='] = { cmd_equal , NULL , NULL , 1 }, /* printf("%d\n", line_number); */
['{'] = { cmd_lbrace, get_lbrace_arg, NULL , 2 }, /* if we match, run commands, otherwise jump to close */
['}'] = { cmd_rbrace, get_rbrace_arg, NULL , 0 }, /* noop, hold onto open for ease of building scripts */
[0x7f] = { NULL, NULL, NULL, 0 }, /* index is checked with isascii(3p). fill out rest of array */
};
/*
* Function Definitions
*/
/* given memory pointed to by *ptr that currently holds *nmemb members of size
* size, realloc to hold new_nmemb members, return new_nmemb in *memb and one
* past old end in *next. if realloc fails...explode
*/
void
resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next)
{
void *n, *tmp;
if (new_nmemb) {
tmp = ereallocarray(*ptr, new_nmemb, size);
} else { /* turns out realloc(*ptr, 0) != free(*ptr) */
free(*ptr);
tmp = NULL;
}
n = (char *)tmp + *nmemb * size;
*nmemb = new_nmemb;
*ptr = tmp;
if (next)
*next = n;
}
void *
pop(Vec *v)
{
if (!v->size)
return NULL;
return v->data[--v->size];
}
void
push(Vec *v, void *p)
{
if (v->size == v->cap)
resize((void **)&v->data, &v->cap, sizeof(*v->data), v->cap * 2 + 1, NULL);
v->data[v->size++] = p;
}
void
stracat(String *dst, char *src)
{
int new = !dst->cap;
size_t len;
len = (new ? 0 : strlen(dst->str)) + strlen(src) + 1;
if (dst->cap < len)
resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
if (new)
*dst->str = '\0';
strcat(dst->str, src);
}
void
strnacat(String *dst, char *src, size_t n)
{
int new = !dst->cap;
size_t len;
len = strlen(src);
len = (new ? 0 : strlen(dst->str)) + MIN(n, len) + 1;
if (dst->cap < len)
resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
if (new)
*dst->str = '\0';
strlcat(dst->str, src, len);
}
void
stracpy(String *dst, char *src)
{
size_t len;
len = strlen(src) + 1;
if (dst->cap < len)
resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
strcpy(dst->str, src);
}
void
strnacpy(String *dst, char *src, size_t n)
{
size_t len;
len = strlen(src);
len = strlen(dst->str) + MIN(n, len) + 1;
if (dst->cap < len)
resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
strlcpy(dst->str, src, len);
}
void
leprintf(char *s)
{
if (errno)
eprintf("%zu: %s: %s\n", lineno, s, strerror(errno));
else
eprintf("%zu: %s\n", lineno, s);
}
/* FIXME: write usage message */
static void
usage(void)
{
eprintf("USAGE\n");
}
/* Differences from POSIX
* we allows semicolons and trailing blanks inside {}
* we allow spaces after ! (and in between !s)
*/
void
compile(char *s, int isfile)
{
FILE *f;
if (!isfile && !*s) /* empty string script */
return;
f = isfile ? fopen(s, "r") : fmemopen(s, strlen(s), "r");
if (!f)
eprintf("fopen/fmemopen failed\n");
/* NOTE: get arg functions can't use genbuf */
while (read_line(f, &genbuf) != EOF) {
s = genbuf.str;
/* if the first two characters of the script are "#n" default output shall be suppressed */
if (++lineno == 1 && *s == '#' && s[1] == 'n') {
gflags.n = 1;
continue;
}
if (gflags.aci_cont) {
aci_append(pc - 1, s);
continue;
}
if (gflags.s_cont)
s = (pc - 1)->fninfo->getarg(pc - 1, s);
while (*s) {
s = chompr(s, ';');
if (!*s || *s == '#')
break;
if ((size_t)(pc - prog) == pcap)
resize((void **)&prog, &pcap, sizeof(*prog), pcap * 2 + 1, (void **)&pc);
pc->range.beg.type = pc->range.end.type = IGNORE;
pc->fninfo = NULL;
pc->in_match = 0;
s = make_range(&pc->range, s);
s = chomp(s);
pc->negate = *s == '!';
s = chompr(s, '!');
if (!isascii(*s) || !(pc->fninfo = &fns[(unsigned)*s])->fn)
leprintf("bad sed function");
if (pc->range.naddr > pc->fninfo->naddr)
leprintf("wrong number of addresses");
s++;
if (pc->fninfo->getarg)
s = pc->fninfo->getarg(pc, s);
pc++;
}
}
if (fclose(f))
weprintf("fclose failed\n");
}
/* FIXME: if we decide to honor lack of trailing newline, set/clear a global
* flag when reading a line
*/
int
read_line(FILE *f, String *s)
{
ssize_t len;
if (!f)
return EOF;
if ((len = getline(&s->str, &s->cap, f)) < 0) {
if (ferror(f))
eprintf("getline failed\n");
return EOF;
}
if (s->str[--len] == '\n')
s->str[len] = '\0';
return 0;
}
/* read first range from s, return pointer to one past end of range */
char *
make_range(Range *range, char *s)
{
s = make_addr(&range->beg, s);
if (*s == ',')
s = make_addr(&range->end, s + 1);
else
range->end.type = IGNORE;
if (range->beg.type == EVERY && range->end.type == IGNORE) range->naddr = 0;
else if (range->beg.type != IGNORE && range->end.type == IGNORE) range->naddr = 1;
else if (range->beg.type != IGNORE && range->end.type != IGNORE) range->naddr = 2;
else leprintf("this is impossible...");
return s;
}
/* read first addr from s, return pointer to one past end of addr */
char *
make_addr(Addr *addr, char *s)
{
Rune r;
char *p = s + strlen(s);
size_t rlen = echarntorune(&r, s, p - s);
if (r == '$') {
addr->type = LAST;
s += rlen;
} else if (isdigitrune(r)) {
addr->type = LINE;
addr->u.lineno = stol(s, &s);
} else if (r == '/' || r == '\\') {
Rune delim;
if (r == '\\') {
s += rlen;
rlen = echarntorune(&r, s, p - s);
}
if (r == '\\')
leprintf("bad delimiter '\\'");
delim = r;
s += rlen;
rlen = echarntorune(&r, s, p - s);
if (r == delim) {
addr->type = LASTRE;
s += rlen;
} else {
addr->type = REGEX;
p = find_delim(s, delim, 1);
if (!*p)
leprintf("unclosed regex");
p -= escapes(s, p, delim, 0);
*p++ = '\0';
addr->u.re = emalloc(sizeof(*addr->u.re));
eregcomp(addr->u.re, s, 0);
s = p;
}
} else {
addr->type = EVERY;
}
return s;
}
/* return pointer to first delim in s that is not escaped
* and if do_brackets is set, not in [] (note possible [::], [..], [==], inside [])
* return pointer to trailing nul byte if no delim found
*
* any escaped character that is not special is just itself (POSIX undefined)
* FIXME: pull out into some util thing, will be useful for ed as well
*/
char *
find_delim(char *s, Rune delim, int do_brackets)
{
enum {
OUTSIDE , /* not in brackets */
BRACKETS_OPENING, /* last char was first [ or last two were first [^ */
BRACKETS_INSIDE , /* inside [] */
INSIDE_OPENING , /* inside [] and last char was [ */
CLASS_INSIDE , /* inside class [::], or colating element [..] or [==], inside [] */
CLASS_CLOSING , /* inside class [::], or colating element [..] or [==], and last character was the respective : . or = */
} state = OUTSIDE;
Rune r, c = 0; /* no c won't be used uninitialized, shutup -Wall */
size_t rlen;
int escape = 0;
char *end = s + strlen(s);
for (; *s; s += rlen) {
rlen = echarntorune(&r, s, end - s);
if (state == BRACKETS_OPENING && r == '^' ) { continue; }
else if (state == BRACKETS_OPENING && r == ']' ) { state = BRACKETS_INSIDE ; continue; }
else if (state == BRACKETS_OPENING ) { state = BRACKETS_INSIDE ; }
if (state == CLASS_CLOSING && r == ']' ) { state = BRACKETS_INSIDE ; }
else if (state == CLASS_CLOSING ) { state = CLASS_INSIDE ; }
else if (state == CLASS_INSIDE && r == c ) { state = CLASS_CLOSING ; }
else if (state == INSIDE_OPENING && (r == ':' ||
r == '.' ||
r == '=') ) { state = CLASS_INSIDE ; c = r; }
else if (state == INSIDE_OPENING && r == ']' ) { state = OUTSIDE ; }
else if (state == BRACKETS_INSIDE && r == '[' ) { state = INSIDE_OPENING ; }
else if (state == BRACKETS_INSIDE && r == ']' ) { state = OUTSIDE ; }
else if (state == OUTSIDE && escape ) { escape = 0 ; }
else if (state == OUTSIDE && r == '\\' ) { escape = 1 ; }
else if (state == OUTSIDE && do_brackets && r == '[' ) { state = BRACKETS_OPENING; }
else if (state == OUTSIDE && r == delim) return s;
}
return s;
}
char *
chomp(char *s)
{
return chompr(s, 0);
}
/* eat all leading whitespace and occurrences of rune */
char *
chompr(char *s, Rune rune)
{
Rune r;
size_t rlen;
char *end = s + strlen(s);
while (*s && (rlen = echarntorune(&r, s, end - s)) && (isspacerune(r) || r == rune))
s += rlen;
return s;
}
/* convert first nrunes Runes from UTF-8 string s in allocated Rune*
* NOTE: sequence must be valid UTF-8, check first */
Rune *
strtorunes(char *s, size_t nrunes)
{
Rune *rs, *rp;
rp = rs = ereallocarray(NULL, nrunes + 1, sizeof(*rs));
while (nrunes--)
s += chartorune(rp++, s);
*rp = '\0';
return rs;
}
long
stol(char *s, char **endp)
{
long n;
errno = 0;
n = strtol(s, endp, 10);
if (errno)
leprintf("strtol:");
if (*endp == s)
leprintf("strtol: invalid number");
return n;
}
/* from beg to end replace "\\d" with "d" and "\\n" with "\n" (where d is delim)
* if delim is 'n' and n_newline is 0 then "\\n" is replaced with "n" (normal)
* if delim is 'n' and n_newline is 1 then "\\n" is replaced with "\n" (y command)
* if delim is 0 all escaped characters represent themselves (aci text)
* memmove rest of string (beyond end) into place
* return the number of converted escapes (backslashes removed)
* FIXME: this has had too many corner cases slapped on and is ugly. rewrite better
*/
size_t
escapes(char *beg, char *end, Rune delim, int n_newline)
{
size_t num = 0;
char *src = beg, *dst = beg;
while (src < end) {
/* handle escaped backslash specially so we don't think the second
* backslash is escaping something */
if (*src == '\\' && src[1] == '\\') {
*dst++ = *src++;
if (delim)
*dst++ = *src++;
else
src++;
} else if (*src == '\\' && !delim) {
src++;
} else if (*src == '\\' && src[1]) {
Rune r;
size_t rlen;
num++;
src++;
rlen = echarntorune(&r, src, end - src);
if (r == 'n' && delim == 'n') {
*src = n_newline ? '\n' : 'n'; /* src so we can still memmove() */
} else if (r == 'n') {
*src = '\n';
} else if (r != delim) {
*dst++ = '\\';
num--;
}
memmove(dst, src, rlen);
dst += rlen;
src += rlen;
} else {
*dst++ = *src++;
}
}
memmove(dst, src, strlen(src) + 1);
return num;
}
size_t
echarntorune(Rune *r, char *s, size_t n)
{
size_t rlen = charntorune(r, s, n);
if (!rlen || *r == Runeerror)
leprintf("invalid UTF-8");
return rlen;
}
void
insert_labels(void)
{
while (branches.size) {
Cmd *from = prog + (ptrdiff_t)pop(&branches);
if (!from->u.label) {/* no label branch to end of script */
from->u.jump = pc - 1;
} else {
size_t i;
Cmd *to;
for (i = 0; i < labels.size; i++) {
to = prog + (ptrdiff_t)labels.data[i];
if (!strcmp(from->u.label, to->u.label)) {
from->u.jump = to;
break;
}
}
if (i == labels.size)
leprintf("bad label");
}
}
}
/*
* Getargs / Freeargs
* Read argument from s, return pointer to one past last character of argument
*/
/* POSIX compliant
* i\
* foobar
*
* also allow the following non POSIX compliant
* i # empty line
* ifoobar
* ifoobar\
* baz
*
* FIXME: GNU and busybox discard leading spaces
* i foobar
* i foobar
* ifoobar
* are equivalent in GNU and busybox. We don't. Should we?
*/
char *
get_aci_arg(Cmd *c, char *s)
{
c->u.acir.print = check_puts;
c->u.acir.str = (String){ NULL, 0 };
gflags.aci_cont = !!*s; /* no continue flag if empty string */
/* neither empty string nor POSIX compliant */
if (*s && !(*s == '\\' && !s[1]))
aci_append(c, s);
return s + strlen(s);
}
void
aci_append(Cmd *c, char *s)
{
char *end = s + strlen(s), *p = end;
gflags.aci_cont = 0;
while (--p >= s && *p == '\\')
gflags.aci_cont = !gflags.aci_cont;
if (gflags.aci_cont)
*--end = '\n';
escapes(s, end, 0, 0);
stracat(&c->u.acir.str, s);
}
void
free_acir_arg(Cmd *c)
{
free(c->u.acir.str.str);
}
/* POSIX dictates that label is rest of line, including semicolons, trailing
* whitespace, closing braces, etc. and can be limited to 8 bytes
*
* I allow a semicolon or closing brace to terminate a label name, it's not
* POSIX compliant, but it's useful and every sed version I've tried to date
* does the same.
*
* FIXME: POSIX dictates that leading whitespace is ignored but trailing
* whitespace is not. This is annoying and we should probably get rid of it.
*/
char *
get_bt_arg(Cmd *c, char *s)
{
char *p = semicolon_arg(s = chomp(s));
if (p != s) {
c->u.label = estrndup(s, p - s);
} else {
c->u.label = NULL;
}
push(&branches, (void *)(c - prog));
return p;
}
/* POSIX dictates file name is rest of line including semicolons, trailing
* whitespace, closing braces, etc. and file name must be preceded by a space
*
* I allow a semicolon or closing brace to terminate a file name and don't
* enforce leading space.
*
* FIXME: decide whether trailing whitespace should be included and fix
* accordingly
*/
char *
get_r_arg(Cmd *c, char *s)
{
char *p = semicolon_arg(s = chomp(s));
if (p == s)
leprintf("no file name");
c->u.acir.str.str = estrndup(s, p - s);
c->u.acir.print = write_file;
return p;
}
/* we allow "\\n" in replacement text to mean "\n" (undefined in POSIX)
*
* FIXME: allow other escapes in regex and replacement? if so change escapes()
*/
char *
get_s_arg(Cmd *c, char *s)
{
Rune delim, r;
Cmd buf;
char *p;
int esc;
/* s/Find/Replace/Flags */
/* Find */
if (!gflags.s_cont) { /* NOT continuing from literal newline in replacement text */
int lastre = 0;
c->u.s.repl = (String){ NULL, 0 };
c->u.s.occurrence = 1;
c->u.s.file = NULL;
c->u.s.p = 0;
if (!*s || *s == '\\')
leprintf("bad delimiter");
p = s + strlen(s);
s += echarntorune(&delim, s, p - s);
c->u.s.delim = delim;
echarntorune(&r, s, p - s);
if (r == delim) /* empty regex */
lastre = 1;
p = find_delim(s, delim, 1);
if (!*p)
leprintf("missing second delimiter");
p -= escapes(s, p, delim, 0);
*p = '\0';
if (lastre) {
c->u.s.re = NULL;
} else {
c->u.s.re = emalloc(sizeof(*c->u.s.re));
/* FIXME: different eregcomp that calls fatal */
eregcomp(c->u.s.re, s, 0);
}
s = p + 1;
}
/* Replace */
delim = c->u.s.delim;
p = find_delim(s, delim, 0);
p -= escapes(s, p, delim, 0);
if (!*p) { /* no third delimiter */
/* FIXME: same backslash counting as aci_append() */
if (p[-1] != '\\')
leprintf("missing third delimiter or <backslash><newline>");
p[-1] = '\n';
gflags.s_cont = 1;
} else {
gflags.s_cont = 0;
}
/* check for bad references in replacement text */
*p = '\0';
for (esc = 0, p = s; *p; p++) {
if (esc) {
esc = 0;
if (isdigit(*p) && c->u.s.re && (size_t)(*p - '0') > c->u.s.re->re_nsub)
leprintf("back reference number greater than number of groups");
} else if (*p == '\\') {
esc = 1;
}
}
stracat(&c->u.s.repl, s);
if (gflags.s_cont)
return p;
s = p + 1;
/* Flags */
p = semicolon_arg(s = chomp(s));
/* FIXME: currently for simplicity take last of g or occurrence flags and
* ignore multiple p flags. need to fix that */
for (; s < p; s++) {
if (isdigit(*s)) {
c->u.s.occurrence = stol(s, &s);
} else {
switch (*s) {
case 'g': c->u.s.occurrence = 0; break;
case 'p': c->u.s.p = 1; break;
case 'w':
/* must be last flag, take everything up to newline/semicolon
* s == p after this */
s = get_w_arg(&buf, s);
c->u.s.file = buf.u.file;
break;
}
}
}
return p;
}
void
free_s_arg(Cmd *c)
{
if (c->u.s.re) {
regfree(c->u.s.re);
free(c->u.s.re);
}
free(c->u.s.repl.str);
}
/* see get_r_arg notes */
char *
get_w_arg(Cmd *c, char *s)
{
char *p = semicolon_arg(s = chomp(s));
Wfile *w, **wp;
if (p == s)
leprintf("no file name");
/* man -Wsigncompare is annoying */
for (wp = (Wfile **)wfiles.data; (size_t)(wp - (Wfile **)wfiles.data) < wfiles.size; wp++) {
if (strlen((*wp)->path) == (size_t)(p - s) && !strncmp(s, (*wp)->path, p - s)) {
c->u.file = (*wp)->file;
return p;
}
}
w = emalloc(sizeof(*w));
w->path = estrndup(s, p - s);
if (!(w->file = fopen(w->path, "w")))
leprintf("fopen failed");
c->u.file = w->file;
push(&wfiles, w);
return p;
}
char *
get_y_arg(Cmd *c, char *s)
{
Rune delim;
char *p = s + strlen(s);
size_t rlen = echarntorune(&delim, s, p - s);
size_t nrunes1, nrunes2;
c->u.y.set1 = c->u.y.set2 = NULL;
s += rlen;
p = find_delim(s, delim, 0);
p -= escapes(s, p, delim, 1);
nrunes1 = utfnlen(s, p - s);
c->u.y.set1 = strtorunes(s, nrunes1);
s = p + rlen;
p = find_delim(s, delim, 0);
p -= escapes(s, p, delim, 1);
nrunes2 = utfnlen(s, p - s);
if (nrunes1 != nrunes2)
leprintf("different set lengths");
c->u.y.set2 = strtorunes(s, utfnlen(s, p - s));
return p + rlen;
}
void
free_y_arg(Cmd *c)
{
free(c->u.y.set1);
free(c->u.y.set2);
}
/* see get_bt_arg notes */
char *
get_colon_arg(Cmd *c, char *s)
{
char *p = semicolon_arg(s = chomp(s));
if (p == s)
leprintf("no label name");
c->u.label = estrndup(s, p - s);