sbase/tr.c

275 lines
5.7 KiB
C
Raw Normal View History

2015-01-20 10:26:08 -05:00
/* See LICENSE file for copyright and license details. */
2013-11-15 11:25:10 -05:00
#include <stdlib.h>
#include "utf.h"
2013-11-15 11:25:10 -05:00
#include "util.h"
/*
 * Option switches, raised in main() while parsing the command line:
 * cflag for -c/-C, dflag for -d, sflag for -s.
 * Objects with static storage duration start out as zero.
 */
static int cflag, dflag, sflag;
/*
 * One entry of a parsed set: the inclusive rune interval [start, end].
 * quant is the multiplier rangelen() applies to the interval width;
 * makeset() stores 1 for plain characters and ranges and the repeat
 * count for the [c*n] construct.
 */
struct range {
	Rune   start;  /* first rune of the interval */
	Rune   end;    /* last rune of the interval (inclusive) */
	size_t quant;  /* how many set positions each rune occupies */
};
2015-01-10 09:21:09 -05:00
/*
 * Character classes accepted inside [:name:], each paired with the
 * predicate that decides whether a rune belongs to the class.
 */
static struct {
	char *name;
	int (*check)(Rune);
} classes[] = {
	{ .name = "alnum",  .check = isalnumrune  },
	{ .name = "alpha",  .check = isalpharune  },
	{ .name = "blank",  .check = isblankrune  },
	{ .name = "cntrl",  .check = iscntrlrune  },
	{ .name = "digit",  .check = isdigitrune  },
	{ .name = "graph",  .check = isgraphrune  },
	{ .name = "lower",  .check = islowerrune  },
	{ .name = "print",  .check = isprintrune  },
	{ .name = "punct",  .check = ispunctrune  },
	{ .name = "space",  .check = isspacerune  },
	{ .name = "upper",  .check = isupperrune  },
	{ .name = "xdigit", .check = isxdigitrune },
};
/*
 * Parsed operands, filled in by main() through makeset().  For each set
 * either the range array (with its element count) or the class predicate
 * is used.  Static storage leaves the pointers NULL and the counts zero
 * until then.
 */
static struct range *set1;
static size_t        set1ranges;
static int         (*set1check)(Rune);

static struct range *set2;
static size_t        set2ranges;
static int         (*set2check)(Rune);
/*
 * Number of set positions covered by a single range: the width of the
 * inclusive interval [start, end] multiplied by the range's quant.
 */
static size_t
rangelen(struct range r)
{
	return (r.end - r.start + 1) * r.quant;
}
static size_t
setlen(struct range *set, size_t setranges)
{
2015-01-10 11:38:28 -05:00
size_t len = 0, i;
for (i = 0; i < setranges; i++)
len += rangelen(set[i]);
2014-04-12 14:50:51 -04:00
return len;
2014-04-12 14:50:51 -04:00
}
/*
 * Compare the first n runes of r with the first n chars of s.
 * Returns 1 when every pair is equal (trivially so for n == 0) and 0 at
 * the first difference.  Only n elements are inspected, so this is a
 * prefix comparison: s may continue beyond position n.
 */
static int
rstrmatch(Rune *r, char *s, size_t n)
{
	size_t k = 0;

	while (k < n) {
		if (r[k] != s[k])
			return 0;
		k++;
	}

	return 1;
}
static size_t
makeset(char *str, struct range **set, int (**check)(Rune))
{
Rune *rstr;
size_t len, i, j, m, n;
size_t q, setranges = 0;
int factor, base;
/* rstr defines at most len ranges */
unescape(str);
rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
len = utftorunestr(str, rstr);
*set = ereallocarray(NULL, len, sizeof(**set));
for (i = 0; i < len; i++) {
if (rstr[i] == '[') {
j = i;
nextbrack:
if (j == len)
goto literal;
for (m = j; m < len; m++)
if (rstr[m] == ']') {
j = m;
break;
}
if (j == i)
goto literal;
/* CLASSES [=EQUIV=] (skip) */
if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
if (j - i != 4)
goto literal;
(*set)[setranges].start = rstr[i + 2];
(*set)[setranges].end = rstr[i + 2];
(*set)[setranges].quant = 1;
setranges++;
i = j;
continue;
}
2013-11-15 11:25:10 -05:00
/* CLASSES [:CLASS:] */
if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
for (n = 0; n < LEN(classes); n++) {
if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
*check = classes[n].check;
return 0;
}
}
eprintf("Invalid character class.\n");
}
2013-11-15 11:25:10 -05:00
/* REPEAT [_*n] (only allowed in set2) */
if (j - i > 2 && rstr[i + 2] == '*' && set1ranges > 0) {
/* check if right side of '*' is a number */
q = 0;
factor = 1;
base = (rstr[i + 3] == '0') ? 8 : 10;
for (n = j - 1; n > i + 2; n--) {
2015-04-20 15:17:53 -04:00
if (rstr[n] < '0' || rstr[n] > '9') {
n = 0;
break;
}
q += (rstr[n] - '0') * factor;
factor *= base;
}
if (n == 0) {
j = m + 1;
goto nextbrack;
}
(*set)[setranges].start = rstr[i + 1];
(*set)[setranges].end = rstr[i + 1];
(*set)[setranges].quant = q ? q : setlen(set1, set1ranges);
setranges++;
i = j;
continue;
}
2013-11-15 11:25:10 -05:00
j = m + 1;
goto nextbrack;
}
literal:
/* RANGES [_-__-_], _-__-_ */
/* LITERALS _______ */
(*set)[setranges].start = rstr[i];
if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
i += 2;
(*set)[setranges].end = rstr[i];
(*set)[setranges].quant = 1;
setranges++;
2013-11-15 11:25:10 -05:00
}
free(rstr);
return setranges;
}
/*
 * Report the command-line synopsis through eprintf(), naming the
 * program as argv0.
 */
static void
usage(void)
{
	eprintf("usage: %s [-cCds] set1 [set2]\n",
	        argv0);
}
2013-11-15 11:25:10 -05:00
int
main(int argc, char *argv[])
{
Rune r = 0, lastrune = 0;
2015-01-10 14:55:37 -05:00
size_t off1, off2, i, m;
2013-11-15 11:25:10 -05:00
ARGBEGIN {
case 'c':
case 'C':
cflag = 1;
break;
case 'd':
dflag = 1;
break;
case 's':
sflag = 1;
break;
2013-11-15 11:25:10 -05:00
default:
usage();
} ARGEND;
if (!argc || argc > 2 || (argc == 1 && dflag == sflag))
2013-11-15 11:25:10 -05:00
usage();
set1ranges = makeset(argv[0], &set1, &set1check);
if (argc == 2)
set2ranges = makeset(argv[1], &set2, &set2check);
if (dflag == sflag && !set2ranges && !set2check)
eprintf("set2 must be non-empty.\n");
if (set2check && set2check != islowerrune && set2check != isupperrune)
eprintf("set2 can only be the 'lower' or 'upper' class.\n");
if (set2check && cflag && !dflag)
eprintf("set2 can't be imaged to from a complement.\n");
read:
if (!efgetrune(&r, stdin, "<stdin>"))
Add *fshut() functions to properly flush file streams This has been a known issue for a long time. Example: printf "word" > /dev/full wouldn't report there's not enough space on the device. This is due to the fact that every libc has internal buffers for stdout which store fragments of written data until they reach a certain size or on some callback to flush them all at once to the kernel. You can force the libc to flush them with fflush(). In case flushing fails, you can check the return value of fflush() and report an error. However, previously, sbase didn't have such checks and without fflush(), the libc silently flushes the buffers on exit without checking the errors. No offense, but there's no way for the libc to report errors in the exit- condition. GNU coreutils solve this by having onexit-callbacks to handle the flushing and report issues, but they have obvious deficiencies. After long discussions on IRC, we came to the conclusion that checking the return value of every io-function would be a bit too much, and having a general-purpose fclose-wrapper would be the best way to go. It turned out that fclose() alone is not enough to detect errors. The right way to do it is to fflush() + check ferror on the fp and then to a fclose(). This is what fshut does and that's how it's done before each return. The return value is obviously affected, reporting an error in case a flush or close failed, but also when reading failed for some reason, the error- state is caught. the !!( ... + ...) construction is used to call all functions inside the brackets and not "terminating" on the first. We want errors to be reported, but there's no reason to stop flushing buffers when one other file buffer has issues. Obviously, functionales come before the flush and ret-logic comes after to prevent early exits as well without reporting warnings if there are any. 
One more advantage of fshut() is that it is even able to report errors on obscure NFS-setups which the other coreutils are unable to detect, because they only check the return-value of fflush() and fclose(), not ferror() as well.
2015-04-04 15:25:17 -04:00
return !!(fshut(stdin, "<stdin>") + fshut(stdout, "<stdout>"));
off1 = off2 = 0;
for (i = 0; i < set1ranges; i++) {
if (set1[i].start <= r && r <= set1[i].end) {
if (dflag) {
if (!cflag || (sflag && r == lastrune))
goto read;
else
goto write;
}
if (sflag) {
if (r == lastrune)
goto read;
else
goto write;
}
for (m = 0; m < i; m++)
off1 += rangelen(set1[m]);
off1 += r - set1[m].start;
if (off1 > setlen(set2, set2ranges) - 1) {
r = set2[set2ranges - 1].end;
goto write;
}
for (m = 0; m < set2ranges; m++) {
if (off2 + rangelen(set2[m]) > off1) {
m++;
break;
}
off2 += rangelen(set2[m]);
}
m--;
r = set2[m].start + (off1 - off2) / set2[m].quant;
2013-11-15 11:25:10 -05:00
goto write;
}
2013-11-15 11:25:10 -05:00
}
if (set1check && set1check(r)) {
if (dflag) {
if (!cflag || (sflag && r == lastrune))
goto read;
else
goto write;
}
if (sflag) {
if (r == lastrune)
goto read;
else
goto write;
}
if (set1check == isupperrune && set2check == islowerrune)
r = tolowerrune(r);
else if (set1check == islowerrune && set2check == isupperrune)
r = toupperrune(r);
else if (set2ranges > 0)
r = cflag ? r : set2[set2ranges - 1].end;
else
eprintf("Misaligned character classes.\n");
} else if (cflag && set2ranges > 0) {
r = set2[set2ranges - 1].end;
}
if (dflag && cflag)
goto read;
if (dflag && sflag && r == lastrune)
goto read;
write:
lastrune = r;
efputrune(&r, stdout, "<stdout>");
goto read;
2013-11-15 11:25:10 -05:00
}