Add Unicode character class support
After thinking about it long enough, the solution seemed almost trivial.
This commit is contained in:
parent
369bb01eb1
commit
09704afc24
77
tr.c
77
tr.c
@ -1,3 +1,4 @@
|
|||||||
|
#include <wctype.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
@ -14,34 +15,31 @@ struct range {
|
|||||||
size_t quant;
|
size_t quant;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define DIGIT "0-9"
|
|
||||||
#define UPPER "A-Z"
|
|
||||||
#define LOWER "a-z"
|
|
||||||
#define PUNCT "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
|
||||||
#define ALNUM DIGIT UPPER LOWER
|
|
||||||
|
|
||||||
static struct {
|
static struct {
|
||||||
char *name;
|
char *name;
|
||||||
char *str;
|
int (*check)(wint_t);
|
||||||
} classes[] = {
|
} classes[] = {
|
||||||
{ "alnum", ALNUM },
|
{ "alnum", iswalnum },
|
||||||
{ "alpha", UPPER LOWER },
|
{ "alpha", iswalpha },
|
||||||
{ "blank", " \t" },
|
{ "blank", iswblank },
|
||||||
{ "cntrl", "\000-\037\177" },
|
{ "cntrl", iswcntrl },
|
||||||
{ "digit", DIGIT },
|
{ "digit", iswdigit },
|
||||||
{ "graph", ALNUM PUNCT },
|
{ "graph", iswgraph },
|
||||||
{ "lower", LOWER },
|
{ "lower", iswlower },
|
||||||
{ "print", ALNUM PUNCT " " },
|
{ "print", iswprint }, /* printable characters, including space */
|
||||||
{ "punct", PUNCT },
|
{ "punct", iswpunct },
|
||||||
{ "space", "\t\n\v\f\r" },
|
{ "space", iswspace },
|
||||||
{ "upper", UPPER },
|
{ "upper", iswupper },
|
||||||
{ "xdigit", DIGIT "A-Fa-f" },
|
{ "xdigit", iswxdigit },
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct range *set1 = NULL;
|
static struct range *set1 = NULL;
|
||||||
static size_t set1ranges = 0;
|
static size_t set1ranges = 0;
|
||||||
static struct range *set2 = NULL;
|
static int (*set1check)(wint_t) = NULL;
|
||||||
static size_t set2ranges = 0;
|
static struct range *set2 = NULL;
|
||||||
|
static size_t set2ranges = 0;
|
||||||
|
static int (*set2check)(wint_t) = NULL;
|
||||||
|
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
rangelen(struct range r)
|
rangelen(struct range r)
|
||||||
@ -72,16 +70,13 @@ rstrmatch(Rune *r, char *s, size_t n)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
makeset(char *str, struct range **set)
|
makeset(char *str, struct range **set, int (**check)(wint_t))
|
||||||
{
|
{
|
||||||
Rune *rstr;
|
Rune *rstr;
|
||||||
size_t len, i, j, m, n;
|
size_t len, i, j, m, n;
|
||||||
size_t q, setranges;
|
size_t q, setranges = 0;
|
||||||
int factor, base;
|
int factor, base;
|
||||||
|
|
||||||
reset:
|
|
||||||
setranges = 0;
|
|
||||||
|
|
||||||
/* rstr defines at most len ranges */
|
/* rstr defines at most len ranges */
|
||||||
len = chartorunearr(str, &rstr);
|
len = chartorunearr(str, &rstr);
|
||||||
*set = emalloc(len * sizeof(**set));
|
*set = emalloc(len * sizeof(**set));
|
||||||
@ -111,8 +106,8 @@ nextbrack:
|
|||||||
if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
|
if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
|
||||||
for (n = 0; n < LEN(classes); n++) {
|
for (n = 0; n < LEN(classes); n++) {
|
||||||
if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
|
if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
|
||||||
str = classes[n].str;
|
*check = classes[n].check;
|
||||||
goto reset;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
eprintf("Invalid character class\n");
|
eprintf("Invalid character class\n");
|
||||||
@ -193,10 +188,10 @@ main(int argc, char *argv[])
|
|||||||
|
|
||||||
if (argc < 1 || argc > 2 || (argc == 1 && dflag == sflag))
|
if (argc < 1 || argc > 2 || (argc == 1 && dflag == sflag))
|
||||||
usage();
|
usage();
|
||||||
set1ranges = makeset(argv[0], &set1);
|
set1ranges = makeset(argv[0], &set1, &set1check);
|
||||||
if (argc == 2)
|
if (argc == 2)
|
||||||
set2ranges = makeset(argv[1], &set2);
|
set2ranges = makeset(argv[1], &set2, &set2check);
|
||||||
if (!dflag && !set2ranges)
|
if (dflag == sflag && !set2ranges && !set2check)
|
||||||
eprintf("set2 must be non-empty\n");
|
eprintf("set2 must be non-empty\n");
|
||||||
read:
|
read:
|
||||||
if (!readrune("<stdin>", stdin, &r))
|
if (!readrune("<stdin>", stdin, &r))
|
||||||
@ -232,6 +227,20 @@ read:
|
|||||||
goto write;
|
goto write;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (set1check && set1check(r)) {
|
||||||
|
if (dflag && !cflag)
|
||||||
|
goto read;
|
||||||
|
if (sflag) {
|
||||||
|
if (r == lastrune)
|
||||||
|
goto read;
|
||||||
|
else
|
||||||
|
goto write;
|
||||||
|
}
|
||||||
|
if (set1check == iswupper && set2check == iswlower)
|
||||||
|
r = towlower(r);
|
||||||
|
if (set1check == iswlower && set2check == iswupper)
|
||||||
|
r = towupper(r);
|
||||||
|
}
|
||||||
if (dflag && cflag)
|
if (dflag && cflag)
|
||||||
goto read;
|
goto read;
|
||||||
if (dflag && sflag && r == lastrune)
|
if (dflag && sflag && r == lastrune)
|
||||||
|
Loading…
Reference in New Issue
Block a user