kiss-mfavila-small/ports/xterm/xterm-359/charclass.c

/* $XTermId: charclass.c,v 1.41 2020/07/06 20:00:12 tom Exp $ */

/*
 * Copyright 2002-2017,2020 by Thomas E. Dickey
 *
 *                         All Rights Reserved
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Except as contained in this notice, the name(s) of the above copyright
 * holders shall not be used in advertising or otherwise to promote the
 * sale, use or other dealings in this Software without prior written
 * authorization.
 *
 *----------------------------------------------------------------------------
 * Compact and efficient reimplementation of the
 * xterm character class mechanism for large character sets
 *
 * Markus Kuhn -- mkuhn@acm.org -- 2000-07-03
 *
 * xterm allows users to select entire words with a double-click on the left
 * mouse button.  Opinions might differ on what type of characters are part of
 * separate words, therefore xterm allows users to configure a class code for
 * each 8-bit character.  Words are maximum length sequences of neighboring
 * characters with identical class code.  Extending this mechanism to Unicode
 * naively would create an at least 2^16 entries (128 kB) long class code
 * table.
 *
 * Instead, we transform the character class table into a list of intervals,
 * that will be accessed via a linear search.  Changes made to the table by the
 * user will be appended.  A special class code IDENT (default) marks
 * characters who have their code number as the class code.
 *
 * We could alternatively use a sorted table of non-overlapping intervals that
 * can be accessed via binary search, but merging in new intervals is
 * significantly more hassle and not worth the effort here.
 */

#include <xterm.h>
#include <charclass.h>

#if OPT_WIDE_CHARS

#ifdef TEST_DRIVER

#include <ctype.h>
#include <wchar.h>
#include <wctype.h>

#if OPT_TRACE
#define Trace if (opt_v) printf
#endif

#undef OPT_REPORT_CCLASS
#define OPT_REPORT_CCLASS 1
#endif /* TEST_DRIVER */

static struct classentry {
    int cclass;
    int first;
    int last;
} *classtab;

typedef enum {
    IDENT = -1,
    OTHER = 0,
    CNTRL = 1,
    ALNUM = 48,
    BLANK = 32,
    U_CJK = 0x4e00,
    U_SUP = 0x2070,
    U_SUB = 0x2080,
    U_HIR = 0x3040,
    U_KAT = 0x30a0,
    U_HAN = 0xac00
} Classes;

#ifdef TEST_DRIVER
static int opt_all;
static int opt_check;
static int opt_quiet;
static int opt_v;
#endif

void
init_classtab(void)
{
    const int size = 50;

    TRACE(("init_classtab {{\n"));

    classtab = TypeMallocN(struct classentry, (unsigned) size);
    if (!classtab)
	abort();
    classtab[0].cclass = size;
    classtab[0].first = 1;
    classtab[0].last = 0;

    /* old xterm default classes */
    SetCharacterClassRange(0, 0, BLANK);
    SetCharacterClassRange(1, 31, CNTRL);
    SetCharacterClassRange('\t', '\t', BLANK);
    SetCharacterClassRange('0', '9', ALNUM);
    SetCharacterClassRange('A', 'Z', ALNUM);
    SetCharacterClassRange('_', '_', ALNUM);
    SetCharacterClassRange('a', 'z', ALNUM);
    SetCharacterClassRange(127, 159, CNTRL);
    SetCharacterClassRange(160, 191, IDENT);
    SetCharacterClassRange(192, 255, ALNUM);
    SetCharacterClassRange(215, 215, IDENT);
    SetCharacterClassRange(247, 247, IDENT);

    /* added Unicode classes */
    SetCharacterClassRange(0x0100, 0xffdf, ALNUM);	/* mostly characters */
    SetCharacterClassRange(0x037e, 0x037e, IDENT);	/* Greek question mark */
    SetCharacterClassRange(0x0387, 0x0387, IDENT);	/* Greek ano teleia */
    SetCharacterClassRange(0x055a, 0x055f, IDENT);	/* Armenian punctuation */
    SetCharacterClassRange(0x0589, 0x0589, IDENT);	/* Armenian full stop */
    SetCharacterClassRange(0x0700, 0x070d, IDENT);	/* Syriac punctuation */
    SetCharacterClassRange(0x104a, 0x104f, IDENT);	/* Myanmar punctuation */
    SetCharacterClassRange(0x10fb, 0x10fb, IDENT);	/* Georgian punctuation */
    SetCharacterClassRange(0x1361, 0x1368, IDENT);	/* Ethiopic punctuation */
    SetCharacterClassRange(0x166d, 0x166e, IDENT);	/* Canadian Syl. punctuation */
    SetCharacterClassRange(0x17d4, 0x17dc, IDENT);	/* Khmer punctuation */
    SetCharacterClassRange(0x1800, 0x180a, IDENT);	/* Mongolian punctuation */
    SetCharacterClassRange(0x2000, 0x200a, BLANK);	/* spaces */
    SetCharacterClassRange(0x200b, 0x27ff, IDENT);	/* punctuation and symbols */
    SetCharacterClassRange(0x2070, 0x207f, U_SUP);	/* superscript */
    SetCharacterClassRange(0x2080, 0x208f, U_SUB);	/* subscript */
    SetCharacterClassRange(0x3000, 0x3000, BLANK);	/* ideographic space */
    SetCharacterClassRange(0x3001, 0x3020, IDENT);	/* ideographic punctuation */
    SetCharacterClassRange(0x3040, 0x309f, U_HIR);	/* Hiragana */
    SetCharacterClassRange(0x30a0, 0x30ff, U_KAT);	/* Katakana */
    SetCharacterClassRange(0x3300, 0x9fff, U_CJK);	/* CJK Ideographs */
    SetCharacterClassRange(0xac00, 0xd7a3, U_HAN);	/* Hangul Syllables */
    SetCharacterClassRange(0xf900, 0xfaff, U_CJK);	/* CJK Ideographs */
    SetCharacterClassRange(0xfe30, 0xfe6b, IDENT);	/* punctuation forms */
    SetCharacterClassRange(0xff00, 0xff0f, IDENT);	/* half/fullwidth ASCII */
    SetCharacterClassRange(0xff1a, 0xff20, IDENT);	/* half/fullwidth ASCII */
    SetCharacterClassRange(0xff3b, 0xff40, IDENT);	/* half/fullwidth ASCII */
    SetCharacterClassRange(0xff5b, 0xff64, IDENT);	/* half/fullwidth ASCII */

    TRACE(("}} init_classtab\n"));
    return;
}

int
CharacterClass(int c)
{
    int i, cclass = IDENT;

    for (i = classtab[0].first; i <= classtab[0].last; i++)
	if (classtab[i].first <= c && classtab[i].last >= c)
	    cclass = classtab[i].cclass;

    if (cclass < 0)
	cclass = c;

    return cclass;
}

#if OPT_REPORT_CCLASS
#define charFormat(code) ((code) > 255 ? "0x%04X" : "%d")
static const char *
class_name(Classes code)
{
    static char buffer[80];
    const char *result = "?";
    switch (code) {
    case ALNUM:
	result = "ALNUM";
	break;
    case BLANK:
	result = "BLANK";
	break;
    case CNTRL:
	result = "CNTRL";
	break;
    case OTHER:
	result = "OTHER";
	break;
    case IDENT:
	result = "IDENT";
	break;
    case U_SUP:
	result = "superscript";
	break;
    case U_SUB:
	result = "subscript";
	break;
    case U_CJK:
	result = "CJK Ideographs";
	break;
    case U_HIR:
	result = "Hiragana";
	break;
    case U_KAT:
	result = "Katakana";
	break;
    case U_HAN:
	result = "Hangul Syllables";
	break;
    default:
	sprintf(buffer, charFormat(code), code);
	result = buffer;
	break;
    }
    return result;
}

/*
 * Special convention for classtab[0]:
 * - classtab[0].cclass is the allocated number of entries in classtab
 * - classtab[0].first = 1 (first used entry in classtab)
 * - classtab[0].last is the last used entry in classtab
 */

int
SetCharacterClassRange(int low, int high, int value)
{
    TRACE(("...SetCharacterClassRange (U+%04X .. U+%04X) = %s\n",
	   low, high, class_name(value)));

    if (high < low)
	return -1;		/* nothing to do */

    /* make sure we have at least one free entry left at table end */
    if (classtab[0].last > classtab[0].cclass - 2) {
	classtab[0].cclass += 5 + classtab[0].cclass / 4;
	classtab = TypeRealloc(struct classentry,
			         (unsigned) classtab[0].cclass, classtab);
	if (!classtab)
	    abort();
    }

    /* simply append new interval to end of interval array */
    classtab[0].last++;
    classtab[classtab[0].last].first = low;
    classtab[classtab[0].last].last = high;
    classtab[classtab[0].last].cclass = value;

    return 0;
}

void
report_wide_char_class(void)
{
    static const Classes known_classes[] =
    {IDENT, ALNUM, CNTRL, BLANK, U_SUP, U_SUB, U_HIR, U_KAT, U_CJK, U_HAN};
    int i;

    printf("\n");
    printf("Unicode charClass data uses the last match\n");
    printf("from these overlapping intervals of character codes:\n");
    for (i = classtab[0].first; i <= classtab[0].last; i++) {
	printf("\tU+%04X .. U+%04X %s\n",
	       classtab[i].first,
	       classtab[i].last,
	       class_name(classtab[i].cclass));
    }
    printf("\n");
    printf("These class-names are used internally (the first character code in a class):\n");
    for (i = 0; i < (int) XtNumber(known_classes); ++i) {
	printf("\t");
	printf(charFormat(known_classes[i]), known_classes[i]);
	printf(" = %s\n", class_name(known_classes[i]));
    }
}
#endif /* OPT_REPORT_CCLASS */

#ifdef NO_LEAKS
void
noleaks_CharacterClass(void)
{
    if (classtab != 0) {
	free(classtab);
	classtab = 0;
    }
}
#endif
#endif /* OPT_WIDE_CHARS */

#ifdef TEST_DRIVER
#if OPT_WIDE_CHARS
static void
usage(void)
{
    static const char *msg[] =
    {
	"Usage: test_charclass [options] [c1[-c1b] [c2-[c2b] [...]]]",
	"",
	"Options:",
	" -a  show all data",
	" -s  show only summary",
	" -v  verbose"
    };
    size_t n;
    for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) {
	fprintf(stderr, "%s\n", msg[n]);
    }
    exit(EXIT_FAILURE);
}

static int
expected_class(int wch)
{
    int result = wch;
    wint_t ch = (wint_t) wch;
    if (ch == '\0' || ch == '\t') {
	result = BLANK;
    } else if (iswcntrl(ch)) {
	result = CNTRL;
    } else if (iswspace(ch)) {
	result = BLANK;
    } else if (ch < 127) {
	if (isalnum(ch) || ch == '_') {
	    result = ALNUM;
	}
    } else if (ch == 170 || ch == 181 || ch == 186) {
	;
    } else if (iswalnum(ch)) {
	result = ALNUM;
    }
    return result;
}

static int
show_cclass_range(int lo, int hi)
{
    int cclass = CharacterClass(lo);
    int ident = (cclass == lo);
    int more = 0;
    if (ident) {
	int ch;
	for (ch = lo + 1; ch <= hi; ch++) {
	    if (CharacterClass(ch) != ch) {
		ident = 0;
		break;
	    }
	}
	if (ident && (hi < 255)) {
	    ch = hi + 1;
	    if (CharacterClass(ch) == ch) {
		if (ch >= 255 || CharacterClass(ch + 1) != ch) {
		    more = 1;
		}
	    }
	}
    }
    if (!more) {
	if (lo == hi) {
	    printf("\t%d", lo);
	} else {
	    printf("\t%d-%d", lo, hi);
	}
	if (!ident)
	    printf(":%d", cclass);
	if (hi < 255)
	    printf(", \\");
	printf("\n");
    }
    return !more;
}

static void
report_resource(int first, int last)
{
    int class_p;
    int ch;
    int dh;

    class_p = CharacterClass(dh = first);
    for (ch = first; ch < last; ++ch) {
	int class_c = CharacterClass(ch);
	if (class_c != class_p) {
	    if (show_cclass_range(dh, ch - 1)) {
		dh = ch;
		class_p = class_c;
	    }
	}
    }
    if (dh < last - 1) {
	show_cclass_range(dh, last - 1);
    }
}

static int
decode_one(const char *source, char **target)
{
    int result = -1;
    long check;
    int radix = 0;
    if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') {
	source += 2;
	radix = 16;
    }
    check = strtol(source, target, radix);
    if (*target != NULL && *target != source)
	result = (int) check;
    return result;
}

static int
decode_range(const char *source, int *lo, int *hi)
{
    int result = 0;
    char *after1;
    char *after2;
    if ((*lo = decode_one(source, &after1)) >= 0) {
	after1 += strspn(after1, ":-.\t ");
	if ((*hi = decode_one(after1, &after2)) < 0) {
	    *hi = *lo;
	}
	result = 1;
    }
    return result;
}

static void
do_range(const char *source)
{
    int lo, hi;
    if (decode_range(source, &lo, &hi)) {
	if (opt_all) {
	    while (lo <= hi) {
		int other_rc = CharacterClass(lo);
		if (!opt_quiet)
		    printf("U+%04X\t%s\n", lo, class_name(other_rc));
		++lo;
	    }
	} else if (opt_check) {
	    while (lo <= hi) {
		int expect = expected_class(lo);
		int actual = CharacterClass(lo);
		if (actual != expect)
		    printf("U+%04X\t%s ->%s\n", lo,
			   class_name(expect),
			   class_name(actual));
		++lo;
	    }
	} else {
	    printf("\"charClass\" resource for [%d..%d]:\n", lo, hi);
	    report_resource(lo, hi + 1);
	}
    }
}
#endif /* OPT_WIDE_CHARS */

/*
 * TODO: add option to show do_range in hex
 */
int
main(int argc, char **argv ENVP_ARG)
{
#if OPT_WIDE_CHARS
    int ch;
#endif

    (void) argc;
    (void) argv;

#if OPT_WIDE_CHARS
    setlocale(LC_ALL, "");
    while ((ch = getopt(argc, argv, "acsv")) != -1) {
	switch (ch) {
	case 'a':
	    opt_all = 1;
	    break;
	case 'c':
	    opt_check = 1;
	    break;
	case 's':
	    opt_quiet = 1;
	    break;
	case 'v':
	    opt_v = 1;
	    break;
	default:
	    usage();
	}
    }
    init_classtab();

    if (optind >= argc) {
	do_range("0-255");
    } else {
	while (optind < argc) {
	    do_range(argv[optind++]);
	}
    }
    report_wide_char_class();
#else
    printf("wide-character support is not configured\n");
#endif /* OPT_WIDE_CHARS */
    return 0;
}
#endif /* TEST_DRIVER */
Update build instructions for XClock, remove hard Xaw dependency, package neXtaw, etc. 2021-05-15 14:52:30 -04:00			`/* $XTermId: charclass.c,v 1.41 2020/07/06 20:00:12 tom Exp $ */`

			`/*`
			`* Copyright 2002-2017,2020 by Thomas E. Dickey`
			`*`
			`* All Rights Reserved`
			`*`
			`* Permission is hereby granted, free of charge, to any person obtaining a`
			`* copy of this software and associated documentation files (the`
			`* "Software"), to deal in the Software without restriction, including`
			`* without limitation the rights to use, copy, modify, merge, publish,`
			`* distribute, sublicense, and/or sell copies of the Software, and to`
			`* permit persons to whom the Software is furnished to do so, subject to`
			`* the following conditions:`
			`*`
			`* The above copyright notice and this permission notice shall be included`
			`* in all copies or substantial portions of the Software.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS`
			`* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF`
			`* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.`
			`* IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY`
			`* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,`
			`* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE`
			`* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.`
			`*`
			`* Except as contained in this notice, the name(s) of the above copyright`
			`* holders shall not be used in advertising or otherwise to promote the`
			`* sale, use or other dealings in this Software without prior written`
			`* authorization.`
			`*`
			`*----------------------------------------------------------------------------`
			`* Compact and efficient reimplementation of the`
			`* xterm character class mechanism for large character sets`
			`*`
			`* Markus Kuhn -- mkuhn@acm.org -- 2000-07-03`
			`*`
			`* xterm allows users to select entire words with a double-click on the left`
			`* mouse button. Opinions might differ on what type of characters are part of`
			`* separate words, therefore xterm allows users to configure a class code for`
			`* each 8-bit character. Words are maximum length sequences of neighboring`
			`* characters with identical class code. Extending this mechanism to Unicode`
			`* naively would create an at least 2^16 entries (128 kB) long class code`
			`* table.`
			`*`
			`* Instead, we transform the character class table into a list of intervals,`
			`* that will be accessed via a linear search. Changes made to the table by the`
			`* user will be appended. A special class code IDENT (default) marks`
			`* characters who have their code number as the class code.`
			`*`
			`* We could alternatively use a sorted table of non-overlapping intervals that`
			`* can be accessed via binary search, but merging in new intervals is`
			`* significantly more hassle and not worth the effort here.`
			`*/`

			`#include <xterm.h>`
			`#include <charclass.h>`

			`#if OPT_WIDE_CHARS`

			`#ifdef TEST_DRIVER`

			`#include <ctype.h>`
			`#include <wchar.h>`
			`#include <wctype.h>`

			`#if OPT_TRACE`
			`#define Trace if (opt_v) printf`
			`#endif`

			`#undef OPT_REPORT_CCLASS`
			`#define OPT_REPORT_CCLASS 1`
			`#endif /* TEST_DRIVER */`

			`static struct classentry {`
			`int cclass;`
			`int first;`
			`int last;`
			`} *classtab;`

			`typedef enum {`
			`IDENT = -1,`
			`OTHER = 0,`
			`CNTRL = 1,`
			`ALNUM = 48,`
			`BLANK = 32,`
			`U_CJK = 0x4e00,`
			`U_SUP = 0x2070,`
			`U_SUB = 0x2080,`
			`U_HIR = 0x3040,`
			`U_KAT = 0x30a0,`
			`U_HAN = 0xac00`
			`} Classes;`

			`#ifdef TEST_DRIVER`
			`static int opt_all;`
			`static int opt_check;`
			`static int opt_quiet;`
			`static int opt_v;`
			`#endif`

			`void`
			`init_classtab(void)`
			`{`
			`const int size = 50;`

			`TRACE(("init_classtab {{\n"));`

			`classtab = TypeMallocN(struct classentry, (unsigned) size);`
			`if (!classtab)`
			`abort();`
			`classtab[0].cclass = size;`
			`classtab[0].first = 1;`
			`classtab[0].last = 0;`

			`/* old xterm default classes */`
			`SetCharacterClassRange(0, 0, BLANK);`
			`SetCharacterClassRange(1, 31, CNTRL);`
			`SetCharacterClassRange('\t', '\t', BLANK);`
			`SetCharacterClassRange('0', '9', ALNUM);`
			`SetCharacterClassRange('A', 'Z', ALNUM);`
			`SetCharacterClassRange('_', '_', ALNUM);`
			`SetCharacterClassRange('a', 'z', ALNUM);`
			`SetCharacterClassRange(127, 159, CNTRL);`
			`SetCharacterClassRange(160, 191, IDENT);`
			`SetCharacterClassRange(192, 255, ALNUM);`
			`SetCharacterClassRange(215, 215, IDENT);`
			`SetCharacterClassRange(247, 247, IDENT);`

			`/* added Unicode classes */`
			`SetCharacterClassRange(0x0100, 0xffdf, ALNUM); /* mostly characters */`
			`SetCharacterClassRange(0x037e, 0x037e, IDENT); /* Greek question mark */`
			`SetCharacterClassRange(0x0387, 0x0387, IDENT); /* Greek ano teleia */`
			`SetCharacterClassRange(0x055a, 0x055f, IDENT); /* Armenian punctuation */`
			`SetCharacterClassRange(0x0589, 0x0589, IDENT); /* Armenian full stop */`
			`SetCharacterClassRange(0x0700, 0x070d, IDENT); /* Syriac punctuation */`
			`SetCharacterClassRange(0x104a, 0x104f, IDENT); /* Myanmar punctuation */`
			`SetCharacterClassRange(0x10fb, 0x10fb, IDENT); /* Georgian punctuation */`
			`SetCharacterClassRange(0x1361, 0x1368, IDENT); /* Ethiopic punctuation */`
			`SetCharacterClassRange(0x166d, 0x166e, IDENT); /* Canadian Syl. punctuation */`
			`SetCharacterClassRange(0x17d4, 0x17dc, IDENT); /* Khmer punctuation */`
			`SetCharacterClassRange(0x1800, 0x180a, IDENT); /* Mongolian punctuation */`
			`SetCharacterClassRange(0x2000, 0x200a, BLANK); /* spaces */`
			`SetCharacterClassRange(0x200b, 0x27ff, IDENT); /* punctuation and symbols */`
			`SetCharacterClassRange(0x2070, 0x207f, U_SUP); /* superscript */`
			`SetCharacterClassRange(0x2080, 0x208f, U_SUB); /* subscript */`
			`SetCharacterClassRange(0x3000, 0x3000, BLANK); /* ideographic space */`
			`SetCharacterClassRange(0x3001, 0x3020, IDENT); /* ideographic punctuation */`
			`SetCharacterClassRange(0x3040, 0x309f, U_HIR); /* Hiragana */`
			`SetCharacterClassRange(0x30a0, 0x30ff, U_KAT); /* Katakana */`
			`SetCharacterClassRange(0x3300, 0x9fff, U_CJK); /* CJK Ideographs */`
			`SetCharacterClassRange(0xac00, 0xd7a3, U_HAN); /* Hangul Syllables */`
			`SetCharacterClassRange(0xf900, 0xfaff, U_CJK); /* CJK Ideographs */`
			`SetCharacterClassRange(0xfe30, 0xfe6b, IDENT); /* punctuation forms */`
			`SetCharacterClassRange(0xff00, 0xff0f, IDENT); /* half/fullwidth ASCII */`
			`SetCharacterClassRange(0xff1a, 0xff20, IDENT); /* half/fullwidth ASCII */`
			`SetCharacterClassRange(0xff3b, 0xff40, IDENT); /* half/fullwidth ASCII */`
			`SetCharacterClassRange(0xff5b, 0xff64, IDENT); /* half/fullwidth ASCII */`

			`TRACE(("}} init_classtab\n"));`
			`return;`
			`}`

			`int`
			`CharacterClass(int c)`
			`{`
			`int i, cclass = IDENT;`

			`for (i = classtab[0].first; i <= classtab[0].last; i++)`
			`if (classtab[i].first <= c && classtab[i].last >= c)`
			`cclass = classtab[i].cclass;`

			`if (cclass < 0)`
			`cclass = c;`

			`return cclass;`
			`}`

			`#if OPT_REPORT_CCLASS`
			`#define charFormat(code) ((code) > 255 ? "0x%04X" : "%d")`
			`static const char *`
			`class_name(Classes code)`
			`{`
			`static char buffer[80];`
			`const char *result = "?";`
			`switch (code) {`
			`case ALNUM:`
			`result = "ALNUM";`
			`break;`
			`case BLANK:`
			`result = "BLANK";`
			`break;`
			`case CNTRL:`
			`result = "CNTRL";`
			`break;`
			`case OTHER:`
			`result = "OTHER";`
			`break;`
			`case IDENT:`
			`result = "IDENT";`
			`break;`
			`case U_SUP:`
			`result = "superscript";`
			`break;`
			`case U_SUB:`
			`result = "subscript";`
			`break;`
			`case U_CJK:`
			`result = "CJK Ideographs";`
			`break;`
			`case U_HIR:`
			`result = "Hiragana";`
			`break;`
			`case U_KAT:`
			`result = "Katakana";`
			`break;`
			`case U_HAN:`
			`result = "Hangul Syllables";`
			`break;`
			`default:`
			`sprintf(buffer, charFormat(code), code);`
			`result = buffer;`
			`break;`
			`}`
			`return result;`
			`}`

			`/*`
			`* Special convention for classtab[0]:`
			`* - classtab[0].cclass is the allocated number of entries in classtab`
			`* - classtab[0].first = 1 (first used entry in classtab)`
			`* - classtab[0].last is the last used entry in classtab`
			`*/`

			`int`
			`SetCharacterClassRange(int low, int high, int value)`
			`{`
			`TRACE(("...SetCharacterClassRange (U+%04X .. U+%04X) = %s\n",`
			`low, high, class_name(value)));`

			`if (high < low)`
			`return -1; /* nothing to do */`

			`/* make sure we have at least one free entry left at table end */`
			`if (classtab[0].last > classtab[0].cclass - 2) {`
			`classtab[0].cclass += 5 + classtab[0].cclass / 4;`
			`classtab = TypeRealloc(struct classentry,`
			`(unsigned) classtab[0].cclass, classtab);`
			`if (!classtab)`
			`abort();`
			`}`

			`/* simply append new interval to end of interval array */`
			`classtab[0].last++;`
			`classtab[classtab[0].last].first = low;`
			`classtab[classtab[0].last].last = high;`
			`classtab[classtab[0].last].cclass = value;`

			`return 0;`
			`}`

			`void`
			`report_wide_char_class(void)`
			`{`
			`static const Classes known_classes[] =`
			`{IDENT, ALNUM, CNTRL, BLANK, U_SUP, U_SUB, U_HIR, U_KAT, U_CJK, U_HAN};`
			`int i;`

			`printf("\n");`
			`printf("Unicode charClass data uses the last match\n");`
			`printf("from these overlapping intervals of character codes:\n");`
			`for (i = classtab[0].first; i <= classtab[0].last; i++) {`
			`printf("\tU+%04X .. U+%04X %s\n",`
			`classtab[i].first,`
			`classtab[i].last,`
			`class_name(classtab[i].cclass));`
			`}`
			`printf("\n");`
			`printf("These class-names are used internally (the first character code in a class):\n");`
			`for (i = 0; i < (int) XtNumber(known_classes); ++i) {`
			`printf("\t");`
			`printf(charFormat(known_classes[i]), known_classes[i]);`
			`printf(" = %s\n", class_name(known_classes[i]));`
			`}`
			`}`
			`#endif /* OPT_REPORT_CCLASS */`

			`#ifdef NO_LEAKS`
			`void`
			`noleaks_CharacterClass(void)`
			`{`
			`if (classtab != 0) {`
			`free(classtab);`
			`classtab = 0;`
			`}`
			`}`
			`#endif`
			`#endif /* OPT_WIDE_CHARS */`

			`#ifdef TEST_DRIVER`
			`#if OPT_WIDE_CHARS`
			`static void`
			`usage(void)`
			`{`
			`static const char *msg[] =`
			`{`
			`"Usage: test_charclass [options] [c1[-c1b] [c2-[c2b] [...]]]",`
			`"",`
			`"Options:",`
			`" -a show all data",`
			`" -s show only summary",`
			`" -v verbose"`
			`};`
			`size_t n;`
			`for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) {`
			`fprintf(stderr, "%s\n", msg[n]);`
			`}`
			`exit(EXIT_FAILURE);`
			`}`

			`static int`
			`expected_class(int wch)`
			`{`
			`int result = wch;`
			`wint_t ch = (wint_t) wch;`
			`if (ch == '\0' \|\| ch == '\t') {`
			`result = BLANK;`
			`} else if (iswcntrl(ch)) {`
			`result = CNTRL;`
			`} else if (iswspace(ch)) {`
			`result = BLANK;`
			`} else if (ch < 127) {`
			`if (isalnum(ch) \|\| ch == '_') {`
			`result = ALNUM;`
			`}`
			`} else if (ch == 170 \|\| ch == 181 \|\| ch == 186) {`
			`;`
			`} else if (iswalnum(ch)) {`
			`result = ALNUM;`
			`}`
			`return result;`
			`}`

			`static int`
			`show_cclass_range(int lo, int hi)`
			`{`
			`int cclass = CharacterClass(lo);`
			`int ident = (cclass == lo);`
			`int more = 0;`
			`if (ident) {`
			`int ch;`
			`for (ch = lo + 1; ch <= hi; ch++) {`
			`if (CharacterClass(ch) != ch) {`
			`ident = 0;`
			`break;`
			`}`
			`}`
			`if (ident && (hi < 255)) {`
			`ch = hi + 1;`
			`if (CharacterClass(ch) == ch) {`
			`if (ch >= 255 \|\| CharacterClass(ch + 1) != ch) {`
			`more = 1;`
			`}`
			`}`
			`}`
			`}`
			`if (!more) {`
			`if (lo == hi) {`
			`printf("\t%d", lo);`
			`} else {`
			`printf("\t%d-%d", lo, hi);`
			`}`
			`if (!ident)`
			`printf(":%d", cclass);`
			`if (hi < 255)`
			`printf(", \\");`
			`printf("\n");`
			`}`
			`return !more;`
			`}`

			`static void`
			`report_resource(int first, int last)`
			`{`
			`int class_p;`
			`int ch;`
			`int dh;`

			`class_p = CharacterClass(dh = first);`
			`for (ch = first; ch < last; ++ch) {`
			`int class_c = CharacterClass(ch);`
			`if (class_c != class_p) {`
			`if (show_cclass_range(dh, ch - 1)) {`
			`dh = ch;`
			`class_p = class_c;`
			`}`
			`}`
			`}`
			`if (dh < last - 1) {`
			`show_cclass_range(dh, last - 1);`
			`}`
			`}`

			`static int`
			`decode_one(const char source, char *target)`
			`{`
			`int result = -1;`
			`long check;`
			`int radix = 0;`
			`if ((source[0] == 'u' \|\| source[0] == 'U') && source[1] == '+') {`
			`source += 2;`
			`radix = 16;`
			`}`
			`check = strtol(source, target, radix);`
			`if (target != NULL && target != source)`
			`result = (int) check;`
			`return result;`
			`}`

			`static int`
			`decode_range(const char source, int lo, int *hi)`
			`{`
			`int result = 0;`
			`char *after1;`
			`char *after2;`
			`if ((*lo = decode_one(source, &after1)) >= 0) {`
			`after1 += strspn(after1, ":-.\t ");`
			`if ((*hi = decode_one(after1, &after2)) < 0) {`
			`hi = lo;`
			`}`
			`result = 1;`
			`}`
			`return result;`
			`}`

			`static void`
			`do_range(const char *source)`
			`{`
			`int lo, hi;`
			`if (decode_range(source, &lo, &hi)) {`
			`if (opt_all) {`
			`while (lo <= hi) {`
			`int other_rc = CharacterClass(lo);`
			`if (!opt_quiet)`
			`printf("U+%04X\t%s\n", lo, class_name(other_rc));`
			`++lo;`
			`}`
			`} else if (opt_check) {`
			`while (lo <= hi) {`
			`int expect = expected_class(lo);`
			`int actual = CharacterClass(lo);`
			`if (actual != expect)`
			`printf("U+%04X\t%s ->%s\n", lo,`
			`class_name(expect),`
			`class_name(actual));`
			`++lo;`
			`}`
			`} else {`
			`printf("\"charClass\" resource for [%d..%d]:\n", lo, hi);`
			`report_resource(lo, hi + 1);`
			`}`
			`}`
			`}`
			`#endif /* OPT_WIDE_CHARS */`

			`/*`
			`* TODO: add option to show do_range in hex`
			`*/`
			`int`
			`main(int argc, char **argv ENVP_ARG)`
			`{`
			`#if OPT_WIDE_CHARS`
			`int ch;`
			`#endif`

			`(void) argc;`
			`(void) argv;`

			`#if OPT_WIDE_CHARS`
			`setlocale(LC_ALL, "");`
			`while ((ch = getopt(argc, argv, "acsv")) != -1) {`
			`switch (ch) {`
			`case 'a':`
			`opt_all = 1;`
			`break;`
			`case 'c':`
			`opt_check = 1;`
			`break;`
			`case 's':`
			`opt_quiet = 1;`
			`break;`
			`case 'v':`
			`opt_v = 1;`
			`break;`
			`default:`
			`usage();`
			`}`
			`}`
			`init_classtab();`

			`if (optind >= argc) {`
			`do_range("0-255");`
			`} else {`
			`while (optind < argc) {`
			`do_range(argv[optind++]);`
			`}`
			`}`
			`report_wide_char_class();`
			`#else`
			`printf("wide-character support is not configured\n");`
			`#endif /* OPT_WIDE_CHARS */`
			`return 0;`
			`}`
			`#endif /* TEST_DRIVER */`