From 2758464a2efe7aa150900c1ba48c4d342bf3d0e6 Mon Sep 17 00:00:00 2001 From: Renaud Fivet Date: Mon, 16 Aug 2021 11:05:24 +0800 Subject: [PATCH] $viewtab = TRUE to visualize hardcoded tabs. Refactor &lef and &mid with stronger assertion on utf8_to_unicode(). --- display.c | 14 ++++--- display.h | 3 ++ eval.c | 93 +++++++++++++++++++---------------------- utf8.c | 121 +++++++++++++++++++++++++----------------------------- utf8.h | 2 +- 5 files changed, 111 insertions(+), 122 deletions(-) diff --git a/display.c b/display.c index fcf0126..492ed45 100644 --- a/display.c +++ b/display.c @@ -69,9 +69,11 @@ static int taboff = 0 ; /* tab offset for display */ int mpresf = FALSE ; /* TRUE if message in last line */ int scrollcount = 1 ; /* number of lines to scroll */ -int discmd = TRUE ; /* display command flag */ -int disinp = TRUE ; /* display input characters (echo) */ +int discmd = TRUE ; /* display command flag */ +int disinp = TRUE ; /* display input characters (echo) */ +/* global variables */ +boolean viewtab = FALSE ; /* $viewtab = TRUE to visualize hardcoded tab */ static int reframe( window_p wp) ; static void updone( window_p wp) ; @@ -198,11 +200,11 @@ static void vtputc( unicode_t c) { if( c > 0x10FFFF) /* Let's assume this is due to sign extension */ c &= 0xFF ; - if( c == '\t') - do { + if( c == '\t') { + sane_vtputc( viewtab ? 
0x226B : ' ') ; /* 0x226B: '≫' */ + while( ((vtcol + taboff) % tabwidth) != 0) sane_vtputc( ' ') ; - } while( ((vtcol + taboff) % tabwidth) != 0) ; - else if( c < 0x20 || c == 0x7F) { + } else if( c < 0x20 || c == 0x7F) { sane_vtputc( '^') ; sane_vtputc( c ^ 0x40) ; } else if( c >= 0x80 && c <= 0xA0) { diff --git a/display.h b/display.h index 89371fd..112d3b5 100644 --- a/display.h +++ b/display.h @@ -15,6 +15,9 @@ extern int disinp ; /* display input characters (echo) */ extern int gfcolor ; /* global forgrnd color (white) */ extern int gbcolor ; /* global backgrnd color (black) */ +/* global variables */ +extern boolean viewtab ; /* $viewtab = TRUE to visualize hardcoded tab */ + /* Bindable functions */ BINDABLE( upscreen) ; diff --git a/eval.c b/eval.c index c4f427f..d5a00ca 100644 --- a/eval.c +++ b/eval.c @@ -129,6 +129,7 @@ static const char *envars[] = { "rval", /* child process return value */ "tab", /* tab width, 1... */ "hardtab", /* TRUE for hard coded tab, FALSE for soft ones */ + "viewtab", /* TRUE to visualize hard coded tabs */ "overlap", "jump", #if SCROLLCODE @@ -177,9 +178,10 @@ static const char *envars[] = { #define EVRVAL 36 #define EVTAB 37 #define EVHARDTAB 38 -#define EVOVERLAP 39 -#define EVSCROLLCOUNT 40 -#define EVSCROLL 41 +#define EVVIEWTAB 39 +#define EVOVERLAP 40 +#define EVSCROLLCOUNT 41 +#define EVSCROLL 42 enum function_type { NILNAMIC = 0, @@ -212,7 +214,7 @@ static struct { { "bno", UFBNOT | MONAMIC }, /* bitwise not */ { "bor", UFBOR | DYNAMIC }, /* bitwise or 9-10-87 jwm */ { "bxo", UFBXOR | DYNAMIC }, /* bitwise xor 9-10-87 jwm */ - { "cat", UFCAT | DYNAMIC }, /* concatinate string */ + { "cat", UFCAT | DYNAMIC }, /* concatenate string */ { "chr", UFCHR | MONAMIC }, /* integer to char conversion */ { "div", UFDIV | DYNAMIC }, /* division */ { "env", UFENV | MONAMIC }, /* retrieve a system environment var */ @@ -237,7 +239,7 @@ static struct { { "sgr", UFSGREAT | DYNAMIC }, /* string logical greater than */ { "sin", UFSINDEX | 
DYNAMIC }, /* find the index of one string in another */ { "sle", UFSLESS | DYNAMIC }, /* string logical less than */ - { "sub", UFSUB | DYNAMIC }, /* subtraction */ + { "sub", UFSUB | DYNAMIC }, /* subtraction */ { "tim", UFTIMES | DYNAMIC }, /* multiplication */ { "tru", UFTRUTH | MONAMIC }, /* Truth of the universe logical test */ { "upp", UFUPPER | MONAMIC }, /* uppercase string */ @@ -413,19 +415,14 @@ static const char *gtfun( char *fname) { } break ; case UFLEFT | DYNAMIC: { - int sz1, i ; - - sz1 = strlen( arg1) ; + int sz1 = strlen( arg1) ; sz = 0 ; - for( i = atoi( arg2) ; i > 0 ; i -= 1) { + for( int i = atoi( arg2) ; i > 0 ; i -= 1) { unicode_t c ; - int bytc ; - bytc = utf8_to_unicode( arg1, sz, sz1, &c) ; - if( bytc == 0) + sz += utf8_to_unicode( arg1, sz, sz1, &c) ; + if( sz == sz1) break ; - else - sz += bytc ; } if( sz >= ressize) { @@ -449,26 +446,23 @@ static const char *gtfun( char *fname) { retstr = strcpy( result, &arg1[ strlen( arg1) - sz]) ; break ; case UFMID | TRINAMIC: { - int sz1, start, i, bytc ; + int i ; unicode_t c ; - sz1 = strlen( arg1) ; - start = 0 ; + int sz1 = strlen( arg1) ; + int start = 0 ; for( i = atoi( arg2) - 1 ; i > 0 ; i -= 1) { - bytc = utf8_to_unicode( arg1, start, sz1, &c) ; - if( bytc == 0) + start += utf8_to_unicode( arg1, start, sz1, &c) ; + if( start == sz1) break ; - else - start += bytc ; } sz = start ; + if( sz < sz1) for( i = atoi( arg3) ; i > 0 ; i -= 1) { - bytc = utf8_to_unicode( arg1, sz, sz1, &c) ; - if( bytc == 0) + sz += utf8_to_unicode( arg1, sz, sz1, &c) ; + if( sz == sz1) break ; - else - sz += bytc ; } sz -= start ; @@ -656,8 +650,8 @@ static char *gtusr( char *vname) { return errorm; } -/* - * gtenv() + +/* gtenv() * * char *vname; name of environment variable to retrieve */ @@ -665,24 +659,20 @@ static char *gtenv( char *vname) { unsigned vnum ; /* ordinal number of var referenced */ /* scan the list, looking for the referenced name */ - for (vnum = 0; vnum < ARRAY_SIZE(envars); vnum++) - if 
(strcmp(vname, envars[vnum]) == 0) - break; + for( vnum = 0 ; vnum < ARRAY_SIZE( envars) ; vnum++) + if( strcmp( vname, envars[ vnum]) == 0) + break ; /* return errorm on a bad reference */ - if (vnum == ARRAY_SIZE(envars)) + if( vnum == ARRAY_SIZE( envars)) { #if ENVFUNC - { char *ename = getenv(vname); - if (ename != NULL) - return ename; - else - return errorm; - } -#else - return errorm; + if( ename != NULL) + return ename ; #endif + return errorm ; + } /* otherwise, fetch the appropriate value */ switch (vnum) { @@ -778,6 +768,8 @@ static char *gtenv( char *vname) { return i_to_a( tabwidth) ; case EVHARDTAB: return ltos( hardtab) ; + case EVVIEWTAB: + return ltos( viewtab) ; case EVOVERLAP: return i_to_a(overlap); case EVSCROLLCOUNT: @@ -1100,6 +1092,9 @@ static int svar(struct variable_description *var, char *value) case EVHARDTAB: hardtab = stol( value) ; break ; + case EVVIEWTAB: + viewtab = stol( value) ; + break ; case EVOVERLAP: overlap = atoi(value); break; @@ -1368,30 +1363,26 @@ static int ernd( int i) { return (i <= 0) ? 
s : s % i + 1 ; } -/* - * find pattern within source + +/* find pattern within source * * char *source; source string to search * char *pattern; string to look for */ static int sindex( char *source, char *pattern) { - char *sp; /* ptr to current position to scan */ - - /* scanning through the source string */ - sp = source; +/* scanning through the source string */ + char *sp = source ; /* ptr to current position to scan */ int idx = 1 ; int pos = 0 ; int len = strlen( source) ; - while (*sp) { - char *csp; /* ptr to source string during comparison */ - char *cp; /* ptr to place to check for equality */ + while( *sp) { char c ; unicode_t uc ; /* scan through the pattern */ - cp = pattern; - csp = sp; + char *cp = pattern ; /* ptr to place to check for equality */ + char *csp = sp ; /* ptr to source string during comparison */ while( (c = *cp++) && eq( c, *csp)) csp++ ; @@ -1406,7 +1397,7 @@ static int sindex( char *source, char *pattern) { } /* no match at all.. */ - return 0; + return 0 ; } /* diff --git a/utf8.c b/utf8.c index 10cf20c..0531fba 100644 --- a/utf8.c +++ b/utf8.c @@ -1,9 +1,8 @@ -/* utf8.c -- implements utf8.h, converts between unicode and UTF-8 */ - -#define _XOPEN_SOURCE /* wcwidth in wchar.h */ - +/* utf8.c -- implements utf8.h, conversion between unicode and UTF-8 */ #include "utf8.h" +#define _XOPEN_SOURCE /* wcwidth in wchar.h */ + #include #include @@ -11,17 +10,17 @@ /* Display width of UTF-8 character */ int _utf8_width( unicode_t c) { #if CYGWIN - assert( sizeof( wchar_t) == 2) ; /* wcwidth only supports UTF-16 */ - return (c < 0x10000) ? wcwidth( (wchar_t) c) : -1 ; + assert( sizeof( wchar_t) == 2) ; /* wcwidth only supports UTF-16 */ + return (c < 0x10000) ? wcwidth( (wchar_t) c) : -1 ; #else - return wcwidth( (wchar_t) c) ; + return wcwidth( (wchar_t) c) ; #endif } int utf8_width( unicode_t c) { - int w = _utf8_width( c) ; - return (w < 0) ? 
2 : w ; /* display \u if can't figure out width */ + int w = _utf8_width( c) ; + return (w < 0) ? 2 : w ; /* display \u if can't figure out width */ } @@ -38,55 +37,49 @@ int utf8_width( unicode_t c) { * are happily accepted and decoded, as are the various "invalid values". */ unsigned utf8_to_unicode( const char *line, unsigned index, unsigned len, - unicode_t *res) { - unicode_t value ; - unsigned c ; - unsigned bytes, mask, i; + unicode_t *res) { + assert( index < len) ; + unsigned c = *res = (unsigned char) line[ index] ; - if( index >= len) - return 0 ; - - *res = c = line[ index] & 0xFFU ; - - /* - * 0xxxxxxx is valid one byte utf8 + /* 0xxxxxxx is valid one byte utf8 * 10xxxxxx is invalid UTF-8 start byte, we assume it is Latin1 * 1100000x is start of overlong encoding sequence * Sequence longer than 4 bytes are invalid * Last valid code is 0x10FFFF, encoding start with 0xF4 */ if( c <= 0xC1 || c > 0xF4) - return 1; + return 1 ; /* Ok, it's 11xxxxxx, do a stupid decode */ - mask = 0x20; - bytes = 2; + unsigned mask = 0x20 ; + unsigned bytes = 2 ; while( (c & mask) != 0) { - bytes++; - mask >>= 1; + bytes++ ; + mask >>= 1 ; } - /* bytes is in range [2..4] as c was in range [C2..F4] */ - len -= index; - if (bytes > len) - return 1; + /* bytes is in range [2..4] as c was in range [C2..F4] */ + len -= index ; + if( bytes > len) + return 1 ; - value = c & (mask-1); + unicode_t value = c & (mask - 1) ; /* Ok, do the bytes */ - line += index; - for (i = 1; i < bytes; i++) { - c = line[i] & 0xFFU ; - if ((c & 0xc0) != 0x80) - return 1; - value = (value << 6) | (c & 0x3f); + line += index ; + for( unsigned i = 2 ; i <= bytes ; i++) { + c = (unsigned char) *++line ; + if( (c & 0xc0) != 0x80) + return 1 ; + + value = (value << 6) | (c & 0x3f) ; } if( value > 0x10FFFF) /* Avoid 110000 - 13FFFF */ return 1 ; - *res = value; - return bytes; + *res = value ; + return bytes ; } @@ -106,12 +99,12 @@ unsigned unicode_to_utf8( unicode_t c, char *utf8) { assert( c <= 0x10FFFF) 
; #ifdef NDEBUG - if( c > 0x10FFFF) /* Let's assume this is due to sign extension */ - c &= 0xFF ; + if( c > 0x10FFFF) /* Let's assume this is due to sign extension */ + c &= 0xFF ; #endif if( c <= 0x7f) - *utf8 = (char) c ; + *utf8 = (char) c ; else { unsigned prefix = 0x40 ; char *p = utf8 ; @@ -122,38 +115,38 @@ unsigned unicode_to_utf8( unicode_t c, char *utf8) { c >>= 6 ; } while( c >= prefix) ; - *p-- = *utf8 ; - *utf8++ = (char) (c - 2 * prefix) ; - if( utf8 < p) { /* swap middle two bytes if 4 bytes utf-8 code */ - char c = *p ; - *p = *utf8 ; - *utf8 = c ; - } + *p-- = *utf8 ; + *utf8++ = (char) (c - 2 * prefix) ; + if( utf8 < p) { /* swap middle two bytes if 4 bytes utf-8 code */ + char c = *p ; + *p = *utf8 ; + *utf8 = c ; + } } return bytes ; } unsigned utf8_revdelta( unsigned char *p, unsigned pos) { - unsigned delta = 0 ; + unsigned delta = 0 ; - if( (*p & 0xC0) == 0x80) { - unsigned char c ; + if( (*p & 0xC0) == 0x80) { + unsigned char c ; - c = *--p ; - if( (c & 0xE0) == 0xC0) /* valid 2 bytes unicode seq */ - delta = 1 ; - else if( ((c & 0xC0) == 0x80) && (pos > 1)) { - c = *--p ; - if( (c & 0xF0) == 0xE0) /* valid 3 bytes unicode seq */ - delta = 2 ; - else if( ((c & 0xC0) == 0x80) && (pos > 2)) - if( (p[ -1] & 0xF8) == 0xF0) /* valid 4 bytes unicode seq */ - delta = 3 ; - } - } + c = *--p ; + if( (c & 0xE0) == 0xC0) /* valid 2 bytes unicode seq */ + delta = 1 ; + else if( ((c & 0xC0) == 0x80) && (pos > 1)) { + c = *--p ; + if( (c & 0xF0) == 0xE0) /* valid 3 bytes unicode seq */ + delta = 2 ; + else if( ((c & 0xC0) == 0x80) && (pos > 2)) + if( (p[ -1] & 0xF8) == 0xF0) /* valid 4 bytes unicode seq */ + delta = 3 ; + } + } - return delta ; + return delta ; } diff --git a/utf8.h b/utf8.h index 4939459..24a8d1f 100644 --- a/utf8.h +++ b/utf8.h @@ -1,4 +1,4 @@ -/* utf8.h -- */ +/* utf8.h -- conversion between unicode and UTF-8 */ #ifndef _UTF8_H_ #define _UTF8_H_