$viewtab = TRUE to visualize hardcoded tabs.

Refactor &lef and &mid with stronger assertion on utf8_to_unicode().
2021-08-16 11:05:24 +08:00 · 2021-08-16 11:05:24 +08:00 · 2758464a2e
parent 946c603a39
commit 2758464a2e
5 changed files with 111 additions and 122 deletions
--- a/display.c
+++ b/display.c
@ -72,6 +72,8 @@ int scrollcount = 1 ;	/* number of lines to scroll */
 int discmd = TRUE ;		/* display command flag         */
 int disinp = TRUE ;		/* display input characters (echo)	*/

+/* global variables */
+boolean viewtab = FALSE ;	/* $viewtab = TRUE to visualize hardcoded tab */

 static int reframe( window_p wp) ;
 static void updone( window_p wp) ;
@ -198,11 +200,11 @@ static void vtputc( unicode_t c) {
 	if( c > 0x10FFFF)	/* Let's assume this is due to sign extension */
 		c &= 0xFF ;

-	if( c == '\t')
-		do {
+	if( c == '\t') {
+		sane_vtputc( viewtab ? 0x226B : ' ') ;	/* 0x226B: '≫' */
+		while( ((vtcol + taboff) % tabwidth) != 0)
 			sane_vtputc( ' ') ;
-		} while( ((vtcol + taboff) % tabwidth) != 0) ;
-	else if( c < 0x20 || c == 0x7F) {
+	} else if( c < 0x20 || c == 0x7F) {
 		sane_vtputc( '^') ;
 		sane_vtputc( c ^ 0x40) ;
 	} else if( c >= 0x80 && c <= 0xA0) {
--- a/display.h
+++ b/display.h
@ -15,6 +15,9 @@ extern int disinp ;         /* display input characters (echo)  */
 extern int gfcolor ;        /* global forgrnd color (white) */
 extern int gbcolor ;        /* global backgrnd color (black) */

+/* global variables */
+extern boolean viewtab ;    /* $viewtab = TRUE to visualize hardcoded tab */
+
 /* Bindable functions */
 BINDABLE( upscreen) ;

--- a/eval.c
+++ b/eval.c
@ -129,6 +129,7 @@ static const char *envars[] = {
 	"rval",			/* child process return value */
 	"tab",			/* tab width, 1... */
 	"hardtab",		/* TRUE for hard coded tab, FALSE for soft ones */
+	"viewtab",		/* TRUE to visualize hard coded tabs */
 	"overlap",
 	"jump",
 #if SCROLLCODE
@ -177,9 +178,10 @@ static const char *envars[] = {
 #define	EVRVAL		36
 #define EVTAB		37
 #define EVHARDTAB	38
-#define EVOVERLAP	39
-#define EVSCROLLCOUNT	40
-#define EVSCROLL	41
+#define EVVIEWTAB	39
+#define EVOVERLAP	40
+#define EVSCROLLCOUNT	41
+#define EVSCROLL	42

 enum function_type {
 	NILNAMIC	= 0,
@ -212,7 +214,7 @@ static struct {
 	{ "bno", UFBNOT		| MONAMIC },	/* bitwise not */
 	{ "bor", UFBOR		| DYNAMIC },	/* bitwise or    9-10-87  jwm */
 	{ "bxo", UFBXOR		| DYNAMIC },	/* bitwise xor   9-10-87  jwm */
-	{ "cat", UFCAT		| DYNAMIC },	/* concatinate string */
+	{ "cat", UFCAT		| DYNAMIC },	/* concatenate string */
 	{ "chr", UFCHR		| MONAMIC },	/* integer to char conversion */
 	{ "div", UFDIV		| DYNAMIC },	/* division */
 	{ "env", UFENV		| MONAMIC },	/* retrieve a system environment var */
@ -237,7 +239,7 @@ static struct {
 	{ "sgr", UFSGREAT	| DYNAMIC },	/* string logical greater than */
 	{ "sin", UFSINDEX	| DYNAMIC },	/* find the index of one string in another */
 	{ "sle", UFSLESS	| DYNAMIC },	/* string logical less than */
-	{ "sub", UFSUB		| DYNAMIC },	/* subtraction */
+	{ "sub", UFSUB		| DYNAMIC },	/* subtraction */
 	{ "tim", UFTIMES	| DYNAMIC },	/* multiplication */
 	{ "tru", UFTRUTH	| MONAMIC },	/* Truth of the universe logical test */
 	{ "upp", UFUPPER	| MONAMIC },	/* uppercase string */
@ -413,19 +415,14 @@ static const char *gtfun( char *fname) {
 	}
 		break ;
 	case UFLEFT | DYNAMIC: {
-		int	sz1, i ;
-
-		sz1 = strlen( arg1) ;
+		int sz1 = strlen( arg1) ;
 		sz = 0 ;
-		for( i = atoi( arg2) ; i > 0 ; i -= 1) {
+		for( int i = atoi( arg2) ; i > 0 ; i -= 1) {
 			unicode_t c ;
-			int bytc ;

-			bytc = utf8_to_unicode( arg1, sz, sz1, &c) ;
-			if( bytc == 0)
+			sz += utf8_to_unicode( arg1, sz, sz1, &c) ;
+			if( sz == sz1)
 				break ;
-			else
-				sz += bytc ;
 		}

 		if( sz >= ressize) {
@ -449,26 +446,23 @@ static const char *gtfun( char *fname) {
 		retstr = strcpy( result, &arg1[ strlen( arg1) - sz]) ;
 		break ;
 	case UFMID | TRINAMIC: {
-		int sz1, start, i, bytc ;
+		int i ;
 		unicode_t c ;

-		sz1 = strlen( arg1) ;
-		start = 0 ;
+		int sz1 = strlen( arg1) ;
+		int start = 0 ;
 		for( i = atoi( arg2) - 1 ; i > 0 ; i -= 1) {
-			bytc = utf8_to_unicode( arg1, start, sz1, &c) ;
-			if( bytc == 0)
+			start +=  utf8_to_unicode( arg1, start, sz1, &c) ;
+			if( start == sz1)
 				break ;
-			else
-				start += bytc ;
 		}

 		sz = start ;
+		if( sz < sz1)
 		for( i = atoi( arg3) ; i > 0 ; i -= 1) {
-			bytc = utf8_to_unicode( arg1, sz, sz1, &c) ;
-			if( bytc == 0)
+			sz += utf8_to_unicode( arg1, sz, sz1, &c) ;
+			if( sz == sz1)
 				break ;
-			else
-				sz += bytc ;
 		}

 		sz -= start ;
@ -656,8 +650,8 @@ static char *gtusr( char *vname) {
 	return errorm;
 }

-/*
- * gtenv()
+
+/* gtenv()
 *
 * char *vname;			name of environment variable to retrieve
 */
@ -670,19 +664,15 @@ static char *gtenv( char *vname) {
 			break ;

 	/* return errorm on a bad reference */
-	if (vnum == ARRAY_SIZE(envars))
+	if( vnum == ARRAY_SIZE( envars)) {
 #if	ENVFUNC
-	{
 		char *ename = getenv(vname);

 		if( ename != NULL)
 			return ename ;
-		else
+#endif
 		return errorm ;
 	}
-#else
-		return errorm;
-#endif

 	/* otherwise, fetch the appropriate value */
 	switch (vnum) {
@ -778,6 +768,8 @@ static char *gtenv( char *vname) {
 		return i_to_a( tabwidth) ;
 	case EVHARDTAB:
 		return ltos( hardtab) ;
+	case EVVIEWTAB:
+		return ltos( viewtab) ;
 	case EVOVERLAP:
 		return i_to_a(overlap);
 	case EVSCROLLCOUNT:
@ -1100,6 +1092,9 @@ static int svar(struct variable_description *var, char *value)
 		case EVHARDTAB:
 			hardtab = stol( value) ;
 			break ;
+		case EVVIEWTAB:
+			viewtab = stol( value) ;
+			break ;
 		case EVOVERLAP:
 			overlap = atoi(value);
 			break;
@ -1368,30 +1363,26 @@ static int ernd( int i) {
 	return (i <= 0) ? s : s % i + 1 ;
 }

-/*
- * find pattern within source
+
+/* find pattern within source
 *
 * char *source;	source string to search
 * char *pattern;	string to look for
 */
 static int sindex( char *source, char *pattern) {
-	char *sp;		/* ptr to current position to scan */
-
 /* scanning through the source string */
-	sp = source;
+	char *sp = source ;		/* ptr to current position to scan */
 	int idx = 1 ;
 	int pos = 0 ;
 	int len = strlen( source) ;

 	while( *sp) {
-		char *csp;		/* ptr to source string during comparison */
-		char *cp;		/* ptr to place to check for equality */
 		char c ;
 		unicode_t uc ;
 		
 		/* scan through the pattern */
-		cp = pattern;
-		csp = sp;
+		char *cp = pattern ;		/* ptr to place to check for equality */
+		char *csp = sp ;			/* ptr to source string during comparison */

 		while( (c = *cp++) && eq( c, *csp))
 			csp++ ;
--- a/utf8.c
+++ b/utf8.c
@ -1,9 +1,8 @@
-/* utf8.c -- implements utf8.h, converts between unicode and UTF-8 */
+/* utf8.c -- implements utf8.h, conversion between unicode and UTF-8 */
+#include "utf8.h"

 #define _XOPEN_SOURCE   /* wcwidth in wchar.h */

-#include "utf8.h"
-
 #include <assert.h>
 #include <wchar.h>

@ -39,17 +38,10 @@ int utf8_width( unicode_t c) {
 */
 unsigned utf8_to_unicode( const char *line, unsigned index, unsigned len,
                                                            unicode_t *res) {
-    unicode_t   value ;
-    unsigned	c ;
-    unsigned	bytes, mask, i;
+    assert( index < len) ;
+    unsigned c = *res = (unsigned char) line[ index] ;

-	if( index >= len)
-		return 0 ;
-
-    *res = c = line[ index] & 0xFFU ;
-
-    /*
-     * 0xxxxxxx is valid one byte utf8
+    /* 0xxxxxxx is valid one byte utf8
     * 10xxxxxx is invalid UTF-8 start byte, we assume it is Latin1
     * 1100000x is start of overlong encoding sequence
     * Sequence longer than 4 bytes are invalid
@ -59,8 +51,8 @@ unsigned utf8_to_unicode( const char *line, unsigned index, unsigned len,
        return 1 ;

    /* Ok, it's 11xxxxxx, do a stupid decode */
-    mask = 0x20;
-    bytes = 2;
+    unsigned mask = 0x20 ;
+    unsigned bytes = 2 ;
    while( (c & mask) != 0) {
        bytes++ ;
        mask >>= 1 ;
@ -71,14 +63,15 @@ unsigned utf8_to_unicode( const char *line, unsigned index, unsigned len,
    if( bytes > len)
        return 1 ;

-    value = c & (mask-1);
+    unicode_t value = c & (mask - 1) ;

    /* Ok, do the bytes */
    line += index ;
-    for (i = 1; i < bytes; i++) {
-        c = line[i] & 0xFFU ;
+    for( unsigned i = 2 ; i <= bytes ; i++) {
+        c = (unsigned char) *++line ;
        if( (c & 0xc0) != 0x80)
            return 1 ;
+
        value = (value << 6) | (c & 0x3f) ;
    }

--- a/utf8.h
+++ b/utf8.h
@ -1,4 +1,4 @@
-/* utf8.h -- */
+/* utf8.h -- conversion between unicode and UTF-8 */
 #ifndef _UTF8_H_
 #define _UTF8_H_