From cf823e2fadb80924656ac177ef37d117bd4a38be Mon Sep 17 00:00:00 2001 From: Renaud Fivet Date: Wed, 24 Jun 2020 16:38:03 +0800 Subject: [PATCH] Clean up handling of unicode character width (non printable are displayed as \u) and ensure modeline displays filename including double and zero width characters. --- display.c | 30 ++++++++++++++++++------------ display.h | 1 - main.c | 4 ++-- utf8.c | 12 ++++-------- utf8.h | 2 +- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/display.c b/display.c index 2beac7f..3cd42d5 100644 --- a/display.c +++ b/display.c @@ -176,8 +176,7 @@ void vttidy(void) * screen. There is no checking for nonsense values; this might be a good * idea during the early stages. */ -void vtmove(int row, int col) -{ +static void vtmove( int row, int col) { vtrow = row; vtcol = col; } @@ -220,6 +219,9 @@ static void vtputc( unicode_t c) { sane_vtputc( '\\') ; sane_vtputc( hex[ c >> 4]) ; sane_vtputc( hex[ c & 15]) ; + } else if( utf8_width( c) < 0) { + sane_vtputc( '\\') ; /* show as non printable */ + sane_vtputc( 'u') ; } else sane_vtputc( c) ; } @@ -232,7 +234,7 @@ static int vtputs( const char *s) { s += utf8_to_unicode( s, 0, 4, &c) ; vtputc( c) ; - n += utf8_width( c) ; + n += utf8_width( c) ; /* To Do: only works if all printable */ } return n ; @@ -569,8 +571,10 @@ void updpos(void) curcol += 2 ; /* displayed as ^c */ else if( c >= 0x80 && c <= 0xA0) curcol += 3 ; /* displayed as \xx */ - else - curcol += utf8_width( c) ; + else { + int width = utf8_width( c) ; + curcol += (width < 0) ? 
2 : width ; /* non printable are displayed as \u */ + } } /* if extended, flag so and update the virtual line image */ @@ -977,9 +981,10 @@ static int updateline(int row, struct video *vp1, struct video *vp2) the virtual screen array */ while( ttcol < term.t_ncol) { /* TODO: handle double width unicode char at last screen col */ - TTputc(*cp1); - ttcol += utf8_width( *cp1) ; - *cp2++ = *cp1++; + unicode_t c = *cp1++ ; + TTputc( c) ; + ttcol += utf8_width( c) ; + *cp2++ = c ; } TTrev( FALSE) ; /* turn rev video off */ @@ -1045,9 +1050,10 @@ static int updateline(int row, struct video *vp1, struct video *vp2) #endif while (cp1 != cp5) { /* Ordinary. */ - TTputc(*cp1); - ttcol += utf8_width( *cp1) ; - *cp2++ = *cp1++; + unicode_t c = *cp1++ ; + TTputc( c) ; + ttcol += utf8_width( c) ; + *cp2++ = c ; } if (cp5 != cp3) { /* Erase. */ @@ -1153,7 +1159,7 @@ static void modeline(struct window *wp) char *msg = NULL; char tline[ 6] ; /* buffer for part of mode line */ - vtcol = n - 7; /* strlen(" top ") plus a couple */ + vtcol -= 7 ; /* strlen(" top ") plus a couple */ while (rows--) { lp = lforw(lp); if (lp == wp->w_bufp->b_linep) { diff --git a/display.h b/display.h index 50957ca..0f94d1a 100644 --- a/display.h +++ b/display.h @@ -16,7 +16,6 @@ extern int gbcolor ; /* global backgrnd color (black) */ void vtinit( void) ; void vtfree( void) ; void vttidy( void) ; -void vtmove( int row, int col) ; int upscreen( int f, int n) ; int update( int force) ; void updpos( void) ; diff --git a/main.c b/main.c index f0f0c2f..50c1757 100644 --- a/main.c +++ b/main.c @@ -141,7 +141,7 @@ int main(int argc, char **argv) int errflag; /* C error processing? */ bname_t bname ; /* buffer name of file to read */ - setlocale( LC_CTYPE, "en_GB.UTF-8") ; /* wide character support (UTF-32) */ + setlocale( LC_CTYPE, "") ; /* expects $LANG like en_GB.UTF-8 */ #if PKCODE & BSD sleep(1); /* Time for window manager. 
*/ @@ -226,7 +226,7 @@ int main(int argc, char **argv) } else if( argv[ carg + 1]) { /* -X filename */ if( startup( &argv[ carg + 1][ 0]) == TRUE) startflag = TRUE ; /* don't execute emacs.rc */ - + carg += 1 ; } diff --git a/utf8.c b/utf8.c index 9448356..81a8513 100644 --- a/utf8.c +++ b/utf8.c @@ -10,16 +10,12 @@ /* * Display width of UTF-8 character */ -unsigned utf8_width( unicode_t c) { +int utf8_width( unicode_t c) { #if CYGWIN - assert( sizeof( wchar_t) == 2) ; /* wcwidth only handles UTF-16 */ - return (c < 0x10000) ? (unsigned) wcwidth( (wchar_t) c) : 2 ; -#elif BSD -// assert( sizeof( wchar_t) == 4) ; /* wcwidth should handle UTF-32 */ - int ret = wcwidth( (wchar_t) c) ; - return (ret < 0) ? 1 : (unsigned) ret ; + assert( sizeof( wchar_t) == 2) ; /* wcwidth only supports UTF-16 */ + return (c < 0x10000) ? wcwidth( (wchar_t) c) : -1 ; #else - return (unsigned) wcwidth( (wchar_t) c) ; + return wcwidth( (wchar_t) c) ; #endif } diff --git a/utf8.h b/utf8.h index ae61faf..b3b9505 100644 --- a/utf8.h +++ b/utf8.h @@ -3,7 +3,7 @@ typedef unsigned int unicode_t ; -unsigned utf8_width( unicode_t c) ; +int utf8_width( unicode_t c) ; unsigned utf8_to_unicode( const char *line, unsigned index, unsigned len, unicode_t *res) ; unsigned utf8_revdelta( unsigned char *buf, unsigned pos) ;