Consistent unicode handling in buffer-position (CTL-X =) and $curchar.

2025-07-26 11:24:26 -04:00 · 2017-05-05 10:49:02 +08:00 · 2017-05-05 10:49:02 +08:00 · 99487b126f
commit 99487b126f
parent 3ef30d1c23
5 changed files with 36 additions and 60 deletions
--- a/eval.c
+++ b/eval.c
@@ -686,10 +686,13 @@ static char *gtenv( char *vname) {
 		return i_to_a(gacount);
 	case EVLASTKEY:
 		return i_to_a(lastkey);
-	case EVCURCHAR:
-		return (curwp->w_dotp->l_used ==
-			curwp->w_doto ? i_to_a('\n') :
-			i_to_a(lgetc(curwp->w_dotp, curwp->w_doto)));
+	case EVCURCHAR: {
+			unicode_t c ;
+
+			lgetchar( &c) ;
+			return i_to_a( c) ;
+		}
+	
 	case EVDISCMD:
 		return ltos(discmd);
 	case EVVERSION:
--- a/line.c
+++ b/line.c
@@ -398,22 +398,20 @@ int linsert( int n, unicode_t c) {
 		return rdonly();	/* we are in read only mode     */

 	if( n > 0) {
-		char utf8[ 6] ;
-		int bytes, i ;
+		char utf8[ 4] ;
+		int bytes ;

 		bytes = unicode_to_utf8(c, utf8) ;
 		if (bytes == 1)
 			return linsert_byte(n, (unsigned char) utf8[0]);

-		for (i = 0; i < n; i++) {
-			int j;
+		do {
+			int j ;

-			for (j = 0; j < bytes; j++) {
-				unsigned char c = utf8[j];
-				if (!linsert_byte(1, c))
-					return FALSE;
-			}
-		}
+			for( j = 0 ; j < bytes ; j += 1)
+				if( !linsert_byte( 1, (unsigned char) utf8[ j]))
+					return FALSE ;
+		} while( --n > 0) ;
 	}

 	return TRUE;
@@ -520,11 +518,13 @@ int lnewline(void)
 	return TRUE;
 }

-int lgetchar(unicode_t *c)
-{
-	int len = llength(curwp->w_dotp);
-	char *buf = curwp->w_dotp->l_text;
-	return utf8_to_unicode(buf, curwp->w_doto, len, c);
+int lgetchar( unicode_t *c) {
+	if( curwp->w_dotp->l_used == curwp->w_doto) {
+		*c = (curbp->b_mode & MDDOS) ? '\r' : '\n' ;
+		return 1 ;
+	} else
+		return utf8_to_unicode( curwp->w_dotp->l_text, curwp->w_doto,
+												llength( curwp->w_dotp), c) ;
 }

 /*
--- a/random.c
+++ b/random.c
@@ -86,20 +86,12 @@ int showcpos(int f, int n)
 	numlines = 0;
 	predchars = 0;
 	predlines = 0;
-	curchar = 0;
-	bytes = 1 ;
+	bytes = lgetchar( &curchar) ;
 	for( lp = lforw( curbp->b_linep) ; lp != curbp->b_linep ; lp = lforw( lp)) {
 		/* if we are on the current line, record it */
 		if (lp == curwp->w_dotp) {
-			int len ;
-			
 			predlines = numlines;
 			predchars = numchars + curwp->w_doto;
-			len = llength( lp) ;				
-			if( (curwp->w_doto) == len)
-				curchar = (curbp->b_mode & MDDOS) ? '\r' : '\n' ;
-			else
-				bytes = utf8_to_unicode( lp->l_text, curwp->w_doto, len, &curchar) ;
 		}
 		/* on to the next line */
 		++numlines;
--- a/termio.c
+++ b/termio.c
@@ -276,30 +276,15 @@ void ttclose(void)
 }

 /*
- * Write a character to the display. On VMS, terminal output is buffered, and
- * we just put the characters in the big array, after checking for overflow.
- * On CPM terminal I/O unbuffered, so we just write the byte out. Ditto on
- * MS-DOS (use the very very raw console output routine).
+ * Write a character to the display.
 */
 int ttputc( unicode_t c) {
-#if     VMS
-    if (nobuf >= NOBUF)
-        ttflush();
-    obuf[nobuf++] = c;
-#endif
+	char utf8[ 4] ;
+	int bytes ;

-#if MSDOS & ~IBMPC
-    bdos(6, c, 0);
-#endif
-
-#if     V7 | USG | BSD
-    char utf8[6];
-    int bytes;
-
-    bytes = unicode_to_utf8(c, utf8);
-    fwrite(utf8, 1, bytes, stdout);
-#endif
-    return 0 ;
+	bytes = unicode_to_utf8( c, utf8) ;
+	fwrite( utf8, 1, bytes, stdout) ;
+	return 0 ;
 }

 /*
--- a/utf8.c
+++ b/utf8.c
@@ -66,15 +66,6 @@ unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *re
    return bytes;
 }

-static void reverse_string(char *begin, char *end)
-{
-    do {
-        char a = *begin, b = *end;
-        *end = a; *begin = b;
-        begin++; end--;
-    } while (begin < end);
-}
-
 /*
 * unicode_to_utf8()
 *
@@ -108,8 +99,13 @@ unsigned unicode_to_utf8( unicode_t c, char *utf8) {
            c >>= 6 ;
        } while( c >= prefix) ;

-        *p = (char) (c - 2 * prefix) ;
-        reverse_string( utf8, p) ;
+		*p-- = *utf8 ;
+		*utf8++ = (char) (c - 2 * prefix) ;
+		if( utf8 < p) {	/* swap middle two bytes if 4 bytes utf-8 code */
+			char c = *p ;
+			*p = *utf8 ;
+			*utf8 = c ;
+		}
    }

    return bytes ;