1
0
mirror of https://github.com/rfivet/uemacs.git synced 2024-12-20 16:22:38 -05:00

Review cursor movement in the presence of mixed Latin-1/Unicode encoding.

This commit is contained in:
Renaud 2015-02-04 12:37:57 +08:00
parent a2d1b54c61
commit 4cbf1e9ae1
4 changed files with 38 additions and 30 deletions

View File

@ -570,18 +570,14 @@ void updpos(void)
i = 0; i = 0;
while (i < curwp->w_doto) { while (i < curwp->w_doto) {
unicode_t c; unicode_t c;
int bytes;
bytes = utf8_to_unicode(lp->l_text, i, curwp->w_doto, &c); i += utf8_to_unicode( lp->l_text, i, curwp->w_doto, &c) ;
i += bytes; if( c == '\t')
if (c == '\t') curcol |= tabmask ;
curcol |= tabmask; else if( c < 0x20 || c == 0x7F)
else if( bytes == 1) {
if( c < 0x20 || c == 0x7F)
curcol += 1 ; /* displayed as ^c */ curcol += 1 ; /* displayed as ^c */
else if( c >= 0x80 && c <= 0xA0) else if( c >= 0x80 && c <= 0xA0)
curcol += 2 ; /* displayed as \xx */ curcol += 2 ; /* displayed as \xx */
}
++curcol; ++curcol;
} }

13
eval.c
View File

@ -515,12 +515,17 @@ static char *gtfun( char *fname) {
case UFTRUTH: case UFTRUTH:
retstr = ltos( atoi( argx) == 42) ; retstr = ltos( atoi( argx) == 42) ;
break ; break ;
case UFASCII: case UFASCII: {
retstr = i_to_a( (int) argx[ 0] & 0xFF) ; unicode_t c ;
utf8_to_unicode( argx, 0, 4, &c) ;
retstr = i_to_a( c) ;
}
break ; break ;
case UFCHR: case UFCHR:
result[0] = atoi(argx); sz = unicode_to_utf8( atoi( argx), result) ;
result[1] = 0; result[ sz] = 0 ;
retstr = result ; retstr = result ;
break ; break ;
case UFGTKEY: case UFGTKEY:

16
line.c
View File

@ -141,13 +141,11 @@ int forwchar(int f, int n)
curwp->w_doto = 0; curwp->w_doto = 0;
curwp->w_flag |= WFMOVE; curwp->w_flag |= WFMOVE;
} else { } else {
do { unicode_t unc ;
unsigned char c; unsigned bytes ;
curwp->w_doto++;
c = lgetc(curwp->w_dotp, curwp->w_doto); bytes = utf8_to_unicode( curwp->w_dotp->l_text, curwp->w_doto, len, &unc) ;
if (is_beginning_utf8(c)) curwp->w_doto += bytes ;
break;
} while (curwp->w_doto < len);
} }
} }
return TRUE; return TRUE;
@ -257,6 +255,8 @@ int insspace(int f, int n)
return TRUE; return TRUE;
} }
static int linsert_byte( int n, int c) ;
/* /*
* linstr -- Insert a string at the current point * linstr -- Insert a string at the current point
*/ */
@ -269,7 +269,7 @@ int linstr( char *instr) {
while( (tmpc = *instr++ & 0xFF)) { while( (tmpc = *instr++ & 0xFF)) {
status = status =
(tmpc == '\n' ? lnewline() : linsert( 1, tmpc)) ; (tmpc == '\n' ? lnewline() : linsert_byte( 1, tmpc)) ;
/* Insertion error? */ /* Insertion error? */
if( status != TRUE) { if( status != TRUE) {

23
utf8.c
View File

@ -15,19 +15,19 @@
*/ */
unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res) unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res)
{ {
unsigned value; unicode_t value ;
unsigned char c = line[index]; unsigned char c = line[index];
unsigned bytes, mask, i; unsigned bytes, mask, i;
*res = c; *res = c;
line += index;
len -= index;
/* /*
* 0xxxxxxx is valid utf8 * 0xxxxxxx is valid one byte utf8
* 10xxxxxx is invalid UTF-8, we assume it is Latin1 * 10xxxxxx is invalid UTF-8 start byte, we assume it is Latin1
* 1100000x is start of overlong encoding sequence
* Sequences longer than 4 bytes are invalid
*/ */
if (c < 0xc0) if( c <= 0xc0 || c > 0xF4 || c == 0xC1)
return 1; return 1;
/* Ok, it's 11xxxxxx, do a stupid decode */ /* Ok, it's 11xxxxxx, do a stupid decode */
@ -39,20 +39,27 @@ unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *re
} }
/* Invalid? Do it as a single byte Latin1 */ /* Invalid? Do it as a single byte Latin1 */
if (bytes > 6) /* if (bytes > 6) * bytes is <= 4 as we limit c value to max 0xF4
return 1; return 1; *
*/
len -= index;
if (bytes > len) if (bytes > len)
return 1; return 1;
value = c & (mask-1); value = c & (mask-1);
/* Ok, do the bytes */ /* Ok, do the bytes */
line += index;
for (i = 1; i < bytes; i++) { for (i = 1; i < bytes; i++) {
c = line[i]; c = line[i];
if ((c & 0xc0) != 0x80) if ((c & 0xc0) != 0x80)
return 1; return 1;
value = (value << 6) | (c & 0x3f); value = (value << 6) | (c & 0x3f);
} }
if( value > 0x10FFFF)
return 1 ;
*res = value; *res = value;
return bytes; return bytes;
} }