mirror of
https://github.com/rfivet/uemacs.git
synced 2024-12-18 15:26:23 -05:00
Review cursor movement in the presence of mixed Latin-1/Unicode encoding.
This commit is contained in:
parent
a2d1b54c61
commit
4cbf1e9ae1
14
display.c
14
display.c
@ -570,18 +570,14 @@ void updpos(void)
|
||||
i = 0;
|
||||
while (i < curwp->w_doto) {
|
||||
unicode_t c;
|
||||
int bytes;
|
||||
|
||||
bytes = utf8_to_unicode(lp->l_text, i, curwp->w_doto, &c);
|
||||
i += bytes;
|
||||
if (c == '\t')
|
||||
curcol |= tabmask;
|
||||
else if( bytes == 1) {
|
||||
if( c < 0x20 || c == 0x7F)
|
||||
i += utf8_to_unicode( lp->l_text, i, curwp->w_doto, &c) ;
|
||||
if( c == '\t')
|
||||
curcol |= tabmask ;
|
||||
else if( c < 0x20 || c == 0x7F)
|
||||
curcol += 1 ; /* displayed as ^c */
|
||||
else if( c >= 0x80 && c <= 0xA0)
|
||||
else if( c >= 0x80 && c <= 0xA0)
|
||||
curcol += 2 ; /* displayed as \xx */
|
||||
}
|
||||
|
||||
++curcol;
|
||||
}
|
||||
|
13
eval.c
13
eval.c
@ -515,12 +515,17 @@ static char *gtfun( char *fname) {
|
||||
case UFTRUTH:
|
||||
retstr = ltos( atoi( argx) == 42) ;
|
||||
break ;
|
||||
case UFASCII:
|
||||
retstr = i_to_a( (int) argx[ 0] & 0xFF) ;
|
||||
case UFASCII: {
|
||||
unicode_t c ;
|
||||
|
||||
utf8_to_unicode( argx, 0, 4, &c) ;
|
||||
retstr = i_to_a( c) ;
|
||||
}
|
||||
|
||||
break ;
|
||||
case UFCHR:
|
||||
result[0] = atoi(argx);
|
||||
result[1] = 0;
|
||||
sz = unicode_to_utf8( atoi( argx), result) ;
|
||||
result[ sz] = 0 ;
|
||||
retstr = result ;
|
||||
break ;
|
||||
case UFGTKEY:
|
||||
|
16
line.c
16
line.c
@ -141,13 +141,11 @@ int forwchar(int f, int n)
|
||||
curwp->w_doto = 0;
|
||||
curwp->w_flag |= WFMOVE;
|
||||
} else {
|
||||
do {
|
||||
unsigned char c;
|
||||
curwp->w_doto++;
|
||||
c = lgetc(curwp->w_dotp, curwp->w_doto);
|
||||
if (is_beginning_utf8(c))
|
||||
break;
|
||||
} while (curwp->w_doto < len);
|
||||
unicode_t unc ;
|
||||
unsigned bytes ;
|
||||
|
||||
bytes = utf8_to_unicode( curwp->w_dotp->l_text, curwp->w_doto, len, &unc) ;
|
||||
curwp->w_doto += bytes ;
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
@ -257,6 +255,8 @@ int insspace(int f, int n)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static int linsert_byte( int n, int c) ;
|
||||
|
||||
/*
|
||||
* linstr -- Insert a string at the current point
|
||||
*/
|
||||
@ -269,7 +269,7 @@ int linstr( char *instr) {
|
||||
|
||||
while( (tmpc = *instr++ & 0xFF)) {
|
||||
status =
|
||||
(tmpc == '\n' ? lnewline() : linsert( 1, tmpc)) ;
|
||||
(tmpc == '\n' ? lnewline() : linsert_byte( 1, tmpc)) ;
|
||||
|
||||
/* Insertion error? */
|
||||
if( status != TRUE) {
|
||||
|
23
utf8.c
23
utf8.c
@ -15,19 +15,19 @@
|
||||
*/
|
||||
unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res)
|
||||
{
|
||||
unsigned value;
|
||||
unicode_t value ;
|
||||
unsigned char c = line[index];
|
||||
unsigned bytes, mask, i;
|
||||
|
||||
*res = c;
|
||||
line += index;
|
||||
len -= index;
|
||||
|
||||
/*
|
||||
* 0xxxxxxx is valid utf8
|
||||
* 10xxxxxx is invalid UTF-8, we assume it is Latin1
|
||||
* 0xxxxxxx is valid one byte utf8
|
||||
* 10xxxxxx is invalid UTF-8 start byte, we assume it is Latin1
|
||||
* 1100000x is start of overlong encoding sequence
|
||||
* Sequences longer than 4 bytes are invalid
|
||||
*/
|
||||
if (c < 0xc0)
|
||||
if( c <= 0xc0 || c > 0xF4 || c == 0xC1)
|
||||
return 1;
|
||||
|
||||
/* Ok, it's 11xxxxxx, do a stupid decode */
|
||||
@ -39,20 +39,27 @@ unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *re
|
||||
}
|
||||
|
||||
/* Invalid? Do it as a single byte Latin1 */
|
||||
if (bytes > 6)
|
||||
return 1;
|
||||
/* if (bytes > 6) * bytes is <= 4 as we limit c value to max 0xF4
|
||||
return 1; *
|
||||
*/
|
||||
len -= index;
|
||||
if (bytes > len)
|
||||
return 1;
|
||||
|
||||
value = c & (mask-1);
|
||||
|
||||
/* Ok, do the bytes */
|
||||
line += index;
|
||||
for (i = 1; i < bytes; i++) {
|
||||
c = line[i];
|
||||
if ((c & 0xc0) != 0x80)
|
||||
return 1;
|
||||
value = (value << 6) | (c & 0x3f);
|
||||
}
|
||||
|
||||
if( value > 0x10FFFF)
|
||||
return 1 ;
|
||||
|
||||
*res = value;
|
||||
return bytes;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user