mirror of
https://github.com/rfivet/uemacs.git
synced 2024-12-20 16:22:38 -05:00
Review cursor movement in the presence of mixed Latin-1/Unicode encoding.
This commit is contained in:
parent
a2d1b54c61
commit
4cbf1e9ae1
14
display.c
14
display.c
@ -570,18 +570,14 @@ void updpos(void)
|
|||||||
i = 0;
|
i = 0;
|
||||||
while (i < curwp->w_doto) {
|
while (i < curwp->w_doto) {
|
||||||
unicode_t c;
|
unicode_t c;
|
||||||
int bytes;
|
|
||||||
|
|
||||||
bytes = utf8_to_unicode(lp->l_text, i, curwp->w_doto, &c);
|
i += utf8_to_unicode( lp->l_text, i, curwp->w_doto, &c) ;
|
||||||
i += bytes;
|
if( c == '\t')
|
||||||
if (c == '\t')
|
curcol |= tabmask ;
|
||||||
curcol |= tabmask;
|
else if( c < 0x20 || c == 0x7F)
|
||||||
else if( bytes == 1) {
|
|
||||||
if( c < 0x20 || c == 0x7F)
|
|
||||||
curcol += 1 ; /* displayed as ^c */
|
curcol += 1 ; /* displayed as ^c */
|
||||||
else if( c >= 0x80 && c <= 0xA0)
|
else if( c >= 0x80 && c <= 0xA0)
|
||||||
curcol += 2 ; /* displayed as \xx */
|
curcol += 2 ; /* displayed as \xx */
|
||||||
}
|
|
||||||
|
|
||||||
++curcol;
|
++curcol;
|
||||||
}
|
}
|
||||||
|
13
eval.c
13
eval.c
@ -515,12 +515,17 @@ static char *gtfun( char *fname) {
|
|||||||
case UFTRUTH:
|
case UFTRUTH:
|
||||||
retstr = ltos( atoi( argx) == 42) ;
|
retstr = ltos( atoi( argx) == 42) ;
|
||||||
break ;
|
break ;
|
||||||
case UFASCII:
|
case UFASCII: {
|
||||||
retstr = i_to_a( (int) argx[ 0] & 0xFF) ;
|
unicode_t c ;
|
||||||
|
|
||||||
|
utf8_to_unicode( argx, 0, 4, &c) ;
|
||||||
|
retstr = i_to_a( c) ;
|
||||||
|
}
|
||||||
|
|
||||||
break ;
|
break ;
|
||||||
case UFCHR:
|
case UFCHR:
|
||||||
result[0] = atoi(argx);
|
sz = unicode_to_utf8( atoi( argx), result) ;
|
||||||
result[1] = 0;
|
result[ sz] = 0 ;
|
||||||
retstr = result ;
|
retstr = result ;
|
||||||
break ;
|
break ;
|
||||||
case UFGTKEY:
|
case UFGTKEY:
|
||||||
|
16
line.c
16
line.c
@ -141,13 +141,11 @@ int forwchar(int f, int n)
|
|||||||
curwp->w_doto = 0;
|
curwp->w_doto = 0;
|
||||||
curwp->w_flag |= WFMOVE;
|
curwp->w_flag |= WFMOVE;
|
||||||
} else {
|
} else {
|
||||||
do {
|
unicode_t unc ;
|
||||||
unsigned char c;
|
unsigned bytes ;
|
||||||
curwp->w_doto++;
|
|
||||||
c = lgetc(curwp->w_dotp, curwp->w_doto);
|
bytes = utf8_to_unicode( curwp->w_dotp->l_text, curwp->w_doto, len, &unc) ;
|
||||||
if (is_beginning_utf8(c))
|
curwp->w_doto += bytes ;
|
||||||
break;
|
|
||||||
} while (curwp->w_doto < len);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return TRUE;
|
return TRUE;
|
||||||
@ -257,6 +255,8 @@ int insspace(int f, int n)
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int linsert_byte( int n, int c) ;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* linstr -- Insert a string at the current point
|
* linstr -- Insert a string at the current point
|
||||||
*/
|
*/
|
||||||
@ -269,7 +269,7 @@ int linstr( char *instr) {
|
|||||||
|
|
||||||
while( (tmpc = *instr++ & 0xFF)) {
|
while( (tmpc = *instr++ & 0xFF)) {
|
||||||
status =
|
status =
|
||||||
(tmpc == '\n' ? lnewline() : linsert( 1, tmpc)) ;
|
(tmpc == '\n' ? lnewline() : linsert_byte( 1, tmpc)) ;
|
||||||
|
|
||||||
/* Insertion error? */
|
/* Insertion error? */
|
||||||
if( status != TRUE) {
|
if( status != TRUE) {
|
||||||
|
23
utf8.c
23
utf8.c
@ -15,19 +15,19 @@
|
|||||||
*/
|
*/
|
||||||
unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res)
|
unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res)
|
||||||
{
|
{
|
||||||
unsigned value;
|
unicode_t value ;
|
||||||
unsigned char c = line[index];
|
unsigned char c = line[index];
|
||||||
unsigned bytes, mask, i;
|
unsigned bytes, mask, i;
|
||||||
|
|
||||||
*res = c;
|
*res = c;
|
||||||
line += index;
|
|
||||||
len -= index;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* 0xxxxxxx is valid utf8
|
* 0xxxxxxx is valid one byte utf8
|
||||||
* 10xxxxxx is invalid UTF-8, we assume it is Latin1
|
* 10xxxxxx is an invalid UTF-8 start byte; we assume it is Latin-1
|
||||||
|
* 1100000x is the start of an overlong encoding sequence
|
||||||
|
* Sequences longer than 4 bytes are invalid
|
||||||
*/
|
*/
|
||||||
if (c < 0xc0)
|
if( c <= 0xc0 || c > 0xF4 || c == 0xC1)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/* Ok, it's 11xxxxxx, do a stupid decode */
|
/* Ok, it's 11xxxxxx, do a stupid decode */
|
||||||
@ -39,20 +39,27 @@ unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *re
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Invalid? Do it as a single byte Latin1 */
|
/* Invalid? Do it as a single byte Latin1 */
|
||||||
if (bytes > 6)
|
/* if (bytes > 6) * bytes is <= 4 as we limit c value to max 0xF4
|
||||||
return 1;
|
return 1; *
|
||||||
|
*/
|
||||||
|
len -= index;
|
||||||
if (bytes > len)
|
if (bytes > len)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
value = c & (mask-1);
|
value = c & (mask-1);
|
||||||
|
|
||||||
/* Ok, do the bytes */
|
/* Ok, do the bytes */
|
||||||
|
line += index;
|
||||||
for (i = 1; i < bytes; i++) {
|
for (i = 1; i < bytes; i++) {
|
||||||
c = line[i];
|
c = line[i];
|
||||||
if ((c & 0xc0) != 0x80)
|
if ((c & 0xc0) != 0x80)
|
||||||
return 1;
|
return 1;
|
||||||
value = (value << 6) | (c & 0x3f);
|
value = (value << 6) | (c & 0x3f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( value > 0x10FFFF)
|
||||||
|
return 1 ;
|
||||||
|
|
||||||
*res = value;
|
*res = value;
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user