Improve UTF-8 support in filename completion and when keying in command arguments.

This commit is contained in:
Renaud 2019-08-12 21:41:51 +08:00
parent 81431d2d76
commit 78a7a3ba4e
4 changed files with 71 additions and 66 deletions

91
input.c
View File

@ -308,7 +308,7 @@ int tgetc(void)
if (kbdptr < kbdend) if (kbdptr < kbdend)
return (int) *kbdptr++; return (int) *kbdptr++;
/* at the end of last repitition? */ /* at the end of last repetition? */
if (--kbdrep < 1) { if (--kbdrep < 1) {
kbdmode = STOP; kbdmode = STOP;
#if VISMAC == 0 #if VISMAC == 0
@ -360,8 +360,28 @@ int get1key( void) {
} }
/* GETCMD: Get a command from the keyboard. Process all applicable /* GETCMD: Get a command from the keyboard. Process all applicable
prefix keys prefix keys */
*/
/* get1unicode: complete a UTF-8 sequence whose first byte is already in *k.
 *
 * When *k lies in 0xC2..0xF4 the following key(s) are fetched with
 * get1key(), the collected bytes are handed to utf8_to_unicode() and the
 * decoded value is stored back into *k.  Any other *k is left untouched.
 *
 * Returns 1 when *k is outside 0xC2..0xF4, otherwise whatever
 * utf8_to_unicode() returns (callers in this file use it as a byte count).
 */
static int get1unicode( int *k) {
	char seq[ 4] ;	/* raw bytes collected from the keyboard */
	char next ;	/* most recently fetched byte */
	int lead = *k ;

	/* not in the lead byte range handled here: nothing more to fetch */
	if( lead <= 0xC1 || lead > 0xF4)
		return 1 ;

	seq[ 0] = lead ;
	seq[ 1] = next = get1key() ;
	/* at least 3 bytes announced and previous byte is a continuation */
	if( (lead & 0x20) && ((next & 0xC0) == 0x80)) {
		seq[ 2] = next = get1key() ;
		/* 4 bytes announced and previous byte is a continuation */
		if( (lead & 0x10) && ((next & 0xC0) == 0x80))
			seq[ 3] = get1key() ;
	}

	return utf8_to_unicode( seq, 0, sizeof seq, (unicode_t *) k) ;
}
int getcmd(void) int getcmd(void)
{ {
int c; /* fetched keystroke */ int c; /* fetched keystroke */
@ -466,23 +486,7 @@ handle_CSI:
} }
#ifdef CYGWIN #ifdef CYGWIN
/* Accept UTF-8 sequence */ get1unicode( &c) ;
if( c <= 0xC1 || c > 0xF4)
return c ;
else {
char utf[ 4] ;
char cc ;
utf[ 0] = c ;
utf[ 1] = cc = get1key() ;
if( (c & 0x20) && ((cc & 0xC0) == 0x80)) { /* at least 3 bytes and a valid encoded char */
utf[ 2] = cc = get1key() ;
if( (c & 0x10) && ((cc & 0xC0) == 0x80)) /* at least 4 bytes and a valid encoded char */
utf[ 3] = get1key() ;
}
utf8_to_unicode( utf, 0, sizeof utf, (unicode_t *) &c) ;
}
#endif #endif
/* otherwise, just return it */ /* otherwise, just return it */
@ -510,7 +514,7 @@ static void echov( int c) {
static void rubc( char c) { static void rubc( char c) {
rubout() ; rubout() ;
if( c < ' ' || c == 0x7F) { if( (c >= 0 && c < ' ') || c == 0x7F) {
/* ^x range */ /* ^x range */
rubout() ; rubout() ;
if( c == '\n') { if( c == '\n') {
@ -598,15 +602,13 @@ int getstring( const char *prompt, char *buf, int nbuf, int eolchar)
/* rubout/erase */ /* rubout/erase */
if (cpos != 0) { if (cpos != 0) {
rubc( buf[ --cpos]) ; rubc( buf[ --cpos]) ;
cpos -= utf8_revdelta( (unsigned char *) &buf[ cpos], cpos) ;
TTflush(); TTflush();
} }
} else if( c == 0x15) { } else if( c == 0x15) {
/* C-U, kill */ /* C-U, kill */
while (cpos != 0) mlwrite( "%s", prompt) ;
rubc( buf[ --cpos]) ; cpos = 0 ;
TTflush();
#if COMPLC #if COMPLC
} else if( (c == 0x09 || c == ' ') && file_f) { } else if( (c == 0x09 || c == ' ') && file_f) {
/* TAB, complete file name */ /* TAB, complete file name */
@ -619,19 +621,16 @@ int getstring( const char *prompt, char *buf, int nbuf, int eolchar)
didtry = 1; didtry = 1;
ocpos = cpos; ocpos = cpos;
while (cpos != 0) { mlwrite( "%s", prompt) ;
rubc( buf[ --cpos]) ; while( cpos != 0) {
if (buf[cpos] == '*' || buf[cpos] == '?') c = buf[ --cpos] ;
iswild = 1; if( c == '*' || c == '?') {
#if MSDOS iswild = 1 ;
if (lsav < 0 && (buf[cpos] == '\\' || cpos = 0 ;
buf[cpos] == '/' || break ;
buf[cpos] == ':' }
&& cpos == 1)) }
lsav = cpos;
#endif
}
TTflush();
if (nskip < 0) { if (nskip < 0) {
buf[ocpos] = 0; buf[ocpos] = 0;
#if UNIX #if UNIX
@ -702,8 +701,8 @@ int getstring( const char *prompt, char *buf, int nbuf, int eolchar)
TTbeep(); TTbeep();
#endif #endif
for( n = 0 ; n < cpos ; n++) { for( n = 0 ; n < cpos ; ) {
c = buf[ n] & 0xFF ; /* NEED better Unicode handling */ n += utf8_to_unicode( buf, n, nbuf, (unicode_t *) &c) ;
echov( c) ; echov( c) ;
} }
@ -716,13 +715,17 @@ int getstring( const char *prompt, char *buf, int nbuf, int eolchar)
} else if( c == 0x11 || c == 0x16) } else if( c == 0x11 || c == 0x16)
/* ^Q or ^V */ /* ^Q or ^V */
quote_f = TRUE ; quote_f = TRUE ;
else else {
/* store as it is */ /* store as it is */
if( cpos < nbuf - 1) { int n ;
buf[ cpos++] = c ;
n = get1unicode( &c) ; /* fetch multiple bytes */
if( cpos + n < nbuf) {
cpos += unicode_to_utf8( c, &buf[ cpos]) ;
echov( c) ; echov( c) ;
TTflush() ; TTflush() ;
} }
}
} }
TTflush() ; TTflush() ;

23
line.c
View File

@ -24,6 +24,7 @@
#include "buffer.h" #include "buffer.h"
#include "estruct.h" #include "estruct.h"
#include "mlout.h" #include "mlout.h"
#include "utf8.h"
#include "window.h" #include "window.h"
@ -91,28 +92,6 @@ char *getkill( void) {
* location. Error if you try and move out of the buffer. Set the flag if the * location. Error if you try and move out of the buffer. Set the flag if the
* line pointer for dot changes. * line pointer for dot changes.
*/ */
/* utf8_revdelta: how many extra bytes to step back to reach the lead byte.
 *
 * p   points at a byte inside a buffer, normally the last byte of a
 *     character.
 * pos is the index of that byte in the buffer, i.e. the number of bytes
 *     that may be read before p.
 *
 * Returns 1, 2 or 3 when *p is a continuation byte (10xxxxxx) ending a
 * 2, 3 or 4 byte sequence whose lead byte lies within the buffer.
 * Returns 0 otherwise: *p is not a continuation byte, the preceding bytes
 * do not form such a sequence, or the sequence would start before the
 * buffer.
 */
static unsigned utf8_revdelta( unsigned char *p, unsigned pos) {
	unsigned delta = 0 ;

	/* pos > 0: a continuation byte at index 0 has nothing in front of it,
	   so looking for its lead byte would read before the buffer */
	if( pos > 0 && (*p & 0xC0) == 0x80) {
		unsigned char c ;

		c = *--p ;
		if( (c & 0xE0) == 0xC0)	/* valid 2 bytes unicode seq */
			delta = 1 ;
		else if( ((c & 0xC0) == 0x80) && (pos > 1)) {
			c = *--p ;
			if( (c & 0xF0) == 0xE0)	/* valid 3 bytes unicode seq */
				delta = 2 ;
			else if( ((c & 0xC0) == 0x80) && (pos > 2)) {
				if( (p[ -1] & 0xF8) == 0xF0)	/* valid 4 bytes unicode seq */
					delta = 3 ;
			}
		}
	}

	return delta ;
}
boolean backchar( int f, int n) { boolean backchar( int f, int n) {
assert( f == TRUE || (f == FALSE && n == 1)) ; assert( f == TRUE || (f == FALSE && n == 1)) ;
if( n < 0) if( n < 0)

22
utf8.c
View File

@ -114,5 +114,27 @@ unsigned unicode_to_utf8( unicode_t c, char *utf8) {
return bytes ; return bytes ;
} }
/* utf8_revdelta: how many extra bytes to step back to reach the lead byte.
 *
 * p   points at a byte inside a buffer, normally the last byte of a
 *     character.
 * pos is the index of that byte in the buffer, i.e. the number of bytes
 *     that may be read before p.
 *
 * Returns 1, 2 or 3 when *p is a continuation byte (10xxxxxx) ending a
 * 2, 3 or 4 byte sequence whose lead byte lies within the buffer.
 * Returns 0 otherwise: *p is not a continuation byte, the preceding bytes
 * do not form such a sequence, or the sequence would start before the
 * buffer.
 */
unsigned utf8_revdelta( unsigned char *p, unsigned pos) {
	unsigned delta = 0 ;

	/* pos > 0: a continuation byte at index 0 has nothing in front of it,
	   so looking for its lead byte would read before the buffer */
	if( pos > 0 && (*p & 0xC0) == 0x80) {
		unsigned char c ;

		c = *--p ;
		if( (c & 0xE0) == 0xC0)	/* valid 2 bytes unicode seq */
			delta = 1 ;
		else if( ((c & 0xC0) == 0x80) && (pos > 1)) {
			c = *--p ;
			if( (c & 0xF0) == 0xE0)	/* valid 3 bytes unicode seq */
				delta = 2 ;
			else if( ((c & 0xC0) == 0x80) && (pos > 2)) {
				if( (p[ -1] & 0xF8) == 0xF0)	/* valid 4 bytes unicode seq */
					delta = 3 ;
			}
		}
	}

	return delta ;
}
/* end of utf8.c */ /* end of utf8.c */

1
utf8.h
View File

@ -5,6 +5,7 @@ typedef unsigned int unicode_t ;
unsigned utf8_to_unicode( char *line, unsigned index, unsigned len, unsigned utf8_to_unicode( char *line, unsigned index, unsigned len,
unicode_t *res) ; unicode_t *res) ;
unsigned utf8_revdelta( unsigned char *buf, unsigned pos) ;
unsigned unicode_to_utf8( unicode_t c, char *utf8) ; unsigned unicode_to_utf8( unicode_t c, char *utf8) ;
#endif #endif