Improve support of utf8 in filename completion and key in of command arguments.

This commit is contained in:
Renaud 2019-08-12 21:41:51 +08:00
parent 81431d2d76
commit 78a7a3ba4e
4 changed files with 71 additions and 66 deletions

91
input.c
View File

@ -308,7 +308,7 @@ int tgetc(void)
if (kbdptr < kbdend)
return (int) *kbdptr++;
/* at the end of last repitition? */
/* at the end of last repetition? */
if (--kbdrep < 1) {
kbdmode = STOP;
#if VISMAC == 0
@ -360,8 +360,28 @@ int get1key( void) {
}
/* GETCMD: Get a command from the keyboard. Process all applicable
prefix keys
*/
prefix keys */
static int get1unicode( int *k) {
/* Accept UTF-8 sequence */
int c = *k ;
if( c > 0xC1 && c <= 0xF4) {
char utf[ 4] ;
char cc ;
utf[ 0] = c ;
utf[ 1] = cc = get1key() ;
if( (c & 0x20) && ((cc & 0xC0) == 0x80)) { /* at least 3 bytes and a valid encoded char */
utf[ 2] = cc = get1key() ;
if( (c & 0x10) && ((cc & 0xC0) == 0x80)) /* at least 4 bytes and a valid encoded char */
utf[ 3] = get1key() ;
}
return utf8_to_unicode( utf, 0, sizeof utf, (unicode_t *) k) ;
} else
return 1 ;
}
int getcmd(void)
{
int c; /* fetched keystroke */
@ -466,23 +486,7 @@ handle_CSI:
}
#ifdef CYGWIN
/* Accept UTF-8 sequence */
if( c <= 0xC1 || c > 0xF4)
return c ;
else {
char utf[ 4] ;
char cc ;
utf[ 0] = c ;
utf[ 1] = cc = get1key() ;
if( (c & 0x20) && ((cc & 0xC0) == 0x80)) { /* at least 3 bytes and a valid encoded char */
utf[ 2] = cc = get1key() ;
if( (c & 0x10) && ((cc & 0xC0) == 0x80)) /* at least 4 bytes and a valid encoded char */
utf[ 3] = get1key() ;
}
utf8_to_unicode( utf, 0, sizeof utf, (unicode_t *) &c) ;
}
get1unicode( &c) ;
#endif
/* otherwise, just return it */
@ -510,7 +514,7 @@ static void echov( int c) {
static void rubc( char c) {
rubout() ;
if( c < ' ' || c == 0x7F) {
if( (c >= 0 && c < ' ') || c == 0x7F) {
/* ^x range */
rubout() ;
if( c == '\n') {
@ -598,15 +602,13 @@ int getstring( const char *prompt, char *buf, int nbuf, int eolchar)
/* rubout/erase */
if (cpos != 0) {
rubc( buf[ --cpos]) ;
cpos -= utf8_revdelta( (unsigned char *) &buf[ cpos], cpos) ;
TTflush();
}
} else if( c == 0x15) {
/* C-U, kill */
while (cpos != 0)
rubc( buf[ --cpos]) ;
TTflush();
mlwrite( "%s", prompt) ;
cpos = 0 ;
#if COMPLC
} else if( (c == 0x09 || c == ' ') && file_f) {
/* TAB, complete file name */
@ -619,19 +621,16 @@ int getstring( const char *prompt, char *buf, int nbuf, int eolchar)
didtry = 1;
ocpos = cpos;
while (cpos != 0) {
rubc( buf[ --cpos]) ;
if (buf[cpos] == '*' || buf[cpos] == '?')
iswild = 1;
#if MSDOS
if (lsav < 0 && (buf[cpos] == '\\' ||
buf[cpos] == '/' ||
buf[cpos] == ':'
&& cpos == 1))
lsav = cpos;
#endif
}
TTflush();
mlwrite( "%s", prompt) ;
while( cpos != 0) {
c = buf[ --cpos] ;
if( c == '*' || c == '?') {
iswild = 1 ;
cpos = 0 ;
break ;
}
}
if (nskip < 0) {
buf[ocpos] = 0;
#if UNIX
@ -702,8 +701,8 @@ int getstring( const char *prompt, char *buf, int nbuf, int eolchar)
TTbeep();
#endif
for( n = 0 ; n < cpos ; n++) {
c = buf[ n] & 0xFF ; /* NEED better Unicode handling */
for( n = 0 ; n < cpos ; ) {
n += utf8_to_unicode( buf, n, nbuf, (unicode_t *) &c) ;
echov( c) ;
}
@ -716,13 +715,17 @@ int getstring( const char *prompt, char *buf, int nbuf, int eolchar)
} else if( c == 0x11 || c == 0x16)
/* ^Q or ^V */
quote_f = TRUE ;
else
else {
/* store as it is */
if( cpos < nbuf - 1) {
buf[ cpos++] = c ;
int n ;
n = get1unicode( &c) ; /* fetch multiple bytes */
if( cpos + n < nbuf) {
cpos += unicode_to_utf8( c, &buf[ cpos]) ;
echov( c) ;
TTflush() ;
}
}
}
TTflush() ;

23
line.c
View File

@ -24,6 +24,7 @@
#include "buffer.h"
#include "estruct.h"
#include "mlout.h"
#include "utf8.h"
#include "window.h"
@ -91,28 +92,6 @@ char *getkill( void) {
* location. Error if you try and move out of the buffer. Set the flag if the
* line pointer for dot changes.
*/
static unsigned utf8_revdelta( unsigned char *p, unsigned pos) {
unsigned delta = 0 ;
if( (*p & 0xC0) == 0x80) {
unsigned char c ;
c = *--p ;
if( (c & 0xE0) == 0xC0) /* valid 2 bytes unicode seq */
delta = 1 ;
else if( ((c & 0xC0) == 0x80) && (pos > 1)) {
c = *--p ;
if( (c & 0xF0) == 0xE0) /* valid 3 bytes unicode seq */
delta = 2 ;
else if( ((c & 0xC0) == 0x80) && (pos > 2))
if( (p[ -1] & 0xF8) == 0xF0) /* valid 4 bytes unicode seq */
delta = 3 ;
}
}
return delta ;
}
boolean backchar( int f, int n) {
assert( f == TRUE || (f == FALSE && n == 1)) ;
if( n < 0)

22
utf8.c
View File

@ -114,5 +114,27 @@ unsigned unicode_to_utf8( unicode_t c, char *utf8) {
return bytes ;
}
unsigned utf8_revdelta( unsigned char *p, unsigned pos) {
unsigned delta = 0 ;
if( (*p & 0xC0) == 0x80) {
unsigned char c ;
c = *--p ;
if( (c & 0xE0) == 0xC0) /* valid 2 bytes unicode seq */
delta = 1 ;
else if( ((c & 0xC0) == 0x80) && (pos > 1)) {
c = *--p ;
if( (c & 0xF0) == 0xE0) /* valid 3 bytes unicode seq */
delta = 2 ;
else if( ((c & 0xC0) == 0x80) && (pos > 2))
if( (p[ -1] & 0xF8) == 0xF0) /* valid 4 bytes unicode seq */
delta = 3 ;
}
}
return delta ;
}
/* end of utf8.c */

1
utf8.h
View File

@ -5,6 +5,7 @@ typedef unsigned int unicode_t ;
unsigned utf8_to_unicode( char *line, unsigned index, unsigned len,
unicode_t *res) ;
unsigned utf8_revdelta( unsigned char *buf, unsigned pos) ;
unsigned unicode_to_utf8( unicode_t c, char *utf8) ;
#endif