1
0
mirror of https://github.com/rkd77/elinks.git synced 2025-01-03 14:57:44 -05:00

terminal UTF-8: Translate all input via UCS-4, #ifdef CONFIG_UTF_8.

This commit is contained in:
Kalle Olavi Niemitalo 2006-08-05 14:01:49 +03:00 committed by Miciah Dashiel Butler Masters
parent 1e9f5f6743
commit 8a1d7e2fa3
3 changed files with 71 additions and 15 deletions

View File

@ -458,22 +458,46 @@ utf_8_to_unicode(unsigned char **string, unsigned char *end)
} }
#endif /* CONFIG_UTF_8 */ #endif /* CONFIG_UTF_8 */
/* Slow algorithm, the common part of cp2u and cp2utf_8.
 *
 * Walks from->table entry by entry until it reaches the entry whose
 * `c' member is zero, which ends the table.  The `u' member of the
 * first entry whose `c' equals the requested byte is returned; if the
 * end of the table is reached first, the result is UCS_NO_CHAR.
 *
 * Because the scan stops on a zero `c', a request for byte 0 can never
 * match and always yields UCS_NO_CHAR. */
static unicode_val_T
cp2u_shared(const struct codepage_desc *from, unsigned char c)
{
	unicode_val_T found = UCS_NO_CHAR;
	int pos;

	for (pos = 0; from->table[pos].c != 0; pos++) {
		if (from->table[pos].c != c)
			continue;

		/* First match wins; later duplicates are never examined. */
		found = from->table[pos].u;
		break;
	}

	return found;
}
#ifdef CONFIG_UTF_8
/* Slow algorithm, used for converting input from the terminal.
 *
 * @from is an index into codepages[]; the SYSTEM_CHARSET_FLAG bit is
 * masked off before it is used as such.
 * @c is a single byte in that codepage.
 *
 * Bytes below 0x80 are returned unchanged.  Any other byte is looked
 * up in the codepage's table via cp2u_shared(), which returns
 * UCS_NO_CHAR when the table has no entry for it. */
unicode_val_T
cp2u(int from, unsigned char c)
{
	from &= ~SYSTEM_CHARSET_FLAG;

	/* UTF-8 is a multibyte codepage, so a lone byte cannot be
	 * converted here; callers must not pass such a codepage.
	 * NOTE(review): if_assert_failed presumably guards the statement
	 * that follows it when the assertion above did not hold --
	 * confirm against the project's assertion macros. */
	assert(codepages[from].table != table_utf_8);
	if_assert_failed return UCS_NO_CHAR;

	if (c >= 0x80)
		return cp2u_shared(&codepages[from], c);

	return c;
}
#endif /* CONFIG_UTF_8 */
/* This slow and ugly code is used by the terminal utf_8_io */ /* This slow and ugly code is used by the terminal utf_8_io */
unsigned char * unsigned char *
cp2utf_8(int from, int c) cp2utf_8(int from, int c)
{ {
int j;
from &= ~SYSTEM_CHARSET_FLAG; from &= ~SYSTEM_CHARSET_FLAG;
if (codepages[from].table == table_utf_8 || c < 128) if (codepages[from].table == table_utf_8 || c < 128)
return strings[c]; return strings[c];
for (j = 0; codepages[from].table[j].c; j++) return encode_utf_8(cp2u_shared(&codepages[from], c));
if (codepages[from].table[j].c == c)
return encode_utf_8(codepages[from].table[j].u);
return encode_utf_8(UCS_NO_CHAR);
} }
static void static void

View File

@ -64,6 +64,7 @@ int utf8_cells2bytes(unsigned char *, int, unsigned char *);
inline int unicode_to_cell(unicode_val_T); inline int unicode_to_cell(unicode_val_T);
inline int strlen_utf8(unsigned char **); inline int strlen_utf8(unsigned char **);
inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *); inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
unicode_val_T cp2u(int, unsigned char);
#endif /* CONFIG_UTF_8 */ #endif /* CONFIG_UTF_8 */
unsigned char *cp2utf_8(int, int); unsigned char *cp2utf_8(int, int);

View File

@ -264,14 +264,14 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
{ {
int utf8_io = -1; int utf8_io = -1;
int key = ilev->info.keyboard.key; int key = ilev->info.keyboard.key;
int modifier = ilev->info.keyboard.modifier;
if (key >= 0x100) if (key >= 0x100)
key = -key; key = -key;
set_kbd_term_event(&tev, key, ilev->info.keyboard.modifier);
reset_timer(); reset_timer();
if (check_kbd_modifier(&tev, KBD_MOD_CTRL) && (key == 'l' || key == 'L')) { if (modifier == KBD_MOD_CTRL && (key == 'l' || key == 'L')) {
redraw_terminal_cls(term); redraw_terminal_cls(term);
break; break;
@ -280,9 +280,32 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
return 0; return 0;
} }
/* Character Conversions. */
#ifdef CONFIG_UTF_8 #ifdef CONFIG_UTF_8
utf8_io = !!term->utf8; /* struct term_event_keyboard carries bytes in the
* charset of the terminal.
* - If the "utf_8_io" option (i.e. term->utf8) is
* true or the "charset" option refers to UTF-8,
* then handle_interlink_event() converts from UTF-8
* to UCS-4, and term_send_ucs() converts from UCS-4
* to the codepage specified with the "charset" option.
* - Otherwise, handle_interlink_event() converts from
* the codepage specified with the "charset" option
* to UCS-4, and term_send_ucs() converts right back.
* TO DO: Change struct term_event_keyboard to carry
* UCS-4 instead, reducing these conversions. */
utf8_io = term->utf8
|| is_cp_utf8(get_opt_codepage_tree(term->spec, "charset"));
#else #else
/* struct term_event_keyboard carries bytes in the
* charset of the terminal.
* - If the "utf_8_io" option is true, then
* handle_interlink_event() converts from UTF-8 to
* UCS-4, and term_send_ucs() converts from UCS-4 to
* the codepage specified with the "charset" option;
* this codepage cannot be UTF-8.
* - Otherwise, handle_interlink_event() passes the
* bytes straight through. */
utf8_io = get_opt_bool_tree(term->spec, "utf_8_io"); utf8_io = get_opt_bool_tree(term->spec, "utf_8_io");
#endif /* CONFIG_UTF_8 */ #endif /* CONFIG_UTF_8 */
@ -295,19 +318,27 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
if (u < interlink->utf_8.min) if (u < interlink->utf_8.min)
u = UCS_NO_CHAR; u = UCS_NO_CHAR;
term_send_ucs(term, u, term_send_ucs(term, u, modifier);
get_kbd_modifier(&tev));
} }
break; break;
} else { } else {
interlink->utf_8.len = 0; interlink->utf_8.len = 0;
term_send_ucs(term, UCS_NO_CHAR, term_send_ucs(term, UCS_NO_CHAR, modifier);
get_kbd_modifier(&tev));
} }
} }
if (key < 0x80 || key > 0xFF || !utf8_io) { if (key < 0x80 || key > 0xFF || !utf8_io) {
#ifdef CONFIG_UTF_8
if (key >= 0 && key <= 0xFF && !utf8_io) {
key = cp2u(get_opt_codepage_tree(term->spec,
"charset"),
key);
term_send_ucs(term, key, modifier);
break;
}
#endif /* CONFIG_UTF_8 */
set_kbd_term_event(&tev, key, modifier);
term_send_event(term, &tev); term_send_event(term, &tev);
break; break;
@ -326,7 +357,7 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
break; break;
} }
term_send_ucs(term, UCS_NO_CHAR, get_kbd_modifier(&tev)); term_send_ucs(term, UCS_NO_CHAR, modifier);
break; break;
} }