From 8a1d7e2fa346d46af786402176baec936fb02872 Mon Sep 17 00:00:00 2001 From: Kalle Olavi Niemitalo Date: Sat, 5 Aug 2006 14:01:49 +0300 Subject: [PATCH] terminal UTF-8: Translate all input via UCS-4, #ifdef CONFIG_UTF_8. --- src/intl/charsets.c | 38 ++++++++++++++++++++++++++++------- src/intl/charsets.h | 1 + src/terminal/event.c | 47 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 71 insertions(+), 15 deletions(-) diff --git a/src/intl/charsets.c b/src/intl/charsets.c index dd0ee5b5c..31905ba26 100644 --- a/src/intl/charsets.c +++ b/src/intl/charsets.c @@ -458,22 +458,46 @@ utf_8_to_unicode(unsigned char **string, unsigned char *end) } #endif /* CONFIG_UTF_8 */ +/* Slow algorithm, the common part of cp2u and cp2utf_8. */ +static unicode_val_T +cp2u_shared(const struct codepage_desc *from, unsigned char c) +{ + int j; + + for (j = 0; from->table[j].c; j++) + if (from->table[j].c == c) + return from->table[j].u; + + return UCS_NO_CHAR; +} + +#ifdef CONFIG_UTF_8 +/* Slow algorithm, used for converting input from the terminal. */ +unicode_val_T +cp2u(int from, unsigned char c) +{ + from &= ~SYSTEM_CHARSET_FLAG; + + /* UTF-8 is a multibyte codepage and cannot be handled with + * this function. 
*/ + assert(codepages[from].table != table_utf_8); + if_assert_failed return UCS_NO_CHAR; + + if (c < 0x80) return c; + else return cp2u_shared(&codepages[from], c); +} +#endif /* CONFIG_UTF_8 */ + /* This slow and ugly code is used by the terminal utf_8_io */ unsigned char * cp2utf_8(int from, int c) { - int j; - from &= ~SYSTEM_CHARSET_FLAG; if (codepages[from].table == table_utf_8 || c < 128) return strings[c]; - for (j = 0; codepages[from].table[j].c; j++) - if (codepages[from].table[j].c == c) - return encode_utf_8(codepages[from].table[j].u); - - return encode_utf_8(UCS_NO_CHAR); + return encode_utf_8(cp2u_shared(&codepages[from], c)); } static void diff --git a/src/intl/charsets.h b/src/intl/charsets.h index 246606b2a..8d11707dc 100644 --- a/src/intl/charsets.h +++ b/src/intl/charsets.h @@ -64,6 +64,7 @@ int utf8_cells2bytes(unsigned char *, int, unsigned char *); inline int unicode_to_cell(unicode_val_T); inline int strlen_utf8(unsigned char **); inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *); +unicode_val_T cp2u(int, unsigned char); #endif /* CONFIG_UTF_8 */ unsigned char *cp2utf_8(int, int); diff --git a/src/terminal/event.c b/src/terminal/event.c index c45d0b988..7737cec5f 100644 --- a/src/terminal/event.c +++ b/src/terminal/event.c @@ -264,14 +264,14 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev) { int utf8_io = -1; int key = ilev->info.keyboard.key; + int modifier = ilev->info.keyboard.modifier; if (key >= 0x100) key = -key; - set_kbd_term_event(&tev, key, ilev->info.keyboard.modifier); reset_timer(); - if (check_kbd_modifier(&tev, KBD_MOD_CTRL) && (key == 'l' || key == 'L')) { + if (modifier == KBD_MOD_CTRL && (key == 'l' || key == 'L')) { redraw_terminal_cls(term); break; @@ -280,9 +280,32 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev) return 0; } + /* Character Conversions. 
*/ #ifdef CONFIG_UTF_8 - utf8_io = !!term->utf8; + /* struct term_event_keyboard carries bytes in the + * charset of the terminal. + * - If the "utf_8_io" option (i.e. term->utf8) is + * true or the "charset" option refers to UTF-8, + * then handle_interlink_event() converts from UTF-8 + * to UCS-4, and term_send_ucs() converts from UCS-4 + * to the codepage specified with the "charset" option. + * - Otherwise, handle_interlink_event() converts from + * the codepage specified with the "charset" option + * to UCS-4, and term_send_ucs() converts right back. + * TO DO: Change struct term_event_keyboard to carry + * UCS-4 instead, reducing these conversions. */ + utf8_io = term->utf8 + || is_cp_utf8(get_opt_codepage_tree(term->spec, "charset")); #else + /* struct term_event_keyboard carries bytes in the + * charset of the terminal. + * - If the "utf_8_io" option is true, then + * handle_interlink_event() converts from UTF-8 to + * UCS-4, and term_send_ucs() converts from UCS-4 to + * the codepage specified with the "charset" option; + * this codepage cannot be UTF-8. + * - Otherwise, handle_interlink_event() passes the + * bytes straight through. 
*/ utf8_io = get_opt_bool_tree(term->spec, "utf_8_io"); #endif /* CONFIG_UTF_8 */ @@ -295,19 +318,27 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev) if (u < interlink->utf_8.min) u = UCS_NO_CHAR; - term_send_ucs(term, u, - get_kbd_modifier(&tev)); + term_send_ucs(term, u, modifier); } break; } else { interlink->utf_8.len = 0; - term_send_ucs(term, UCS_NO_CHAR, - get_kbd_modifier(&tev)); + term_send_ucs(term, UCS_NO_CHAR, modifier); } } if (key < 0x80 || key > 0xFF || !utf8_io) { +#ifdef CONFIG_UTF_8 + if (key >= 0 && key <= 0xFF && !utf8_io) { + key = cp2u(get_opt_codepage_tree(term->spec, + "charset"), + key); + term_send_ucs(term, key, modifier); + break; + } +#endif /* CONFIG_UTF_8 */ + set_kbd_term_event(&tev, key, modifier); term_send_event(term, &tev); break; @@ -326,7 +357,7 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev) break; } - term_send_ucs(term, UCS_NO_CHAR, get_kbd_modifier(&tev)); + term_send_ucs(term, UCS_NO_CHAR, modifier); break; }