From 8a1d7e2fa346d46af786402176baec936fb02872 Mon Sep 17 00:00:00 2001
From: Kalle Olavi Niemitalo <kon@iki.fi>
Date: Sat, 5 Aug 2006 14:01:49 +0300
Subject: [PATCH] terminal UTF-8: Translate all input via UCS-4, #ifdef
 CONFIG_UTF_8.

---
 src/intl/charsets.c  | 38 ++++++++++++++++++++++++++++-------
 src/intl/charsets.h  |  1 +
 src/terminal/event.c | 47 ++++++++++++++++++++++++++++++++++++--------
 3 files changed, 71 insertions(+), 15 deletions(-)

diff --git a/src/intl/charsets.c b/src/intl/charsets.c
index dd0ee5b5c..31905ba26 100644
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -458,22 +458,46 @@ utf_8_to_unicode(unsigned char **string, unsigned char *end)
 }
 #endif /* CONFIG_UTF_8 */
 
+/* Slow algorithm, the common part of cp2u and cp2utf_8.  */
+static unicode_val_T
+cp2u_shared(const struct codepage_desc *from, unsigned char c)
+{
+	int j;
+
+	for (j = 0; from->table[j].c; j++)
+		if (from->table[j].c == c)
+			return from->table[j].u;
+
+	return UCS_NO_CHAR;
+}
+
+#ifdef CONFIG_UTF_8
+/* Slow algorithm, used for converting input from the terminal.  */
+unicode_val_T
+cp2u(int from, unsigned char c)
+{
+	from &= ~SYSTEM_CHARSET_FLAG;
+
+	/* UTF-8 is a multibyte codepage and cannot be handled with
+	 * this function.  */
+	assert(codepages[from].table != table_utf_8);
+	if_assert_failed return UCS_NO_CHAR;
+
+	if (c < 0x80) return c;
+	else return cp2u_shared(&codepages[from], c);
+}
+#endif	/* CONFIG_UTF_8 */
+
 /* This slow and ugly code is used by the terminal utf_8_io */
 unsigned char *
 cp2utf_8(int from, int c)
 {
-	int j;
-
 	from &= ~SYSTEM_CHARSET_FLAG;
 
 	if (codepages[from].table == table_utf_8 || c < 128)
 		return strings[c];
 
-	for (j = 0; codepages[from].table[j].c; j++)
-		if (codepages[from].table[j].c == c)
-			return encode_utf_8(codepages[from].table[j].u);
-
-	return encode_utf_8(UCS_NO_CHAR);
+	return encode_utf_8(cp2u_shared(&codepages[from], c));
 }
 
 static void
diff --git a/src/intl/charsets.h b/src/intl/charsets.h
index 246606b2a..8d11707dc 100644
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -64,6 +64,7 @@ int utf8_cells2bytes(unsigned char *, int, unsigned char *);
 inline int unicode_to_cell(unicode_val_T);
 inline int strlen_utf8(unsigned char **);
 inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
+unicode_val_T cp2u(int, unsigned char);
 #endif /* CONFIG_UTF_8 */
 
 unsigned char *cp2utf_8(int, int);
diff --git a/src/terminal/event.c b/src/terminal/event.c
index c45d0b988..7737cec5f 100644
--- a/src/terminal/event.c
+++ b/src/terminal/event.c
@@ -264,14 +264,14 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
 	{
 		int utf8_io = -1;
 		int key = ilev->info.keyboard.key;
+		int modifier = ilev->info.keyboard.modifier;
 
 		if (key >= 0x100)
 			key = -key;
-		set_kbd_term_event(&tev, key, ilev->info.keyboard.modifier);
 
 		reset_timer();
 
-		if (check_kbd_modifier(&tev, KBD_MOD_CTRL) && (key == 'l' || key == 'L')) {
+		if (modifier == KBD_MOD_CTRL && (key == 'l' || key == 'L')) {
 			redraw_terminal_cls(term);
 			break;
 
@@ -280,9 +280,32 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
 			return 0;
 		}
 
+		/* Character Conversions.  */
 #ifdef CONFIG_UTF_8
-		utf8_io = !!term->utf8;
+		/* struct term_event_keyboard carries bytes in the
+		 * charset of the terminal.
+		 * - If the "utf_8_io" option (i.e. term->utf8) is
+		 *   true or the "charset" option refers to UTF-8,
+		 *   then handle_interlink_event() converts from UTF-8
+		 *   to UCS-4, and term_send_ucs() converts from UCS-4
+		 *   to the codepage specified with the "charset" option.
+		 * - Otherwise, handle_interlink_event() converts from
+		 *   the codepage specified with the "charset" option
+		 *   to UCS-4, and term_send_ucs() converts right back.
+		 * TO DO: Change struct term_event_keyboard to carry
+		 * UCS-4 instead, reducing these conversions.  */
+		utf8_io = term->utf8
+			|| is_cp_utf8(get_opt_codepage_tree(term->spec, "charset"));
 #else
+		/* struct term_event_keyboard carries bytes in the
+		 * charset of the terminal.
+		 * - If the "utf_8_io" option is true, then
+		 *   handle_interlink_event() converts from UTF-8 to
+		 *   UCS-4, and term_send_ucs() converts from UCS-4 to
+		 *   the codepage specified with the "charset" option;
+		 *   this codepage cannot be UTF-8.
+		 * - Otherwise, handle_interlink_event() passes the
+		 *   bytes straight through.  */
 		utf8_io = get_opt_bool_tree(term->spec, "utf_8_io");
 #endif /* CONFIG_UTF_8 */
 
@@ -295,19 +318,27 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
 
 					if (u < interlink->utf_8.min)
 						u = UCS_NO_CHAR;
-					term_send_ucs(term, u,
-						      get_kbd_modifier(&tev));
+					term_send_ucs(term, u, modifier);
 				}
 				break;
 
 			} else {
 				interlink->utf_8.len = 0;
-				term_send_ucs(term, UCS_NO_CHAR,
-					      get_kbd_modifier(&tev));
+				term_send_ucs(term, UCS_NO_CHAR, modifier);
 			}
 		}
 
 		if (key < 0x80 || key > 0xFF || !utf8_io) {
+#ifdef CONFIG_UTF_8
+			if (key >= 0 && key <= 0xFF && !utf8_io) {
+				key = cp2u(get_opt_codepage_tree(term->spec,
+								 "charset"),
+					   key);
+				term_send_ucs(term, key, modifier);
+				break;
+			}
+#endif /* !CONFIG_UTF_8 */
+			set_kbd_term_event(&tev, key, modifier);
 			term_send_event(term, &tev);
 			break;
 
@@ -326,7 +357,7 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev)
 			break;
 		}
 
-		term_send_ucs(term, UCS_NO_CHAR, get_kbd_modifier(&tev));
+		term_send_ucs(term, UCS_NO_CHAR, modifier);
 		break;
 	}