From a9da075eb5bdf0b15905c9b93d964f1a0a7d6507 Mon Sep 17 00:00:00 2001
From: Kalle Olavi Niemitalo
Date: Sat, 5 Aug 2006 19:38:15 +0300
Subject: [PATCH] terminal UTF-8: term_event_keyboard.key is UCS-4, #ifdef CONFIG_UTF_8.

Form fields and BFU text-input widgets then convert from UCS-4 to
UTF-8. If not all UTF-8 bytes fit, they don't insert anything. Thus
it is no longer possible to get invalid UTF-8 by hitting the length
limit.

It is unclear to me which charset is supposed to be used for strings
in internal buffers. I made BFU insert UTF-8 whenever CONFIG_UTF_8,
but form fields use the charset of the terminal; that may have to be
changed.

As a side effect, this change should solve bug 782, because
term_send_ucs no longer encodes in UTF-8 if CONFIG_UTF_8 is defined.
I think the UTF-8 and codepage encoding calls I added are safe, too.
A similar bug may still surface somewhere else, but 782 could be
closed for now.

This change also lays the foundation for binding actions to non-ASCII
keys, but the keystroke name parser doesn't yet support that. The
CONFIG_UTF_8 mode does not currently support non-ASCII characters in
hot keys, either.
--- src/bfu/inpfield.c | 33 +++++++++++-------------- src/terminal/event.c | 19 +++++++++------ src/terminal/event.h | 14 +++++++++-- src/terminal/kbd.h | 4 +-- src/viewer/text/form.c | 55 +++++++++++++++++------------------------- 5 files changed, 61 insertions(+), 64 deletions(-) diff --git a/src/bfu/inpfield.c b/src/bfu/inpfield.c index ff7c9c1e..dcb948a3 100644 --- a/src/bfu/inpfield.c +++ b/src/bfu/inpfield.c @@ -680,32 +680,27 @@ kbd_field(struct dialog_data *dlg_data, struct widget_data *widget_data) if (check_kbd_textinput_key(ev)) { unsigned char *text = widget_data->cdata; int textlen = strlen(text); +#ifdef CONFIG_UTF_8 + const unsigned char *ins = encode_utf_8(get_kbd_key(ev)); + int inslen = utf8charlen(ins); +#else /* !CONFIG_UTF_8 */ + const int inslen = 1; +#endif /* !CONFIG_UTF_8 */ - if (textlen >= widget_data->widget->datalen - 1) + if (textlen >= widget_data->widget->datalen - inslen) goto display_field; /* Shift to position of the cursor */ textlen -= widget_data->info.field.cpos; - text += widget_data->info.field.cpos++; + text += widget_data->info.field.cpos; - memmove(text + 1, text, textlen + 1); - *text = get_kbd_key(ev); + memmove(text + inslen, text, textlen + 1); #ifdef CONFIG_UTF_8 - if (term->utf8) { - static unsigned char buf[7]; - unsigned char *t = buf; - static int i = 0; - unicode_val_T data; - - buf[i++] = *text; - buf[i] = '\0'; - data = utf_8_to_unicode(&t, buf + i); - if (i == 6) i = 0; - if (data == UCS_NO_CHAR) - return EVENT_PROCESSED; - else i = 0; - } -#endif /* CONFIG_UTF_8 */ + memcpy(text, ins, inslen); +#else /* !CONFIG_UTF_8 */ + *text = get_kbd_key(ev); +#endif /* !CONFIG_UTF_8 */ + widget_data->info.field.cpos += inslen; goto display_field; } } diff --git a/src/terminal/event.c b/src/terminal/event.c index 7737cec5..f106fa40 100644 --- a/src/terminal/event.c +++ b/src/terminal/event.c @@ -129,9 +129,15 @@ term_send_event(struct terminal *term, struct term_event *ev) static void term_send_ucs(struct terminal *term, 
unicode_val_T u, int modifier) { - unsigned char *recoded; +#ifdef CONFIG_UTF_8 struct term_event ev; + set_kbd_term_event(&ev, u, modifier); + term_send_event(term, &ev); +#else /* !CONFIG_UTF_8 */ + struct term_event ev; + unsigned char *recoded; + set_kbd_term_event(&ev, KBD_UNDEF, modifier); recoded = u2cp_no_nbsp(u, get_opt_codepage_tree(term->spec, "charset")); if (!recoded) recoded = "*"; @@ -140,6 +146,7 @@ term_send_ucs(struct terminal *term, unicode_val_T u, int modifier) term_send_event(term, &ev); recoded++; } +#endif /* !CONFIG_UTF_8 */ } static void @@ -282,18 +289,14 @@ handle_interlink_event(struct terminal *term, struct interlink_event *ilev) /* Character Conversions. */ #ifdef CONFIG_UTF_8 - /* struct term_event_keyboard carries bytes in the - * charset of the terminal. + /* struct term_event_keyboard carries UCS-4. * - If the "utf_8_io" option (i.e. term->utf8) is * true or the "charset" option refers to UTF-8, * then handle_interlink_event() converts from UTF-8 - * to UCS-4, and term_send_ucs() converts from UCS-4 - * to the codepage specified with the "charset" option. + * to UCS-4. * - Otherwise, handle_interlink_event() converts from * the codepage specified with the "charset" option - * to UCS-4, and term_send_ucs() converts right back. - * TO DO: Change struct term_event_keyboard to carry - * UCS-4 instead, reducing these conversions. */ + * to UCS-4. 
*/ utf8_io = term->utf8 || is_cp_utf8(get_opt_codepage_tree(term->spec, "charset")); #else diff --git a/src/terminal/event.h b/src/terminal/event.h index 9a491e37..63a420a9 100644 --- a/src/terminal/event.h +++ b/src/terminal/event.h @@ -165,8 +165,18 @@ void in_term(struct terminal *); #define get_kbd_modifier(event) (kbd_get_modifier(&(event)->info.keyboard)) #define check_kbd_modifier(event, mod) (kbd_modifier_is(&(event)->info.keyboard, (mod))) -#define check_kbd_textinput_key(event) (get_kbd_key(event) >= ' ' && get_kbd_key(event) < 256 && check_kbd_modifier(event, KBD_MOD_NONE)) -#define check_kbd_label_key(event) (get_kbd_key(event) > ' ' && get_kbd_key(event) < 256) +#define check_kbd_textinput_key(event) (get_kbd_key(event) >= ' ' && check_kbd_modifier(event, KBD_MOD_NONE)) +#ifdef CONFIG_UTF_8 +/* We must currently limit hotkeys of labels to ASCII, because + * get_kbd_key(event) is in UCS-4 and various event handlers pass it + * to toupper() if check_kbd_label_key() returns true. + * TO DO: Change the event handlers to use unicode_fold_label_case() + * instead. The code that extracts the hotkey from the label string + * will also have to be changed. */ +#define check_kbd_label_key(event) (get_kbd_key(event) > ' ' && get_kbd_key(event) <= 0x7F) +#else /* !CONFIG_UTF_8 */ +#define check_kbd_label_key(event) (get_kbd_key(event) > ' ') +#endif /* !CONFIG_UTF_8 */ /* For mouse events handling */ diff --git a/src/terminal/kbd.h b/src/terminal/kbd.h index b9672e46..a4ee0ca0 100644 --- a/src/terminal/kbd.h +++ b/src/terminal/kbd.h @@ -7,8 +7,8 @@ struct term_event_keyboard { /* Values <= -0x100 are special; e.g. KBD_ENTER. * Values between -0xFF and -2 are not used yet; treat as special. * Value == -1 is KBD_UNDEF; not sent via socket. - * Values between 0 and 0xFF are bytes received from the terminal. - * Values >= 0x100 are not used. */ + * Values >= 0 are characters received from the terminal; + * in UCS-4 #ifdef CONFIG_UTF_8. 
*/ int key; int modifier; }; diff --git a/src/viewer/text/form.c b/src/viewer/text/form.c index 867e0019..ee3ad333 100644 --- a/src/viewer/text/form.c +++ b/src/viewer/text/form.c @@ -1712,8 +1712,8 @@ field_op(struct session *ses, struct document_view *doc_view, } if (form_field_is_readonly(fc) - || strlen(fs->value) >= fc->maxlength #ifndef CONFIG_UTF_8 + || strlen(fs->value) >= fc->maxlength || !insert_in_string(&fs->value, fs->state, "?", 1) #endif /* CONFIG_UTF_8 */ ) @@ -1721,42 +1721,31 @@ field_op(struct session *ses, struct document_view *doc_view, status = FRAME_EVENT_OK; break; } -#ifdef CONFIG_UTF_8 - if (utf8) { - static unsigned char buf[7]; - static int i = 0; - unicode_val_T data; - unsigned char *t; - t = buf; - buf[i++] = get_kbd_key(ev); - buf[i] = 0; - data = utf_8_to_unicode(&t, buf + i); - if (data != UCS_NO_CHAR) { - if (!insert_in_string(&fs->value, fs->state, buf, i)) { - i = 0; - return FRAME_EVENT_OK; - } - fs->state += i; - if (fc->type == FC_PASSWORD) - fs->state_cell++; - else if (fc->type == FC_TEXTAREA) - fs->state_cell = 0; - else - fs->state_cell += unicode_to_cell(data); - i = 0; +#ifdef CONFIG_UTF_8 + { + /* The charset of the terminal; we assume + * fs->value is in this charset. + * (Is that OK?) */ + int cp = get_opt_codepage_tree(ses->tab->term->spec, + "charset"); + + text = u2cp_no_nbsp(get_kbd_key(ev), cp); + length = strlen(text); + + if (strlen(fs->value) + length > fc->maxlength + || !insert_in_string(&fs->value, fs->state, text, length)) { + status = FRAME_EVENT_OK; break; } - if (i == 6) { - i = 0; - } - return FRAME_EVENT_OK; - - } else { - if (!insert_in_string(&fs->value, fs->state, "?", 1)) - return FRAME_EVENT_OK; - fs->value[fs->state++] = get_kbd_key(ev); + fs->state += length; + if (fc->type == FC_PASSWORD) + fs->state_cell += (is_cp_utf8(cp) ? 1 : length); + else if (fc->type == FC_TEXTAREA) + fs->state_cell = 0; + else + fs->state_cell += (is_cp_utf8(cp) ? 
unicode_to_cell(get_kbd_key(ev)) : length); } #else fs->value[fs->state++] = get_kbd_key(ev);